gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
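/* Editorial note (illustrative, not part of the original source): MSW and LSW
   are word indices rather than byte offsets.  On a little-endian target
   MSW == 1 and LSW == 0, so code such as adjust_address (x, SImode, 4 * LSW)
   in print_operand below reaches the least significant SImode half of a
   double-word MEM at byte offset 0 and the most significant half at byte
   offset 4 * MSW == 4.  */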
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which cpu are we scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
98 or bcc insn. */
100 rtx sh_compare_op0;
101 rtx sh_compare_op1;
103 /* Provides the class number of the smallest class containing
104 reg number. */
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
247 #endif
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
267 tree, bool);
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
269 tree, bool);
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
271 tree, bool);
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented for re-enabling sched1. With the
321 help of these macros we limit the movement of insns in sched1 to
322 reduce the register pressure. The overall idea is to keep count of the SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross some threshold values, give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order); but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip up to
328 a maximum of 8 cycles so that such insns may move from Q -> R.
330 The descriptions of the hooks are as below:
332 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
333 scheduler; it is called inside the sched_init function just after
334 the find_insn_reg_weights function call. It is used to calculate the SImode
335 and SFmode weights of the insns of basic blocks, much like what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
341 (Q)->(R).
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high; reorder the ready queue so that the insn with lowest LUID will be
345 issued next.
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
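#if 0
/* Purely illustrative sketch (editorial, never compiled): the reorder decision
   described above amounts to re-sorting the ready queue by LUID once the
   running SImode/SFmode pressure crosses its threshold.  high_pressure () and
   ready_reorder () are the static helpers declared earlier in this file;
   sh_reorder_sketch itself is a hypothetical name used only here.  */
static void
sh_reorder_sketch (rtx *ready, int n_ready)
{
  /* Reorder only under high pressure; ready_reorder puts the insn with the
     lowest LUID (most likely to free registers) first.  */
  if (high_pressure (SImode) || high_pressure (SFmode))
    ready_reorder (ready, n_ready);
}
#endif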
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
407 #ifdef HAVE_AS_TLS
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
410 #endif
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
461 #ifdef SYMBIAN
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
470 #endif /* SYMBIAN */
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
475 #endif
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
484 static bool
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
488 switch (code)
490 case OPT_m1:
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
492 return true;
494 case OPT_m2:
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
496 return true;
498 case OPT_m2a:
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
500 return true;
502 case OPT_m2a_nofpu:
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
504 return true;
506 case OPT_m2a_single:
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
508 return true;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
512 return true;
514 case OPT_m2e:
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
516 return true;
518 case OPT_m3:
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
520 return true;
522 case OPT_m3e:
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
524 return true;
526 case OPT_m4:
527 case OPT_m4_100:
528 case OPT_m4_200:
529 case OPT_m4_300:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
531 return true;
533 case OPT_m4_nofpu:
534 case OPT_m4_100_nofpu:
535 case OPT_m4_200_nofpu:
536 case OPT_m4_300_nofpu:
537 case OPT_m4_340:
538 case OPT_m4_400:
539 case OPT_m4_500:
540 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
541 return true;
543 case OPT_m4_single:
544 case OPT_m4_100_single:
545 case OPT_m4_200_single:
546 case OPT_m4_300_single:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
548 return true;
550 case OPT_m4_single_only:
551 case OPT_m4_100_single_only:
552 case OPT_m4_200_single_only:
553 case OPT_m4_300_single_only:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
555 return true;
557 case OPT_m4a:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
559 return true;
561 case OPT_m4a_nofpu:
562 case OPT_m4al:
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
564 return true;
566 case OPT_m4a_single:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
568 return true;
570 case OPT_m4a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
572 return true;
574 case OPT_m5_32media:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
576 return true;
578 case OPT_m5_32media_nofpu:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
580 return true;
582 case OPT_m5_64media:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
584 return true;
586 case OPT_m5_64media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
588 return true;
590 case OPT_m5_compact:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
592 return true;
594 case OPT_m5_compact_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
596 return true;
598 default:
599 return true;
603 /* Print the operand address in x to the stream. */
605 void
606 print_operand_address (FILE *stream, rtx x)
608 switch (GET_CODE (x))
610 case REG:
611 case SUBREG:
612 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
613 break;
615 case PLUS:
617 rtx base = XEXP (x, 0);
618 rtx index = XEXP (x, 1);
620 switch (GET_CODE (index))
622 case CONST_INT:
623 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
624 reg_names[true_regnum (base)]);
625 break;
627 case REG:
628 case SUBREG:
630 int base_num = true_regnum (base);
631 int index_num = true_regnum (index);
633 fprintf (stream, "@(r0,%s)",
634 reg_names[MAX (base_num, index_num)]);
635 break;
638 default:
639 gcc_unreachable ();
642 break;
644 case PRE_DEC:
645 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
646 break;
648 case POST_INC:
649 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
650 break;
652 default:
653 x = mark_constant_pool_use (x);
654 output_addr_const (stream, x);
655 break;
659 /* Print operand x (an rtx) in assembler syntax to file stream
660 according to modifier code.
662 '.' print a .s if insn needs delay slot
663 ',' print LOCAL_LABEL_PREFIX
664 '@' print trap, rte or rts depending upon pragma interruptness
665 '#' output a nop if there is nothing to put in the delay slot
666 ''' print likelihood suffix (/u for unlikely).
667 '>' print branch target if -fverbose-asm
668 'O' print a constant without the #
669 'R' print the LSW of a dp value - changes if in little endian
670 'S' print the MSW of a dp value - changes if in little endian
671 'T' print the next word of a dp value - same as 'R' in big endian mode.
672 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
673 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
674 'N' print 'r63' if the operand is (const_int 0).
675 'd' print a V2SF reg as dN instead of fpN.
676 'm' print a pair `base,offset' or `base,index', for LD and ST.
677 'U' Likewise for {LD,ST}{HI,LO}.
678 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
679 'o' output an operator. */
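/* Worked example (editorial, assuming a little-endian target): for a DFmode
   value held in the general register pair r4/r5, MSW == 1 and LSW == 0, so
   %S0 prints "r5" (the most significant word) and %R0 prints "r4" (the least
   significant word); when the operand is a MEM, %S and %R instead offset the
   printed address by 4 * MSW and 4 * LSW bytes respectively.  */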
681 void
682 print_operand (FILE *stream, rtx x, int code)
684 int regno;
685 enum machine_mode mode;
687 switch (code)
689 tree trapa_attr;
691 case '.':
692 if (final_sequence
693 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
694 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
695 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
696 break;
697 case ',':
698 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
699 break;
700 case '@':
701 trapa_attr = lookup_attribute ("trap_exit",
702 DECL_ATTRIBUTES (current_function_decl));
703 if (trapa_attr)
704 fprintf (stream, "trapa #%ld",
705 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
706 else if (sh_cfun_interrupt_handler_p ())
707 fprintf (stream, "rte");
708 else
709 fprintf (stream, "rts");
710 break;
711 case '#':
712 /* Output a nop if there's nothing in the delay slot. */
713 if (dbr_sequence_length () == 0)
714 fprintf (stream, "\n\tnop");
715 break;
716 case '\'':
718 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
720 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
721 fputs ("/u", stream);
722 break;
724 case '>':
725 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
727 fputs ("\t! target: ", stream);
728 output_addr_const (stream, JUMP_LABEL (current_output_insn));
730 break;
731 case 'O':
732 x = mark_constant_pool_use (x);
733 output_addr_const (stream, x);
734 break;
735 /* N.B.: %R / %S / %T adjust memory addresses by four.
736 For SHMEDIA, that means they can be used to access the first and
737 second 32 bit part of a 64 bit (or larger) value that
738 might be held in floating point registers or memory.
739 While they can be used to access 64 bit parts of a larger value
740 held in general purpose registers, that won't work with memory -
741 neither for fp registers, since the frxx names are used. */
742 case 'R':
743 if (REG_P (x) || GET_CODE (x) == SUBREG)
745 regno = true_regnum (x);
746 regno += FP_REGISTER_P (regno) ? 1 : LSW;
747 fputs (reg_names[regno], (stream));
749 else if (MEM_P (x))
751 x = adjust_address (x, SImode, 4 * LSW);
752 print_operand_address (stream, XEXP (x, 0));
754 else
756 rtx sub = NULL_RTX;
758 mode = GET_MODE (x);
759 if (mode == VOIDmode)
760 mode = DImode;
761 if (GET_MODE_SIZE (mode) >= 8)
762 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
763 if (sub)
764 print_operand (stream, sub, 0);
765 else
766 output_operand_lossage ("invalid operand to %%R");
768 break;
769 case 'S':
770 if (REG_P (x) || GET_CODE (x) == SUBREG)
772 regno = true_regnum (x);
773 regno += FP_REGISTER_P (regno) ? 0 : MSW;
774 fputs (reg_names[regno], (stream));
776 else if (MEM_P (x))
778 x = adjust_address (x, SImode, 4 * MSW);
779 print_operand_address (stream, XEXP (x, 0));
781 else
783 rtx sub = NULL_RTX;
785 mode = GET_MODE (x);
786 if (mode == VOIDmode)
787 mode = DImode;
788 if (GET_MODE_SIZE (mode) >= 8)
789 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
790 if (sub)
791 print_operand (stream, sub, 0);
792 else
793 output_operand_lossage ("invalid operand to %%S");
795 break;
796 case 'T':
797 /* Next word of a double. */
798 switch (GET_CODE (x))
800 case REG:
801 fputs (reg_names[REGNO (x) + 1], (stream));
802 break;
803 case MEM:
804 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
805 && GET_CODE (XEXP (x, 0)) != POST_INC)
806 x = adjust_address (x, SImode, 4);
807 print_operand_address (stream, XEXP (x, 0));
808 break;
809 default:
810 break;
812 break;
813 case 'o':
814 switch (GET_CODE (x))
816 case PLUS: fputs ("add", stream); break;
817 case MINUS: fputs ("sub", stream); break;
818 case MULT: fputs ("mul", stream); break;
819 case DIV: fputs ("div", stream); break;
820 case EQ: fputs ("eq", stream); break;
821 case NE: fputs ("ne", stream); break;
822 case GT: case LT: fputs ("gt", stream); break;
823 case GE: case LE: fputs ("ge", stream); break;
824 case GTU: case LTU: fputs ("gtu", stream); break;
825 case GEU: case LEU: fputs ("geu", stream); break;
826 default:
827 break;
829 break;
830 case 'M':
831 if (TARGET_SHMEDIA)
833 if (GET_CODE (x) == MEM
834 && GET_CODE (XEXP (x, 0)) == PLUS
835 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
836 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
837 fputc ('x', stream);
839 else
841 if (GET_CODE (x) == MEM)
843 switch (GET_MODE (x))
845 case QImode: fputs (".b", stream); break;
846 case HImode: fputs (".w", stream); break;
847 case SImode: fputs (".l", stream); break;
848 case SFmode: fputs (".s", stream); break;
849 case DFmode: fputs (".d", stream); break;
850 default: gcc_unreachable ();
854 break;
856 case 'm':
857 gcc_assert (GET_CODE (x) == MEM);
858 x = XEXP (x, 0);
859 /* Fall through. */
860 case 'U':
861 switch (GET_CODE (x))
863 case REG:
864 case SUBREG:
865 print_operand (stream, x, 0);
866 fputs (", 0", stream);
867 break;
869 case PLUS:
870 print_operand (stream, XEXP (x, 0), 0);
871 fputs (", ", stream);
872 print_operand (stream, XEXP (x, 1), 0);
873 break;
875 default:
876 gcc_unreachable ();
878 break;
880 case 'd':
881 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
883 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
884 break;
886 case 'N':
887 if (x == CONST0_RTX (GET_MODE (x)))
889 fprintf ((stream), "r63");
890 break;
892 goto default_output;
893 case 'u':
894 if (GET_CODE (x) == CONST_INT)
896 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
897 break;
899 /* Fall through. */
901 default_output:
902 default:
903 regno = 0;
904 mode = GET_MODE (x);
906 switch (GET_CODE (x))
908 case TRUNCATE:
910 rtx inner = XEXP (x, 0);
911 int offset = 0;
912 enum machine_mode inner_mode;
914 /* We might see SUBREGs with vector mode registers inside. */
915 if (GET_CODE (inner) == SUBREG
916 && (GET_MODE_SIZE (GET_MODE (inner))
917 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
918 && subreg_lowpart_p (inner))
919 inner = SUBREG_REG (inner);
920 if (GET_CODE (inner) == CONST_INT)
922 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
923 goto default_output;
925 inner_mode = GET_MODE (inner);
926 if (GET_CODE (inner) == SUBREG
927 && (GET_MODE_SIZE (GET_MODE (inner))
928 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
929 && GET_CODE (SUBREG_REG (inner)) == REG)
931 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
932 GET_MODE (SUBREG_REG (inner)),
933 SUBREG_BYTE (inner),
934 GET_MODE (inner));
935 inner = SUBREG_REG (inner);
937 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
938 abort ();
939 /* Floating point register pairs are always big endian;
940 general purpose registers are 64 bit wide. */
941 regno = REGNO (inner);
942 regno = (HARD_REGNO_NREGS (regno, inner_mode)
943 - HARD_REGNO_NREGS (regno, mode))
944 + offset;
945 x = inner;
946 goto reg;
948 case SIGN_EXTEND:
949 x = XEXP (x, 0);
950 goto reg;
951 /* FIXME: We need this on SHmedia32 because reload generates
952 some sign-extended HI or QI loads into DImode registers
953 but, because Pmode is SImode, the address ends up with a
954 subreg:SI of the DImode register. Maybe reload should be
955 fixed so as to apply alter_subreg to such loads? */
956 case IF_THEN_ELSE:
957 gcc_assert (trapping_target_operand (x, VOIDmode));
958 x = XEXP (XEXP (x, 2), 0);
959 goto default_output;
960 case SUBREG:
961 gcc_assert (SUBREG_BYTE (x) == 0
962 && GET_CODE (SUBREG_REG (x)) == REG);
964 x = SUBREG_REG (x);
965 /* Fall through. */
967 reg:
968 case REG:
969 regno += REGNO (x);
970 if (FP_REGISTER_P (regno)
971 && mode == V16SFmode)
972 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
973 else if (FP_REGISTER_P (REGNO (x))
974 && mode == V4SFmode)
975 fprintf ((stream), "fv%s", reg_names[regno] + 2);
976 else if (GET_CODE (x) == REG
977 && mode == V2SFmode)
978 fprintf ((stream), "fp%s", reg_names[regno] + 2);
979 else if (FP_REGISTER_P (REGNO (x))
980 && GET_MODE_SIZE (mode) > 4)
981 fprintf ((stream), "d%s", reg_names[regno] + 1);
982 else
983 fputs (reg_names[regno], (stream));
984 break;
986 case MEM:
987 output_address (XEXP (x, 0));
988 break;
990 case CONST:
991 if (TARGET_SHMEDIA
992 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
993 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
994 && (GET_MODE (XEXP (x, 0)) == DImode
995 || GET_MODE (XEXP (x, 0)) == SImode)
996 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
997 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
999 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
1000 rtx val2 = val;
1001 bool nested_expr = false;
1003 fputc ('(', stream);
1004 if (GET_CODE (val) == ASHIFTRT)
1006 fputc ('(', stream);
1007 val2 = XEXP (val, 0);
1009 if (GET_CODE (val2) == CONST
1010 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1012 fputc ('(', stream);
1013 nested_expr = true;
1015 output_addr_const (stream, val2);
1016 if (nested_expr)
1017 fputc (')', stream);
1018 if (GET_CODE (val) == ASHIFTRT)
1020 fputs (" >> ", stream);
1021 output_addr_const (stream, XEXP (val, 1));
1022 fputc (')', stream);
1024 fputs (" & 65535)", stream);
1025 break;
1028 /* Fall through. */
1029 default:
1030 if (TARGET_SH1)
1031 fputc ('#', stream);
1032 output_addr_const (stream, x);
1033 break;
1035 break;
1039 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1040 static void
1041 force_into (rtx value, rtx target)
1043 value = force_operand (value, target);
1044 if (! rtx_equal_p (value, target))
1045 emit_insn (gen_move_insn (target, value));
1048 /* Emit code to perform a block move. Choose the best method.
1050 OPERANDS[0] is the destination.
1051 OPERANDS[1] is the source.
1052 OPERANDS[2] is the size.
1053 OPERANDS[3] is the alignment safe to use. */
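/* Editorial summary of the main cases handled below (descriptive only): on
   TARGET_HARD_SH4 a constant 12-byte copy calls the __movmemSI12_i4 support
   routine and larger constant copies call __movmem_i4_even or
   __movmem_i4_odd, with the destination and source addresses forced into r4
   and r5; on other targets constant copies of fewer than 64 bytes call
   __movmemSI<bytes> and bigger ones call __movmem with a size/switch value
   in r6; SH4A may instead use movua.l loads for an insufficiently aligned
   source.  Returning 0 tells the caller to fall back to the generic
   expansion.  */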
1056 expand_block_move (rtx *operands)
1058 int align = INTVAL (operands[3]);
1059 int constp = (GET_CODE (operands[2]) == CONST_INT);
1060 int bytes = (constp ? INTVAL (operands[2]) : 0);
1062 if (! constp)
1063 return 0;
1065 /* If we could use mov.l to move words and dest is word-aligned, we
1066 can use movua.l for loads and still generate a relatively short
1067 and efficient sequence. */
1068 if (TARGET_SH4A_ARCH && align < 4
1069 && MEM_ALIGN (operands[0]) >= 32
1070 && can_move_by_pieces (bytes, 32))
1072 rtx dest = copy_rtx (operands[0]);
1073 rtx src = copy_rtx (operands[1]);
1074 /* We could use different pseudos for each copied word, but
1075 since movua can only load into r0, it's kind of
1076 pointless. */
1077 rtx temp = gen_reg_rtx (SImode);
1078 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1079 int copied = 0;
1081 while (copied + 4 <= bytes)
1083 rtx to = adjust_address (dest, SImode, copied);
1084 rtx from = adjust_automodify_address (src, BLKmode,
1085 src_addr, copied);
1087 set_mem_size (from, GEN_INT (4));
1088 emit_insn (gen_movua (temp, from));
1089 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1090 emit_move_insn (to, temp);
1091 copied += 4;
1094 if (copied < bytes)
1095 move_by_pieces (adjust_address (dest, BLKmode, copied),
1096 adjust_automodify_address (src, BLKmode,
1097 src_addr, copied),
1098 bytes - copied, align, 0);
1100 return 1;
1103 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1104 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1105 if (align < 4 || (bytes % 4 != 0))
1106 return 0;
1108 if (TARGET_HARD_SH4)
1110 if (bytes < 12)
1111 return 0;
1112 else if (bytes == 12)
1114 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1115 rtx r4 = gen_rtx_REG (SImode, 4);
1116 rtx r5 = gen_rtx_REG (SImode, 5);
1118 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1119 force_into (XEXP (operands[0], 0), r4);
1120 force_into (XEXP (operands[1], 0), r5);
1121 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1122 return 1;
1124 else if (! TARGET_SMALLCODE)
1126 const char *entry_name;
1127 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1128 int dwords;
1129 rtx r4 = gen_rtx_REG (SImode, 4);
1130 rtx r5 = gen_rtx_REG (SImode, 5);
1131 rtx r6 = gen_rtx_REG (SImode, 6);
1133 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1134 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1135 force_into (XEXP (operands[0], 0), r4);
1136 force_into (XEXP (operands[1], 0), r5);
1138 dwords = bytes >> 3;
1139 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1140 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1141 return 1;
1143 else
1144 return 0;
1146 if (bytes < 64)
1148 char entry[30];
1149 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1150 rtx r4 = gen_rtx_REG (SImode, 4);
1151 rtx r5 = gen_rtx_REG (SImode, 5);
1153 sprintf (entry, "__movmemSI%d", bytes);
1154 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1155 force_into (XEXP (operands[0], 0), r4);
1156 force_into (XEXP (operands[1], 0), r5);
1157 emit_insn (gen_block_move_real (func_addr_rtx));
1158 return 1;
1161 /* This is the same number of bytes as a memcpy call, but to a different
1162 less common function name, so this will occasionally use more space. */
1163 if (! TARGET_SMALLCODE)
1165 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1166 int final_switch, while_loop;
1167 rtx r4 = gen_rtx_REG (SImode, 4);
1168 rtx r5 = gen_rtx_REG (SImode, 5);
1169 rtx r6 = gen_rtx_REG (SImode, 6);
1171 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1172 force_into (XEXP (operands[0], 0), r4);
1173 force_into (XEXP (operands[1], 0), r5);
1175 /* r6 controls the size of the move. 16 is decremented from it
1176 for each 64 bytes moved. Then the negative bit left over is used
1177 as an index into a list of move instructions. e.g., a 72 byte move
1178 would be set up with size(r6) = 14, for one iteration through the
1179 big while loop, and a switch of -2 for the last part. */
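/* A second worked example following the same arithmetic (editorial): a
   196 byte move is 49 words, so final_switch = 16 - (49 % 16) = 15 and
   while_loop = (49 / 16 - 1) * 16 = 32, giving size(r6) = 47; three trips
   through the big while loop move 192 bytes and leave -1, and the switch
   entry for -1 copies the one remaining word.  */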
1181 final_switch = 16 - ((bytes / 4) % 16);
1182 while_loop = ((bytes / 4) / 16 - 1) * 16;
1183 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1184 emit_insn (gen_block_lump_real (func_addr_rtx));
1185 return 1;
1188 return 0;
1191 /* Prepare operands for a move define_expand; specifically, one of the
1192 operands must be in a register. */
1195 prepare_move_operands (rtx operands[], enum machine_mode mode)
1197 if ((mode == SImode || mode == DImode)
1198 && flag_pic
1199 && ! ((mode == Pmode || mode == ptr_mode)
1200 && tls_symbolic_operand (operands[1], Pmode) != 0))
1202 rtx temp;
1203 if (SYMBOLIC_CONST_P (operands[1]))
1205 if (GET_CODE (operands[0]) == MEM)
1206 operands[1] = force_reg (Pmode, operands[1]);
1207 else if (TARGET_SHMEDIA
1208 && GET_CODE (operands[1]) == LABEL_REF
1209 && target_reg_operand (operands[0], mode))
1210 /* It's ok. */;
1211 else
1213 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1214 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1217 else if (GET_CODE (operands[1]) == CONST
1218 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1219 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1221 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1222 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1223 mode, temp);
1224 operands[1] = expand_binop (mode, add_optab, temp,
1225 XEXP (XEXP (operands[1], 0), 1),
1226 no_new_pseudos ? temp
1227 : gen_reg_rtx (Pmode),
1228 0, OPTAB_LIB_WIDEN);
1232 if (! reload_in_progress && ! reload_completed)
1234 /* Copy the source to a register if both operands aren't registers. */
1235 if (! register_operand (operands[0], mode)
1236 && ! sh_register_operand (operands[1], mode))
1237 operands[1] = copy_to_mode_reg (mode, operands[1]);
1239 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1241 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1242 except that we can't use that function because it is static. */
1243 rtx new = change_address (operands[0], mode, 0);
1244 MEM_COPY_ATTRIBUTES (new, operands[0]);
1245 operands[0] = new;
1248 /* This case can happen while generating code to move the result
1249 of a library call to the target. Reject `st r0,@(rX,rY)' because
1250 reload will fail to find a spill register for rX, since r0 is already
1251 being used for the source. */
1252 else if (TARGET_SH1
1253 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1254 && GET_CODE (operands[0]) == MEM
1255 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1256 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1257 operands[1] = copy_to_mode_reg (mode, operands[1]);
1260 if (mode == Pmode || mode == ptr_mode)
1262 rtx op0, op1, opc;
1263 enum tls_model tls_kind;
1265 op0 = operands[0];
1266 op1 = operands[1];
1267 if (GET_CODE (op1) == CONST
1268 && GET_CODE (XEXP (op1, 0)) == PLUS
1269 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1271 opc = XEXP (XEXP (op1, 0), 1);
1272 op1 = XEXP (XEXP (op1, 0), 0);
1274 else
1275 opc = NULL_RTX;
1277 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1279 rtx tga_op1, tga_ret, tmp, tmp2;
1281 switch (tls_kind)
1283 case TLS_MODEL_GLOBAL_DYNAMIC:
1284 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1285 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1286 op1 = tga_ret;
1287 break;
1289 case TLS_MODEL_LOCAL_DYNAMIC:
1290 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1291 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1293 tmp = gen_reg_rtx (Pmode);
1294 emit_move_insn (tmp, tga_ret);
1296 if (register_operand (op0, Pmode))
1297 tmp2 = op0;
1298 else
1299 tmp2 = gen_reg_rtx (Pmode);
1301 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1302 op1 = tmp2;
1303 break;
1305 case TLS_MODEL_INITIAL_EXEC:
1306 if (! flag_pic)
1308 /* Don't schedule insns for getting GOT address when
1309 the first scheduling is enabled, to avoid spill
1310 failures for R0. */
1311 if (flag_schedule_insns)
1312 emit_insn (gen_blockage ());
1313 emit_insn (gen_GOTaddr2picreg ());
1314 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1315 PIC_REG)));
1316 if (flag_schedule_insns)
1317 emit_insn (gen_blockage ());
1319 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1320 tmp = gen_sym2GOTTPOFF (op1);
1321 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1322 op1 = tga_op1;
1323 break;
1325 case TLS_MODEL_LOCAL_EXEC:
1326 tmp2 = gen_reg_rtx (Pmode);
1327 emit_insn (gen_load_gbr (tmp2));
1328 tmp = gen_reg_rtx (Pmode);
1329 emit_insn (gen_symTPOFF2reg (tmp, op1));
1331 if (register_operand (op0, Pmode))
1332 op1 = op0;
1333 else
1334 op1 = gen_reg_rtx (Pmode);
1336 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1337 break;
1339 default:
1340 gcc_unreachable ();
1342 if (opc)
1343 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1344 operands[1] = op1;
1348 return 0;
1351 enum rtx_code
1352 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1353 enum rtx_code comparison)
1355 rtx op1;
1356 rtx scratch = NULL_RTX;
1358 if (comparison == CODE_FOR_nothing)
1359 comparison = GET_CODE (operands[0]);
1360 else
1361 scratch = operands[4];
1362 if (GET_CODE (operands[1]) == CONST_INT
1363 && GET_CODE (operands[2]) != CONST_INT)
1365 rtx tmp = operands[1];
1367 operands[1] = operands[2];
1368 operands[2] = tmp;
1369 comparison = swap_condition (comparison);
1371 if (GET_CODE (operands[2]) == CONST_INT)
1373 HOST_WIDE_INT val = INTVAL (operands[2]);
1374 if ((val == -1 || val == -0x81)
1375 && (comparison == GT || comparison == LE))
1377 comparison = (comparison == GT) ? GE : LT;
1378 operands[2] = gen_int_mode (val + 1, mode);
1380 else if ((val == 1 || val == 0x80)
1381 && (comparison == GE || comparison == LT))
1383 comparison = (comparison == GE) ? GT : LE;
1384 operands[2] = gen_int_mode (val - 1, mode);
1386 else if (val == 1 && (comparison == GEU || comparison == LTU))
1388 comparison = (comparison == GEU) ? NE : EQ;
1389 operands[2] = CONST0_RTX (mode);
1391 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1393 comparison = (comparison == GEU) ? GTU : LEU;
1394 operands[2] = gen_int_mode (val - 1, mode);
1396 else if (val == 0 && (comparison == GTU || comparison == LEU))
1397 comparison = (comparison == GTU) ? NE : EQ;
1398 else if (mode == SImode
1399 && ((val == 0x7fffffff
1400 && (comparison == GTU || comparison == LEU))
1401 || ((unsigned HOST_WIDE_INT) val
1402 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1403 && (comparison == GEU || comparison == LTU))))
1405 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1406 operands[2] = CONST0_RTX (mode);
1409 op1 = operands[1];
1410 if (!no_new_pseudos)
1411 operands[1] = force_reg (mode, op1);
1412 /* When we are handling DImode comparisons, we want to keep constants so
1413 that we can optimize the component comparisons; however, memory loads
1414 are better issued as a whole so that they can be scheduled well.
1415 SImode equality comparisons allow I08 constants, but only when they
1416 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1417 into a register, that register might as well be r0, and we allow the
1418 constant. If it is already in a register, this is likely to be
1419 allocated to a different hard register, thus we load the constant into
1420 a register unless it is zero. */
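/* Concrete illustration (editorial): an SImode EQ or NE against r0 can use
   the cmp/eq #imm,r0 form, so an I08 constant such as 42 may be kept when
   op1 is r0 or still has to be loaded anyway; any other comparison, e.g. GT
   against 42, has no immediate form, and the code below moves the constant
   into the scratch register or a fresh pseudo.  */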
1421 if (!REG_P (operands[2])
1422 && (GET_CODE (operands[2]) != CONST_INT
1423 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1424 && ((comparison != EQ && comparison != NE)
1425 || (REG_P (op1) && REGNO (op1) != R0_REG)
1426 || !CONST_OK_FOR_I08 (INTVAL (operands[2]))))))
1428 if (scratch && GET_MODE (scratch) == mode)
1430 emit_move_insn (scratch, operands[2]);
1431 operands[2] = scratch;
1433 else if (!no_new_pseudos)
1434 operands[2] = force_reg (mode, operands[2]);
1436 return comparison;
1439 void
1440 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1442 rtx (*branch_expander) (rtx) = gen_branch_true;
1443 rtx jump;
1445 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1446 switch (comparison)
1448 case NE: case LT: case LE: case LTU: case LEU:
1449 comparison = reverse_condition (comparison);
1450 branch_expander = gen_branch_false;
1451 default: ;
1453 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1454 gen_rtx_fmt_ee (comparison, SImode,
1455 operands[1], operands[2])));
1456 jump = emit_jump_insn (branch_expander (operands[3]));
1457 if (probability >= 0)
1458 REG_NOTES (jump)
1459 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1460 REG_NOTES (jump));
1464 /* ??? How should we distribute probabilities when more than one branch
1465 is generated? So far we only have some ad-hoc observations:
1466 - If the operands are random, they are likely to differ in both parts.
1467 - If comparing items in a hash chain, the operands are random or equal;
1468 operation should be EQ or NE.
1469 - If items are searched in an ordered tree from the root, we can expect
1470 the highpart to be unequal about half of the time; operation should be
1471 an inequality comparison, operands non-constant, and overall probability
1472 about 50%. Likewise for quicksort.
1473 - Range checks will often be made against constants. Even if we assume for
1474 simplicity an even distribution of the non-constant operand over a
1475 sub-range here, the same probability could be generated with differently
1476 wide sub-ranges - as long as the ratio of the part of the subrange that
1477 is before the threshold to the part that comes after the threshold stays
1478 the same. Thus, we can't really tell anything here;
1479 assuming random distribution is at least simple. */
1482 bool
1483 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1485 enum rtx_code msw_taken, msw_skip, lsw_taken;
1486 rtx skip_label = NULL_RTX;
1487 rtx op1h, op1l, op2h, op2l;
1488 int num_branches;
1489 int prob, rev_prob;
1490 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1491 rtx scratch = operands[4];
1493 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1494 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1495 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1496 op1l = gen_lowpart (SImode, operands[1]);
1497 op2l = gen_lowpart (SImode, operands[2]);
1498 msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1499 prob = split_branch_probability;
1500 rev_prob = REG_BR_PROB_BASE - prob;
1501 switch (comparison)
1503 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1504 That costs 1 cycle more when the first branch can be predicted taken,
1505 but saves us mispredicts because only one branch needs prediction.
1506 It also enables generating the cmpeqdi_t-1 pattern. */
1507 case EQ:
1508 if (TARGET_CMPEQDI_T)
1510 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1511 emit_jump_insn (gen_branch_true (operands[3]));
1512 return true;
1514 msw_skip = NE;
1515 lsw_taken = EQ;
1516 if (prob >= 0)
1518 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32). */
1520 msw_skip_prob = rev_prob;
1521 if (REG_BR_PROB_BASE <= 65535)
1522 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1523 else
1525 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1526 lsw_taken_prob
1527 = (prob
1528 ? (REG_BR_PROB_BASE
1529 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1530 / ((HOST_WIDEST_INT) prob << 32)))
1531 : 0);
1534 break;
1535 case NE:
1536 if (TARGET_CMPEQDI_T)
1538 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1539 emit_jump_insn (gen_branch_false (operands[3]));
1540 return true;
1542 msw_taken = NE;
1543 msw_taken_prob = prob;
1544 lsw_taken = NE;
1545 lsw_taken_prob = 0;
1546 break;
1547 case GTU: case GT:
1548 msw_taken = comparison;
1549 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1550 break;
1551 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1552 msw_skip = swap_condition (msw_taken);
1553 lsw_taken = GTU;
1554 break;
1555 case GEU: case GE:
1556 if (op2l == CONST0_RTX (SImode))
1557 msw_taken = comparison;
1558 else
1560 msw_taken = comparison == GE ? GT : GTU;
1561 msw_skip = swap_condition (msw_taken);
1562 lsw_taken = GEU;
1564 break;
1565 case LTU: case LT:
1566 msw_taken = comparison;
1567 if (op2l == CONST0_RTX (SImode))
1568 break;
1569 msw_skip = swap_condition (msw_taken);
1570 lsw_taken = LTU;
1571 break;
1572 case LEU: case LE:
1573 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1574 msw_taken = comparison;
1575 else
1577 lsw_taken = LEU;
1578 if (comparison == LE)
1579 msw_taken = LT;
1580 else if (op2h != CONST0_RTX (SImode))
1581 msw_taken = LTU;
1582 else
1583 break;
1584 msw_skip = swap_condition (msw_taken);
1586 break;
1587 default: return false;
1589 num_branches = ((msw_taken != CODE_FOR_nothing)
1590 + (msw_skip != CODE_FOR_nothing)
1591 + (lsw_taken != CODE_FOR_nothing));
1592 if (comparison != EQ && comparison != NE && num_branches > 1)
1594 if (!CONSTANT_P (operands[2])
1595 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1596 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1598 msw_taken_prob = prob / 2U;
1599 msw_skip_prob
1600 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1601 lsw_taken_prob = prob;
1603 else
1605 msw_taken_prob = prob;
1606 msw_skip_prob = REG_BR_PROB_BASE;
1607 /* ??? If we have a constant op2h, should we use that when
1608 calculating lsw_taken_prob? */
1609 lsw_taken_prob = prob;
1612 operands[1] = op1h;
1613 operands[2] = op2h;
1614 operands[4] = NULL_RTX;
1615 if (reload_completed
1616 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1617 && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
1619 emit_move_insn (scratch, operands[2]);
1620 operands[2] = scratch;
1622 if (msw_taken != CODE_FOR_nothing)
1623 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1624 if (msw_skip != CODE_FOR_nothing)
1626 rtx taken_label = operands[3];
1628 operands[3] = skip_label = gen_label_rtx ();
1629 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1630 operands[3] = taken_label;
1632 operands[1] = op1l;
1633 operands[2] = op2l;
1634 if (lsw_taken != CODE_FOR_nothing)
1636 if (reload_completed
1637 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1638 operands[4] = scratch;
1639 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1641 if (msw_skip != CODE_FOR_nothing)
1642 emit_label (skip_label);
1643 return true;
1646 /* Prepare the operands for an scc instruction; make sure that the
1647 compare has been done. */
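/* Editorial note: SH provides only the "greater" flavours of the compare
   instructions (cmp/gt, cmp/ge, cmp/hi, cmp/hs for GT, GE, GTU and GEU), so
   an LT, LE, LTU or LEU request is flipped to its counterpart below and
   sh_compare_op0/sh_compare_op1 are swapped.  */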
1649 prepare_scc_operands (enum rtx_code code)
1651 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1652 enum rtx_code oldcode = code;
1653 enum machine_mode mode;
1655 /* First need a compare insn. */
1656 switch (code)
1658 case NE:
1659 /* It isn't possible to handle this case. */
1660 gcc_unreachable ();
1661 case LT:
1662 code = GT;
1663 break;
1664 case LE:
1665 code = GE;
1666 break;
1667 case LTU:
1668 code = GTU;
1669 break;
1670 case LEU:
1671 code = GEU;
1672 break;
1673 default:
1674 break;
1676 if (code != oldcode)
1678 rtx tmp = sh_compare_op0;
1679 sh_compare_op0 = sh_compare_op1;
1680 sh_compare_op1 = tmp;
1683 mode = GET_MODE (sh_compare_op0);
1684 if (mode == VOIDmode)
1685 mode = GET_MODE (sh_compare_op1);
1687 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1688 if ((code != EQ && code != NE
1689 && (sh_compare_op1 != const0_rtx
1690 || code == GTU || code == GEU || code == LTU || code == LEU))
1691 || (mode == DImode && sh_compare_op1 != const0_rtx)
1692 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1693 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1695 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1696 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1697 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1698 gen_rtx_SET (VOIDmode, t_reg,
1699 gen_rtx_fmt_ee (code, SImode,
1700 sh_compare_op0, sh_compare_op1)),
1701 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1702 else
1703 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1704 gen_rtx_fmt_ee (code, SImode,
1705 sh_compare_op0, sh_compare_op1)));
1707 return t_reg;
1710 /* Called from the md file, set up the operands of a compare instruction. */
1712 void
1713 from_compare (rtx *operands, int code)
1715 enum machine_mode mode = GET_MODE (sh_compare_op0);
1716 rtx insn;
1717 if (mode == VOIDmode)
1718 mode = GET_MODE (sh_compare_op1);
1719 if (code != EQ
1720 || mode == DImode
1721 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1723 /* Force args into regs, since we can't use constants here. */
1724 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1725 if (sh_compare_op1 != const0_rtx
1726 || code == GTU || code == GEU
1727 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1728 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1730 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1732 from_compare (operands, GT);
1733 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1735 else
1736 insn = gen_rtx_SET (VOIDmode,
1737 gen_rtx_REG (SImode, T_REG),
1738 gen_rtx_fmt_ee (code, SImode,
1739 sh_compare_op0, sh_compare_op1));
1740 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1742 insn = gen_rtx_PARALLEL (VOIDmode,
1743 gen_rtvec (2, insn,
1744 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1745 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1747 else
1748 emit_insn (insn);
1751 /* Functions to output assembly code. */
1753 /* Return a sequence of instructions to perform a DI or DF move.
1755 Since the SH cannot move a DI or DF in one instruction, we have
1756 to take care when we see overlapping source and dest registers. */
1758 const char *
1759 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1760 enum machine_mode mode)
1762 rtx dst = operands[0];
1763 rtx src = operands[1];
1765 if (GET_CODE (dst) == MEM
1766 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1767 return "mov.l %T1,%0\n\tmov.l %1,%0";
1769 if (register_operand (dst, mode)
1770 && register_operand (src, mode))
1772 if (REGNO (src) == MACH_REG)
1773 return "sts mach,%S0\n\tsts macl,%R0";
1775 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1776 when mov.d r1,r0 do r1->r0 then r2->r1. */
1778 if (REGNO (src) + 1 == REGNO (dst))
1779 return "mov %T1,%T0\n\tmov %1,%0";
1780 else
1781 return "mov %1,%0\n\tmov %T1,%T0";
1783 else if (GET_CODE (src) == CONST_INT)
1785 if (INTVAL (src) < 0)
1786 output_asm_insn ("mov #-1,%S0", operands);
1787 else
1788 output_asm_insn ("mov #0,%S0", operands);
1790 return "mov %1,%R0";
1792 else if (GET_CODE (src) == MEM)
1794 int ptrreg = -1;
1795 int dreg = REGNO (dst);
1796 rtx inside = XEXP (src, 0);
1798 switch (GET_CODE (inside))
1800 case REG:
1801 ptrreg = REGNO (inside);
1802 break;
1804 case SUBREG:
1805 ptrreg = subreg_regno (inside);
1806 break;
1808 case PLUS:
1809 ptrreg = REGNO (XEXP (inside, 0));
1810 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1811 an offsettable address. Unfortunately, offsettable addresses use
1812 QImode to check the offset, and a QImode offsettable address
1813 requires r0 for the other operand, which is not currently
1814 supported, so we can't use the 'o' constraint.
1815 Thus we must check for and handle r0+REG addresses here.
1816 We punt for now, since this is likely very rare. */
1817 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1818 break;
1820 case LABEL_REF:
1821 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1822 case POST_INC:
1823 return "mov.l %1,%0\n\tmov.l %1,%T0";
1824 default:
1825 gcc_unreachable ();
1828 /* Work out the safe way to copy. Copy into the second half first. */
1829 if (dreg == ptrreg)
1830 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1833 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1836 /* Print an instruction which would have gone into a delay slot after
1837 another instruction, but couldn't because the other instruction expanded
1838 into a sequence where putting the slot insn at the end wouldn't work. */
1840 static void
1841 print_slot (rtx insn)
1843 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1845 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1848 const char *
1849 output_far_jump (rtx insn, rtx op)
1851 struct { rtx lab, reg, op; } this;
1852 rtx braf_base_lab = NULL_RTX;
1853 const char *jump;
1854 int far;
1855 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1856 rtx prev;
1858 this.lab = gen_label_rtx ();
1860 if (TARGET_SH2
1861 && offset >= -32764
1862 && offset - get_attr_length (insn) <= 32766)
1864 far = 0;
1865 jump = "mov.w %O0,%1; braf %1";
1867 else
1869 far = 1;
1870 if (flag_pic)
1872 if (TARGET_SH2)
1873 jump = "mov.l %O0,%1; braf %1";
1874 else
1875 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1877 else
1878 jump = "mov.l %O0,%1; jmp @%1";
1880 /* If we have a scratch register available, use it. */
1881 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1882 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1884 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1885 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1886 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1887 output_asm_insn (jump, &this.lab);
1888 if (dbr_sequence_length ())
1889 print_slot (final_sequence);
1890 else
1891 output_asm_insn ("nop", 0);
1893 else
1895 /* Output the delay slot insn first if any. */
1896 if (dbr_sequence_length ())
1897 print_slot (final_sequence);
1899 this.reg = gen_rtx_REG (SImode, 13);
1900 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1901 Fortunately, MACL is fixed and call-clobbered, and we never
1902 need its value across jumps, so save r13 in it instead of in
1903 the stack. */
1904 if (TARGET_SH5)
1905 output_asm_insn ("lds r13, macl", 0);
1906 else
1907 output_asm_insn ("mov.l r13,@-r15", 0);
1908 output_asm_insn (jump, &this.lab);
1909 if (TARGET_SH5)
1910 output_asm_insn ("sts macl, r13", 0);
1911 else
1912 output_asm_insn ("mov.l @r15+,r13", 0);
1914 if (far && flag_pic && TARGET_SH2)
1916 braf_base_lab = gen_label_rtx ();
1917 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1918 CODE_LABEL_NUMBER (braf_base_lab));
1920 if (far)
1921 output_asm_insn (".align 2", 0);
1922 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1923 this.op = op;
1924 if (far && flag_pic)
1926 if (TARGET_SH2)
1927 this.lab = braf_base_lab;
1928 output_asm_insn (".long %O2-%O0", &this.lab);
1930 else
1931 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1932 return "";
1935 /* Local label counter, used for constants in the pool and inside
1936 pattern branches. */
1938 static int lf = 100;
1940 /* Output code for ordinary branches. */
1942 const char *
1943 output_branch (int logic, rtx insn, rtx *operands)
1945 switch (get_attr_length (insn))
1947 case 6:
1948 /* This can happen if filling the delay slot has caused a forward
1949 branch to exceed its range (we could reverse it, but only
1950 when we know we won't overextend other branches; this should
1951 best be handled by relaxation).
1952 It can also happen when other condbranches hoist delay slot insns
1953 from their destination, thus leading to code size increase.
1954 But the branch will still be in the range -4092..+4098 bytes. */
1956 if (! TARGET_RELAX)
1958 int label = lf++;
1959 /* The call to print_slot will clobber the operands. */
1960 rtx op0 = operands[0];
1962 /* If the instruction in the delay slot is annulled (true), then
1963 there is no delay slot where we can put it now. The only safe
1964 place for it is after the label. final will do that by default. */
1966 if (final_sequence
1967 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1968 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1970 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1971 ASSEMBLER_DIALECT ? "/" : ".", label);
1972 print_slot (final_sequence);
1974 else
1975 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1977 output_asm_insn ("bra\t%l0", &op0);
1978 fprintf (asm_out_file, "\tnop\n");
1979 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1981 return "";
1983 /* When relaxing, handle this like a short branch. The linker
1984 will fix it up if it still doesn't fit after relaxation. */
1985 case 2:
1986 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1988 /* These are for SH2e, in which we have to account for the
1989 extra nop because of the hardware bug in annulled branches. */
1990 case 8:
1991 if (! TARGET_RELAX)
1993 int label = lf++;
1995 gcc_assert (!final_sequence
1996 || !(INSN_ANNULLED_BRANCH_P
1997 (XVECEXP (final_sequence, 0, 0))));
1998 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1999 logic ? "f" : "t",
2000 ASSEMBLER_DIALECT ? "/" : ".", label);
2001 fprintf (asm_out_file, "\tnop\n");
2002 output_asm_insn ("bra\t%l0", operands);
2003 fprintf (asm_out_file, "\tnop\n");
2004 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2006 return "";
2008 /* When relaxing, fall through. */
2009 case 4:
2011 char buffer[10];
2013 sprintf (buffer, "b%s%ss\t%%l0",
2014 logic ? "t" : "f",
2015 ASSEMBLER_DIALECT ? "/" : ".");
2016 output_asm_insn (buffer, &operands[0]);
2017 return "nop";
2020 default:
2021 /* There should be no branches longer than this now - that would
2022 indicate that something has destroyed the branches set
2023 up in machine_dependent_reorg. */
2024 gcc_unreachable ();
2028 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2029 fill in operands[9] as a label to the successor insn.
2030 We try to use jump threading where possible.
2031 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2032 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2033 follow jmp and bt, if the address is in range. */
2034 const char *
2035 output_branchy_insn (enum rtx_code code, const char *template,
2036 rtx insn, rtx *operands)
2038 rtx next_insn = NEXT_INSN (insn);
2040 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2042 rtx src = SET_SRC (PATTERN (next_insn));
2043 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2045 /* Following branch not taken */
2046 operands[9] = gen_label_rtx ();
2047 emit_label_after (operands[9], next_insn);
2048 INSN_ADDRESSES_NEW (operands[9],
2049 INSN_ADDRESSES (INSN_UID (next_insn))
2050 + get_attr_length (next_insn));
2051 return template;
2053 else
2055 int offset = (branch_dest (next_insn)
2056 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2057 if (offset >= -252 && offset <= 258)
2059 if (GET_CODE (src) == IF_THEN_ELSE)
2060 /* branch_true */
2061 src = XEXP (src, 1);
2062 operands[9] = src;
2063 return template;
2067 operands[9] = gen_label_rtx ();
2068 emit_label_after (operands[9], insn);
2069 INSN_ADDRESSES_NEW (operands[9],
2070 INSN_ADDRESSES (INSN_UID (insn))
2071 + get_attr_length (insn));
2072 return template;
2075 const char *
2076 output_ieee_ccmpeq (rtx insn, rtx *operands)
2078 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2079 insn, operands);
2082 /* Output the start of the assembler file. */
2084 static void
2085 sh_file_start (void)
2087 default_file_start ();
2089 #ifdef SYMBIAN
2090 /* Declare the .directive section before it is used. */
2091 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2092 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2093 #endif
2095 if (TARGET_ELF)
2096 /* We need to show the text section with the proper
2097 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2098 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2099 will complain. We can teach GAS specifically about the
2100 default attributes for our choice of text section, but
2101 then we would have to change GAS again if/when we change
2102 the text section name. */
2103 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2104 else
2105 /* Switch to the data section so that the coffsem symbol
2106 isn't in the text section. */
2107 switch_to_section (data_section);
2109 if (TARGET_LITTLE_ENDIAN)
2110 fputs ("\t.little\n", asm_out_file);
2112 if (!TARGET_ELF)
2114 if (TARGET_SHCOMPACT)
2115 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2116 else if (TARGET_SHMEDIA)
2117 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2118 TARGET_SHMEDIA64 ? 64 : 32);
2122 /* Check if PAT includes the UNSPEC_CALLER unspec pattern. */
2124 static bool
2125 unspec_caller_rtx_p (rtx pat)
2127 switch (GET_CODE (pat))
2129 case CONST:
2130 return unspec_caller_rtx_p (XEXP (pat, 0));
2131 case PLUS:
2132 case MINUS:
2133 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2134 return true;
2135 return unspec_caller_rtx_p (XEXP (pat, 1));
2136 case UNSPEC:
2137 if (XINT (pat, 1) == UNSPEC_CALLER)
2138 return true;
2139 default:
2140 break;
2143 return false;
2146 /* Indicate that INSN cannot be duplicated. This is true for an insn
2147 that generates a unique label. */
2149 static bool
2150 sh_cannot_copy_insn_p (rtx insn)
2152 rtx pat;
2154 if (!reload_completed || !flag_pic)
2155 return false;
2157 if (GET_CODE (insn) != INSN)
2158 return false;
2159 if (asm_noperands (insn) >= 0)
2160 return false;
2162 pat = PATTERN (insn);
2163 if (GET_CODE (pat) != SET)
2164 return false;
2165 pat = SET_SRC (pat);
2167 if (unspec_caller_rtx_p (pat))
2168 return true;
2170 return false;
2173 /* Actual number of instructions used to make a shift by N. */
2174 static const char ashiftrt_insns[] =
2175 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2177 /* Left shift and logical right shift are the same. */
2178 static const char shift_insns[] =
2179 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2181 /* Individual shift amounts needed to get the above length sequences.
2182 One bit right shifts clobber the T bit, so when possible, put one bit
2183 shifts in the middle of the sequence, so the ends are eligible for
2184 branch delay slots. */
2185 static const short shift_amounts[32][5] = {
2186 {0}, {1}, {2}, {2, 1},
2187 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2188 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2189 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2190 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2191 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2192 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2193 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
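/* For example, a constant shift by 13 expands to the sequence 8, 2, 1, 2
   from shift_amounts[13], i.e. four single-shift instructions, matching
   shift_insns[13] == 4; a shift by 30 expands to 16, -2, 16, where the
   negative entry denotes a shift in the opposite direction, for a net
   amount of 16 - 2 + 16 = 30.

   A minimal consistency-check sketch, assuming nothing beyond the two
   tables above: each row of shift_amounts should contain exactly
   shift_insns[i] nonzero entries, and those entries should sum to i when
   negative entries are counted as shifts in the opposite direction.
   Kept disabled; it only illustrates how the tables correspond.  */
#if 0
static int
check_shift_tables (void)
{
  int i, j, bad = 0;

  for (i = 0; i < 32; i++)
    {
      int sum = 0, n = 0;

      for (j = 0; j < 5 && shift_amounts[i][j] != 0; j++)
        {
          sum += shift_amounts[i][j];
          n++;
        }
      if (sum != i || n != shift_insns[i])
        bad++;
    }
  return bad;
}
#endif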
2195 /* Likewise, but for shift amounts < 16, up to three highmost bits
2196 might be clobbered. This is typically used when combined with some
2197 kind of sign or zero extension. */
2199 static const char ext_shift_insns[] =
2200 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2202 static const short ext_shift_amounts[32][4] = {
2203 {0}, {1}, {2}, {2, 1},
2204 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2205 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2206 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2207 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2208 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2209 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2210 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2212 /* Assuming we have a value that has been sign-extended by at least one bit,
2213 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2214 to shift it by N without data loss, and quicker than by other means? */
2215 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
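/* ((N) | 8) == 15 holds exactly for N == 7 and N == 15, so those are the
   only shift counts this macro accepts.  */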
2217 /* This is used in length attributes in sh.md to help compute the length
2218 of arbitrary constant shift instructions. */
2221 shift_insns_rtx (rtx insn)
2223 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2224 int shift_count = INTVAL (XEXP (set_src, 1));
2225 enum rtx_code shift_code = GET_CODE (set_src);
2227 switch (shift_code)
2229 case ASHIFTRT:
2230 return ashiftrt_insns[shift_count];
2231 case LSHIFTRT:
2232 case ASHIFT:
2233 return shift_insns[shift_count];
2234 default:
2235 gcc_unreachable ();
2239 /* Return the cost of a shift. */
2241 static inline int
2242 shiftcosts (rtx x)
2244 int value;
2246 if (TARGET_SHMEDIA)
2247 return 1;
2249 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2251 if (GET_MODE (x) == DImode
2252 && GET_CODE (XEXP (x, 1)) == CONST_INT
2253 && INTVAL (XEXP (x, 1)) == 1)
2254 return 2;
2256 /* Everything else is invalid, because there is no pattern for it. */
2257 return MAX_COST;
2259 /* If shift by a non constant, then this will be expensive. */
2260 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2261 return SH_DYNAMIC_SHIFT_COST;
2263 value = INTVAL (XEXP (x, 1));
2265 /* Otherwise, return the true cost in instructions. */
2266 if (GET_CODE (x) == ASHIFTRT)
2268 int cost = ashiftrt_insns[value];
2269 /* If SH3, then we put the constant in a reg and use shad. */
2270 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2271 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2272 return cost;
2274 else
2275 return shift_insns[value];
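/* For example, an arithmetic right shift by 6 would need ashiftrt_insns[6]
   == 8 single shifts, so where a dynamic shift is cheap (SH3 and later,
   which have shad) the cost is clamped to 1 + SH_DYNAMIC_SHIFT_COST.  */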
2278 /* Return the cost of an AND operation. */
2280 static inline int
2281 andcosts (rtx x)
2283 int i;
2285 /* ANDing with a register is a single cycle AND instruction. */
2286 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2287 return 1;
2289 i = INTVAL (XEXP (x, 1));
2291 if (TARGET_SHMEDIA)
2293 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2294 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
2295 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
2296 return 1;
2297 else
2298 return 1 + rtx_cost (XEXP (x, 1), AND);
2301 /* These constants are single cycle extu.[bw] instructions. */
2302 if (i == 0xff || i == 0xffff)
2303 return 1;
2304 /* Constants that can be used in an and immediate instruction in a single
2305 cycle, but this requires r0, so make it a little more expensive. */
2306 if (CONST_OK_FOR_K08 (i))
2307 return 2;
2308 /* Constants that can be loaded with a mov immediate and an and.
2309 This case is probably unnecessary. */
2310 if (CONST_OK_FOR_I08 (i))
2311 return 2;
2312 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2313 This case is probably unnecessary. */
2314 return 3;
2317 /* Return the cost of an addition or a subtraction. */
2319 static inline int
2320 addsubcosts (rtx x)
2322 /* Adding a register is a single cycle insn. */
2323 if (GET_CODE (XEXP (x, 1)) == REG
2324 || GET_CODE (XEXP (x, 1)) == SUBREG)
2325 return 1;
2327 /* Likewise for small constants. */
2328 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2329 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2330 return 1;
2332 if (TARGET_SHMEDIA)
2333 switch (GET_CODE (XEXP (x, 1)))
2335 case CONST:
2336 case LABEL_REF:
2337 case SYMBOL_REF:
2338 return TARGET_SHMEDIA64 ? 5 : 3;
2340 case CONST_INT:
2341 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2342 return 2;
2343 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2344 return 3;
2345 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2346 return 4;
2348 /* Fall through. */
2349 default:
2350 return 5;
2353 /* Any other constant requires a 2 cycle pc-relative load plus an
2354 addition. */
2355 return 3;
2358 /* Return the cost of a multiply. */
2359 static inline int
2360 multcosts (rtx x ATTRIBUTE_UNUSED)
2362 if (sh_multcost >= 0)
2363 return sh_multcost;
2364 if (TARGET_SHMEDIA)
2365 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2366 accept constants. Ideally, we would use a cost of one or two and
2367 add the cost of the operand, but disregard the latter when inside loops
2368 and loop invariant code motion is still to follow.
2369 Using a multiply first and splitting it later if it's a loss
2370 doesn't work because of different sign / zero extension semantics
2371 of multiplies vs. shifts. */
2372 return TARGET_SMALLCODE ? 2 : 3;
2374 if (TARGET_SH2)
2376 /* We have a mul insn, so we can never take more than the mul and the
2377 read of the mac reg, but count more because of the latency and extra
2378 reg usage. */
2379 if (TARGET_SMALLCODE)
2380 return 2;
2381 return 3;
2384 /* If we're aiming at small code, then just count the number of
2385 insns in a multiply call sequence. */
2386 if (TARGET_SMALLCODE)
2387 return 5;
2389 /* Otherwise count all the insns in the routine we'd be calling too. */
2390 return 20;
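/* So unless sh_multcost has been set explicitly, TARGET_SMALLCODE rates a
   multiply at 2 insns when a mul insn is available (SH2 and up, SHmedia)
   and at 5 when it has to become a library call; without TARGET_SMALLCODE
   the library-call case is rated at a full 20 insns.  */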
2393 /* Compute a (partial) cost for rtx X. Return true if the complete
2394 cost has been computed, and false if subexpressions should be
2395 scanned. In either case, *TOTAL contains the cost result. */
2397 static bool
2398 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2400 switch (code)
2402 case CONST_INT:
2403 if (TARGET_SHMEDIA)
2405 if (INTVAL (x) == 0)
2406 *total = 0;
2407 else if (outer_code == AND && and_operand ((x), DImode))
2408 *total = 0;
2409 else if ((outer_code == IOR || outer_code == XOR
2410 || outer_code == PLUS)
2411 && CONST_OK_FOR_I10 (INTVAL (x)))
2412 *total = 0;
2413 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2414 *total = COSTS_N_INSNS (outer_code != SET);
2415 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2416 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2417 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2418 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2419 else
2420 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2421 return true;
2423 if (CONST_OK_FOR_I08 (INTVAL (x)))
2424 *total = 0;
2425 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2426 && CONST_OK_FOR_K08 (INTVAL (x)))
2427 *total = 1;
2428 /* prepare_cmp_insn will force costly constants into registers before
2429 the cbranch[sd]i4 patterns can see them, so preserve potentially
2430 interesting ones not covered by I08 above. */
2431 else if (outer_code == COMPARE
2432 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2433 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2434 || INTVAL (x) == 0x7fffffff
2435 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2436 *total = 1;
2437 else
2438 *total = 8;
2439 return true;
2441 case CONST:
2442 case LABEL_REF:
2443 case SYMBOL_REF:
2444 if (TARGET_SHMEDIA64)
2445 *total = COSTS_N_INSNS (4);
2446 else if (TARGET_SHMEDIA32)
2447 *total = COSTS_N_INSNS (2);
2448 else
2449 *total = 5;
2450 return true;
2452 case CONST_DOUBLE:
2453 if (TARGET_SHMEDIA)
2454 *total = COSTS_N_INSNS (4);
2455 /* prepare_cmp_insn will force costly constants into registers before
2456 the cbranchdi4 pattern can see them, so preserve potentially
2457 interesting ones. */
2458 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2459 *total = 1;
2460 else
2461 *total = 10;
2462 return true;
2463 case CONST_VECTOR:
2464 if (x == CONST0_RTX (GET_MODE (x)))
2465 *total = 0;
2466 else if (sh_1el_vec (x, VOIDmode))
2467 *total = outer_code != SET;
2468 if (sh_rep_vec (x, VOIDmode))
2469 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2470 + (outer_code != SET));
2471 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2472 return true;
2474 case PLUS:
2475 case MINUS:
2476 *total = COSTS_N_INSNS (addsubcosts (x));
2477 return true;
2479 case AND:
2480 *total = COSTS_N_INSNS (andcosts (x));
2481 return true;
2483 case MULT:
2484 *total = COSTS_N_INSNS (multcosts (x));
2485 return true;
2487 case ASHIFT:
2488 case ASHIFTRT:
2489 case LSHIFTRT:
2490 *total = COSTS_N_INSNS (shiftcosts (x));
2491 return true;
2493 case DIV:
2494 case UDIV:
2495 case MOD:
2496 case UMOD:
2497 *total = COSTS_N_INSNS (20);
2498 return true;
2500 case PARALLEL:
2501 if (sh_1el_vec (x, VOIDmode))
2502 *total = outer_code != SET;
2503 if (sh_rep_vec (x, VOIDmode))
2504 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2505 + (outer_code != SET));
2506 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2507 return true;
2509 case FLOAT:
2510 case FIX:
2511 *total = 100;
2512 return true;
2514 default:
2515 return false;
2519 /* Compute the cost of an address. For the SH, all valid addresses are
2520 the same cost. Use a slightly higher cost for reg + reg addressing,
2521 since it increases pressure on r0. */
2523 static int
2524 sh_address_cost (rtx X)
2526 return (GET_CODE (X) == PLUS
2527 && ! CONSTANT_P (XEXP (X, 1))
2528 && ! TARGET_SHMEDIA ? 1 : 0);
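/* Thus @rn and @(disp,rn) addresses cost 0, while an index address such as
   @(r0,rn) - a PLUS whose second operand is not a constant - costs 1 on
   non-SHmedia targets.  */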
2531 /* Code to expand a shift. */
2533 void
2534 gen_ashift (int type, int n, rtx reg)
2536 /* Negative values here come from the shift_amounts array. */
2537 if (n < 0)
2539 if (type == ASHIFT)
2540 type = LSHIFTRT;
2541 else
2542 type = ASHIFT;
2543 n = -n;
2546 switch (type)
2548 case ASHIFTRT:
2549 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2550 break;
2551 case LSHIFTRT:
2552 if (n == 1)
2553 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2554 else
2555 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2556 break;
2557 case ASHIFT:
2558 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2559 break;
2563 /* Same for HImode */
2565 void
2566 gen_ashift_hi (int type, int n, rtx reg)
2568 /* Negative values here come from the shift_amounts array. */
2569 if (n < 0)
2571 if (type == ASHIFT)
2572 type = LSHIFTRT;
2573 else
2574 type = ASHIFT;
2575 n = -n;
2578 switch (type)
2580 case ASHIFTRT:
2581 case LSHIFTRT:
2582 /* We don't have HImode right shift operations because using the
2583 ordinary 32 bit shift instructions for that doesn't generate proper
2584 zero/sign extension.
2585 gen_ashift_hi is only called in contexts where we know that the
2586 sign extension works out correctly. */
2588 int offset = 0;
2589 if (GET_CODE (reg) == SUBREG)
2591 offset = SUBREG_BYTE (reg);
2592 reg = SUBREG_REG (reg);
2594 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2595 break;
2597 case ASHIFT:
2598 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2599 break;
2603 /* Output RTL to split a constant shift into its component SH constant
2604 shift instructions. */
2606 void
2607 gen_shifty_op (int code, rtx *operands)
2609 int value = INTVAL (operands[2]);
2610 int max, i;
2612 /* Truncate the shift count in case it is out of bounds. */
2613 value = value & 0x1f;
2615 if (value == 31)
2617 if (code == LSHIFTRT)
2619 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2620 emit_insn (gen_movt (operands[0]));
2621 return;
2623 else if (code == ASHIFT)
2625 /* There is a two instruction sequence for 31 bit left shifts,
2626 but it requires r0. */
2627 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2629 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2630 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2631 return;
2635 else if (value == 0)
2637 /* This can happen even when optimizing, if there were subregs before
2638 reload. Don't output a nop here, as this is never optimized away;
2639 use a no-op move instead. */
2640 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2641 return;
2644 max = shift_insns[value];
2645 for (i = 0; i < max; i++)
2646 gen_ashift (code, shift_amounts[value][i], operands[0]);
2649 /* Same as above, but optimized for values where the topmost bits don't
2650 matter. */
2652 void
2653 gen_shifty_hi_op (int code, rtx *operands)
2655 int value = INTVAL (operands[2]);
2656 int max, i;
2657 void (*gen_fun) (int, int, rtx);
2659 /* This operation is used by and_shl for SImode values with a few
2660 high bits known to be cleared. */
2661 value &= 31;
2662 if (value == 0)
2664 emit_insn (gen_nop ());
2665 return;
2668 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2669 if (code == ASHIFT)
2671 max = ext_shift_insns[value];
2672 for (i = 0; i < max; i++)
2673 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2675 else
2676 /* When shifting right, emit the shifts in reverse order, so that
2677 solitary negative values come first. */
2678 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2679 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2682 /* Output RTL for an arithmetic right shift. */
2684 /* ??? Rewrite to use super-optimizer sequences. */
2687 expand_ashiftrt (rtx *operands)
2689 rtx wrk;
2690 char func[18];
2691 int value;
2693 if (TARGET_SH3)
2695 if (GET_CODE (operands[2]) != CONST_INT)
2697 rtx count = copy_to_mode_reg (SImode, operands[2]);
2698 emit_insn (gen_negsi2 (count, count));
2699 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2700 return 1;
2702 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2703 > 1 + SH_DYNAMIC_SHIFT_COST)
2705 rtx count
2706 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2707 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2708 return 1;
2711 if (GET_CODE (operands[2]) != CONST_INT)
2712 return 0;
2714 value = INTVAL (operands[2]) & 31;
2716 if (value == 31)
2718 /* If we are called from abs expansion, arrange things so that we
2719 can use a single MT instruction that doesn't clobber the source,
2720 if LICM can hoist out the load of the constant zero. */
2721 if (currently_expanding_to_rtl)
2723 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2724 operands[1]));
2725 emit_insn (gen_mov_neg_si_t (operands[0]));
2726 return 1;
2728 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2729 return 1;
2731 else if (value >= 16 && value <= 19)
2733 wrk = gen_reg_rtx (SImode);
2734 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2735 value -= 16;
2736 while (value--)
2737 gen_ashift (ASHIFTRT, 1, wrk);
2738 emit_move_insn (operands[0], wrk);
2739 return 1;
2741 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2742 else if (value <= 5)
2744 wrk = gen_reg_rtx (SImode);
2745 emit_move_insn (wrk, operands[1]);
2746 while (value--)
2747 gen_ashift (ASHIFTRT, 1, wrk);
2748 emit_move_insn (operands[0], wrk);
2749 return 1;
2752 wrk = gen_reg_rtx (Pmode);
2754 /* Load the value into an arg reg and call a helper. */
2755 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2756 sprintf (func, "__ashiftrt_r4_%d", value);
2757 function_symbol (wrk, func, SFUNC_STATIC);
2758 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2759 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2760 return 1;
2764 sh_dynamicalize_shift_p (rtx count)
2766 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
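/* E.g. if SH_DYNAMIC_SHIFT_COST is 1, any constant shift that would take
   three or more instructions (such as a shift by 5 or by 7; see
   shift_insns above) is considered worth doing as a dynamic shift.  */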
2769 /* Try to find a good way to implement the combiner pattern
2770 [(set (match_operand:SI 0 "register_operand" "r")
2771 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2772 (match_operand:SI 2 "const_int_operand" "n"))
2773 (match_operand:SI 3 "const_int_operand" "n"))) .
2774 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2775 return 0 for simple right / left or left/right shift combination.
2776 return 1 for a combination of shifts with zero_extend.
2777 return 2 for a combination of shifts with an AND that needs r0.
2778 return 3 for a combination of shifts with an AND that needs an extra
2779 scratch register, when the three highmost bits of the AND mask are clear.
2780 return 4 for a combination of shifts with an AND that needs an extra
2781 scratch register, when any of the three highmost bits of the AND mask
2782 is set.
2783 If ATTRP is set, store an initial right shift width in ATTRP[0],
2784 and the instruction length in ATTRP[1]. These values are not valid
2785 when returning 0.
2786 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2787 shift_amounts for the last shift value that is to be used before the
2788 sign extend. */
2790 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2792 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2793 int left = INTVAL (left_rtx), right;
2794 int best = 0;
2795 int cost, best_cost = 10000;
2796 int best_right = 0, best_len = 0;
2797 int i;
2798 int can_ext;
2800 if (left < 0 || left > 31)
2801 return 0;
2802 if (GET_CODE (mask_rtx) == CONST_INT)
2803 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2804 else
2805 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2806 /* Can this be expressed as a right shift / left shift pair? */
2807 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2808 right = exact_log2 (lsb);
2809 mask2 = ~(mask + lsb - 1);
2810 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2811 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2812 if (! mask2)
2813 best_cost = shift_insns[right] + shift_insns[right + left];
2814 /* mask has no trailing zeroes <==> ! right */
2815 else if (! right && mask2 == ~(lsb2 - 1))
2817 int late_right = exact_log2 (lsb2);
2818 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2820 /* Try to use zero extend. */
2821 if (mask2 == ~(lsb2 - 1))
2823 int width, first;
2825 for (width = 8; width <= 16; width += 8)
2827 /* Can we zero-extend right away? */
2828 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2830 cost
2831 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2832 if (cost < best_cost)
2834 best = 1;
2835 best_cost = cost;
2836 best_right = right;
2837 best_len = cost;
2838 if (attrp)
2839 attrp[2] = -1;
2841 continue;
2843 /* ??? Could try to put zero extend into initial right shift,
2844 or even shift a bit left before the right shift. */
2845 /* Determine value of first part of left shift, to get to the
2846 zero extend cut-off point. */
2847 first = width - exact_log2 (lsb2) + right;
2848 if (first >= 0 && right + left - first >= 0)
2850 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2851 + ext_shift_insns[right + left - first];
2852 if (cost < best_cost)
2854 best = 1;
2855 best_cost = cost;
2856 best_right = right;
2857 best_len = cost;
2858 if (attrp)
2859 attrp[2] = first;
2864 /* Try to use r0 AND pattern */
2865 for (i = 0; i <= 2; i++)
2867 if (i > right)
2868 break;
2869 if (! CONST_OK_FOR_K08 (mask >> i))
2870 continue;
2871 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2872 if (cost < best_cost)
2874 best = 2;
2875 best_cost = cost;
2876 best_right = i;
2877 best_len = cost - 1;
2880 /* Try to use a scratch register to hold the AND operand. */
2881 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2882 for (i = 0; i <= 2; i++)
2884 if (i > right)
2885 break;
2886 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2887 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2888 if (cost < best_cost)
2890 best = 4 - can_ext;
2891 best_cost = cost;
2892 best_right = i;
2893 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2897 if (attrp)
2899 attrp[0] = best_right;
2900 attrp[1] = best_len;
2902 return best;
2905 /* This is used in length attributes of the unnamed instructions
2906 corresponding to shl_and_kind return values of 1 and 2. */
2908 shl_and_length (rtx insn)
2910 rtx set_src, left_rtx, mask_rtx;
2911 int attributes[3];
2913 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2914 left_rtx = XEXP (XEXP (set_src, 0), 1);
2915 mask_rtx = XEXP (set_src, 1);
2916 shl_and_kind (left_rtx, mask_rtx, attributes);
2917 return attributes[1];
2920 /* This is used in the length attribute of the and_shl_scratch instruction. */
2923 shl_and_scr_length (rtx insn)
2925 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2926 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2927 rtx op = XEXP (set_src, 0);
2928 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2929 op = XEXP (XEXP (op, 0), 0);
2930 return len + shift_insns[INTVAL (XEXP (op, 1))];
2933 /* Generate rtl for instructions for which shl_and_kind advised a particular
2934 method of generating them, i.e. returned nonzero. */
2937 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2939 int attributes[3];
2940 unsigned HOST_WIDE_INT mask;
2941 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2942 int right, total_shift;
2943 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2945 right = attributes[0];
2946 total_shift = INTVAL (left_rtx) + right;
2947 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2948 switch (kind)
2950 default:
2951 return -1;
2952 case 1:
2954 int first = attributes[2];
2955 rtx operands[3];
2957 if (first < 0)
2959 emit_insn ((mask << right) <= 0xff
2960 ? gen_zero_extendqisi2 (dest,
2961 gen_lowpart (QImode, source))
2962 : gen_zero_extendhisi2 (dest,
2963 gen_lowpart (HImode, source)));
2964 source = dest;
2966 if (source != dest)
2967 emit_insn (gen_movsi (dest, source));
2968 operands[0] = dest;
2969 if (right)
2971 operands[2] = GEN_INT (right);
2972 gen_shifty_hi_op (LSHIFTRT, operands);
2974 if (first > 0)
2976 operands[2] = GEN_INT (first);
2977 gen_shifty_hi_op (ASHIFT, operands);
2978 total_shift -= first;
2979 mask <<= first;
2981 if (first >= 0)
2982 emit_insn (mask <= 0xff
2983 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2984 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2985 if (total_shift > 0)
2987 operands[2] = GEN_INT (total_shift);
2988 gen_shifty_hi_op (ASHIFT, operands);
2990 break;
2992 case 4:
2993 shift_gen_fun = gen_shifty_op;
2994 case 3:
2995 /* If the topmost bit that matters is set, set the topmost bits
2996 that don't matter. This way, we might be able to get a shorter
2997 signed constant. */
2998 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2999 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3000 case 2:
3001 /* Don't expand fine-grained when combining, because that will
3002 make the pattern fail. */
3003 if (currently_expanding_to_rtl
3004 || reload_in_progress || reload_completed)
3006 rtx operands[3];
3008 /* Cases 3 and 4 should be handled by this split
3009 only while combining */
3010 gcc_assert (kind <= 2);
3011 if (right)
3013 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3014 source = dest;
3016 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3017 if (total_shift)
3019 operands[0] = dest;
3020 operands[1] = dest;
3021 operands[2] = GEN_INT (total_shift);
3022 shift_gen_fun (ASHIFT, operands);
3024 break;
3026 else
3028 int neg = 0;
3029 if (kind != 4 && total_shift < 16)
3031 neg = -ext_shift_amounts[total_shift][1];
3032 if (neg > 0)
3033 neg -= ext_shift_amounts[total_shift][2];
3034 else
3035 neg = 0;
3037 emit_insn (gen_and_shl_scratch (dest, source,
3038 GEN_INT (right),
3039 GEN_INT (mask),
3040 GEN_INT (total_shift + neg),
3041 GEN_INT (neg)));
3042 emit_insn (gen_movsi (dest, dest));
3043 break;
3046 return 0;
3049 /* Try to find a good way to implement the combiner pattern
3050 [(set (match_operand:SI 0 "register_operand" "=r")
3051 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3052 (match_operand:SI 2 "const_int_operand" "n")
3053 (match_operand:SI 3 "const_int_operand" "n")
3054 (const_int 0)))
3055 (clobber (reg:SI T_REG))]
3056 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3057 return 0 for simple left / right shift combination.
3058 return 1 for left shift / 8 bit sign extend / left shift.
3059 return 2 for left shift / 16 bit sign extend / left shift.
3060 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3061 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3062 return 5 for left shift / 16 bit sign extend / right shift
3063 return 6 for < 8 bit sign extend / left shift.
3064 return 7 for < 8 bit sign extend / left shift / single right shift.
3065 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3068 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3070 int left, size, insize, ext;
3071 int cost = 0, best_cost;
3072 int kind;
3074 left = INTVAL (left_rtx);
3075 size = INTVAL (size_rtx);
3076 insize = size - left;
3077 gcc_assert (insize > 0);
3078 /* Default to left / right shift. */
3079 kind = 0;
3080 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3081 if (size <= 16)
3083 /* 16 bit shift / sign extend / 16 bit shift */
3084 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3085 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3086 below, by alternative 3 or something even better. */
3087 if (cost < best_cost)
3089 kind = 5;
3090 best_cost = cost;
3093 /* Try a plain sign extend between two shifts. */
3094 for (ext = 16; ext >= insize; ext -= 8)
3096 if (ext <= size)
3098 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3099 if (cost < best_cost)
3101 kind = ext / (unsigned) 8;
3102 best_cost = cost;
3105 /* Check if we can do a sloppy shift with a final signed shift
3106 restoring the sign. */
3107 if (EXT_SHIFT_SIGNED (size - ext))
3108 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3109 /* If not, maybe it's still cheaper to do the second shift sloppy,
3110 and do a final sign extend? */
3111 else if (size <= 16)
3112 cost = ext_shift_insns[ext - insize] + 1
3113 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3114 else
3115 continue;
3116 if (cost < best_cost)
3118 kind = ext / (unsigned) 8 + 2;
3119 best_cost = cost;
3122 /* Check if we can sign extend in r0 */
3123 if (insize < 8)
3125 cost = 3 + shift_insns[left];
3126 if (cost < best_cost)
3128 kind = 6;
3129 best_cost = cost;
3131 /* Try the same with a final signed shift. */
3132 if (left < 31)
3134 cost = 3 + ext_shift_insns[left + 1] + 1;
3135 if (cost < best_cost)
3137 kind = 7;
3138 best_cost = cost;
3142 if (TARGET_SH3)
3144 /* Try to use a dynamic shift. */
3145 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3146 if (cost < best_cost)
3148 kind = 0;
3149 best_cost = cost;
3152 if (costp)
3153 *costp = cost;
3154 return kind;
3157 /* Function to be used in the length attribute of the instructions
3158 implementing this pattern. */
3161 shl_sext_length (rtx insn)
3163 rtx set_src, left_rtx, size_rtx;
3164 int cost;
3166 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3167 left_rtx = XEXP (XEXP (set_src, 0), 1);
3168 size_rtx = XEXP (set_src, 1);
3169 shl_sext_kind (left_rtx, size_rtx, &cost);
3170 return cost;
3173 /* Generate rtl for this pattern */
3176 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3178 int kind;
3179 int left, size, insize, cost;
3180 rtx operands[3];
3182 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3183 left = INTVAL (left_rtx);
3184 size = INTVAL (size_rtx);
3185 insize = size - left;
3186 switch (kind)
3188 case 1:
3189 case 2:
3190 case 3:
3191 case 4:
3193 int ext = kind & 1 ? 8 : 16;
3194 int shift2 = size - ext;
3196 /* Don't expand fine-grained when combining, because that will
3197 make the pattern fail. */
3198 if (! currently_expanding_to_rtl
3199 && ! reload_in_progress && ! reload_completed)
3201 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3202 emit_insn (gen_movsi (dest, source));
3203 break;
3205 if (dest != source)
3206 emit_insn (gen_movsi (dest, source));
3207 operands[0] = dest;
3208 if (ext - insize)
3210 operands[2] = GEN_INT (ext - insize);
3211 gen_shifty_hi_op (ASHIFT, operands);
3213 emit_insn (kind & 1
3214 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3215 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3216 if (kind <= 2)
3218 if (shift2)
3220 operands[2] = GEN_INT (shift2);
3221 gen_shifty_op (ASHIFT, operands);
3224 else
3226 if (shift2 > 0)
3228 if (EXT_SHIFT_SIGNED (shift2))
3230 operands[2] = GEN_INT (shift2 + 1);
3231 gen_shifty_op (ASHIFT, operands);
3232 operands[2] = const1_rtx;
3233 gen_shifty_op (ASHIFTRT, operands);
3234 break;
3236 operands[2] = GEN_INT (shift2);
3237 gen_shifty_hi_op (ASHIFT, operands);
3239 else if (shift2)
3241 operands[2] = GEN_INT (-shift2);
3242 gen_shifty_hi_op (LSHIFTRT, operands);
3244 emit_insn (size <= 8
3245 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3246 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3248 break;
3250 case 5:
3252 int i = 16 - size;
3253 if (! currently_expanding_to_rtl
3254 && ! reload_in_progress && ! reload_completed)
3255 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3256 else
3258 operands[0] = dest;
3259 operands[2] = GEN_INT (16 - insize);
3260 gen_shifty_hi_op (ASHIFT, operands);
3261 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3263 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3264 while (--i >= 0)
3265 gen_ashift (ASHIFTRT, 1, dest);
3266 break;
3268 case 6:
3269 case 7:
3270 /* Don't expand fine-grained when combining, because that will
3271 make the pattern fail. */
3272 if (! currently_expanding_to_rtl
3273 && ! reload_in_progress && ! reload_completed)
3275 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3276 emit_insn (gen_movsi (dest, source));
3277 break;
3279 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3280 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3281 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3282 operands[0] = dest;
3283 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3284 gen_shifty_op (ASHIFT, operands);
3285 if (kind == 7)
3286 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3287 break;
3288 default:
3289 return -1;
3291 return 0;
3294 /* Prefix a symbol_ref name with "datalabel". */
3297 gen_datalabel_ref (rtx sym)
3299 const char *str;
3301 if (GET_CODE (sym) == LABEL_REF)
3302 return gen_rtx_CONST (GET_MODE (sym),
3303 gen_rtx_UNSPEC (GET_MODE (sym),
3304 gen_rtvec (1, sym),
3305 UNSPEC_DATALABEL));
3307 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3309 str = XSTR (sym, 0);
3310 /* Share all SYMBOL_REF strings with the same value - that is important
3311 for cse. */
3312 str = IDENTIFIER_POINTER (get_identifier (str));
3313 XSTR (sym, 0) = str;
3315 return sym;
3319 static alloc_pool label_ref_list_pool;
3321 typedef struct label_ref_list_d
3323 rtx label;
3324 struct label_ref_list_d *next;
3325 } *label_ref_list_t;
3327 /* The SH cannot load a large constant into a register, constants have to
3328 come from a pc relative load. The reference of a pc relative load
3329 instruction must be less than 1k in front of the instruction. This
3330 means that we often have to dump a constant inside a function, and
3331 generate code to branch around it.
3333 It is important to minimize this, since the branches will slow things
3334 down and make things bigger.
3336 Worst case code looks like:
3338 mov.l L1,rn
3339 bra L2
3341 align
3342 L1: .long value
3346 mov.l L3,rn
3347 bra L4
3349 align
3350 L3: .long value
3354 We fix this by performing a scan before scheduling, which notices which
3355 instructions need to have their operands fetched from the constant table
3356 and builds the table.
3358 The algorithm is:
3360 scan, find an instruction which needs a pcrel move. Look forward, find the
3361 last barrier which is within MAX_COUNT bytes of the requirement.
3362 If there isn't one, make one. Process all the instructions between
3363 the find and the barrier.
3365 In the above example, we can tell that L3 is within 1k of L1, so
3366 the first move can be shrunk from the 3 insn+constant sequence into
3367 just 1 insn, and the constant moved to L3 to make:
3369 mov.l L1,rn
3371 mov.l L3,rn
3372 bra L4
3374 align
3375 L3:.long value
3376 L4:.long value
3378 Then the second move becomes the target for the shortening process. */
3380 typedef struct
3382 rtx value; /* Value in table. */
3383 rtx label; /* Label of value. */
3384 label_ref_list_t wend; /* End of window. */
3385 enum machine_mode mode; /* Mode of value. */
3387 /* True if this constant is accessed as part of a post-increment
3388 sequence. Note that HImode constants are never accessed in this way. */
3389 bool part_of_sequence_p;
3390 } pool_node;
3392 /* The maximum number of constants that can fit into one pool, since
3393 constants in the range 0..510 are at least 2 bytes long, and in the
3394 range from there to 1018 at least 4 bytes. */
3396 #define MAX_POOL_SIZE 372
3397 static pool_node pool_vector[MAX_POOL_SIZE];
3398 static int pool_size;
3399 static rtx pool_window_label;
3400 static int pool_window_last;
3402 static int max_labelno_before_reorg;
3404 /* ??? If we need a constant in HImode which is the truncated value of a
3405 constant we need in SImode, we could combine the two entries thus saving
3406 two bytes. Is this common enough to be worth the effort of implementing
3407 it? */
3409 /* ??? This stuff should be done at the same time that we shorten branches.
3410 As it is now, we must assume that all branches are the maximum size, and
3411 this causes us to almost always output constant pools sooner than
3412 necessary. */
3414 /* Add a constant to the pool and return its label. */
3416 static rtx
3417 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3419 int i;
3420 rtx lab, new;
3421 label_ref_list_t ref, newref;
3423 /* First see if we've already got it. */
3424 for (i = 0; i < pool_size; i++)
3426 if (x->code == pool_vector[i].value->code
3427 && mode == pool_vector[i].mode)
3429 if (x->code == CODE_LABEL)
3431 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3432 continue;
3434 if (rtx_equal_p (x, pool_vector[i].value))
3436 lab = new = 0;
3437 if (! last_value
3438 || ! i
3439 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3441 new = gen_label_rtx ();
3442 LABEL_REFS (new) = pool_vector[i].label;
3443 pool_vector[i].label = lab = new;
3445 if (lab && pool_window_label)
3447 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3448 newref->label = pool_window_label;
3449 ref = pool_vector[pool_window_last].wend;
3450 newref->next = ref;
3451 pool_vector[pool_window_last].wend = newref;
3453 if (new)
3454 pool_window_label = new;
3455 pool_window_last = i;
3456 return lab;
3461 /* Need a new one. */
3462 pool_vector[pool_size].value = x;
3463 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3465 lab = 0;
3466 pool_vector[pool_size - 1].part_of_sequence_p = true;
3468 else
3469 lab = gen_label_rtx ();
3470 pool_vector[pool_size].mode = mode;
3471 pool_vector[pool_size].label = lab;
3472 pool_vector[pool_size].wend = NULL;
3473 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3474 if (lab && pool_window_label)
3476 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3477 newref->label = pool_window_label;
3478 ref = pool_vector[pool_window_last].wend;
3479 newref->next = ref;
3480 pool_vector[pool_window_last].wend = newref;
3482 if (lab)
3483 pool_window_label = lab;
3484 pool_window_last = pool_size;
3485 pool_size++;
3486 return lab;
3489 /* Output the literal table. START, if nonzero, is the first instruction
3490 this table is needed for, and also indicates that there is at least one
3491 casesi_worker_2 instruction; we have to emit the operand3 labels from
3492 these insns at a 4-byte aligned position. BARRIER is the barrier
3493 after which we are to place the table. */
3495 static void
3496 dump_table (rtx start, rtx barrier)
3498 rtx scan = barrier;
3499 int i;
3500 int need_align = 1;
3501 rtx lab;
3502 label_ref_list_t ref;
3503 int have_df = 0;
3505 /* Do two passes; the first time, dump out the HI sized constants. */
3507 for (i = 0; i < pool_size; i++)
3509 pool_node *p = &pool_vector[i];
3511 if (p->mode == HImode)
3513 if (need_align)
3515 scan = emit_insn_after (gen_align_2 (), scan);
3516 need_align = 0;
3518 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3519 scan = emit_label_after (lab, scan);
3520 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3521 scan);
3522 for (ref = p->wend; ref; ref = ref->next)
3524 lab = ref->label;
3525 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3528 else if (p->mode == DFmode)
3529 have_df = 1;
3532 need_align = 1;
3534 if (start)
3536 scan = emit_insn_after (gen_align_4 (), scan);
3537 need_align = 0;
3538 for (; start != barrier; start = NEXT_INSN (start))
3539 if (GET_CODE (start) == INSN
3540 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3542 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3543 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3545 scan = emit_label_after (lab, scan);
3548 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3550 rtx align_insn = NULL_RTX;
3552 scan = emit_label_after (gen_label_rtx (), scan);
3553 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3554 need_align = 0;
3556 for (i = 0; i < pool_size; i++)
3558 pool_node *p = &pool_vector[i];
3560 switch (p->mode)
3562 case HImode:
3563 break;
3564 case SImode:
3565 case SFmode:
3566 if (align_insn && !p->part_of_sequence_p)
3568 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3569 emit_label_before (lab, align_insn);
3570 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3571 align_insn);
3572 for (ref = p->wend; ref; ref = ref->next)
3574 lab = ref->label;
3575 emit_insn_before (gen_consttable_window_end (lab),
3576 align_insn);
3578 delete_insn (align_insn);
3579 align_insn = NULL_RTX;
3580 continue;
3582 else
3584 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3585 scan = emit_label_after (lab, scan);
3586 scan = emit_insn_after (gen_consttable_4 (p->value,
3587 const0_rtx), scan);
3588 need_align = ! need_align;
3590 break;
3591 case DFmode:
3592 if (need_align)
3594 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3595 align_insn = scan;
3596 need_align = 0;
3598 case DImode:
3599 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3600 scan = emit_label_after (lab, scan);
3601 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3602 scan);
3603 break;
3604 default:
3605 gcc_unreachable ();
3608 if (p->mode != HImode)
3610 for (ref = p->wend; ref; ref = ref->next)
3612 lab = ref->label;
3613 scan = emit_insn_after (gen_consttable_window_end (lab),
3614 scan);
3619 pool_size = 0;
3622 for (i = 0; i < pool_size; i++)
3624 pool_node *p = &pool_vector[i];
3626 switch (p->mode)
3628 case HImode:
3629 break;
3630 case SImode:
3631 case SFmode:
3632 if (need_align)
3634 need_align = 0;
3635 scan = emit_label_after (gen_label_rtx (), scan);
3636 scan = emit_insn_after (gen_align_4 (), scan);
3638 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3639 scan = emit_label_after (lab, scan);
3640 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3641 scan);
3642 break;
3643 case DFmode:
3644 case DImode:
3645 if (need_align)
3647 need_align = 0;
3648 scan = emit_label_after (gen_label_rtx (), scan);
3649 scan = emit_insn_after (gen_align_4 (), scan);
3651 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3652 scan = emit_label_after (lab, scan);
3653 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3654 scan);
3655 break;
3656 default:
3657 gcc_unreachable ();
3660 if (p->mode != HImode)
3662 for (ref = p->wend; ref; ref = ref->next)
3664 lab = ref->label;
3665 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3670 scan = emit_insn_after (gen_consttable_end (), scan);
3671 scan = emit_barrier_after (scan);
3672 pool_size = 0;
3673 pool_window_label = NULL_RTX;
3674 pool_window_last = 0;
3677 /* Return nonzero if constant would be an ok source for a
3678 mov.w instead of a mov.l. */
3680 static int
3681 hi_const (rtx src)
3683 return (GET_CODE (src) == CONST_INT
3684 && INTVAL (src) >= -32768
3685 && INTVAL (src) <= 32767);
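/* I.e. any constant that fits in a signed 16-bit value: 32767 qualifies
   for mov.w, 32768 does not.  */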
3688 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3690 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3692 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3693 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3694 need to fix it if the input value is CONST_OK_FOR_I08. */
3696 static int
3697 broken_move (rtx insn)
3699 if (GET_CODE (insn) == INSN)
3701 rtx pat = PATTERN (insn);
3702 if (GET_CODE (pat) == PARALLEL)
3703 pat = XVECEXP (pat, 0, 0);
3704 if (GET_CODE (pat) == SET
3705 /* We can load any 8 bit value if we don't care what the high
3706 order bits end up as. */
3707 && GET_MODE (SET_DEST (pat)) != QImode
3708 && (CONSTANT_P (SET_SRC (pat))
3709 /* Match mova_const. */
3710 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3711 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3712 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3713 && ! (TARGET_SH2E
3714 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3715 && (fp_zero_operand (SET_SRC (pat))
3716 || fp_one_operand (SET_SRC (pat)))
3717 /* ??? If this is a -m4 or -m4-single compilation, in general
3718 we don't know the current setting of fpscr, so disable fldi.
3719 There is an exception if this was a register-register move
3720 before reload - and hence it was ascertained that we have
3721 single precision setting - and in a post-reload optimization
3722 we changed this to do a constant load. In that case
3723 we don't have an r0 clobber, hence we must use fldi. */
3724 && (! TARGET_SH4 || TARGET_FMOVD
3725 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3726 == SCRATCH))
3727 && GET_CODE (SET_DEST (pat)) == REG
3728 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3729 && ! (TARGET_SH2A
3730 && GET_MODE (SET_DEST (pat)) == SImode
3731 && GET_CODE (SET_SRC (pat)) == CONST_INT
3732 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3733 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3734 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3735 return 1;
3738 return 0;
3741 static int
3742 mova_p (rtx insn)
3744 return (GET_CODE (insn) == INSN
3745 && GET_CODE (PATTERN (insn)) == SET
3746 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3747 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3748 /* Don't match mova_const. */
3749 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3752 /* Fix up a mova from a switch that went out of range. */
3753 static void
3754 fixup_mova (rtx mova)
3756 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3757 if (! flag_pic)
3759 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3760 INSN_CODE (mova) = -1;
3762 else
3764 rtx worker = mova;
3765 rtx lab = gen_label_rtx ();
3766 rtx wpat, wpat0, wpat1, wsrc, diff;
3770 worker = NEXT_INSN (worker);
3771 gcc_assert (worker
3772 && GET_CODE (worker) != CODE_LABEL
3773 && GET_CODE (worker) != JUMP_INSN);
3774 } while (GET_CODE (worker) == NOTE
3775 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3776 wpat = PATTERN (worker);
3777 wpat0 = XVECEXP (wpat, 0, 0);
3778 wpat1 = XVECEXP (wpat, 0, 1);
3779 wsrc = SET_SRC (wpat0);
3780 PATTERN (worker) = (gen_casesi_worker_2
3781 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3782 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3783 XEXP (wpat1, 0)));
3784 INSN_CODE (worker) = -1;
3785 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3786 gen_rtx_LABEL_REF (Pmode, lab));
3787 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3788 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3789 INSN_CODE (mova) = -1;
3793 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3794 *num_mova, and check if the new mova is not nested within the first one.
3795 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3796 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3797 static int
3798 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3800 int n_addr = 0; /* Initialization to shut up spurious warning. */
3801 int f_target, n_target = 0; /* Likewise. */
3803 if (optimize)
3805 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3806 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3807 if (n_addr > n_target || n_addr + 1022 < n_target)
3809 /* Change the mova into a load.
3810 broken_move will then return true for it. */
3811 fixup_mova (new_mova);
3812 return 1;
3815 if (!(*num_mova)++)
3817 *first_mova = new_mova;
3818 return 2;
3820 if (!optimize
3821 || ((f_target
3822 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3823 >= n_target))
3824 return -1;
3826 (*num_mova)--;
3827 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3828 > n_target - n_addr)
3830 fixup_mova (*first_mova);
3831 return 0;
3833 else
3835 fixup_mova (new_mova);
3836 return 1;
3840 /* Find the last barrier from insn FROM which is close enough to hold the
3841 constant pool. If we can't find one, then create one near the end of
3842 the range. */
3844 static rtx
3845 find_barrier (int num_mova, rtx mova, rtx from)
3847 int count_si = 0;
3848 int count_hi = 0;
3849 int found_hi = 0;
3850 int found_si = 0;
3851 int found_di = 0;
3852 int hi_align = 2;
3853 int si_align = 2;
3854 int leading_mova = num_mova;
3855 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3856 int si_limit;
3857 int hi_limit;
3859 /* For HImode: range is 510, add 4 because pc counts from address of
3860 second instruction after this one, subtract 2 for the jump instruction
3861 that we may need to emit before the table, subtract 2 for the instruction
3862 that fills the jump delay slot (in very rare cases, reorg will take an
3863 instruction from after the constant pool or will leave the delay slot
3864 empty). This gives 510.
3865 For SImode: range is 1020, add 4 because pc counts from address of
3866 second instruction after this one, subtract 2 in case pc is 2 byte
3867 aligned, subtract 2 for the jump instruction that we may need to emit
3868 before the table, subtract 2 for the instruction that fills the jump
3869 delay slot. This gives 1018. */
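   /* That is, 510 + 4 - 2 - 2 = 510 for HImode, and
      1020 + 4 - 2 - 2 - 2 = 1018 for SImode.  */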
3871 /* The branch will always be shortened now that the reference address for
3872 forward branches is the successor address, thus we need no longer make
3873 adjustments to the [sh]i_limit for -O0. */
3875 si_limit = 1018;
3876 hi_limit = 510;
3878 while (from && count_si < si_limit && count_hi < hi_limit)
3880 int inc = get_attr_length (from);
3881 int new_align = 1;
3883 /* If this is a label that existed at the time of the compute_alignments
3884 call, determine the alignment. N.B. When find_barrier recurses for
3885 an out-of-reach mova, we might see labels at the start of previously
3886 inserted constant tables. */
3887 if (GET_CODE (from) == CODE_LABEL
3888 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3890 if (optimize)
3891 new_align = 1 << label_to_alignment (from);
3892 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3893 new_align = 1 << barrier_align (from);
3894 else
3895 new_align = 1;
3896 inc = 0;
3898 /* In case we are scanning a constant table because of recursion, check
3899 for explicit alignments. If the table is long, we might be forced
3900 to emit the new table in front of it; the length of the alignment
3901 might be the last straw. */
3902 else if (GET_CODE (from) == INSN
3903 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3904 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3905 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3906 /* When we find the end of a constant table, paste the new constant
3907 at the end. That is better than putting it in front because
3908 this way, we don't need extra alignment for adding a 4-byte-aligned
3909 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3910 else if (GET_CODE (from) == INSN
3911 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3912 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3913 return from;
3915 if (GET_CODE (from) == BARRIER)
3918 found_barrier = from;
3920 /* If we are at the end of the function, or in front of an alignment
3921 instruction, we need not insert an extra alignment. We prefer
3922 this kind of barrier. */
3923 if (barrier_align (from) > 2)
3924 good_barrier = from;
3927 if (broken_move (from))
3929 rtx pat, src, dst;
3930 enum machine_mode mode;
3932 pat = PATTERN (from);
3933 if (GET_CODE (pat) == PARALLEL)
3934 pat = XVECEXP (pat, 0, 0);
3935 src = SET_SRC (pat);
3936 dst = SET_DEST (pat);
3937 mode = GET_MODE (dst);
3939 /* We must explicitly check the mode, because sometimes the
3940 front end will generate code to load unsigned constants into
3941 HImode targets without properly sign extending them. */
3942 if (mode == HImode
3943 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3945 found_hi += 2;
3946 /* We put the short constants before the long constants, so
3947 we must count the length of short constants in the range
3948 for the long constants. */
3949 /* ??? This isn't optimal, but is easy to do. */
3950 si_limit -= 2;
3952 else
3954 /* We dump DF/DI constants before SF/SI ones, because
3955 the limit is the same, but the alignment requirements
3956 are higher. We may waste up to 4 additional bytes
3957 for alignment, and the DF/DI constant may have
3958 another SF/SI constant placed before it. */
3959 if (TARGET_SHCOMPACT
3960 && ! found_di
3961 && (mode == DFmode || mode == DImode))
3963 found_di = 1;
3964 si_limit -= 8;
3966 while (si_align > 2 && found_si + si_align - 2 > count_si)
3967 si_align >>= 1;
3968 if (found_si > count_si)
3969 count_si = found_si;
3970 found_si += GET_MODE_SIZE (mode);
3971 if (num_mova)
3972 si_limit -= GET_MODE_SIZE (mode);
3976 if (mova_p (from))
3978 switch (untangle_mova (&num_mova, &mova, from))
3980 case 0: return find_barrier (0, 0, mova);
3981 case 2:
3983 leading_mova = 0;
3984 barrier_before_mova
3985 = good_barrier ? good_barrier : found_barrier;
3987 default: break;
3989 if (found_si > count_si)
3990 count_si = found_si;
3992 else if (GET_CODE (from) == JUMP_INSN
3993 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3994 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3996 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3997 || (num_mova
3998 && (prev_nonnote_insn (from)
3999 == XEXP (MOVA_LABELREF (mova), 0))))
4000 num_mova--;
4001 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4003 /* We have just passed the barrier in front of the
4004 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4005 the ADDR_DIFF_VEC is accessed as data, just like our pool
4006 constants, this is a good opportunity to accommodate what
4007 we have gathered so far.
4008 If we waited any longer, we could end up at a barrier in
4009 front of code, which gives worse cache usage for separated
4010 instruction / data caches. */
4011 good_barrier = found_barrier;
4012 break;
4014 else
4016 rtx body = PATTERN (from);
4017 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4020 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4021 else if (GET_CODE (from) == JUMP_INSN
4022 && ! TARGET_SH2
4023 && ! TARGET_SMALLCODE)
4024 new_align = 4;
4026 if (found_si)
4028 count_si += inc;
4029 if (new_align > si_align)
4031 si_limit -= (count_si - 1) & (new_align - si_align);
4032 si_align = new_align;
4034 count_si = (count_si + new_align - 1) & -new_align;
4036 if (found_hi)
4038 count_hi += inc;
4039 if (new_align > hi_align)
4041 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4042 hi_align = new_align;
4044 count_hi = (count_hi + new_align - 1) & -new_align;
4046 from = NEXT_INSN (from);
4049 if (num_mova)
4051 if (leading_mova)
4053 /* Try as we might, the leading mova is out of range. Change
4054 it into a load (which will become a pcload) and retry. */
4055 fixup_mova (mova);
4056 return find_barrier (0, 0, mova);
4058 else
4060 /* Insert the constant pool table before the mova instruction,
4061 to prevent the mova label reference from going out of range. */
4062 from = mova;
4063 good_barrier = found_barrier = barrier_before_mova;
4067 if (found_barrier)
4069 if (good_barrier && next_real_insn (found_barrier))
4070 found_barrier = good_barrier;
4072 else
4074 /* We didn't find a barrier in time to dump our stuff,
4075 so we'll make one. */
4076 rtx label = gen_label_rtx ();
4078 /* If we exceeded the range, then we must back up over the last
4079 instruction we looked at. Otherwise, we just need to undo the
4080 NEXT_INSN at the end of the loop. */
4081 if (count_hi > hi_limit || count_si > si_limit)
4082 from = PREV_INSN (PREV_INSN (from));
4083 else
4084 from = PREV_INSN (from);
4086 /* Walk back to be just before any jump or label.
4087 Putting it before a label reduces the number of times the branch
4088 around the constant pool table will be hit. Putting it before
4089 a jump makes it more likely that the bra delay slot will be
4090 filled. */
4091 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4092 || GET_CODE (from) == CODE_LABEL)
4093 from = PREV_INSN (from);
4095 from = emit_jump_insn_after (gen_jump (label), from);
4096 JUMP_LABEL (from) = label;
4097 LABEL_NUSES (label) = 1;
4098 found_barrier = emit_barrier_after (from);
4099 emit_label_after (label, found_barrier);
4102 return found_barrier;
4105 /* If the instruction INSN is implemented by a special function, and we can
4106 positively find the register that is used to call the sfunc, and this
4107 register is not used anywhere else in this instruction - except as the
4108 destination of a set, return this register; else, return 0. */
4110 sfunc_uses_reg (rtx insn)
4112 int i;
4113 rtx pattern, part, reg_part, reg;
4115 if (GET_CODE (insn) != INSN)
4116 return 0;
4117 pattern = PATTERN (insn);
4118 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4119 return 0;
4121 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4123 part = XVECEXP (pattern, 0, i);
4124 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4125 reg_part = part;
4127 if (! reg_part)
4128 return 0;
4129 reg = XEXP (reg_part, 0);
4130 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4132 part = XVECEXP (pattern, 0, i);
4133 if (part == reg_part || GET_CODE (part) == CLOBBER)
4134 continue;
4135 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4136 && GET_CODE (SET_DEST (part)) == REG)
4137 ? SET_SRC (part) : part)))
4138 return 0;
4140 return reg;
4143 /* See if the only way in which INSN uses REG is by calling it, or by
4144 setting it while calling it. Set *SET to a SET rtx if the register
4145 is set by INSN. */
4147 static int
4148 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4150 rtx pattern, reg2;
4152 *set = NULL_RTX;
4154 reg2 = sfunc_uses_reg (insn);
4155 if (reg2 && REGNO (reg2) == REGNO (reg))
4157 pattern = single_set (insn);
4158 if (pattern
4159 && GET_CODE (SET_DEST (pattern)) == REG
4160 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4161 *set = pattern;
4162 return 0;
4164 if (GET_CODE (insn) != CALL_INSN)
4166 /* We don't use rtx_equal_p because we don't care if the mode is
4167 different. */
4168 pattern = single_set (insn);
4169 if (pattern
4170 && GET_CODE (SET_DEST (pattern)) == REG
4171 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4173 rtx par, part;
4174 int i;
4176 *set = pattern;
4177 par = PATTERN (insn);
4178 if (GET_CODE (par) == PARALLEL)
4179 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4181 part = XVECEXP (par, 0, i);
4182 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4183 return 1;
4185 return reg_mentioned_p (reg, SET_SRC (pattern));
4188 return 1;
4191 pattern = PATTERN (insn);
4193 if (GET_CODE (pattern) == PARALLEL)
4195 int i;
4197 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4198 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4199 return 1;
4200 pattern = XVECEXP (pattern, 0, 0);
4203 if (GET_CODE (pattern) == SET)
4205 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4207 /* We don't use rtx_equal_p, because we don't care if the
4208 mode is different. */
4209 if (GET_CODE (SET_DEST (pattern)) != REG
4210 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4211 return 1;
4213 *set = pattern;
4216 pattern = SET_SRC (pattern);
4219 if (GET_CODE (pattern) != CALL
4220 || GET_CODE (XEXP (pattern, 0)) != MEM
4221 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4222 return 1;
4224 return 0;
4227 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4228 general registers. Bits 0..15 mean that the respective registers
4229 are used as inputs in the instruction. Bits 16..31 mean that the
4230 registers 0..15, respectively, are used as outputs, or are clobbered.
4231 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
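   For example, assuming SImode occupies a single hard register (so that
   HARD_REGNO_NREGS is 1), the pattern (set (reg:SI 1) (reg:SI 2)) yields
   (1 << 17) | (1 << 2): bit 2 because r2 is read, and bit 17 (16 + 1)
   because r1 is written.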
4233 regs_used (rtx x, int is_dest)
4235 enum rtx_code code;
4236 const char *fmt;
4237 int i, used = 0;
4239 if (! x)
4240 return used;
4241 code = GET_CODE (x);
4242 switch (code)
4244 case REG:
4245 if (REGNO (x) < 16)
4246 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4247 << (REGNO (x) + is_dest));
4248 return 0;
4249 case SUBREG:
4251 rtx y = SUBREG_REG (x);
4253 if (GET_CODE (y) != REG)
4254 break;
4255 if (REGNO (y) < 16)
4256 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4257 << (REGNO (y) +
4258 subreg_regno_offset (REGNO (y),
4259 GET_MODE (y),
4260 SUBREG_BYTE (x),
4261 GET_MODE (x)) + is_dest));
4262 return 0;
4264 case SET:
4265 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4266 case RETURN:
4267 /* If there was a return value, it must have been indicated with USE. */
4268 return 0x00ffff00;
4269 case CLOBBER:
4270 is_dest = 1;
4271 break;
4272 case MEM:
4273 is_dest = 0;
4274 break;
4275 case CALL:
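      /* 0x00ff00f0 marks r4..r7 (bits 4..7) as inputs and r0..r7
         (bits 16..23) as outputs/clobbered; presumably the argument
         registers and the call-clobbered registers of the SH ABI.  */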
4276 used |= 0x00ff00f0;
4277 break;
4278 default:
4279 break;
4282 fmt = GET_RTX_FORMAT (code);
4284 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4286 if (fmt[i] == 'E')
4288 register int j;
4289 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4290 used |= regs_used (XVECEXP (x, i, j), is_dest);
4292 else if (fmt[i] == 'e')
4293 used |= regs_used (XEXP (x, i), is_dest);
4295 return used;
4298 /* Create an instruction that prevents redirection of a conditional branch
4299 to the destination of the JUMP with address ADDR.
4300 If the branch needs to be implemented as an indirect jump, try to find
4301 a scratch register for it.
4302 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4303 If any preceding insn that doesn't fit into a delay slot is good enough,
4304 pass 1. Pass 2 if a definite blocking insn is needed.
4305 -1 is used internally to avoid deep recursion.
4306 If a blocking instruction is made or recognized, return it. */
4308 static rtx
4309 gen_block_redirect (rtx jump, int addr, int need_block)
4311 int dead = 0;
4312 rtx prev = prev_nonnote_insn (jump);
4313 rtx dest;
4315 /* First, check if we already have an instruction that satisfies our need. */
4316 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4318 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4319 return prev;
4320 if (GET_CODE (PATTERN (prev)) == USE
4321 || GET_CODE (PATTERN (prev)) == CLOBBER
4322 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4323 prev = jump;
4324 else if ((need_block &= ~1) < 0)
4325 return prev;
4326 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4327 need_block = 0;
4329 if (GET_CODE (PATTERN (jump)) == RETURN)
4331 if (! need_block)
4332 return prev;
4333 /* Reorg even does nasty things with return insns that cause branches
4334 to go out of range - see find_end_label and callers. */
4335 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4337 /* We can't use JUMP_LABEL here because it might be undefined
4338 when not optimizing. */
4339 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4340 /* If the branch is out of range, try to find a scratch register for it. */
4341 if (optimize
4342 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4343 > 4092 + 4098))
4345 rtx scan;
4346 /* Don't look for the stack pointer as a scratch register,
4347 it would cause trouble if an interrupt occurred. */
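      /* 0x7fff covers r0..r14 only; bit 15 (r15, the stack pointer) is
         deliberately left out, as noted above.  */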
4348 unsigned try = 0x7fff, used;
4349 int jump_left = flag_expensive_optimizations + 1;
4351 /* It is likely that the most recent eligible instruction is wanted for
4352 the delay slot. Therefore, find out which registers it uses, and
4353 try to avoid using them. */
4355 for (scan = jump; (scan = PREV_INSN (scan)); )
4357 enum rtx_code code;
4359 if (INSN_DELETED_P (scan))
4360 continue;
4361 code = GET_CODE (scan);
4362 if (code == CODE_LABEL || code == JUMP_INSN)
4363 break;
4364 if (code == INSN
4365 && GET_CODE (PATTERN (scan)) != USE
4366 && GET_CODE (PATTERN (scan)) != CLOBBER
4367 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4369 try &= ~regs_used (PATTERN (scan), 0);
4370 break;
4373 for (used = dead = 0, scan = JUMP_LABEL (jump);
4374 (scan = NEXT_INSN (scan)); )
4376 enum rtx_code code;
4378 if (INSN_DELETED_P (scan))
4379 continue;
4380 code = GET_CODE (scan);
4381 if (INSN_P (scan))
4383 used |= regs_used (PATTERN (scan), 0);
4384 if (code == CALL_INSN)
4385 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4386 dead |= (used >> 16) & ~used;
4387 if (dead & try)
4389 dead &= try;
4390 break;
4392 if (code == JUMP_INSN)
4394 if (jump_left-- && simplejump_p (scan))
4395 scan = JUMP_LABEL (scan);
4396 else
4397 break;
4401 /* Mask out the stack pointer again, in case it was
4402 the only 'free' register we have found. */
4403 dead &= 0x7fff;
4405 /* If the immediate destination is still in range, check for possible
4406 threading with a jump beyond the delay slot insn.
4407 Don't check if we are called recursively; the jump has been or will be
4408 checked in a different invocation in that case. */
4410 else if (optimize && need_block >= 0)
4412 rtx next = next_active_insn (next_active_insn (dest));
4413 if (next && GET_CODE (next) == JUMP_INSN
4414 && GET_CODE (PATTERN (next)) == SET
4415 && recog_memoized (next) == CODE_FOR_jump_compact)
4417 dest = JUMP_LABEL (next);
4418 if (dest
4419 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4420 > 4092 + 4098))
4421 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4425 if (dead)
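      /* Use the lowest-numbered dead register as the scratch:
         dead & -dead isolates its bit, and exact_log2 converts that
         bit into a register number.  */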
4427 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4429 /* It would be nice if we could convert the jump into an indirect
4430 jump / far branch right now, and thus exposing all constituent
4431 instructions to further optimization. However, reorg uses
4432 simplejump_p to determine if there is an unconditional jump where
4433 it should try to schedule instructions from the target of the
4434 branch; simplejump_p fails for indirect jumps even if they have
4435 a JUMP_LABEL. */
4436 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4437 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4438 , jump);
4439 /* ??? We would like this to have the scope of the jump, but that
4440 scope will change when a delay slot insn of an inner scope is added.
4441 Hence, after delay slot scheduling, we'll have to expect
4442 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4443 the jump. */
4445 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4446 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4447 return insn;
4449 else if (need_block)
4450 /* We can't use JUMP_LABEL here because it might be undefined
4451 when not optimizing. */
4452 return emit_insn_before (gen_block_branch_redirect
4453 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4454 , jump);
4455 return prev;
4458 #define CONDJUMP_MIN -252
4459 #define CONDJUMP_MAX 262
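/* CONDJUMP_MIN / CONDJUMP_MAX bound, in bytes, how far a conditional
   branch may reach before split_branches has to go through a near label
   and a separate far branch (see gen_far_branch below).  */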
4460 struct far_branch
4462 /* A label (to be placed) in front of the jump
4463 that jumps to our ultimate destination. */
4464 rtx near_label;
4465 /* Where we are going to insert it if we cannot move the jump any farther,
4466 or the jump itself if we have picked up an existing jump. */
4467 rtx insert_place;
4468 /* The ultimate destination. */
4469 rtx far_label;
4470 struct far_branch *prev;
4471 /* If the branch has already been created, its address;
4472 else the address of its first prospective user. */
4473 int address;
4476 static void gen_far_branch (struct far_branch *);
4477 enum mdep_reorg_phase_e mdep_reorg_phase;
4478 static void
4479 gen_far_branch (struct far_branch *bp)
4481 rtx insn = bp->insert_place;
4482 rtx jump;
4483 rtx label = gen_label_rtx ();
4484 int ok;
4486 emit_label_after (label, insn);
4487 if (bp->far_label)
4489 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4490 LABEL_NUSES (bp->far_label)++;
4492 else
4493 jump = emit_jump_insn_after (gen_return (), insn);
4494 /* Emit a barrier so that reorg knows that any following instructions
4495 are not reachable via a fall-through path.
4496 But don't do this when not optimizing, since we wouldn't suppress the
4497 alignment for the barrier then, and could end up with out-of-range
4498 pc-relative loads. */
4499 if (optimize)
4500 emit_barrier_after (jump);
4501 emit_label_after (bp->near_label, insn);
4502 JUMP_LABEL (jump) = bp->far_label;
4503 ok = invert_jump (insn, label, 1);
4504 gcc_assert (ok);
4506 /* If we are branching around a jump (rather than a return), prevent
4507 reorg from using an insn from the jump target as the delay slot insn -
4508 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4509 and it could cause branches to go out of range. */
4510 if (bp->far_label)
4511 (emit_insn_after
4512 (gen_stuff_delay_slot
4513 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4514 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4515 insn));
4516 /* Prevent reorg from undoing our splits. */
4517 gen_block_redirect (jump, bp->address += 2, 2);
4520 /* Fix up ADDR_DIFF_VECs. */
4521 void
4522 fixup_addr_diff_vecs (rtx first)
4524 rtx insn;
4526 for (insn = first; insn; insn = NEXT_INSN (insn))
4528 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4530 if (GET_CODE (insn) != JUMP_INSN
4531 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4532 continue;
4533 pat = PATTERN (insn);
4534 vec_lab = XEXP (XEXP (pat, 0), 0);
4536 /* Search the matching casesi_jump_2. */
4537 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4539 if (GET_CODE (prev) != JUMP_INSN)
4540 continue;
4541 prevpat = PATTERN (prev);
4542 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4543 continue;
4544 x = XVECEXP (prevpat, 0, 1);
4545 if (GET_CODE (x) != USE)
4546 continue;
4547 x = XEXP (x, 0);
4548 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4549 break;
4551 /* FIXME: This is a bug in the optimizer, but it seems harmless
4552 to just avoid panicking. */
4553 if (!prev)
4554 continue;
4556 /* Emit the reference label of the braf where it belongs, right after
4557 the casesi_jump_2 (i.e. braf). */
4558 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4559 emit_label_after (braf_label, prev);
4561 /* Fix up the ADDR_DIFF_VEC to be relative
4562 to the reference address of the braf. */
4563 XEXP (XEXP (pat, 0), 0) = braf_label;
4567 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4568 a barrier. Return the base 2 logarithm of the desired alignment. */
4570 barrier_align (rtx barrier_or_label)
4572 rtx next = next_real_insn (barrier_or_label), pat, prev;
4573 int slot, credit, jump_to_next = 0;
4575 if (! next)
4576 return 0;
4578 pat = PATTERN (next);
4580 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4581 return 2;
4583 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4584 /* This is a barrier in front of a constant table. */
4585 return 0;
4587 prev = prev_real_insn (barrier_or_label);
4588 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4590 pat = PATTERN (prev);
4591 /* If this is a very small table, we want to keep the alignment after
4592 the table to the minimum for proper code alignment. */
4593 return ((TARGET_SMALLCODE
4594 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4595 <= (unsigned) 1 << (CACHE_LOG - 2)))
4596 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4599 if (TARGET_SMALLCODE)
4600 return 0;
4602 if (! TARGET_SH2 || ! optimize)
4603 return align_jumps_log;
4605 /* When fixing up pcloads, a constant table might be inserted just before
4606 the basic block that ends with the barrier. Thus, we can't trust the
4607 instruction lengths before that. */
4608 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4610 /* Check if there is an immediately preceding branch to the insn beyond
4611 the barrier. We must weight the cost of discarding useful information
4612 from the current cache line when executing this branch and there is
4613 an alignment, against that of fetching unneeded insn in front of the
4614 branch target when there is no alignment. */
4616 /* There are two delay_slot cases to consider. One is the simple case
4617 where the preceding branch is to the insn beyond the barrier (simple
4618 delay slot filling), and the other is where the preceding branch has
4619 a delay slot that is a duplicate of the insn after the barrier
4620 (fill_eager_delay_slots) and the branch is to the insn after the insn
4621 after the barrier. */
4623 /* PREV is presumed to be the JUMP_INSN for the barrier under
4624 investigation. Skip to the insn before it. */
4625 prev = prev_real_insn (prev);
4627 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4628 credit >= 0 && prev && GET_CODE (prev) == INSN;
4629 prev = prev_real_insn (prev))
4631 jump_to_next = 0;
4632 if (GET_CODE (PATTERN (prev)) == USE
4633 || GET_CODE (PATTERN (prev)) == CLOBBER)
4634 continue;
4635 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4637 prev = XVECEXP (PATTERN (prev), 0, 1);
4638 if (INSN_UID (prev) == INSN_UID (next))
4640 /* Delay slot was filled with insn at jump target. */
4641 jump_to_next = 1;
4642 continue;
4646 if (slot &&
4647 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4648 slot = 0;
4649 credit -= get_attr_length (prev);
4651 if (prev
4652 && GET_CODE (prev) == JUMP_INSN
4653 && JUMP_LABEL (prev))
4655 rtx x;
4656 if (jump_to_next
4657 || next_real_insn (JUMP_LABEL (prev)) == next
4658 /* If relax_delay_slots() decides NEXT was redundant
4659 with some previous instruction, it will have
4660 redirected PREV's jump to the following insn. */
4661 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4662 /* There is no upper bound on redundant instructions
4663 that might have been skipped, but we must not put an
4664 alignment where none had been before. */
4665 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4666 (INSN_P (x)
4667 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4668 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4669 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4671 rtx pat = PATTERN (prev);
4672 if (GET_CODE (pat) == PARALLEL)
4673 pat = XVECEXP (pat, 0, 0);
4674 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4675 return 0;
4680 return align_jumps_log;
4683 /* If we are inside a phony loop, almost any kind of label can turn up as the
4684 first one in the loop. Aligning a braf label causes incorrect switch
4685 destination addresses; we can detect braf labels because they are
4686 followed by a BARRIER.
4687 Applying loop alignment to small constant or switch tables is a waste
4688 of space, so we suppress this too. */
4690 sh_loop_align (rtx label)
4692 rtx next = label;
4695 next = next_nonnote_insn (next);
4696 while (next && GET_CODE (next) == CODE_LABEL);
4698 if (! next
4699 || ! INSN_P (next)
4700 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4701 || recog_memoized (next) == CODE_FOR_consttable_2)
4702 return 0;
4704 return align_loops_log;
4707 /* Do a final pass over the function, just before delayed branch
4708 scheduling. */
4710 static void
4711 sh_reorg (void)
4713 rtx first, insn, mova = NULL_RTX;
4714 int num_mova;
4715 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4716 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4718 first = get_insns ();
4719 max_labelno_before_reorg = max_label_num ();
4721 /* We must split call insns before introducing `mova's. If we're
4722 optimizing, they'll have already been split. Otherwise, make
4723 sure we don't split them too late. */
4724 if (! optimize)
4725 split_all_insns_noflow ();
4727 if (TARGET_SHMEDIA)
4728 return;
4730 /* If relaxing, generate pseudo-ops to associate function calls with
4731 the symbols they call. It does no harm to not generate these
4732 pseudo-ops. However, when we can generate them, it enables the
4733 linker to potentially relax the jsr to a bsr, and eliminate the
4734 register load and, possibly, the constant pool entry. */
4736 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4737 if (TARGET_RELAX)
4739 /* Remove all REG_LABEL notes. We want to use them for our own
4740 purposes. This works because none of the remaining passes
4741 need to look at them.
4743 ??? But it may break in the future. We should use a machine
4744 dependent REG_NOTE, or some other approach entirely. */
4745 for (insn = first; insn; insn = NEXT_INSN (insn))
4747 if (INSN_P (insn))
4749 rtx note;
4751 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4752 remove_note (insn, note);
4756 for (insn = first; insn; insn = NEXT_INSN (insn))
4758 rtx pattern, reg, link, set, scan, dies, label;
4759 int rescan = 0, foundinsn = 0;
4761 if (GET_CODE (insn) == CALL_INSN)
4763 pattern = PATTERN (insn);
4765 if (GET_CODE (pattern) == PARALLEL)
4766 pattern = XVECEXP (pattern, 0, 0);
4767 if (GET_CODE (pattern) == SET)
4768 pattern = SET_SRC (pattern);
4770 if (GET_CODE (pattern) != CALL
4771 || GET_CODE (XEXP (pattern, 0)) != MEM)
4772 continue;
4774 reg = XEXP (XEXP (pattern, 0), 0);
4776 else
4778 reg = sfunc_uses_reg (insn);
4779 if (! reg)
4780 continue;
4783 if (GET_CODE (reg) != REG)
4784 continue;
4786 /* This is a function call via REG. If the only uses of REG
4787 between the time that it is set and the time that it dies
4788 are in function calls, then we can associate all the
4789 function calls with the setting of REG. */
4791 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4793 rtx linked_insn;
4795 if (REG_NOTE_KIND (link) != 0)
4796 continue;
4797 linked_insn = XEXP (link, 0);
4798 set = single_set (linked_insn);
4799 if (set
4800 && rtx_equal_p (reg, SET_DEST (set))
4801 && ! INSN_DELETED_P (linked_insn))
4803 link = linked_insn;
4804 break;
4808 if (! link)
4810 /* ??? Sometimes global register allocation will have
4811 deleted the insn pointed to by LOG_LINKS. Try
4812 scanning backward to find where the register is set. */
4813 for (scan = PREV_INSN (insn);
4814 scan && GET_CODE (scan) != CODE_LABEL;
4815 scan = PREV_INSN (scan))
4817 if (! INSN_P (scan))
4818 continue;
4820 if (! reg_mentioned_p (reg, scan))
4821 continue;
4823 if (noncall_uses_reg (reg, scan, &set))
4824 break;
4826 if (set)
4828 link = scan;
4829 break;
4834 if (! link)
4835 continue;
4837 /* The register is set at LINK. */
4839 /* We can only optimize the function call if the register is
4840 being set to a symbol. In theory, we could sometimes
4841 optimize calls to a constant location, but the assembler
4842 and linker do not support that at present. */
4843 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4844 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4845 continue;
4847 /* Scan forward from LINK to the place where REG dies, and
4848 make sure that the only insns which use REG are
4849 themselves function calls. */
4851 /* ??? This doesn't work for call targets that were allocated
4852 by reload, since there may not be a REG_DEAD note for the
4853 register. */
4855 dies = NULL_RTX;
4856 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4858 rtx scanset;
4860 /* Don't try to trace forward past a CODE_LABEL if we haven't
4861 seen INSN yet. Ordinarily, we will only find the setting insn
4862 in LOG_LINKS if it is in the same basic block. However,
4863 cross-jumping can insert code labels in between the load and
4864 the call, and can result in situations where a single call
4865 insn may have two targets depending on where we came from. */
4867 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4868 break;
4870 if (! INSN_P (scan))
4871 continue;
4873 /* Don't try to trace forward past a JUMP. To optimize
4874 safely, we would have to check that all the
4875 instructions at the jump destination did not use REG. */
4877 if (GET_CODE (scan) == JUMP_INSN)
4878 break;
4880 if (! reg_mentioned_p (reg, scan))
4881 continue;
4883 if (noncall_uses_reg (reg, scan, &scanset))
4884 break;
4886 if (scan == insn)
4887 foundinsn = 1;
4889 if (scan != insn
4890 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4892 /* There is a function call to this register other
4893 than the one we are checking. If we optimize
4894 this call, we need to rescan again below. */
4895 rescan = 1;
4898 /* ??? We shouldn't have to worry about SCANSET here.
4899 We should just be able to check for a REG_DEAD note
4900 on a function call. However, the REG_DEAD notes are
4901 apparently not dependable around libcalls; c-torture
4902 execute/920501-2 is a test case. If SCANSET is set,
4903 then this insn sets the register, so it must have
4904 died earlier. Unfortunately, this will only handle
4905 the cases in which the register is, in fact, set in a
4906 later insn. */
4908 /* ??? We shouldn't have to use FOUNDINSN here.
4909 However, the LOG_LINKS fields are apparently not
4910 entirely reliable around libcalls;
4911 newlib/libm/math/e_pow.c is a test case. Sometimes
4912 an insn will appear in LOG_LINKS even though it is
4913 not the most recent insn which sets the register. */
4915 if (foundinsn
4916 && (scanset
4917 || find_reg_note (scan, REG_DEAD, reg)))
4919 dies = scan;
4920 break;
4924 if (! dies)
4926 /* Either there was a branch, or some insn used REG
4927 other than as a function call address. */
4928 continue;
4931 /* Create a code label, and put it in a REG_LABEL note on
4932 the insn which sets the register, and on each call insn
4933 which uses the register. In final_prescan_insn we look
4934 for the REG_LABEL notes, and output the appropriate label
4935 or pseudo-op. */
4937 label = gen_label_rtx ();
4938 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4939 REG_NOTES (link));
4940 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4941 REG_NOTES (insn));
4942 if (rescan)
4944 scan = link;
4947 rtx reg2;
4949 scan = NEXT_INSN (scan);
4950 if (scan != insn
4951 && ((GET_CODE (scan) == CALL_INSN
4952 && reg_mentioned_p (reg, scan))
4953 || ((reg2 = sfunc_uses_reg (scan))
4954 && REGNO (reg2) == REGNO (reg))))
4955 REG_NOTES (scan)
4956 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4958 while (scan != dies);
4963 if (TARGET_SH2)
4964 fixup_addr_diff_vecs (first);
4966 if (optimize)
4968 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4969 shorten_branches (first);
4972 /* Scan the function looking for move instructions which have to be
4973 changed to pc-relative loads and insert the literal tables. */
4974 label_ref_list_pool = create_alloc_pool ("label references list",
4975 sizeof (struct label_ref_list_d),
4976 30);
4977 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4978 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4980 if (mova_p (insn))
4982 /* ??? basic block reordering can move a switch table dispatch
4983 below the switch table. Check if that has happened.
4984 We only have the addresses available when optimizing; but then,
4985 this check shouldn't be needed when not optimizing. */
4986 if (!untangle_mova (&num_mova, &mova, insn))
4988 insn = mova;
4989 num_mova = 0;
4992 else if (GET_CODE (insn) == JUMP_INSN
4993 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4994 && num_mova
4995 /* ??? loop invariant motion can also move a mova out of a
4996 loop. Since loop does this code motion anyway, maybe we
4997 should wrap UNSPEC_MOVA into a CONST, so that reload can
4998 move it back. */
4999 && ((num_mova > 1
5000 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5001 || (prev_nonnote_insn (insn)
5002 == XEXP (MOVA_LABELREF (mova), 0))))
5004 rtx scan;
5005 int total;
5007 num_mova--;
5009 /* Some code might have been inserted between the mova and
5010 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5011 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5012 total += get_attr_length (scan);
5014 /* Range of mova is 1020, add 4 because pc counts from address of
5015 second instruction after this one, subtract 2 in case pc is 2
5016 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5017 cancels out with alignment effects of the mova itself. */
5018 if (total > 1022)
5020 /* Change the mova into a load, and restart scanning
5021 there. broken_move will then return true for mova. */
5022 fixup_mova (mova);
5023 insn = mova;
5026 if (broken_move (insn)
5027 || (GET_CODE (insn) == INSN
5028 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5030 rtx scan;
5031 /* Scan ahead looking for a barrier to stick the constant table
5032 behind. */
5033 rtx barrier = find_barrier (num_mova, mova, insn);
5034 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5035 int need_aligned_label = 0;
5037 if (num_mova && ! mova_p (mova))
5039 /* find_barrier had to change the first mova into a
5040 pcload; thus, we have to start with this new pcload. */
5041 insn = mova;
5042 num_mova = 0;
5044 /* Now find all the moves between the points and modify them. */
5045 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5047 if (GET_CODE (scan) == CODE_LABEL)
5048 last_float = 0;
5049 if (GET_CODE (scan) == INSN
5050 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5051 need_aligned_label = 1;
5052 if (broken_move (scan))
5054 rtx *patp = &PATTERN (scan), pat = *patp;
5055 rtx src, dst;
5056 rtx lab;
5057 rtx newsrc;
5058 enum machine_mode mode;
5060 if (GET_CODE (pat) == PARALLEL)
5061 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5062 src = SET_SRC (pat);
5063 dst = SET_DEST (pat);
5064 mode = GET_MODE (dst);
5066 if (mode == SImode && hi_const (src)
5067 && REGNO (dst) != FPUL_REG)
5069 int offset = 0;
5071 mode = HImode;
5072 while (GET_CODE (dst) == SUBREG)
5074 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5075 GET_MODE (SUBREG_REG (dst)),
5076 SUBREG_BYTE (dst),
5077 GET_MODE (dst));
5078 dst = SUBREG_REG (dst);
5080 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5082 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5084 /* This must be an insn that clobbers r0. */
5085 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5086 XVECLEN (PATTERN (scan), 0)
5087 - 1);
5088 rtx clobber = *clobberp;
5090 gcc_assert (GET_CODE (clobber) == CLOBBER
5091 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5093 if (last_float
5094 && reg_set_between_p (r0_rtx, last_float_move, scan))
5095 last_float = 0;
5096 if (last_float
5097 && TARGET_SHCOMPACT
5098 && GET_MODE_SIZE (mode) != 4
5099 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5100 last_float = 0;
5101 lab = add_constant (src, mode, last_float);
5102 if (lab)
5103 emit_insn_before (gen_mova (lab), scan);
5104 else
5106 /* There will be a REG_UNUSED note for r0 on
5107 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5108 lest reorg:mark_target_live_regs will not
5109 consider r0 to be used, and we end up with delay
5110 slot insn in front of SCAN that clobbers r0. */
5111 rtx note
5112 = find_regno_note (last_float_move, REG_UNUSED, 0);
5114 /* If we are not optimizing, then there may not be
5115 a note. */
5116 if (note)
5117 PUT_MODE (note, REG_INC);
5119 *last_float_addr = r0_inc_rtx;
5121 last_float_move = scan;
5122 last_float = src;
5123 newsrc = gen_const_mem (mode,
5124 (((TARGET_SH4 && ! TARGET_FMOVD)
5125 || REGNO (dst) == FPUL_REG)
5126 ? r0_inc_rtx
5127 : r0_rtx));
5128 last_float_addr = &XEXP (newsrc, 0);
5130 /* Remove the clobber of r0. */
5131 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5132 gen_rtx_SCRATCH (Pmode));
5134 /* This is a mova needing a label. Create it. */
5135 else if (GET_CODE (src) == UNSPEC
5136 && XINT (src, 1) == UNSPEC_MOVA
5137 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5139 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5140 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5141 newsrc = gen_rtx_UNSPEC (SImode,
5142 gen_rtvec (1, newsrc),
5143 UNSPEC_MOVA);
5145 else
5147 lab = add_constant (src, mode, 0);
5148 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5149 newsrc = gen_const_mem (mode, newsrc);
5151 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5152 INSN_CODE (scan) = -1;
5155 dump_table (need_aligned_label ? insn : 0, barrier);
5156 insn = barrier;
5159 free_alloc_pool (label_ref_list_pool);
5160 for (insn = first; insn; insn = NEXT_INSN (insn))
5161 PUT_MODE (insn, VOIDmode);
5163 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5164 INSN_ADDRESSES_FREE ();
5165 split_branches (first);
5167 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5168 also has an effect on the register that holds the address of the sfunc.
5169 Insert an extra dummy insn in front of each sfunc that pretends to
5170 use this register. */
5171 if (flag_delayed_branch)
5173 for (insn = first; insn; insn = NEXT_INSN (insn))
5175 rtx reg = sfunc_uses_reg (insn);
5177 if (! reg)
5178 continue;
5179 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5182 #if 0
5183 /* fpscr is not actually a user variable, but we pretend it is for the
5184 sake of the previous optimization passes, since we want it handled like
5185 one. However, we don't have any debugging information for it, so turn
5186 it into a non-user variable now. */
5187 if (TARGET_SH4)
5188 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5189 #endif
5190 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5194 get_dest_uid (rtx label, int max_uid)
5196 rtx dest = next_real_insn (label);
5197 int dest_uid;
5198 if (! dest)
5199 /* This can happen for an undefined label. */
5200 return 0;
5201 dest_uid = INSN_UID (dest);
5202 /* If this is a newly created branch redirection blocking instruction,
5203 we cannot index the branch_uid or insn_addresses arrays with its
5204 uid. But then, we won't need to, because the actual destination is
5205 the following branch. */
5206 while (dest_uid >= max_uid)
5208 dest = NEXT_INSN (dest);
5209 dest_uid = INSN_UID (dest);
5211 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5212 return 0;
5213 return dest_uid;
5216 /* Split condbranches that are out of range. Also add clobbers for
5217 scratch registers that are needed in far jumps.
5218 We do this before delay slot scheduling, so that it can take our
5219 newly created instructions into account. It also allows us to
5220 find branches with common targets more easily. */
5222 static void
5223 split_branches (rtx first)
5225 rtx insn;
5226 struct far_branch **uid_branch, *far_branch_list = 0;
5227 int max_uid = get_max_uid ();
5228 int ok;
5230 /* Find out which branches are out of range. */
5231 shorten_branches (first);
5233 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5234 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5236 for (insn = first; insn; insn = NEXT_INSN (insn))
5237 if (! INSN_P (insn))
5238 continue;
5239 else if (INSN_DELETED_P (insn))
5241 /* Shorten_branches would split this instruction again,
5242 so transform it into a note. */
5243 PUT_CODE (insn, NOTE);
5244 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
5245 NOTE_SOURCE_FILE (insn) = 0;
5247 else if (GET_CODE (insn) == JUMP_INSN
5248 /* Don't mess with ADDR_DIFF_VEC */
5249 && (GET_CODE (PATTERN (insn)) == SET
5250 || GET_CODE (PATTERN (insn)) == RETURN))
5252 enum attr_type type = get_attr_type (insn);
5253 if (type == TYPE_CBRANCH)
5255 rtx next, beyond;
5257 if (get_attr_length (insn) > 4)
5259 rtx src = SET_SRC (PATTERN (insn));
5260 rtx olabel = XEXP (XEXP (src, 1), 0);
5261 int addr = INSN_ADDRESSES (INSN_UID (insn));
5262 rtx label = 0;
5263 int dest_uid = get_dest_uid (olabel, max_uid);
5264 struct far_branch *bp = uid_branch[dest_uid];
5266 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5267 the label if the LABEL_NUSES count drops to zero. There is
5268 always a jump_optimize pass that sets these values, but it
5269 proceeds to delete unreferenced code, and then if not
5270 optimizing, to un-delete the deleted instructions, thus
5271 leaving labels with use counts that are too low. */
5272 if (! optimize)
5274 JUMP_LABEL (insn) = olabel;
5275 LABEL_NUSES (olabel)++;
5277 if (! bp)
5279 bp = (struct far_branch *) alloca (sizeof *bp);
5280 uid_branch[dest_uid] = bp;
5281 bp->prev = far_branch_list;
5282 far_branch_list = bp;
5283 bp->far_label
5284 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5285 LABEL_NUSES (bp->far_label)++;
5287 else
5289 label = bp->near_label;
5290 if (! label && bp->address - addr >= CONDJUMP_MIN)
5292 rtx block = bp->insert_place;
5294 if (GET_CODE (PATTERN (block)) == RETURN)
5295 block = PREV_INSN (block);
5296 else
5297 block = gen_block_redirect (block,
5298 bp->address, 2);
5299 label = emit_label_after (gen_label_rtx (),
5300 PREV_INSN (block));
5301 bp->near_label = label;
5303 else if (label && ! NEXT_INSN (label))
5305 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5306 bp->insert_place = insn;
5307 else
5308 gen_far_branch (bp);
5311 if (! label
5312 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5314 bp->near_label = label = gen_label_rtx ();
5315 bp->insert_place = insn;
5316 bp->address = addr;
5318 ok = redirect_jump (insn, label, 1);
5319 gcc_assert (ok);
5321 else
5323 /* get_attr_length (insn) == 2 */
5324 /* Check if we have a pattern where reorg wants to redirect
5325 the branch to a label from an unconditional branch that
5326 is too far away. */
5327 /* We can't use JUMP_LABEL here because it might be undefined
5328 when not optimizing. */
5329 /* A syntax error might cause beyond to be NULL_RTX. */
5330 beyond
5331 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5332 0));
5334 if (beyond
5335 && (GET_CODE (beyond) == JUMP_INSN
5336 || ((beyond = next_active_insn (beyond))
5337 && GET_CODE (beyond) == JUMP_INSN))
5338 && GET_CODE (PATTERN (beyond)) == SET
5339 && recog_memoized (beyond) == CODE_FOR_jump_compact
5340 && ((INSN_ADDRESSES
5341 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5342 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5343 > 252 + 258 + 2))
5344 gen_block_redirect (beyond,
5345 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5348 next = next_active_insn (insn);
5350 if ((GET_CODE (next) == JUMP_INSN
5351 || ((next = next_active_insn (next))
5352 && GET_CODE (next) == JUMP_INSN))
5353 && GET_CODE (PATTERN (next)) == SET
5354 && recog_memoized (next) == CODE_FOR_jump_compact
5355 && ((INSN_ADDRESSES
5356 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5357 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5358 > 252 + 258 + 2))
5359 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5361 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5363 int addr = INSN_ADDRESSES (INSN_UID (insn));
5364 rtx far_label = 0;
5365 int dest_uid = 0;
5366 struct far_branch *bp;
5368 if (type == TYPE_JUMP)
5370 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5371 dest_uid = get_dest_uid (far_label, max_uid);
5372 if (! dest_uid)
5374 /* Parse errors can lead to labels outside
5375 the insn stream. */
5376 if (! NEXT_INSN (far_label))
5377 continue;
5379 if (! optimize)
5381 JUMP_LABEL (insn) = far_label;
5382 LABEL_NUSES (far_label)++;
5384 redirect_jump (insn, NULL_RTX, 1);
5385 far_label = 0;
5388 bp = uid_branch[dest_uid];
5389 if (! bp)
5391 bp = (struct far_branch *) alloca (sizeof *bp);
5392 uid_branch[dest_uid] = bp;
5393 bp->prev = far_branch_list;
5394 far_branch_list = bp;
5395 bp->near_label = 0;
5396 bp->far_label = far_label;
5397 if (far_label)
5398 LABEL_NUSES (far_label)++;
5400 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5401 if (addr - bp->address <= CONDJUMP_MAX)
5402 emit_label_after (bp->near_label, PREV_INSN (insn));
5403 else
5405 gen_far_branch (bp);
5406 bp->near_label = 0;
5408 else
5409 bp->near_label = 0;
5410 bp->address = addr;
5411 bp->insert_place = insn;
5412 if (! far_label)
5413 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5414 else
5415 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5418 /* Generate all pending far branches,
5419 and free our references to the far labels. */
5420 while (far_branch_list)
5422 if (far_branch_list->near_label
5423 && ! NEXT_INSN (far_branch_list->near_label))
5424 gen_far_branch (far_branch_list);
5425 if (optimize
5426 && far_branch_list->far_label
5427 && ! --LABEL_NUSES (far_branch_list->far_label))
5428 delete_insn (far_branch_list->far_label);
5429 far_branch_list = far_branch_list->prev;
5432 /* Instruction length information is no longer valid due to the new
5433 instructions that have been generated. */
5434 init_insn_lengths ();
5437 /* Dump out instruction addresses, which is useful for debugging the
5438 constant pool table stuff.
5440 If relaxing, output the label and pseudo-ops used to link together
5441 calls and the instruction which set the registers. */
5443 /* ??? The addresses printed by this routine for insns are nonsense for
5444 insns which are inside of a sequence where none of the inner insns have
5445 variable length. This is because the second pass of shorten_branches
5446 does not bother to update them. */
5448 void
5449 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5450 int noperands ATTRIBUTE_UNUSED)
5452 if (TARGET_DUMPISIZE)
5453 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5455 if (TARGET_RELAX)
5457 rtx note;
5459 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5460 if (note)
5462 rtx pattern;
5464 pattern = PATTERN (insn);
5465 if (GET_CODE (pattern) == PARALLEL)
5466 pattern = XVECEXP (pattern, 0, 0);
5467 switch (GET_CODE (pattern))
5469 case SET:
5470 if (GET_CODE (SET_SRC (pattern)) != CALL
5471 && get_attr_type (insn) != TYPE_SFUNC)
5473 targetm.asm_out.internal_label
5474 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5475 break;
5477 /* else FALLTHROUGH */
5478 case CALL:
5479 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5480 CODE_LABEL_NUMBER (XEXP (note, 0)));
5481 break;
5483 default:
5484 gcc_unreachable ();
5490 /* Dump out any constants accumulated in the final pass. These will
5491 only be labels. */
5493 const char *
5494 output_jump_label_table (void)
5496 int i;
5498 if (pool_size)
5500 fprintf (asm_out_file, "\t.align 2\n");
5501 for (i = 0; i < pool_size; i++)
5503 pool_node *p = &pool_vector[i];
5505 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5506 CODE_LABEL_NUMBER (p->label));
5507 output_asm_insn (".long %O0", &p->value);
5509 pool_size = 0;
5512 return "";
5515 /* A full frame looks like:
5517 arg-5
5518 arg-4
5519 [ if current_function_anonymous_args
5520 arg-3
5521 arg-2
5522 arg-1
5523 arg-0 ]
5524 saved-fp
5525 saved-r10
5526 saved-r11
5527 saved-r12
5528 saved-pr
5529 local-n
5531 local-1
5532 local-0 <- fp points here. */
5534 /* Number of bytes pushed for anonymous args, used to pass information
5535 between expand_prologue and expand_epilogue. */
5537 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5538 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5539 for an epilogue and a negative value means that it's for a sibcall
5540 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5541 all the registers that are about to be restored, and hence dead. */
5543 static void
5544 output_stack_adjust (int size, rtx reg, int epilogue_p,
5545 HARD_REG_SET *live_regs_mask)
5547 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
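  /* Prologue adjustments go through frame_insn so that they are marked
     RTX_FRAME_RELATED_P for the unwind/debug info; epilogue adjustments
     use plain emit_insn.  */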
5548 if (size)
5550 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5552 /* This test is bogus, as output_stack_adjust is used to re-align the
5553 stack. */
5554 #if 0
5555 gcc_assert (!(size % align));
5556 #endif
5558 if (CONST_OK_FOR_ADD (size))
5559 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5560 /* Try to do it with two partial adjustments; however, we must make
5561 sure that the stack is properly aligned at all times, in case
5562 an interrupt occurs between the two partial adjustments. */
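      /* For instance (purely illustrative), with align == 8 a size of 136
         would be split into 64 (136 / 2 rounded down to a multiple of 8)
         and 72; each partial add is a multiple of the alignment, so the
         stack stays aligned between the two adjustments.  */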
5563 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5564 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5566 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5567 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5569 else
5571 rtx const_reg;
5572 rtx insn;
5573 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5574 int i;
5576 /* If TEMP is invalid, we could temporarily save a general
5577 register to MACL. However, there is currently no need
5578 to handle this case, so just die when we see it. */
5579 if (epilogue_p < 0
5580 || current_function_interrupt
5581 || ! call_really_used_regs[temp] || fixed_regs[temp])
5582 temp = -1;
5583 if (temp < 0 && ! current_function_interrupt
5584 && (TARGET_SHMEDIA || epilogue_p >= 0))
5586 HARD_REG_SET temps;
5587 COPY_HARD_REG_SET (temps, call_used_reg_set);
5588 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5589 if (epilogue_p > 0)
5591 int nreg = 0;
5592 if (current_function_return_rtx)
5594 enum machine_mode mode;
5595 mode = GET_MODE (current_function_return_rtx);
5596 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5597 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5599 for (i = 0; i < nreg; i++)
5600 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5601 if (current_function_calls_eh_return)
5603 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5604 for (i = 0; i <= 3; i++)
5605 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5608 if (TARGET_SHMEDIA && epilogue_p < 0)
5609 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5610 CLEAR_HARD_REG_BIT (temps, i);
5611 if (epilogue_p <= 0)
5613 for (i = FIRST_PARM_REG;
5614 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5615 CLEAR_HARD_REG_BIT (temps, i);
5616 if (cfun->static_chain_decl != NULL)
5617 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5619 temp = scavenge_reg (&temps);
5621 if (temp < 0 && live_regs_mask)
5622 temp = scavenge_reg (live_regs_mask);
5623 if (temp < 0)
5625 rtx adj_reg, tmp_reg, mem;
5627 /* If we reached here, the most likely case is the (sibcall)
5628 epilogue for non-SHmedia. Put a special push/pop sequence
5629 for such a case as the last resort. This looks lengthy but
5630 would not be a problem because such a case seems to be very
5631 rare. */
5633 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5636 /* ??? There is still the slight possibility that r4 or
5637 r5 have been reserved as fixed registers or assigned
5638 as global registers, and they change during an
5639 interrupt. There are possible ways to handle this:
5641 - If we are adjusting the frame pointer (r14), we can do
5642 with a single temp register and an ordinary push / pop
5643 on the stack.
5644 - Grab any call-used or call-saved registers (i.e. not
5645 fixed or globals) for the temps we need. We might
5646 also grab r14 if we are adjusting the stack pointer.
5647 If we can't find enough available registers, issue
5648 a diagnostic and die - the user must have reserved
5649 way too many registers.
5650 But since all this is rather unlikely to happen and
5651 would require extra testing, we just die if r4 / r5
5652 are not available. */
5653 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5654 && !global_regs[4] && !global_regs[5]);
5656 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5657 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5658 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5659 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5660 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5661 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5662 emit_move_insn (mem, tmp_reg);
5663 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5664 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5665 emit_move_insn (mem, tmp_reg);
5666 emit_move_insn (reg, adj_reg);
5667 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5668 emit_move_insn (adj_reg, mem);
5669 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5670 emit_move_insn (tmp_reg, mem);
5671 return;
5673 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5675 /* If SIZE is negative, subtract the positive value.
5676 This sometimes allows a constant pool entry to be shared
5677 between prologue and epilogue code. */
5678 if (size < 0)
5680 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5681 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5683 else
5685 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5686 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5688 if (! epilogue_p)
5689 REG_NOTES (insn)
5690 = (gen_rtx_EXPR_LIST
5691 (REG_FRAME_RELATED_EXPR,
5692 gen_rtx_SET (VOIDmode, reg,
5693 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5694 REG_NOTES (insn)));
5699 static rtx
5700 frame_insn (rtx x)
5702 x = emit_insn (x);
5703 RTX_FRAME_RELATED_P (x) = 1;
5704 return x;
5707 /* Output RTL to push register RN onto the stack. */
5709 static rtx
5710 push (int rn)
5712 rtx x;
5713 if (rn == FPUL_REG)
5714 x = gen_push_fpul ();
5715 else if (rn == FPSCR_REG)
5716 x = gen_push_fpscr ();
5717 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5718 && FP_OR_XD_REGISTER_P (rn))
5720 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5721 return NULL_RTX;
5722 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5724 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5725 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5726 else
5727 x = gen_push (gen_rtx_REG (SImode, rn));
5729 x = frame_insn (x);
5730 REG_NOTES (x)
5731 = gen_rtx_EXPR_LIST (REG_INC,
5732 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5733 return x;
5736 /* Output RTL to pop register RN from the stack. */
5738 static void
5739 pop (int rn)
5741 rtx x;
5742 if (rn == FPUL_REG)
5743 x = gen_pop_fpul ();
5744 else if (rn == FPSCR_REG)
5745 x = gen_pop_fpscr ();
5746 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5747 && FP_OR_XD_REGISTER_P (rn))
5749 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5750 return;
5751 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5753 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5754 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5755 else
5756 x = gen_pop (gen_rtx_REG (SImode, rn));
5758 x = emit_insn (x);
5759 REG_NOTES (x)
5760 = gen_rtx_EXPR_LIST (REG_INC,
5761 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5764 /* Generate code to push the regs specified in the mask. */
5766 static void
5767 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5769 int i;
5770 int skip_fpscr = 0;
5772 /* Push PR last; this gives better latencies after the prologue, and
5773 leaves candidates for the return delay slot when there are no general
5774 registers pushed. */
5775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5777 /* If this is an interrupt handler, and the SZ bit varies,
5778 and we have to push any floating point register, we need
5779 to switch to the correct precision first. */
5780 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5781 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5783 HARD_REG_SET unsaved;
5785 push (FPSCR_REG);
5786 COMPL_HARD_REG_SET (unsaved, *mask);
5787 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5788 skip_fpscr = 1;
5790 if (i != PR_REG
5791 && (i != FPSCR_REG || ! skip_fpscr)
5792 && TEST_HARD_REG_BIT (*mask, i))
5793 push (i);
5795 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5796 push (PR_REG);
5799 /* Calculate how much extra space is needed to save all callee-saved
5800 target registers.
5801 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5803 static int
5804 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5806 int reg;
5807 int stack_space = 0;
5808 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5810 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5811 if ((! call_really_used_regs[reg] || interrupt_handler)
5812 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5813 /* Leave space to save this target register on the stack,
5814 in case target register allocation wants to use it. */
5815 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5816 return stack_space;
5819 /* Decide whether we should reserve space for callee-save target registers,
5820 in case target register allocation wants to use them. REGS_SAVED is
5821 the space, in bytes, that is already required for register saves.
5822 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5824 static int
5825 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5826 HARD_REG_SET *live_regs_mask)
5828 if (optimize_size)
5829 return 0;
5830 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5833 /* Decide how much space to reserve for callee-save target registers
5834 in case target register allocation wants to use them.
5835 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5837 static int
5838 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5840 if (shmedia_space_reserved_for_target_registers)
5841 return shmedia_target_regs_stack_space (live_regs_mask);
5842 else
5843 return 0;
5846 /* Work out the registers which need to be saved, both as a mask and a
5847 count of saved words. Return the count.
5849 If doing a pragma interrupt function, then push all regs used by the
5850 function, and if we call another function (we can tell by looking at PR),
5851 make sure that all the regs it clobbers are safe too. */
5853 static int
5854 calc_live_regs (HARD_REG_SET *live_regs_mask)
5856 unsigned int reg;
5857 int count;
5858 tree attrs;
5859 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5860 bool nosave_low_regs;
5861 int pr_live, has_call;
5863 attrs = DECL_ATTRIBUTES (current_function_decl);
5864 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5865 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5866 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5867 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5869 CLEAR_HARD_REG_SET (*live_regs_mask);
5870 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5871 && regs_ever_live[FPSCR_REG])
5872 target_flags &= ~MASK_FPU_SINGLE;
5873 /* If switching to double mode saves a lot of register saves, do that. */
5874 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5875 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5876 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5877 && (! call_really_used_regs[reg]
5878 || interrupt_handler)
5879 && ++count > 2)
5881 target_flags &= ~MASK_FPU_SINGLE;
5882 break;
5884 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5885 knows how to use it. That means the pseudo originally allocated for
5886 the initial value can become the PR_MEDIA_REG hard register, as seen for
5887 execute/20010122-1.c:test9. */
5888 if (TARGET_SHMEDIA)
5889 /* ??? This function is called from initial_elimination_offset, hence we
5890 can't use the result of sh_media_register_for_return here. */
5891 pr_live = sh_pr_n_sets ();
5892 else
5894 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5895 pr_live = (pr_initial
5896 ? (GET_CODE (pr_initial) != REG
5897 || REGNO (pr_initial) != (PR_REG))
5898 : regs_ever_live[PR_REG]);
5899 /* For SHcompact, if not optimizing, we end up with a memory reference
5900 using the return address pointer for __builtin_return_address even
5901 though there is no actual need to put the PR register on the stack. */
5902 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5904 /* Force PR to be live if the prologue has to call the SHmedia
5905 argument decoder or register saver. */
5906 if (TARGET_SHCOMPACT
5907 && ((current_function_args_info.call_cookie
5908 & ~ CALL_COOKIE_RET_TRAMP (1))
5909 || current_function_has_nonlocal_label))
5910 pr_live = 1;
5911 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5912 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5914 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5915 ? pr_live
5916 : interrupt_handler
5917 ? (/* Need to save all the regs ever live. */
5918 (regs_ever_live[reg]
5919 || (call_really_used_regs[reg]
5920 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5921 || reg == PIC_OFFSET_TABLE_REGNUM)
5922 && has_call)
5923 || (TARGET_SHMEDIA && has_call
5924 && REGISTER_NATURAL_MODE (reg) == SImode
5925 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5926 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5927 && reg != RETURN_ADDRESS_POINTER_REGNUM
5928 && reg != T_REG && reg != GBR_REG
5929 /* Push FPSCR only on targets which have an FPU. */
5930 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5931 : (/* Only push those regs which are used and need to be saved. */
5932 (TARGET_SHCOMPACT
5933 && flag_pic
5934 && current_function_args_info.call_cookie
5935 && reg == PIC_OFFSET_TABLE_REGNUM)
5936 || (regs_ever_live[reg]
5937 && (!call_really_used_regs[reg]
5938 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5939 || (current_function_calls_eh_return
5940 && (reg == EH_RETURN_DATA_REGNO (0)
5941 || reg == EH_RETURN_DATA_REGNO (1)
5942 || reg == EH_RETURN_DATA_REGNO (2)
5943 || reg == EH_RETURN_DATA_REGNO (3)))
5944 || ((reg == MACL_REG || reg == MACH_REG)
5945 && regs_ever_live[reg]
5946 && sh_cfun_attr_renesas_p ())
5949 SET_HARD_REG_BIT (*live_regs_mask, reg);
5950 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5952 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5953 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5955 if (FP_REGISTER_P (reg))
5957 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5959 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5960 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5963 else if (XD_REGISTER_P (reg))
5965 /* Must switch to double mode to access these registers. */
5966 target_flags &= ~MASK_FPU_SINGLE;
5970 if (nosave_low_regs && reg == R8_REG)
5971 break;
5973 /* If we have a target register optimization pass after prologue / epilogue
5974 threading, we need to assume all target registers will be live even if
5975 they aren't now. */
5976 if (flag_branch_target_load_optimize2
5977 && TARGET_SAVE_ALL_TARGET_REGS
5978 && shmedia_space_reserved_for_target_registers)
5979 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5980 if ((! call_really_used_regs[reg] || interrupt_handler)
5981 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5983 SET_HARD_REG_BIT (*live_regs_mask, reg);
5984 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5986 /* If this is an interrupt handler, we don't have any call-clobbered
5987 registers we can conveniently use for target register save/restore.
5988 Make sure we save at least one general purpose register when we need
5989 to save target registers. */
5990 if (interrupt_handler
5991 && hard_regs_intersect_p (live_regs_mask,
5992 &reg_class_contents[TARGET_REGS])
5993 && ! hard_regs_intersect_p (live_regs_mask,
5994 &reg_class_contents[GENERAL_REGS]))
5996 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5997 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6000 return count;
6003 /* Code to generate prologue and epilogue sequences */
6005 /* PUSHED is the number of bytes that are being pushed on the
6006 stack for register saves. Return the frame size, padded
6007 appropriately so that the stack stays properly aligned. */
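/* For example, with an 8-byte stack boundary, a 20-byte frame and 12
   bytes of pushed registers give ((20 + 12 + 8 - 1) & -8) - 12
   = 32 - 12 = 20, so the pushed registers plus the returned frame
   size add up to 32 bytes, a multiple of the alignment.  */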
6008 static HOST_WIDE_INT
6009 rounded_frame_size (int pushed)
6011 HOST_WIDE_INT size = get_frame_size ();
6012 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6014 return ((size + pushed + align - 1) & -align) - pushed;
6017 /* Choose a call-clobbered target-branch register that remains
6018 unchanged along the whole function. We set it up as the return
6019 value in the prologue. */
6021 sh_media_register_for_return (void)
6023 int regno;
6024 int tr0_used;
6026 if (! current_function_is_leaf)
6027 return -1;
6028 if (lookup_attribute ("interrupt_handler",
6029 DECL_ATTRIBUTES (current_function_decl)))
6030 return -1;
6031 if (sh_cfun_interrupt_handler_p ())
6032 return -1;
6034 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
6036 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6037 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
6038 return regno;
6040 return -1;
6043 /* The maximum registers we need to save are:
6044 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6045 - 32 floating point registers (for each pair, we save none,
6046 one single precision value, or a double precision value).
6047 - 8 target registers
6048 - add 1 entry for a delimiter. */
6049 #define MAX_SAVED_REGS (62+32+8)
6051 typedef struct save_entry_s
6053 unsigned char reg;
6054 unsigned char mode;
6055 short offset;
6056 } save_entry;
6058 #define MAX_TEMPS 4
6060 /* There will be a delimiter entry with VOIDmode both at the start and the
6061 end of a filled in schedule. The end delimiter has the offset of the
6062 save with the smallest (i.e. most negative) offset. */
6063 typedef struct save_schedule_s
6065 save_entry entries[MAX_SAVED_REGS + 2];
6066 int temps[MAX_TEMPS+1];
6067 } save_schedule;
6069 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6070 use reverse order. Returns the last entry written to (not counting
6071 the delimiter). OFFSET_BASE is a number to be added to all offset
6072 entries. */
6074 static save_entry *
6075 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6076 int offset_base)
6078 int align, i;
6079 save_entry *entry = schedule->entries;
6080 int tmpx = 0;
6081 int offset;
6083 if (! current_function_interrupt)
6084 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6085 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6086 && ! FUNCTION_ARG_REGNO_P (i)
6087 && i != FIRST_RET_REG
6088 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6089 && ! (current_function_calls_eh_return
6090 && (i == EH_RETURN_STACKADJ_REGNO
6091 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6092 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6093 schedule->temps[tmpx++] = i;
6094 entry->reg = -1;
6095 entry->mode = VOIDmode;
6096 entry->offset = offset_base;
6097 entry++;
6098 /* We loop twice: first, we save 8-byte aligned registers in the
6099 higher addresses, that are known to be aligned. Then, we
6100 proceed to saving 32-bit registers that don't need 8-byte
6101 alignment.
6102 If this is an interrupt function, all registers that need saving
6103 need to be saved in full. Moreover, we need to postpone saving
6104 target registers until we have saved some general purpose registers
6105 we can then use as scratch registers. */
6106 offset = offset_base;
6107 for (align = 1; align >= 0; align--)
6109 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6110 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6112 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6113 int reg = i;
6115 if (current_function_interrupt)
6117 if (TARGET_REGISTER_P (i))
6118 continue;
6119 if (GENERAL_REGISTER_P (i))
6120 mode = DImode;
6122 if (mode == SFmode && (i % 2) == 1
6123 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6124 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6126 mode = DFmode;
6127 i--;
6128 reg--;
6131 /* If we're doing the aligned pass and this is not aligned,
6132 or we're doing the unaligned pass and this is aligned,
6133 skip it. */
6134 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6135 != align)
6136 continue;
6138 if (current_function_interrupt
6139 && GENERAL_REGISTER_P (i)
6140 && tmpx < MAX_TEMPS)
6141 schedule->temps[tmpx++] = i;
6143 offset -= GET_MODE_SIZE (mode);
6144 entry->reg = i;
6145 entry->mode = mode;
6146 entry->offset = offset;
6147 entry++;
6149 if (align && current_function_interrupt)
6150 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6151 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6153 offset -= GET_MODE_SIZE (DImode);
6154 entry->reg = i;
6155 entry->mode = DImode;
6156 entry->offset = offset;
6157 entry++;
6160 entry->reg = -1;
6161 entry->mode = VOIDmode;
6162 entry->offset = offset;
6163 schedule->temps[tmpx] = -1;
6164 return entry - 1;
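/* Expand the prologue of the current function: adjust the stack for
   pretend and stacked arguments, save the registers reported live by
   calc_live_regs, allocate the local frame and set up the frame
   pointer when it is needed.  */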
6167 void
6168 sh_expand_prologue (void)
6170 HARD_REG_SET live_regs_mask;
6171 int d, i;
6172 int d_rounding = 0;
6173 int save_flags = target_flags;
6174 int pretend_args;
6175 tree sp_switch_attr
6176 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6178 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6180 /* We have pretend args if we had an object sent partially in registers
6181 and partially on the stack, e.g. a large structure. */
6182 pretend_args = current_function_pretend_args_size;
6183 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6184 && (NPARM_REGS(SImode)
6185 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6186 pretend_args = 0;
6187 output_stack_adjust (-pretend_args
6188 - current_function_args_info.stack_regs * 8,
6189 stack_pointer_rtx, 0, NULL);
6191 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6192 /* We're going to use the PIC register to load the address of the
6193 incoming-argument decoder and/or of the return trampoline from
6194 the GOT, so make sure the PIC register is preserved and
6195 initialized. */
6196 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6198 if (TARGET_SHCOMPACT
6199 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6201 int reg;
6203 /* First, make all registers with incoming arguments that will
6204 be pushed onto the stack live, so that register renaming
6205 doesn't overwrite them. */
6206 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6207 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6208 >= NPARM_REGS (SImode) - reg)
6209 for (; reg < NPARM_REGS (SImode); reg++)
6210 emit_insn (gen_shcompact_preserve_incoming_args
6211 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6212 else if (CALL_COOKIE_INT_REG_GET
6213 (current_function_args_info.call_cookie, reg) == 1)
6214 emit_insn (gen_shcompact_preserve_incoming_args
6215 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6217 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6218 stack_pointer_rtx);
6219 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6220 GEN_INT (current_function_args_info.call_cookie));
6221 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6222 gen_rtx_REG (SImode, R0_REG));
6224 else if (TARGET_SHMEDIA)
6226 int tr = sh_media_register_for_return ();
6228 if (tr >= 0)
6230 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
6231 gen_rtx_REG (DImode, PR_MEDIA_REG));
6233 /* ??? We should suppress saving pr when we don't need it, but this
6234 is tricky because of builtin_return_address. */
6236 /* If this function only exits with sibcalls, this copy
6237 will be flagged as dead. */
6238 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6239 const0_rtx,
6240 REG_NOTES (insn));
6244 /* Emit the code for SETUP_VARARGS. */
6245 if (current_function_stdarg)
6247 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6249 /* Push arg regs as if they'd been provided by the caller on the stack. */
6250 for (i = 0; i < NPARM_REGS(SImode); i++)
6252 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6253 rtx insn;
6255 if (i >= (NPARM_REGS(SImode)
6256 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6258 break;
6259 insn = push (rn);
6260 RTX_FRAME_RELATED_P (insn) = 0;
6265 /* If we're supposed to switch stacks at function entry, do so now. */
6266 if (sp_switch_attr)
6268 /* The argument specifies a variable holding the address of the
6269 stack the interrupt function should switch to/from at entry/exit. */
6270 const char *s
6271 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6272 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6274 emit_insn (gen_sp_switch_1 (sp_switch));
6277 d = calc_live_regs (&live_regs_mask);
6278 /* ??? Maybe we could save some switching if we can move a mode switch
6279 that already happens to be at the function start into the prologue. */
6280 if (target_flags != save_flags && ! current_function_interrupt)
6281 emit_insn (gen_toggle_sz ());
6283 if (TARGET_SH5)
6285 int offset_base, offset;
6286 rtx r0 = NULL_RTX;
6287 int offset_in_r0 = -1;
6288 int sp_in_r0 = 0;
6289 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6290 int total_size, save_size;
6291 save_schedule schedule;
6292 save_entry *entry;
6293 int *tmp_pnt;
6295 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6296 && ! current_function_interrupt)
6297 r0 = gen_rtx_REG (Pmode, R0_REG);
6299 /* D is the actual number of bytes that we need for saving registers;
6300 however, in initial_elimination_offset we have committed to using
6301 an additional TREGS_SPACE bytes. In order to keep both
6302 addresses to arguments supplied by the caller and local variables
6303 valid, we must keep this gap. Place it between the incoming
6304 arguments and the actually saved registers in a bid to optimize
6305 locality of reference. */
6306 total_size = d + tregs_space;
6307 total_size += rounded_frame_size (total_size);
6308 save_size = total_size - rounded_frame_size (d);
6309 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6310 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6311 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6313 /* If adjusting the stack in a single step costs nothing extra, do so.
6314 I.e. either if a single addi is enough, or we need a movi anyway,
6315 and we don't exceed the maximum offset range (the test for the
6316 latter is conservative for simplicity). */
6317 if (TARGET_SHMEDIA
6318 && (CONST_OK_FOR_I10 (-total_size)
6319 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6320 && total_size <= 2044)))
6321 d_rounding = total_size - save_size;
6323 offset_base = d + d_rounding;
6325 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6326 0, NULL);
6328 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6329 tmp_pnt = schedule.temps;
6330 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6332 enum machine_mode mode = entry->mode;
6333 unsigned int reg = entry->reg;
6334 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6335 rtx orig_reg_rtx;
6337 offset = entry->offset;
6339 reg_rtx = gen_rtx_REG (mode, reg);
6341 mem_rtx = gen_frame_mem (mode,
6342 gen_rtx_PLUS (Pmode,
6343 stack_pointer_rtx,
6344 GEN_INT (offset)));
6346 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6348 gcc_assert (r0);
6349 mem_rtx = NULL_RTX;
6351 try_pre_dec:
6353 if (HAVE_PRE_DECREMENT
6354 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6355 || mem_rtx == NULL_RTX
6356 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6358 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6360 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6361 pre_dec_ok);
6363 pre_dec = NULL_RTX;
6365 break;
6367 pre_dec_ok:
6368 mem_rtx = NULL_RTX;
6369 offset += GET_MODE_SIZE (mode);
6371 while (0);
6373 if (mem_rtx != NULL_RTX)
6374 goto addr_ok;
6376 if (offset_in_r0 == -1)
6378 emit_move_insn (r0, GEN_INT (offset));
6379 offset_in_r0 = offset;
6381 else if (offset != offset_in_r0)
6383 emit_move_insn (r0,
6384 gen_rtx_PLUS
6385 (Pmode, r0,
6386 GEN_INT (offset - offset_in_r0)));
6387 offset_in_r0 += offset - offset_in_r0;
6390 if (pre_dec != NULL_RTX)
6392 if (! sp_in_r0)
6394 emit_move_insn (r0,
6395 gen_rtx_PLUS
6396 (Pmode, r0, stack_pointer_rtx));
6397 sp_in_r0 = 1;
6400 offset -= GET_MODE_SIZE (mode);
6401 offset_in_r0 -= GET_MODE_SIZE (mode);
6403 mem_rtx = pre_dec;
6405 else if (sp_in_r0)
6406 mem_rtx = gen_frame_mem (mode, r0);
6407 else
6408 mem_rtx = gen_frame_mem (mode,
6409 gen_rtx_PLUS (Pmode,
6410 stack_pointer_rtx,
6411 r0));
6413 /* We must not use an r0-based address for target-branch
6414 registers or for special registers without pre-dec
6415 memory addresses, since we store their values in r0
6416 first. */
6417 gcc_assert (!TARGET_REGISTER_P (reg)
6418 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6419 || mem_rtx == pre_dec));
6421 addr_ok:
6422 orig_reg_rtx = reg_rtx;
6423 if (TARGET_REGISTER_P (reg)
6424 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6425 && mem_rtx != pre_dec))
6427 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6429 emit_move_insn (tmp_reg, reg_rtx);
6431 if (REGNO (tmp_reg) == R0_REG)
6433 offset_in_r0 = -1;
6434 sp_in_r0 = 0;
6435 gcc_assert (!refers_to_regno_p
6436 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6439 if (*++tmp_pnt <= 0)
6440 tmp_pnt = schedule.temps;
6442 reg_rtx = tmp_reg;
6445 rtx insn;
6447 /* Mark as interesting for dwarf cfi generator */
6448 insn = emit_move_insn (mem_rtx, reg_rtx);
6449 RTX_FRAME_RELATED_P (insn) = 1;
6450 /* If we use an intermediate register for the save, we can't
6451 describe this exactly in cfi as a copy of the to-be-saved
6452 register into the temporary register and then a save of the temporary
6453 register to the stack, because the temporary register can
6454 have a different natural size than the to-be-saved register.
6455 Thus, we gloss over the intermediate copy and pretend we do
6456 a direct save from the to-be-saved register. */
6457 if (REGNO (reg_rtx) != reg)
6459 rtx set, note_rtx;
6461 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6462 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6463 REG_NOTES (insn));
6464 REG_NOTES (insn) = note_rtx;
6467 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6469 rtx reg_rtx = gen_rtx_REG (mode, reg);
6470 rtx set, note_rtx;
6471 rtx mem_rtx = gen_frame_mem (mode,
6472 gen_rtx_PLUS (Pmode,
6473 stack_pointer_rtx,
6474 GEN_INT (offset)));
6476 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6477 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6478 REG_NOTES (insn));
6479 REG_NOTES (insn) = note_rtx;
6484 gcc_assert (entry->offset == d_rounding);
6486 else
6487 push_regs (&live_regs_mask, current_function_interrupt);
6489 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6491 rtx insn = get_last_insn ();
6492 rtx last = emit_insn (gen_GOTaddr2picreg ());
6494 /* Mark these insns as possibly dead. Sometimes, flow2 may
6495 delete all uses of the PIC register. In this case, let it
6496 delete the initialization too. */
6499 insn = NEXT_INSN (insn);
6501 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6502 const0_rtx,
6503 REG_NOTES (insn));
6505 while (insn != last);
6508 if (SHMEDIA_REGS_STACK_ADJUST ())
6510 /* This must NOT go through the PLT, otherwise mach and macl
6511 may be clobbered. */
6512 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6513 (TARGET_FPU_ANY
6514 ? "__GCC_push_shmedia_regs"
6515 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6516 emit_insn (gen_shmedia_save_restore_regs_compact
6517 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6520 if (target_flags != save_flags && ! current_function_interrupt)
6522 rtx insn = emit_insn (gen_toggle_sz ());
6524 /* If we're lucky, a mode switch in the function body will
6525 overwrite fpscr, making this insn dead. Tell flow this
6526 insn is ok to delete. */
6527 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6528 const0_rtx,
6529 REG_NOTES (insn));
6532 target_flags = save_flags;
6534 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6535 stack_pointer_rtx, 0, NULL);
6537 if (frame_pointer_needed)
6538 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6540 if (TARGET_SHCOMPACT
6541 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6543 /* This must NOT go through the PLT, otherwise mach and macl
6544 may be clobbered. */
6545 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6546 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6547 emit_insn (gen_shcompact_incoming_args ());
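/* Expand the epilogue of the current function: deallocate the local
   frame and restore the registers saved by the prologue.  SIBCALL_P
   is true when the epilogue is expanded for a sibling call rather
   than an ordinary return.  */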
6551 void
6552 sh_expand_epilogue (bool sibcall_p)
6554 HARD_REG_SET live_regs_mask;
6555 int d, i;
6556 int d_rounding = 0;
6558 int save_flags = target_flags;
6559 int frame_size, save_size;
6560 int fpscr_deferred = 0;
6561 int e = sibcall_p ? -1 : 1;
6563 d = calc_live_regs (&live_regs_mask);
6565 save_size = d;
6566 frame_size = rounded_frame_size (d);
6568 if (TARGET_SH5)
6570 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6571 int total_size;
6572 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6573 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6574 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6576 total_size = d + tregs_space;
6577 total_size += rounded_frame_size (total_size);
6578 save_size = total_size - frame_size;
6580 /* If adjusting the stack in a single step costs nothing extra, do so.
6581 I.e. either if a single addi is enough, or we need a movi anyway,
6582 and we don't exceed the maximum offset range (the test for the
6583 latter is conservative for simplicity). */
6584 if (TARGET_SHMEDIA
6585 && ! frame_pointer_needed
6586 && (CONST_OK_FOR_I10 (total_size)
6587 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6588 && total_size <= 2044)))
6589 d_rounding = frame_size;
6591 frame_size -= d_rounding;
6594 if (frame_pointer_needed)
6596 /* We must avoid scheduling the epilogue with previous basic blocks
6597 when exception handling is enabled. See PR/18032. */
6598 if (flag_exceptions)
6599 emit_insn (gen_blockage ());
6600 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6601 &live_regs_mask);
6603 /* We must avoid moving the stack pointer adjustment past code
6604 which reads from the local frame, else an interrupt could
6605 occur after the SP adjustment and clobber data in the local
6606 frame. */
6607 emit_insn (gen_blockage ());
6608 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6610 else if (frame_size)
6612 /* We must avoid moving the stack pointer adjustment past code
6613 which reads from the local frame, else an interrupt could
6614 occur after the SP adjustment and clobber data in the local
6615 frame. */
6616 emit_insn (gen_blockage ());
6617 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6620 if (SHMEDIA_REGS_STACK_ADJUST ())
6622 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6623 (TARGET_FPU_ANY
6624 ? "__GCC_pop_shmedia_regs"
6625 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6626 /* This must NOT go through the PLT, otherwise mach and macl
6627 may be clobbered. */
6628 emit_insn (gen_shmedia_save_restore_regs_compact
6629 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6632 /* Pop all the registers. */
6634 if (target_flags != save_flags && ! current_function_interrupt)
6635 emit_insn (gen_toggle_sz ());
6636 if (TARGET_SH5)
6638 int offset_base, offset;
6639 int offset_in_r0 = -1;
6640 int sp_in_r0 = 0;
6641 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6642 save_schedule schedule;
6643 save_entry *entry;
6644 int *tmp_pnt;
6646 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6647 offset_base = -entry[1].offset + d_rounding;
6648 tmp_pnt = schedule.temps;
6649 for (; entry->mode != VOIDmode; entry--)
6651 enum machine_mode mode = entry->mode;
6652 int reg = entry->reg;
6653 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6655 offset = offset_base + entry->offset;
6656 reg_rtx = gen_rtx_REG (mode, reg);
6658 mem_rtx = gen_frame_mem (mode,
6659 gen_rtx_PLUS (Pmode,
6660 stack_pointer_rtx,
6661 GEN_INT (offset)));
6663 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6665 mem_rtx = NULL_RTX;
6667 try_post_inc:
6669 if (HAVE_POST_INCREMENT
6670 && (offset == offset_in_r0
6671 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6672 && mem_rtx == NULL_RTX)
6673 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6675 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6677 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6678 post_inc_ok);
6680 post_inc = NULL_RTX;
6682 break;
6684 post_inc_ok:
6685 mem_rtx = NULL_RTX;
6687 while (0);
6689 if (mem_rtx != NULL_RTX)
6690 goto addr_ok;
6692 if (offset_in_r0 == -1)
6694 emit_move_insn (r0, GEN_INT (offset));
6695 offset_in_r0 = offset;
6697 else if (offset != offset_in_r0)
6699 emit_move_insn (r0,
6700 gen_rtx_PLUS
6701 (Pmode, r0,
6702 GEN_INT (offset - offset_in_r0)));
6703 offset_in_r0 += offset - offset_in_r0;
6706 if (post_inc != NULL_RTX)
6708 if (! sp_in_r0)
6710 emit_move_insn (r0,
6711 gen_rtx_PLUS
6712 (Pmode, r0, stack_pointer_rtx));
6713 sp_in_r0 = 1;
6716 mem_rtx = post_inc;
6718 offset_in_r0 += GET_MODE_SIZE (mode);
6720 else if (sp_in_r0)
6721 mem_rtx = gen_frame_mem (mode, r0);
6722 else
6723 mem_rtx = gen_frame_mem (mode,
6724 gen_rtx_PLUS (Pmode,
6725 stack_pointer_rtx,
6726 r0));
6728 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6729 || mem_rtx == post_inc);
6731 addr_ok:
6732 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6733 && mem_rtx != post_inc)
6735 insn = emit_move_insn (r0, mem_rtx);
6736 mem_rtx = r0;
6738 else if (TARGET_REGISTER_P (reg))
6740 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6742 /* Give the scheduler a bit of freedom by using up to
6743 MAX_TEMPS registers in a round-robin fashion. */
6744 insn = emit_move_insn (tmp_reg, mem_rtx);
6745 mem_rtx = tmp_reg;
6746 if (*++tmp_pnt < 0)
6747 tmp_pnt = schedule.temps;
6750 insn = emit_move_insn (reg_rtx, mem_rtx);
6751 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6752 /* This is dead, unless we return with a sibcall. */
6753 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6754 const0_rtx,
6755 REG_NOTES (insn));
6758 gcc_assert (entry->offset + offset_base == d + d_rounding);
6760 else /* ! TARGET_SH5 */
6762 save_size = 0;
6763 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6764 pop (PR_REG);
6765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6767 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6769 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6770 && hard_regs_intersect_p (&live_regs_mask,
6771 &reg_class_contents[DF_REGS]))
6772 fpscr_deferred = 1;
6773 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6774 pop (j);
6775 if (j == FIRST_FP_REG && fpscr_deferred)
6776 pop (FPSCR_REG);
6780 if (target_flags != save_flags && ! current_function_interrupt)
6781 emit_insn (gen_toggle_sz ());
6782 target_flags = save_flags;
6784 output_stack_adjust (current_function_pretend_args_size
6785 + save_size + d_rounding
6786 + current_function_args_info.stack_regs * 8,
6787 stack_pointer_rtx, e, NULL);
6789 if (current_function_calls_eh_return)
6790 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6791 EH_RETURN_STACKADJ_RTX));
6793 /* Switch back to the normal stack if necessary. */
6794 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6795 emit_insn (gen_sp_switch_2 ());
6797 /* Tell flow the insn that pops PR isn't dead. */
6798 /* PR_REG will never be live in SHmedia mode, and we don't need to
6799 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6800 by the return pattern. */
6801 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6802 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
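/* Cache for sh_need_epilogue: 0 means not yet computed, 1 means an
   epilogue is needed, -1 means none is needed.  It is reset at the
   end of each function by sh_output_function_epilogue.  */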
6805 static int sh_need_epilogue_known = 0;
6808 sh_need_epilogue (void)
6810 if (! sh_need_epilogue_known)
6812 rtx epilogue;
6814 start_sequence ();
6815 sh_expand_epilogue (0);
6816 epilogue = get_insns ();
6817 end_sequence ();
6818 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6820 return sh_need_epilogue_known > 0;
6823 /* Emit code to change the current function's return address to RA.
6824 TMP is available as a scratch register, if needed. */
6826 void
6827 sh_set_return_address (rtx ra, rtx tmp)
6829 HARD_REG_SET live_regs_mask;
6830 int d;
6831 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6832 int pr_offset;
6834 d = calc_live_regs (&live_regs_mask);
6836 /* If pr_reg isn't live, we can set it (or the register given in
6837 sh_media_register_for_return) directly. */
6838 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6840 rtx rr;
6842 if (TARGET_SHMEDIA)
6844 int rr_regno = sh_media_register_for_return ();
6846 if (rr_regno < 0)
6847 rr_regno = pr_reg;
6849 rr = gen_rtx_REG (DImode, rr_regno);
6851 else
6852 rr = gen_rtx_REG (SImode, pr_reg);
6854 emit_insn (GEN_MOV (rr, ra));
6855 /* Tell flow the register for return isn't dead. */
6856 emit_insn (gen_rtx_USE (VOIDmode, rr));
6857 return;
6860 if (TARGET_SH5)
6862 int offset;
6863 save_schedule schedule;
6864 save_entry *entry;
6866 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6867 offset = entry[1].offset;
6868 for (; entry->mode != VOIDmode; entry--)
6869 if (entry->reg == pr_reg)
6870 goto found;
6872 /* We could not find the PR register. */
6873 gcc_unreachable ();
6875 found:
6876 offset = entry->offset - offset;
6877 pr_offset = (rounded_frame_size (d) + offset
6878 + SHMEDIA_REGS_STACK_ADJUST ());
6880 else
6881 pr_offset = rounded_frame_size (d);
6883 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6884 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6886 tmp = gen_frame_mem (Pmode, tmp);
6887 emit_insn (GEN_MOV (tmp, ra));
6890 /* Clear variables at function end. */
6892 static void
6893 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6894 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6896 sh_need_epilogue_known = 0;
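/* Expand __builtin_saveregs: make the unnamed argument registers
   available in memory so that va_arg can find them, and return the
   address of the save area (SH5 targets handle part of this through
   the pretend args mechanism instead).  */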
6899 static rtx
6900 sh_builtin_saveregs (void)
6902 /* First unnamed integer register. */
6903 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6904 /* Number of integer registers we need to save. */
6905 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6906 /* First unnamed SFmode float reg. */
6907 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6908 /* Number of SFmode float regs to save. */
6909 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6910 rtx regbuf, fpregs;
6911 int bufsize, regno;
6912 HOST_WIDE_INT alias_set;
6914 if (TARGET_SH5)
6916 if (n_intregs)
6918 int pushregs = n_intregs;
6920 while (pushregs < NPARM_REGS (SImode) - 1
6921 && (CALL_COOKIE_INT_REG_GET
6922 (current_function_args_info.call_cookie,
6923 NPARM_REGS (SImode) - pushregs)
6924 == 1))
6926 current_function_args_info.call_cookie
6927 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6928 - pushregs, 1);
6929 pushregs++;
6932 if (pushregs == NPARM_REGS (SImode))
6933 current_function_args_info.call_cookie
6934 |= (CALL_COOKIE_INT_REG (0, 1)
6935 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6936 else
6937 current_function_args_info.call_cookie
6938 |= CALL_COOKIE_STACKSEQ (pushregs);
6940 current_function_pretend_args_size += 8 * n_intregs;
6942 if (TARGET_SHCOMPACT)
6943 return const0_rtx;
6946 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6948 error ("__builtin_saveregs not supported by this subtarget");
6949 return const0_rtx;
6952 if (TARGET_SHMEDIA)
6953 n_floatregs = 0;
6955 /* Allocate block of memory for the regs. */
6956 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6957 Or can assign_stack_local accept a 0 SIZE argument? */
6958 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6960 if (TARGET_SHMEDIA)
6961 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6962 else if (n_floatregs & 1)
6964 rtx addr;
6966 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6967 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6968 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6969 regbuf = change_address (regbuf, BLKmode, addr);
6971 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6973 rtx addr, mask;
6975 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6976 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6977 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6978 emit_insn (gen_andsi3 (addr, addr, mask));
6979 regbuf = change_address (regbuf, BLKmode, addr);
6981 else
6982 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6983 alias_set = get_varargs_alias_set ();
6984 set_mem_alias_set (regbuf, alias_set);
6986 /* Save int args.
6987 This is optimized to only save the regs that are necessary. Explicitly
6988 named args need not be saved. */
6989 if (n_intregs > 0)
6990 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6991 adjust_address (regbuf, BLKmode,
6992 n_floatregs * UNITS_PER_WORD),
6993 n_intregs);
6995 if (TARGET_SHMEDIA)
6996 /* Return the address of the regbuf. */
6997 return XEXP (regbuf, 0);
6999 /* Save float args.
7000 This is optimized to only save the regs that are necessary. Explicitly
7001 named args need not be saved.
7002 We explicitly build a pointer to the buffer because it halves the insn
7003 count when not optimizing (otherwise the pointer is built for each reg
7004 saved).
7005 We emit the moves in reverse order so that we can use predecrement. */
7007 fpregs = copy_to_mode_reg (Pmode,
7008 plus_constant (XEXP (regbuf, 0),
7009 n_floatregs * UNITS_PER_WORD));
7010 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7012 rtx mem;
7013 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7015 emit_insn (gen_addsi3 (fpregs, fpregs,
7016 GEN_INT (-2 * UNITS_PER_WORD)));
7017 mem = change_address (regbuf, DFmode, fpregs);
7018 emit_move_insn (mem,
7019 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7021 regno = first_floatreg;
7022 if (regno & 1)
7024 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7025 mem = change_address (regbuf, SFmode, fpregs);
7026 emit_move_insn (mem,
7027 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7028 - (TARGET_LITTLE_ENDIAN != 0)));
7031 else
7032 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7034 rtx mem;
7036 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7037 mem = change_address (regbuf, SFmode, fpregs);
7038 emit_move_insn (mem,
7039 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7042 /* Return the address of the regbuf. */
7043 return XEXP (regbuf, 0);
7046 /* Define the `__builtin_va_list' type for the ABI. */
7048 static tree
7049 sh_build_builtin_va_list (void)
7051 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7052 tree record;
7054 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7055 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7056 return ptr_type_node;
7058 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7060 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7061 ptr_type_node);
7062 f_next_o_limit = build_decl (FIELD_DECL,
7063 get_identifier ("__va_next_o_limit"),
7064 ptr_type_node);
7065 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7066 ptr_type_node);
7067 f_next_fp_limit = build_decl (FIELD_DECL,
7068 get_identifier ("__va_next_fp_limit"),
7069 ptr_type_node);
7070 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7071 ptr_type_node);
7073 DECL_FIELD_CONTEXT (f_next_o) = record;
7074 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7075 DECL_FIELD_CONTEXT (f_next_fp) = record;
7076 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7077 DECL_FIELD_CONTEXT (f_next_stack) = record;
7079 TYPE_FIELDS (record) = f_next_o;
7080 TREE_CHAIN (f_next_o) = f_next_o_limit;
7081 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7082 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7083 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7085 layout_type (record);
7087 return record;
7090 /* Implement `va_start' for varargs and stdarg. */
7092 void
7093 sh_va_start (tree valist, rtx nextarg)
7095 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7096 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7097 tree t, u;
7098 int nfp, nint;
7100 if (TARGET_SH5)
7102 expand_builtin_saveregs ();
7103 std_expand_builtin_va_start (valist, nextarg);
7104 return;
7107 if ((! TARGET_SH2E && ! TARGET_SH4)
7108 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7110 std_expand_builtin_va_start (valist, nextarg);
7111 return;
7114 f_next_o = TYPE_FIELDS (va_list_type_node);
7115 f_next_o_limit = TREE_CHAIN (f_next_o);
7116 f_next_fp = TREE_CHAIN (f_next_o_limit);
7117 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7118 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7120 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7121 NULL_TREE);
7122 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7123 valist, f_next_o_limit, NULL_TREE);
7124 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7125 NULL_TREE);
7126 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7127 valist, f_next_fp_limit, NULL_TREE);
7128 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7129 valist, f_next_stack, NULL_TREE);
7131 /* Call __builtin_saveregs. */
7132 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
7133 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7134 TREE_SIDE_EFFECTS (t) = 1;
7135 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7137 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7138 if (nfp < 8)
7139 nfp = 8 - nfp;
7140 else
7141 nfp = 0;
7142 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7143 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
7144 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7145 TREE_SIDE_EFFECTS (t) = 1;
7146 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7148 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7149 TREE_SIDE_EFFECTS (t) = 1;
7150 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7152 nint = current_function_args_info.arg_count[SH_ARG_INT];
7153 if (nint < 4)
7154 nint = 4 - nint;
7155 else
7156 nint = 0;
7157 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7158 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
7159 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7160 TREE_SIDE_EFFECTS (t) = 1;
7161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7163 u = make_tree (ptr_type_node, nextarg);
7164 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7165 TREE_SIDE_EFFECTS (t) = 1;
7166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7169 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7170 member, return it. */
7171 static tree
7172 find_sole_member (tree type)
7174 tree field, member = NULL_TREE;
7176 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7178 if (TREE_CODE (field) != FIELD_DECL)
7179 continue;
7180 if (!DECL_SIZE (field))
7181 return NULL_TREE;
7182 if (integer_zerop (DECL_SIZE (field)))
7183 continue;
7184 if (member)
7185 return NULL_TREE;
7186 member = field;
7188 return member;
7190 /* Implement `va_arg'. */
7192 static tree
7193 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7194 tree *post_p ATTRIBUTE_UNUSED)
7196 HOST_WIDE_INT size, rsize;
7197 tree tmp, pptr_type_node;
7198 tree addr, lab_over = NULL, result = NULL;
7199 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7200 tree eff_type;
7202 if (pass_by_ref)
7203 type = build_pointer_type (type);
7205 size = int_size_in_bytes (type);
7206 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7207 pptr_type_node = build_pointer_type (ptr_type_node);
7209 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7210 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7212 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7213 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7214 int pass_as_float;
7215 tree lab_false;
7216 tree member;
7218 f_next_o = TYPE_FIELDS (va_list_type_node);
7219 f_next_o_limit = TREE_CHAIN (f_next_o);
7220 f_next_fp = TREE_CHAIN (f_next_o_limit);
7221 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7222 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7224 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7225 NULL_TREE);
7226 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7227 valist, f_next_o_limit, NULL_TREE);
7228 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7229 valist, f_next_fp, NULL_TREE);
7230 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7231 valist, f_next_fp_limit, NULL_TREE);
7232 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7233 valist, f_next_stack, NULL_TREE);
7235 /* Structures with a single member with a distinct mode are passed
7236 like their member. This is relevant if the latter has a REAL_TYPE
7237 or COMPLEX_TYPE type. */
7238 eff_type = type;
7239 while (TREE_CODE (eff_type) == RECORD_TYPE
7240 && (member = find_sole_member (eff_type))
7241 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7242 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7243 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7245 tree field_type = TREE_TYPE (member);
7247 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7248 eff_type = field_type;
7249 else
7251 gcc_assert ((TYPE_ALIGN (eff_type)
7252 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7253 || (TYPE_ALIGN (eff_type)
7254 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7255 break;
7259 if (TARGET_SH4)
7261 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7262 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7263 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7264 && size <= 16));
7266 else
7268 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7271 addr = create_tmp_var (pptr_type_node, NULL);
7272 lab_false = create_artificial_label ();
7273 lab_over = create_artificial_label ();
7275 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7277 if (pass_as_float)
7279 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7280 tree cmp;
7281 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7283 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7284 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7285 gimplify_and_add (tmp, pre_p);
7287 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7288 gimplify_and_add (tmp, pre_p);
7289 tmp = next_fp_limit;
7290 if (size > 4 && !is_double)
7291 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
7292 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
7293 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7294 cmp = build3 (COND_EXPR, void_type_node, tmp,
7295 build1 (GOTO_EXPR, void_type_node, lab_false),
7296 NULL_TREE);
7297 if (!is_double)
7298 gimplify_and_add (cmp, pre_p);
7300 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7301 || (is_double || size == 16))
7303 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
7304 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
7305 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
7306 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7307 next_fp_tmp, tmp);
7308 gimplify_and_add (tmp, pre_p);
7310 if (is_double)
7311 gimplify_and_add (cmp, pre_p);
7313 #ifdef FUNCTION_ARG_SCmode_WART
7314 if (TYPE_MODE (eff_type) == SCmode
7315 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7317 tree subtype = TREE_TYPE (eff_type);
7318 tree real, imag;
7320 imag
7321 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7322 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7324 real
7325 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7326 real = get_initialized_tmp_var (real, pre_p, NULL);
7328 result = build2 (COMPLEX_EXPR, type, real, imag);
7329 result = get_initialized_tmp_var (result, pre_p, NULL);
7331 #endif /* FUNCTION_ARG_SCmode_WART */
7333 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7334 gimplify_and_add (tmp, pre_p);
7336 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7337 gimplify_and_add (tmp, pre_p);
7339 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7340 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7341 gimplify_and_add (tmp, pre_p);
7342 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7343 gimplify_and_add (tmp, pre_p);
7345 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7346 gimplify_and_add (tmp, post_p);
7347 valist = next_fp_tmp;
7349 else
7351 tmp = fold_convert (ptr_type_node, size_int (rsize));
7352 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7353 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7354 tmp = build3 (COND_EXPR, void_type_node, tmp,
7355 build1 (GOTO_EXPR, void_type_node, lab_false),
7356 NULL_TREE);
7357 gimplify_and_add (tmp, pre_p);
7359 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7360 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7361 gimplify_and_add (tmp, pre_p);
7363 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7364 gimplify_and_add (tmp, pre_p);
7366 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7367 gimplify_and_add (tmp, pre_p);
7369 if (size > 4 && ! TARGET_SH4)
7371 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7372 next_o, next_o_limit);
7373 gimplify_and_add (tmp, pre_p);
7376 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7377 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7378 gimplify_and_add (tmp, pre_p);
7381 if (!result)
7383 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7384 gimplify_and_add (tmp, pre_p);
7388 /* ??? In va-sh.h, there had been code to make values larger than
7389 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7391 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7392 if (result)
7394 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7395 gimplify_and_add (tmp, pre_p);
7397 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7398 gimplify_and_add (tmp, pre_p);
7400 else
7401 result = tmp;
7403 if (pass_by_ref)
7404 result = build_va_arg_indirect_ref (result);
7406 return result;
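/* Return nonzero if arguments declared with types narrower than int
   should be promoted when passed to a function of type TYPE; the
   Renesas / Hitachi conventions leave them unpromoted.  */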
7409 bool
7410 sh_promote_prototypes (tree type)
7412 if (TARGET_HITACHI)
7413 return 0;
7414 if (! type)
7415 return 1;
7416 return ! sh_attr_renesas_p (type);
7419 /* Whether an argument must be passed by reference. On SHcompact, we
7420 pretend arguments wider than 32 bits that would have been passed in
7421 registers are passed by reference, so that an SHmedia trampoline
7422 loads them into the full 64-bit registers. */
7424 static int
7425 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7426 tree type, bool named)
7428 unsigned HOST_WIDE_INT size;
7430 if (type)
7431 size = int_size_in_bytes (type);
7432 else
7433 size = GET_MODE_SIZE (mode);
7435 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7436 && (!named
7437 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7438 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7439 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7440 && size > 4
7441 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7442 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7443 return size;
7444 else
7445 return 0;
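/* Return true if an argument of mode MODE and type TYPE must be
   passed by reference instead of by value.  */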
7448 static bool
7449 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7450 tree type, bool named)
7452 if (targetm.calls.must_pass_in_stack (mode, type))
7453 return true;
7455 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7456 wants to know about pass-by-reference semantics for incoming
7457 arguments. */
7458 if (! cum)
7459 return false;
7461 if (TARGET_SHCOMPACT)
7463 cum->byref = shcompact_byref (cum, mode, type, named);
7464 return cum->byref != 0;
7467 return false;
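/* Return true if the callee, rather than the caller, is responsible
   for copying an argument that is passed by reference.  */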
7470 static bool
7471 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7472 tree type, bool named ATTRIBUTE_UNUSED)
7474 /* ??? How can it possibly be correct to return true only on the
7475 caller side of the equation? Is there someplace else in the
7476 sh backend that's magically producing the copies? */
7477 return (cum->outgoing
7478 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7479 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
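/* Return the number of bytes of an argument of mode MODE and type
   TYPE that are passed in registers when the argument is split
   between registers and the stack.  */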
7482 static int
7483 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7484 tree type, bool named ATTRIBUTE_UNUSED)
7486 int words = 0;
7488 if (!TARGET_SH5
7489 && PASS_IN_REG_P (*cum, mode, type)
7490 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7491 && (ROUND_REG (*cum, mode)
7492 + (mode != BLKmode
7493 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7494 : ROUND_ADVANCE (int_size_in_bytes (type)))
7495 > NPARM_REGS (mode)))
7496 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7498 else if (!TARGET_SHCOMPACT
7499 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7500 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7502 return words * UNITS_PER_WORD;
7506 /* Define where to put the arguments to a function.
7507 Value is zero to push the argument on the stack,
7508 or a hard register in which to store the argument.
7510 MODE is the argument's machine mode.
7511 TYPE is the data type of the argument (as a tree).
7512 This is null for libcalls where that information may
7513 not be available.
7514 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7515 the preceding args and about the function being called.
7516 NAMED is nonzero if this argument is a named parameter
7517 (otherwise it is an extra parameter matching an ellipsis).
7519 On SH the first args are normally in registers
7520 and the rest are pushed. Any arg that starts within the first
7521 NPARM_REGS words is at least partially passed in a register unless
7522 its data type forbids. */
7526 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7527 tree type, int named)
7529 if (! TARGET_SH5 && mode == VOIDmode)
7530 return GEN_INT (ca->renesas_abi ? 1 : 0);
7532 if (! TARGET_SH5
7533 && PASS_IN_REG_P (*ca, mode, type)
7534 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7536 int regno;
7538 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7539 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7541 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7542 gen_rtx_REG (SFmode,
7543 BASE_ARG_REG (mode)
7544 + (ROUND_REG (*ca, mode) ^ 1)),
7545 const0_rtx);
7546 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7547 gen_rtx_REG (SFmode,
7548 BASE_ARG_REG (mode)
7549 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7550 GEN_INT (4));
7551 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7554 /* If the alignment of a DF value causes an SF register to be
7555 skipped, we will use that skipped register for the next SF
7556 value. */
7557 if ((TARGET_HITACHI || ca->renesas_abi)
7558 && ca->free_single_fp_reg
7559 && mode == SFmode)
7560 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7562 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7563 ^ (mode == SFmode && TARGET_SH4
7564 && TARGET_LITTLE_ENDIAN != 0
7565 && ! TARGET_HITACHI && ! ca->renesas_abi);
7566 return gen_rtx_REG (mode, regno);
7570 if (TARGET_SH5)
7572 if (mode == VOIDmode && TARGET_SHCOMPACT)
7573 return GEN_INT (ca->call_cookie);
7575 /* The following test assumes unnamed arguments are promoted to
7576 DFmode. */
7577 if (mode == SFmode && ca->free_single_fp_reg)
7578 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7580 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7581 && (named || ! ca->prototype_p)
7582 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7584 if (! ca->prototype_p && TARGET_SHMEDIA)
7585 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7587 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7588 FIRST_FP_PARM_REG
7589 + ca->arg_count[(int) SH_ARG_FLOAT]);
7592 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7593 && (! TARGET_SHCOMPACT
7594 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7595 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7596 type, named))))
7598 return gen_rtx_REG (mode, (FIRST_PARM_REG
7599 + ca->arg_count[(int) SH_ARG_INT]));
7602 return 0;
7605 return 0;
7608 /* Update the data in CUM to advance over an argument
7609 of mode MODE and data type TYPE.
7610 (TYPE is null for libcalls where that information may not be
7611 available.) */
7613 void
7614 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7615 tree type, int named)
7617 if (ca->force_mem)
7618 ca->force_mem = 0;
7619 else if (TARGET_SH5)
7621 tree type2 = (ca->byref && type
7622 ? TREE_TYPE (type)
7623 : type);
7624 enum machine_mode mode2 = (ca->byref && type
7625 ? TYPE_MODE (type2)
7626 : mode);
7627 int dwords = ((ca->byref
7628 ? ca->byref
7629 : mode2 == BLKmode
7630 ? int_size_in_bytes (type2)
7631 : GET_MODE_SIZE (mode2)) + 7) / 8;
7632 int numregs = MIN (dwords, NPARM_REGS (SImode)
7633 - ca->arg_count[(int) SH_ARG_INT]);
7635 if (numregs)
7637 ca->arg_count[(int) SH_ARG_INT] += numregs;
7638 if (TARGET_SHCOMPACT
7639 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7641 ca->call_cookie
7642 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7643 - numregs, 1);
7644 /* N.B. We want this also for outgoing. */
7645 ca->stack_regs += numregs;
7647 else if (ca->byref)
7649 if (! ca->outgoing)
7650 ca->stack_regs += numregs;
7651 ca->byref_regs += numregs;
7652 ca->byref = 0;
7654 ca->call_cookie
7655 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7656 - numregs, 2);
7657 while (--numregs);
7658 ca->call_cookie
7659 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7660 - 1, 1);
7662 else if (dwords > numregs)
7664 int pushregs = numregs;
7666 if (TARGET_SHCOMPACT)
7667 ca->stack_regs += numregs;
7668 while (pushregs < NPARM_REGS (SImode) - 1
7669 && (CALL_COOKIE_INT_REG_GET
7670 (ca->call_cookie,
7671 NPARM_REGS (SImode) - pushregs)
7672 == 1))
7674 ca->call_cookie
7675 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7676 - pushregs, 1);
7677 pushregs++;
7679 if (numregs == NPARM_REGS (SImode))
7680 ca->call_cookie
7681 |= CALL_COOKIE_INT_REG (0, 1)
7682 | CALL_COOKIE_STACKSEQ (numregs - 1);
7683 else
7684 ca->call_cookie
7685 |= CALL_COOKIE_STACKSEQ (numregs);
7688 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7689 && (named || ! ca->prototype_p))
7691 if (mode2 == SFmode && ca->free_single_fp_reg)
7692 ca->free_single_fp_reg = 0;
7693 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7694 < NPARM_REGS (SFmode))
7696 int numfpregs
7697 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7698 NPARM_REGS (SFmode)
7699 - ca->arg_count[(int) SH_ARG_FLOAT]);
7701 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7703 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7705 if (ca->outgoing && numregs > 0)
7708 ca->call_cookie
7709 |= (CALL_COOKIE_INT_REG
7710 (ca->arg_count[(int) SH_ARG_INT]
7711 - numregs + ((numfpregs - 2) / 2),
7712 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7713 - numfpregs) / 2));
7715 while (numfpregs -= 2);
7717 else if (mode2 == SFmode && (named)
7718 && (ca->arg_count[(int) SH_ARG_FLOAT]
7719 < NPARM_REGS (SFmode)))
7720 ca->free_single_fp_reg
7721 = FIRST_FP_PARM_REG - numfpregs
7722 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7725 return;
7728 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7730 /* Note that we've used the skipped register. */
7731 if (mode == SFmode && ca->free_single_fp_reg)
7733 ca->free_single_fp_reg = 0;
7734 return;
7736   /* When we have a DF after an SF, there's an SF register that gets
7737 skipped in order to align the DF value. We note this skipped
7738 register, because the next SF value will use it, and not the
7739 SF that follows the DF. */
7740 if (mode == DFmode
7741 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7743 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7744 + BASE_ARG_REG (mode));
7748 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7749 || PASS_IN_REG_P (*ca, mode, type))
7750 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7751 = (ROUND_REG (*ca, mode)
7752 + (mode == BLKmode
7753 ? ROUND_ADVANCE (int_size_in_bytes (type))
7754 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7757 /* The Renesas calling convention doesn't quite fit into this scheme since
7758 the address is passed like an invisible argument, but one that is always
7759 passed in memory. */
7760 static rtx
7761 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7763 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7764 return 0;
7765 return gen_rtx_REG (Pmode, 2);
7768 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7770 static bool
7771 sh_return_in_memory (tree type, tree fndecl)
7773 if (TARGET_SH5)
7775 if (TYPE_MODE (type) == BLKmode)
7776 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7777 else
7778 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7780 else
7782 return (TYPE_MODE (type) == BLKmode
7783 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7784 && TREE_CODE (type) == RECORD_TYPE));
7788 /* We actually emit the code in sh_expand_prologue. We used to use
7789    doesn't work when inlining, when functions are deferred and then emitted
7790    a static variable to flag that we need to emit this code, but that
7791 later. Fortunately, we already have two flags that are part of struct
7792 function that tell if a function uses varargs or stdarg. */
7793 static void
7794 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7795 enum machine_mode mode,
7796 tree type,
7797 int *pretend_arg_size,
7798 int second_time ATTRIBUTE_UNUSED)
7800 gcc_assert (current_function_stdarg);
7801 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7803 int named_parm_regs, anon_parm_regs;
7805 named_parm_regs = (ROUND_REG (*ca, mode)
7806 + (mode == BLKmode
7807 ? ROUND_ADVANCE (int_size_in_bytes (type))
7808 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7809 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7810 if (anon_parm_regs > 0)
7811 *pretend_arg_size = anon_parm_regs * 4;
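/* Strict argument naming is only required for SH5.  */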
7815 static bool
7816 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7818 return TARGET_SH5;
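/* Treat unnamed outgoing arguments as if they were named, except for the
   Renesas / Hitachi ABIs and SH5, where the distinction matters.  */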
7821 static bool
7822 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7824 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7828 /* Define the offset between two registers, one to be eliminated, and
7829 the other its replacement, at the start of a routine. */
7832 initial_elimination_offset (int from, int to)
7834 int regs_saved;
7835 int regs_saved_rounding = 0;
7836 int total_saved_regs_space;
7837 int total_auto_space;
7838 int save_flags = target_flags;
7839 int copy_flags;
7840 HARD_REG_SET live_regs_mask;
7842 shmedia_space_reserved_for_target_registers = false;
7843 regs_saved = calc_live_regs (&live_regs_mask);
7844 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7846 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7848 shmedia_space_reserved_for_target_registers = true;
7849 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7852 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7853 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7854 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7856 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7857 copy_flags = target_flags;
7858 target_flags = save_flags;
7860 total_saved_regs_space = regs_saved + regs_saved_rounding;
7862 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7863 return total_saved_regs_space + total_auto_space
7864 + current_function_args_info.byref_regs * 8;
7866 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7867 return total_saved_regs_space + total_auto_space
7868 + current_function_args_info.byref_regs * 8;
7870 /* Initial gap between fp and sp is 0. */
7871 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7872 return 0;
7874 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7875 return rounded_frame_size (0);
7877 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7878 return rounded_frame_size (0);
7880 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7881 && (to == HARD_FRAME_POINTER_REGNUM
7882 || to == STACK_POINTER_REGNUM));
7883 if (TARGET_SH5)
7885 int n = total_saved_regs_space;
7886 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7887 save_schedule schedule;
7888 save_entry *entry;
7890 n += total_auto_space;
7892 /* If it wasn't saved, there's not much we can do. */
7893 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7894 return n;
7896 target_flags = copy_flags;
7898 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7899 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7900 if (entry->reg == pr_reg)
7902 target_flags = save_flags;
7903 return entry->offset;
7905 gcc_unreachable ();
7907 else
7908 return total_auto_space;
7911 /* Insert any deferred function attributes from earlier pragmas. */
7912 static void
7913 sh_insert_attributes (tree node, tree *attributes)
7915 tree attrs;
7917 if (TREE_CODE (node) != FUNCTION_DECL)
7918 return;
7920   /* We are only interested in function declarations.  */
7921 if (!DECL_P (node))
7922 return;
7924 /* Append the attributes to the deferred attributes. */
7925 *sh_deferred_function_attributes_tail = *attributes;
7926 attrs = sh_deferred_function_attributes;
7927 if (!attrs)
7928 return;
7930 /* Some attributes imply or require the interrupt attribute. */
7931 if (!lookup_attribute ("interrupt_handler", attrs)
7932 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7934 /* If we have a trapa_handler, but no interrupt_handler attribute,
7935 insert an interrupt_handler attribute. */
7936 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7937 /* We can't use sh_pr_interrupt here because that's not in the
7938 java frontend. */
7939 attrs
7940 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7941 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7942 interrupt attribute is missing, we ignore the attribute and warn. */
7943 else if (lookup_attribute ("sp_switch", attrs)
7944 || lookup_attribute ("trap_exit", attrs)
7945 || lookup_attribute ("nosave_low_regs", attrs))
7947 tree *tail;
7949 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7951 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7952 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7953 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7954 warning (OPT_Wattributes,
7955 "%qs attribute only applies to interrupt functions",
7956 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7957 else
7959 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7960 NULL_TREE);
7961 tail = &TREE_CHAIN (*tail);
7964 attrs = *attributes;
7968 /* Install the processed list. */
7969 *attributes = attrs;
7971 /* Clear deferred attributes. */
7972 sh_deferred_function_attributes = NULL_TREE;
7973 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7975 return;
7978 /* Supported attributes:
7980 interrupt_handler -- specifies this function is an interrupt handler.
7982    trapa_handler -- like above, but don't save all registers.
7984 sp_switch -- specifies an alternate stack for an interrupt handler
7985 to run on.
7987 trap_exit -- use a trapa to exit an interrupt function instead of
7988 an rte instruction.
7990    nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7991    This is useful on the SH3 and upwards,
7992    which have a separate set of low regs for User and Supervisor modes.
7993 This should only be used for the lowest level of interrupts. Higher levels
7994 of interrupts must save the registers in case they themselves are
7995 interrupted.
7997 renesas -- use Renesas calling/layout conventions (functions and
7998 structures).
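/* For example, an interrupt handler that runs on an alternate stack and
   exits with a trapa could be declared as follows; the stack name and
   trap number here are purely illustrative:

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
		       trap_exit (11)));  */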
8002 const struct attribute_spec sh_attribute_table[] =
8004 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8005 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8006 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8007 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8008 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8009 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8010 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8011 #ifdef SYMBIAN
8012 /* Symbian support adds three new attributes:
8013 dllexport - for exporting a function/variable that will live in a dll
8014 dllimport - for importing a function/variable from a dll
8016 Microsoft allows multiple declspecs in one __declspec, separating
8017 them with spaces. We do NOT support this. Instead, use __declspec
8018 multiple times. */
8019 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8020 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8021 #endif
8022 { NULL, 0, 0, false, false, false, NULL }
8025 /* Handle an "interrupt_handler" attribute; arguments as in
8026 struct attribute_spec.handler. */
8027 static tree
8028 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8029 tree args ATTRIBUTE_UNUSED,
8030 int flags ATTRIBUTE_UNUSED,
8031 bool *no_add_attrs)
8033 if (TREE_CODE (*node) != FUNCTION_DECL)
8035 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8036 IDENTIFIER_POINTER (name));
8037 *no_add_attrs = true;
8039 else if (TARGET_SHCOMPACT)
8041 error ("attribute interrupt_handler is not compatible with -m5-compact");
8042 *no_add_attrs = true;
8045 return NULL_TREE;
8048 /* Handle an "sp_switch" attribute; arguments as in
8049 struct attribute_spec.handler. */
8050 static tree
8051 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8052 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8054 if (TREE_CODE (*node) != FUNCTION_DECL)
8056 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8057 IDENTIFIER_POINTER (name));
8058 *no_add_attrs = true;
8060 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8062 /* The argument must be a constant string. */
8063 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8064 IDENTIFIER_POINTER (name));
8065 *no_add_attrs = true;
8068 return NULL_TREE;
8071 /* Handle a "trap_exit" attribute; arguments as in
8072 struct attribute_spec.handler. */
8073 static tree
8074 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8075 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8077 if (TREE_CODE (*node) != FUNCTION_DECL)
8079 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8080 IDENTIFIER_POINTER (name));
8081 *no_add_attrs = true;
8083 /* The argument specifies a trap number to be used in a trapa instruction
8084 at function exit (instead of an rte instruction). */
8085 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8087 /* The argument must be a constant integer. */
8088 warning (OPT_Wattributes, "%qs attribute argument not an "
8089 "integer constant", IDENTIFIER_POINTER (name));
8090 *no_add_attrs = true;
8093 return NULL_TREE;
8096 static tree
8097 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8098 tree name ATTRIBUTE_UNUSED,
8099 tree args ATTRIBUTE_UNUSED,
8100 int flags ATTRIBUTE_UNUSED,
8101 bool *no_add_attrs ATTRIBUTE_UNUSED)
8103 return NULL_TREE;
8106 /* True if __attribute__((renesas)) or -mrenesas. */
8108 sh_attr_renesas_p (tree td)
8110 if (TARGET_HITACHI)
8111 return 1;
8112 if (td == 0)
8113 return 0;
8114 if (DECL_P (td))
8115 td = TREE_TYPE (td);
8116 if (td == error_mark_node)
8117 return 0;
8118 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8119 != NULL_TREE);
8122 /* True if __attribute__((renesas)) or -mrenesas, for the current
8123 function. */
8125 sh_cfun_attr_renesas_p (void)
8127 return sh_attr_renesas_p (current_function_decl);
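/* Return true if the current function was declared with the
   interrupt_handler attribute.  */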
8131 sh_cfun_interrupt_handler_p (void)
8133 return (lookup_attribute ("interrupt_handler",
8134 DECL_ATTRIBUTES (current_function_decl))
8135 != NULL_TREE);
8138 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8140 static const char *
8141 sh_check_pch_target_flags (int old_flags)
8143 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8144 | MASK_SH_E | MASK_HARD_SH4
8145 | MASK_FPU_SINGLE | MASK_SH4))
8146 return _("created and used with different architectures / ABIs");
8147 if ((old_flags ^ target_flags) & MASK_HITACHI)
8148 return _("created and used with different ABIs");
8149 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8150 return _("created and used with different endianness");
8151 return NULL;
8154 /* Predicates used by the templates. */
8156 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8157 Used only in general_movsrc_operand. */
8160 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8162 switch (REGNO (op))
8164 case PR_REG:
8165 case MACL_REG:
8166 case MACH_REG:
8167 return 1;
8169 return 0;
8172 /* Nonzero if OP is a floating point value with value 0.0. */
8175 fp_zero_operand (rtx op)
8177 REAL_VALUE_TYPE r;
8179 if (GET_MODE (op) != SFmode)
8180 return 0;
8182 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8183 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8186 /* Nonzero if OP is a floating point value with value 1.0. */
8189 fp_one_operand (rtx op)
8191 REAL_VALUE_TYPE r;
8193 if (GET_MODE (op) != SFmode)
8194 return 0;
8196 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8197 return REAL_VALUES_EQUAL (r, dconst1);
8200 /* For -m4 and -m4-single-only, mode switching is used. If we are
8201 compiling without -mfmovd, movsf_ie isn't taken into account for
8202 mode switching. We could check in machine_dependent_reorg for
8203 cases where we know we are in single precision mode, but there is
8204    no interface to find that out during reload, so we must avoid
8205 choosing an fldi alternative during reload and thus failing to
8206 allocate a scratch register for the constant loading. */
8208 fldi_ok (void)
8210 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
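/* Nonzero if OP is a MEM, or, on SH4, a CONST_DOUBLE.  */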
8214 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8216 enum rtx_code code = GET_CODE (op);
8217 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8220 /* Return the TLS type for TLS symbols, 0 otherwise.  */
8222 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8224 if (GET_CODE (op) != SYMBOL_REF)
8225 return 0;
8226 return SYMBOL_REF_TLS_MODEL (op);
8229 /* Return the destination address of a branch. */
8231 static int
8232 branch_dest (rtx branch)
8234 rtx dest = SET_SRC (PATTERN (branch));
8235 int dest_uid;
8237 if (GET_CODE (dest) == IF_THEN_ELSE)
8238 dest = XEXP (dest, 1);
8239 dest = XEXP (dest, 0);
8240 dest_uid = INSN_UID (dest);
8241 return INSN_ADDRESSES (dest_uid);
8244 /* Return nonzero if REG is not used after INSN.
8245 We assume REG is a reload reg, and therefore does
8246 not live past labels. It may live past calls or jumps though. */
8248 reg_unused_after (rtx reg, rtx insn)
8250 enum rtx_code code;
8251 rtx set;
8253 /* If the reg is set by this instruction, then it is safe for our
8254 case. Disregard the case where this is a store to memory, since
8255 we are checking a register used in the store address. */
8256 set = single_set (insn);
8257 if (set && GET_CODE (SET_DEST (set)) != MEM
8258 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8259 return 1;
8261 while ((insn = NEXT_INSN (insn)))
8263 rtx set;
8264 if (!INSN_P (insn))
8265 continue;
8267 code = GET_CODE (insn);
8269 #if 0
8270 /* If this is a label that existed before reload, then the register
8271    is dead here.  However, if this is a label added by reorg, then
8272 the register may still be live here. We can't tell the difference,
8273 so we just ignore labels completely. */
8274 if (code == CODE_LABEL)
8275 return 1;
8276 /* else */
8277 #endif
8279 if (code == JUMP_INSN)
8280 return 0;
8282 /* If this is a sequence, we must handle them all at once.
8283 We could have for instance a call that sets the target register,
8284 and an insn in a delay slot that uses the register. In this case,
8285 we must return 0. */
8286 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8288 int i;
8289 int retval = 0;
8291 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8293 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8294 rtx set = single_set (this_insn);
8296 if (GET_CODE (this_insn) == CALL_INSN)
8297 code = CALL_INSN;
8298 else if (GET_CODE (this_insn) == JUMP_INSN)
8300 if (INSN_ANNULLED_BRANCH_P (this_insn))
8301 return 0;
8302 code = JUMP_INSN;
8305 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8306 return 0;
8307 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8309 if (GET_CODE (SET_DEST (set)) != MEM)
8310 retval = 1;
8311 else
8312 return 0;
8314 if (set == 0
8315 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8316 return 0;
8318 if (retval == 1)
8319 return 1;
8320 else if (code == JUMP_INSN)
8321 return 0;
8324 set = single_set (insn);
8325 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8326 return 0;
8327 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8328 return GET_CODE (SET_DEST (set)) != MEM;
8329 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8330 return 0;
8332 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8333 return 1;
8335 return 1;
8338 #include "ggc.h"
8340 static GTY(()) rtx fpscr_rtx;
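/* Return the RTL for the FPSCR register, creating it on first use.  */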
8342 get_fpscr_rtx (void)
8344 if (! fpscr_rtx)
8346 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8347 REG_USERVAR_P (fpscr_rtx) = 1;
8348 mark_user_reg (fpscr_rtx);
8350 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8351 mark_user_reg (fpscr_rtx);
8352 return fpscr_rtx;
8355 static GTY(()) tree fpscr_values;
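/* Load entry INDEX of the external __fpscr_values array into the FPSCR
   register.  SCRATCH is used to form the address when new pseudos may
   no longer be created.  */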
8357 static void
8358 emit_fpu_switch (rtx scratch, int index)
8360 rtx dst, src;
8362 if (fpscr_values == NULL)
8364 tree t;
8366 t = build_index_type (integer_one_node);
8367 t = build_array_type (integer_type_node, t);
8368 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8369 DECL_ARTIFICIAL (t) = 1;
8370 DECL_IGNORED_P (t) = 1;
8371 DECL_EXTERNAL (t) = 1;
8372 TREE_STATIC (t) = 1;
8373 TREE_PUBLIC (t) = 1;
8374 TREE_USED (t) = 1;
8376 fpscr_values = t;
8379 src = DECL_RTL (fpscr_values);
8380 if (no_new_pseudos)
8382 emit_move_insn (scratch, XEXP (src, 0));
8383 if (index != 0)
8384 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8385 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8387 else
8388 src = adjust_address (src, PSImode, index * 4);
8390 dst = get_fpscr_rtx ();
8391 emit_move_insn (dst, src);
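/* Helpers for emitting single and double precision floating point
   patterns that take the FPSCR as an extra operand.  */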
8394 void
8395 emit_sf_insn (rtx pat)
8397 emit_insn (pat);
8400 void
8401 emit_df_insn (rtx pat)
8403 emit_insn (pat);
8406 void
8407 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8409 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8412 void
8413 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8415 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8416 get_fpscr_rtx ()));
8419 void
8420 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8422 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8425 void
8426 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8428 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8429 get_fpscr_rtx ()));
8432 /* ??? gcc does flow analysis strictly after common subexpression
8433 elimination. As a result, common subexpression elimination fails
8434 when there are some intervening statements setting the same register.
8435 If we did nothing about this, this would hurt the precision switching
8436 for SH4 badly. There is some cse after reload, but it is unable to
8437 undo the extra register pressure from the unused instructions, and
8438 it cannot remove auto-increment loads.
8440 A C code example that shows this flow/cse weakness for (at least) SH
8441 and sparc (as of gcc ss-970706) is this:
8443 double
8444 f(double a)
8446 double d;
8447 d = 0.1;
8448 a += d;
8449 d = 1.1;
8450 d = 0.1;
8451 a *= d;
8452 return a;
8455 So we add another pass before common subexpression elimination, to
8456 remove assignments that are dead due to a following assignment in the
8457 same basic block. */
8459 static void
8460 mark_use (rtx x, rtx *reg_set_block)
8462 enum rtx_code code;
8464 if (! x)
8465 return;
8466 code = GET_CODE (x);
8467 switch (code)
8469 case REG:
8471 int regno = REGNO (x);
8472 int nregs = (regno < FIRST_PSEUDO_REGISTER
8473 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8474 : 1);
8477 reg_set_block[regno + nregs - 1] = 0;
8479 while (--nregs);
8480 break;
8482 case SET:
8484 rtx dest = SET_DEST (x);
8486 if (GET_CODE (dest) == SUBREG)
8487 dest = SUBREG_REG (dest);
8488 if (GET_CODE (dest) != REG)
8489 mark_use (dest, reg_set_block);
8490 mark_use (SET_SRC (x), reg_set_block);
8491 break;
8493 case CLOBBER:
8494 break;
8495 default:
8497 const char *fmt = GET_RTX_FORMAT (code);
8498 int i, j;
8499 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8501 if (fmt[i] == 'e')
8502 mark_use (XEXP (x, i), reg_set_block);
8503 else if (fmt[i] == 'E')
8504 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8505 mark_use (XVECEXP (x, i, j), reg_set_block);
8507 break;
8512 static rtx get_free_reg (HARD_REG_SET);
8514 /* This function returns a register to use for loading the address from which
8515    the fpscr is loaded.  Currently it always returns r1 or r7, but when we are
8516 able to use pseudo registers after combine, or have a better mechanism
8517 for choosing a register, it should be done here. */
8518 /* REGS_LIVE is the liveness information for the point for which we
8519 need this allocation. In some bare-bones exit blocks, r1 is live at the
8520 start. We can even have all of r0..r3 being live:
8521 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8522    The INSN before which new insns are placed will clobber the register
8523 we return. If a basic block consists only of setting the return value
8524 register to a pseudo and using that register, the return value is not
8525    live before or after this block, yet we'll insert our insns right in
8526 the middle. */
8528 static rtx
8529 get_free_reg (HARD_REG_SET regs_live)
8531 if (! TEST_HARD_REG_BIT (regs_live, 1))
8532 return gen_rtx_REG (Pmode, 1);
8534 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8535 there shouldn't be anything but a jump before the function end. */
8536 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8537 return gen_rtx_REG (Pmode, 7);
8540 /* This function will set the fpscr from memory.
8541 MODE is the mode we are setting it to. */
8542 void
8543 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8545 enum attr_fp_mode fp_mode = mode;
8546 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8547 rtx addr_reg = get_free_reg (regs_live);
8549 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8552 /* Is the given character a logical line separator for the assembler? */
8553 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8554 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8555 #endif
8558 sh_insn_length_adjustment (rtx insn)
8560 /* Instructions with unfilled delay slots take up an extra two bytes for
8561 the nop in the delay slot. */
8562 if (((GET_CODE (insn) == INSN
8563 && GET_CODE (PATTERN (insn)) != USE
8564 && GET_CODE (PATTERN (insn)) != CLOBBER)
8565 || GET_CODE (insn) == CALL_INSN
8566 || (GET_CODE (insn) == JUMP_INSN
8567 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8568 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8569 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8570 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8571 return 2;
8573 /* SH2e has a bug that prevents the use of annulled branches, so if
8574 the delay slot is not filled, we'll have to put a NOP in it. */
8575 if (sh_cpu == CPU_SH2E
8576 && GET_CODE (insn) == JUMP_INSN
8577 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8578 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8579 && get_attr_type (insn) == TYPE_CBRANCH
8580 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8581 return 2;
8583   /* sh-dsp parallel processing insns take four bytes instead of two.  */
8585 if (GET_CODE (insn) == INSN)
8587 int sum = 0;
8588 rtx body = PATTERN (insn);
8589 const char *template;
8590 char c;
8591 int maybe_label = 1;
8593 if (GET_CODE (body) == ASM_INPUT)
8594 template = XSTR (body, 0);
8595 else if (asm_noperands (body) >= 0)
8596 template
8597 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8598 else
8599 return 0;
8602 int ppi_adjust = 0;
8605 c = *template++;
8606 while (c == ' ' || c == '\t');
8607 /* all sh-dsp parallel-processing insns start with p.
8608 The only non-ppi sh insn starting with p is pref.
8609 The only ppi starting with pr is prnd. */
8610 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8611 ppi_adjust = 2;
8612 	  /* The repeat pseudo-insn expands to three insns, a total of
8613 six bytes in size. */
8614 else if ((c == 'r' || c == 'R')
8615 && ! strncasecmp ("epeat", template, 5))
8616 ppi_adjust = 4;
8617 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8619 /* If this is a label, it is obviously not a ppi insn. */
8620 if (c == ':' && maybe_label)
8622 ppi_adjust = 0;
8623 break;
8625 else if (c == '\'' || c == '"')
8626 maybe_label = 0;
8627 c = *template++;
8629 sum += ppi_adjust;
8630 maybe_label = c != ':';
8632 while (c);
8633 return sum;
8635 return 0;
8638 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8639 isn't protected by a PIC unspec. */
8641 nonpic_symbol_mentioned_p (rtx x)
8643 register const char *fmt;
8644 register int i;
8646 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8647 || GET_CODE (x) == PC)
8648 return 1;
8650 /* We don't want to look into the possible MEM location of a
8651 CONST_DOUBLE, since we're not going to use it, in general. */
8652 if (GET_CODE (x) == CONST_DOUBLE)
8653 return 0;
8655 if (GET_CODE (x) == UNSPEC
8656 && (XINT (x, 1) == UNSPEC_PIC
8657 || XINT (x, 1) == UNSPEC_GOT
8658 || XINT (x, 1) == UNSPEC_GOTOFF
8659 || XINT (x, 1) == UNSPEC_GOTPLT
8660 || XINT (x, 1) == UNSPEC_GOTTPOFF
8661 || XINT (x, 1) == UNSPEC_DTPOFF
8662 || XINT (x, 1) == UNSPEC_PLT))
8663 return 0;
8665 fmt = GET_RTX_FORMAT (GET_CODE (x));
8666 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8668 if (fmt[i] == 'E')
8670 register int j;
8672 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8673 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8674 return 1;
8676 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8677 return 1;
8680 return 0;
8683 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8684 @GOTOFF in `reg'. */
8686 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8687 rtx reg)
8689 if (tls_symbolic_operand (orig, Pmode))
8690 return orig;
8692 if (GET_CODE (orig) == LABEL_REF
8693 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8695 if (reg == 0)
8696 reg = gen_reg_rtx (Pmode);
8698 emit_insn (gen_symGOTOFF2reg (reg, orig));
8699 return reg;
8701 else if (GET_CODE (orig) == SYMBOL_REF)
8703 if (reg == 0)
8704 reg = gen_reg_rtx (Pmode);
8706 emit_insn (gen_symGOT2reg (reg, orig));
8707 return reg;
8709 return orig;
8712 /* Mark the use of a constant in the literal table. If the constant
8713 has multiple labels, make it unique. */
8714 static rtx
8715 mark_constant_pool_use (rtx x)
8717 rtx insn, lab, pattern;
8719 if (x == NULL)
8720 return x;
8722 switch (GET_CODE (x))
8724 case LABEL_REF:
8725 x = XEXP (x, 0);
8726 case CODE_LABEL:
8727 break;
8728 default:
8729 return x;
8732 /* Get the first label in the list of labels for the same constant
8733    and delete the other labels in the list.  */
8734 lab = x;
8735 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8737 if (GET_CODE (insn) != CODE_LABEL
8738 || LABEL_REFS (insn) != NEXT_INSN (insn))
8739 break;
8740 lab = insn;
8743 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8744 INSN_DELETED_P (insn) = 1;
8746 /* Mark constants in a window. */
8747 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8749 if (GET_CODE (insn) != INSN)
8750 continue;
8752 pattern = PATTERN (insn);
8753 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8754 continue;
8756 switch (XINT (pattern, 1))
8758 case UNSPECV_CONST2:
8759 case UNSPECV_CONST4:
8760 case UNSPECV_CONST8:
8761 XVECEXP (pattern, 0, 1) = const1_rtx;
8762 break;
8763 case UNSPECV_WINDOW_END:
8764 if (XVECEXP (pattern, 0, 0) == x)
8765 return lab;
8766 break;
8767 case UNSPECV_CONST_END:
8768 return lab;
8769 default:
8770 break;
8774 return lab;
8777 /* Return true if it's possible to redirect BRANCH1 to the destination
8778 of an unconditional jump BRANCH2. We only want to do this if the
8779 resulting branch will have a short displacement. */
8781 sh_can_redirect_branch (rtx branch1, rtx branch2)
8783 if (flag_expensive_optimizations && simplejump_p (branch2))
8785 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8786 rtx insn;
8787 int distance;
8789 for (distance = 0, insn = NEXT_INSN (branch1);
8790 insn && distance < 256;
8791 insn = PREV_INSN (insn))
8793 if (insn == dest)
8794 return 1;
8795 else
8796 distance += get_attr_length (insn);
8798 for (distance = 0, insn = NEXT_INSN (branch1);
8799 insn && distance < 256;
8800 insn = NEXT_INSN (insn))
8802 if (insn == dest)
8803 return 1;
8804 else
8805 distance += get_attr_length (insn);
8808 return 0;
8811 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8813 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8814 unsigned int new_reg)
8816 /* Interrupt functions can only use registers that have already been
8817 saved by the prologue, even if they would normally be
8818 call-clobbered. */
8820 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8821 return 0;
8823 return 1;
8826 /* Function to update the integer COST
8827 based on the relationship between INSN that is dependent on
8828 DEP_INSN through the dependence LINK. The default is to make no
8829 adjustment to COST. This can be used for example to specify to
8830 the scheduler that an output- or anti-dependence does not incur
8831 the same cost as a data-dependence. The return value should be
8832 the new value for COST. */
8833 static int
8834 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8836 rtx reg, use_pat;
8838 if (TARGET_SHMEDIA)
8840 /* On SHmedia, if the dependence is an anti-dependence or
8841 output-dependence, there is no cost. */
8842 if (REG_NOTE_KIND (link) != 0)
8844 /* However, dependencies between target register loads and
8845 uses of the register in a subsequent block that are separated
8846 	     by a conditional branch are not modelled - we have to make do with
8847 the anti-dependency between the target register load and the
8848 conditional branch that ends the current block. */
8849 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8850 && GET_CODE (PATTERN (dep_insn)) == SET
8851 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8852 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8853 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8855 int orig_cost = cost;
8856 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8857 rtx target = ((! note
8858 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8859 ? insn : JUMP_LABEL (insn));
8860 /* On the likely path, the branch costs 1, on the unlikely path,
8861 it costs 3. */
8862 cost--;
8864 target = next_active_insn (target);
8865 while (target && ! flow_dependent_p (target, dep_insn)
8866 && --cost > 0);
8867 /* If two branches are executed in immediate succession, with the
8868 first branch properly predicted, this causes a stall at the
8869 second branch, hence we won't need the target for the
8870 second branch for two cycles after the launch of the first
8871 branch. */
8872 if (cost > orig_cost - 2)
8873 cost = orig_cost - 2;
8875 else
8876 cost = 0;
8879 else if (get_attr_is_mac_media (insn)
8880 && get_attr_is_mac_media (dep_insn))
8881 cost = 1;
8883 else if (! reload_completed
8884 && GET_CODE (PATTERN (insn)) == SET
8885 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8886 && GET_CODE (PATTERN (dep_insn)) == SET
8887 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8888 && cost < 4)
8889 cost = 4;
8890 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8891 that is needed at the target. */
8892 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8893 && ! flow_dependent_p (insn, dep_insn))
8894 cost--;
8896 else if (REG_NOTE_KIND (link) == 0)
8898 enum attr_type type;
8899 rtx dep_set;
8901 if (recog_memoized (insn) < 0
8902 || recog_memoized (dep_insn) < 0)
8903 return cost;
8905 dep_set = single_set (dep_insn);
8907 /* The latency that we specify in the scheduling description refers
8908 to the actual output, not to an auto-increment register; for that,
8909 the latency is one. */
8910 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8912 rtx set = single_set (insn);
8914 if (set
8915 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8916 && (!MEM_P (SET_DEST (set))
8917 || !reg_mentioned_p (SET_DEST (dep_set),
8918 XEXP (SET_DEST (set), 0))))
8919 cost = 1;
8921 /* The only input for a call that is timing-critical is the
8922 function's address. */
8923 if (GET_CODE (insn) == CALL_INSN)
8925 rtx call = PATTERN (insn);
8927 if (GET_CODE (call) == PARALLEL)
8928 call = XVECEXP (call, 0 ,0);
8929 if (GET_CODE (call) == SET)
8930 call = SET_SRC (call);
8931 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8932 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8933 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8934 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8935 cost -= TARGET_SH4_300 ? 3 : 6;
8937 	  /* Likewise, the most timing critical input for an sfunc call
8938 is the function address. However, sfuncs typically start
8939 using their arguments pretty quickly.
8940 Assume a four cycle delay for SH4 before they are needed.
8941 Cached ST40-300 calls are quicker, so assume only a one
8942 cycle delay there.
8943 ??? Maybe we should encode the delays till input registers
8944 are needed by sfuncs into the sfunc call insn. */
8945 /* All sfunc calls are parallels with at least four components.
8946 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8947 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8948 && XVECLEN (PATTERN (insn), 0) >= 4
8949 && (reg = sfunc_uses_reg (insn)))
8951 if (! reg_set_p (reg, dep_insn))
8952 cost -= TARGET_SH4_300 ? 1 : 4;
8954 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8956 enum attr_type dep_type = get_attr_type (dep_insn);
8958 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8959 cost--;
8960 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8961 && (type = get_attr_type (insn)) != TYPE_CALL
8962 && type != TYPE_SFUNC)
8963 cost--;
8964 /* When the preceding instruction loads the shift amount of
8965 the following SHAD/SHLD, the latency of the load is increased
8966 by 1 cycle. */
8967 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8968 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8969 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8970 XEXP (SET_SRC (single_set (insn)),
8971 1)))
8972 cost++;
8973 /* When an LS group instruction with a latency of less than
8974 3 cycles is followed by a double-precision floating-point
8975 instruction, FIPR, or FTRV, the latency of the first
8976 instruction is increased to 3 cycles. */
8977 else if (cost < 3
8978 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8979 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8980 cost = 3;
8981 /* The lsw register of a double-precision computation is ready one
8982 cycle earlier. */
8983 else if (reload_completed
8984 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8985 && (use_pat = single_set (insn))
8986 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8987 SET_SRC (use_pat)))
8988 cost -= 1;
8990 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8991 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8992 cost -= 1;
8994 else if (TARGET_SH4_300)
8996 /* Stores need their input register two cycles later. */
8997 if (dep_set && cost >= 1
8998 && ((type = get_attr_type (insn)) == TYPE_STORE
8999 || type == TYPE_PSTORE
9000 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9002 rtx set = single_set (insn);
9004 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9005 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9007 cost -= 2;
9008 /* But don't reduce the cost below 1 if the address depends
9009 on a side effect of dep_insn. */
9010 if (cost < 1
9011 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9012 cost = 1;
9017 /* An anti-dependence penalty of two applies if the first insn is a double
9018 precision fadd / fsub / fmul. */
9019 else if (!TARGET_SH4_300
9020 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9021 && recog_memoized (dep_insn) >= 0
9022 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9023 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9024 /* A lot of alleged anti-flow dependences are fake,
9025 so check this one is real. */
9026 && flow_dependent_p (dep_insn, insn))
9027 cost = 2;
9029 return cost;
9032 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9033 if DEP_INSN is anti-flow dependent on INSN. */
9034 static int
9035 flow_dependent_p (rtx insn, rtx dep_insn)
9037 rtx tmp = PATTERN (insn);
9039 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9040 return tmp == NULL_RTX;
9043 /* A helper function for flow_dependent_p called through note_stores. */
9044 static void
9045 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9047 rtx * pinsn = (rtx *) data;
9049 if (*pinsn && reg_referenced_p (x, *pinsn))
9050 *pinsn = NULL_RTX;
9053 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9054 'special function' patterns (type sfunc) that clobber pr, but that
9055 do not look like function calls to leaf_function_p. Hence we must
9056 do this extra check. */
9057 static int
9058 sh_pr_n_sets (void)
9060 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9063 /* Return where to allocate pseudo for a given hard register initial
9064 value. */
9065 static rtx
9066 sh_allocate_initial_value (rtx hard_reg)
9068 rtx x;
9070 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9072 if (current_function_is_leaf
9073 && ! sh_pr_n_sets ()
9074 && ! (TARGET_SHCOMPACT
9075 && ((current_function_args_info.call_cookie
9076 & ~ CALL_COOKIE_RET_TRAMP (1))
9077 || current_function_has_nonlocal_label)))
9078 x = hard_reg;
9079 else
9080 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9082 else
9083 x = NULL_RTX;
9085 return x;
9088 /* This function returns "2" to indicate dual issue for the SH4
9089 processor. To be used by the DFA pipeline description. */
9090 static int
9091 sh_issue_rate (void)
9093 if (TARGET_SUPERSCALAR)
9094 return 2;
9095 else
9096 return 1;
9099 /* Functions for ready queue reordering for sched1. */
9101 /* Get weight for mode for a set x. */
9102 static short
9103 find_set_regmode_weight (rtx x, enum machine_mode mode)
9105 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9106 return 1;
9107 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9109 if (GET_CODE (SET_DEST (x)) == REG)
9111 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9112 return 1;
9113 else
9114 return 0;
9116 return 1;
9118 return 0;
9121 /* Get regmode weight for insn. */
9122 static short
9123 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9125 short reg_weight = 0;
9126 rtx x;
9128 /* Increment weight for each register born here. */
9129 x = PATTERN (insn);
9130 reg_weight += find_set_regmode_weight (x, mode);
9131 if (GET_CODE (x) == PARALLEL)
9133 int j;
9134 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9136 x = XVECEXP (PATTERN (insn), 0, j);
9137 reg_weight += find_set_regmode_weight (x, mode);
9140 /* Decrement weight for each register that dies here. */
9141 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9143 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9145 rtx note = XEXP (x, 0);
9146 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9147 reg_weight--;
9150 return reg_weight;
9153 /* Calculate regmode weights for all insns of a basic block. */
9154 static void
9155 find_regmode_weight (basic_block b, enum machine_mode mode)
9157 rtx insn, next_tail, head, tail;
9159 get_ebb_head_tail (b, b, &head, &tail);
9160 next_tail = NEXT_INSN (tail);
9162 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9164 /* Handle register life information. */
9165 if (!INSN_P (insn))
9166 continue;
9168 if (mode == SFmode)
9169 INSN_REGMODE_WEIGHT (insn, mode) =
9170 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9171 else if (mode == SImode)
9172 INSN_REGMODE_WEIGHT (insn, mode) =
9173 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9177 /* Comparison function for ready queue sorting. */
9178 static int
9179 rank_for_reorder (const void *x, const void *y)
9181 rtx tmp = *(const rtx *) y;
9182 rtx tmp2 = *(const rtx *) x;
9184   /* The insn in a schedule group should be issued first.  */
9185 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9186 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9188   /* If insns are equally good, sort by INSN_LUID (original insn order).  This
9189 minimizes instruction movement, thus minimizing sched's effect on
9190 register pressure. */
9191 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9194 /* Resort the array A in which only the element at index N may be out of order.  */
9195 static void
9196 swap_reorder (rtx *a, int n)
9198 rtx insn = a[n - 1];
9199 int i = n - 2;
9201 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9203 a[i + 1] = a[i];
9204 i -= 1;
9206 a[i + 1] = insn;
9209 #define SCHED_REORDER(READY, N_READY) \
9210 do \
9212 if ((N_READY) == 2) \
9213 swap_reorder (READY, N_READY); \
9214 else if ((N_READY) > 2) \
9215 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9217 while (0)
9219 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9220 macro. */
9221 static void
9222 ready_reorder (rtx *ready, int nready)
9224 SCHED_REORDER (ready, nready);
9227 /* Calculate regmode weights for all insns of all basic blocks.  */
9228 static void
9229 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9230 int verbose ATTRIBUTE_UNUSED,
9231 int old_max_uid)
9233 basic_block b;
9235 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9236 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9238 FOR_EACH_BB_REVERSE (b)
9240 find_regmode_weight (b, SImode);
9241 find_regmode_weight (b, SFmode);
9244 CURR_REGMODE_PRESSURE (SImode) = 0;
9245 CURR_REGMODE_PRESSURE (SFmode) = 0;
9249 /* Cleanup. */
9250 static void
9251 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9252 int verbose ATTRIBUTE_UNUSED)
9254 if (regmode_weight[0])
9256 free (regmode_weight[0]);
9257 regmode_weight[0] = NULL;
9259 if (regmode_weight[1])
9261 free (regmode_weight[1]);
9262 regmode_weight[1] = NULL;
9266 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9267 keep count of register pressures on SImode and SFmode. */
9268 static int
9269 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9270 int sched_verbose ATTRIBUTE_UNUSED,
9271 rtx insn,
9272 int can_issue_more)
9274 if (GET_CODE (PATTERN (insn)) != USE
9275 && GET_CODE (PATTERN (insn)) != CLOBBER)
9276 cached_can_issue_more = can_issue_more - 1;
9277 else
9278 cached_can_issue_more = can_issue_more;
9280 if (reload_completed)
9281 return cached_can_issue_more;
9283 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9284 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9286 return cached_can_issue_more;
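/* Reset the SImode and SFmode pressure counters at the start of each
   scheduling region.  */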
9289 static void
9290 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9291 int verbose ATTRIBUTE_UNUSED,
9292 int veclen ATTRIBUTE_UNUSED)
9294 CURR_REGMODE_PRESSURE (SImode) = 0;
9295 CURR_REGMODE_PRESSURE (SFmode) = 0;
9298 /* Some magic numbers. */
9299 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9300 functions that already have high pressure on r0. */
9301 #define R0_MAX_LIFE_REGIONS 2
9302 #define R0_MAX_LIVE_LENGTH 12
9303 /* Register Pressure thresholds for SImode and SFmode registers. */
9304 #define SIMODE_MAX_WEIGHT 5
9305 #define SFMODE_MAX_WEIGHT 10
9307 /* Return true if the pressure is high for MODE. */
9308 static short
9309 high_pressure (enum machine_mode mode)
9311   /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9312 functions that already have high pressure on r0. */
9313 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9314 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9315 return 1;
9317 if (mode == SFmode)
9318 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9319 else
9320 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9323 /* Reorder ready queue if register pressure is high. */
9324 static int
9325 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9326 int sched_verbose ATTRIBUTE_UNUSED,
9327 rtx *ready,
9328 int *n_readyp,
9329 int clock_var ATTRIBUTE_UNUSED)
9331 if (reload_completed)
9332 return sh_issue_rate ();
9334 if (high_pressure (SFmode) || high_pressure (SImode))
9336 ready_reorder (ready, *n_readyp);
9339 return sh_issue_rate ();
9342 /* Skip cycles if the current register pressure is high. */
9343 static int
9344 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9345 int sched_verbose ATTRIBUTE_UNUSED,
9346 rtx *ready ATTRIBUTE_UNUSED,
9347 int *n_readyp ATTRIBUTE_UNUSED,
9348 int clock_var ATTRIBUTE_UNUSED)
9350 if (reload_completed)
9351 return cached_can_issue_more;
9353 if (high_pressure(SFmode) || high_pressure (SImode))
9354 skip_cycles = 1;
9356 return cached_can_issue_more;
9359 /* Skip cycles without sorting the ready queue.  This will move insns from
9360    Q->R.  If this is the last cycle we are skipping, allow sorting of the ready
9361 queue by sh_reorder. */
9363 /* Generally, skipping this many cycles is sufficient for all insns to move
9364 from Q -> R. */
9365 #define MAX_SKIPS 8
9367 static int
9368 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9369 int sched_verbose ATTRIBUTE_UNUSED,
9370 rtx insn ATTRIBUTE_UNUSED,
9371 int last_clock_var,
9372 int clock_var,
9373 int *sort_p)
9375 if (reload_completed)
9376 return 0;
9378 if (skip_cycles)
9380 if ((clock_var - last_clock_var) < MAX_SKIPS)
9382 *sort_p = 0;
9383 return 1;
9385 /* If this is the last cycle we are skipping, allow reordering of R. */
9386 if ((clock_var - last_clock_var) == MAX_SKIPS)
9388 *sort_p = 1;
9389 return 1;
9393 skip_cycles = 0;
9395 return 0;
9398 /* SHmedia requires registers for branches, so we can't generate new
9399 branches past reload. */
9400 static bool
9401 sh_cannot_modify_jumps_p (void)
9403 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
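/* Return the class used for branch target registers: TARGET_REGS on
   SHmedia, NO_REGS elsewhere.  */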
9406 static int
9407 sh_target_reg_class (void)
9409 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
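/* Decide whether optimizing branch target register loads into
   callee-saved target registers is worthwhile: only when stack space has
   been reserved for them, and only when enough registers are live for
   the extra saves to pay off.  */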
9412 static bool
9413 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9415 HARD_REG_SET dummy;
9416 #if 0
9417 rtx insn;
9418 #endif
9420 if (! shmedia_space_reserved_for_target_registers)
9421 return 0;
9422 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9423 return 0;
9424 if (calc_live_regs (&dummy) >= 6 * 8)
9425 return 1;
9426 #if 0
9427 /* This is a borderline case. See if we got a nested loop, or a loop
9428 with a call, or with more than 4 labels inside. */
9429 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9431 if (GET_CODE (insn) == NOTE
9432 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9434 int labels = 0;
9438 insn = NEXT_INSN (insn);
9439 if ((GET_CODE (insn) == NOTE
9440 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9441 || GET_CODE (insn) == CALL_INSN
9442 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9443 return 1;
9445 while (GET_CODE (insn) != NOTE
9446 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9449 #endif
9450 return 0;
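/* Use the MS-compatible bit-field layout for SH5 and for the Renesas /
   Hitachi ABIs.  */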
9453 static bool
9454 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9456 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9460 On the SH1..SH4, the trampoline looks like
9461 2 0002 D202 mov.l l2,r2
9462 1 0000 D301 mov.l l1,r3
9463 3 0004 422B jmp @r2
9464 4 0006 0009 nop
9465 5 0008 00000000 l1: .long area
9466 6 000c 00000000 l2: .long function
9468 SH5 (compact) uses r1 instead of r3 for the static chain. */
9471 /* Emit RTL insns to initialize the variable parts of a trampoline.
9472 FNADDR is an RTX for the address of the function's pure code.
9473 CXT is an RTX for the static chain value for the function. */
9475 void
9476 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9478 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9480 if (TARGET_SHMEDIA64)
9482 rtx tramp_templ;
9483 int fixed_len;
9485 rtx movi1 = GEN_INT (0xcc000010);
9486 rtx shori1 = GEN_INT (0xc8000010);
9487 rtx src, dst;
9489 /* The following trampoline works within a +- 128 KB range for cxt:
9490 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9491 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9492 gettr tr1,r1; blink tr0,r63 */
9493 /* Address rounding makes it hard to compute the exact bounds of the
9494 offset for this trampoline, but we have a rather generous offset
9495 range, so frame_offset should do fine as an upper bound. */
9496 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9498 /* ??? could optimize this trampoline initialization
9499 by writing DImode words with two insns each. */
9500 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9501 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9502 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9503 insn = gen_rtx_AND (DImode, insn, mask);
9504 /* Or in ptb/u .,tr1 pattern */
9505 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9506 insn = force_operand (insn, NULL_RTX);
9507 insn = gen_lowpart (SImode, insn);
9508 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9509 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9510 insn = gen_rtx_AND (DImode, insn, mask);
9511 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9512 insn = gen_lowpart (SImode, insn);
9513 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9514 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9515 insn = gen_rtx_AND (DImode, insn, mask);
9516 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9517 insn = gen_lowpart (SImode, insn);
9518 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9519 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9520 insn = gen_rtx_AND (DImode, insn, mask);
9521 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9522 insn = gen_lowpart (SImode, insn);
9523 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9524 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9525 insn = gen_rtx_AND (DImode, insn, mask);
9526 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9527 insn = gen_lowpart (SImode, insn);
9528 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9529 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9530 GEN_INT (0x6bf10600));
9531 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9532 GEN_INT (0x4415fc10));
9533 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9534 GEN_INT (0x4401fff0));
9535 emit_insn (gen_ic_invalidate_line (tramp));
9536 return;
9538 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9539 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9541 tramp_templ = gen_datalabel_ref (tramp_templ);
9542 dst = tramp_mem;
9543 src = gen_const_mem (BLKmode, tramp_templ);
9544 set_mem_align (dst, 256);
9545 set_mem_align (src, 64);
9546 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9548 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9549 emit_move_insn (adjust_address (tramp_mem, Pmode,
9550 fixed_len + GET_MODE_SIZE (Pmode)),
9551 cxt);
9552 emit_insn (gen_ic_invalidate_line (tramp));
9553 return;
9555 else if (TARGET_SHMEDIA)
9557 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9558 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9559 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9560 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9561 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
9562 rotated 10 right, and the upper 16 bits of every 32 selected. */
9563 rtx movishori
9564 = force_reg (V2HImode, (simplify_gen_subreg
9565 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9566 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9567 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9569 tramp = force_reg (Pmode, tramp);
9570 fnaddr = force_reg (SImode, fnaddr);
9571 cxt = force_reg (SImode, cxt);
9572 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9573 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9574 movishori));
9575 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9576 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9577 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9578 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9579 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9580 gen_rtx_SUBREG (V2HImode, cxt, 0),
9581 movishori));
9582 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9583 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9584 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9585 if (TARGET_LITTLE_ENDIAN)
9587 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9588 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9590 else
9592 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9593 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9595 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9596 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9597 emit_insn (gen_ic_invalidate_line (tramp));
9598 return;
9600 else if (TARGET_SHCOMPACT)
9602 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9603 return;
9605 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9606 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9607 SImode));
9608 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9609 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9610 SImode));
9611 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9612 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9613 if (TARGET_HARVARD)
9615 if (!TARGET_INLINE_IC_INVALIDATE
9616 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9617 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9618 FUNCTION_ORDINARY),
9619 0, VOIDmode, 1, tramp, SImode);
9620 else
9621 emit_insn (gen_ic_invalidate_line (tramp));
9625 /* FIXME: This is overly conservative. A SHcompact function that
9626 receives arguments ``by reference'' will have them stored in its
9627 own stack frame, so it must not pass pointers or references to
9628 these arguments to other functions by means of sibling calls. */
9629 /* If PIC, we cannot make sibling calls to global functions
9630 because the PLT requires r12 to be live. */
9631 static bool
9632 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9634 return (1
9635 && (! TARGET_SHCOMPACT
9636 || current_function_args_info.stack_regs == 0)
9637 && ! sh_cfun_interrupt_handler_p ()
9638 && (! flag_pic
9639 || (decl && ! TREE_PUBLIC (decl))
9640 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9643 /* Machine specific built-in functions. */
9645 struct builtin_description
9647 const enum insn_code icode;
9648 const char *const name;
9649 int signature;
9652 /* Describe number and signedness of arguments; arg[0] == result
9653 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
9654 /* 9: 64-bit pointer, 10: 32-bit pointer.  */
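/* For example, SH_BLTIN_SH_HI below ({ 4, 4, 1 }) describes builtins whose
   result and first argument have "don't care" signedness and whose second
   argument (a shift count or control value) is unsigned.  */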
9655 static const char signature_args[][4] =
9657 #define SH_BLTIN_V2SI2 0
9658 { 4, 4 },
9659 #define SH_BLTIN_V4HI2 1
9660 { 4, 4 },
9661 #define SH_BLTIN_V2SI3 2
9662 { 4, 4, 4 },
9663 #define SH_BLTIN_V4HI3 3
9664 { 4, 4, 4 },
9665 #define SH_BLTIN_V8QI3 4
9666 { 4, 4, 4 },
9667 #define SH_BLTIN_MAC_HISI 5
9668 { 1, 4, 4, 1 },
9669 #define SH_BLTIN_SH_HI 6
9670 { 4, 4, 1 },
9671 #define SH_BLTIN_SH_SI 7
9672 { 4, 4, 1 },
9673 #define SH_BLTIN_V4HI2V2SI 8
9674 { 4, 4, 4 },
9675 #define SH_BLTIN_V4HI2V8QI 9
9676 { 4, 4, 4 },
9677 #define SH_BLTIN_SISF 10
9678 { 4, 2 },
9679 #define SH_BLTIN_LDUA_L 11
9680 { 2, 10 },
9681 #define SH_BLTIN_LDUA_Q 12
9682 { 1, 10 },
9683 #define SH_BLTIN_STUA_L 13
9684 { 0, 10, 2 },
9685 #define SH_BLTIN_STUA_Q 14
9686 { 0, 10, 1 },
9687 #define SH_BLTIN_LDUA_L64 15
9688 { 2, 9 },
9689 #define SH_BLTIN_LDUA_Q64 16
9690 { 1, 9 },
9691 #define SH_BLTIN_STUA_L64 17
9692 { 0, 9, 2 },
9693 #define SH_BLTIN_STUA_Q64 18
9694 { 0, 9, 1 },
9695 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9696 #define SH_BLTIN_2 19
9697 #define SH_BLTIN_SU 19
9698 { 1, 2 },
9699 #define SH_BLTIN_3 20
9700 #define SH_BLTIN_SUS 20
9701 { 2, 2, 1 },
9702 #define SH_BLTIN_PSSV 21
9703 { 0, 8, 2, 2 },
9704 #define SH_BLTIN_XXUU 22
9705 #define SH_BLTIN_UUUU 22
9706 { 1, 1, 1, 1 },
9707 #define SH_BLTIN_PV 23
9708 { 0, 8 },
9710 /* mcmv: operands considered unsigned. */
9711 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9712 /* mperm: control value considered unsigned int. */
9713 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9714 /* mshards_q: returns signed short. */
9715 /* nsb: takes long long arg, returns unsigned char. */
9716 static const struct builtin_description bdesc[] =
9718 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9719 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9720 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9721 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9722 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9723 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9724 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9725 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9726 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9727 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9728 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9729 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9730 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9731 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9732 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9733 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9734 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9735 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9736 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9737 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9738 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9739 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9740 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9741 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9742 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9743 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9744 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9745 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9746 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9747 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9748 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9749 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9750 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9751 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9752 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9753 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9754 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9755 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9756 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9757 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9758 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9759 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9760 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9761 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9762 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9763 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9764 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9765 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9766 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9767 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9768 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9769 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9770 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9771 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9772 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9773 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9774 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9775 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9776 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9777 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9778 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9779 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9780 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9781 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9782 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9783 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9784 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9785 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9786 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9787 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9788 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9789 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9790 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9791 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9792 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9793 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9794 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9795 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9796 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9797 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9798 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9799 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9800 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9801 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
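/* Register the SHmedia builtin functions described in the bdesc table
   above, building (and caching) the function types implied by each
   entry's signature.  */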
9804 static void
9805 sh_media_init_builtins (void)
9807 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9808 const struct builtin_description *d;
9810 memset (shared, 0, sizeof shared);
9811 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9813 tree type, arg_type = 0;
9814 int signature = d->signature;
9815 int i;
9817 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9818 type = shared[signature];
9819 else
9821 int has_result = signature_args[signature][0] != 0;
9823 if ((signature_args[signature][1] & 8)
9824 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9825 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9826 continue;
9827 if (! TARGET_FPU_ANY
9828 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9829 continue;
9830 type = void_list_node;
9831 for (i = 3; ; i--)
9833 int arg = signature_args[signature][i];
9834 int opno = i - 1 + has_result;
9836 if (arg & 8)
9837 arg_type = ptr_type_node;
9838 else if (arg)
9839 arg_type = (*lang_hooks.types.type_for_mode)
9840 (insn_data[d->icode].operand[opno].mode,
9841 (arg & 1));
9842 else if (i)
9843 continue;
9844 else
9845 arg_type = void_type_node;
9846 if (i == 0)
9847 break;
9848 type = tree_cons (NULL_TREE, arg_type, type);
9850 type = build_function_type (arg_type, type);
9851 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9852 shared[signature] = type;
9854 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9855 NULL, NULL_TREE);
9859 /* Implements target hook vector_mode_supported_p. */
9860 bool
9861 sh_vector_mode_supported_p (enum machine_mode mode)
9863 if (TARGET_FPU_ANY
9864 && ((mode == V2SFmode)
9865 || (mode == V4SFmode)
9866 || (mode == V16SFmode)))
9867 return true;
9869 else if (TARGET_SHMEDIA
9870 && ((mode == V8QImode)
9871 || (mode == V2HImode)
9872 || (mode == V4HImode)
9873 || (mode == V2SImode)))
9874 return true;
9876 return false;
9879 /* Implements target hook dwarf_calling_convention. Return an enum
9880 dwarf_calling_convention value. */
9882 sh_dwarf_calling_convention (tree func)
9884 if (sh_attr_renesas_p (func))
9885 return DW_CC_GNU_renesas_sh;
9887 return DW_CC_normal;
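/* Set up the target builtins; only SHmedia has any at present.  */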
9890 static void
9891 sh_init_builtins (void)
9893 if (TARGET_SHMEDIA)
9894 sh_media_init_builtins ();
9897 /* Expand an expression EXP that calls a built-in function,
9898 with result going to TARGET if that's convenient
9899 (and in mode MODE if that's convenient).
9900 SUBTARGET may be used as the target for computing one of EXP's operands.
9901 IGNORE is nonzero if the value is to be ignored. */
9903 static rtx
9904 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9905 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9907 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9908 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9909 const struct builtin_description *d = &bdesc[fcode];
9910 enum insn_code icode = d->icode;
9911 int signature = d->signature;
9912 enum machine_mode tmode = VOIDmode;
9913 int nop = 0, i;
9914 rtx op[4];
9915 rtx pat = 0;
9917 if (signature_args[signature][0])
9919 if (ignore)
9920 return 0;
9922 tmode = insn_data[icode].operand[0].mode;
9923 if (! target
9924 || GET_MODE (target) != tmode
9925 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9926 target = gen_reg_rtx (tmode);
9927 op[nop++] = target;
9929 else
9930 target = 0;
9932 for (i = 1; i <= 3; i++, nop++)
9934 tree arg;
9935 enum machine_mode opmode, argmode;
9936 tree optype;
9938 if (! signature_args[signature][i])
9939 break;
9940 arg = CALL_EXPR_ARG (exp, i - 1);
9941 if (arg == error_mark_node)
9942 return const0_rtx;
9943 if (signature_args[signature][i] & 8)
9945 opmode = ptr_mode;
9946 optype = ptr_type_node;
9948 else
9950 opmode = insn_data[icode].operand[nop].mode;
9951 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9953 argmode = TYPE_MODE (TREE_TYPE (arg));
9954 if (argmode != opmode)
9955 arg = build1 (NOP_EXPR, optype, arg);
9956 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9957 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9958 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9961 switch (nop)
9963 case 1:
9964 pat = (*insn_data[d->icode].genfun) (op[0]);
9965 break;
9966 case 2:
9967 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9968 break;
9969 case 3:
9970 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9971 break;
9972 case 4:
9973 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9974 break;
9975 default:
9976 gcc_unreachable ();
9978 if (! pat)
9979 return 0;
9980 emit_insn (pat);
9981 return target;
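/* Expand the unary operation CODE elementwise on the V2SF value OP1,
   storing the result in OP0, by emitting one single-precision operation
   per vector element.  */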
9984 void
9985 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9987 rtx sel0 = const0_rtx;
9988 rtx sel1 = const1_rtx;
9989 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9990 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9992 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9993 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
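/* Likewise for the binary operation CODE on the V2SF values OP1 and OP2,
   storing the result in OP0.  */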
9996 void
9997 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9999 rtx sel0 = const0_rtx;
10000 rtx sel1 = const1_rtx;
10001 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
10002 = gen_binary_sf_op;
10003 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10005 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
10006 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
10009 /* Return the class of registers for which a mode change from FROM to TO
10010 is invalid. */
10011 bool
10012 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10013 enum reg_class class)
10015 /* We want to enable the use of SUBREGs as a means to
10016 VEC_SELECT a single element of a vector. */
10017 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10018 return (reg_classes_intersect_p (GENERAL_REGS, class));
10020 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10022 if (TARGET_LITTLE_ENDIAN)
10024 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10025 return reg_classes_intersect_p (DF_REGS, class);
10027 else
10029 if (GET_MODE_SIZE (from) < 8)
10030 return reg_classes_intersect_p (DF_HI_REGS, class);
10033 return 0;
10037 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10038 that label is used. */
10040 void
10041 sh_mark_label (rtx address, int nuses)
10043 if (GOTOFF_P (address))
10045 /* Extract the label or symbol. */
10046 address = XEXP (address, 0);
10047 if (GET_CODE (address) == PLUS)
10048 address = XEXP (address, 0);
10049 address = XVECEXP (address, 0, 0);
10051 if (GET_CODE (address) == LABEL_REF
10052 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10053 LABEL_NUSES (XEXP (address, 0)) += nuses;
10056 /* Compute extra cost of moving data between one register class
10057 and another. */
10059 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10060 uses this information. Hence, the general register <-> floating point
10061 register information here is not used for SFmode. */
10064 sh_register_move_cost (enum machine_mode mode,
10065 enum reg_class srcclass, enum reg_class dstclass)
10067 if (dstclass == T_REGS || dstclass == PR_REGS)
10068 return 10;
10070 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10071 return 4;
10073 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10074 && REGCLASS_HAS_FP_REG (srcclass)
10075 && REGCLASS_HAS_FP_REG (dstclass))
10076 return 4;
10078 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10079 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10081 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10082 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10083 return 9;
10085 if ((REGCLASS_HAS_FP_REG (dstclass)
10086 && REGCLASS_HAS_GENERAL_REG (srcclass))
10087 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10088 && REGCLASS_HAS_FP_REG (srcclass)))
10089 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10090 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10092 if ((dstclass == FPUL_REGS
10093 && REGCLASS_HAS_GENERAL_REG (srcclass))
10094 || (srcclass == FPUL_REGS
10095 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10096 return 5;
10098 if ((dstclass == FPUL_REGS
10099 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10100 || (srcclass == FPUL_REGS
10101 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10102 return 7;
10104 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10105 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10106 return 20;
10108 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10109 if (TARGET_SHMEDIA
10110 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10112 if (sh_gettrcost >= 0)
10113 return sh_gettrcost;
10114 else if (!TARGET_PT_FIXED)
10115 return 100;
10118 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10119 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10120 return 4;
10122 if (TARGET_SHMEDIA
10123 || (TARGET_FMOVD
10124 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10125 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10126 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10128 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10131 static rtx emit_load_ptr (rtx, rtx);
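/* Emit an insn that loads a ptr_mode value from ADDR into REG,
   sign-extending it to Pmode when the two modes differ.  */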
10133 static rtx
10134 emit_load_ptr (rtx reg, rtx addr)
10136 rtx mem = gen_const_mem (ptr_mode, addr);
10138 if (Pmode != ptr_mode)
10139 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10140 return emit_move_insn (reg, mem);
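/* Output code for a thunk that adjusts the incoming `this' pointer by
   DELTA (and, if VCALL_OFFSET is nonzero, by the word stored at
   *(this + delta) + vcall_offset) and then tail-calls FUNCTION.  */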
10143 static void
10144 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10145 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10146 tree function)
10148 CUMULATIVE_ARGS cum;
10149 int structure_value_byref = 0;
10150 rtx this, this_value, sibcall, insns, funexp;
10151 tree funtype = TREE_TYPE (function);
10152 int simple_add = CONST_OK_FOR_ADD (delta);
10153 int did_load = 0;
10154 rtx scratch0, scratch1, scratch2;
10155 unsigned i;
10157 reload_completed = 1;
10158 epilogue_completed = 1;
10159 no_new_pseudos = 1;
10160 current_function_uses_only_leaf_regs = 1;
10161 reset_block_changes ();
10163 emit_note (NOTE_INSN_PROLOGUE_END);
10165 /* Find the "this" pointer. We have such a wide range of ABIs for the
10166 SH that it's best to do this completely machine independently.
10167 "this" is passed as first argument, unless a structure return pointer
10168 comes first, in which case "this" comes second. */
10169 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10170 #ifndef PCC_STATIC_STRUCT_RETURN
10171 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10172 structure_value_byref = 1;
10173 #endif /* not PCC_STATIC_STRUCT_RETURN */
10174 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10176 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10178 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10180 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10182 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10183 static chain pointer (even if you can't have nested virtual functions
10184 right now, someone might implement them sometime), and the rest of the
10185 registers are used for argument passing, are callee-saved, or reserved. */
10186 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10187 -ffixed-reg has been used. */
10188 if (! call_used_regs[0] || fixed_regs[0])
10189 error ("r0 needs to be available as a call-clobbered register");
10190 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10191 if (! TARGET_SH5)
10193 if (call_used_regs[1] && ! fixed_regs[1])
10194 scratch1 = gen_rtx_REG (ptr_mode, 1);
10195 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10196 to the location where struct values are to be returned. */
10197 if (call_used_regs[3] && ! fixed_regs[3])
10198 scratch2 = gen_rtx_REG (Pmode, 3);
10200 else if (TARGET_SHMEDIA)
10202 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10203 if (i != REGNO (scratch0) &&
10204 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10206 scratch1 = gen_rtx_REG (ptr_mode, i);
10207 break;
10209 if (scratch1 == scratch0)
10210 error ("Need a second call-clobbered general purpose register");
10211 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10212 if (call_used_regs[i] && ! fixed_regs[i])
10214 scratch2 = gen_rtx_REG (Pmode, i);
10215 break;
10217 if (scratch2 == scratch0)
10218 error ("Need a call-clobbered target register");
10221 this_value = plus_constant (this, delta);
10222 if (vcall_offset
10223 && (simple_add || scratch0 != scratch1)
10224 && strict_memory_address_p (ptr_mode, this_value))
10226 emit_load_ptr (scratch0, this_value);
10227 did_load = 1;
10230 if (!delta)
10231 ; /* Do nothing. */
10232 else if (simple_add)
10233 emit_move_insn (this, this_value);
10234 else
10236 emit_move_insn (scratch1, GEN_INT (delta));
10237 emit_insn (gen_add2_insn (this, scratch1));
10240 if (vcall_offset)
10242 rtx offset_addr;
10244 if (!did_load)
10245 emit_load_ptr (scratch0, this);
10247 offset_addr = plus_constant (scratch0, vcall_offset);
10248 if (strict_memory_address_p (ptr_mode, offset_addr))
10249 ; /* Do nothing. */
10250 else if (! TARGET_SH5 && scratch0 != scratch1)
10252 /* scratch0 != scratch1, and we have indexed loads. Get better
10253 schedule by loading the offset into r1 and using an indexed
10254 load - then the load of r1 can issue before the load from
10255 (this + delta) finishes. */
10256 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10257 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10259 else if (CONST_OK_FOR_ADD (vcall_offset))
10261 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10262 offset_addr = scratch0;
10264 else if (scratch0 != scratch1)
10266 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10267 emit_insn (gen_add2_insn (scratch0, scratch1));
10268 offset_addr = scratch0;
10270 else
10271 gcc_unreachable (); /* FIXME */
10272 emit_load_ptr (scratch0, offset_addr);
10274 if (Pmode != ptr_mode)
10275 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10276 emit_insn (gen_add2_insn (this, scratch0));
10279 /* Generate a tail call to the target function. */
10280 if (! TREE_USED (function))
10282 assemble_external (function);
10283 TREE_USED (function) = 1;
10285 funexp = XEXP (DECL_RTL (function), 0);
10286 /* If the function is overridden, so is the thunk, hence we don't
10287 need GOT addressing even if this is a public symbol. */
10288 #if 0
10289 if (TARGET_SH1 && ! flag_weak)
10290 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10291 else
10292 #endif
10293 if (TARGET_SH2 && flag_pic)
10295 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10296 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10298 else
10300 if (TARGET_SHMEDIA && flag_pic)
10302 funexp = gen_sym2PIC (funexp);
10303 PUT_MODE (funexp, Pmode);
10305 emit_move_insn (scratch2, funexp);
10306 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10307 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10309 sibcall = emit_call_insn (sibcall);
10310 SIBLING_CALL_P (sibcall) = 1;
10311 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10312 emit_barrier ();
10314 /* Run just enough of rest_of_compilation to do scheduling and get
10315 the insns emitted. Note that use_thunk calls
10316 assemble_start_function and assemble_end_function. */
10318 insn_locators_initialize ();
10319 insns = get_insns ();
10321 if (optimize > 0)
10323 /* Initialize the bitmap obstacks. */
10324 bitmap_obstack_initialize (NULL);
10325 bitmap_obstack_initialize (&reg_obstack);
10326 if (! cfun->cfg)
10327 init_flow ();
10328 rtl_register_cfg_hooks ();
10329 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10330 init_rtl_bb_info (EXIT_BLOCK_PTR);
10331 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10332 EXIT_BLOCK_PTR->flags |= BB_RTL;
10333 find_basic_blocks (insns);
10335 if (flag_schedule_insns_after_reload)
10337 life_analysis (PROP_FINAL);
10339 split_all_insns (1);
10341 schedule_insns ();
10343 /* We must split jmp insn in PIC case. */
10344 else if (flag_pic)
10345 split_all_insns_noflow ();
10348 sh_reorg ();
10350 if (optimize > 0 && flag_delayed_branch)
10351 dbr_schedule (insns);
10353 shorten_branches (insns);
10354 final_start_function (insns, file, 1);
10355 final (insns, file, 1);
10356 final_end_function ();
10358 if (optimize > 0)
10360 /* Release all memory allocated by flow. */
10361 free_basic_block_vars ();
10363 /* Release the bitmap obstacks. */
10364 bitmap_obstack_release (&reg_obstack);
10365 bitmap_obstack_release (NULL);
10368 reload_completed = 0;
10369 epilogue_completed = 0;
10370 no_new_pseudos = 0;
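/* Return an rtx for the address of the function NAME of kind KIND,
   loading it through the GOT (or with a GOTOFF relocation) into TARGET
   or a fresh pseudo when PIC requires it.  */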
10374 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10376 rtx sym;
10378 /* If this is not an ordinary function, the name usually comes from a
10379 string literal or an sprintf buffer. Make sure we use the same
10380 string consistently, so that cse will be able to unify address loads. */
10381 if (kind != FUNCTION_ORDINARY)
10382 name = IDENTIFIER_POINTER (get_identifier (name));
10383 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10384 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10385 if (flag_pic)
10386 switch (kind)
10388 case FUNCTION_ORDINARY:
10389 break;
10390 case SFUNC_GOT:
10392 rtx reg = target ? target : gen_reg_rtx (Pmode);
10394 emit_insn (gen_symGOT2reg (reg, sym));
10395 sym = reg;
10396 break;
10398 case SFUNC_STATIC:
10400 /* ??? To allow cse to work, we use GOTOFF relocations.
10401 We could add combiner patterns to transform this into
10402 straight pc-relative calls with sym2PIC / bsrf when
10403 the label load and function call are still 1:1 and in the
10404 same basic block during combine. */
10405 rtx reg = target ? target : gen_reg_rtx (Pmode);
10407 emit_insn (gen_symGOTOFF2reg (reg, sym));
10408 sym = reg;
10409 break;
10412 if (target && sym != target)
10414 emit_move_insn (target, sym);
10415 return target;
10417 return sym;
10420 /* Find the number of a general purpose register in S. */
10421 static int
10422 scavenge_reg (HARD_REG_SET *s)
10424 int r;
10425 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10426 if (TEST_HARD_REG_BIT (*s, r))
10427 return r;
10428 return -1;
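/* Return an rtx holding the initial value of the PR (return address)
   register on entry to the current function.  */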
10432 sh_get_pr_initial_val (void)
10434 rtx val;
10436 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10437 PR register on SHcompact, because it might be clobbered by the prologue.
10438 We check first if that is known to be the case. */
10439 if (TARGET_SHCOMPACT
10440 && ((current_function_args_info.call_cookie
10441 & ~ CALL_COOKIE_RET_TRAMP (1))
10442 || current_function_has_nonlocal_label))
10443 return gen_frame_mem (SImode, return_address_pointer_rtx);
10445 /* If we haven't finished rtl generation, there might be a nonlocal label
10446 that we haven't seen yet.
10447 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10448 is set, unless it has been called before for the same register. And even
10449 then, we end in trouble if we didn't use the register in the same
10450 basic block before. So call get_hard_reg_initial_val now and wrap it
10451 in an unspec if we might need to replace it. */
10452 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10453 combine can put the pseudo returned by get_hard_reg_initial_val into
10454 instructions that need a general purpose register, which will fail to
10455 be recognized when the pseudo becomes allocated to PR. */
10456 val
10457 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10458 if (TARGET_SH1)
10459 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10460 return val;
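/* Try to expand an scc operation CODE on the last T-bit comparison
   (sh_compare_op0 must be the T register, sh_compare_op1 a constant)
   into TARGET.  Return 1 on success, 0 if it cannot be handled here.  */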
10464 sh_expand_t_scc (enum rtx_code code, rtx target)
10466 rtx result = target;
10467 HOST_WIDE_INT val;
10469 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10470 || GET_CODE (sh_compare_op1) != CONST_INT)
10471 return 0;
10472 if (GET_CODE (result) != REG)
10473 result = gen_reg_rtx (SImode);
10474 val = INTVAL (sh_compare_op1);
10475 if ((code == EQ && val == 1) || (code == NE && val == 0))
10476 emit_insn (gen_movt (result));
10477 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10479 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10480 emit_insn (gen_subc (result, result, result));
10481 emit_insn (gen_addsi3 (result, result, const1_rtx));
10483 else if (code == EQ || code == NE)
10484 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10485 else
10486 return 0;
10487 if (result != target)
10488 emit_move_insn (target, result);
10489 return 1;
10492 /* INSN is an sfunc; return the rtx that describes the address used. */
10493 static rtx
10494 extract_sfunc_addr (rtx insn)
10496 rtx pattern, part = NULL_RTX;
10497 int len, i;
10499 pattern = PATTERN (insn);
10500 len = XVECLEN (pattern, 0);
10501 for (i = 0; i < len; i++)
10503 part = XVECEXP (pattern, 0, i);
10504 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10505 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10506 return XEXP (part, 0);
10508 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10509 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10512 /* Verify that the register in use_sfunc_addr still agrees with the address
10513 used in the sfunc. This prevents fill_slots_from_thread from changing
10514 use_sfunc_addr.
10515 INSN is the use_sfunc_addr instruction, and REG is the register it
10516 guards. */
10518 check_use_sfunc_addr (rtx insn, rtx reg)
10520 /* Search for the sfunc. It should really come right after INSN. */
10521 while ((insn = NEXT_INSN (insn)))
10523 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10524 break;
10525 if (! INSN_P (insn))
10526 continue;
10528 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10529 insn = XVECEXP (PATTERN (insn), 0, 0);
10530 if (GET_CODE (PATTERN (insn)) != PARALLEL
10531 || get_attr_type (insn) != TYPE_SFUNC)
10532 continue;
10533 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10535 gcc_unreachable ();
10538 /* This function returns a constant rtx that represents 2**15 / pi in
10539 SFmode.  It's used to scale SFmode angles, in radians, to a
10540 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10541 maps to 0x10000).  */
10543 static GTY(()) rtx sh_fsca_sf2int_rtx;
10546 sh_fsca_sf2int (void)
10548 if (! sh_fsca_sf2int_rtx)
10550 REAL_VALUE_TYPE rv;
10552 real_from_string (&rv, "10430.378350470453");
10553 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10556 return sh_fsca_sf2int_rtx;
10559 /* This function returns a constant rtx that represents 2**15 / pi in
10560 DFmode.  It's used to scale DFmode angles, in radians, to a
10561 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10562 maps to 0x10000).  */
10564 static GTY(()) rtx sh_fsca_df2int_rtx;
10567 sh_fsca_df2int (void)
10569 if (! sh_fsca_df2int_rtx)
10571 REAL_VALUE_TYPE rv;
10573 real_from_string (&rv, "10430.378350470453");
10574 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10577 return sh_fsca_df2int_rtx;
10580 /* This function returns a constant rtx that represents pi / 2**15 in
10581 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10582 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10583 2*pi).  */
10585 static GTY(()) rtx sh_fsca_int2sf_rtx;
10588 sh_fsca_int2sf (void)
10590 if (! sh_fsca_int2sf_rtx)
10592 REAL_VALUE_TYPE rv;
10594 real_from_string (&rv, "9.587379924285257e-5");
10595 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10598 return sh_fsca_int2sf_rtx;
10601 /* Initialize the CUMULATIVE_ARGS structure. */
10603 void
10604 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10605 tree fntype,
10606 rtx libname ATTRIBUTE_UNUSED,
10607 tree fndecl,
10608 signed int n_named_args,
10609 enum machine_mode mode)
10611 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10612 pcum->free_single_fp_reg = 0;
10613 pcum->stack_regs = 0;
10614 pcum->byref_regs = 0;
10615 pcum->byref = 0;
10616 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10618 /* XXX - Should we check TARGET_HITACHI here ??? */
10619 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10621 if (fntype)
10623 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10624 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10625 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10626 pcum->arg_count [(int) SH_ARG_INT]
10627 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10629 pcum->call_cookie
10630 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10631 && pcum->arg_count [(int) SH_ARG_INT] == 0
10632 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10633 ? int_size_in_bytes (TREE_TYPE (fntype))
10634 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10635 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10636 == FIRST_RET_REG));
10638 else
10640 pcum->arg_count [(int) SH_ARG_INT] = 0;
10641 pcum->prototype_p = FALSE;
10642 if (mode != VOIDmode)
10644 pcum->call_cookie =
10645 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10646 && GET_MODE_SIZE (mode) > 4
10647 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10649 /* If the default ABI is the Renesas ABI then all library
10650 calls must assume that the library will be using the
10651 Renesas ABI. So if the function would return its result
10652 in memory then we must force the address of this memory
10653 block onto the stack. Ideally we would like to call
10654 targetm.calls.return_in_memory() here but we do not have
10655 the TYPE or the FNDECL available so we synthesize the
10656 contents of that function as best we can. */
10657 pcum->force_mem =
10658 (TARGET_DEFAULT & MASK_HITACHI)
10659 && (mode == BLKmode
10660 || (GET_MODE_SIZE (mode) > 4
10661 && !(mode == DFmode
10662 && TARGET_FPU_DOUBLE)));
10664 else
10666 pcum->call_cookie = 0;
10667 pcum->force_mem = FALSE;
10672 /* Determine if two hard register sets intersect.
10673 Return 1 if they do. */
10675 static int
10676 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10678 HARD_REG_SET c;
10679 COPY_HARD_REG_SET (c, *a);
10680 AND_HARD_REG_SET (c, *b);
10681 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10682 return 1;
10683 lose:
10684 return 0;
10687 #ifdef TARGET_ADJUST_UNROLL_MAX
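/* Hook for TARGET_ADJUST_UNROLL_MAX: given LOOP and its INSN_COUNT,
   return a possibly reduced MAX_UNROLLED_INSNS so that unrolling does
   not create more branch targets than the eight SHmedia target
   registers can accommodate cheaply.  */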
10688 static int
10689 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10690 int max_unrolled_insns, int strength_reduce_p,
10691 int unroll_type)
10693 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10694 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10696 /* Throttle back loop unrolling so that the costs of using more
10697 targets than the eight target registers we have don't outweigh
10698 the benefits of unrolling. */
10699 rtx insn;
10700 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10701 int n_barriers = 0;
10702 rtx dest;
10703 int i;
10704 rtx exit_dest[8];
10705 int threshold;
10706 int unroll_benefit = 0, mem_latency = 0;
10707 int base_cost, best_cost, cost;
10708 int factor, best_factor;
10709 int n_dest;
10710 unsigned max_iterations = 32767;
10711 int n_iterations;
10712 int need_precond = 0, precond = 0;
10713 basic_block * bbs = get_loop_body (loop);
10714 struct niter_desc *desc;
10716 /* Assume that all labels inside the loop are used from inside the
10717 loop. If the loop has multiple entry points, it is unlikely to
10718 be unrolled anyway.
10719 Also assume that all calls are to different functions. That is
10720 somewhat pessimistic, but if you have lots of calls, unrolling the
10721 loop is not likely to gain you much in the first place. */
10722 i = loop->num_nodes - 1;
10723 for (insn = BB_HEAD (bbs[i]); ; )
10725 if (GET_CODE (insn) == CODE_LABEL)
10726 n_labels++;
10727 else if (GET_CODE (insn) == CALL_INSN)
10728 n_calls++;
10729 else if (GET_CODE (insn) == NOTE
10730 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10731 n_inner_loops++;
10732 else if (GET_CODE (insn) == BARRIER)
10733 n_barriers++;
10734 if (insn != BB_END (bbs[i]))
10735 insn = NEXT_INSN (insn);
10736 else if (--i >= 0)
10737 insn = BB_HEAD (bbs[i]);
10738 else
10739 break;
10741 free (bbs);
10742 /* One label for the loop top is normal, and it won't be duplicated by
10743 unrolling. */
10744 if (n_labels <= 1)
10745 return max_unrolled_insns;
10746 if (n_inner_loops > 0)
10747 return 0;
10748 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10749 dest = LABEL_NEXTREF (dest))
10751 for (i = n_exit_dest - 1;
10752 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10753 if (i < 0)
10754 exit_dest[n_exit_dest++] = dest;
10756 /* If the loop top and call and exit destinations are enough to fill up
10757 the target registers, we're unlikely to do any more damage by
10758 unrolling. */
10759 if (n_calls + n_exit_dest >= 7)
10760 return max_unrolled_insns;
10762 /* ??? In the new loop unroller, there is no longer any strength
10763 reduction information available. Thus, when it comes to unrolling,
10764 we know the cost of everything, but we know the value of nothing. */
10765 #if 0
10766 if (strength_reduce_p
10767 && (unroll_type == LPT_UNROLL_RUNTIME
10768 || unroll_type == LPT_UNROLL_CONSTANT
10769 || unroll_type == LPT_PEEL_COMPLETELY))
10771 struct loop_ivs *ivs = LOOP_IVS (loop);
10772 struct iv_class *bl;
10774 /* We'll save one compare-and-branch in each loop body copy
10775 but the last one. */
10776 unroll_benefit = 1;
10777 /* Assess the benefit of removing biv & giv updates. */
10778 for (bl = ivs->list; bl; bl = bl->next)
10780 rtx increment = biv_total_increment (bl);
10781 struct induction *v;
10783 if (increment && GET_CODE (increment) == CONST_INT)
10785 unroll_benefit++;
10786 for (v = bl->giv; v; v = v->next_iv)
10788 if (! v->ignore && v->same == 0
10789 && GET_CODE (v->mult_val) == CONST_INT)
10790 unroll_benefit++;
10791 /* If this giv uses an array, try to determine
10792 a maximum iteration count from the size of the
10793 array. This need not be correct all the time,
10794 but should not be too far off the mark too often. */
10795 while (v->giv_type == DEST_ADDR)
10797 rtx mem = PATTERN (v->insn);
10798 tree mem_expr, type, size_tree;
10800 if (GET_CODE (SET_SRC (mem)) == MEM)
10801 mem = SET_SRC (mem);
10802 else if (GET_CODE (SET_DEST (mem)) == MEM)
10803 mem = SET_DEST (mem);
10804 else
10805 break;
10806 mem_expr = MEM_EXPR (mem);
10807 if (! mem_expr)
10808 break;
10809 type = TREE_TYPE (mem_expr);
10810 if (TREE_CODE (type) != ARRAY_TYPE
10811 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10812 break;
10813 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10814 bitsizetype,
10815 TYPE_SIZE (type),
10816 TYPE_SIZE_UNIT (type));
10817 if (TREE_CODE (size_tree) == INTEGER_CST
10818 && ! TREE_INT_CST_HIGH (size_tree)
10819 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10820 max_iterations = TREE_INT_CST_LOW (size_tree);
10821 break;
10827 #else /* 0 */
10828 /* Assume there is at least some benefit. */
10829 unroll_benefit = 1;
10830 #endif /* 0 */
10832 desc = get_simple_loop_desc (loop);
10833 n_iterations = desc->const_iter ? desc->niter : 0;
10834 max_iterations
10835 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10837 if (! strength_reduce_p || ! n_iterations)
10838 need_precond = 1;
10839 if (! n_iterations)
10841 n_iterations
10842 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10843 if (! n_iterations)
10844 return 0;
10846 #if 0 /* ??? See above - missing induction variable information. */
10847 while (unroll_benefit > 1) /* no loop */
10849 /* We include the benefit of biv / giv updates. Check if some or
10850 all of these updates are likely to fit into a scheduling
10851 bubble of a load.
10852 We check for the following case:
10853 - All the insns leading to the first JUMP_INSN are in a strict
10854 dependency chain.
10855 - There is at least one memory reference in them.
10857 When we find such a pattern, we assume that we can hide as many
10858 updates as the total of the load latency is, if we have an
10859 unroll factor of at least two. We might or might not also do
10860 this without unrolling, so rather than considering this as an
10861 extra unroll benefit, discount it in the unroll benefits of unroll
10862 factors higher than two. */
10864 rtx set, last_set;
10866 insn = next_active_insn (loop->start);
10867 last_set = single_set (insn);
10868 if (! last_set)
10869 break;
10870 if (GET_CODE (SET_SRC (last_set)) == MEM)
10871 mem_latency += 2;
10872 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10874 if (! INSN_P (insn))
10875 continue;
10876 if (GET_CODE (insn) == JUMP_INSN)
10877 break;
10878 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10880 /* Check if this is a to-be-reduced giv insn. */
10881 struct loop_ivs *ivs = LOOP_IVS (loop);
10882 struct iv_class *bl;
10883 struct induction *v;
10884 for (bl = ivs->list; bl; bl = bl->next)
10886 if (bl->biv->insn == insn)
10887 goto is_biv;
10888 for (v = bl->giv; v; v = v->next_iv)
10889 if (v->insn == insn)
10890 goto is_giv;
10892 mem_latency--;
10893 is_biv:
10894 is_giv:
10895 continue;
10897 set = single_set (insn);
10898 if (! set)
10899 continue;
10900 if (GET_CODE (SET_SRC (set)) == MEM)
10901 mem_latency += 2;
10902 last_set = set;
10904 if (mem_latency < 0)
10905 mem_latency = 0;
10906 else if (mem_latency > unroll_benefit - 1)
10907 mem_latency = unroll_benefit - 1;
10908 break;
10910 #endif /* 0 */
10911 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10912 <= unroll_benefit)
10913 return max_unrolled_insns;
10915 n_dest = n_labels + n_calls + n_exit_dest;
10916 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10917 best_cost = 0;
10918 best_factor = 1;
10919 if (n_barriers * 2 > n_labels - 1)
10920 n_barriers = (n_labels - 1) / 2;
10921 for (factor = 2; factor <= 8; factor++)
10923 /* Bump up preconditioning cost for each power of two. */
10924 if (! (factor & (factor-1)))
10925 precond += 4;
10926 /* When preconditioning, only powers of two will be considered. */
10927 else if (need_precond)
10928 continue;
10929 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10930 + (n_labels - 1) * factor + n_calls + n_exit_dest
10931 - (n_barriers * factor >> 1)
10932 + need_precond);
10933 cost
10934 = ((n_dest <= 8 ? 0 : n_dest - 7)
10935 - base_cost * factor
10936 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10937 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10938 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10939 / n_iterations));
10940 if (need_precond)
10941 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10942 if (cost < best_cost)
10944 best_cost = cost;
10945 best_factor = factor;
10948 threshold = best_factor * insn_count;
10949 if (max_unrolled_insns > threshold)
10950 max_unrolled_insns = threshold;
10952 return max_unrolled_insns;
10954 #endif /* TARGET_ADJUST_UNROLL_MAX */
10956 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10957 not enter into CONST_DOUBLE for the replace.
10959 Note that copying is not done so X must not be shared unless all copies
10960 are to be modified.
10962 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10963 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10964 replacements[n*2+1] - and that we take mode changes into account.
10966 If a replacement is ambiguous, return NULL_RTX.
10968 If MODIFY is zero, don't modify any rtl in place,
10969 just return zero or nonzero for failure / success. */
10972 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10974 int i, j;
10975 const char *fmt;
10977 /* The following prevents loops from occurring when we change MEM in
10978 CONST_DOUBLE onto the same CONST_DOUBLE. */
10979 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10980 return x;
10982 for (i = n_replacements - 1; i >= 0 ; i--)
10983 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10984 return replacements[i*2+1];
10986 /* Allow this function to make replacements in EXPR_LISTs. */
10987 if (x == 0)
10988 return 0;
10990 if (GET_CODE (x) == SUBREG)
10992 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10993 n_replacements, modify);
10995 if (GET_CODE (new) == CONST_INT)
10997 x = simplify_subreg (GET_MODE (x), new,
10998 GET_MODE (SUBREG_REG (x)),
10999 SUBREG_BYTE (x));
11000 if (! x)
11001 abort ();
11003 else if (modify)
11004 SUBREG_REG (x) = new;
11006 return x;
11008 else if (GET_CODE (x) == REG)
11010 unsigned regno = REGNO (x);
11011 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11012 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11013 rtx result = NULL_RTX;
11015 for (i = n_replacements - 1; i >= 0; i--)
11017 rtx from = replacements[i*2];
11018 rtx to = replacements[i*2+1];
11019 unsigned from_regno, from_nregs, to_regno, new_regno;
11021 if (GET_CODE (from) != REG)
11022 continue;
11023 from_regno = REGNO (from);
11024 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11025 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11026 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11028 if (regno < from_regno
11029 || regno + nregs > from_regno + nregs
11030 || GET_CODE (to) != REG
11031 || result)
11032 return NULL_RTX;
11033 to_regno = REGNO (to);
11034 if (to_regno < FIRST_PSEUDO_REGISTER)
11036 new_regno = regno + to_regno - from_regno;
11037 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11038 != nregs)
11039 return NULL_RTX;
11040 result = gen_rtx_REG (GET_MODE (x), new_regno);
11042 else if (GET_MODE (x) <= GET_MODE (to))
11043 result = gen_lowpart_common (GET_MODE (x), to);
11044 else
11045 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11048 return result ? result : x;
11050 else if (GET_CODE (x) == ZERO_EXTEND)
11052 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11053 n_replacements, modify);
11055 if (GET_CODE (new) == CONST_INT)
11057 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11058 new, GET_MODE (XEXP (x, 0)));
11059 if (! x)
11060 abort ();
11062 else if (modify)
11063 XEXP (x, 0) = new;
11065 return x;
11068 fmt = GET_RTX_FORMAT (GET_CODE (x));
11069 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11071 rtx new;
11073 if (fmt[i] == 'e')
11075 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11076 n_replacements, modify);
11077 if (!new)
11078 return NULL_RTX;
11079 if (modify)
11080 XEXP (x, i) = new;
11082 else if (fmt[i] == 'E')
11083 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11085 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11086 n_replacements, modify);
11087 if (!new)
11088 return NULL_RTX;
11089 if (modify)
11090 XVECEXP (x, i, j) = new;
11094 return x;
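/* Return an rtx for X truncated to MODE.  If X is itself a zero or sign
   extension of a narrower value, look through the extension and truncate
   or re-extend the inner value instead; NEED_SIGN_EXT says whether any
   extension that is reused must be a sign extension.  */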
11098 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11100 enum rtx_code code = TRUNCATE;
11102 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11104 rtx inner = XEXP (x, 0);
11105 enum machine_mode inner_mode = GET_MODE (inner);
11107 if (inner_mode == mode)
11108 return inner;
11109 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11110 x = inner;
11111 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11112 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11114 code = GET_CODE (x);
11115 x = inner;
11118 return gen_rtx_fmt_e (code, mode, x);
11121 /* Called via for_each_rtx after reload, to clean up truncates of
11122 registers that span multiple actual hard registers. */
11124 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11126 rtx x = *p, reg;
11128 if (GET_CODE (x) != TRUNCATE)
11129 return 0;
11130 reg = XEXP (x, 0);
11131 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11133 enum machine_mode reg_mode = GET_MODE (reg);
11134 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11135 subreg_lowpart_offset (DImode, reg_mode));
11136 *(int*) n_changes += 1;
11137 return -1;
11139 return 0;
11142 /* Load and store depend on the highpart of the address. However,
11143 set_attr_alternative does not give well-defined results before reload,
11144 so we must look at the rtl ourselves to see if any of the feeding
11145 registers is used in a memref. */
11147 /* Called by sh_contains_memref_p via for_each_rtx. */
11148 static int
11149 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11151 return (GET_CODE (*loc) == MEM);
11154 /* Return nonzero iff INSN contains a MEM. */
11156 sh_contains_memref_p (rtx insn)
11158 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11161 /* FNADDR is the MEM expression from a call expander. Return an address
11162 to use in an SHmedia insn pattern. */
11164 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11166 int is_sym;
11168 fnaddr = XEXP (fnaddr, 0);
11169 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11170 if (flag_pic && is_sym)
11172 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11174 rtx reg = gen_reg_rtx (Pmode);
11176 /* We must not use GOTPLT for sibcalls, because PIC_REG
11177 must be restored before the PLT code gets to run. */
11178 if (is_sibcall)
11179 emit_insn (gen_symGOT2reg (reg, fnaddr));
11180 else
11181 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11182 fnaddr = reg;
11184 else
11186 fnaddr = gen_sym2PIC (fnaddr);
11187 PUT_MODE (fnaddr, Pmode);
11190 /* If ptabs might trap, make this visible to the rest of the compiler.
11191 We generally assume that symbols pertain to valid locations, but
11192 it is possible to generate invalid symbols with asm or linker tricks.
11193 In a list of functions where each returns its successor, an invalid
11194 symbol might denote an empty list. */
11195 if (!TARGET_PT_FIXED
11196 && (!is_sym || TARGET_INVALID_SYMBOLS)
11197 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11199 rtx tr = gen_reg_rtx (PDImode);
11201 emit_insn (gen_ptabs (tr, fnaddr));
11202 fnaddr = tr;
11204 else if (! target_reg_operand (fnaddr, Pmode))
11205 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11206 return fnaddr;
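/* Implement the secondary reload hook: return the register class needed
   as an intermediate when moving X in MODE into (IN_P) or out of a
   register of CLASS, or NO_REGS if none is needed; SRI->icode is set
   when a special reload pattern can be used instead.  */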
11209 enum reg_class
11210 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
11211 enum machine_mode mode, secondary_reload_info *sri)
11213 if (in_p)
11215 if (REGCLASS_HAS_FP_REG (class)
11216 && ! TARGET_SHMEDIA
11217 && immediate_operand ((x), mode)
11218 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11219 && mode == SFmode && fldi_ok ()))
11220 switch (mode)
11222 case SFmode:
11223 sri->icode = CODE_FOR_reload_insf__frn;
11224 return NO_REGS;
11225 case DFmode:
11226 sri->icode = CODE_FOR_reload_indf__frn;
11227 return NO_REGS;
11228 case SImode:
11229 /* ??? If we knew that we are in the appropriate mode -
11230 single precision - we could use a reload pattern directly. */
11231 return FPUL_REGS;
11232 default:
11233 abort ();
11235 if (class == FPUL_REGS
11236 && ((GET_CODE (x) == REG
11237 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11238 || REGNO (x) == T_REG))
11239 || GET_CODE (x) == PLUS))
11240 return GENERAL_REGS;
11241 if (class == FPUL_REGS && immediate_operand (x, mode))
11243 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
11244 return GENERAL_REGS;
11245 sri->icode = CODE_FOR_reload_insi__i_fpul;
11246 return NO_REGS;
11248 if (class == FPSCR_REGS
11249 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11250 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11251 return GENERAL_REGS;
11252 if (REGCLASS_HAS_FP_REG (class)
11253 && TARGET_SHMEDIA
11254 && immediate_operand (x, mode)
11255 && x != CONST0_RTX (GET_MODE (x))
11256 && GET_MODE (x) != V4SFmode)
11257 return GENERAL_REGS;
11258 if ((mode == QImode || mode == HImode)
11259 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11261 sri->icode = ((mode == QImode)
11262 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11263 return NO_REGS;
11265 if (TARGET_SHMEDIA && class == GENERAL_REGS
11266 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
11267 return TARGET_REGS;
11268 } /* end of input-only processing. */
11270 if (((REGCLASS_HAS_FP_REG (class)
11271 && (GET_CODE (x) == REG
11272 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11273 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11274 && TARGET_FMOVD))))
11275 || (REGCLASS_HAS_GENERAL_REG (class)
11276 && GET_CODE (x) == REG
11277 && FP_REGISTER_P (REGNO (x))))
11278 && ! TARGET_SHMEDIA
11279 && (mode == SFmode || mode == SImode))
11280 return FPUL_REGS;
11281 if ((class == FPUL_REGS
11282 || (REGCLASS_HAS_FP_REG (class)
11283 && ! TARGET_SHMEDIA && mode == SImode))
11284 && (GET_CODE (x) == MEM
11285 || (GET_CODE (x) == REG
11286 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11287 || REGNO (x) == T_REG
11288 || system_reg_operand (x, VOIDmode)))))
11290 if (class == FPUL_REGS)
11291 return GENERAL_REGS;
11292 return FPUL_REGS;
11294 if ((class == TARGET_REGS
11295 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
11296 && !EXTRA_CONSTRAINT_Csy (x)
11297 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11298 return GENERAL_REGS;
11299 if ((class == MAC_REGS || class == PR_REGS)
11300 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11301 && class != REGNO_REG_CLASS (REGNO (x)))
11302 return GENERAL_REGS;
11303 if (class != GENERAL_REGS && GET_CODE (x) == REG
11304 && TARGET_REGISTER_P (REGNO (x)))
11305 return GENERAL_REGS;
11306 return NO_REGS;
11309 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11311 #include "gt-sh.h"