rtl.h (emit_clobber, [...]): Declare.
[official-gcc.git] / gcc / config / sh / sh.c
blob71419ec61deb31dd01afadd6cecb9ab58711b1b2
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "c-pragma.h"
41 #include "integrate.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Saved operands from the last compare to use when we generate an scc
110 or bcc insn. */
112 rtx sh_compare_op0;
113 rtx sh_compare_op1;
115 /* Provides the class number of the smallest class containing
116 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
168 int assembler_dialect;
170 static bool shmedia_space_reserved_for_target_registers;
172 static bool sh_handle_option (size_t, const char *, int);
173 static void split_branches (rtx);
174 static int branch_dest (rtx);
175 static void force_into (rtx, rtx);
176 static void print_slot (rtx);
177 static rtx add_constant (rtx, enum machine_mode, rtx);
178 static void dump_table (rtx, rtx);
179 static int hi_const (rtx);
180 static int broken_move (rtx);
181 static int mova_p (rtx);
182 static rtx find_barrier (int, rtx, rtx);
183 static int noncall_uses_reg (rtx, rtx, rtx *);
184 static rtx gen_block_redirect (rtx, int, int);
185 static void sh_reorg (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
187 static rtx frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET *, int);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static rtx mark_constant_pool_use (rtx);
194 const struct attribute_spec sh_attribute_table[];
195 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
196 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static int find_r0_life_regions (basic_block);
213 static void sh_md_init_global (FILE *, int, int);
214 static void sh_md_finish_global (FILE *, int);
215 static int rank_for_reorder (const void *, const void *);
216 static void swap_reorder (rtx *, int);
217 static void ready_reorder (rtx *, int);
218 static short high_pressure (enum machine_mode);
219 static int sh_reorder (FILE *, int, rtx *, int *, int);
220 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
221 static void sh_md_init (FILE *, int, int);
222 static int sh_variable_issue (FILE *, int, rtx, int);
224 static bool sh_function_ok_for_sibcall (tree, tree);
226 static bool sh_cannot_modify_jumps_p (void);
227 static int sh_target_reg_class (void);
228 static bool sh_optimize_target_register_callee_saved (bool);
229 static bool sh_ms_bitfield_layout_p (const_tree);
231 static void sh_init_builtins (void);
232 static void sh_media_init_builtins (void);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *);
245 static int sh_address_cost (rtx);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
249 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
250 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
251 static int scavenge_reg (HARD_REG_SET *s);
252 struct save_schedule_s;
253 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
254 struct save_schedule_s *, int);
256 static rtx sh_struct_value_rtx (tree, int);
257 static bool sh_return_in_memory (const_tree, const_tree);
258 static rtx sh_builtin_saveregs (void);
259 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
260 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
261 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
262 static tree sh_build_builtin_va_list (void);
263 static void sh_va_start (tree, rtx);
264 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 const_tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 const_tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static bool sh_scalar_mode_supported_p (enum machine_mode);
272 static int sh_dwarf_calling_convention (const_tree);
273 static void sh_encode_section_info (tree, rtx, int);
274 static int sh2a_function_vector_p (tree);
277 /* Initialize the GCC target structure. */
278 #undef TARGET_ATTRIBUTE_TABLE
279 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
281 /* The next two are used for debug info when compiling with -gdwarf. */
282 #undef TARGET_ASM_UNALIGNED_HI_OP
283 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
284 #undef TARGET_ASM_UNALIGNED_SI_OP
285 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
287 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
288 #undef TARGET_ASM_UNALIGNED_DI_OP
289 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
290 #undef TARGET_ASM_ALIGNED_DI_OP
291 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
293 #undef TARGET_ASM_FUNCTION_EPILOGUE
294 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
296 #undef TARGET_ASM_OUTPUT_MI_THUNK
297 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
302 #undef TARGET_ASM_FILE_START
303 #define TARGET_ASM_FILE_START sh_file_start
304 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
305 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
307 #undef TARGET_DEFAULT_TARGET_FLAGS
308 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
309 #undef TARGET_HANDLE_OPTION
310 #define TARGET_HANDLE_OPTION sh_handle_option
312 #undef TARGET_INSERT_ATTRIBUTES
313 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
318 #undef TARGET_SCHED_ISSUE_RATE
319 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
321 /* The next 5 hooks have been implemented for reenabling sched1. With the
322 help of these macros we are limiting the movement of insns in sched1 to
323 reduce the register pressure. The overall idea is to keep count of SImode
324 and SFmode regs required by already scheduled insns. When these counts
325 cross some threshold values; give priority to insns that free registers.
326 The insn that frees registers is most likely to be the insn with lowest
327 LUID (original insn order); but such an insn might be there in the stalled
328 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
329 upto a max of 8 cycles so that such insns may move from Q -> R.
331 The description of the hooks are as below:
333 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
334 scheduler; it is called inside the sched_init function just after
335 find_insn_reg_weights function call. It is used to calculate the SImode
336 and SFmode weights of insns of basic blocks; much similar to what
337 find_insn_reg_weights does.
338 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
340 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
341 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
342 (Q)->(R).
344 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
345 high; reorder the ready queue so that the insn with lowest LUID will be
346 issued next.
348 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
349 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
351 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
352 can be returned from TARGET_SCHED_REORDER2.
354 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
356 #undef TARGET_SCHED_DFA_NEW_CYCLE
357 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
359 #undef TARGET_SCHED_INIT_GLOBAL
360 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
362 #undef TARGET_SCHED_FINISH_GLOBAL
363 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
365 #undef TARGET_SCHED_VARIABLE_ISSUE
366 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
368 #undef TARGET_SCHED_REORDER
369 #define TARGET_SCHED_REORDER sh_reorder
371 #undef TARGET_SCHED_REORDER2
372 #define TARGET_SCHED_REORDER2 sh_reorder2
374 #undef TARGET_SCHED_INIT
375 #define TARGET_SCHED_INIT sh_md_init
377 #undef TARGET_CANNOT_MODIFY_JUMPS_P
378 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
379 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
380 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
381 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
382 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
383 sh_optimize_target_register_callee_saved
385 #undef TARGET_MS_BITFIELD_LAYOUT_P
386 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
388 #undef TARGET_INIT_BUILTINS
389 #define TARGET_INIT_BUILTINS sh_init_builtins
390 #undef TARGET_EXPAND_BUILTIN
391 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
393 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
394 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
396 #undef TARGET_CANNOT_COPY_INSN_P
397 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
398 #undef TARGET_RTX_COSTS
399 #define TARGET_RTX_COSTS sh_rtx_costs
400 #undef TARGET_ADDRESS_COST
401 #define TARGET_ADDRESS_COST sh_address_cost
402 #undef TARGET_ALLOCATE_INITIAL_VALUE
403 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
405 #undef TARGET_MACHINE_DEPENDENT_REORG
406 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #ifdef HAVE_AS_TLS
409 #undef TARGET_HAVE_TLS
410 #define TARGET_HAVE_TLS true
411 #endif
413 #undef TARGET_PROMOTE_PROTOTYPES
414 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_ARGS
416 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
417 #undef TARGET_PROMOTE_FUNCTION_RETURN
418 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
420 #undef TARGET_STRUCT_VALUE_RTX
421 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
422 #undef TARGET_RETURN_IN_MEMORY
423 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
425 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
426 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
427 #undef TARGET_SETUP_INCOMING_VARARGS
428 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
429 #undef TARGET_STRICT_ARGUMENT_NAMING
430 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
431 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
432 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
433 #undef TARGET_MUST_PASS_IN_STACK
434 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
435 #undef TARGET_PASS_BY_REFERENCE
436 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
437 #undef TARGET_CALLEE_COPIES
438 #define TARGET_CALLEE_COPIES sh_callee_copies
439 #undef TARGET_ARG_PARTIAL_BYTES
440 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_EXPAND_BUILTIN_VA_START
445 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
446 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
447 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
449 #undef TARGET_SCALAR_MODE_SUPPORTED_P
450 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
451 #undef TARGET_VECTOR_MODE_SUPPORTED_P
452 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
454 #undef TARGET_CHECK_PCH_TARGET_FLAGS
455 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
457 #undef TARGET_DWARF_CALLING_CONVENTION
458 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
460 /* Return regmode weight for insn. */
461 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
463 /* Return current register pressure for regmode. */
464 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
469 #ifdef SYMBIAN
471 #undef TARGET_ENCODE_SECTION_INFO
472 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
475 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
476 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
478 #endif /* SYMBIAN */
480 #undef TARGET_SECONDARY_RELOAD
481 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
483 /* Machine-specific symbol_ref flags. */
484 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
486 struct gcc_target targetm = TARGET_INITIALIZER;
488 /* Implement TARGET_HANDLE_OPTION. */
490 static bool
491 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
492 int value ATTRIBUTE_UNUSED)
494 switch (code)
496 case OPT_m1:
497 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
498 return true;
500 case OPT_m2:
501 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
502 return true;
504 case OPT_m2a:
505 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
506 return true;
508 case OPT_m2a_nofpu:
509 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
510 return true;
512 case OPT_m2a_single:
513 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
514 return true;
516 case OPT_m2a_single_only:
517 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
518 return true;
520 case OPT_m2e:
521 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
522 return true;
524 case OPT_m3:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
526 return true;
528 case OPT_m3e:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
530 return true;
532 case OPT_m4:
533 case OPT_m4_100:
534 case OPT_m4_200:
535 case OPT_m4_300:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
537 return true;
539 case OPT_m4_nofpu:
540 case OPT_m4_100_nofpu:
541 case OPT_m4_200_nofpu:
542 case OPT_m4_300_nofpu:
543 case OPT_m4_340:
544 case OPT_m4_400:
545 case OPT_m4_500:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
547 return true;
549 case OPT_m4_single:
550 case OPT_m4_100_single:
551 case OPT_m4_200_single:
552 case OPT_m4_300_single:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
554 return true;
556 case OPT_m4_single_only:
557 case OPT_m4_100_single_only:
558 case OPT_m4_200_single_only:
559 case OPT_m4_300_single_only:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
561 return true;
563 case OPT_m4a:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
565 return true;
567 case OPT_m4a_nofpu:
568 case OPT_m4al:
569 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
570 return true;
572 case OPT_m4a_single:
573 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
574 return true;
576 case OPT_m4a_single_only:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
578 return true;
580 case OPT_m5_32media:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
582 return true;
584 case OPT_m5_32media_nofpu:
585 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
586 return true;
588 case OPT_m5_64media:
589 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
590 return true;
592 case OPT_m5_64media_nofpu:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
594 return true;
596 case OPT_m5_compact:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
598 return true;
600 case OPT_m5_compact_nofpu:
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
602 return true;
604 default:
605 return true;
609 /* Print the operand address in x to the stream. */
611 void
612 print_operand_address (FILE *stream, rtx x)
614 switch (GET_CODE (x))
616 case REG:
617 case SUBREG:
618 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
619 break;
621 case PLUS:
623 rtx base = XEXP (x, 0);
624 rtx index = XEXP (x, 1);
626 switch (GET_CODE (index))
628 case CONST_INT:
629 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
630 reg_names[true_regnum (base)]);
631 break;
633 case REG:
634 case SUBREG:
636 int base_num = true_regnum (base);
637 int index_num = true_regnum (index);
639 fprintf (stream, "@(r0,%s)",
640 reg_names[MAX (base_num, index_num)]);
641 break;
644 default:
645 gcc_unreachable ();
648 break;
650 case PRE_DEC:
651 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
652 break;
654 case POST_INC:
655 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
656 break;
658 default:
659 x = mark_constant_pool_use (x);
660 output_addr_const (stream, x);
661 break;
665 /* Print operand x (an rtx) in assembler syntax to file stream
666 according to modifier code.
668 '.' print a .s if insn needs delay slot
669 ',' print LOCAL_LABEL_PREFIX
670 '@' print trap, rte or rts depending upon pragma interruptness
671 '#' output a nop if there is nothing to put in the delay slot
672 ''' print likelihood suffix (/u for unlikely).
673 '>' print branch target if -fverbose-asm
674 'O' print a constant without the #
675 'R' print the LSW of a dp value - changes if in little endian
676 'S' print the MSW of a dp value - changes if in little endian
677 'T' print the next word of a dp value - same as 'R' in big endian mode.
678 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
679 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
680 'N' print 'r63' if the operand is (const_int 0).
681 'd' print a V2SF reg as dN instead of fpN.
682 'm' print a pair `base,offset' or `base,index', for LD and ST.
683 'U' Likewise for {LD,ST}{HI,LO}.
684 'V' print the position of a single bit set.
685 'W' print the position of a single bit cleared.
686 't' print a memory address which is a register.
687 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
688 'o' output an operator. */
690 void
691 print_operand (FILE *stream, rtx x, int code)
693 int regno;
694 enum machine_mode mode;
696 switch (code)
698 tree trapa_attr;
700 case '.':
701 if (final_sequence
702 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
703 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
704 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
705 break;
706 case ',':
707 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
708 break;
709 case '@':
710 trapa_attr = lookup_attribute ("trap_exit",
711 DECL_ATTRIBUTES (current_function_decl));
712 if (trapa_attr)
713 fprintf (stream, "trapa #%ld",
714 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
715 else if (sh_cfun_interrupt_handler_p ())
717 if (sh_cfun_resbank_handler_p ())
718 fprintf (stream, "resbank\n");
719 fprintf (stream, "rte");
721 else
722 fprintf (stream, "rts");
723 break;
724 case '#':
725 /* Output a nop if there's nothing in the delay slot. */
726 if (dbr_sequence_length () == 0)
727 fprintf (stream, "\n\tnop");
728 break;
729 case '\'':
731 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
733 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
734 fputs ("/u", stream);
735 break;
737 case '>':
738 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
740 fputs ("\t! target: ", stream);
741 output_addr_const (stream, JUMP_LABEL (current_output_insn));
743 break;
744 case 'O':
745 x = mark_constant_pool_use (x);
746 output_addr_const (stream, x);
747 break;
748 /* N.B.: %R / %S / %T adjust memory addresses by four.
749 For SHMEDIA, that means they can be used to access the first and
750 second 32 bit part of a 64 bit (or larger) value that
751 might be held in floating point registers or memory.
752 While they can be used to access 64 bit parts of a larger value
753 held in general purpose registers, that won't work with memory -
754 neither for fp registers, since the frxx names are used. */
755 case 'R':
756 if (REG_P (x) || GET_CODE (x) == SUBREG)
758 regno = true_regnum (x);
759 regno += FP_REGISTER_P (regno) ? 1 : LSW;
760 fputs (reg_names[regno], (stream));
762 else if (MEM_P (x))
764 x = adjust_address (x, SImode, 4 * LSW);
765 print_operand_address (stream, XEXP (x, 0));
767 else
769 rtx sub = NULL_RTX;
771 mode = GET_MODE (x);
772 if (mode == VOIDmode)
773 mode = DImode;
774 if (GET_MODE_SIZE (mode) >= 8)
775 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
776 if (sub)
777 print_operand (stream, sub, 0);
778 else
779 output_operand_lossage ("invalid operand to %%R");
781 break;
782 case 'S':
783 if (REG_P (x) || GET_CODE (x) == SUBREG)
785 regno = true_regnum (x);
786 regno += FP_REGISTER_P (regno) ? 0 : MSW;
787 fputs (reg_names[regno], (stream));
789 else if (MEM_P (x))
791 x = adjust_address (x, SImode, 4 * MSW);
792 print_operand_address (stream, XEXP (x, 0));
794 else
796 rtx sub = NULL_RTX;
798 mode = GET_MODE (x);
799 if (mode == VOIDmode)
800 mode = DImode;
801 if (GET_MODE_SIZE (mode) >= 8)
802 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
803 if (sub)
804 print_operand (stream, sub, 0);
805 else
806 output_operand_lossage ("invalid operand to %%S");
808 break;
809 case 'T':
810 /* Next word of a double. */
811 switch (GET_CODE (x))
813 case REG:
814 fputs (reg_names[REGNO (x) + 1], (stream));
815 break;
816 case MEM:
817 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
818 && GET_CODE (XEXP (x, 0)) != POST_INC)
819 x = adjust_address (x, SImode, 4);
820 print_operand_address (stream, XEXP (x, 0));
821 break;
822 default:
823 break;
825 break;
827 case 't':
828 gcc_assert (GET_CODE (x) == MEM);
829 x = XEXP (x, 0);
830 switch (GET_CODE (x))
832 case REG:
833 case SUBREG:
834 print_operand (stream, x, 0);
835 break;
836 default:
837 break;
839 break;
841 case 'o':
842 switch (GET_CODE (x))
844 case PLUS: fputs ("add", stream); break;
845 case MINUS: fputs ("sub", stream); break;
846 case MULT: fputs ("mul", stream); break;
847 case DIV: fputs ("div", stream); break;
848 case EQ: fputs ("eq", stream); break;
849 case NE: fputs ("ne", stream); break;
850 case GT: case LT: fputs ("gt", stream); break;
851 case GE: case LE: fputs ("ge", stream); break;
852 case GTU: case LTU: fputs ("gtu", stream); break;
853 case GEU: case LEU: fputs ("geu", stream); break;
854 default:
855 break;
857 break;
858 case 'M':
859 if (TARGET_SHMEDIA)
861 if (GET_CODE (x) == MEM
862 && GET_CODE (XEXP (x, 0)) == PLUS
863 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
864 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
865 fputc ('x', stream);
867 else
869 if (GET_CODE (x) == MEM)
871 switch (GET_MODE (x))
873 case QImode: fputs (".b", stream); break;
874 case HImode: fputs (".w", stream); break;
875 case SImode: fputs (".l", stream); break;
876 case SFmode: fputs (".s", stream); break;
877 case DFmode: fputs (".d", stream); break;
878 default: gcc_unreachable ();
882 break;
884 case 'm':
885 gcc_assert (GET_CODE (x) == MEM);
886 x = XEXP (x, 0);
887 /* Fall through. */
888 case 'U':
889 switch (GET_CODE (x))
891 case REG:
892 case SUBREG:
893 print_operand (stream, x, 0);
894 fputs (", 0", stream);
895 break;
897 case PLUS:
898 print_operand (stream, XEXP (x, 0), 0);
899 fputs (", ", stream);
900 print_operand (stream, XEXP (x, 1), 0);
901 break;
903 default:
904 gcc_unreachable ();
906 break;
908 case 'V':
910 int num = exact_log2 (INTVAL (x));
911 gcc_assert (num >= 0);
912 fprintf (stream, "#%d", num);
914 break;
916 case 'W':
918 int num = exact_log2 (~INTVAL (x));
919 gcc_assert (num >= 0);
920 fprintf (stream, "#%d", num);
922 break;
924 case 'd':
925 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
927 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
928 break;
930 case 'N':
931 if (x == CONST0_RTX (GET_MODE (x)))
933 fprintf ((stream), "r63");
934 break;
936 goto default_output;
937 case 'u':
938 if (GET_CODE (x) == CONST_INT)
940 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
941 break;
943 /* Fall through. */
945 default_output:
946 default:
947 regno = 0;
948 mode = GET_MODE (x);
950 switch (GET_CODE (x))
952 case TRUNCATE:
954 rtx inner = XEXP (x, 0);
955 int offset = 0;
956 enum machine_mode inner_mode;
958 /* We might see SUBREGs with vector mode registers inside. */
959 if (GET_CODE (inner) == SUBREG
960 && (GET_MODE_SIZE (GET_MODE (inner))
961 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
962 && subreg_lowpart_p (inner))
963 inner = SUBREG_REG (inner);
964 if (GET_CODE (inner) == CONST_INT)
966 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
967 goto default_output;
969 inner_mode = GET_MODE (inner);
970 if (GET_CODE (inner) == SUBREG
971 && (GET_MODE_SIZE (GET_MODE (inner))
972 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
973 && GET_CODE (SUBREG_REG (inner)) == REG)
975 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
976 GET_MODE (SUBREG_REG (inner)),
977 SUBREG_BYTE (inner),
978 GET_MODE (inner));
979 inner = SUBREG_REG (inner);
981 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
982 abort ();
983 /* Floating point register pairs are always big endian;
984 general purpose registers are 64 bit wide. */
985 regno = REGNO (inner);
986 regno = (HARD_REGNO_NREGS (regno, inner_mode)
987 - HARD_REGNO_NREGS (regno, mode))
988 + offset;
989 x = inner;
990 goto reg;
992 case SIGN_EXTEND:
993 x = XEXP (x, 0);
994 goto reg;
995 /* FIXME: We need this on SHmedia32 because reload generates
996 some sign-extended HI or QI loads into DImode registers
997 but, because Pmode is SImode, the address ends up with a
998 subreg:SI of the DImode register. Maybe reload should be
999 fixed so as to apply alter_subreg to such loads? */
1000 case IF_THEN_ELSE:
1001 gcc_assert (trapping_target_operand (x, VOIDmode));
1002 x = XEXP (XEXP (x, 2), 0);
1003 goto default_output;
1004 case SUBREG:
1005 gcc_assert (SUBREG_BYTE (x) == 0
1006 && GET_CODE (SUBREG_REG (x)) == REG);
1008 x = SUBREG_REG (x);
1009 /* Fall through. */
1011 reg:
1012 case REG:
1013 regno += REGNO (x);
1014 if (FP_REGISTER_P (regno)
1015 && mode == V16SFmode)
1016 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1017 else if (FP_REGISTER_P (REGNO (x))
1018 && mode == V4SFmode)
1019 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1020 else if (GET_CODE (x) == REG
1021 && mode == V2SFmode)
1022 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1023 else if (FP_REGISTER_P (REGNO (x))
1024 && GET_MODE_SIZE (mode) > 4)
1025 fprintf ((stream), "d%s", reg_names[regno] + 1);
1026 else
1027 fputs (reg_names[regno], (stream));
1028 break;
1030 case MEM:
1031 output_address (XEXP (x, 0));
1032 break;
1034 case CONST:
1035 if (TARGET_SHMEDIA
1036 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
1037 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
1038 && (GET_MODE (XEXP (x, 0)) == DImode
1039 || GET_MODE (XEXP (x, 0)) == SImode)
1040 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
1041 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
1043 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
1044 rtx val2 = val;
1045 bool nested_expr = false;
1047 fputc ('(', stream);
1048 if (GET_CODE (val) == ASHIFTRT)
1050 fputc ('(', stream);
1051 val2 = XEXP (val, 0);
1053 if (GET_CODE (val2) == CONST
1054 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1056 fputc ('(', stream);
1057 nested_expr = true;
1059 output_addr_const (stream, val2);
1060 if (nested_expr)
1061 fputc (')', stream);
1062 if (GET_CODE (val) == ASHIFTRT)
1064 fputs (" >> ", stream);
1065 output_addr_const (stream, XEXP (val, 1));
1066 fputc (')', stream);
1068 fputs (" & 65535)", stream);
1069 break;
1072 /* Fall through. */
1073 default:
1074 if (TARGET_SH1)
1075 fputc ('#', stream);
1076 output_addr_const (stream, x);
1077 break;
1079 break;
1084 /* Encode symbol attributes of a SYMBOL_REF into its
1085 SYMBOL_REF_FLAGS. */
1086 static void
1087 sh_encode_section_info (tree decl, rtx rtl, int first)
1089 default_encode_section_info (decl, rtl, first);
1091 if (TREE_CODE (decl) == FUNCTION_DECL
1092 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1093 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1096 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1097 static void
1098 force_into (rtx value, rtx target)
1100 value = force_operand (value, target);
1101 if (! rtx_equal_p (value, target))
1102 emit_insn (gen_move_insn (target, value));
1105 /* Emit code to perform a block move. Choose the best method.
1107 OPERANDS[0] is the destination.
1108 OPERANDS[1] is the source.
1109 OPERANDS[2] is the size.
1110 OPERANDS[3] is the alignment safe to use. */
1113 expand_block_move (rtx *operands)
1115 int align = INTVAL (operands[3]);
1116 int constp = (GET_CODE (operands[2]) == CONST_INT);
1117 int bytes = (constp ? INTVAL (operands[2]) : 0);
1119 if (! constp)
1120 return 0;
1122 /* If we could use mov.l to move words and dest is word-aligned, we
1123 can use movua.l for loads and still generate a relatively short
1124 and efficient sequence. */
1125 if (TARGET_SH4A_ARCH && align < 4
1126 && MEM_ALIGN (operands[0]) >= 32
1127 && can_move_by_pieces (bytes, 32))
1129 rtx dest = copy_rtx (operands[0]);
1130 rtx src = copy_rtx (operands[1]);
1131 /* We could use different pseudos for each copied word, but
1132 since movua can only load into r0, it's kind of
1133 pointless. */
1134 rtx temp = gen_reg_rtx (SImode);
1135 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1136 int copied = 0;
1138 while (copied + 4 <= bytes)
1140 rtx to = adjust_address (dest, SImode, copied);
1141 rtx from = adjust_automodify_address (src, BLKmode,
1142 src_addr, copied);
1144 set_mem_size (from, GEN_INT (4));
1145 emit_insn (gen_movua (temp, from));
1146 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1147 emit_move_insn (to, temp);
1148 copied += 4;
1151 if (copied < bytes)
1152 move_by_pieces (adjust_address (dest, BLKmode, copied),
1153 adjust_automodify_address (src, BLKmode,
1154 src_addr, copied),
1155 bytes - copied, align, 0);
1157 return 1;
1160 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1161 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1162 if (align < 4 || (bytes % 4 != 0))
1163 return 0;
1165 if (TARGET_HARD_SH4)
1167 if (bytes < 12)
1168 return 0;
1169 else if (bytes == 12)
1171 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1172 rtx r4 = gen_rtx_REG (SImode, 4);
1173 rtx r5 = gen_rtx_REG (SImode, 5);
1175 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1176 force_into (XEXP (operands[0], 0), r4);
1177 force_into (XEXP (operands[1], 0), r5);
1178 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1179 return 1;
1181 else if (! TARGET_SMALLCODE)
1183 const char *entry_name;
1184 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1185 int dwords;
1186 rtx r4 = gen_rtx_REG (SImode, 4);
1187 rtx r5 = gen_rtx_REG (SImode, 5);
1188 rtx r6 = gen_rtx_REG (SImode, 6);
1190 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1191 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1192 force_into (XEXP (operands[0], 0), r4);
1193 force_into (XEXP (operands[1], 0), r5);
1195 dwords = bytes >> 3;
1196 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1197 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1198 return 1;
1200 else
1201 return 0;
1203 if (bytes < 64)
1205 char entry[30];
1206 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1207 rtx r4 = gen_rtx_REG (SImode, 4);
1208 rtx r5 = gen_rtx_REG (SImode, 5);
1210 sprintf (entry, "__movmemSI%d", bytes);
1211 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1212 force_into (XEXP (operands[0], 0), r4);
1213 force_into (XEXP (operands[1], 0), r5);
1214 emit_insn (gen_block_move_real (func_addr_rtx));
1215 return 1;
1218 /* This is the same number of bytes as a memcpy call, but to a different
1219 less common function name, so this will occasionally use more space. */
1220 if (! TARGET_SMALLCODE)
1222 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1223 int final_switch, while_loop;
1224 rtx r4 = gen_rtx_REG (SImode, 4);
1225 rtx r5 = gen_rtx_REG (SImode, 5);
1226 rtx r6 = gen_rtx_REG (SImode, 6);
1228 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1229 force_into (XEXP (operands[0], 0), r4);
1230 force_into (XEXP (operands[1], 0), r5);
1232 /* r6 controls the size of the move. 16 is decremented from it
1233 for each 64 bytes moved. Then the negative bit left over is used
1234 as an index into a list of move instructions. e.g., a 72 byte move
1235 would be set up with size(r6) = 14, for one iteration through the
1236 big while loop, and a switch of -2 for the last part. */
1238 final_switch = 16 - ((bytes / 4) % 16);
1239 while_loop = ((bytes / 4) / 16 - 1) * 16;
1240 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1241 emit_insn (gen_block_lump_real (func_addr_rtx));
1242 return 1;
1245 return 0;
1248 /* Prepare operands for a move define_expand; specifically, one of the
1249 operands must be in a register. */
1252 prepare_move_operands (rtx operands[], enum machine_mode mode)
1254 if ((mode == SImode || mode == DImode)
1255 && flag_pic
1256 && ! ((mode == Pmode || mode == ptr_mode)
1257 && tls_symbolic_operand (operands[1], Pmode) != 0))
1259 rtx temp;
1260 if (SYMBOLIC_CONST_P (operands[1]))
1262 if (GET_CODE (operands[0]) == MEM)
1263 operands[1] = force_reg (Pmode, operands[1]);
1264 else if (TARGET_SHMEDIA
1265 && GET_CODE (operands[1]) == LABEL_REF
1266 && target_reg_operand (operands[0], mode))
1267 /* It's ok. */;
1268 else
1270 temp = (!can_create_pseudo_p ()
1271 ? operands[0]
1272 : gen_reg_rtx (Pmode));
1273 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1276 else if (GET_CODE (operands[1]) == CONST
1277 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1278 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1280 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1281 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1282 mode, temp);
1283 operands[1] = expand_binop (mode, add_optab, temp,
1284 XEXP (XEXP (operands[1], 0), 1),
1285 (!can_create_pseudo_p ()
1286 ? temp
1287 : gen_reg_rtx (Pmode)),
1288 0, OPTAB_LIB_WIDEN);
1292 if (! reload_in_progress && ! reload_completed)
1294 /* Copy the source to a register if both operands aren't registers. */
1295 if (! register_operand (operands[0], mode)
1296 && ! sh_register_operand (operands[1], mode))
1297 operands[1] = copy_to_mode_reg (mode, operands[1]);
1299 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1301 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1302 except that we can't use that function because it is static. */
1303 rtx new = change_address (operands[0], mode, 0);
1304 MEM_COPY_ATTRIBUTES (new, operands[0]);
1305 operands[0] = new;
1308 /* This case can happen while generating code to move the result
1309 of a library call to the target. Reject `st r0,@(rX,rY)' because
1310 reload will fail to find a spill register for rX, since r0 is already
1311 being used for the source. */
1312 else if (TARGET_SH1
1313 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1314 && GET_CODE (operands[0]) == MEM
1315 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1316 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1317 operands[1] = copy_to_mode_reg (mode, operands[1]);
1320 if (mode == Pmode || mode == ptr_mode)
1322 rtx op0, op1, opc;
1323 enum tls_model tls_kind;
1325 op0 = operands[0];
1326 op1 = operands[1];
1327 if (GET_CODE (op1) == CONST
1328 && GET_CODE (XEXP (op1, 0)) == PLUS
1329 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1331 opc = XEXP (XEXP (op1, 0), 1);
1332 op1 = XEXP (XEXP (op1, 0), 0);
1334 else
1335 opc = NULL_RTX;
1337 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1339 rtx tga_op1, tga_ret, tmp, tmp2;
1341 switch (tls_kind)
1343 case TLS_MODEL_GLOBAL_DYNAMIC:
1344 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1345 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1346 op1 = tga_ret;
1347 break;
1349 case TLS_MODEL_LOCAL_DYNAMIC:
1350 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1351 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1353 tmp = gen_reg_rtx (Pmode);
1354 emit_move_insn (tmp, tga_ret);
1356 if (register_operand (op0, Pmode))
1357 tmp2 = op0;
1358 else
1359 tmp2 = gen_reg_rtx (Pmode);
1361 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1362 op1 = tmp2;
1363 break;
1365 case TLS_MODEL_INITIAL_EXEC:
1366 if (! flag_pic)
1368 /* Don't schedule insns for getting GOT address when
1369 the first scheduling is enabled, to avoid spill
1370 failures for R0. */
1371 if (flag_schedule_insns)
1372 emit_insn (gen_blockage ());
1373 emit_insn (gen_GOTaddr2picreg ());
1374 emit_use (gen_rtx_REG (SImode, PIC_REG));
1375 if (flag_schedule_insns)
1376 emit_insn (gen_blockage ());
1378 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1379 tmp = gen_sym2GOTTPOFF (op1);
1380 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1381 op1 = tga_op1;
1382 break;
1384 case TLS_MODEL_LOCAL_EXEC:
1385 tmp2 = gen_reg_rtx (Pmode);
1386 emit_insn (gen_load_gbr (tmp2));
1387 tmp = gen_reg_rtx (Pmode);
1388 emit_insn (gen_symTPOFF2reg (tmp, op1));
1390 if (register_operand (op0, Pmode))
1391 op1 = op0;
1392 else
1393 op1 = gen_reg_rtx (Pmode);
1395 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1396 break;
1398 default:
1399 gcc_unreachable ();
1401 if (opc)
1402 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1403 operands[1] = op1;
1407 return 0;
1410 enum rtx_code
1411 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1412 enum rtx_code comparison)
1414 rtx op1;
1415 rtx scratch = NULL_RTX;
1417 if (comparison == CODE_FOR_nothing)
1418 comparison = GET_CODE (operands[0]);
1419 else
1420 scratch = operands[4];
1421 if (GET_CODE (operands[1]) == CONST_INT
1422 && GET_CODE (operands[2]) != CONST_INT)
1424 rtx tmp = operands[1];
1426 operands[1] = operands[2];
1427 operands[2] = tmp;
1428 comparison = swap_condition (comparison);
1430 if (GET_CODE (operands[2]) == CONST_INT)
1432 HOST_WIDE_INT val = INTVAL (operands[2]);
1433 if ((val == -1 || val == -0x81)
1434 && (comparison == GT || comparison == LE))
1436 comparison = (comparison == GT) ? GE : LT;
1437 operands[2] = gen_int_mode (val + 1, mode);
1439 else if ((val == 1 || val == 0x80)
1440 && (comparison == GE || comparison == LT))
1442 comparison = (comparison == GE) ? GT : LE;
1443 operands[2] = gen_int_mode (val - 1, mode);
1445 else if (val == 1 && (comparison == GEU || comparison == LTU))
1447 comparison = (comparison == GEU) ? NE : EQ;
1448 operands[2] = CONST0_RTX (mode);
1450 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1452 comparison = (comparison == GEU) ? GTU : LEU;
1453 operands[2] = gen_int_mode (val - 1, mode);
1455 else if (val == 0 && (comparison == GTU || comparison == LEU))
1456 comparison = (comparison == GTU) ? NE : EQ;
1457 else if (mode == SImode
1458 && ((val == 0x7fffffff
1459 && (comparison == GTU || comparison == LEU))
1460 || ((unsigned HOST_WIDE_INT) val
1461 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1462 && (comparison == GEU || comparison == LTU))))
1464 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1465 operands[2] = CONST0_RTX (mode);
1468 op1 = operands[1];
1469 if (can_create_pseudo_p ())
1470 operands[1] = force_reg (mode, op1);
1471 /* When we are handling DImode comparisons, we want to keep constants so
1472 that we can optimize the component comparisons; however, memory loads
1473 are better issued as a whole so that they can be scheduled well.
1474 SImode equality comparisons allow I08 constants, but only when they
1475 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1476 into a register, that register might as well be r0, and we allow the
1477 constant. If it is already in a register, this is likely to be
1478 allocated to a different hard register, thus we load the constant into
1479 a register unless it is zero. */
1480 if (!REG_P (operands[2])
1481 && (GET_CODE (operands[2]) != CONST_INT
1482 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1483 && ((comparison != EQ && comparison != NE)
1484 || (REG_P (op1) && REGNO (op1) != R0_REG)
1485 || !satisfies_constraint_I08 (operands[2])))))
1487 if (scratch && GET_MODE (scratch) == mode)
1489 emit_move_insn (scratch, operands[2]);
1490 operands[2] = scratch;
1492 else if (can_create_pseudo_p ())
1493 operands[2] = force_reg (mode, operands[2]);
1495 return comparison;
1498 void
1499 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1501 rtx (*branch_expander) (rtx) = gen_branch_true;
1502 rtx jump;
1504 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1505 switch (comparison)
1507 case NE: case LT: case LE: case LTU: case LEU:
1508 comparison = reverse_condition (comparison);
1509 branch_expander = gen_branch_false;
1510 default: ;
1512 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1513 gen_rtx_fmt_ee (comparison, SImode,
1514 operands[1], operands[2])));
1515 jump = emit_jump_insn (branch_expander (operands[3]));
1516 if (probability >= 0)
1517 REG_NOTES (jump)
1518 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1519 REG_NOTES (jump));
1523 /* ??? How should we distribute probabilities when more than one branch
1524 is generated. So far we only have soem ad-hoc observations:
1525 - If the operands are random, they are likely to differ in both parts.
1526 - If comparing items in a hash chain, the operands are random or equal;
1527 operation should be EQ or NE.
1528 - If items are searched in an ordered tree from the root, we can expect
1529 the highpart to be unequal about half of the time; operation should be
1530 an inequality comparison, operands non-constant, and overall probability
1531 about 50%. Likewise for quicksort.
1532 - Range checks will be often made against constants. Even if we assume for
1533 simplicity an even distribution of the non-constant operand over a
1534 sub-range here, the same probability could be generated with differently
1535 wide sub-ranges - as long as the ratio of the part of the subrange that
1536 is before the threshold to the part that comes after the threshold stays
1537 the same. Thus, we can't really tell anything here;
1538 assuming random distribution is at least simple.
1541 bool
1542 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1544 enum rtx_code msw_taken, msw_skip, lsw_taken;
1545 rtx skip_label = NULL_RTX;
1546 rtx op1h, op1l, op2h, op2l;
1547 int num_branches;
1548 int prob, rev_prob;
1549 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1550 rtx scratch = operands[4];
1552 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1553 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1554 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1555 op1l = gen_lowpart (SImode, operands[1]);
1556 op2l = gen_lowpart (SImode, operands[2]);
1557 msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1558 prob = split_branch_probability;
1559 rev_prob = REG_BR_PROB_BASE - prob;
1560 switch (comparison)
1562 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1563 That costs 1 cycle more when the first branch can be predicted taken,
1564 but saves us mispredicts because only one branch needs prediction.
1565 It also enables generating the cmpeqdi_t-1 pattern. */
1566 case EQ:
1567 if (TARGET_CMPEQDI_T)
1569 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1570 emit_jump_insn (gen_branch_true (operands[3]));
1571 return true;
1573 msw_skip = NE;
1574 lsw_taken = EQ;
1575 if (prob >= 0)
1577 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1579 msw_skip_prob = rev_prob;
1580 if (REG_BR_PROB_BASE <= 65535)
1581 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1582 else
1584 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1585 lsw_taken_prob
1586 = (prob
1587 ? (REG_BR_PROB_BASE
1588 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1589 / ((HOST_WIDEST_INT) prob << 32)))
1590 : 0);
1593 break;
1594 case NE:
1595 if (TARGET_CMPEQDI_T)
1597 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1598 emit_jump_insn (gen_branch_false (operands[3]));
1599 return true;
1601 msw_taken = NE;
1602 msw_taken_prob = prob;
1603 lsw_taken = NE;
1604 lsw_taken_prob = 0;
1605 break;
1606 case GTU: case GT:
1607 msw_taken = comparison;
1608 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1609 break;
1610 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1611 msw_skip = swap_condition (msw_taken);
1612 lsw_taken = GTU;
1613 break;
1614 case GEU: case GE:
1615 if (op2l == CONST0_RTX (SImode))
1616 msw_taken = comparison;
1617 else
1619 msw_taken = comparison == GE ? GT : GTU;
1620 msw_skip = swap_condition (msw_taken);
1621 lsw_taken = GEU;
1623 break;
1624 case LTU: case LT:
1625 msw_taken = comparison;
1626 if (op2l == CONST0_RTX (SImode))
1627 break;
1628 msw_skip = swap_condition (msw_taken);
1629 lsw_taken = LTU;
1630 break;
1631 case LEU: case LE:
1632 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1633 msw_taken = comparison;
1634 else
1636 lsw_taken = LEU;
1637 if (comparison == LE)
1638 msw_taken = LT;
1639 else if (op2h != CONST0_RTX (SImode))
1640 msw_taken = LTU;
1641 else
1642 break;
1643 msw_skip = swap_condition (msw_taken);
1645 break;
1646 default: return false;
1648 num_branches = ((msw_taken != CODE_FOR_nothing)
1649 + (msw_skip != CODE_FOR_nothing)
1650 + (lsw_taken != CODE_FOR_nothing));
1651 if (comparison != EQ && comparison != NE && num_branches > 1)
1653 if (!CONSTANT_P (operands[2])
1654 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1655 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1657 msw_taken_prob = prob / 2U;
1658 msw_skip_prob
1659 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1660 lsw_taken_prob = prob;
1662 else
1664 msw_taken_prob = prob;
1665 msw_skip_prob = REG_BR_PROB_BASE;
1666 /* ??? If we have a constant op2h, should we use that when
1667 calculating lsw_taken_prob? */
1668 lsw_taken_prob = prob;
1671 operands[1] = op1h;
1672 operands[2] = op2h;
1673 operands[4] = NULL_RTX;
1674 if (reload_completed
1675 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1676 && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
1678 emit_move_insn (scratch, operands[2]);
1679 operands[2] = scratch;
1681 if (msw_taken != CODE_FOR_nothing)
1682 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1683 if (msw_skip != CODE_FOR_nothing)
1685 rtx taken_label = operands[3];
1687 /* Operands were possibly modified, but msw_skip doesn't expect this.
1688 Always use the original ones. */
1689 if (msw_taken != CODE_FOR_nothing)
1691 operands[1] = op1h;
1692 operands[2] = op2h;
1695 operands[3] = skip_label = gen_label_rtx ();
1696 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1697 operands[3] = taken_label;
1699 operands[1] = op1l;
1700 operands[2] = op2l;
1701 if (lsw_taken != CODE_FOR_nothing)
1703 if (reload_completed
1704 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1705 operands[4] = scratch;
1706 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1708 if (msw_skip != CODE_FOR_nothing)
1709 emit_label (skip_label);
1710 return true;
1713 /* Prepare the operands for an scc instruction; make sure that the
1714 compare has been done. */
1716 prepare_scc_operands (enum rtx_code code)
1718 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1719 enum rtx_code oldcode = code;
1720 enum machine_mode mode;
1722 /* First need a compare insn. */
1723 switch (code)
1725 case NE:
1726 /* It isn't possible to handle this case. */
1727 gcc_unreachable ();
1728 case LT:
1729 code = GT;
1730 break;
1731 case LE:
1732 code = GE;
1733 break;
1734 case LTU:
1735 code = GTU;
1736 break;
1737 case LEU:
1738 code = GEU;
1739 break;
1740 default:
1741 break;
1743 if (code != oldcode)
1745 rtx tmp = sh_compare_op0;
1746 sh_compare_op0 = sh_compare_op1;
1747 sh_compare_op1 = tmp;
1750 mode = GET_MODE (sh_compare_op0);
1751 if (mode == VOIDmode)
1752 mode = GET_MODE (sh_compare_op1);
1754 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1755 if ((code != EQ && code != NE
1756 && (sh_compare_op1 != const0_rtx
1757 || code == GTU || code == GEU || code == LTU || code == LEU))
1758 || (mode == DImode && sh_compare_op1 != const0_rtx)
1759 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1760 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1762 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1763 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1764 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1765 gen_rtx_SET (VOIDmode, t_reg,
1766 gen_rtx_fmt_ee (code, SImode,
1767 sh_compare_op0, sh_compare_op1)),
1768 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1769 else
1770 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1771 gen_rtx_fmt_ee (code, SImode,
1772 sh_compare_op0, sh_compare_op1)));
1774 return t_reg;
1777 /* Called from the md file, set up the operands of a compare instruction. */
1779 void
1780 from_compare (rtx *operands, int code)
1782 enum machine_mode mode = GET_MODE (sh_compare_op0);
1783 rtx insn;
1784 if (mode == VOIDmode)
1785 mode = GET_MODE (sh_compare_op1);
1786 if (code != EQ
1787 || mode == DImode
1788 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1790 /* Force args into regs, since we can't use constants here. */
1791 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1792 if (sh_compare_op1 != const0_rtx
1793 || code == GTU || code == GEU
1794 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1795 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1797 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1799 from_compare (operands, GT);
1800 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1802 else
1803 insn = gen_rtx_SET (VOIDmode,
1804 gen_rtx_REG (SImode, T_REG),
1805 gen_rtx_fmt_ee (code, SImode,
1806 sh_compare_op0, sh_compare_op1));
1807 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1809 insn = gen_rtx_PARALLEL (VOIDmode,
1810 gen_rtvec (2, insn,
1811 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1812 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1814 else
1815 emit_insn (insn);
1818 /* Functions to output assembly code. */
1820 /* Return a sequence of instructions to perform DI or DF move.
1822 Since the SH cannot move a DI or DF in one instruction, we have
1823 to take care when we see overlapping source and dest registers. */
1825 const char *
1826 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1827 enum machine_mode mode)
1829 rtx dst = operands[0];
1830 rtx src = operands[1];
1832 if (GET_CODE (dst) == MEM
1833 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1834 return "mov.l %T1,%0\n\tmov.l %1,%0";
1836 if (register_operand (dst, mode)
1837 && register_operand (src, mode))
1839 if (REGNO (src) == MACH_REG)
1840 return "sts mach,%S0\n\tsts macl,%R0";
1842 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1843 when mov.d r1,r0 do r1->r0 then r2->r1. */
1845 if (REGNO (src) + 1 == REGNO (dst))
1846 return "mov %T1,%T0\n\tmov %1,%0";
1847 else
1848 return "mov %1,%0\n\tmov %T1,%T0";
1850 else if (GET_CODE (src) == CONST_INT)
1852 if (INTVAL (src) < 0)
1853 output_asm_insn ("mov #-1,%S0", operands);
1854 else
1855 output_asm_insn ("mov #0,%S0", operands);
1857 return "mov %1,%R0";
1859 else if (GET_CODE (src) == MEM)
1861 int ptrreg = -1;
1862 int dreg = REGNO (dst);
1863 rtx inside = XEXP (src, 0);
1865 switch (GET_CODE (inside))
1867 case REG:
1868 ptrreg = REGNO (inside);
1869 break;
1871 case SUBREG:
1872 ptrreg = subreg_regno (inside);
1873 break;
1875 case PLUS:
1876 ptrreg = REGNO (XEXP (inside, 0));
1877 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1878 an offsettable address. Unfortunately, offsettable addresses use
1879 QImode to check the offset, and a QImode offsettable address
1880 requires r0 for the other operand, which is not currently
1881 supported, so we can't use the 'o' constraint.
1882 Thus we must check for and handle r0+REG addresses here.
1883 We punt for now, since this is likely very rare. */
1884 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1885 break;
1887 case LABEL_REF:
1888 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1889 case POST_INC:
1890 return "mov.l %1,%0\n\tmov.l %1,%T0";
1891 default:
1892 gcc_unreachable ();
1895 /* Work out the safe way to copy. Copy into the second half first. */
1896 if (dreg == ptrreg)
1897 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1900 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1903 /* Print an instruction which would have gone into a delay slot after
1904 another instruction, but couldn't because the other instruction expanded
1905 into a sequence where putting the slot insn at the end wouldn't work. */
1907 static void
1908 print_slot (rtx insn)
1910 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1912 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1915 const char *
1916 output_far_jump (rtx insn, rtx op)
1918 struct { rtx lab, reg, op; } this;
1919 rtx braf_base_lab = NULL_RTX;
1920 const char *jump;
1921 int far;
1922 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1923 rtx prev;
1925 this.lab = gen_label_rtx ();
1927 if (TARGET_SH2
1928 && offset >= -32764
1929 && offset - get_attr_length (insn) <= 32766)
1931 far = 0;
1932 jump = "mov.w %O0,%1; braf %1";
1934 else
1936 far = 1;
1937 if (flag_pic)
1939 if (TARGET_SH2)
1940 jump = "mov.l %O0,%1; braf %1";
1941 else
1942 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1944 else
1945 jump = "mov.l %O0,%1; jmp @%1";
1947 /* If we have a scratch register available, use it. */
1948 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1949 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1951 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1952 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1953 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1954 output_asm_insn (jump, &this.lab);
1955 if (dbr_sequence_length ())
1956 print_slot (final_sequence);
1957 else
1958 output_asm_insn ("nop", 0);
1960 else
1962 /* Output the delay slot insn first if any. */
1963 if (dbr_sequence_length ())
1964 print_slot (final_sequence);
1966 this.reg = gen_rtx_REG (SImode, 13);
1967 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1968 Fortunately, MACL is fixed and call-clobbered, and we never
1969 need its value across jumps, so save r13 in it instead of in
1970 the stack. */
1971 if (TARGET_SH5)
1972 output_asm_insn ("lds r13, macl", 0);
1973 else
1974 output_asm_insn ("mov.l r13,@-r15", 0);
1975 output_asm_insn (jump, &this.lab);
1976 if (TARGET_SH5)
1977 output_asm_insn ("sts macl, r13", 0);
1978 else
1979 output_asm_insn ("mov.l @r15+,r13", 0);
1981 if (far && flag_pic && TARGET_SH2)
1983 braf_base_lab = gen_label_rtx ();
1984 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1985 CODE_LABEL_NUMBER (braf_base_lab));
1987 if (far)
1988 output_asm_insn (".align 2", 0);
1989 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1990 this.op = op;
1991 if (far && flag_pic)
1993 if (TARGET_SH2)
1994 this.lab = braf_base_lab;
1995 output_asm_insn (".long %O2-%O0", &this.lab);
1997 else
1998 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1999 return "";
2002 /* Local label counter, used for constants in the pool and inside
2003 pattern branches. */
2005 static int lf = 100;
2007 /* Output code for ordinary branches. */
2009 const char *
2010 output_branch (int logic, rtx insn, rtx *operands)
2012 switch (get_attr_length (insn))
2014 case 6:
2015 /* This can happen if filling the delay slot has caused a forward
2016 branch to exceed its range (we could reverse it, but only
2017 when we know we won't overextend other branches; this should
2018 best be handled by relaxation).
2019 It can also happen when other condbranches hoist delay slot insn
2020 from their destination, thus leading to code size increase.
2021 But the branch will still be in the range -4092..+4098 bytes. */
2023 if (! TARGET_RELAX)
2025 int label = lf++;
2026 /* The call to print_slot will clobber the operands. */
2027 rtx op0 = operands[0];
2029 /* If the instruction in the delay slot is annulled (true), then
2030 there is no delay slot where we can put it now. The only safe
2031 place for it is after the label. final will do that by default. */
2033 if (final_sequence
2034 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2035 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2037 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2038 ASSEMBLER_DIALECT ? "/" : ".", label);
2039 print_slot (final_sequence);
2041 else
2042 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2044 output_asm_insn ("bra\t%l0", &op0);
2045 fprintf (asm_out_file, "\tnop\n");
2046 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2048 return "";
2050 /* When relaxing, handle this like a short branch. The linker
2051 will fix it up if it still doesn't fit after relaxation. */
2052 case 2:
2053 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2055 /* These are for SH2e, in which we have to account for the
2056 extra nop because of the hardware bug in annulled branches. */
2057 case 8:
2058 if (! TARGET_RELAX)
2060 int label = lf++;
2062 gcc_assert (!final_sequence
2063 || !(INSN_ANNULLED_BRANCH_P
2064 (XVECEXP (final_sequence, 0, 0))));
2065 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2066 logic ? "f" : "t",
2067 ASSEMBLER_DIALECT ? "/" : ".", label);
2068 fprintf (asm_out_file, "\tnop\n");
2069 output_asm_insn ("bra\t%l0", operands);
2070 fprintf (asm_out_file, "\tnop\n");
2071 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2073 return "";
2075 /* When relaxing, fall through. */
2076 case 4:
2078 char buffer[10];
2080 sprintf (buffer, "b%s%ss\t%%l0",
2081 logic ? "t" : "f",
2082 ASSEMBLER_DIALECT ? "/" : ".");
2083 output_asm_insn (buffer, &operands[0]);
2084 return "nop";
2087 default:
2088 /* There should be no longer branches now - that would
2089 indicate that something has destroyed the branches set
2090 up in machine_dependent_reorg. */
2091 gcc_unreachable ();
2095 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2096 fill in operands 9 as a label to the successor insn.
2097 We try to use jump threading where possible.
2098 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2099 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2100 follow jmp and bt, if the address is in range. */
2101 const char *
2102 output_branchy_insn (enum rtx_code code, const char *template,
2103 rtx insn, rtx *operands)
2105 rtx next_insn = NEXT_INSN (insn);
2107 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2109 rtx src = SET_SRC (PATTERN (next_insn));
2110 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2112 /* Following branch not taken */
2113 operands[9] = gen_label_rtx ();
2114 emit_label_after (operands[9], next_insn);
2115 INSN_ADDRESSES_NEW (operands[9],
2116 INSN_ADDRESSES (INSN_UID (next_insn))
2117 + get_attr_length (next_insn));
2118 return template;
2120 else
2122 int offset = (branch_dest (next_insn)
2123 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2124 if (offset >= -252 && offset <= 258)
2126 if (GET_CODE (src) == IF_THEN_ELSE)
2127 /* branch_true */
2128 src = XEXP (src, 1);
2129 operands[9] = src;
2130 return template;
2134 operands[9] = gen_label_rtx ();
2135 emit_label_after (operands[9], insn);
2136 INSN_ADDRESSES_NEW (operands[9],
2137 INSN_ADDRESSES (INSN_UID (insn))
2138 + get_attr_length (insn));
2139 return template;
2142 const char *
2143 output_ieee_ccmpeq (rtx insn, rtx *operands)
2145 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2146 insn, operands);
2149 /* Output the start of the assembler file. */
2151 static void
2152 sh_file_start (void)
2154 default_file_start ();
2156 #ifdef SYMBIAN
2157 /* Declare the .directive section before it is used. */
2158 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2159 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2160 #endif
2162 if (TARGET_ELF)
2163 /* We need to show the text section with the proper
2164 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2165 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2166 will complain. We can teach GAS specifically about the
2167 default attributes for our choice of text section, but
2168 then we would have to change GAS again if/when we change
2169 the text section name. */
2170 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2171 else
2172 /* Switch to the data section so that the coffsem symbol
2173 isn't in the text section. */
2174 switch_to_section (data_section);
2176 if (TARGET_LITTLE_ENDIAN)
2177 fputs ("\t.little\n", asm_out_file);
2179 if (!TARGET_ELF)
2181 if (TARGET_SHCOMPACT)
2182 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2183 else if (TARGET_SHMEDIA)
2184 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2185 TARGET_SHMEDIA64 ? 64 : 32);
2189 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2191 static bool
2192 unspec_caller_rtx_p (rtx pat)
2194 switch (GET_CODE (pat))
2196 case CONST:
2197 return unspec_caller_rtx_p (XEXP (pat, 0));
2198 case PLUS:
2199 case MINUS:
2200 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2201 return true;
2202 return unspec_caller_rtx_p (XEXP (pat, 1));
2203 case UNSPEC:
2204 if (XINT (pat, 1) == UNSPEC_CALLER)
2205 return true;
2206 default:
2207 break;
2210 return false;
2213 /* Indicate that INSN cannot be duplicated. This is true for insn
2214 that generates a unique label. */
2216 static bool
2217 sh_cannot_copy_insn_p (rtx insn)
2219 rtx pat;
2221 if (!reload_completed || !flag_pic)
2222 return false;
2224 if (GET_CODE (insn) != INSN)
2225 return false;
2226 if (asm_noperands (insn) >= 0)
2227 return false;
2229 pat = PATTERN (insn);
2230 if (GET_CODE (pat) != SET)
2231 return false;
2232 pat = SET_SRC (pat);
2234 if (unspec_caller_rtx_p (pat))
2235 return true;
2237 return false;
2240 /* Actual number of instructions used to make a shift by N. */
2241 static const char ashiftrt_insns[] =
2242 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2244 /* Left shift and logical right shift are the same. */
2245 static const char shift_insns[] =
2246 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2248 /* Individual shift amounts needed to get the above length sequences.
2249 One bit right shifts clobber the T bit, so when possible, put one bit
2250 shifts in the middle of the sequence, so the ends are eligible for
2251 branch delay slots. */
2252 static const short shift_amounts[32][5] = {
2253 {0}, {1}, {2}, {2, 1},
2254 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2255 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2256 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2257 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2258 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2259 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2260 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2262 /* Likewise, but for shift amounts < 16, up to three highmost bits
2263 might be clobbered. This is typically used when combined with some
2264 kind of sign or zero extension. */
2266 static const char ext_shift_insns[] =
2267 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2269 static const short ext_shift_amounts[32][4] = {
2270 {0}, {1}, {2}, {2, 1},
2271 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2272 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2273 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2274 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2275 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2276 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2277 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2279 /* Assuming we have a value that has been sign-extended by at least one bit,
2280 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2281 to shift it by N without data loss, and quicker than by other means? */
2282 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2284 /* This is used in length attributes in sh.md to help compute the length
2285 of arbitrary constant shift instructions. */
2288 shift_insns_rtx (rtx insn)
2290 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2291 int shift_count = INTVAL (XEXP (set_src, 1));
2292 enum rtx_code shift_code = GET_CODE (set_src);
2294 switch (shift_code)
2296 case ASHIFTRT:
2297 return ashiftrt_insns[shift_count];
2298 case LSHIFTRT:
2299 case ASHIFT:
2300 return shift_insns[shift_count];
2301 default:
2302 gcc_unreachable ();
2306 /* Return the cost of a shift. */
2308 static inline int
2309 shiftcosts (rtx x)
2311 int value;
2313 if (TARGET_SHMEDIA)
2314 return 1;
2316 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2318 if (GET_MODE (x) == DImode
2319 && GET_CODE (XEXP (x, 1)) == CONST_INT
2320 && INTVAL (XEXP (x, 1)) == 1)
2321 return 2;
2323 /* Everything else is invalid, because there is no pattern for it. */
2324 return MAX_COST;
2326 /* If shift by a non constant, then this will be expensive. */
2327 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2328 return SH_DYNAMIC_SHIFT_COST;
2330 value = INTVAL (XEXP (x, 1));
2332 /* Otherwise, return the true cost in instructions. */
2333 if (GET_CODE (x) == ASHIFTRT)
2335 int cost = ashiftrt_insns[value];
2336 /* If SH3, then we put the constant in a reg and use shad. */
2337 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2338 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2339 return cost;
2341 else
2342 return shift_insns[value];
2345 /* Return the cost of an AND operation. */
2347 static inline int
2348 andcosts (rtx x)
2350 int i;
2352 /* Anding with a register is a single cycle and instruction. */
2353 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2354 return 1;
2356 i = INTVAL (XEXP (x, 1));
2358 if (TARGET_SHMEDIA)
2360 if (satisfies_constraint_I10 (XEXP (x, 1))
2361 || satisfies_constraint_J16 (XEXP (x, 1)))
2362 return 1;
2363 else
2364 return 1 + rtx_cost (XEXP (x, 1), AND);
2367 /* These constants are single cycle extu.[bw] instructions. */
2368 if (i == 0xff || i == 0xffff)
2369 return 1;
2370 /* Constants that can be used in an and immediate instruction in a single
2371 cycle, but this requires r0, so make it a little more expensive. */
2372 if (CONST_OK_FOR_K08 (i))
2373 return 2;
2374 /* Constants that can be loaded with a mov immediate and an and.
2375 This case is probably unnecessary. */
2376 if (CONST_OK_FOR_I08 (i))
2377 return 2;
2378 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2379 This case is probably unnecessary. */
2380 return 3;
2383 /* Return the cost of an addition or a subtraction. */
2385 static inline int
2386 addsubcosts (rtx x)
2388 /* Adding a register is a single cycle insn. */
2389 if (GET_CODE (XEXP (x, 1)) == REG
2390 || GET_CODE (XEXP (x, 1)) == SUBREG)
2391 return 1;
2393 /* Likewise for small constants. */
2394 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2395 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2396 return 1;
2398 if (TARGET_SHMEDIA)
2399 switch (GET_CODE (XEXP (x, 1)))
2401 case CONST:
2402 case LABEL_REF:
2403 case SYMBOL_REF:
2404 return TARGET_SHMEDIA64 ? 5 : 3;
2406 case CONST_INT:
2407 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2408 return 2;
2409 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2410 return 3;
2411 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2412 return 4;
2414 /* Fall through. */
2415 default:
2416 return 5;
2419 /* Any other constant requires a 2 cycle pc-relative load plus an
2420 addition. */
2421 return 3;
2424 /* Return the cost of a multiply. */
2425 static inline int
2426 multcosts (rtx x ATTRIBUTE_UNUSED)
2428 if (sh_multcost >= 0)
2429 return sh_multcost;
2430 if (TARGET_SHMEDIA)
2431 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2432 accept constants. Ideally, we would use a cost of one or two and
2433 add the cost of the operand, but disregard the latter when inside loops
2434 and loop invariant code motion is still to follow.
2435 Using a multiply first and splitting it later if it's a loss
2436 doesn't work because of different sign / zero extension semantics
2437 of multiplies vs. shifts. */
2438 return TARGET_SMALLCODE ? 2 : 3;
2440 if (TARGET_SH2)
2442 /* We have a mul insn, so we can never take more than the mul and the
2443 read of the mac reg, but count more because of the latency and extra
2444 reg usage. */
2445 if (TARGET_SMALLCODE)
2446 return 2;
2447 return 3;
2450 /* If we're aiming at small code, then just count the number of
2451 insns in a multiply call sequence. */
2452 if (TARGET_SMALLCODE)
2453 return 5;
2455 /* Otherwise count all the insns in the routine we'd be calling too. */
2456 return 20;
2459 /* Compute a (partial) cost for rtx X. Return true if the complete
2460 cost has been computed, and false if subexpressions should be
2461 scanned. In either case, *TOTAL contains the cost result. */
2463 static bool
2464 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2466 switch (code)
2468 case CONST_INT:
2469 if (TARGET_SHMEDIA)
2471 if (INTVAL (x) == 0)
2472 *total = 0;
2473 else if (outer_code == AND && and_operand ((x), DImode))
2474 *total = 0;
2475 else if ((outer_code == IOR || outer_code == XOR
2476 || outer_code == PLUS)
2477 && CONST_OK_FOR_I10 (INTVAL (x)))
2478 *total = 0;
2479 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2480 *total = COSTS_N_INSNS (outer_code != SET);
2481 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2482 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2483 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2484 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2485 else
2486 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2487 return true;
2489 if (CONST_OK_FOR_I08 (INTVAL (x)))
2490 *total = 0;
2491 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2492 && CONST_OK_FOR_K08 (INTVAL (x)))
2493 *total = 1;
2494 /* prepare_cmp_insn will force costly constants int registers before
2495 the cbranch[sd]i4 patterns can see them, so preserve potentially
2496 interesting ones not covered by I08 above. */
2497 else if (outer_code == COMPARE
2498 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2499 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2500 || INTVAL (x) == 0x7fffffff
2501 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2502 *total = 1;
2503 else
2504 *total = 8;
2505 return true;
2507 case CONST:
2508 case LABEL_REF:
2509 case SYMBOL_REF:
2510 if (TARGET_SHMEDIA64)
2511 *total = COSTS_N_INSNS (4);
2512 else if (TARGET_SHMEDIA32)
2513 *total = COSTS_N_INSNS (2);
2514 else
2515 *total = 5;
2516 return true;
2518 case CONST_DOUBLE:
2519 if (TARGET_SHMEDIA)
2520 *total = COSTS_N_INSNS (4);
2521 /* prepare_cmp_insn will force costly constants int registers before
2522 the cbranchdi4 pattern can see them, so preserve potentially
2523 interesting ones. */
2524 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2525 *total = 1;
2526 else
2527 *total = 10;
2528 return true;
2529 case CONST_VECTOR:
2530 if (x == CONST0_RTX (GET_MODE (x)))
2531 *total = 0;
2532 else if (sh_1el_vec (x, VOIDmode))
2533 *total = outer_code != SET;
2534 if (sh_rep_vec (x, VOIDmode))
2535 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2536 + (outer_code != SET));
2537 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2538 return true;
2540 case PLUS:
2541 case MINUS:
2542 *total = COSTS_N_INSNS (addsubcosts (x));
2543 return true;
2545 case AND:
2546 *total = COSTS_N_INSNS (andcosts (x));
2547 return true;
2549 case MULT:
2550 *total = COSTS_N_INSNS (multcosts (x));
2551 return true;
2553 case ASHIFT:
2554 case ASHIFTRT:
2555 case LSHIFTRT:
2556 *total = COSTS_N_INSNS (shiftcosts (x));
2557 return true;
2559 case DIV:
2560 case UDIV:
2561 case MOD:
2562 case UMOD:
2563 *total = COSTS_N_INSNS (20);
2564 return true;
2566 case PARALLEL:
2567 if (sh_1el_vec (x, VOIDmode))
2568 *total = outer_code != SET;
2569 if (sh_rep_vec (x, VOIDmode))
2570 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2571 + (outer_code != SET));
2572 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2573 return true;
2575 case FLOAT:
2576 case FIX:
2577 *total = 100;
2578 return true;
2580 default:
2581 return false;
2585 /* Compute the cost of an address. For the SH, all valid addresses are
2586 the same cost. Use a slightly higher cost for reg + reg addressing,
2587 since it increases pressure on r0. */
2589 static int
2590 sh_address_cost (rtx X)
2592 return (GET_CODE (X) == PLUS
2593 && ! CONSTANT_P (XEXP (X, 1))
2594 && ! TARGET_SHMEDIA ? 1 : 0);
2597 /* Code to expand a shift. */
2599 void
2600 gen_ashift (int type, int n, rtx reg)
2602 /* Negative values here come from the shift_amounts array. */
2603 if (n < 0)
2605 if (type == ASHIFT)
2606 type = LSHIFTRT;
2607 else
2608 type = ASHIFT;
2609 n = -n;
2612 switch (type)
2614 case ASHIFTRT:
2615 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2616 break;
2617 case LSHIFTRT:
2618 if (n == 1)
2619 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2620 else
2621 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2622 break;
2623 case ASHIFT:
2624 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2625 break;
2629 /* Same for HImode */
2631 void
2632 gen_ashift_hi (int type, int n, rtx reg)
2634 /* Negative values here come from the shift_amounts array. */
2635 if (n < 0)
2637 if (type == ASHIFT)
2638 type = LSHIFTRT;
2639 else
2640 type = ASHIFT;
2641 n = -n;
2644 switch (type)
2646 case ASHIFTRT:
2647 case LSHIFTRT:
2648 /* We don't have HImode right shift operations because using the
2649 ordinary 32 bit shift instructions for that doesn't generate proper
2650 zero/sign extension.
2651 gen_ashift_hi is only called in contexts where we know that the
2652 sign extension works out correctly. */
2654 int offset = 0;
2655 if (GET_CODE (reg) == SUBREG)
2657 offset = SUBREG_BYTE (reg);
2658 reg = SUBREG_REG (reg);
2660 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2661 break;
2663 case ASHIFT:
2664 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2665 break;
2669 /* Output RTL to split a constant shift into its component SH constant
2670 shift instructions. */
2672 void
2673 gen_shifty_op (int code, rtx *operands)
2675 int value = INTVAL (operands[2]);
2676 int max, i;
2678 /* Truncate the shift count in case it is out of bounds. */
2679 value = value & 0x1f;
2681 if (value == 31)
2683 if (code == LSHIFTRT)
2685 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2686 emit_insn (gen_movt (operands[0]));
2687 return;
2689 else if (code == ASHIFT)
2691 /* There is a two instruction sequence for 31 bit left shifts,
2692 but it requires r0. */
2693 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2695 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2696 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2697 return;
2701 else if (value == 0)
2703 /* This can happen even when optimizing, if there were subregs before
2704 reload. Don't output a nop here, as this is never optimized away;
2705 use a no-op move instead. */
2706 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2707 return;
2710 max = shift_insns[value];
2711 for (i = 0; i < max; i++)
2712 gen_ashift (code, shift_amounts[value][i], operands[0]);
2715 /* Same as above, but optimized for values where the topmost bits don't
2716 matter. */
2718 void
2719 gen_shifty_hi_op (int code, rtx *operands)
2721 int value = INTVAL (operands[2]);
2722 int max, i;
2723 void (*gen_fun) (int, int, rtx);
2725 /* This operation is used by and_shl for SImode values with a few
2726 high bits known to be cleared. */
2727 value &= 31;
2728 if (value == 0)
2730 emit_insn (gen_nop ());
2731 return;
2734 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2735 if (code == ASHIFT)
2737 max = ext_shift_insns[value];
2738 for (i = 0; i < max; i++)
2739 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2741 else
2742 /* When shifting right, emit the shifts in reverse order, so that
2743 solitary negative values come first. */
2744 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2745 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2748 /* Output RTL for an arithmetic right shift. */
2750 /* ??? Rewrite to use super-optimizer sequences. */
2753 expand_ashiftrt (rtx *operands)
2755 rtx wrk;
2756 char func[18];
2757 int value;
2759 if (TARGET_SH3)
2761 if (GET_CODE (operands[2]) != CONST_INT)
2763 rtx count = copy_to_mode_reg (SImode, operands[2]);
2764 emit_insn (gen_negsi2 (count, count));
2765 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2766 return 1;
2768 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2769 > 1 + SH_DYNAMIC_SHIFT_COST)
2771 rtx count
2772 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2773 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2774 return 1;
2777 if (GET_CODE (operands[2]) != CONST_INT)
2778 return 0;
2780 value = INTVAL (operands[2]) & 31;
2782 if (value == 31)
2784 /* If we are called from abs expansion, arrange things so that we
2785 we can use a single MT instruction that doesn't clobber the source,
2786 if LICM can hoist out the load of the constant zero. */
2787 if (currently_expanding_to_rtl)
2789 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2790 operands[1]));
2791 emit_insn (gen_mov_neg_si_t (operands[0]));
2792 return 1;
2794 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2795 return 1;
2797 else if (value >= 16 && value <= 19)
2799 wrk = gen_reg_rtx (SImode);
2800 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2801 value -= 16;
2802 while (value--)
2803 gen_ashift (ASHIFTRT, 1, wrk);
2804 emit_move_insn (operands[0], wrk);
2805 return 1;
2807 /* Expand a short sequence inline, longer call a magic routine. */
2808 else if (value <= 5)
2810 wrk = gen_reg_rtx (SImode);
2811 emit_move_insn (wrk, operands[1]);
2812 while (value--)
2813 gen_ashift (ASHIFTRT, 1, wrk);
2814 emit_move_insn (operands[0], wrk);
2815 return 1;
2818 wrk = gen_reg_rtx (Pmode);
2820 /* Load the value into an arg reg and call a helper. */
2821 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2822 sprintf (func, "__ashiftrt_r4_%d", value);
2823 function_symbol (wrk, func, SFUNC_STATIC);
2824 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2825 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2826 return 1;
2830 sh_dynamicalize_shift_p (rtx count)
2832 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2835 /* Try to find a good way to implement the combiner pattern
2836 [(set (match_operand:SI 0 "register_operand" "r")
2837 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2838 (match_operand:SI 2 "const_int_operand" "n"))
2839 (match_operand:SI 3 "const_int_operand" "n"))) .
2840 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2841 return 0 for simple right / left or left/right shift combination.
2842 return 1 for a combination of shifts with zero_extend.
2843 return 2 for a combination of shifts with an AND that needs r0.
2844 return 3 for a combination of shifts with an AND that needs an extra
2845 scratch register, when the three highmost bits of the AND mask are clear.
2846 return 4 for a combination of shifts with an AND that needs an extra
2847 scratch register, when any of the three highmost bits of the AND mask
2848 is set.
2849 If ATTRP is set, store an initial right shift width in ATTRP[0],
2850 and the instruction length in ATTRP[1] . These values are not valid
2851 when returning 0.
2852 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2853 shift_amounts for the last shift value that is to be used before the
2854 sign extend. */
2856 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2858 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2859 int left = INTVAL (left_rtx), right;
2860 int best = 0;
2861 int cost, best_cost = 10000;
2862 int best_right = 0, best_len = 0;
2863 int i;
2864 int can_ext;
2866 if (left < 0 || left > 31)
2867 return 0;
2868 if (GET_CODE (mask_rtx) == CONST_INT)
2869 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2870 else
2871 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2872 /* Can this be expressed as a right shift / left shift pair? */
2873 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2874 right = exact_log2 (lsb);
2875 mask2 = ~(mask + lsb - 1);
2876 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2877 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2878 if (! mask2)
2879 best_cost = shift_insns[right] + shift_insns[right + left];
2880 /* mask has no trailing zeroes <==> ! right */
2881 else if (! right && mask2 == ~(lsb2 - 1))
2883 int late_right = exact_log2 (lsb2);
2884 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2886 /* Try to use zero extend. */
2887 if (mask2 == ~(lsb2 - 1))
2889 int width, first;
2891 for (width = 8; width <= 16; width += 8)
2893 /* Can we zero-extend right away? */
2894 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2896 cost
2897 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2898 if (cost < best_cost)
2900 best = 1;
2901 best_cost = cost;
2902 best_right = right;
2903 best_len = cost;
2904 if (attrp)
2905 attrp[2] = -1;
2907 continue;
2909 /* ??? Could try to put zero extend into initial right shift,
2910 or even shift a bit left before the right shift. */
2911 /* Determine value of first part of left shift, to get to the
2912 zero extend cut-off point. */
2913 first = width - exact_log2 (lsb2) + right;
2914 if (first >= 0 && right + left - first >= 0)
2916 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2917 + ext_shift_insns[right + left - first];
2918 if (cost < best_cost)
2920 best = 1;
2921 best_cost = cost;
2922 best_right = right;
2923 best_len = cost;
2924 if (attrp)
2925 attrp[2] = first;
2930 /* Try to use r0 AND pattern */
2931 for (i = 0; i <= 2; i++)
2933 if (i > right)
2934 break;
2935 if (! CONST_OK_FOR_K08 (mask >> i))
2936 continue;
2937 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2938 if (cost < best_cost)
2940 best = 2;
2941 best_cost = cost;
2942 best_right = i;
2943 best_len = cost - 1;
2946 /* Try to use a scratch register to hold the AND operand. */
2947 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2948 for (i = 0; i <= 2; i++)
2950 if (i > right)
2951 break;
2952 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2953 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2954 if (cost < best_cost)
2956 best = 4 - can_ext;
2957 best_cost = cost;
2958 best_right = i;
2959 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2963 if (attrp)
2965 attrp[0] = best_right;
2966 attrp[1] = best_len;
2968 return best;
2971 /* This is used in length attributes of the unnamed instructions
2972 corresponding to shl_and_kind return values of 1 and 2. */
2974 shl_and_length (rtx insn)
2976 rtx set_src, left_rtx, mask_rtx;
2977 int attributes[3];
2979 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2980 left_rtx = XEXP (XEXP (set_src, 0), 1);
2981 mask_rtx = XEXP (set_src, 1);
2982 shl_and_kind (left_rtx, mask_rtx, attributes);
2983 return attributes[1];
2986 /* This is used in length attribute of the and_shl_scratch instruction. */
2989 shl_and_scr_length (rtx insn)
2991 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2992 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2993 rtx op = XEXP (set_src, 0);
2994 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2995 op = XEXP (XEXP (op, 0), 0);
2996 return len + shift_insns[INTVAL (XEXP (op, 1))];
2999 /* Generate rtl for instructions for which shl_and_kind advised a particular
3000 method of generating them, i.e. returned zero. */
3003 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3005 int attributes[3];
3006 unsigned HOST_WIDE_INT mask;
3007 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3008 int right, total_shift;
3009 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3011 right = attributes[0];
3012 total_shift = INTVAL (left_rtx) + right;
3013 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3014 switch (kind)
3016 default:
3017 return -1;
3018 case 1:
3020 int first = attributes[2];
3021 rtx operands[3];
3023 if (first < 0)
3025 emit_insn ((mask << right) <= 0xff
3026 ? gen_zero_extendqisi2 (dest,
3027 gen_lowpart (QImode, source))
3028 : gen_zero_extendhisi2 (dest,
3029 gen_lowpart (HImode, source)));
3030 source = dest;
3032 if (source != dest)
3033 emit_insn (gen_movsi (dest, source));
3034 operands[0] = dest;
3035 if (right)
3037 operands[2] = GEN_INT (right);
3038 gen_shifty_hi_op (LSHIFTRT, operands);
3040 if (first > 0)
3042 operands[2] = GEN_INT (first);
3043 gen_shifty_hi_op (ASHIFT, operands);
3044 total_shift -= first;
3045 mask <<= first;
3047 if (first >= 0)
3048 emit_insn (mask <= 0xff
3049 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3050 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3051 if (total_shift > 0)
3053 operands[2] = GEN_INT (total_shift);
3054 gen_shifty_hi_op (ASHIFT, operands);
3056 break;
3058 case 4:
3059 shift_gen_fun = gen_shifty_op;
3060 case 3:
3061 /* If the topmost bit that matters is set, set the topmost bits
3062 that don't matter. This way, we might be able to get a shorter
3063 signed constant. */
3064 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3065 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3066 case 2:
3067 /* Don't expand fine-grained when combining, because that will
3068 make the pattern fail. */
3069 if (currently_expanding_to_rtl
3070 || reload_in_progress || reload_completed)
3072 rtx operands[3];
3074 /* Cases 3 and 4 should be handled by this split
3075 only while combining */
3076 gcc_assert (kind <= 2);
3077 if (right)
3079 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3080 source = dest;
3082 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3083 if (total_shift)
3085 operands[0] = dest;
3086 operands[1] = dest;
3087 operands[2] = GEN_INT (total_shift);
3088 shift_gen_fun (ASHIFT, operands);
3090 break;
3092 else
3094 int neg = 0;
3095 if (kind != 4 && total_shift < 16)
3097 neg = -ext_shift_amounts[total_shift][1];
3098 if (neg > 0)
3099 neg -= ext_shift_amounts[total_shift][2];
3100 else
3101 neg = 0;
3103 emit_insn (gen_and_shl_scratch (dest, source,
3104 GEN_INT (right),
3105 GEN_INT (mask),
3106 GEN_INT (total_shift + neg),
3107 GEN_INT (neg)));
3108 emit_insn (gen_movsi (dest, dest));
3109 break;
3112 return 0;
3115 /* Try to find a good way to implement the combiner pattern
3116 [(set (match_operand:SI 0 "register_operand" "=r")
3117 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3118 (match_operand:SI 2 "const_int_operand" "n")
3119 (match_operand:SI 3 "const_int_operand" "n")
3120 (const_int 0)))
3121 (clobber (reg:SI T_REG))]
3122 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3123 return 0 for simple left / right shift combination.
3124 return 1 for left shift / 8 bit sign extend / left shift.
3125 return 2 for left shift / 16 bit sign extend / left shift.
3126 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3127 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3128 return 5 for left shift / 16 bit sign extend / right shift
3129 return 6 for < 8 bit sign extend / left shift.
3130 return 7 for < 8 bit sign extend / left shift / single right shift.
3131 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3134 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3136 int left, size, insize, ext;
3137 int cost = 0, best_cost;
3138 int kind;
3140 left = INTVAL (left_rtx);
3141 size = INTVAL (size_rtx);
3142 insize = size - left;
3143 gcc_assert (insize > 0);
3144 /* Default to left / right shift. */
3145 kind = 0;
3146 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3147 if (size <= 16)
3149 /* 16 bit shift / sign extend / 16 bit shift */
3150 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3151 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3152 below, by alternative 3 or something even better. */
3153 if (cost < best_cost)
3155 kind = 5;
3156 best_cost = cost;
3159 /* Try a plain sign extend between two shifts. */
3160 for (ext = 16; ext >= insize; ext -= 8)
3162 if (ext <= size)
3164 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3165 if (cost < best_cost)
3167 kind = ext / (unsigned) 8;
3168 best_cost = cost;
3171 /* Check if we can do a sloppy shift with a final signed shift
3172 restoring the sign. */
3173 if (EXT_SHIFT_SIGNED (size - ext))
3174 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3175 /* If not, maybe it's still cheaper to do the second shift sloppy,
3176 and do a final sign extend? */
3177 else if (size <= 16)
3178 cost = ext_shift_insns[ext - insize] + 1
3179 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3180 else
3181 continue;
3182 if (cost < best_cost)
3184 kind = ext / (unsigned) 8 + 2;
3185 best_cost = cost;
3188 /* Check if we can sign extend in r0 */
3189 if (insize < 8)
3191 cost = 3 + shift_insns[left];
3192 if (cost < best_cost)
3194 kind = 6;
3195 best_cost = cost;
3197 /* Try the same with a final signed shift. */
3198 if (left < 31)
3200 cost = 3 + ext_shift_insns[left + 1] + 1;
3201 if (cost < best_cost)
3203 kind = 7;
3204 best_cost = cost;
3208 if (TARGET_SH3)
3210 /* Try to use a dynamic shift. */
3211 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3212 if (cost < best_cost)
3214 kind = 0;
3215 best_cost = cost;
3218 if (costp)
3219 *costp = cost;
3220 return kind;
3223 /* Function to be used in the length attribute of the instructions
3224 implementing this pattern. */
3227 shl_sext_length (rtx insn)
3229 rtx set_src, left_rtx, size_rtx;
3230 int cost;
3232 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3233 left_rtx = XEXP (XEXP (set_src, 0), 1);
3234 size_rtx = XEXP (set_src, 1);
3235 shl_sext_kind (left_rtx, size_rtx, &cost);
3236 return cost;
3239 /* Generate rtl for this pattern */
3242 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3244 int kind;
3245 int left, size, insize, cost;
3246 rtx operands[3];
3248 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3249 left = INTVAL (left_rtx);
3250 size = INTVAL (size_rtx);
3251 insize = size - left;
3252 switch (kind)
3254 case 1:
3255 case 2:
3256 case 3:
3257 case 4:
3259 int ext = kind & 1 ? 8 : 16;
3260 int shift2 = size - ext;
3262 /* Don't expand fine-grained when combining, because that will
3263 make the pattern fail. */
3264 if (! currently_expanding_to_rtl
3265 && ! reload_in_progress && ! reload_completed)
3267 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3268 emit_insn (gen_movsi (dest, source));
3269 break;
3271 if (dest != source)
3272 emit_insn (gen_movsi (dest, source));
3273 operands[0] = dest;
3274 if (ext - insize)
3276 operands[2] = GEN_INT (ext - insize);
3277 gen_shifty_hi_op (ASHIFT, operands);
3279 emit_insn (kind & 1
3280 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3281 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3282 if (kind <= 2)
3284 if (shift2)
3286 operands[2] = GEN_INT (shift2);
3287 gen_shifty_op (ASHIFT, operands);
3290 else
3292 if (shift2 > 0)
3294 if (EXT_SHIFT_SIGNED (shift2))
3296 operands[2] = GEN_INT (shift2 + 1);
3297 gen_shifty_op (ASHIFT, operands);
3298 operands[2] = const1_rtx;
3299 gen_shifty_op (ASHIFTRT, operands);
3300 break;
3302 operands[2] = GEN_INT (shift2);
3303 gen_shifty_hi_op (ASHIFT, operands);
3305 else if (shift2)
3307 operands[2] = GEN_INT (-shift2);
3308 gen_shifty_hi_op (LSHIFTRT, operands);
3310 emit_insn (size <= 8
3311 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3312 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3314 break;
3316 case 5:
3318 int i = 16 - size;
3319 if (! currently_expanding_to_rtl
3320 && ! reload_in_progress && ! reload_completed)
3321 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3322 else
3324 operands[0] = dest;
3325 operands[2] = GEN_INT (16 - insize);
3326 gen_shifty_hi_op (ASHIFT, operands);
3327 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3329 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3330 while (--i >= 0)
3331 gen_ashift (ASHIFTRT, 1, dest);
3332 break;
3334 case 6:
3335 case 7:
3336 /* Don't expand fine-grained when combining, because that will
3337 make the pattern fail. */
3338 if (! currently_expanding_to_rtl
3339 && ! reload_in_progress && ! reload_completed)
3341 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3342 emit_insn (gen_movsi (dest, source));
3343 break;
3345 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3346 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3347 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3348 operands[0] = dest;
3349 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3350 gen_shifty_op (ASHIFT, operands);
3351 if (kind == 7)
3352 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3353 break;
3354 default:
3355 return -1;
3357 return 0;
3360 /* Prefix a symbol_ref name with "datalabel". */
3363 gen_datalabel_ref (rtx sym)
3365 const char *str;
3367 if (GET_CODE (sym) == LABEL_REF)
3368 return gen_rtx_CONST (GET_MODE (sym),
3369 gen_rtx_UNSPEC (GET_MODE (sym),
3370 gen_rtvec (1, sym),
3371 UNSPEC_DATALABEL));
3373 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3375 str = XSTR (sym, 0);
3376 /* Share all SYMBOL_REF strings with the same value - that is important
3377 for cse. */
3378 str = IDENTIFIER_POINTER (get_identifier (str));
3379 XSTR (sym, 0) = str;
3381 return sym;
3385 static alloc_pool label_ref_list_pool;
3387 typedef struct label_ref_list_d
3389 rtx label;
3390 struct label_ref_list_d *next;
3391 } *label_ref_list_t;
3393 /* The SH cannot load a large constant into a register, constants have to
3394 come from a pc relative load. The reference of a pc relative load
3395 instruction must be less than 1k in front of the instruction. This
3396 means that we often have to dump a constant inside a function, and
3397 generate code to branch around it.
3399 It is important to minimize this, since the branches will slow things
3400 down and make things bigger.
3402 Worst case code looks like:
3404 mov.l L1,rn
3405 bra L2
3407 align
3408 L1: .long value
3412 mov.l L3,rn
3413 bra L4
3415 align
3416 L3: .long value
3420 We fix this by performing a scan before scheduling, which notices which
3421 instructions need to have their operands fetched from the constant table
3422 and builds the table.
3424 The algorithm is:
3426 scan, find an instruction which needs a pcrel move. Look forward, find the
3427 last barrier which is within MAX_COUNT bytes of the requirement.
3428 If there isn't one, make one. Process all the instructions between
3429 the find and the barrier.
3431 In the above example, we can tell that L3 is within 1k of L1, so
3432 the first move can be shrunk from the 3 insn+constant sequence into
3433 just 1 insn, and the constant moved to L3 to make:
3435 mov.l L1,rn
3437 mov.l L3,rn
3438 bra L4
3440 align
3441 L3:.long value
3442 L4:.long value
3444 Then the second move becomes the target for the shortening process. */
3446 typedef struct
3448 rtx value; /* Value in table. */
3449 rtx label; /* Label of value. */
3450 label_ref_list_t wend; /* End of window. */
3451 enum machine_mode mode; /* Mode of value. */
3453 /* True if this constant is accessed as part of a post-increment
3454 sequence. Note that HImode constants are never accessed in this way. */
3455 bool part_of_sequence_p;
3456 } pool_node;
3458 /* The maximum number of constants that can fit into one pool, since
3459 constants in the range 0..510 are at least 2 bytes long, and in the
3460 range from there to 1018 at least 4 bytes. */
3462 #define MAX_POOL_SIZE 372
3463 static pool_node pool_vector[MAX_POOL_SIZE];
3464 static int pool_size;
3465 static rtx pool_window_label;
3466 static int pool_window_last;
3468 static int max_labelno_before_reorg;
3470 /* ??? If we need a constant in HImode which is the truncated value of a
3471 constant we need in SImode, we could combine the two entries thus saving
3472 two bytes. Is this common enough to be worth the effort of implementing
3473 it? */
3475 /* ??? This stuff should be done at the same time that we shorten branches.
3476 As it is now, we must assume that all branches are the maximum size, and
3477 this causes us to almost always output constant pools sooner than
3478 necessary. */
3480 /* Add a constant to the pool and return its label. */
3482 static rtx
3483 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3485 int i;
3486 rtx lab, new;
3487 label_ref_list_t ref, newref;
3489 /* First see if we've already got it. */
3490 for (i = 0; i < pool_size; i++)
3492 if (x->code == pool_vector[i].value->code
3493 && mode == pool_vector[i].mode)
3495 if (x->code == CODE_LABEL)
3497 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3498 continue;
3500 if (rtx_equal_p (x, pool_vector[i].value))
3502 lab = new = 0;
3503 if (! last_value
3504 || ! i
3505 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3507 new = gen_label_rtx ();
3508 LABEL_REFS (new) = pool_vector[i].label;
3509 pool_vector[i].label = lab = new;
3511 if (lab && pool_window_label)
3513 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3514 newref->label = pool_window_label;
3515 ref = pool_vector[pool_window_last].wend;
3516 newref->next = ref;
3517 pool_vector[pool_window_last].wend = newref;
3519 if (new)
3520 pool_window_label = new;
3521 pool_window_last = i;
3522 return lab;
3527 /* Need a new one. */
3528 pool_vector[pool_size].value = x;
3529 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3531 lab = 0;
3532 pool_vector[pool_size - 1].part_of_sequence_p = true;
3534 else
3535 lab = gen_label_rtx ();
3536 pool_vector[pool_size].mode = mode;
3537 pool_vector[pool_size].label = lab;
3538 pool_vector[pool_size].wend = NULL;
3539 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3540 if (lab && pool_window_label)
3542 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3543 newref->label = pool_window_label;
3544 ref = pool_vector[pool_window_last].wend;
3545 newref->next = ref;
3546 pool_vector[pool_window_last].wend = newref;
3548 if (lab)
3549 pool_window_label = lab;
3550 pool_window_last = pool_size;
3551 pool_size++;
3552 return lab;
3555 /* Output the literal table. START, if nonzero, is the first instruction
3556 this table is needed for, and also indicates that there is at least one
3557 casesi_worker_2 instruction; We have to emit the operand3 labels from
3558 these insns at a 4-byte aligned position. BARRIER is the barrier
3559 after which we are to place the table. */
3561 static void
3562 dump_table (rtx start, rtx barrier)
3564 rtx scan = barrier;
3565 int i;
3566 int need_align = 1;
3567 rtx lab;
3568 label_ref_list_t ref;
3569 int have_df = 0;
3571 /* Do two passes, first time dump out the HI sized constants. */
3573 for (i = 0; i < pool_size; i++)
3575 pool_node *p = &pool_vector[i];
3577 if (p->mode == HImode)
3579 if (need_align)
3581 scan = emit_insn_after (gen_align_2 (), scan);
3582 need_align = 0;
3584 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3585 scan = emit_label_after (lab, scan);
3586 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3587 scan);
3588 for (ref = p->wend; ref; ref = ref->next)
3590 lab = ref->label;
3591 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3594 else if (p->mode == DFmode)
3595 have_df = 1;
3598 need_align = 1;
3600 if (start)
3602 scan = emit_insn_after (gen_align_4 (), scan);
3603 need_align = 0;
3604 for (; start != barrier; start = NEXT_INSN (start))
3605 if (GET_CODE (start) == INSN
3606 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3608 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3609 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3611 scan = emit_label_after (lab, scan);
3614 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3616 rtx align_insn = NULL_RTX;
3618 scan = emit_label_after (gen_label_rtx (), scan);
3619 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3620 need_align = 0;
3622 for (i = 0; i < pool_size; i++)
3624 pool_node *p = &pool_vector[i];
3626 switch (p->mode)
3628 case HImode:
3629 break;
3630 case SImode:
3631 case SFmode:
3632 if (align_insn && !p->part_of_sequence_p)
3634 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3635 emit_label_before (lab, align_insn);
3636 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3637 align_insn);
3638 for (ref = p->wend; ref; ref = ref->next)
3640 lab = ref->label;
3641 emit_insn_before (gen_consttable_window_end (lab),
3642 align_insn);
3644 delete_insn (align_insn);
3645 align_insn = NULL_RTX;
3646 continue;
3648 else
3650 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3651 scan = emit_label_after (lab, scan);
3652 scan = emit_insn_after (gen_consttable_4 (p->value,
3653 const0_rtx), scan);
3654 need_align = ! need_align;
3656 break;
3657 case DFmode:
3658 if (need_align)
3660 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3661 align_insn = scan;
3662 need_align = 0;
3664 case DImode:
3665 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3666 scan = emit_label_after (lab, scan);
3667 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3668 scan);
3669 break;
3670 default:
3671 gcc_unreachable ();
3674 if (p->mode != HImode)
3676 for (ref = p->wend; ref; ref = ref->next)
3678 lab = ref->label;
3679 scan = emit_insn_after (gen_consttable_window_end (lab),
3680 scan);
3685 pool_size = 0;
3688 for (i = 0; i < pool_size; i++)
3690 pool_node *p = &pool_vector[i];
3692 switch (p->mode)
3694 case HImode:
3695 break;
3696 case SImode:
3697 case SFmode:
3698 if (need_align)
3700 need_align = 0;
3701 scan = emit_label_after (gen_label_rtx (), scan);
3702 scan = emit_insn_after (gen_align_4 (), scan);
3704 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3705 scan = emit_label_after (lab, scan);
3706 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3707 scan);
3708 break;
3709 case DFmode:
3710 case DImode:
3711 if (need_align)
3713 need_align = 0;
3714 scan = emit_label_after (gen_label_rtx (), scan);
3715 scan = emit_insn_after (gen_align_4 (), scan);
3717 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3718 scan = emit_label_after (lab, scan);
3719 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3720 scan);
3721 break;
3722 default:
3723 gcc_unreachable ();
3726 if (p->mode != HImode)
3728 for (ref = p->wend; ref; ref = ref->next)
3730 lab = ref->label;
3731 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3736 scan = emit_insn_after (gen_consttable_end (), scan);
3737 scan = emit_barrier_after (scan);
3738 pool_size = 0;
3739 pool_window_label = NULL_RTX;
3740 pool_window_last = 0;
3743 /* Return nonzero if constant would be an ok source for a
3744 mov.w instead of a mov.l. */
3746 static int
3747 hi_const (rtx src)
3749 return (GET_CODE (src) == CONST_INT
3750 && INTVAL (src) >= -32768
3751 && INTVAL (src) <= 32767);
3754 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3756 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3758 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3759 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3760 need to fix it if the input value is CONST_OK_FOR_I08. */
3762 static int
3763 broken_move (rtx insn)
3765 if (GET_CODE (insn) == INSN)
3767 rtx pat = PATTERN (insn);
3768 if (GET_CODE (pat) == PARALLEL)
3769 pat = XVECEXP (pat, 0, 0);
3770 if (GET_CODE (pat) == SET
3771 /* We can load any 8-bit value if we don't care what the high
3772 order bits end up as. */
3773 && GET_MODE (SET_DEST (pat)) != QImode
3774 && (CONSTANT_P (SET_SRC (pat))
3775 /* Match mova_const. */
3776 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3777 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3778 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3779 && ! (TARGET_SH2E
3780 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3781 && (fp_zero_operand (SET_SRC (pat))
3782 || fp_one_operand (SET_SRC (pat)))
3783 /* ??? If this is a -m4 or -m4-single compilation, in general
3784 we don't know the current setting of fpscr, so disable fldi.
3785 There is an exception if this was a register-register move
3786 before reload - and hence it was ascertained that we have
3787 single precision setting - and in a post-reload optimization
3788 we changed this to do a constant load. In that case
3789 we don't have an r0 clobber, hence we must use fldi. */
3790 && (! TARGET_SH4 || TARGET_FMOVD
3791 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3792 == SCRATCH))
3793 && GET_CODE (SET_DEST (pat)) == REG
3794 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3795 && ! (TARGET_SH2A
3796 && GET_MODE (SET_DEST (pat)) == SImode
3797 && (satisfies_constraint_I20 (SET_SRC (pat))
3798 || satisfies_constraint_I28 (SET_SRC (pat))))
3799 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3800 return 1;
3803 return 0;
3806 static int
3807 mova_p (rtx insn)
3809 return (GET_CODE (insn) == INSN
3810 && GET_CODE (PATTERN (insn)) == SET
3811 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3812 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3813 /* Don't match mova_const. */
3814 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3817 /* Fix up a mova from a switch that went out of range. */
3818 static void
3819 fixup_mova (rtx mova)
3821 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3822 if (! flag_pic)
3824 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3825 INSN_CODE (mova) = -1;
3827 else
3829 rtx worker = mova;
3830 rtx lab = gen_label_rtx ();
3831 rtx wpat, wpat0, wpat1, wsrc, diff;
3835 worker = NEXT_INSN (worker);
3836 gcc_assert (worker
3837 && GET_CODE (worker) != CODE_LABEL
3838 && GET_CODE (worker) != JUMP_INSN);
3839 } while (GET_CODE (worker) == NOTE
3840 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3841 wpat = PATTERN (worker);
3842 wpat0 = XVECEXP (wpat, 0, 0);
3843 wpat1 = XVECEXP (wpat, 0, 1);
3844 wsrc = SET_SRC (wpat0);
3845 PATTERN (worker) = (gen_casesi_worker_2
3846 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3847 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3848 XEXP (wpat1, 0)));
3849 INSN_CODE (worker) = -1;
3850 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3851 gen_rtx_LABEL_REF (Pmode, lab));
3852 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3853 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3854 INSN_CODE (mova) = -1;
3858 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3859 *num_mova, and check if the new mova is not nested within the first one.
3860 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3861 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
3862 static int
3863 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3865 int n_addr = 0; /* Initialization to shut up spurious warning. */
3866 int f_target, n_target = 0; /* Likewise. */
3868 if (optimize)
3870 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3871 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3872 if (n_addr > n_target || n_addr + 1022 < n_target)
3874 /* Change the mova into a load.
3875 broken_move will then return true for it. */
3876 fixup_mova (new_mova);
3877 return 1;
3880 if (!(*num_mova)++)
3882 *first_mova = new_mova;
3883 return 2;
3885 if (!optimize
3886 || ((f_target
3887 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3888 >= n_target))
3889 return -1;
3891 (*num_mova)--;
3892 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3893 > n_target - n_addr)
3895 fixup_mova (*first_mova);
3896 return 0;
3898 else
3900 fixup_mova (new_mova);
3901 return 1;
3905 /* Find the last barrier from insn FROM which is close enough to hold the
3906 constant pool. If we can't find one, then create one near the end of
3907 the range. */
3909 static rtx
3910 find_barrier (int num_mova, rtx mova, rtx from)
3912 int count_si = 0;
3913 int count_hi = 0;
3914 int found_hi = 0;
3915 int found_si = 0;
3916 int found_di = 0;
3917 int hi_align = 2;
3918 int si_align = 2;
3919 int leading_mova = num_mova;
3920 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3921 int si_limit;
3922 int hi_limit;
3923 rtx orig = from;
3925 /* For HImode: range is 510, add 4 because pc counts from address of
3926 second instruction after this one, subtract 2 for the jump instruction
3927 that we may need to emit before the table, subtract 2 for the instruction
3928 that fills the jump delay slot (in very rare cases, reorg will take an
3929 instruction from after the constant pool or will leave the delay slot
3930 empty). This gives 510.
3931 For SImode: range is 1020, add 4 because pc counts from address of
3932 second instruction after this one, subtract 2 in case pc is 2 byte
3933 aligned, subtract 2 for the jump instruction that we may need to emit
3934 before the table, subtract 2 for the instruction that fills the jump
3935 delay slot. This gives 1018. */
3937 /* The branch will always be shortened now that the reference address for
3938 forward branches is the successor address, thus we need no longer make
3939 adjustments to the [sh]i_limit for -O0. */
3941 si_limit = 1018;
3942 hi_limit = 510;
3944 while (from && count_si < si_limit && count_hi < hi_limit)
3946 int inc = get_attr_length (from);
3947 int new_align = 1;
3949 /* If this is a label that existed at the time of the compute_alignments
3950 call, determine the alignment. N.B. When find_barrier recurses for
3951 an out-of-reach mova, we might see labels at the start of previously
3952 inserted constant tables. */
3953 if (GET_CODE (from) == CODE_LABEL
3954 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3956 if (optimize)
3957 new_align = 1 << label_to_alignment (from);
3958 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3959 new_align = 1 << barrier_align (from);
3960 else
3961 new_align = 1;
3962 inc = 0;
3964 /* In case we are scanning a constant table because of recursion, check
3965 for explicit alignments. If the table is long, we might be forced
3966 to emit the new table in front of it; the length of the alignment
3967 might be the last straw. */
3968 else if (GET_CODE (from) == INSN
3969 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3970 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3971 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3972 /* When we find the end of a constant table, paste the new constant
3973 at the end. That is better than putting it in front because
3974 this way, we don't need extra alignment for adding a 4-byte-aligned
3975 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3976 else if (GET_CODE (from) == INSN
3977 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3978 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3979 return from;
3981 if (GET_CODE (from) == BARRIER)
3983 rtx next;
3985 found_barrier = from;
3987 /* If we are at the end of the function, or in front of an alignment
3988 instruction, we need not insert an extra alignment. We prefer
3989 this kind of barrier. */
3990 if (barrier_align (from) > 2)
3991 good_barrier = from;
3993 /* If we are at the end of a hot/cold block, dump the constants
3994 here. */
3995 next = NEXT_INSN (from);
3996 if (next
3997 && NOTE_P (next)
3998 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3999 break;
4002 if (broken_move (from))
4004 rtx pat, src, dst;
4005 enum machine_mode mode;
4007 pat = PATTERN (from);
4008 if (GET_CODE (pat) == PARALLEL)
4009 pat = XVECEXP (pat, 0, 0);
4010 src = SET_SRC (pat);
4011 dst = SET_DEST (pat);
4012 mode = GET_MODE (dst);
4014 /* We must explicitly check the mode, because sometimes the
4015 front end will generate code to load unsigned constants into
4016 HImode targets without properly sign extending them. */
4017 if (mode == HImode
4018 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4020 found_hi += 2;
4021 /* We put the short constants before the long constants, so
4022 we must count the length of short constants in the range
4023 for the long constants. */
4024 /* ??? This isn't optimal, but is easy to do. */
4025 si_limit -= 2;
4027 else
4029 /* We dump DF/DI constants before SF/SI ones, because
4030 the limit is the same, but the alignment requirements
4031 are higher. We may waste up to 4 additional bytes
4032 for alignment, and the DF/DI constant may have
4033 another SF/SI constant placed before it. */
4034 if (TARGET_SHCOMPACT
4035 && ! found_di
4036 && (mode == DFmode || mode == DImode))
4038 found_di = 1;
4039 si_limit -= 8;
4041 while (si_align > 2 && found_si + si_align - 2 > count_si)
4042 si_align >>= 1;
4043 if (found_si > count_si)
4044 count_si = found_si;
4045 found_si += GET_MODE_SIZE (mode);
4046 if (num_mova)
4047 si_limit -= GET_MODE_SIZE (mode);
4051 if (mova_p (from))
4053 switch (untangle_mova (&num_mova, &mova, from))
4055 case 0: return find_barrier (0, 0, mova);
4056 case 2:
4058 leading_mova = 0;
4059 barrier_before_mova
4060 = good_barrier ? good_barrier : found_barrier;
4062 default: break;
4064 if (found_si > count_si)
4065 count_si = found_si;
4067 else if (GET_CODE (from) == JUMP_INSN
4068 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4069 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4071 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4072 || (num_mova
4073 && (prev_nonnote_insn (from)
4074 == XEXP (MOVA_LABELREF (mova), 0))))
4075 num_mova--;
4076 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4078 /* We have just passed the barrier in front of the
4079 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4080 the ADDR_DIFF_VEC is accessed as data, just like our pool
4081 constants, this is a good opportunity to accommodate what
4082 we have gathered so far.
4083 If we waited any longer, we could end up at a barrier in
4084 front of code, which gives worse cache usage for separated
4085 instruction / data caches. */
4086 good_barrier = found_barrier;
4087 break;
4089 else
4091 rtx body = PATTERN (from);
4092 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4095 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4096 else if (GET_CODE (from) == JUMP_INSN
4097 && ! TARGET_SH2
4098 && ! TARGET_SMALLCODE)
4099 new_align = 4;
4101 if (found_si)
4103 count_si += inc;
4104 if (new_align > si_align)
4106 si_limit -= (count_si - 1) & (new_align - si_align);
4107 si_align = new_align;
4109 count_si = (count_si + new_align - 1) & -new_align;
4111 if (found_hi)
4113 count_hi += inc;
4114 if (new_align > hi_align)
4116 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4117 hi_align = new_align;
4119 count_hi = (count_hi + new_align - 1) & -new_align;
4121 from = NEXT_INSN (from);
4124 if (num_mova)
4126 if (leading_mova)
4128 /* Try as we might, the leading mova is out of range. Change
4129 it into a load (which will become a pcload) and retry. */
4130 fixup_mova (mova);
4131 return find_barrier (0, 0, mova);
4133 else
4135 /* Insert the constant pool table before the mova instruction,
4136 to prevent the mova label reference from going out of range. */
4137 from = mova;
4138 good_barrier = found_barrier = barrier_before_mova;
4142 if (found_barrier)
4144 if (good_barrier && next_real_insn (found_barrier))
4145 found_barrier = good_barrier;
4147 else
4149 /* We didn't find a barrier in time to dump our stuff,
4150 so we'll make one. */
4151 rtx label = gen_label_rtx ();
4153 /* If we exceeded the range, then we must back up over the last
4154 instruction we looked at. Otherwise, we just need to undo the
4155 NEXT_INSN at the end of the loop. */
4156 if (PREV_INSN (from) != orig
4157 && (count_hi > hi_limit || count_si > si_limit))
4158 from = PREV_INSN (PREV_INSN (from));
4159 else
4160 from = PREV_INSN (from);
4162 /* Walk back to be just before any jump or label.
4163 Putting it before a label reduces the number of times the branch
4164 around the constant pool table will be hit. Putting it before
4165 a jump makes it more likely that the bra delay slot will be
4166 filled. */
4167 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4168 || GET_CODE (from) == CODE_LABEL)
4169 from = PREV_INSN (from);
4171 from = emit_jump_insn_after (gen_jump (label), from);
4172 JUMP_LABEL (from) = label;
4173 LABEL_NUSES (label) = 1;
4174 found_barrier = emit_barrier_after (from);
4175 emit_label_after (label, found_barrier);
4178 return found_barrier;
4181 /* If the instruction INSN is implemented by a special function, and we can
4182 positively find the register that is used to call the sfunc, and this
4183 register is not used anywhere else in this instruction - except as the
4184 destination of a set, return this register; else, return 0. */
4186 sfunc_uses_reg (rtx insn)
4188 int i;
4189 rtx pattern, part, reg_part, reg;
4191 if (GET_CODE (insn) != INSN)
4192 return 0;
4193 pattern = PATTERN (insn);
4194 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4195 return 0;
4197 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4199 part = XVECEXP (pattern, 0, i);
4200 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4201 reg_part = part;
4203 if (! reg_part)
4204 return 0;
4205 reg = XEXP (reg_part, 0);
4206 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4208 part = XVECEXP (pattern, 0, i);
4209 if (part == reg_part || GET_CODE (part) == CLOBBER)
4210 continue;
4211 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4212 && GET_CODE (SET_DEST (part)) == REG)
4213 ? SET_SRC (part) : part)))
4214 return 0;
4216 return reg;
4219 /* See if the only way in which INSN uses REG is by calling it, or by
4220 setting it while calling it. Set *SET to a SET rtx if the register
4221 is set by INSN. */
4223 static int
4224 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4226 rtx pattern, reg2;
4228 *set = NULL_RTX;
4230 reg2 = sfunc_uses_reg (insn);
4231 if (reg2 && REGNO (reg2) == REGNO (reg))
4233 pattern = single_set (insn);
4234 if (pattern
4235 && GET_CODE (SET_DEST (pattern)) == REG
4236 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4237 *set = pattern;
4238 return 0;
4240 if (GET_CODE (insn) != CALL_INSN)
4242 /* We don't use rtx_equal_p because we don't care if the mode is
4243 different. */
4244 pattern = single_set (insn);
4245 if (pattern
4246 && GET_CODE (SET_DEST (pattern)) == REG
4247 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4249 rtx par, part;
4250 int i;
4252 *set = pattern;
4253 par = PATTERN (insn);
4254 if (GET_CODE (par) == PARALLEL)
4255 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4257 part = XVECEXP (par, 0, i);
4258 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4259 return 1;
4261 return reg_mentioned_p (reg, SET_SRC (pattern));
4264 return 1;
4267 pattern = PATTERN (insn);
4269 if (GET_CODE (pattern) == PARALLEL)
4271 int i;
4273 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4274 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4275 return 1;
4276 pattern = XVECEXP (pattern, 0, 0);
4279 if (GET_CODE (pattern) == SET)
4281 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4283 /* We don't use rtx_equal_p, because we don't care if the
4284 mode is different. */
4285 if (GET_CODE (SET_DEST (pattern)) != REG
4286 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4287 return 1;
4289 *set = pattern;
4292 pattern = SET_SRC (pattern);
4295 if (GET_CODE (pattern) != CALL
4296 || GET_CODE (XEXP (pattern, 0)) != MEM
4297 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4298 return 1;
4300 return 0;
4303 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4304 general registers. Bits 0..15 mean that the respective registers
4305 are used as inputs in the instruction. Bits 16..31 mean that the
4306 registers 0..15, respectively, are used as outputs, or are clobbered.
4307 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4309 regs_used (rtx x, int is_dest)
4311 enum rtx_code code;
4312 const char *fmt;
4313 int i, used = 0;
4315 if (! x)
4316 return used;
4317 code = GET_CODE (x);
4318 switch (code)
4320 case REG:
4321 if (REGNO (x) < 16)
4322 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4323 << (REGNO (x) + is_dest));
4324 return 0;
4325 case SUBREG:
4327 rtx y = SUBREG_REG (x);
4329 if (GET_CODE (y) != REG)
4330 break;
4331 if (REGNO (y) < 16)
4332 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4333 << (REGNO (y) +
4334 subreg_regno_offset (REGNO (y),
4335 GET_MODE (y),
4336 SUBREG_BYTE (x),
4337 GET_MODE (x)) + is_dest));
4338 return 0;
4340 case SET:
4341 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4342 case RETURN:
4343 /* If there was a return value, it must have been indicated with USE. */
4344 return 0x00ffff00;
4345 case CLOBBER:
4346 is_dest = 1;
4347 break;
4348 case MEM:
4349 is_dest = 0;
4350 break;
4351 case CALL:
4352 used |= 0x00ff00f0;
4353 break;
4354 default:
4355 break;
4358 fmt = GET_RTX_FORMAT (code);
4360 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4362 if (fmt[i] == 'E')
4364 register int j;
4365 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4366 used |= regs_used (XVECEXP (x, i, j), is_dest);
4368 else if (fmt[i] == 'e')
4369 used |= regs_used (XEXP (x, i), is_dest);
4371 return used;
4374 /* Create an instruction that prevents redirection of a conditional branch
4375 to the destination of the JUMP with address ADDR.
4376 If the branch needs to be implemented as an indirect jump, try to find
4377 a scratch register for it.
4378 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4379 If any preceding insn that doesn't fit into a delay slot is good enough,
4380 pass 1. Pass 2 if a definite blocking insn is needed.
4381 -1 is used internally to avoid deep recursion.
4382 If a blocking instruction is made or recognized, return it. */
4384 static rtx
4385 gen_block_redirect (rtx jump, int addr, int need_block)
4387 int dead = 0;
4388 rtx prev = prev_nonnote_insn (jump);
4389 rtx dest;
4391 /* First, check if we already have an instruction that satisfies our need. */
4392 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4394 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4395 return prev;
4396 if (GET_CODE (PATTERN (prev)) == USE
4397 || GET_CODE (PATTERN (prev)) == CLOBBER
4398 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4399 prev = jump;
4400 else if ((need_block &= ~1) < 0)
4401 return prev;
4402 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4403 need_block = 0;
4405 if (GET_CODE (PATTERN (jump)) == RETURN)
4407 if (! need_block)
4408 return prev;
4409 /* Reorg even does nasty things with return insns that cause branches
4410 to go out of range - see find_end_label and callers. */
4411 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4413 /* We can't use JUMP_LABEL here because it might be undefined
4414 when not optimizing. */
4415 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4416 /* If the branch is out of range, try to find a scratch register for it. */
4417 if (optimize
4418 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4419 > 4092 + 4098))
4421 rtx scan;
4422 /* Don't look for the stack pointer as a scratch register,
4423 it would cause trouble if an interrupt occurred. */
4424 unsigned try = 0x7fff, used;
4425 int jump_left = flag_expensive_optimizations + 1;
4427 /* It is likely that the most recent eligible instruction is wanted for
4428 the delay slot. Therefore, find out which registers it uses, and
4429 try to avoid using them. */
4431 for (scan = jump; (scan = PREV_INSN (scan)); )
4433 enum rtx_code code;
4435 if (INSN_DELETED_P (scan))
4436 continue;
4437 code = GET_CODE (scan);
4438 if (code == CODE_LABEL || code == JUMP_INSN)
4439 break;
4440 if (code == INSN
4441 && GET_CODE (PATTERN (scan)) != USE
4442 && GET_CODE (PATTERN (scan)) != CLOBBER
4443 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4445 try &= ~regs_used (PATTERN (scan), 0);
4446 break;
4449 for (used = dead = 0, scan = JUMP_LABEL (jump);
4450 (scan = NEXT_INSN (scan)); )
4452 enum rtx_code code;
4454 if (INSN_DELETED_P (scan))
4455 continue;
4456 code = GET_CODE (scan);
4457 if (INSN_P (scan))
4459 used |= regs_used (PATTERN (scan), 0);
4460 if (code == CALL_INSN)
4461 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4462 dead |= (used >> 16) & ~used;
4463 if (dead & try)
4465 dead &= try;
4466 break;
4468 if (code == JUMP_INSN)
4470 if (jump_left-- && simplejump_p (scan))
4471 scan = JUMP_LABEL (scan);
4472 else
4473 break;
4477 /* Mask out the stack pointer again, in case it was
4478 the only 'free' register we have found. */
4479 dead &= 0x7fff;
4481 /* If the immediate destination is still in range, check for possible
4482 threading with a jump beyond the delay slot insn.
4483 Don't check if we are called recursively; the jump has been or will be
4484 checked in a different invocation then. */
4486 else if (optimize && need_block >= 0)
4488 rtx next = next_active_insn (next_active_insn (dest));
4489 if (next && GET_CODE (next) == JUMP_INSN
4490 && GET_CODE (PATTERN (next)) == SET
4491 && recog_memoized (next) == CODE_FOR_jump_compact)
4493 dest = JUMP_LABEL (next);
4494 if (dest
4495 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4496 > 4092 + 4098))
4497 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4501 if (dead)
4503 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4505 /* It would be nice if we could convert the jump into an indirect
4506 jump / far branch right now, and thus exposing all constituent
4507 instructions to further optimization. However, reorg uses
4508 simplejump_p to determine if there is an unconditional jump where
4509 it should try to schedule instructions from the target of the
4510 branch; simplejump_p fails for indirect jumps even if they have
4511 a JUMP_LABEL. */
4512 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4513 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4514 , jump);
4515 /* ??? We would like this to have the scope of the jump, but that
4516 scope will change when a delay slot insn of an inner scope is added.
4517 Hence, after delay slot scheduling, we'll have to expect
4518 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4519 the jump. */
4521 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4522 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4523 return insn;
4525 else if (need_block)
4526 /* We can't use JUMP_LABEL here because it might be undefined
4527 when not optimizing. */
4528 return emit_insn_before (gen_block_branch_redirect
4529 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4530 , jump);
4531 return prev;
4534 #define CONDJUMP_MIN -252
4535 #define CONDJUMP_MAX 262
4536 struct far_branch
4538 /* A label (to be placed) in front of the jump
4539 that jumps to our ultimate destination. */
4540 rtx near_label;
4541 /* Where we are going to insert it if we cannot move the jump any farther,
4542 or the jump itself if we have picked up an existing jump. */
4543 rtx insert_place;
4544 /* The ultimate destination. */
4545 rtx far_label;
4546 struct far_branch *prev;
4547 /* If the branch has already been created, its address;
4548 else the address of its first prospective user. */
4549 int address;
4552 static void gen_far_branch (struct far_branch *);
4553 enum mdep_reorg_phase_e mdep_reorg_phase;
4554 static void
4555 gen_far_branch (struct far_branch *bp)
4557 rtx insn = bp->insert_place;
4558 rtx jump;
4559 rtx label = gen_label_rtx ();
4560 int ok;
4562 emit_label_after (label, insn);
4563 if (bp->far_label)
4565 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4566 LABEL_NUSES (bp->far_label)++;
4568 else
4569 jump = emit_jump_insn_after (gen_return (), insn);
4570 /* Emit a barrier so that reorg knows that any following instructions
4571 are not reachable via a fall-through path.
4572 But don't do this when not optimizing, since we wouldn't suppress the
4573 alignment for the barrier then, and could end up with out-of-range
4574 pc-relative loads. */
4575 if (optimize)
4576 emit_barrier_after (jump);
4577 emit_label_after (bp->near_label, insn);
4578 JUMP_LABEL (jump) = bp->far_label;
4579 ok = invert_jump (insn, label, 1);
4580 gcc_assert (ok);
4582 /* If we are branching around a jump (rather than a return), prevent
4583 reorg from using an insn from the jump target as the delay slot insn -
4584 when reorg did this, it pessimized code (we rather hide the delay slot)
4585 and it could cause branches to go out of range. */
4586 if (bp->far_label)
4587 (emit_insn_after
4588 (gen_stuff_delay_slot
4589 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4590 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4591 insn));
4592 /* Prevent reorg from undoing our splits. */
4593 gen_block_redirect (jump, bp->address += 2, 2);
4596 /* Fix up ADDR_DIFF_VECs. */
4597 void
4598 fixup_addr_diff_vecs (rtx first)
4600 rtx insn;
4602 for (insn = first; insn; insn = NEXT_INSN (insn))
4604 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4606 if (GET_CODE (insn) != JUMP_INSN
4607 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4608 continue;
4609 pat = PATTERN (insn);
4610 vec_lab = XEXP (XEXP (pat, 0), 0);
4612 /* Search the matching casesi_jump_2. */
4613 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4615 if (GET_CODE (prev) != JUMP_INSN)
4616 continue;
4617 prevpat = PATTERN (prev);
4618 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4619 continue;
4620 x = XVECEXP (prevpat, 0, 1);
4621 if (GET_CODE (x) != USE)
4622 continue;
4623 x = XEXP (x, 0);
4624 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4625 break;
4627 /* FIXME: This is a bug in the optimizer, but it seems harmless
4628 to just avoid panicing. */
4629 if (!prev)
4630 continue;
4632 /* Emit the reference label of the braf where it belongs, right after
4633 the casesi_jump_2 (i.e. braf). */
4634 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4635 emit_label_after (braf_label, prev);
4637 /* Fix up the ADDR_DIF_VEC to be relative
4638 to the reference address of the braf. */
4639 XEXP (XEXP (pat, 0), 0) = braf_label;
4643 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4644 a barrier. Return the base 2 logarithm of the desired alignment. */
4646 barrier_align (rtx barrier_or_label)
4648 rtx next = next_real_insn (barrier_or_label), pat, prev;
4649 int slot, credit, jump_to_next = 0;
4651 if (! next)
4652 return 0;
4654 pat = PATTERN (next);
4656 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4657 return 2;
4659 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4660 /* This is a barrier in front of a constant table. */
4661 return 0;
4663 prev = prev_real_insn (barrier_or_label);
4664 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4666 pat = PATTERN (prev);
4667 /* If this is a very small table, we want to keep the alignment after
4668 the table to the minimum for proper code alignment. */
4669 return ((TARGET_SMALLCODE
4670 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4671 <= (unsigned) 1 << (CACHE_LOG - 2)))
4672 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4675 if (TARGET_SMALLCODE)
4676 return 0;
4678 if (! TARGET_SH2 || ! optimize)
4679 return align_jumps_log;
4681 /* When fixing up pcloads, a constant table might be inserted just before
4682 the basic block that ends with the barrier. Thus, we can't trust the
4683 instruction lengths before that. */
4684 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4686 /* Check if there is an immediately preceding branch to the insn beyond
4687 the barrier. We must weight the cost of discarding useful information
4688 from the current cache line when executing this branch and there is
4689 an alignment, against that of fetching unneeded insn in front of the
4690 branch target when there is no alignment. */
4692 /* There are two delay_slot cases to consider. One is the simple case
4693 where the preceding branch is to the insn beyond the barrier (simple
4694 delay slot filling), and the other is where the preceding branch has
4695 a delay slot that is a duplicate of the insn after the barrier
4696 (fill_eager_delay_slots) and the branch is to the insn after the insn
4697 after the barrier. */
4699 /* PREV is presumed to be the JUMP_INSN for the barrier under
4700 investigation. Skip to the insn before it. */
4701 prev = prev_real_insn (prev);
4703 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4704 credit >= 0 && prev && GET_CODE (prev) == INSN;
4705 prev = prev_real_insn (prev))
4707 jump_to_next = 0;
4708 if (GET_CODE (PATTERN (prev)) == USE
4709 || GET_CODE (PATTERN (prev)) == CLOBBER)
4710 continue;
4711 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4713 prev = XVECEXP (PATTERN (prev), 0, 1);
4714 if (INSN_UID (prev) == INSN_UID (next))
4716 /* Delay slot was filled with insn at jump target. */
4717 jump_to_next = 1;
4718 continue;
4722 if (slot &&
4723 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4724 slot = 0;
4725 credit -= get_attr_length (prev);
4727 if (prev
4728 && GET_CODE (prev) == JUMP_INSN
4729 && JUMP_LABEL (prev))
4731 rtx x;
4732 if (jump_to_next
4733 || next_real_insn (JUMP_LABEL (prev)) == next
4734 /* If relax_delay_slots() decides NEXT was redundant
4735 with some previous instruction, it will have
4736 redirected PREV's jump to the following insn. */
4737 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4738 /* There is no upper bound on redundant instructions
4739 that might have been skipped, but we must not put an
4740 alignment where none had been before. */
4741 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4742 (INSN_P (x)
4743 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4744 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4745 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4747 rtx pat = PATTERN (prev);
4748 if (GET_CODE (pat) == PARALLEL)
4749 pat = XVECEXP (pat, 0, 0);
4750 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4751 return 0;
4756 return align_jumps_log;
4759 /* If we are inside a phony loop, almost any kind of label can turn up as the
4760 first one in the loop. Aligning a braf label causes incorrect switch
4761 destination addresses; we can detect braf labels because they are
4762 followed by a BARRIER.
4763 Applying loop alignment to small constant or switch tables is a waste
4764 of space, so we suppress this too. */
4766 sh_loop_align (rtx label)
4768 rtx next = label;
4771 next = next_nonnote_insn (next);
4772 while (next && GET_CODE (next) == CODE_LABEL);
4774 if (! next
4775 || ! INSN_P (next)
4776 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4777 || recog_memoized (next) == CODE_FOR_consttable_2)
4778 return 0;
4780 return align_loops_log;
4783 /* Do a final pass over the function, just before delayed branch
4784 scheduling. */
4786 static void
4787 sh_reorg (void)
4789 rtx first, insn, mova = NULL_RTX;
4790 int num_mova;
4791 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4792 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4794 first = get_insns ();
4795 max_labelno_before_reorg = max_label_num ();
4797 /* We must split call insns before introducing `mova's. If we're
4798 optimizing, they'll have already been split. Otherwise, make
4799 sure we don't split them too late. */
4800 if (! optimize)
4801 split_all_insns_noflow ();
4803 if (TARGET_SHMEDIA)
4804 return;
4806 /* If relaxing, generate pseudo-ops to associate function calls with
4807 the symbols they call. It does no harm to not generate these
4808 pseudo-ops. However, when we can generate them, it enables to
4809 linker to potentially relax the jsr to a bsr, and eliminate the
4810 register load and, possibly, the constant pool entry. */
4812 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4813 if (TARGET_RELAX)
4815 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4816 own purposes. This works because none of the remaining passes
4817 need to look at them.
4819 ??? But it may break in the future. We should use a machine
4820 dependent REG_NOTE, or some other approach entirely. */
4821 for (insn = first; insn; insn = NEXT_INSN (insn))
4823 if (INSN_P (insn))
4825 rtx note;
4827 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4828 NULL_RTX)) != 0)
4829 remove_note (insn, note);
4833 for (insn = first; insn; insn = NEXT_INSN (insn))
4835 rtx pattern, reg, link, set, scan, dies, label;
4836 int rescan = 0, foundinsn = 0;
4838 if (GET_CODE (insn) == CALL_INSN)
4840 pattern = PATTERN (insn);
4842 if (GET_CODE (pattern) == PARALLEL)
4843 pattern = XVECEXP (pattern, 0, 0);
4844 if (GET_CODE (pattern) == SET)
4845 pattern = SET_SRC (pattern);
4847 if (GET_CODE (pattern) != CALL
4848 || GET_CODE (XEXP (pattern, 0)) != MEM)
4849 continue;
4851 reg = XEXP (XEXP (pattern, 0), 0);
4853 else
4855 reg = sfunc_uses_reg (insn);
4856 if (! reg)
4857 continue;
4860 if (GET_CODE (reg) != REG)
4861 continue;
4863 /* Try scanning backward to find where the register is set. */
4864 link = NULL;
4865 for (scan = PREV_INSN (insn);
4866 scan && GET_CODE (scan) != CODE_LABEL;
4867 scan = PREV_INSN (scan))
4869 if (! INSN_P (scan))
4870 continue;
4872 if (! reg_mentioned_p (reg, scan))
4873 continue;
4875 if (noncall_uses_reg (reg, scan, &set))
4876 break;
4878 if (set)
4880 link = scan;
4881 break;
4885 if (! link)
4886 continue;
4888 /* The register is set at LINK. */
4890 /* We can only optimize the function call if the register is
4891 being set to a symbol. In theory, we could sometimes
4892 optimize calls to a constant location, but the assembler
4893 and linker do not support that at present. */
4894 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4895 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4896 continue;
4898 /* Scan forward from LINK to the place where REG dies, and
4899 make sure that the only insns which use REG are
4900 themselves function calls. */
4902 /* ??? This doesn't work for call targets that were allocated
4903 by reload, since there may not be a REG_DEAD note for the
4904 register. */
4906 dies = NULL_RTX;
4907 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4909 rtx scanset;
4911 /* Don't try to trace forward past a CODE_LABEL if we haven't
4912 seen INSN yet. Ordinarily, we will only find the setting insn
4913 if it is in the same basic block. However,
4914 cross-jumping can insert code labels in between the load and
4915 the call, and can result in situations where a single call
4916 insn may have two targets depending on where we came from. */
4918 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4919 break;
4921 if (! INSN_P (scan))
4922 continue;
4924 /* Don't try to trace forward past a JUMP. To optimize
4925 safely, we would have to check that all the
4926 instructions at the jump destination did not use REG. */
4928 if (GET_CODE (scan) == JUMP_INSN)
4929 break;
4931 if (! reg_mentioned_p (reg, scan))
4932 continue;
4934 if (noncall_uses_reg (reg, scan, &scanset))
4935 break;
4937 if (scan == insn)
4938 foundinsn = 1;
4940 if (scan != insn
4941 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4943 /* There is a function call to this register other
4944 than the one we are checking. If we optimize
4945 this call, we need to rescan again below. */
4946 rescan = 1;
4949 /* ??? We shouldn't have to worry about SCANSET here.
4950 We should just be able to check for a REG_DEAD note
4951 on a function call. However, the REG_DEAD notes are
4952 apparently not dependable around libcalls; c-torture
4953 execute/920501-2 is a test case. If SCANSET is set,
4954 then this insn sets the register, so it must have
4955 died earlier. Unfortunately, this will only handle
4956 the cases in which the register is, in fact, set in a
4957 later insn. */
4959 /* ??? We shouldn't have to use FOUNDINSN here.
4960 This dates back to when we used LOG_LINKS to find
4961 the most recent insn which sets the register. */
4963 if (foundinsn
4964 && (scanset
4965 || find_reg_note (scan, REG_DEAD, reg)))
4967 dies = scan;
4968 break;
4972 if (! dies)
4974 /* Either there was a branch, or some insn used REG
4975 other than as a function call address. */
4976 continue;
4979 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4980 on the insn which sets the register, and on each call insn
4981 which uses the register. In final_prescan_insn we look for
4982 the REG_LABEL_OPERAND notes, and output the appropriate label
4983 or pseudo-op. */
4985 label = gen_label_rtx ();
4986 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4987 REG_NOTES (link));
4988 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4989 REG_NOTES (insn));
4990 if (rescan)
4992 scan = link;
4995 rtx reg2;
4997 scan = NEXT_INSN (scan);
4998 if (scan != insn
4999 && ((GET_CODE (scan) == CALL_INSN
5000 && reg_mentioned_p (reg, scan))
5001 || ((reg2 = sfunc_uses_reg (scan))
5002 && REGNO (reg2) == REGNO (reg))))
5003 REG_NOTES (scan)
5004 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
5005 REG_NOTES (scan));
5007 while (scan != dies);
5012 if (TARGET_SH2)
5013 fixup_addr_diff_vecs (first);
5015 if (optimize)
5017 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5018 shorten_branches (first);
5021 /* Scan the function looking for move instructions which have to be
5022 changed to pc-relative loads and insert the literal tables. */
5023 label_ref_list_pool = create_alloc_pool ("label references list",
5024 sizeof (struct label_ref_list_d),
5025 30);
5026 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5027 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5029 if (mova_p (insn))
5031 /* ??? basic block reordering can move a switch table dispatch
5032 below the switch table. Check if that has happened.
5033 We only have the addresses available when optimizing; but then,
5034 this check shouldn't be needed when not optimizing. */
5035 if (!untangle_mova (&num_mova, &mova, insn))
5037 insn = mova;
5038 num_mova = 0;
5041 else if (GET_CODE (insn) == JUMP_INSN
5042 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5043 && num_mova
5044 /* ??? loop invariant motion can also move a mova out of a
5045 loop. Since loop does this code motion anyway, maybe we
5046 should wrap UNSPEC_MOVA into a CONST, so that reload can
5047 move it back. */
5048 && ((num_mova > 1
5049 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5050 || (prev_nonnote_insn (insn)
5051 == XEXP (MOVA_LABELREF (mova), 0))))
5053 rtx scan;
5054 int total;
5056 num_mova--;
5058 /* Some code might have been inserted between the mova and
5059 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5060 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5061 total += get_attr_length (scan);
5063 /* range of mova is 1020, add 4 because pc counts from address of
5064 second instruction after this one, subtract 2 in case pc is 2
5065 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5066 cancels out with alignment effects of the mova itself. */
5067 if (total > 1022)
5069 /* Change the mova into a load, and restart scanning
5070 there. broken_move will then return true for mova. */
5071 fixup_mova (mova);
5072 insn = mova;
5075 if (broken_move (insn)
5076 || (GET_CODE (insn) == INSN
5077 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5079 rtx scan;
5080 /* Scan ahead looking for a barrier to stick the constant table
5081 behind. */
5082 rtx barrier = find_barrier (num_mova, mova, insn);
5083 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5084 int need_aligned_label = 0;
5086 if (num_mova && ! mova_p (mova))
5088 /* find_barrier had to change the first mova into a
5089 pcload; thus, we have to start with this new pcload. */
5090 insn = mova;
5091 num_mova = 0;
5093 /* Now find all the moves between the points and modify them. */
5094 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5096 if (GET_CODE (scan) == CODE_LABEL)
5097 last_float = 0;
5098 if (GET_CODE (scan) == INSN
5099 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5100 need_aligned_label = 1;
5101 if (broken_move (scan))
5103 rtx *patp = &PATTERN (scan), pat = *patp;
5104 rtx src, dst;
5105 rtx lab;
5106 rtx newsrc;
5107 enum machine_mode mode;
5109 if (GET_CODE (pat) == PARALLEL)
5110 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5111 src = SET_SRC (pat);
5112 dst = SET_DEST (pat);
5113 mode = GET_MODE (dst);
5115 if (mode == SImode && hi_const (src)
5116 && REGNO (dst) != FPUL_REG)
5118 int offset = 0;
5120 mode = HImode;
5121 while (GET_CODE (dst) == SUBREG)
5123 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5124 GET_MODE (SUBREG_REG (dst)),
5125 SUBREG_BYTE (dst),
5126 GET_MODE (dst));
5127 dst = SUBREG_REG (dst);
5129 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5131 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5133 /* This must be an insn that clobbers r0. */
5134 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5135 XVECLEN (PATTERN (scan), 0)
5136 - 1);
5137 rtx clobber = *clobberp;
5139 gcc_assert (GET_CODE (clobber) == CLOBBER
5140 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5142 if (last_float
5143 && reg_set_between_p (r0_rtx, last_float_move, scan))
5144 last_float = 0;
5145 if (last_float
5146 && TARGET_SHCOMPACT
5147 && GET_MODE_SIZE (mode) != 4
5148 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5149 last_float = 0;
5150 lab = add_constant (src, mode, last_float);
5151 if (lab)
5152 emit_insn_before (gen_mova (lab), scan);
5153 else
5155 /* There will be a REG_UNUSED note for r0 on
5156 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5157 lest reorg:mark_target_live_regs will not
5158 consider r0 to be used, and we end up with delay
5159 slot insn in front of SCAN that clobbers r0. */
5160 rtx note
5161 = find_regno_note (last_float_move, REG_UNUSED, 0);
5163 /* If we are not optimizing, then there may not be
5164 a note. */
5165 if (note)
5166 PUT_MODE (note, REG_INC);
5168 *last_float_addr = r0_inc_rtx;
5170 last_float_move = scan;
5171 last_float = src;
5172 newsrc = gen_const_mem (mode,
5173 (((TARGET_SH4 && ! TARGET_FMOVD)
5174 || REGNO (dst) == FPUL_REG)
5175 ? r0_inc_rtx
5176 : r0_rtx));
5177 last_float_addr = &XEXP (newsrc, 0);
5179 /* Remove the clobber of r0. */
5180 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5181 gen_rtx_SCRATCH (Pmode));
5183 /* This is a mova needing a label. Create it. */
5184 else if (GET_CODE (src) == UNSPEC
5185 && XINT (src, 1) == UNSPEC_MOVA
5186 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5188 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5189 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5190 newsrc = gen_rtx_UNSPEC (SImode,
5191 gen_rtvec (1, newsrc),
5192 UNSPEC_MOVA);
5194 else
5196 lab = add_constant (src, mode, 0);
5197 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5198 newsrc = gen_const_mem (mode, newsrc);
5200 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5201 INSN_CODE (scan) = -1;
5204 dump_table (need_aligned_label ? insn : 0, barrier);
5205 insn = barrier;
5208 free_alloc_pool (label_ref_list_pool);
5209 for (insn = first; insn; insn = NEXT_INSN (insn))
5210 PUT_MODE (insn, VOIDmode);
5212 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5213 INSN_ADDRESSES_FREE ();
5214 split_branches (first);
5216 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5217 also has an effect on the register that holds the address of the sfunc.
5218 Insert an extra dummy insn in front of each sfunc that pretends to
5219 use this register. */
5220 if (flag_delayed_branch)
5222 for (insn = first; insn; insn = NEXT_INSN (insn))
5224 rtx reg = sfunc_uses_reg (insn);
5226 if (! reg)
5227 continue;
5228 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5231 #if 0
5232 /* fpscr is not actually a user variable, but we pretend it is for the
5233 sake of the previous optimization passes, since we want it handled like
5234 one. However, we don't have any debugging information for it, so turn
5235 it into a non-user variable now. */
5236 if (TARGET_SH4)
5237 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5238 #endif
5239 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5243 get_dest_uid (rtx label, int max_uid)
5245 rtx dest = next_real_insn (label);
5246 int dest_uid;
5247 if (! dest)
5248 /* This can happen for an undefined label. */
5249 return 0;
5250 dest_uid = INSN_UID (dest);
5251 /* If this is a newly created branch redirection blocking instruction,
5252 we cannot index the branch_uid or insn_addresses arrays with its
5253 uid. But then, we won't need to, because the actual destination is
5254 the following branch. */
5255 while (dest_uid >= max_uid)
5257 dest = NEXT_INSN (dest);
5258 dest_uid = INSN_UID (dest);
5260 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5261 return 0;
5262 return dest_uid;
5265 /* Split condbranches that are out of range. Also add clobbers for
5266 scratch registers that are needed in far jumps.
5267 We do this before delay slot scheduling, so that it can take our
5268 newly created instructions into account. It also allows us to
5269 find branches with common targets more easily. */
5271 static void
5272 split_branches (rtx first)
5274 rtx insn;
5275 struct far_branch **uid_branch, *far_branch_list = 0;
5276 int max_uid = get_max_uid ();
5277 int ok;
5279 /* Find out which branches are out of range. */
5280 shorten_branches (first);
5282 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5283 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5285 for (insn = first; insn; insn = NEXT_INSN (insn))
5286 if (! INSN_P (insn))
5287 continue;
5288 else if (INSN_DELETED_P (insn))
5290 /* Shorten_branches would split this instruction again,
5291 so transform it into a note. */
5292 SET_INSN_DELETED (insn);
5294 else if (GET_CODE (insn) == JUMP_INSN
5295 /* Don't mess with ADDR_DIFF_VEC */
5296 && (GET_CODE (PATTERN (insn)) == SET
5297 || GET_CODE (PATTERN (insn)) == RETURN))
5299 enum attr_type type = get_attr_type (insn);
5300 if (type == TYPE_CBRANCH)
5302 rtx next, beyond;
5304 if (get_attr_length (insn) > 4)
5306 rtx src = SET_SRC (PATTERN (insn));
5307 rtx olabel = XEXP (XEXP (src, 1), 0);
5308 int addr = INSN_ADDRESSES (INSN_UID (insn));
5309 rtx label = 0;
5310 int dest_uid = get_dest_uid (olabel, max_uid);
5311 struct far_branch *bp = uid_branch[dest_uid];
5313 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5314 the label if the LABEL_NUSES count drops to zero. There is
5315 always a jump_optimize pass that sets these values, but it
5316 proceeds to delete unreferenced code, and then if not
5317 optimizing, to un-delete the deleted instructions, thus
5318 leaving labels with too low uses counts. */
5319 if (! optimize)
5321 JUMP_LABEL (insn) = olabel;
5322 LABEL_NUSES (olabel)++;
5324 if (! bp)
5326 bp = (struct far_branch *) alloca (sizeof *bp);
5327 uid_branch[dest_uid] = bp;
5328 bp->prev = far_branch_list;
5329 far_branch_list = bp;
5330 bp->far_label
5331 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5332 LABEL_NUSES (bp->far_label)++;
5334 else
5336 label = bp->near_label;
5337 if (! label && bp->address - addr >= CONDJUMP_MIN)
5339 rtx block = bp->insert_place;
5341 if (GET_CODE (PATTERN (block)) == RETURN)
5342 block = PREV_INSN (block);
5343 else
5344 block = gen_block_redirect (block,
5345 bp->address, 2);
5346 label = emit_label_after (gen_label_rtx (),
5347 PREV_INSN (block));
5348 bp->near_label = label;
5350 else if (label && ! NEXT_INSN (label))
5352 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5353 bp->insert_place = insn;
5354 else
5355 gen_far_branch (bp);
5358 if (! label
5359 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5361 bp->near_label = label = gen_label_rtx ();
5362 bp->insert_place = insn;
5363 bp->address = addr;
5365 ok = redirect_jump (insn, label, 0);
5366 gcc_assert (ok);
5368 else
5370 /* get_attr_length (insn) == 2 */
5371 /* Check if we have a pattern where reorg wants to redirect
5372 the branch to a label from an unconditional branch that
5373 is too far away. */
5374 /* We can't use JUMP_LABEL here because it might be undefined
5375 when not optimizing. */
5376 /* A syntax error might cause beyond to be NULL_RTX. */
5377 beyond
5378 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5379 0));
5381 if (beyond
5382 && (GET_CODE (beyond) == JUMP_INSN
5383 || ((beyond = next_active_insn (beyond))
5384 && GET_CODE (beyond) == JUMP_INSN))
5385 && GET_CODE (PATTERN (beyond)) == SET
5386 && recog_memoized (beyond) == CODE_FOR_jump_compact
5387 && ((INSN_ADDRESSES
5388 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5389 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5390 > 252 + 258 + 2))
5391 gen_block_redirect (beyond,
5392 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5395 next = next_active_insn (insn);
5397 if ((GET_CODE (next) == JUMP_INSN
5398 || ((next = next_active_insn (next))
5399 && GET_CODE (next) == JUMP_INSN))
5400 && GET_CODE (PATTERN (next)) == SET
5401 && recog_memoized (next) == CODE_FOR_jump_compact
5402 && ((INSN_ADDRESSES
5403 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5404 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5405 > 252 + 258 + 2))
5406 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5408 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5410 int addr = INSN_ADDRESSES (INSN_UID (insn));
5411 rtx far_label = 0;
5412 int dest_uid = 0;
5413 struct far_branch *bp;
5415 if (type == TYPE_JUMP)
5417 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5418 dest_uid = get_dest_uid (far_label, max_uid);
5419 if (! dest_uid)
5421 /* Parse errors can lead to labels outside
5422 the insn stream. */
5423 if (! NEXT_INSN (far_label))
5424 continue;
5426 if (! optimize)
5428 JUMP_LABEL (insn) = far_label;
5429 LABEL_NUSES (far_label)++;
5431 redirect_jump (insn, NULL_RTX, 1);
5432 far_label = 0;
5435 bp = uid_branch[dest_uid];
5436 if (! bp)
5438 bp = (struct far_branch *) alloca (sizeof *bp);
5439 uid_branch[dest_uid] = bp;
5440 bp->prev = far_branch_list;
5441 far_branch_list = bp;
5442 bp->near_label = 0;
5443 bp->far_label = far_label;
5444 if (far_label)
5445 LABEL_NUSES (far_label)++;
5447 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5448 if (addr - bp->address <= CONDJUMP_MAX)
5449 emit_label_after (bp->near_label, PREV_INSN (insn));
5450 else
5452 gen_far_branch (bp);
5453 bp->near_label = 0;
5455 else
5456 bp->near_label = 0;
5457 bp->address = addr;
5458 bp->insert_place = insn;
5459 if (! far_label)
5460 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5461 else
5462 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5465 /* Generate all pending far branches,
5466 and free our references to the far labels. */
5467 while (far_branch_list)
5469 if (far_branch_list->near_label
5470 && ! NEXT_INSN (far_branch_list->near_label))
5471 gen_far_branch (far_branch_list);
5472 if (optimize
5473 && far_branch_list->far_label
5474 && ! --LABEL_NUSES (far_branch_list->far_label))
5475 delete_insn (far_branch_list->far_label);
5476 far_branch_list = far_branch_list->prev;
5479 /* Instruction length information is no longer valid due to the new
5480 instructions that have been generated. */
5481 init_insn_lengths ();
5484 /* Dump out instruction addresses, which is useful for debugging the
5485 constant pool table stuff.
5487 If relaxing, output the label and pseudo-ops used to link together
5488 calls and the instruction which set the registers. */
5490 /* ??? The addresses printed by this routine for insns are nonsense for
5491 insns which are inside of a sequence where none of the inner insns have
5492 variable length. This is because the second pass of shorten_branches
5493 does not bother to update them. */
5495 void
5496 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5497 int noperands ATTRIBUTE_UNUSED)
5499 if (TARGET_DUMPISIZE)
5500 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5502 if (TARGET_RELAX)
5504 rtx note;
5506 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5507 if (note)
5509 rtx pattern;
5511 pattern = PATTERN (insn);
5512 if (GET_CODE (pattern) == PARALLEL)
5513 pattern = XVECEXP (pattern, 0, 0);
5514 switch (GET_CODE (pattern))
5516 case SET:
5517 if (GET_CODE (SET_SRC (pattern)) != CALL
5518 && get_attr_type (insn) != TYPE_SFUNC)
5520 targetm.asm_out.internal_label
5521 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5522 break;
5524 /* else FALLTHROUGH */
5525 case CALL:
5526 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5527 CODE_LABEL_NUMBER (XEXP (note, 0)));
5528 break;
5530 default:
5531 gcc_unreachable ();
5537 /* Dump out any constants accumulated in the final pass. These will
5538 only be labels. */
5540 const char *
5541 output_jump_label_table (void)
5543 int i;
5545 if (pool_size)
5547 fprintf (asm_out_file, "\t.align 2\n");
5548 for (i = 0; i < pool_size; i++)
5550 pool_node *p = &pool_vector[i];
5552 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5553 CODE_LABEL_NUMBER (p->label));
5554 output_asm_insn (".long %O0", &p->value);
5556 pool_size = 0;
5559 return "";
5562 /* A full frame looks like:
5564 arg-5
5565 arg-4
5566 [ if current_function_anonymous_args
5567 arg-3
5568 arg-2
5569 arg-1
5570 arg-0 ]
5571 saved-fp
5572 saved-r10
5573 saved-r11
5574 saved-r12
5575 saved-pr
5576 local-n
5578 local-1
5579 local-0 <- fp points here. */
5581 /* Number of bytes pushed for anonymous args, used to pass information
5582 between expand_prologue and expand_epilogue. */
5584 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5585 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5586 for an epilogue and a negative value means that it's for a sibcall
5587 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5588 all the registers that are about to be restored, and hence dead. */
5590 static void
5591 output_stack_adjust (int size, rtx reg, int epilogue_p,
5592 HARD_REG_SET *live_regs_mask)
5594 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5595 if (size)
5597 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5599 /* This test is bogus, as output_stack_adjust is used to re-align the
5600 stack. */
5601 #if 0
5602 gcc_assert (!(size % align));
5603 #endif
5605 if (CONST_OK_FOR_ADD (size))
5606 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5607 /* Try to do it with two partial adjustments; however, we must make
5608 sure that the stack is properly aligned at all times, in case
5609 an interrupt occurs between the two partial adjustments. */
5610 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5611 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5613 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5614 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5616 else
5618 rtx const_reg;
5619 rtx insn;
5620 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5621 int i;
5623 /* If TEMP is invalid, we could temporarily save a general
5624 register to MACL. However, there is currently no need
5625 to handle this case, so just die when we see it. */
5626 if (epilogue_p < 0
5627 || current_function_interrupt
5628 || ! call_really_used_regs[temp] || fixed_regs[temp])
5629 temp = -1;
5630 if (temp < 0 && ! current_function_interrupt
5631 && (TARGET_SHMEDIA || epilogue_p >= 0))
5633 HARD_REG_SET temps;
5634 COPY_HARD_REG_SET (temps, call_used_reg_set);
5635 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5636 if (epilogue_p > 0)
5638 int nreg = 0;
5639 if (crtl->return_rtx)
5641 enum machine_mode mode;
5642 mode = GET_MODE (crtl->return_rtx);
5643 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5644 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5646 for (i = 0; i < nreg; i++)
5647 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5648 if (crtl->calls_eh_return)
5650 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5651 for (i = 0; i <= 3; i++)
5652 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5655 if (TARGET_SHMEDIA && epilogue_p < 0)
5656 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5657 CLEAR_HARD_REG_BIT (temps, i);
5658 if (epilogue_p <= 0)
5660 for (i = FIRST_PARM_REG;
5661 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5662 CLEAR_HARD_REG_BIT (temps, i);
5663 if (cfun->static_chain_decl != NULL)
5664 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5666 temp = scavenge_reg (&temps);
5668 if (temp < 0 && live_regs_mask)
5670 HARD_REG_SET temps;
5672 COPY_HARD_REG_SET (temps, *live_regs_mask);
5673 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5674 temp = scavenge_reg (&temps);
5676 if (temp < 0)
5678 rtx adj_reg, tmp_reg, mem;
5680 /* If we reached here, the most likely case is the (sibcall)
5681 epilogue for non SHmedia. Put a special push/pop sequence
5682 for such case as the last resort. This looks lengthy but
5683 would not be problem because it seems to be very
5684 rare. */
5686 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5689 /* ??? There is still the slight possibility that r4 or
5690 r5 have been reserved as fixed registers or assigned
5691 as global registers, and they change during an
5692 interrupt. There are possible ways to handle this:
5694 - If we are adjusting the frame pointer (r14), we can do
5695 with a single temp register and an ordinary push / pop
5696 on the stack.
5697 - Grab any call-used or call-saved registers (i.e. not
5698 fixed or globals) for the temps we need. We might
5699 also grab r14 if we are adjusting the stack pointer.
5700 If we can't find enough available registers, issue
5701 a diagnostic and die - the user must have reserved
5702 way too many registers.
5703 But since all this is rather unlikely to happen and
5704 would require extra testing, we just die if r4 / r5
5705 are not available. */
5706 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5707 && !global_regs[4] && !global_regs[5]);
5709 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5710 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5711 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5712 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5713 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5714 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5715 emit_move_insn (mem, tmp_reg);
5716 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5717 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5718 emit_move_insn (mem, tmp_reg);
5719 emit_move_insn (reg, adj_reg);
5720 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5721 emit_move_insn (adj_reg, mem);
5722 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5723 emit_move_insn (tmp_reg, mem);
5724 /* Tell flow the insns that pop r4/r5 aren't dead. */
5725 emit_use (tmp_reg);
5726 emit_use (adj_reg);
5727 return;
5729 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5731 /* If SIZE is negative, subtract the positive value.
5732 This sometimes allows a constant pool entry to be shared
5733 between prologue and epilogue code. */
5734 if (size < 0)
5736 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5737 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5739 else
5741 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5742 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5744 if (! epilogue_p)
5745 REG_NOTES (insn)
5746 = (gen_rtx_EXPR_LIST
5747 (REG_FRAME_RELATED_EXPR,
5748 gen_rtx_SET (VOIDmode, reg,
5749 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5750 REG_NOTES (insn)));
5755 static rtx
5756 frame_insn (rtx x)
5758 x = emit_insn (x);
5759 RTX_FRAME_RELATED_P (x) = 1;
5760 return x;
5763 /* Output RTL to push register RN onto the stack. */
5765 static rtx
5766 push (int rn)
5768 rtx x;
5769 if (rn == FPUL_REG)
5770 x = gen_push_fpul ();
5771 else if (rn == FPSCR_REG)
5772 x = gen_push_fpscr ();
5773 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5774 && FP_OR_XD_REGISTER_P (rn))
5776 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5777 return NULL_RTX;
5778 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5780 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5781 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5782 else
5783 x = gen_push (gen_rtx_REG (SImode, rn));
5785 x = frame_insn (x);
5786 REG_NOTES (x)
5787 = gen_rtx_EXPR_LIST (REG_INC,
5788 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5789 return x;
5792 /* Output RTL to pop register RN from the stack. */
5794 static void
5795 pop (int rn)
5797 rtx x;
5798 if (rn == FPUL_REG)
5799 x = gen_pop_fpul ();
5800 else if (rn == FPSCR_REG)
5801 x = gen_pop_fpscr ();
5802 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5803 && FP_OR_XD_REGISTER_P (rn))
5805 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5806 return;
5807 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5809 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5810 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5811 else
5812 x = gen_pop (gen_rtx_REG (SImode, rn));
5814 x = emit_insn (x);
5815 REG_NOTES (x)
5816 = gen_rtx_EXPR_LIST (REG_INC,
5817 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5820 /* Generate code to push the regs specified in the mask. */
5822 static void
5823 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5825 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5826 int skip_fpscr = 0;
5828 /* Push PR last; this gives better latencies after the prologue, and
5829 candidates for the return delay slot when there are no general
5830 registers pushed. */
5831 for (; i < FIRST_PSEUDO_REGISTER; i++)
5833 /* If this is an interrupt handler, and the SZ bit varies,
5834 and we have to push any floating point register, we need
5835 to switch to the correct precision first. */
5836 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5837 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5839 HARD_REG_SET unsaved;
5841 push (FPSCR_REG);
5842 COMPL_HARD_REG_SET (unsaved, *mask);
5843 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5844 skip_fpscr = 1;
5846 if (i != PR_REG
5847 && (i != FPSCR_REG || ! skip_fpscr)
5848 && TEST_HARD_REG_BIT (*mask, i))
5850 /* If the ISR has RESBANK attribute assigned, don't push any of
5851 the following registers - R0-R14, MACH, MACL and GBR. */
5852 if (! (sh_cfun_resbank_handler_p ()
5853 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5854 || i == MACH_REG
5855 || i == MACL_REG
5856 || i == GBR_REG)))
5857 push (i);
5861 /* Push banked registers last to improve delay slot opportunities. */
5862 if (interrupt_handler)
5863 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5864 if (TEST_HARD_REG_BIT (*mask, i))
5865 push (i);
5867 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5868 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5869 push (PR_REG);
5872 /* Calculate how much extra space is needed to save all callee-saved
5873 target registers.
5874 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5876 static int
5877 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5879 int reg;
5880 int stack_space = 0;
5881 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5883 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5884 if ((! call_really_used_regs[reg] || interrupt_handler)
5885 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5886 /* Leave space to save this target register on the stack,
5887 in case target register allocation wants to use it. */
5888 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5889 return stack_space;
5892 /* Decide whether we should reserve space for callee-save target registers,
5893 in case target register allocation wants to use them. REGS_SAVED is
5894 the space, in bytes, that is already required for register saves.
5895 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5897 static int
5898 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5899 HARD_REG_SET *live_regs_mask)
5901 if (optimize_size)
5902 return 0;
5903 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5906 /* Decide how much space to reserve for callee-save target registers
5907 in case target register allocation wants to use them.
5908 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5910 static int
5911 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5913 if (shmedia_space_reserved_for_target_registers)
5914 return shmedia_target_regs_stack_space (live_regs_mask);
5915 else
5916 return 0;
5919 /* Work out the registers which need to be saved, both as a mask and a
5920 count of saved words. Return the count.
5922 If doing a pragma interrupt function, then push all regs used by the
5923 function, and if we call another function (we can tell by looking at PR),
5924 make sure that all the regs it clobbers are safe too. */
5926 static int
5927 calc_live_regs (HARD_REG_SET *live_regs_mask)
5929 unsigned int reg;
5930 int count;
5931 tree attrs;
5932 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5933 bool nosave_low_regs;
5934 int pr_live, has_call;
5936 attrs = DECL_ATTRIBUTES (current_function_decl);
5937 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5938 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5939 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5940 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5942 CLEAR_HARD_REG_SET (*live_regs_mask);
5943 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5944 && df_regs_ever_live_p (FPSCR_REG))
5945 target_flags &= ~MASK_FPU_SINGLE;
5946 /* If we can save a lot of saves by switching to double mode, do that. */
5947 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5948 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5949 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5950 && (! call_really_used_regs[reg]
5951 || interrupt_handler)
5952 && ++count > 2)
5954 target_flags &= ~MASK_FPU_SINGLE;
5955 break;
5957 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5958 knows how to use it. That means the pseudo originally allocated for
5959 the initial value can become the PR_MEDIA_REG hard register, as seen for
5960 execute/20010122-1.c:test9. */
5961 if (TARGET_SHMEDIA)
5962 /* ??? this function is called from initial_elimination_offset, hence we
5963 can't use the result of sh_media_register_for_return here. */
5964 pr_live = sh_pr_n_sets ();
5965 else
5967 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5968 pr_live = (pr_initial
5969 ? (GET_CODE (pr_initial) != REG
5970 || REGNO (pr_initial) != (PR_REG))
5971 : df_regs_ever_live_p (PR_REG));
5972 /* For Shcompact, if not optimizing, we end up with a memory reference
5973 using the return address pointer for __builtin_return_address even
5974 though there is no actual need to put the PR register on the stack. */
5975 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5977 /* Force PR to be live if the prologue has to call the SHmedia
5978 argument decoder or register saver. */
5979 if (TARGET_SHCOMPACT
5980 && ((crtl->args.info.call_cookie
5981 & ~ CALL_COOKIE_RET_TRAMP (1))
5982 || crtl->saves_all_registers))
5983 pr_live = 1;
5984 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5985 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5987 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5988 ? pr_live
5989 : interrupt_handler
5990 ? (/* Need to save all the regs ever live. */
5991 (df_regs_ever_live_p (reg)
5992 || (call_really_used_regs[reg]
5993 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5994 || reg == PIC_OFFSET_TABLE_REGNUM)
5995 && has_call)
5996 || (TARGET_SHMEDIA && has_call
5997 && REGISTER_NATURAL_MODE (reg) == SImode
5998 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5999 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6000 && reg != RETURN_ADDRESS_POINTER_REGNUM
6001 && reg != T_REG && reg != GBR_REG
6002 /* Push fpscr only on targets which have FPU */
6003 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6004 : (/* Only push those regs which are used and need to be saved. */
6005 (TARGET_SHCOMPACT
6006 && flag_pic
6007 && crtl->args.info.call_cookie
6008 && reg == PIC_OFFSET_TABLE_REGNUM)
6009 || (df_regs_ever_live_p (reg)
6010 && (!call_really_used_regs[reg]
6011 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6012 || (crtl->calls_eh_return
6013 && (reg == EH_RETURN_DATA_REGNO (0)
6014 || reg == EH_RETURN_DATA_REGNO (1)
6015 || reg == EH_RETURN_DATA_REGNO (2)
6016 || reg == EH_RETURN_DATA_REGNO (3)))
6017 || ((reg == MACL_REG || reg == MACH_REG)
6018 && df_regs_ever_live_p (reg)
6019 && sh_cfun_attr_renesas_p ())
6022 SET_HARD_REG_BIT (*live_regs_mask, reg);
6023 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6025 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6026 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6028 if (FP_REGISTER_P (reg))
6030 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6032 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6033 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6036 else if (XD_REGISTER_P (reg))
6038 /* Must switch to double mode to access these registers. */
6039 target_flags &= ~MASK_FPU_SINGLE;
6043 if (nosave_low_regs && reg == R8_REG)
6044 break;
6046 /* If we have a target register optimization pass after prologue / epilogue
6047 threading, we need to assume all target registers will be live even if
6048 they aren't now. */
6049 if (flag_branch_target_load_optimize2
6050 && TARGET_SAVE_ALL_TARGET_REGS
6051 && shmedia_space_reserved_for_target_registers)
6052 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6053 if ((! call_really_used_regs[reg] || interrupt_handler)
6054 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6056 SET_HARD_REG_BIT (*live_regs_mask, reg);
6057 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6059 /* If this is an interrupt handler, we don't have any call-clobbered
6060 registers we can conveniently use for target register save/restore.
6061 Make sure we save at least one general purpose register when we need
6062 to save target registers. */
6063 if (interrupt_handler
6064 && hard_reg_set_intersect_p (*live_regs_mask,
6065 reg_class_contents[TARGET_REGS])
6066 && ! hard_reg_set_intersect_p (*live_regs_mask,
6067 reg_class_contents[GENERAL_REGS]))
6069 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6070 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6073 return count;
6076 /* Code to generate prologue and epilogue sequences */
6078 /* PUSHED is the number of bytes that are being pushed on the
6079 stack for register saves. Return the frame size, padded
6080 appropriately so that the stack stays properly aligned. */
6081 static HOST_WIDE_INT
6082 rounded_frame_size (int pushed)
6084 HOST_WIDE_INT size = get_frame_size ();
6085 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6087 return ((size + pushed + align - 1) & -align) - pushed;
6090 /* Choose a call-clobbered target-branch register that remains
6091 unchanged along the whole function. We set it up as the return
6092 value in the prologue. */
6094 sh_media_register_for_return (void)
6096 int regno;
6097 int tr0_used;
6099 if (! current_function_is_leaf)
6100 return -1;
6101 if (lookup_attribute ("interrupt_handler",
6102 DECL_ATTRIBUTES (current_function_decl)))
6103 return -1;
6104 if (sh_cfun_interrupt_handler_p ())
6105 return -1;
6107 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6109 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6110 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6111 return regno;
6113 return -1;
6116 /* The maximum registers we need to save are:
6117 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6118 - 32 floating point registers (for each pair, we save none,
6119 one single precision value, or a double precision value).
6120 - 8 target registers
6121 - add 1 entry for a delimiter. */
6122 #define MAX_SAVED_REGS (62+32+8)
6124 typedef struct save_entry_s
6126 unsigned char reg;
6127 unsigned char mode;
6128 short offset;
6129 } save_entry;
6131 #define MAX_TEMPS 4
6133 /* There will be a delimiter entry with VOIDmode both at the start and the
6134 end of a filled in schedule. The end delimiter has the offset of the
6135 save with the smallest (i.e. most negative) offset. */
6136 typedef struct save_schedule_s
6138 save_entry entries[MAX_SAVED_REGS + 2];
6139 int temps[MAX_TEMPS+1];
6140 } save_schedule;
6142 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6143 use reverse order. Returns the last entry written to (not counting
6144 the delimiter). OFFSET_BASE is a number to be added to all offset
6145 entries. */
6147 static save_entry *
6148 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6149 int offset_base)
6151 int align, i;
6152 save_entry *entry = schedule->entries;
6153 int tmpx = 0;
6154 int offset;
6156 if (! current_function_interrupt)
6157 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6158 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6159 && ! FUNCTION_ARG_REGNO_P (i)
6160 && i != FIRST_RET_REG
6161 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6162 && ! (crtl->calls_eh_return
6163 && (i == EH_RETURN_STACKADJ_REGNO
6164 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6165 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6166 schedule->temps[tmpx++] = i;
6167 entry->reg = -1;
6168 entry->mode = VOIDmode;
6169 entry->offset = offset_base;
6170 entry++;
6171 /* We loop twice: first, we save 8-byte aligned registers in the
6172 higher addresses, that are known to be aligned. Then, we
6173 proceed to saving 32-bit registers that don't need 8-byte
6174 alignment.
6175 If this is an interrupt function, all registers that need saving
6176 need to be saved in full. moreover, we need to postpone saving
6177 target registers till we have saved some general purpose registers
6178 we can then use as scratch registers. */
6179 offset = offset_base;
6180 for (align = 1; align >= 0; align--)
6182 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6183 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6185 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6186 int reg = i;
6188 if (current_function_interrupt)
6190 if (TARGET_REGISTER_P (i))
6191 continue;
6192 if (GENERAL_REGISTER_P (i))
6193 mode = DImode;
6195 if (mode == SFmode && (i % 2) == 1
6196 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6197 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6199 mode = DFmode;
6200 i--;
6201 reg--;
6204 /* If we're doing the aligned pass and this is not aligned,
6205 or we're doing the unaligned pass and this is aligned,
6206 skip it. */
6207 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6208 != align)
6209 continue;
6211 if (current_function_interrupt
6212 && GENERAL_REGISTER_P (i)
6213 && tmpx < MAX_TEMPS)
6214 schedule->temps[tmpx++] = i;
6216 offset -= GET_MODE_SIZE (mode);
6217 entry->reg = i;
6218 entry->mode = mode;
6219 entry->offset = offset;
6220 entry++;
6222 if (align && current_function_interrupt)
6223 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6224 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6226 offset -= GET_MODE_SIZE (DImode);
6227 entry->reg = i;
6228 entry->mode = DImode;
6229 entry->offset = offset;
6230 entry++;
6233 entry->reg = -1;
6234 entry->mode = VOIDmode;
6235 entry->offset = offset;
6236 schedule->temps[tmpx] = -1;
6237 return entry - 1;
6240 void
6241 sh_expand_prologue (void)
6243 HARD_REG_SET live_regs_mask;
6244 int d, i;
6245 int d_rounding = 0;
6246 int save_flags = target_flags;
6247 int pretend_args;
6248 tree sp_switch_attr
6249 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6251 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6253 /* We have pretend args if we had an object sent partially in registers
6254 and partially on the stack, e.g. a large structure. */
6255 pretend_args = crtl->args.pretend_args_size;
6256 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6257 && (NPARM_REGS(SImode)
6258 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6259 pretend_args = 0;
6260 output_stack_adjust (-pretend_args
6261 - crtl->args.info.stack_regs * 8,
6262 stack_pointer_rtx, 0, NULL);
6264 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6265 /* We're going to use the PIC register to load the address of the
6266 incoming-argument decoder and/or of the return trampoline from
6267 the GOT, so make sure the PIC register is preserved and
6268 initialized. */
6269 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6271 if (TARGET_SHCOMPACT
6272 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6274 int reg;
6276 /* First, make all registers with incoming arguments that will
6277 be pushed onto the stack live, so that register renaming
6278 doesn't overwrite them. */
6279 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6280 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6281 >= NPARM_REGS (SImode) - reg)
6282 for (; reg < NPARM_REGS (SImode); reg++)
6283 emit_insn (gen_shcompact_preserve_incoming_args
6284 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6285 else if (CALL_COOKIE_INT_REG_GET
6286 (crtl->args.info.call_cookie, reg) == 1)
6287 emit_insn (gen_shcompact_preserve_incoming_args
6288 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6290 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6291 stack_pointer_rtx);
6292 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6293 GEN_INT (crtl->args.info.call_cookie));
6294 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6295 gen_rtx_REG (SImode, R0_REG));
6297 else if (TARGET_SHMEDIA)
6299 int tr = sh_media_register_for_return ();
6301 if (tr >= 0)
6302 emit_move_insn (gen_rtx_REG (DImode, tr),
6303 gen_rtx_REG (DImode, PR_MEDIA_REG));
6306 /* Emit the code for SETUP_VARARGS. */
6307 if (cfun->stdarg)
6309 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6311 /* Push arg regs as if they'd been provided by caller in stack. */
6312 for (i = 0; i < NPARM_REGS(SImode); i++)
6314 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6315 rtx insn;
6317 if (i >= (NPARM_REGS(SImode)
6318 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6320 break;
6321 insn = push (rn);
6326 /* If we're supposed to switch stacks at function entry, do so now. */
6327 if (sp_switch_attr)
6329 /* The argument specifies a variable holding the address of the
6330 stack the interrupt function should switch to/from at entry/exit. */
6331 const char *s
6332 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6333 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6335 emit_insn (gen_sp_switch_1 (sp_switch));
6338 d = calc_live_regs (&live_regs_mask);
6339 /* ??? Maybe we could save some switching if we can move a mode switch
6340 that already happens to be at the function start into the prologue. */
6341 if (target_flags != save_flags && ! current_function_interrupt)
6342 emit_insn (gen_toggle_sz ());
6344 if (TARGET_SH5)
6346 int offset_base, offset;
6347 rtx r0 = NULL_RTX;
6348 int offset_in_r0 = -1;
6349 int sp_in_r0 = 0;
6350 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6351 int total_size, save_size;
6352 save_schedule schedule;
6353 save_entry *entry;
6354 int *tmp_pnt;
6356 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6357 && ! current_function_interrupt)
6358 r0 = gen_rtx_REG (Pmode, R0_REG);
6360 /* D is the actual number of bytes that we need for saving registers,
6361 however, in initial_elimination_offset we have committed to using
6362 an additional TREGS_SPACE amount of bytes - in order to keep both
6363 addresses to arguments supplied by the caller and local variables
6364 valid, we must keep this gap. Place it between the incoming
6365 arguments and the actually saved registers in a bid to optimize
6366 locality of reference. */
6367 total_size = d + tregs_space;
6368 total_size += rounded_frame_size (total_size);
6369 save_size = total_size - rounded_frame_size (d);
6370 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6371 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6372 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6374 /* If adjusting the stack in a single step costs nothing extra, do so.
6375 I.e. either if a single addi is enough, or we need a movi anyway,
6376 and we don't exceed the maximum offset range (the test for the
6377 latter is conservative for simplicity). */
6378 if (TARGET_SHMEDIA
6379 && (CONST_OK_FOR_I10 (-total_size)
6380 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6381 && total_size <= 2044)))
6382 d_rounding = total_size - save_size;
6384 offset_base = d + d_rounding;
6386 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6387 0, NULL);
6389 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6390 tmp_pnt = schedule.temps;
6391 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6393 enum machine_mode mode = entry->mode;
6394 unsigned int reg = entry->reg;
6395 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6396 rtx orig_reg_rtx;
6398 offset = entry->offset;
6400 reg_rtx = gen_rtx_REG (mode, reg);
6402 mem_rtx = gen_frame_mem (mode,
6403 gen_rtx_PLUS (Pmode,
6404 stack_pointer_rtx,
6405 GEN_INT (offset)));
6407 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6409 gcc_assert (r0);
6410 mem_rtx = NULL_RTX;
6412 try_pre_dec:
6414 if (HAVE_PRE_DECREMENT
6415 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6416 || mem_rtx == NULL_RTX
6417 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6419 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6421 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6422 pre_dec_ok);
6424 pre_dec = NULL_RTX;
6426 break;
6428 pre_dec_ok:
6429 mem_rtx = NULL_RTX;
6430 offset += GET_MODE_SIZE (mode);
6432 while (0);
6434 if (mem_rtx != NULL_RTX)
6435 goto addr_ok;
6437 if (offset_in_r0 == -1)
6439 emit_move_insn (r0, GEN_INT (offset));
6440 offset_in_r0 = offset;
6442 else if (offset != offset_in_r0)
6444 emit_move_insn (r0,
6445 gen_rtx_PLUS
6446 (Pmode, r0,
6447 GEN_INT (offset - offset_in_r0)));
6448 offset_in_r0 += offset - offset_in_r0;
6451 if (pre_dec != NULL_RTX)
6453 if (! sp_in_r0)
6455 emit_move_insn (r0,
6456 gen_rtx_PLUS
6457 (Pmode, r0, stack_pointer_rtx));
6458 sp_in_r0 = 1;
6461 offset -= GET_MODE_SIZE (mode);
6462 offset_in_r0 -= GET_MODE_SIZE (mode);
6464 mem_rtx = pre_dec;
6466 else if (sp_in_r0)
6467 mem_rtx = gen_frame_mem (mode, r0);
6468 else
6469 mem_rtx = gen_frame_mem (mode,
6470 gen_rtx_PLUS (Pmode,
6471 stack_pointer_rtx,
6472 r0));
6474 /* We must not use an r0-based address for target-branch
6475 registers or for special registers without pre-dec
6476 memory addresses, since we store their values in r0
6477 first. */
6478 gcc_assert (!TARGET_REGISTER_P (reg)
6479 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6480 || mem_rtx == pre_dec));
6482 addr_ok:
6483 orig_reg_rtx = reg_rtx;
6484 if (TARGET_REGISTER_P (reg)
6485 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6486 && mem_rtx != pre_dec))
6488 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6490 emit_move_insn (tmp_reg, reg_rtx);
6492 if (REGNO (tmp_reg) == R0_REG)
6494 offset_in_r0 = -1;
6495 sp_in_r0 = 0;
6496 gcc_assert (!refers_to_regno_p
6497 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6500 if (*++tmp_pnt <= 0)
6501 tmp_pnt = schedule.temps;
6503 reg_rtx = tmp_reg;
6506 rtx insn;
6508 /* Mark as interesting for dwarf cfi generator */
6509 insn = emit_move_insn (mem_rtx, reg_rtx);
6510 RTX_FRAME_RELATED_P (insn) = 1;
6511 /* If we use an intermediate register for the save, we can't
6512 describe this exactly in cfi as a copy of the to-be-saved
6513 register into the temporary register and then the temporary
6514 register on the stack, because the temporary register can
6515 have a different natural size than the to-be-saved register.
6516 Thus, we gloss over the intermediate copy and pretend we do
6517 a direct save from the to-be-saved register. */
6518 if (REGNO (reg_rtx) != reg)
6520 rtx set, note_rtx;
6522 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6523 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6524 REG_NOTES (insn));
6525 REG_NOTES (insn) = note_rtx;
6528 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6530 rtx reg_rtx = gen_rtx_REG (mode, reg);
6531 rtx set, note_rtx;
6532 rtx mem_rtx = gen_frame_mem (mode,
6533 gen_rtx_PLUS (Pmode,
6534 stack_pointer_rtx,
6535 GEN_INT (offset)));
6537 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6538 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6539 REG_NOTES (insn));
6540 REG_NOTES (insn) = note_rtx;
6545 gcc_assert (entry->offset == d_rounding);
6547 else
6548 push_regs (&live_regs_mask, current_function_interrupt);
6550 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6551 emit_insn (gen_GOTaddr2picreg ());
6553 if (SHMEDIA_REGS_STACK_ADJUST ())
6555 /* This must NOT go through the PLT, otherwise mach and macl
6556 may be clobbered. */
6557 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6558 (TARGET_FPU_ANY
6559 ? "__GCC_push_shmedia_regs"
6560 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6561 emit_insn (gen_shmedia_save_restore_regs_compact
6562 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6565 if (target_flags != save_flags && ! current_function_interrupt)
6566 emit_insn (gen_toggle_sz ());
6568 target_flags = save_flags;
6570 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6571 stack_pointer_rtx, 0, NULL);
6573 if (frame_pointer_needed)
6574 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6576 if (TARGET_SHCOMPACT
6577 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6579 /* This must NOT go through the PLT, otherwise mach and macl
6580 may be clobbered. */
6581 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6582 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6583 emit_insn (gen_shcompact_incoming_args ());
6587 void
6588 sh_expand_epilogue (bool sibcall_p)
6590 HARD_REG_SET live_regs_mask;
6591 int d, i;
6592 int d_rounding = 0;
6594 int save_flags = target_flags;
6595 int frame_size, save_size;
6596 int fpscr_deferred = 0;
6597 int e = sibcall_p ? -1 : 1;
6599 d = calc_live_regs (&live_regs_mask);
6601 save_size = d;
6602 frame_size = rounded_frame_size (d);
6604 if (TARGET_SH5)
6606 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6607 int total_size;
6608 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6609 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6610 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6612 total_size = d + tregs_space;
6613 total_size += rounded_frame_size (total_size);
6614 save_size = total_size - frame_size;
6616 /* If adjusting the stack in a single step costs nothing extra, do so.
6617 I.e. either if a single addi is enough, or we need a movi anyway,
6618 and we don't exceed the maximum offset range (the test for the
6619 latter is conservative for simplicity). */
6620 if (TARGET_SHMEDIA
6621 && ! frame_pointer_needed
6622 && (CONST_OK_FOR_I10 (total_size)
6623 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6624 && total_size <= 2044)))
6625 d_rounding = frame_size;
6627 frame_size -= d_rounding;
6630 if (frame_pointer_needed)
6632 /* We must avoid scheduling the epilogue with previous basic blocks
6633 when exception handling is enabled. See PR/18032. */
6634 if (flag_exceptions)
6635 emit_insn (gen_blockage ());
6636 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6637 &live_regs_mask);
6639 /* We must avoid moving the stack pointer adjustment past code
6640 which reads from the local frame, else an interrupt could
6641 occur after the SP adjustment and clobber data in the local
6642 frame. */
6643 emit_insn (gen_blockage ());
6644 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6646 else if (frame_size)
6648 /* We must avoid moving the stack pointer adjustment past code
6649 which reads from the local frame, else an interrupt could
6650 occur after the SP adjustment and clobber data in the local
6651 frame. */
6652 emit_insn (gen_blockage ());
6653 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6656 if (SHMEDIA_REGS_STACK_ADJUST ())
6658 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6659 (TARGET_FPU_ANY
6660 ? "__GCC_pop_shmedia_regs"
6661 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6662 /* This must NOT go through the PLT, otherwise mach and macl
6663 may be clobbered. */
6664 emit_insn (gen_shmedia_save_restore_regs_compact
6665 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6668 /* Pop all the registers. */
6670 if (target_flags != save_flags && ! current_function_interrupt)
6671 emit_insn (gen_toggle_sz ());
6672 if (TARGET_SH5)
6674 int offset_base, offset;
6675 int offset_in_r0 = -1;
6676 int sp_in_r0 = 0;
6677 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6678 save_schedule schedule;
6679 save_entry *entry;
6680 int *tmp_pnt;
6682 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6683 offset_base = -entry[1].offset + d_rounding;
6684 tmp_pnt = schedule.temps;
6685 for (; entry->mode != VOIDmode; entry--)
6687 enum machine_mode mode = entry->mode;
6688 int reg = entry->reg;
6689 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6691 offset = offset_base + entry->offset;
6692 reg_rtx = gen_rtx_REG (mode, reg);
6694 mem_rtx = gen_frame_mem (mode,
6695 gen_rtx_PLUS (Pmode,
6696 stack_pointer_rtx,
6697 GEN_INT (offset)));
6699 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6701 mem_rtx = NULL_RTX;
6703 try_post_inc:
6705 if (HAVE_POST_INCREMENT
6706 && (offset == offset_in_r0
6707 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6708 && mem_rtx == NULL_RTX)
6709 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6711 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6713 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6714 post_inc_ok);
6716 post_inc = NULL_RTX;
6718 break;
6720 post_inc_ok:
6721 mem_rtx = NULL_RTX;
6723 while (0);
6725 if (mem_rtx != NULL_RTX)
6726 goto addr_ok;
6728 if (offset_in_r0 == -1)
6730 emit_move_insn (r0, GEN_INT (offset));
6731 offset_in_r0 = offset;
6733 else if (offset != offset_in_r0)
6735 emit_move_insn (r0,
6736 gen_rtx_PLUS
6737 (Pmode, r0,
6738 GEN_INT (offset - offset_in_r0)));
6739 offset_in_r0 += offset - offset_in_r0;
6742 if (post_inc != NULL_RTX)
6744 if (! sp_in_r0)
6746 emit_move_insn (r0,
6747 gen_rtx_PLUS
6748 (Pmode, r0, stack_pointer_rtx));
6749 sp_in_r0 = 1;
6752 mem_rtx = post_inc;
6754 offset_in_r0 += GET_MODE_SIZE (mode);
6756 else if (sp_in_r0)
6757 mem_rtx = gen_frame_mem (mode, r0);
6758 else
6759 mem_rtx = gen_frame_mem (mode,
6760 gen_rtx_PLUS (Pmode,
6761 stack_pointer_rtx,
6762 r0));
6764 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6765 || mem_rtx == post_inc);
6767 addr_ok:
6768 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6769 && mem_rtx != post_inc)
6771 insn = emit_move_insn (r0, mem_rtx);
6772 mem_rtx = r0;
6774 else if (TARGET_REGISTER_P (reg))
6776 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6778 /* Give the scheduler a bit of freedom by using up to
6779 MAX_TEMPS registers in a round-robin fashion. */
6780 insn = emit_move_insn (tmp_reg, mem_rtx);
6781 mem_rtx = tmp_reg;
6782 if (*++tmp_pnt < 0)
6783 tmp_pnt = schedule.temps;
6786 insn = emit_move_insn (reg_rtx, mem_rtx);
6789 gcc_assert (entry->offset + offset_base == d + d_rounding);
6791 else /* ! TARGET_SH5 */
6793 int last_reg;
6795 save_size = 0;
6796 /* For an ISR with RESBANK attribute assigned, don't pop PR
6797 register. */
6798 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6799 && !sh_cfun_resbank_handler_p ())
6801 if (!frame_pointer_needed)
6802 emit_insn (gen_blockage ());
6803 pop (PR_REG);
6806 /* Banked registers are poped first to avoid being scheduled in the
6807 delay slot. RTE switches banks before the ds instruction. */
6808 if (current_function_interrupt)
6810 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6811 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6812 pop (LAST_BANKED_REG - i);
6814 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6816 else
6817 last_reg = FIRST_PSEUDO_REGISTER;
6819 for (i = 0; i < last_reg; i++)
6821 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6823 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6824 && hard_reg_set_intersect_p (live_regs_mask,
6825 reg_class_contents[DF_REGS]))
6826 fpscr_deferred = 1;
6827 /* For an ISR with RESBANK attribute assigned, don't pop
6828 following registers, R0-R14, MACH, MACL and GBR. */
6829 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6830 && ! (sh_cfun_resbank_handler_p ()
6831 && ((j >= FIRST_GENERAL_REG
6832 && j < LAST_GENERAL_REG)
6833 || j == MACH_REG
6834 || j == MACL_REG
6835 || j == GBR_REG)))
6836 pop (j);
6838 if (j == FIRST_FP_REG && fpscr_deferred)
6839 pop (FPSCR_REG);
6842 if (target_flags != save_flags && ! current_function_interrupt)
6843 emit_insn (gen_toggle_sz ());
6844 target_flags = save_flags;
6846 output_stack_adjust (crtl->args.pretend_args_size
6847 + save_size + d_rounding
6848 + crtl->args.info.stack_regs * 8,
6849 stack_pointer_rtx, e, NULL);
6851 if (crtl->calls_eh_return)
6852 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6853 EH_RETURN_STACKADJ_RTX));
6855 /* Switch back to the normal stack if necessary. */
6856 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6857 emit_insn (gen_sp_switch_2 ());
6859 /* Tell flow the insn that pops PR isn't dead. */
6860 /* PR_REG will never be live in SHmedia mode, and we don't need to
6861 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6862 by the return pattern. */
6863 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6864 emit_use (gen_rtx_REG (SImode, PR_REG));
6867 static int sh_need_epilogue_known = 0;
6870 sh_need_epilogue (void)
6872 if (! sh_need_epilogue_known)
6874 rtx epilogue;
6876 start_sequence ();
6877 sh_expand_epilogue (0);
6878 epilogue = get_insns ();
6879 end_sequence ();
6880 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6882 return sh_need_epilogue_known > 0;
6885 /* Emit code to change the current function's return address to RA.
6886 TEMP is available as a scratch register, if needed. */
6888 void
6889 sh_set_return_address (rtx ra, rtx tmp)
6891 HARD_REG_SET live_regs_mask;
6892 int d;
6893 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6894 int pr_offset;
6896 d = calc_live_regs (&live_regs_mask);
6898 /* If pr_reg isn't life, we can set it (or the register given in
6899 sh_media_register_for_return) directly. */
6900 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6902 rtx rr;
6904 if (TARGET_SHMEDIA)
6906 int rr_regno = sh_media_register_for_return ();
6908 if (rr_regno < 0)
6909 rr_regno = pr_reg;
6911 rr = gen_rtx_REG (DImode, rr_regno);
6913 else
6914 rr = gen_rtx_REG (SImode, pr_reg);
6916 emit_insn (GEN_MOV (rr, ra));
6917 /* Tell flow the register for return isn't dead. */
6918 emit_use (rr);
6919 return;
6922 if (TARGET_SH5)
6924 int offset;
6925 save_schedule schedule;
6926 save_entry *entry;
6928 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6929 offset = entry[1].offset;
6930 for (; entry->mode != VOIDmode; entry--)
6931 if (entry->reg == pr_reg)
6932 goto found;
6934 /* We can't find pr register. */
6935 gcc_unreachable ();
6937 found:
6938 offset = entry->offset - offset;
6939 pr_offset = (rounded_frame_size (d) + offset
6940 + SHMEDIA_REGS_STACK_ADJUST ());
6942 else
6943 pr_offset = rounded_frame_size (d);
6945 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6946 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6948 tmp = gen_frame_mem (Pmode, tmp);
6949 emit_insn (GEN_MOV (tmp, ra));
6952 /* Clear variables at function end. */
6954 static void
6955 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6956 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6958 sh_need_epilogue_known = 0;
6961 static rtx
6962 sh_builtin_saveregs (void)
6964 /* First unnamed integer register. */
6965 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6966 /* Number of integer registers we need to save. */
6967 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6968 /* First unnamed SFmode float reg */
6969 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6970 /* Number of SFmode float regs to save. */
6971 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6972 rtx regbuf, fpregs;
6973 int bufsize, regno;
6974 alias_set_type alias_set;
6976 if (TARGET_SH5)
6978 if (n_intregs)
6980 int pushregs = n_intregs;
6982 while (pushregs < NPARM_REGS (SImode) - 1
6983 && (CALL_COOKIE_INT_REG_GET
6984 (crtl->args.info.call_cookie,
6985 NPARM_REGS (SImode) - pushregs)
6986 == 1))
6988 crtl->args.info.call_cookie
6989 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6990 - pushregs, 1);
6991 pushregs++;
6994 if (pushregs == NPARM_REGS (SImode))
6995 crtl->args.info.call_cookie
6996 |= (CALL_COOKIE_INT_REG (0, 1)
6997 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6998 else
6999 crtl->args.info.call_cookie
7000 |= CALL_COOKIE_STACKSEQ (pushregs);
7002 crtl->args.pretend_args_size += 8 * n_intregs;
7004 if (TARGET_SHCOMPACT)
7005 return const0_rtx;
7008 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7010 error ("__builtin_saveregs not supported by this subtarget");
7011 return const0_rtx;
7014 if (TARGET_SHMEDIA)
7015 n_floatregs = 0;
7017 /* Allocate block of memory for the regs. */
7018 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7019 Or can assign_stack_local accept a 0 SIZE argument? */
7020 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7022 if (TARGET_SHMEDIA)
7023 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7024 else if (n_floatregs & 1)
7026 rtx addr;
7028 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7029 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7030 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7031 regbuf = change_address (regbuf, BLKmode, addr);
7033 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7035 rtx addr, mask;
7037 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7038 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7039 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7040 emit_insn (gen_andsi3 (addr, addr, mask));
7041 regbuf = change_address (regbuf, BLKmode, addr);
7043 else
7044 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7045 alias_set = get_varargs_alias_set ();
7046 set_mem_alias_set (regbuf, alias_set);
7048 /* Save int args.
7049 This is optimized to only save the regs that are necessary. Explicitly
7050 named args need not be saved. */
7051 if (n_intregs > 0)
7052 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7053 adjust_address (regbuf, BLKmode,
7054 n_floatregs * UNITS_PER_WORD),
7055 n_intregs);
7057 if (TARGET_SHMEDIA)
7058 /* Return the address of the regbuf. */
7059 return XEXP (regbuf, 0);
7061 /* Save float args.
7062 This is optimized to only save the regs that are necessary. Explicitly
7063 named args need not be saved.
7064 We explicitly build a pointer to the buffer because it halves the insn
7065 count when not optimizing (otherwise the pointer is built for each reg
7066 saved).
7067 We emit the moves in reverse order so that we can use predecrement. */
7069 fpregs = copy_to_mode_reg (Pmode,
7070 plus_constant (XEXP (regbuf, 0),
7071 n_floatregs * UNITS_PER_WORD));
7072 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7074 rtx mem;
7075 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7077 emit_insn (gen_addsi3 (fpregs, fpregs,
7078 GEN_INT (-2 * UNITS_PER_WORD)));
7079 mem = change_address (regbuf, DFmode, fpregs);
7080 emit_move_insn (mem,
7081 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7083 regno = first_floatreg;
7084 if (regno & 1)
7086 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7087 mem = change_address (regbuf, SFmode, fpregs);
7088 emit_move_insn (mem,
7089 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7090 - (TARGET_LITTLE_ENDIAN != 0)));
7093 else
7094 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7096 rtx mem;
7098 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7099 mem = change_address (regbuf, SFmode, fpregs);
7100 emit_move_insn (mem,
7101 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7104 /* Return the address of the regbuf. */
7105 return XEXP (regbuf, 0);
7108 /* Define the `__builtin_va_list' type for the ABI. */
7110 static tree
7111 sh_build_builtin_va_list (void)
7113 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7114 tree record;
7116 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7117 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7118 return ptr_type_node;
7120 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7122 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7123 ptr_type_node);
7124 f_next_o_limit = build_decl (FIELD_DECL,
7125 get_identifier ("__va_next_o_limit"),
7126 ptr_type_node);
7127 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7128 ptr_type_node);
7129 f_next_fp_limit = build_decl (FIELD_DECL,
7130 get_identifier ("__va_next_fp_limit"),
7131 ptr_type_node);
7132 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7133 ptr_type_node);
7135 DECL_FIELD_CONTEXT (f_next_o) = record;
7136 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7137 DECL_FIELD_CONTEXT (f_next_fp) = record;
7138 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7139 DECL_FIELD_CONTEXT (f_next_stack) = record;
7141 TYPE_FIELDS (record) = f_next_o;
7142 TREE_CHAIN (f_next_o) = f_next_o_limit;
7143 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7144 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7145 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7147 layout_type (record);
7149 return record;
7152 /* Implement `va_start' for varargs and stdarg. */
7154 static void
7155 sh_va_start (tree valist, rtx nextarg)
7157 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7158 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7159 tree t, u;
7160 int nfp, nint;
7162 if (TARGET_SH5)
7164 expand_builtin_saveregs ();
7165 std_expand_builtin_va_start (valist, nextarg);
7166 return;
7169 if ((! TARGET_SH2E && ! TARGET_SH4)
7170 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7172 std_expand_builtin_va_start (valist, nextarg);
7173 return;
7176 f_next_o = TYPE_FIELDS (va_list_type_node);
7177 f_next_o_limit = TREE_CHAIN (f_next_o);
7178 f_next_fp = TREE_CHAIN (f_next_o_limit);
7179 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7180 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7182 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7183 NULL_TREE);
7184 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7185 valist, f_next_o_limit, NULL_TREE);
7186 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7187 NULL_TREE);
7188 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7189 valist, f_next_fp_limit, NULL_TREE);
7190 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7191 valist, f_next_stack, NULL_TREE);
7193 /* Call __builtin_saveregs. */
7194 u = make_tree (sizetype, expand_builtin_saveregs ());
7195 u = fold_convert (ptr_type_node, u);
7196 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7197 TREE_SIDE_EFFECTS (t) = 1;
7198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7200 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7201 if (nfp < 8)
7202 nfp = 8 - nfp;
7203 else
7204 nfp = 0;
7205 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7206 size_int (UNITS_PER_WORD * nfp));
7207 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7208 TREE_SIDE_EFFECTS (t) = 1;
7209 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7211 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7212 TREE_SIDE_EFFECTS (t) = 1;
7213 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7215 nint = crtl->args.info.arg_count[SH_ARG_INT];
7216 if (nint < 4)
7217 nint = 4 - nint;
7218 else
7219 nint = 0;
7220 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7221 size_int (UNITS_PER_WORD * nint));
7222 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7223 TREE_SIDE_EFFECTS (t) = 1;
7224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7226 u = make_tree (ptr_type_node, nextarg);
7227 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7228 TREE_SIDE_EFFECTS (t) = 1;
7229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7232 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7233 member, return it. */
7234 static tree
7235 find_sole_member (tree type)
7237 tree field, member = NULL_TREE;
7239 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7241 if (TREE_CODE (field) != FIELD_DECL)
7242 continue;
7243 if (!DECL_SIZE (field))
7244 return NULL_TREE;
7245 if (integer_zerop (DECL_SIZE (field)))
7246 continue;
7247 if (member)
7248 return NULL_TREE;
7249 member = field;
7251 return member;
7253 /* Implement `va_arg'. */
7255 static tree
7256 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7257 tree *post_p ATTRIBUTE_UNUSED)
7259 HOST_WIDE_INT size, rsize;
7260 tree tmp, pptr_type_node;
7261 tree addr, lab_over = NULL, result = NULL;
7262 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7263 tree eff_type;
7265 if (pass_by_ref)
7266 type = build_pointer_type (type);
7268 size = int_size_in_bytes (type);
7269 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7270 pptr_type_node = build_pointer_type (ptr_type_node);
7272 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7273 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7275 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7276 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7277 int pass_as_float;
7278 tree lab_false;
7279 tree member;
7281 f_next_o = TYPE_FIELDS (va_list_type_node);
7282 f_next_o_limit = TREE_CHAIN (f_next_o);
7283 f_next_fp = TREE_CHAIN (f_next_o_limit);
7284 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7285 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7287 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7288 NULL_TREE);
7289 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7290 valist, f_next_o_limit, NULL_TREE);
7291 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7292 valist, f_next_fp, NULL_TREE);
7293 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7294 valist, f_next_fp_limit, NULL_TREE);
7295 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7296 valist, f_next_stack, NULL_TREE);
7298 /* Structures with a single member with a distinct mode are passed
7299 like their member. This is relevant if the latter has a REAL_TYPE
7300 or COMPLEX_TYPE type. */
7301 eff_type = type;
7302 while (TREE_CODE (eff_type) == RECORD_TYPE
7303 && (member = find_sole_member (eff_type))
7304 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7305 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7306 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7308 tree field_type = TREE_TYPE (member);
7310 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7311 eff_type = field_type;
7312 else
7314 gcc_assert ((TYPE_ALIGN (eff_type)
7315 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7316 || (TYPE_ALIGN (eff_type)
7317 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7318 break;
7322 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7324 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7325 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7326 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7327 && size <= 16));
7329 else
7331 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7334 addr = create_tmp_var (pptr_type_node, NULL);
7335 lab_false = create_artificial_label ();
7336 lab_over = create_artificial_label ();
7338 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7340 if (pass_as_float)
7342 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7343 tree cmp;
7344 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7346 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7347 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7348 gimplify_and_add (tmp, pre_p);
7350 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7351 gimplify_and_add (tmp, pre_p);
7352 tmp = next_fp_limit;
7353 if (size > 4 && !is_double)
7354 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7355 size_int (4 - size));
7356 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7357 cmp = build3 (COND_EXPR, void_type_node, tmp,
7358 build1 (GOTO_EXPR, void_type_node, lab_false),
7359 NULL_TREE);
7360 if (!is_double)
7361 gimplify_and_add (cmp, pre_p);
7363 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7364 || (is_double || size == 16))
7366 tmp = fold_convert (sizetype, next_fp_tmp);
7367 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7368 size_int (UNITS_PER_WORD));
7369 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7370 next_fp_tmp, tmp);
7371 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7372 next_fp_tmp, tmp);
7373 gimplify_and_add (tmp, pre_p);
7375 if (is_double)
7376 gimplify_and_add (cmp, pre_p);
7378 #ifdef FUNCTION_ARG_SCmode_WART
7379 if (TYPE_MODE (eff_type) == SCmode
7380 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7382 tree subtype = TREE_TYPE (eff_type);
7383 tree real, imag;
7385 imag
7386 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7387 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7389 real
7390 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7391 real = get_initialized_tmp_var (real, pre_p, NULL);
7393 result = build2 (COMPLEX_EXPR, type, real, imag);
7394 result = get_initialized_tmp_var (result, pre_p, NULL);
7396 #endif /* FUNCTION_ARG_SCmode_WART */
7398 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7399 gimplify_and_add (tmp, pre_p);
7401 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7402 gimplify_and_add (tmp, pre_p);
7404 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7405 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7406 gimplify_and_add (tmp, pre_p);
7407 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7408 gimplify_and_add (tmp, pre_p);
7410 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7411 gimplify_and_add (tmp, post_p);
7412 valist = next_fp_tmp;
7414 else
7416 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7417 size_int (rsize));
7418 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7419 tmp = build3 (COND_EXPR, void_type_node, tmp,
7420 build1 (GOTO_EXPR, void_type_node, lab_false),
7421 NULL_TREE);
7422 gimplify_and_add (tmp, pre_p);
7424 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7425 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7426 gimplify_and_add (tmp, pre_p);
7428 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7429 gimplify_and_add (tmp, pre_p);
7431 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7432 gimplify_and_add (tmp, pre_p);
7434 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7436 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7437 next_o, next_o_limit);
7438 gimplify_and_add (tmp, pre_p);
7441 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7442 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7443 gimplify_and_add (tmp, pre_p);
7446 if (!result)
7448 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7449 gimplify_and_add (tmp, pre_p);
7453 /* ??? In va-sh.h, there had been code to make values larger than
7454 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7456 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7457 if (result)
7459 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7460 gimplify_and_add (tmp, pre_p);
7462 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7463 gimplify_and_add (tmp, pre_p);
7465 else
7466 result = tmp;
7468 if (pass_by_ref)
7469 result = build_va_arg_indirect_ref (result);
7471 return result;
7474 bool
7475 sh_promote_prototypes (const_tree type)
7477 if (TARGET_HITACHI)
7478 return 0;
7479 if (! type)
7480 return 1;
7481 return ! sh_attr_renesas_p (type);
7484 /* Whether an argument must be passed by reference. On SHcompact, we
7485 pretend arguments wider than 32-bits that would have been passed in
7486 registers are passed by reference, so that an SHmedia trampoline
7487 loads them into the full 64-bits registers. */
7489 static int
7490 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7491 const_tree type, bool named)
7493 unsigned HOST_WIDE_INT size;
7495 if (type)
7496 size = int_size_in_bytes (type);
7497 else
7498 size = GET_MODE_SIZE (mode);
7500 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7501 && (!named
7502 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7503 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7504 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7505 && size > 4
7506 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7507 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7508 return size;
7509 else
7510 return 0;
7513 static bool
7514 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7515 const_tree type, bool named)
7517 if (targetm.calls.must_pass_in_stack (mode, type))
7518 return true;
7520 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7521 wants to know about pass-by-reference semantics for incoming
7522 arguments. */
7523 if (! cum)
7524 return false;
7526 if (TARGET_SHCOMPACT)
7528 cum->byref = shcompact_byref (cum, mode, type, named);
7529 return cum->byref != 0;
7532 return false;
7535 static bool
7536 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7537 const_tree type, bool named ATTRIBUTE_UNUSED)
7539 /* ??? How can it possibly be correct to return true only on the
7540 caller side of the equation? Is there someplace else in the
7541 sh backend that's magically producing the copies? */
7542 return (cum->outgoing
7543 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7544 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7547 static int
7548 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7549 tree type, bool named ATTRIBUTE_UNUSED)
7551 int words = 0;
7553 if (!TARGET_SH5
7554 && PASS_IN_REG_P (*cum, mode, type)
7555 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7556 && (ROUND_REG (*cum, mode)
7557 + (mode != BLKmode
7558 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7559 : ROUND_ADVANCE (int_size_in_bytes (type)))
7560 > NPARM_REGS (mode)))
7561 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7563 else if (!TARGET_SHCOMPACT
7564 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7565 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7567 return words * UNITS_PER_WORD;
7571 /* Define where to put the arguments to a function.
7572 Value is zero to push the argument on the stack,
7573 or a hard register in which to store the argument.
7575 MODE is the argument's machine mode.
7576 TYPE is the data type of the argument (as a tree).
7577 This is null for libcalls where that information may
7578 not be available.
7579 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7580 the preceding args and about the function being called.
7581 NAMED is nonzero if this argument is a named parameter
7582 (otherwise it is an extra parameter matching an ellipsis).
7584 On SH the first args are normally in registers
7585 and the rest are pushed. Any arg that starts within the first
7586 NPARM_REGS words is at least partially passed in a register unless
7587 its data type forbids. */
7591 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7592 tree type, int named)
7594 if (! TARGET_SH5 && mode == VOIDmode)
7595 return GEN_INT (ca->renesas_abi ? 1 : 0);
7597 if (! TARGET_SH5
7598 && PASS_IN_REG_P (*ca, mode, type)
7599 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7601 int regno;
7603 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7604 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7606 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7607 gen_rtx_REG (SFmode,
7608 BASE_ARG_REG (mode)
7609 + (ROUND_REG (*ca, mode) ^ 1)),
7610 const0_rtx);
7611 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7612 gen_rtx_REG (SFmode,
7613 BASE_ARG_REG (mode)
7614 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7615 GEN_INT (4));
7616 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7619 /* If the alignment of a DF value causes an SF register to be
7620 skipped, we will use that skipped register for the next SF
7621 value. */
7622 if ((TARGET_HITACHI || ca->renesas_abi)
7623 && ca->free_single_fp_reg
7624 && mode == SFmode)
7625 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7627 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7628 ^ (mode == SFmode && TARGET_SH4
7629 && TARGET_LITTLE_ENDIAN != 0
7630 && ! TARGET_HITACHI && ! ca->renesas_abi);
7631 return gen_rtx_REG (mode, regno);
7635 if (TARGET_SH5)
7637 if (mode == VOIDmode && TARGET_SHCOMPACT)
7638 return GEN_INT (ca->call_cookie);
7640 /* The following test assumes unnamed arguments are promoted to
7641 DFmode. */
7642 if (mode == SFmode && ca->free_single_fp_reg)
7643 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7645 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7646 && (named || ! ca->prototype_p)
7647 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7649 if (! ca->prototype_p && TARGET_SHMEDIA)
7650 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7652 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7653 FIRST_FP_PARM_REG
7654 + ca->arg_count[(int) SH_ARG_FLOAT]);
7657 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7658 && (! TARGET_SHCOMPACT
7659 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7660 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7661 type, named))))
7663 return gen_rtx_REG (mode, (FIRST_PARM_REG
7664 + ca->arg_count[(int) SH_ARG_INT]));
7667 return 0;
7670 return 0;
7673 /* Update the data in CUM to advance over an argument
7674 of mode MODE and data type TYPE.
7675 (TYPE is null for libcalls where that information may not be
7676 available.) */
7678 void
7679 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7680 tree type, int named)
7682 if (ca->force_mem)
7683 ca->force_mem = 0;
7684 else if (TARGET_SH5)
7686 tree type2 = (ca->byref && type
7687 ? TREE_TYPE (type)
7688 : type);
7689 enum machine_mode mode2 = (ca->byref && type
7690 ? TYPE_MODE (type2)
7691 : mode);
7692 int dwords = ((ca->byref
7693 ? ca->byref
7694 : mode2 == BLKmode
7695 ? int_size_in_bytes (type2)
7696 : GET_MODE_SIZE (mode2)) + 7) / 8;
7697 int numregs = MIN (dwords, NPARM_REGS (SImode)
7698 - ca->arg_count[(int) SH_ARG_INT]);
7700 if (numregs)
7702 ca->arg_count[(int) SH_ARG_INT] += numregs;
7703 if (TARGET_SHCOMPACT
7704 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7706 ca->call_cookie
7707 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7708 - numregs, 1);
7709 /* N.B. We want this also for outgoing. */
7710 ca->stack_regs += numregs;
7712 else if (ca->byref)
7714 if (! ca->outgoing)
7715 ca->stack_regs += numregs;
7716 ca->byref_regs += numregs;
7717 ca->byref = 0;
7719 ca->call_cookie
7720 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7721 - numregs, 2);
7722 while (--numregs);
7723 ca->call_cookie
7724 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7725 - 1, 1);
7727 else if (dwords > numregs)
7729 int pushregs = numregs;
7731 if (TARGET_SHCOMPACT)
7732 ca->stack_regs += numregs;
7733 while (pushregs < NPARM_REGS (SImode) - 1
7734 && (CALL_COOKIE_INT_REG_GET
7735 (ca->call_cookie,
7736 NPARM_REGS (SImode) - pushregs)
7737 == 1))
7739 ca->call_cookie
7740 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7741 - pushregs, 1);
7742 pushregs++;
7744 if (numregs == NPARM_REGS (SImode))
7745 ca->call_cookie
7746 |= CALL_COOKIE_INT_REG (0, 1)
7747 | CALL_COOKIE_STACKSEQ (numregs - 1);
7748 else
7749 ca->call_cookie
7750 |= CALL_COOKIE_STACKSEQ (numregs);
7753 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7754 && (named || ! ca->prototype_p))
7756 if (mode2 == SFmode && ca->free_single_fp_reg)
7757 ca->free_single_fp_reg = 0;
7758 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7759 < NPARM_REGS (SFmode))
7761 int numfpregs
7762 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7763 NPARM_REGS (SFmode)
7764 - ca->arg_count[(int) SH_ARG_FLOAT]);
7766 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7768 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7770 if (ca->outgoing && numregs > 0)
7773 ca->call_cookie
7774 |= (CALL_COOKIE_INT_REG
7775 (ca->arg_count[(int) SH_ARG_INT]
7776 - numregs + ((numfpregs - 2) / 2),
7777 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7778 - numfpregs) / 2));
7780 while (numfpregs -= 2);
7782 else if (mode2 == SFmode && (named)
7783 && (ca->arg_count[(int) SH_ARG_FLOAT]
7784 < NPARM_REGS (SFmode)))
7785 ca->free_single_fp_reg
7786 = FIRST_FP_PARM_REG - numfpregs
7787 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7790 return;
7793 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7795 /* Note that we've used the skipped register. */
7796 if (mode == SFmode && ca->free_single_fp_reg)
7798 ca->free_single_fp_reg = 0;
7799 return;
7801 /* When we have a DF after an SF, there's an SF register that get
7802 skipped in order to align the DF value. We note this skipped
7803 register, because the next SF value will use it, and not the
7804 SF that follows the DF. */
7805 if (mode == DFmode
7806 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7808 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7809 + BASE_ARG_REG (mode));
7813 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7814 || PASS_IN_REG_P (*ca, mode, type))
7815 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7816 = (ROUND_REG (*ca, mode)
7817 + (mode == BLKmode
7818 ? ROUND_ADVANCE (int_size_in_bytes (type))
7819 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7822 /* The Renesas calling convention doesn't quite fit into this scheme since
7823 the address is passed like an invisible argument, but one that is always
7824 passed in memory. */
7825 static rtx
7826 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7828 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7829 return 0;
7830 return gen_rtx_REG (Pmode, 2);
7833 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7835 static bool
7836 sh_return_in_memory (const_tree type, const_tree fndecl)
7838 if (TARGET_SH5)
7840 if (TYPE_MODE (type) == BLKmode)
7841 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7842 else
7843 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7845 else
7847 return (TYPE_MODE (type) == BLKmode
7848 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7849 && TREE_CODE (type) == RECORD_TYPE));
7853 /* We actually emit the code in sh_expand_prologue. We used to use
7854 a static variable to flag that we need to emit this code, but that
7855 doesn't when inlining, when functions are deferred and then emitted
7856 later. Fortunately, we already have two flags that are part of struct
7857 function that tell if a function uses varargs or stdarg. */
7858 static void
7859 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7860 enum machine_mode mode,
7861 tree type,
7862 int *pretend_arg_size,
7863 int second_time ATTRIBUTE_UNUSED)
7865 gcc_assert (cfun->stdarg);
7866 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7868 int named_parm_regs, anon_parm_regs;
7870 named_parm_regs = (ROUND_REG (*ca, mode)
7871 + (mode == BLKmode
7872 ? ROUND_ADVANCE (int_size_in_bytes (type))
7873 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7874 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7875 if (anon_parm_regs > 0)
7876 *pretend_arg_size = anon_parm_regs * 4;
7880 static bool
7881 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7883 return TARGET_SH5;
7886 static bool
7887 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7889 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7893 /* Define the offset between two registers, one to be eliminated, and
7894 the other its replacement, at the start of a routine. */
7897 initial_elimination_offset (int from, int to)
7899 int regs_saved;
7900 int regs_saved_rounding = 0;
7901 int total_saved_regs_space;
7902 int total_auto_space;
7903 int save_flags = target_flags;
7904 int copy_flags;
7905 HARD_REG_SET live_regs_mask;
7907 shmedia_space_reserved_for_target_registers = false;
7908 regs_saved = calc_live_regs (&live_regs_mask);
7909 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7911 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7913 shmedia_space_reserved_for_target_registers = true;
7914 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7917 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7918 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7919 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7921 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7922 copy_flags = target_flags;
7923 target_flags = save_flags;
7925 total_saved_regs_space = regs_saved + regs_saved_rounding;
7927 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7928 return total_saved_regs_space + total_auto_space
7929 + crtl->args.info.byref_regs * 8;
7931 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7932 return total_saved_regs_space + total_auto_space
7933 + crtl->args.info.byref_regs * 8;
7935 /* Initial gap between fp and sp is 0. */
7936 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7937 return 0;
7939 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7940 return rounded_frame_size (0);
7942 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7943 return rounded_frame_size (0);
7945 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7946 && (to == HARD_FRAME_POINTER_REGNUM
7947 || to == STACK_POINTER_REGNUM));
7948 if (TARGET_SH5)
7950 int n = total_saved_regs_space;
7951 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7952 save_schedule schedule;
7953 save_entry *entry;
7955 n += total_auto_space;
7957 /* If it wasn't saved, there's not much we can do. */
7958 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7959 return n;
7961 target_flags = copy_flags;
7963 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7964 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7965 if (entry->reg == pr_reg)
7967 target_flags = save_flags;
7968 return entry->offset;
7970 gcc_unreachable ();
7972 else
7973 return total_auto_space;
7976 /* Parse the -mfixed-range= option string. */
7977 void
7978 sh_fix_range (const char *const_str)
7980 int i, first, last;
7981 char *str, *dash, *comma;
7983 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
7984 REG2 are either register names or register numbers. The effect
7985 of this option is to mark the registers in the range from REG1 to
7986 REG2 as ``fixed'' so they won't be used by the compiler. */
7988 i = strlen (const_str);
7989 str = (char *) alloca (i + 1);
7990 memcpy (str, const_str, i + 1);
7992 while (1)
7994 dash = strchr (str, '-');
7995 if (!dash)
7997 warning (0, "value of -mfixed-range must have form REG1-REG2");
7998 return;
8000 *dash = '\0';
8001 comma = strchr (dash + 1, ',');
8002 if (comma)
8003 *comma = '\0';
8005 first = decode_reg_name (str);
8006 if (first < 0)
8008 warning (0, "unknown register name: %s", str);
8009 return;
8012 last = decode_reg_name (dash + 1);
8013 if (last < 0)
8015 warning (0, "unknown register name: %s", dash + 1);
8016 return;
8019 *dash = '-';
8021 if (first > last)
8023 warning (0, "%s-%s is an empty range", str, dash + 1);
8024 return;
8027 for (i = first; i <= last; ++i)
8028 fixed_regs[i] = call_used_regs[i] = 1;
8030 if (!comma)
8031 break;
8033 *comma = ',';
8034 str = comma + 1;
8038 /* Insert any deferred function attributes from earlier pragmas. */
8039 static void
8040 sh_insert_attributes (tree node, tree *attributes)
8042 tree attrs;
8044 if (TREE_CODE (node) != FUNCTION_DECL)
8045 return;
8047 /* We are only interested in fields. */
8048 if (!DECL_P (node))
8049 return;
8051 /* Append the attributes to the deferred attributes. */
8052 *sh_deferred_function_attributes_tail = *attributes;
8053 attrs = sh_deferred_function_attributes;
8054 if (!attrs)
8055 return;
8057 /* Some attributes imply or require the interrupt attribute. */
8058 if (!lookup_attribute ("interrupt_handler", attrs)
8059 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8061 /* If we have a trapa_handler, but no interrupt_handler attribute,
8062 insert an interrupt_handler attribute. */
8063 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8064 /* We can't use sh_pr_interrupt here because that's not in the
8065 java frontend. */
8066 attrs
8067 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8068 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8069 if the interrupt attribute is missing, we ignore the attribute
8070 and warn. */
8071 else if (lookup_attribute ("sp_switch", attrs)
8072 || lookup_attribute ("trap_exit", attrs)
8073 || lookup_attribute ("nosave_low_regs", attrs)
8074 || lookup_attribute ("resbank", attrs))
8076 tree *tail;
8078 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8080 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8081 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8082 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8083 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8084 warning (OPT_Wattributes,
8085 "%qs attribute only applies to interrupt functions",
8086 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8087 else
8089 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8090 NULL_TREE);
8091 tail = &TREE_CHAIN (*tail);
8094 attrs = *attributes;
8098 /* Install the processed list. */
8099 *attributes = attrs;
8101 /* Clear deferred attributes. */
8102 sh_deferred_function_attributes = NULL_TREE;
8103 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8105 return;
8108 /* Supported attributes:
8110 interrupt_handler -- specifies this function is an interrupt handler.
8112 trapa_handler - like above, but don't save all registers.
8114 sp_switch -- specifies an alternate stack for an interrupt handler
8115 to run on.
8117 trap_exit -- use a trapa to exit an interrupt function instead of
8118 an rte instruction.
8120 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8121 This is useful on the SH3 and upwards,
8122 which has a separate set of low regs for User and Supervisor modes.
8123 This should only be used for the lowest level of interrupts. Higher levels
8124 of interrupts must save the registers in case they themselves are
8125 interrupted.
8127 renesas -- use Renesas calling/layout conventions (functions and
8128 structures).
8130 resbank -- In case of an ISR, use a register bank to save registers
8131 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8134 const struct attribute_spec sh_attribute_table[] =
8136 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8137 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8138 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8139 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8140 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8141 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8142 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8143 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8144 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8145 #ifdef SYMBIAN
8146 /* Symbian support adds three new attributes:
8147 dllexport - for exporting a function/variable that will live in a dll
8148 dllimport - for importing a function/variable from a dll
8150 Microsoft allows multiple declspecs in one __declspec, separating
8151 them with spaces. We do NOT support this. Instead, use __declspec
8152 multiple times. */
8153 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8154 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8155 #endif
8156 { NULL, 0, 0, false, false, false, NULL }
8159 /* Handle a 'resbank' attribute. */
8160 static tree
8161 sh_handle_resbank_handler_attribute (tree * node, tree name,
8162 tree args ATTRIBUTE_UNUSED,
8163 int flags ATTRIBUTE_UNUSED,
8164 bool * no_add_attrs)
8166 if (!TARGET_SH2A)
8168 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8169 IDENTIFIER_POINTER (name));
8170 *no_add_attrs = true;
8172 if (TREE_CODE (*node) != FUNCTION_DECL)
8174 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8175 IDENTIFIER_POINTER (name));
8176 *no_add_attrs = true;
8179 return NULL_TREE;
8182 /* Handle an "interrupt_handler" attribute; arguments as in
8183 struct attribute_spec.handler. */
8184 static tree
8185 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8186 tree args ATTRIBUTE_UNUSED,
8187 int flags ATTRIBUTE_UNUSED,
8188 bool *no_add_attrs)
8190 if (TREE_CODE (*node) != FUNCTION_DECL)
8192 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8193 IDENTIFIER_POINTER (name));
8194 *no_add_attrs = true;
8196 else if (TARGET_SHCOMPACT)
8198 error ("attribute interrupt_handler is not compatible with -m5-compact");
8199 *no_add_attrs = true;
8202 return NULL_TREE;
8205 /* Handle an 'function_vector' attribute; arguments as in
8206 struct attribute_spec.handler. */
8207 static tree
8208 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8209 tree args ATTRIBUTE_UNUSED,
8210 int flags ATTRIBUTE_UNUSED,
8211 bool * no_add_attrs)
8213 if (!TARGET_SH2A)
8215 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8216 IDENTIFIER_POINTER (name));
8217 *no_add_attrs = true;
8219 else if (TREE_CODE (*node) != FUNCTION_DECL)
8221 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8222 IDENTIFIER_POINTER (name));
8223 *no_add_attrs = true;
8225 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8227 /* The argument must be a constant integer. */
8228 warning (OPT_Wattributes,
8229 "`%s' attribute argument not an integer constant",
8230 IDENTIFIER_POINTER (name));
8231 *no_add_attrs = true;
8233 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8235 /* The argument value must be between 0 to 255. */
8236 warning (OPT_Wattributes,
8237 "`%s' attribute argument should be between 0 to 255",
8238 IDENTIFIER_POINTER (name));
8239 *no_add_attrs = true;
8241 return NULL_TREE;
8244 /* Returns 1 if current function has been assigned the attribute
8245 'function_vector'. */
8247 sh2a_is_function_vector_call (rtx x)
8249 if (GET_CODE (x) == SYMBOL_REF
8250 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8252 tree tr = SYMBOL_REF_DECL (x);
8254 if (sh2a_function_vector_p (tr))
8255 return 1;
8258 return 0;
8261 /* Returns the function vector number, if the the attribute
8262 'function_vector' is assigned, otherwise returns zero. */
8264 sh2a_get_function_vector_number (rtx x)
8266 int num;
8267 tree list, t;
8269 if ((GET_CODE (x) == SYMBOL_REF)
8270 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8272 t = SYMBOL_REF_DECL (x);
8274 if (TREE_CODE (t) != FUNCTION_DECL)
8275 return 0;
8277 list = SH_ATTRIBUTES (t);
8278 while (list)
8280 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8282 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8283 return num;
8286 list = TREE_CHAIN (list);
8289 return 0;
8291 else
8292 return 0;
8295 /* Handle an "sp_switch" attribute; arguments as in
8296 struct attribute_spec.handler. */
8297 static tree
8298 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8299 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8301 if (TREE_CODE (*node) != FUNCTION_DECL)
8303 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8304 IDENTIFIER_POINTER (name));
8305 *no_add_attrs = true;
8307 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8309 /* The argument must be a constant string. */
8310 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8311 IDENTIFIER_POINTER (name));
8312 *no_add_attrs = true;
8315 return NULL_TREE;
8318 /* Handle an "trap_exit" attribute; arguments as in
8319 struct attribute_spec.handler. */
8320 static tree
8321 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8322 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8324 if (TREE_CODE (*node) != FUNCTION_DECL)
8326 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8327 IDENTIFIER_POINTER (name));
8328 *no_add_attrs = true;
8330 /* The argument specifies a trap number to be used in a trapa instruction
8331 at function exit (instead of an rte instruction). */
8332 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8334 /* The argument must be a constant integer. */
8335 warning (OPT_Wattributes, "%qs attribute argument not an "
8336 "integer constant", IDENTIFIER_POINTER (name));
8337 *no_add_attrs = true;
8340 return NULL_TREE;
8343 static tree
8344 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8345 tree name ATTRIBUTE_UNUSED,
8346 tree args ATTRIBUTE_UNUSED,
8347 int flags ATTRIBUTE_UNUSED,
8348 bool *no_add_attrs ATTRIBUTE_UNUSED)
8350 return NULL_TREE;
8353 /* True if __attribute__((renesas)) or -mrenesas. */
8355 sh_attr_renesas_p (const_tree td)
8357 if (TARGET_HITACHI)
8358 return 1;
8359 if (td == 0)
8360 return 0;
8361 if (DECL_P (td))
8362 td = TREE_TYPE (td);
8363 if (td == error_mark_node)
8364 return 0;
8365 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8366 != NULL_TREE);
8369 /* True if __attribute__((renesas)) or -mrenesas, for the current
8370 function. */
8372 sh_cfun_attr_renesas_p (void)
8374 return sh_attr_renesas_p (current_function_decl);
8378 sh_cfun_interrupt_handler_p (void)
8380 return (lookup_attribute ("interrupt_handler",
8381 DECL_ATTRIBUTES (current_function_decl))
8382 != NULL_TREE);
8385 /* Returns 1 if FUNC has been assigned the attribute
8386 "function_vector". */
8388 sh2a_function_vector_p (tree func)
8390 tree list;
8391 if (TREE_CODE (func) != FUNCTION_DECL)
8392 return 0;
8394 list = SH_ATTRIBUTES (func);
8395 while (list)
8397 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8398 return 1;
8400 list = TREE_CHAIN (list);
8402 return 0;
8405 /* Returns TRUE if given tree has the "resbank" attribute. */
8408 sh_cfun_resbank_handler_p (void)
8410 return ((lookup_attribute ("resbank",
8411 DECL_ATTRIBUTES (current_function_decl))
8412 != NULL_TREE)
8413 && (lookup_attribute ("interrupt_handler",
8414 DECL_ATTRIBUTES (current_function_decl))
8415 != NULL_TREE) && TARGET_SH2A);
8418 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8420 static const char *
8421 sh_check_pch_target_flags (int old_flags)
8423 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8424 | MASK_SH_E | MASK_HARD_SH4
8425 | MASK_FPU_SINGLE | MASK_SH4))
8426 return _("created and used with different architectures / ABIs");
8427 if ((old_flags ^ target_flags) & MASK_HITACHI)
8428 return _("created and used with different ABIs");
8429 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8430 return _("created and used with different endianness");
8431 return NULL;
8434 /* Predicates used by the templates. */
8436 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8437 Used only in general_movsrc_operand. */
8440 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8442 switch (REGNO (op))
8444 case PR_REG:
8445 case MACL_REG:
8446 case MACH_REG:
8447 return 1;
8449 return 0;
8452 /* Nonzero if OP is a floating point value with value 0.0. */
8455 fp_zero_operand (rtx op)
8457 REAL_VALUE_TYPE r;
8459 if (GET_MODE (op) != SFmode)
8460 return 0;
8462 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8463 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8466 /* Nonzero if OP is a floating point value with value 1.0. */
8469 fp_one_operand (rtx op)
8471 REAL_VALUE_TYPE r;
8473 if (GET_MODE (op) != SFmode)
8474 return 0;
8476 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8477 return REAL_VALUES_EQUAL (r, dconst1);
8480 /* For -m4 and -m4-single-only, mode switching is used. If we are
8481 compiling without -mfmovd, movsf_ie isn't taken into account for
8482 mode switching. We could check in machine_dependent_reorg for
8483 cases where we know we are in single precision mode, but there is
8484 interface to find that out during reload, so we must avoid
8485 choosing an fldi alternative during reload and thus failing to
8486 allocate a scratch register for the constant loading. */
8488 fldi_ok (void)
8490 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8494 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8496 enum rtx_code code = GET_CODE (op);
8497 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8500 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8502 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8504 if (GET_CODE (op) != SYMBOL_REF)
8505 return 0;
8506 return SYMBOL_REF_TLS_MODEL (op);
8509 /* Return the destination address of a branch. */
8511 static int
8512 branch_dest (rtx branch)
8514 rtx dest = SET_SRC (PATTERN (branch));
8515 int dest_uid;
8517 if (GET_CODE (dest) == IF_THEN_ELSE)
8518 dest = XEXP (dest, 1);
8519 dest = XEXP (dest, 0);
8520 dest_uid = INSN_UID (dest);
8521 return INSN_ADDRESSES (dest_uid);
8524 /* Return nonzero if REG is not used after INSN.
8525 We assume REG is a reload reg, and therefore does
8526 not live past labels. It may live past calls or jumps though. */
8528 reg_unused_after (rtx reg, rtx insn)
8530 enum rtx_code code;
8531 rtx set;
8533 /* If the reg is set by this instruction, then it is safe for our
8534 case. Disregard the case where this is a store to memory, since
8535 we are checking a register used in the store address. */
8536 set = single_set (insn);
8537 if (set && GET_CODE (SET_DEST (set)) != MEM
8538 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8539 return 1;
8541 while ((insn = NEXT_INSN (insn)))
8543 rtx set;
8544 if (!INSN_P (insn))
8545 continue;
8547 code = GET_CODE (insn);
8549 #if 0
8550 /* If this is a label that existed before reload, then the register
8551 if dead here. However, if this is a label added by reorg, then
8552 the register may still be live here. We can't tell the difference,
8553 so we just ignore labels completely. */
8554 if (code == CODE_LABEL)
8555 return 1;
8556 /* else */
8557 #endif
8559 if (code == JUMP_INSN)
8560 return 0;
8562 /* If this is a sequence, we must handle them all at once.
8563 We could have for instance a call that sets the target register,
8564 and an insn in a delay slot that uses the register. In this case,
8565 we must return 0. */
8566 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8568 int i;
8569 int retval = 0;
8571 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8573 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8574 rtx set = single_set (this_insn);
8576 if (GET_CODE (this_insn) == CALL_INSN)
8577 code = CALL_INSN;
8578 else if (GET_CODE (this_insn) == JUMP_INSN)
8580 if (INSN_ANNULLED_BRANCH_P (this_insn))
8581 return 0;
8582 code = JUMP_INSN;
8585 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8586 return 0;
8587 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8589 if (GET_CODE (SET_DEST (set)) != MEM)
8590 retval = 1;
8591 else
8592 return 0;
8594 if (set == 0
8595 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8596 return 0;
8598 if (retval == 1)
8599 return 1;
8600 else if (code == JUMP_INSN)
8601 return 0;
8604 set = single_set (insn);
8605 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8606 return 0;
8607 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8608 return GET_CODE (SET_DEST (set)) != MEM;
8609 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8610 return 0;
8612 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8613 return 1;
8615 return 1;
8618 #include "ggc.h"
8620 static GTY(()) rtx fpscr_rtx;
8622 get_fpscr_rtx (void)
8624 if (! fpscr_rtx)
8626 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8627 REG_USERVAR_P (fpscr_rtx) = 1;
8628 mark_user_reg (fpscr_rtx);
8630 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8631 mark_user_reg (fpscr_rtx);
8632 return fpscr_rtx;
8635 static GTY(()) tree fpscr_values;
8637 static void
8638 emit_fpu_switch (rtx scratch, int index)
8640 rtx dst, src;
8642 if (fpscr_values == NULL)
8644 tree t;
8646 t = build_index_type (integer_one_node);
8647 t = build_array_type (integer_type_node, t);
8648 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8649 DECL_ARTIFICIAL (t) = 1;
8650 DECL_IGNORED_P (t) = 1;
8651 DECL_EXTERNAL (t) = 1;
8652 TREE_STATIC (t) = 1;
8653 TREE_PUBLIC (t) = 1;
8654 TREE_USED (t) = 1;
8656 fpscr_values = t;
8659 src = DECL_RTL (fpscr_values);
8660 if (!can_create_pseudo_p ())
8662 emit_move_insn (scratch, XEXP (src, 0));
8663 if (index != 0)
8664 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8665 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8667 else
8668 src = adjust_address (src, PSImode, index * 4);
8670 dst = get_fpscr_rtx ();
8671 emit_move_insn (dst, src);
8674 void
8675 emit_sf_insn (rtx pat)
8677 emit_insn (pat);
8680 void
8681 emit_df_insn (rtx pat)
8683 emit_insn (pat);
8686 void
8687 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8689 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8692 void
8693 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8695 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8696 get_fpscr_rtx ()));
8699 void
8700 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8702 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8705 void
8706 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8708 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8709 get_fpscr_rtx ()));
8712 static rtx get_free_reg (HARD_REG_SET);
8714 /* This function returns a register to use to load the address to load
8715 the fpscr from. Currently it always returns r1 or r7, but when we are
8716 able to use pseudo registers after combine, or have a better mechanism
8717 for choosing a register, it should be done here. */
8718 /* REGS_LIVE is the liveness information for the point for which we
8719 need this allocation. In some bare-bones exit blocks, r1 is live at the
8720 start. We can even have all of r0..r3 being live:
8721 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8722 INSN before which new insns are placed with will clobber the register
8723 we return. If a basic block consists only of setting the return value
8724 register to a pseudo and using that register, the return value is not
8725 live before or after this block, yet we we'll insert our insns right in
8726 the middle. */
8728 static rtx
8729 get_free_reg (HARD_REG_SET regs_live)
8731 if (! TEST_HARD_REG_BIT (regs_live, 1))
8732 return gen_rtx_REG (Pmode, 1);
8734 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8735 there shouldn't be anything but a jump before the function end. */
8736 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8737 return gen_rtx_REG (Pmode, 7);
8740 /* This function will set the fpscr from memory.
8741 MODE is the mode we are setting it to. */
8742 void
8743 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8745 enum attr_fp_mode fp_mode = mode;
8746 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8747 rtx addr_reg;
8749 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8750 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8753 /* Is the given character a logical line separator for the assembler? */
8754 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8755 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8756 #endif
8759 sh_insn_length_adjustment (rtx insn)
8761 /* Instructions with unfilled delay slots take up an extra two bytes for
8762 the nop in the delay slot. */
8763 if (((GET_CODE (insn) == INSN
8764 && GET_CODE (PATTERN (insn)) != USE
8765 && GET_CODE (PATTERN (insn)) != CLOBBER)
8766 || GET_CODE (insn) == CALL_INSN
8767 || (GET_CODE (insn) == JUMP_INSN
8768 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8769 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8770 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8771 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8772 return 2;
8774 /* SH2e has a bug that prevents the use of annulled branches, so if
8775 the delay slot is not filled, we'll have to put a NOP in it. */
8776 if (sh_cpu == CPU_SH2E
8777 && GET_CODE (insn) == JUMP_INSN
8778 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8779 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8780 && get_attr_type (insn) == TYPE_CBRANCH
8781 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8782 return 2;
8784 /* sh-dsp parallel processing insn take four bytes instead of two. */
8786 if (GET_CODE (insn) == INSN)
8788 int sum = 0;
8789 rtx body = PATTERN (insn);
8790 const char *template;
8791 char c;
8792 int maybe_label = 1;
8794 if (GET_CODE (body) == ASM_INPUT)
8795 template = XSTR (body, 0);
8796 else if (asm_noperands (body) >= 0)
8797 template
8798 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8799 else
8800 return 0;
8803 int ppi_adjust = 0;
8806 c = *template++;
8807 while (c == ' ' || c == '\t');
8808 /* all sh-dsp parallel-processing insns start with p.
8809 The only non-ppi sh insn starting with p is pref.
8810 The only ppi starting with pr is prnd. */
8811 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8812 ppi_adjust = 2;
8813 /* The repeat pseudo-insn expands two three insns, a total of
8814 six bytes in size. */
8815 else if ((c == 'r' || c == 'R')
8816 && ! strncasecmp ("epeat", template, 5))
8817 ppi_adjust = 4;
8818 while (c && c != '\n'
8819 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8821 /* If this is a label, it is obviously not a ppi insn. */
8822 if (c == ':' && maybe_label)
8824 ppi_adjust = 0;
8825 break;
8827 else if (c == '\'' || c == '"')
8828 maybe_label = 0;
8829 c = *template++;
8831 sum += ppi_adjust;
8832 maybe_label = c != ':';
8834 while (c);
8835 return sum;
8837 return 0;
8840 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8841 isn't protected by a PIC unspec. */
8843 nonpic_symbol_mentioned_p (rtx x)
8845 register const char *fmt;
8846 register int i;
8848 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8849 || GET_CODE (x) == PC)
8850 return 1;
8852 /* We don't want to look into the possible MEM location of a
8853 CONST_DOUBLE, since we're not going to use it, in general. */
8854 if (GET_CODE (x) == CONST_DOUBLE)
8855 return 0;
8857 if (GET_CODE (x) == UNSPEC
8858 && (XINT (x, 1) == UNSPEC_PIC
8859 || XINT (x, 1) == UNSPEC_GOT
8860 || XINT (x, 1) == UNSPEC_GOTOFF
8861 || XINT (x, 1) == UNSPEC_GOTPLT
8862 || XINT (x, 1) == UNSPEC_GOTTPOFF
8863 || XINT (x, 1) == UNSPEC_DTPOFF
8864 || XINT (x, 1) == UNSPEC_PLT))
8865 return 0;
8867 fmt = GET_RTX_FORMAT (GET_CODE (x));
8868 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8870 if (fmt[i] == 'E')
8872 register int j;
8874 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8875 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8876 return 1;
8878 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8879 return 1;
8882 return 0;
8885 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8886 @GOTOFF in `reg'. */
8888 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8889 rtx reg)
8891 if (tls_symbolic_operand (orig, Pmode))
8892 return orig;
8894 if (GET_CODE (orig) == LABEL_REF
8895 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8897 if (reg == 0)
8898 reg = gen_reg_rtx (Pmode);
8900 emit_insn (gen_symGOTOFF2reg (reg, orig));
8901 return reg;
8903 else if (GET_CODE (orig) == SYMBOL_REF)
8905 if (reg == 0)
8906 reg = gen_reg_rtx (Pmode);
8908 emit_insn (gen_symGOT2reg (reg, orig));
8909 return reg;
8911 return orig;
8914 /* Mark the use of a constant in the literal table. If the constant
8915 has multiple labels, make it unique. */
8916 static rtx
8917 mark_constant_pool_use (rtx x)
8919 rtx insn, lab, pattern;
8921 if (x == NULL)
8922 return x;
8924 switch (GET_CODE (x))
8926 case LABEL_REF:
8927 x = XEXP (x, 0);
8928 case CODE_LABEL:
8929 break;
8930 default:
8931 return x;
8934 /* Get the first label in the list of labels for the same constant
8935 and delete another labels in the list. */
8936 lab = x;
8937 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8939 if (GET_CODE (insn) != CODE_LABEL
8940 || LABEL_REFS (insn) != NEXT_INSN (insn))
8941 break;
8942 lab = insn;
8945 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8946 INSN_DELETED_P (insn) = 1;
8948 /* Mark constants in a window. */
8949 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8951 if (GET_CODE (insn) != INSN)
8952 continue;
8954 pattern = PATTERN (insn);
8955 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8956 continue;
8958 switch (XINT (pattern, 1))
8960 case UNSPECV_CONST2:
8961 case UNSPECV_CONST4:
8962 case UNSPECV_CONST8:
8963 XVECEXP (pattern, 0, 1) = const1_rtx;
8964 break;
8965 case UNSPECV_WINDOW_END:
8966 if (XVECEXP (pattern, 0, 0) == x)
8967 return lab;
8968 break;
8969 case UNSPECV_CONST_END:
8970 return lab;
8971 default:
8972 break;
8976 return lab;
8979 /* Return true if it's possible to redirect BRANCH1 to the destination
8980 of an unconditional jump BRANCH2. We only want to do this if the
8981 resulting branch will have a short displacement. */
8983 sh_can_redirect_branch (rtx branch1, rtx branch2)
8985 if (flag_expensive_optimizations && simplejump_p (branch2))
8987 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8988 rtx insn;
8989 int distance;
8991 for (distance = 0, insn = NEXT_INSN (branch1);
8992 insn && distance < 256;
8993 insn = PREV_INSN (insn))
8995 if (insn == dest)
8996 return 1;
8997 else
8998 distance += get_attr_length (insn);
9000 for (distance = 0, insn = NEXT_INSN (branch1);
9001 insn && distance < 256;
9002 insn = NEXT_INSN (insn))
9004 if (insn == dest)
9005 return 1;
9006 else
9007 distance += get_attr_length (insn);
9010 return 0;
9013 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9015 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9016 unsigned int new_reg)
9018 /* Interrupt functions can only use registers that have already been
9019 saved by the prologue, even if they would normally be
9020 call-clobbered. */
9022 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9023 return 0;
9025 return 1;
9028 /* Function to update the integer COST
9029 based on the relationship between INSN that is dependent on
9030 DEP_INSN through the dependence LINK. The default is to make no
9031 adjustment to COST. This can be used for example to specify to
9032 the scheduler that an output- or anti-dependence does not incur
9033 the same cost as a data-dependence. The return value should be
9034 the new value for COST. */
9035 static int
9036 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9038 rtx reg, use_pat;
9040 if (TARGET_SHMEDIA)
9042 /* On SHmedia, if the dependence is an anti-dependence or
9043 output-dependence, there is no cost. */
9044 if (REG_NOTE_KIND (link) != 0)
9046 /* However, dependencies between target register loads and
9047 uses of the register in a subsequent block that are separated
9048 by a conditional branch are not modelled - we have to do with
9049 the anti-dependency between the target register load and the
9050 conditional branch that ends the current block. */
9051 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9052 && GET_CODE (PATTERN (dep_insn)) == SET
9053 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9054 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9055 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9057 int orig_cost = cost;
9058 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9059 rtx target = ((! note
9060 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9061 ? insn : JUMP_LABEL (insn));
9062 /* On the likely path, the branch costs 1, on the unlikely path,
9063 it costs 3. */
9064 cost--;
9066 target = next_active_insn (target);
9067 while (target && ! flow_dependent_p (target, dep_insn)
9068 && --cost > 0);
9069 /* If two branches are executed in immediate succession, with the
9070 first branch properly predicted, this causes a stall at the
9071 second branch, hence we won't need the target for the
9072 second branch for two cycles after the launch of the first
9073 branch. */
9074 if (cost > orig_cost - 2)
9075 cost = orig_cost - 2;
9077 else
9078 cost = 0;
9081 else if (get_attr_is_mac_media (insn)
9082 && get_attr_is_mac_media (dep_insn))
9083 cost = 1;
9085 else if (! reload_completed
9086 && GET_CODE (PATTERN (insn)) == SET
9087 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9088 && GET_CODE (PATTERN (dep_insn)) == SET
9089 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9090 && cost < 4)
9091 cost = 4;
9092 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9093 that is needed at the target. */
9094 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9095 && ! flow_dependent_p (insn, dep_insn))
9096 cost--;
9098 else if (REG_NOTE_KIND (link) == 0)
9100 enum attr_type type;
9101 rtx dep_set;
9103 if (recog_memoized (insn) < 0
9104 || recog_memoized (dep_insn) < 0)
9105 return cost;
9107 dep_set = single_set (dep_insn);
9109 /* The latency that we specify in the scheduling description refers
9110 to the actual output, not to an auto-increment register; for that,
9111 the latency is one. */
9112 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9114 rtx set = single_set (insn);
9116 if (set
9117 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9118 && (!MEM_P (SET_DEST (set))
9119 || !reg_mentioned_p (SET_DEST (dep_set),
9120 XEXP (SET_DEST (set), 0))))
9121 cost = 1;
9123 /* The only input for a call that is timing-critical is the
9124 function's address. */
9125 if (GET_CODE (insn) == CALL_INSN)
9127 rtx call = PATTERN (insn);
9129 if (GET_CODE (call) == PARALLEL)
9130 call = XVECEXP (call, 0 ,0);
9131 if (GET_CODE (call) == SET)
9132 call = SET_SRC (call);
9133 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9134 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9135 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9136 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9137 cost -= TARGET_SH4_300 ? 3 : 6;
9139 /* Likewise, the most timing critical input for an sfuncs call
9140 is the function address. However, sfuncs typically start
9141 using their arguments pretty quickly.
9142 Assume a four cycle delay for SH4 before they are needed.
9143 Cached ST40-300 calls are quicker, so assume only a one
9144 cycle delay there.
9145 ??? Maybe we should encode the delays till input registers
9146 are needed by sfuncs into the sfunc call insn. */
9147 /* All sfunc calls are parallels with at least four components.
9148 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9149 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9150 && XVECLEN (PATTERN (insn), 0) >= 4
9151 && (reg = sfunc_uses_reg (insn)))
9153 if (! reg_set_p (reg, dep_insn))
9154 cost -= TARGET_SH4_300 ? 1 : 4;
9156 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9158 enum attr_type dep_type = get_attr_type (dep_insn);
9160 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9161 cost--;
9162 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9163 && (type = get_attr_type (insn)) != TYPE_CALL
9164 && type != TYPE_SFUNC)
9165 cost--;
9166 /* When the preceding instruction loads the shift amount of
9167 the following SHAD/SHLD, the latency of the load is increased
9168 by 1 cycle. */
9169 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9170 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9171 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9172 XEXP (SET_SRC (single_set (insn)),
9173 1)))
9174 cost++;
9175 /* When an LS group instruction with a latency of less than
9176 3 cycles is followed by a double-precision floating-point
9177 instruction, FIPR, or FTRV, the latency of the first
9178 instruction is increased to 3 cycles. */
9179 else if (cost < 3
9180 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9181 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9182 cost = 3;
9183 /* The lsw register of a double-precision computation is ready one
9184 cycle earlier. */
9185 else if (reload_completed
9186 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9187 && (use_pat = single_set (insn))
9188 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9189 SET_SRC (use_pat)))
9190 cost -= 1;
9192 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9193 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9194 cost -= 1;
9196 else if (TARGET_SH4_300)
9198 /* Stores need their input register two cycles later. */
9199 if (dep_set && cost >= 1
9200 && ((type = get_attr_type (insn)) == TYPE_STORE
9201 || type == TYPE_PSTORE
9202 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9204 rtx set = single_set (insn);
9206 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9207 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9209 cost -= 2;
9210 /* But don't reduce the cost below 1 if the address depends
9211 on a side effect of dep_insn. */
9212 if (cost < 1
9213 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9214 cost = 1;
9219 /* An anti-dependence penalty of two applies if the first insn is a double
9220 precision fadd / fsub / fmul. */
9221 else if (!TARGET_SH4_300
9222 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9223 && recog_memoized (dep_insn) >= 0
9224 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9225 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9226 /* A lot of alleged anti-flow dependences are fake,
9227 so check this one is real. */
9228 && flow_dependent_p (dep_insn, insn))
9229 cost = 2;
9231 return cost;
9234 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9235 if DEP_INSN is anti-flow dependent on INSN. */
9236 static int
9237 flow_dependent_p (rtx insn, rtx dep_insn)
9239 rtx tmp = PATTERN (insn);
9241 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9242 return tmp == NULL_RTX;
9245 /* A helper function for flow_dependent_p called through note_stores. */
9246 static void
9247 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9249 rtx * pinsn = (rtx *) data;
9251 if (*pinsn && reg_referenced_p (x, *pinsn))
9252 *pinsn = NULL_RTX;
9255 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9256 'special function' patterns (type sfunc) that clobber pr, but that
9257 do not look like function calls to leaf_function_p. Hence we must
9258 do this extra check. */
9259 static int
9260 sh_pr_n_sets (void)
9262 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9265 /* Return where to allocate pseudo for a given hard register initial
9266 value. */
9267 static rtx
9268 sh_allocate_initial_value (rtx hard_reg)
9270 rtx x;
9272 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9274 if (current_function_is_leaf
9275 && ! sh_pr_n_sets ()
9276 && ! (TARGET_SHCOMPACT
9277 && ((crtl->args.info.call_cookie
9278 & ~ CALL_COOKIE_RET_TRAMP (1))
9279 || crtl->saves_all_registers)))
9280 x = hard_reg;
9281 else
9282 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9284 else
9285 x = NULL_RTX;
9287 return x;
9290 /* This function returns "2" to indicate dual issue for the SH4
9291 processor. To be used by the DFA pipeline description. */
9292 static int
9293 sh_issue_rate (void)
9295 if (TARGET_SUPERSCALAR)
9296 return 2;
9297 else
9298 return 1;
9301 /* Functions for ready queue reordering for sched1. */
9303 /* Get weight for mode for a set x. */
9304 static short
9305 find_set_regmode_weight (rtx x, enum machine_mode mode)
9307 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9308 return 1;
9309 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9311 if (GET_CODE (SET_DEST (x)) == REG)
9313 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9314 return 1;
9315 else
9316 return 0;
9318 return 1;
9320 return 0;
9323 /* Get regmode weight for insn. */
9324 static short
9325 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9327 short reg_weight = 0;
9328 rtx x;
9330 /* Increment weight for each register born here. */
9331 x = PATTERN (insn);
9332 reg_weight += find_set_regmode_weight (x, mode);
9333 if (GET_CODE (x) == PARALLEL)
9335 int j;
9336 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9338 x = XVECEXP (PATTERN (insn), 0, j);
9339 reg_weight += find_set_regmode_weight (x, mode);
9342 /* Decrement weight for each register that dies here. */
9343 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9345 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9347 rtx note = XEXP (x, 0);
9348 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9349 reg_weight--;
9352 return reg_weight;
9355 /* Calculate regmode weights for all insns of a basic block. */
9356 static void
9357 find_regmode_weight (basic_block b, enum machine_mode mode)
9359 rtx insn, next_tail, head, tail;
9361 get_ebb_head_tail (b, b, &head, &tail);
9362 next_tail = NEXT_INSN (tail);
9364 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9366 /* Handle register life information. */
9367 if (!INSN_P (insn))
9368 continue;
9370 if (mode == SFmode)
9371 INSN_REGMODE_WEIGHT (insn, mode) =
9372 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9373 else if (mode == SImode)
9374 INSN_REGMODE_WEIGHT (insn, mode) =
9375 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9379 /* Comparison function for ready queue sorting. */
9380 static int
9381 rank_for_reorder (const void *x, const void *y)
9383 rtx tmp = *(const rtx *) y;
9384 rtx tmp2 = *(const rtx *) x;
9386 /* The insn in a schedule group should be issued the first. */
9387 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9388 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9390 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9391 minimizes instruction movement, thus minimizing sched's effect on
9392 register pressure. */
9393 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9396 /* Resort the array A in which only element at index N may be out of order. */
9397 static void
9398 swap_reorder (rtx *a, int n)
9400 rtx insn = a[n - 1];
9401 int i = n - 2;
9403 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9405 a[i + 1] = a[i];
9406 i -= 1;
9408 a[i + 1] = insn;
9411 #define SCHED_REORDER(READY, N_READY) \
9412 do \
9414 if ((N_READY) == 2) \
9415 swap_reorder (READY, N_READY); \
9416 else if ((N_READY) > 2) \
9417 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9419 while (0)
9421 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9422 macro. */
9423 static void
9424 ready_reorder (rtx *ready, int nready)
9426 SCHED_REORDER (ready, nready);
9429 /* Count life regions of r0 for a block. */
9430 static int
9431 find_r0_life_regions (basic_block b)
9433 rtx end, insn;
9434 rtx pset;
9435 rtx r0_reg;
9436 int live;
9437 int set;
9438 int death = 0;
9440 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9442 set = 1;
9443 live = 1;
9445 else
9447 set = 0;
9448 live = 0;
9451 insn = BB_HEAD (b);
9452 end = BB_END (b);
9453 r0_reg = gen_rtx_REG (SImode, R0_REG);
9454 while (1)
9456 if (INSN_P (insn))
9458 if (find_regno_note (insn, REG_DEAD, R0_REG))
9460 death++;
9461 live = 0;
9463 if (!live
9464 && (pset = single_set (insn))
9465 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9466 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9468 set++;
9469 live = 1;
9472 if (insn == end)
9473 break;
9474 insn = NEXT_INSN (insn);
9476 return set - death;
9479 /* Calculate regmode weights for all insns of all basic block. */
9480 static void
9481 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9482 int verbose ATTRIBUTE_UNUSED,
9483 int old_max_uid)
9485 basic_block b;
9487 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9488 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9489 r0_life_regions = 0;
9491 FOR_EACH_BB_REVERSE (b)
9493 find_regmode_weight (b, SImode);
9494 find_regmode_weight (b, SFmode);
9495 if (!reload_completed)
9496 r0_life_regions += find_r0_life_regions (b);
9499 CURR_REGMODE_PRESSURE (SImode) = 0;
9500 CURR_REGMODE_PRESSURE (SFmode) = 0;
9504 /* Cleanup. */
9505 static void
9506 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9507 int verbose ATTRIBUTE_UNUSED)
9509 if (regmode_weight[0])
9511 free (regmode_weight[0]);
9512 regmode_weight[0] = NULL;
9514 if (regmode_weight[1])
9516 free (regmode_weight[1]);
9517 regmode_weight[1] = NULL;
9521 /* The scalar modes supported differs from the default version in TImode
9522 for 32-bit SHMEDIA. */
9523 static bool
9524 sh_scalar_mode_supported_p (enum machine_mode mode)
9526 if (TARGET_SHMEDIA32 && mode == TImode)
9527 return false;
9529 return default_scalar_mode_supported_p (mode);
9532 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9533 keep count of register pressures on SImode and SFmode. */
9534 static int
9535 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9536 int sched_verbose ATTRIBUTE_UNUSED,
9537 rtx insn,
9538 int can_issue_more)
9540 if (GET_CODE (PATTERN (insn)) != USE
9541 && GET_CODE (PATTERN (insn)) != CLOBBER)
9542 cached_can_issue_more = can_issue_more - 1;
9543 else
9544 cached_can_issue_more = can_issue_more;
9546 if (reload_completed)
9547 return cached_can_issue_more;
9549 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9550 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9552 return cached_can_issue_more;
9555 static void
9556 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9557 int verbose ATTRIBUTE_UNUSED,
9558 int veclen ATTRIBUTE_UNUSED)
9560 CURR_REGMODE_PRESSURE (SImode) = 0;
9561 CURR_REGMODE_PRESSURE (SFmode) = 0;
9564 /* Some magic numbers. */
9565 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9566 functions that already have high pressure on r0. */
9567 #define R0_MAX_LIFE_REGIONS 2
9568 /* Register Pressure thresholds for SImode and SFmode registers. */
9569 #define SIMODE_MAX_WEIGHT 5
9570 #define SFMODE_MAX_WEIGHT 10
9572 /* Return true if the pressure is high for MODE. */
9573 static short
9574 high_pressure (enum machine_mode mode)
9576 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9577 functions that already have high pressure on r0. */
9578 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9579 return 1;
9581 if (mode == SFmode)
9582 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9583 else
9584 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9587 /* Reorder ready queue if register pressure is high. */
9588 static int
9589 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9590 int sched_verbose ATTRIBUTE_UNUSED,
9591 rtx *ready,
9592 int *n_readyp,
9593 int clock_var ATTRIBUTE_UNUSED)
9595 if (reload_completed)
9596 return sh_issue_rate ();
9598 if (high_pressure (SFmode) || high_pressure (SImode))
9600 ready_reorder (ready, *n_readyp);
9603 return sh_issue_rate ();
9606 /* Skip cycles if the current register pressure is high. */
9607 static int
9608 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9609 int sched_verbose ATTRIBUTE_UNUSED,
9610 rtx *ready ATTRIBUTE_UNUSED,
9611 int *n_readyp ATTRIBUTE_UNUSED,
9612 int clock_var ATTRIBUTE_UNUSED)
9614 if (reload_completed)
9615 return cached_can_issue_more;
9617 if (high_pressure(SFmode) || high_pressure (SImode))
9618 skip_cycles = 1;
9620 return cached_can_issue_more;
9623 /* Skip cycles without sorting the ready queue. This will move insn from
9624 Q->R. If this is the last cycle we are skipping; allow sorting of ready
9625 queue by sh_reorder. */
9627 /* Generally, skipping these many cycles are sufficient for all insns to move
9628 from Q -> R. */
9629 #define MAX_SKIPS 8
9631 static int
9632 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9633 int sched_verbose ATTRIBUTE_UNUSED,
9634 rtx insn ATTRIBUTE_UNUSED,
9635 int last_clock_var,
9636 int clock_var,
9637 int *sort_p)
9639 if (reload_completed)
9640 return 0;
9642 if (skip_cycles)
9644 if ((clock_var - last_clock_var) < MAX_SKIPS)
9646 *sort_p = 0;
9647 return 1;
9649 /* If this is the last cycle we are skipping, allow reordering of R. */
9650 if ((clock_var - last_clock_var) == MAX_SKIPS)
9652 *sort_p = 1;
9653 return 1;
9657 skip_cycles = 0;
9659 return 0;
9662 /* SHmedia requires registers for branches, so we can't generate new
9663 branches past reload. */
9664 static bool
9665 sh_cannot_modify_jumps_p (void)
9667 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9670 static int
9671 sh_target_reg_class (void)
9673 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9676 static bool
9677 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9679 HARD_REG_SET dummy;
9680 #if 0
9681 rtx insn;
9682 #endif
9684 if (! shmedia_space_reserved_for_target_registers)
9685 return 0;
9686 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9687 return 0;
9688 if (calc_live_regs (&dummy) >= 6 * 8)
9689 return 1;
9690 return 0;
9693 static bool
9694 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9696 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9700 On the SH1..SH4, the trampoline looks like
9701 2 0002 D202 mov.l l2,r2
9702 1 0000 D301 mov.l l1,r3
9703 3 0004 422B jmp @r2
9704 4 0006 0009 nop
9705 5 0008 00000000 l1: .long area
9706 6 000c 00000000 l2: .long function
9708 SH5 (compact) uses r1 instead of r3 for the static chain. */
9711 /* Emit RTL insns to initialize the variable parts of a trampoline.
9712 FNADDR is an RTX for the address of the function's pure code.
9713 CXT is an RTX for the static chain value for the function. */
9715 void
9716 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9718 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9720 if (TARGET_SHMEDIA64)
9722 rtx tramp_templ;
9723 int fixed_len;
9725 rtx movi1 = GEN_INT (0xcc000010);
9726 rtx shori1 = GEN_INT (0xc8000010);
9727 rtx src, dst;
9729 /* The following trampoline works within a +- 128 KB range for cxt:
9730 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9731 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9732 gettr tr1,r1; blink tr0,r63 */
9733 /* Address rounding makes it hard to compute the exact bounds of the
9734 offset for this trampoline, but we have a rather generous offset
9735 range, so frame_offset should do fine as an upper bound. */
9736 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9738 /* ??? could optimize this trampoline initialization
9739 by writing DImode words with two insns each. */
9740 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9741 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9742 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9743 insn = gen_rtx_AND (DImode, insn, mask);
9744 /* Or in ptb/u .,tr1 pattern */
9745 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9746 insn = force_operand (insn, NULL_RTX);
9747 insn = gen_lowpart (SImode, insn);
9748 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9749 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9750 insn = gen_rtx_AND (DImode, insn, mask);
9751 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9752 insn = gen_lowpart (SImode, insn);
9753 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9754 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9755 insn = gen_rtx_AND (DImode, insn, mask);
9756 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9757 insn = gen_lowpart (SImode, insn);
9758 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9759 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9760 insn = gen_rtx_AND (DImode, insn, mask);
9761 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9762 insn = gen_lowpart (SImode, insn);
9763 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9764 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9765 insn = gen_rtx_AND (DImode, insn, mask);
9766 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9767 insn = gen_lowpart (SImode, insn);
9768 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9769 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9770 GEN_INT (0x6bf10600));
9771 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9772 GEN_INT (0x4415fc10));
9773 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9774 GEN_INT (0x4401fff0));
9775 emit_insn (gen_ic_invalidate_line (tramp));
9776 return;
9778 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9779 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9781 tramp_templ = gen_datalabel_ref (tramp_templ);
9782 dst = tramp_mem;
9783 src = gen_const_mem (BLKmode, tramp_templ);
9784 set_mem_align (dst, 256);
9785 set_mem_align (src, 64);
9786 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9788 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9789 emit_move_insn (adjust_address (tramp_mem, Pmode,
9790 fixed_len + GET_MODE_SIZE (Pmode)),
9791 cxt);
9792 emit_insn (gen_ic_invalidate_line (tramp));
9793 return;
9795 else if (TARGET_SHMEDIA)
9797 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9798 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9799 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9800 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9801 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9802 rotated 10 right, and higher 16 bit of every 32 selected. */
9803 rtx movishori
9804 = force_reg (V2HImode, (simplify_gen_subreg
9805 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9806 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9807 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9809 tramp = force_reg (Pmode, tramp);
9810 fnaddr = force_reg (SImode, fnaddr);
9811 cxt = force_reg (SImode, cxt);
9812 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9813 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9814 movishori));
9815 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9816 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9817 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9818 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9819 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9820 gen_rtx_SUBREG (V2HImode, cxt, 0),
9821 movishori));
9822 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9823 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9824 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9825 if (TARGET_LITTLE_ENDIAN)
9827 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9828 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9830 else
9832 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9833 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9835 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9836 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9837 emit_insn (gen_ic_invalidate_line (tramp));
9838 return;
9840 else if (TARGET_SHCOMPACT)
9842 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9843 return;
9845 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9846 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9847 SImode));
9848 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9849 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9850 SImode));
9851 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9852 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9853 if (TARGET_HARVARD)
9855 if (!TARGET_INLINE_IC_INVALIDATE
9856 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9857 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9858 FUNCTION_ORDINARY),
9859 0, VOIDmode, 1, tramp, SImode);
9860 else
9861 emit_insn (gen_ic_invalidate_line (tramp));
9865 /* FIXME: This is overly conservative. A SHcompact function that
9866 receives arguments ``by reference'' will have them stored in its
9867 own stack frame, so it must not pass pointers or references to
9868 these arguments to other functions by means of sibling calls. */
9869 /* If PIC, we cannot make sibling calls to global functions
9870 because the PLT requires r12 to be live. */
9871 static bool
9872 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9874 return (1
9875 && (! TARGET_SHCOMPACT
9876 || crtl->args.info.stack_regs == 0)
9877 && ! sh_cfun_interrupt_handler_p ()
9878 && (! flag_pic
9879 || (decl && ! TREE_PUBLIC (decl))
9880 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9883 /* Machine specific built-in functions. */
9885 struct builtin_description
9887 const enum insn_code icode;
9888 const char *const name;
9889 int signature;
9892 /* describe number and signedness of arguments; arg[0] == result
9893 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9894 /* 9: 64-bit pointer, 10: 32-bit pointer */
9895 static const char signature_args[][4] =
9897 #define SH_BLTIN_V2SI2 0
9898 { 4, 4 },
9899 #define SH_BLTIN_V4HI2 1
9900 { 4, 4 },
9901 #define SH_BLTIN_V2SI3 2
9902 { 4, 4, 4 },
9903 #define SH_BLTIN_V4HI3 3
9904 { 4, 4, 4 },
9905 #define SH_BLTIN_V8QI3 4
9906 { 4, 4, 4 },
9907 #define SH_BLTIN_MAC_HISI 5
9908 { 1, 4, 4, 1 },
9909 #define SH_BLTIN_SH_HI 6
9910 { 4, 4, 1 },
9911 #define SH_BLTIN_SH_SI 7
9912 { 4, 4, 1 },
9913 #define SH_BLTIN_V4HI2V2SI 8
9914 { 4, 4, 4 },
9915 #define SH_BLTIN_V4HI2V8QI 9
9916 { 4, 4, 4 },
9917 #define SH_BLTIN_SISF 10
9918 { 4, 2 },
9919 #define SH_BLTIN_LDUA_L 11
9920 { 2, 10 },
9921 #define SH_BLTIN_LDUA_Q 12
9922 { 1, 10 },
9923 #define SH_BLTIN_STUA_L 13
9924 { 0, 10, 2 },
9925 #define SH_BLTIN_STUA_Q 14
9926 { 0, 10, 1 },
9927 #define SH_BLTIN_LDUA_L64 15
9928 { 2, 9 },
9929 #define SH_BLTIN_LDUA_Q64 16
9930 { 1, 9 },
9931 #define SH_BLTIN_STUA_L64 17
9932 { 0, 9, 2 },
9933 #define SH_BLTIN_STUA_Q64 18
9934 { 0, 9, 1 },
9935 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9936 #define SH_BLTIN_2 19
9937 #define SH_BLTIN_SU 19
9938 { 1, 2 },
9939 #define SH_BLTIN_3 20
9940 #define SH_BLTIN_SUS 20
9941 { 2, 2, 1 },
9942 #define SH_BLTIN_PSSV 21
9943 { 0, 8, 2, 2 },
9944 #define SH_BLTIN_XXUU 22
9945 #define SH_BLTIN_UUUU 22
9946 { 1, 1, 1, 1 },
9947 #define SH_BLTIN_PV 23
9948 { 0, 8 },
9950 /* mcmv: operands considered unsigned. */
9951 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9952 /* mperm: control value considered unsigned int. */
9953 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9954 /* mshards_q: returns signed short. */
9955 /* nsb: takes long long arg, returns unsigned char. */
9956 static const struct builtin_description bdesc[] =
9958 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9959 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9960 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9961 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9962 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9963 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9964 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9965 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9966 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9967 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9968 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9969 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9970 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9971 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9972 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9973 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9974 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9975 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9976 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9977 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9978 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9979 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9980 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9981 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9982 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9983 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9984 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9985 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9986 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9987 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9988 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9989 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9990 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9991 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9992 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9993 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9994 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9995 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9996 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9997 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9998 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9999 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10000 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10001 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10002 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10003 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10004 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10005 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10006 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10007 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10008 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10009 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10010 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10011 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10012 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10013 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10014 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10015 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10016 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10017 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10018 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10019 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10020 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10021 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10022 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10023 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10024 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10025 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10026 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10027 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10028 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10029 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10030 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10031 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10032 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10033 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10034 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10035 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10036 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10037 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10038 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10039 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10040 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10041 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10044 static void
10045 sh_media_init_builtins (void)
10047 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10048 const struct builtin_description *d;
10050 memset (shared, 0, sizeof shared);
10051 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10053 tree type, arg_type = 0;
10054 int signature = d->signature;
10055 int i;
10057 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10058 type = shared[signature];
10059 else
10061 int has_result = signature_args[signature][0] != 0;
10063 if ((signature_args[signature][1] & 8)
10064 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10065 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10066 continue;
10067 if (! TARGET_FPU_ANY
10068 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10069 continue;
10070 type = void_list_node;
10071 for (i = 3; ; i--)
10073 int arg = signature_args[signature][i];
10074 int opno = i - 1 + has_result;
10076 if (arg & 8)
10077 arg_type = ptr_type_node;
10078 else if (arg)
10079 arg_type = (*lang_hooks.types.type_for_mode)
10080 (insn_data[d->icode].operand[opno].mode,
10081 (arg & 1));
10082 else if (i)
10083 continue;
10084 else
10085 arg_type = void_type_node;
10086 if (i == 0)
10087 break;
10088 type = tree_cons (NULL_TREE, arg_type, type);
10090 type = build_function_type (arg_type, type);
10091 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10092 shared[signature] = type;
10094 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10095 NULL, NULL_TREE);
10099 /* Implements target hook vector_mode_supported_p. */
10100 bool
10101 sh_vector_mode_supported_p (enum machine_mode mode)
10103 if (TARGET_FPU_ANY
10104 && ((mode == V2SFmode)
10105 || (mode == V4SFmode)
10106 || (mode == V16SFmode)))
10107 return true;
10109 else if (TARGET_SHMEDIA
10110 && ((mode == V8QImode)
10111 || (mode == V2HImode)
10112 || (mode == V4HImode)
10113 || (mode == V2SImode)))
10114 return true;
10116 return false;
10119 /* Implements target hook dwarf_calling_convention. Return an enum
10120 of dwarf_calling_convention. */
10122 sh_dwarf_calling_convention (const_tree func)
10124 if (sh_attr_renesas_p (func))
10125 return DW_CC_GNU_renesas_sh;
10127 return DW_CC_normal;
10130 static void
10131 sh_init_builtins (void)
10133 if (TARGET_SHMEDIA)
10134 sh_media_init_builtins ();
10137 /* Expand an expression EXP that calls a built-in function,
10138 with result going to TARGET if that's convenient
10139 (and in mode MODE if that's convenient).
10140 SUBTARGET may be used as the target for computing one of EXP's operands.
10141 IGNORE is nonzero if the value is to be ignored. */
10143 static rtx
10144 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10145 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10147 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10148 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10149 const struct builtin_description *d = &bdesc[fcode];
10150 enum insn_code icode = d->icode;
10151 int signature = d->signature;
10152 enum machine_mode tmode = VOIDmode;
10153 int nop = 0, i;
10154 rtx op[4];
10155 rtx pat = 0;
10157 if (signature_args[signature][0])
10159 if (ignore)
10160 return 0;
10162 tmode = insn_data[icode].operand[0].mode;
10163 if (! target
10164 || GET_MODE (target) != tmode
10165 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10166 target = gen_reg_rtx (tmode);
10167 op[nop++] = target;
10169 else
10170 target = 0;
10172 for (i = 1; i <= 3; i++, nop++)
10174 tree arg;
10175 enum machine_mode opmode, argmode;
10176 tree optype;
10178 if (! signature_args[signature][i])
10179 break;
10180 arg = CALL_EXPR_ARG (exp, i - 1);
10181 if (arg == error_mark_node)
10182 return const0_rtx;
10183 if (signature_args[signature][i] & 8)
10185 opmode = ptr_mode;
10186 optype = ptr_type_node;
10188 else
10190 opmode = insn_data[icode].operand[nop].mode;
10191 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10193 argmode = TYPE_MODE (TREE_TYPE (arg));
10194 if (argmode != opmode)
10195 arg = build1 (NOP_EXPR, optype, arg);
10196 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
10197 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10198 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10201 switch (nop)
10203 case 1:
10204 pat = (*insn_data[d->icode].genfun) (op[0]);
10205 break;
10206 case 2:
10207 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10208 break;
10209 case 3:
10210 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10211 break;
10212 case 4:
10213 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10214 break;
10215 default:
10216 gcc_unreachable ();
10218 if (! pat)
10219 return 0;
10220 emit_insn (pat);
10221 return target;
10224 void
10225 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10227 rtx sel0 = const0_rtx;
10228 rtx sel1 = const1_rtx;
10229 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10230 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10232 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10233 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10236 void
10237 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10239 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10241 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10242 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10245 /* Return the class of registers for which a mode change from FROM to TO
10246 is invalid. */
10247 bool
10248 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10249 enum reg_class class)
10251 /* We want to enable the use of SUBREGs as a means to
10252 VEC_SELECT a single element of a vector. */
10253 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10254 return (reg_classes_intersect_p (GENERAL_REGS, class));
10256 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10258 if (TARGET_LITTLE_ENDIAN)
10260 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10261 return reg_classes_intersect_p (DF_REGS, class);
10263 else
10265 if (GET_MODE_SIZE (from) < 8)
10266 return reg_classes_intersect_p (DF_HI_REGS, class);
10269 return 0;
10273 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10274 that label is used. */
10276 void
10277 sh_mark_label (rtx address, int nuses)
10279 if (GOTOFF_P (address))
10281 /* Extract the label or symbol. */
10282 address = XEXP (address, 0);
10283 if (GET_CODE (address) == PLUS)
10284 address = XEXP (address, 0);
10285 address = XVECEXP (address, 0, 0);
10287 if (GET_CODE (address) == LABEL_REF
10288 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10289 LABEL_NUSES (XEXP (address, 0)) += nuses;
10292 /* Compute extra cost of moving data between one register class
10293 and another. */
10295 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10296 uses this information. Hence, the general register <-> floating point
10297 register information here is not used for SFmode. */
10300 sh_register_move_cost (enum machine_mode mode,
10301 enum reg_class srcclass, enum reg_class dstclass)
10303 if (dstclass == T_REGS || dstclass == PR_REGS)
10304 return 10;
10306 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10307 return 4;
10309 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10310 && REGCLASS_HAS_FP_REG (srcclass)
10311 && REGCLASS_HAS_FP_REG (dstclass))
10312 return 4;
10314 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10315 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10317 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10318 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10319 return 9;
10321 if ((REGCLASS_HAS_FP_REG (dstclass)
10322 && REGCLASS_HAS_GENERAL_REG (srcclass))
10323 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10324 && REGCLASS_HAS_FP_REG (srcclass)))
10325 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10326 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10328 if ((dstclass == FPUL_REGS
10329 && REGCLASS_HAS_GENERAL_REG (srcclass))
10330 || (srcclass == FPUL_REGS
10331 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10332 return 5;
10334 if ((dstclass == FPUL_REGS
10335 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10336 || (srcclass == FPUL_REGS
10337 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10338 return 7;
10340 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10341 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10342 return 20;
10344 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10345 if (TARGET_SHMEDIA
10346 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10348 if (sh_gettrcost >= 0)
10349 return sh_gettrcost;
10350 else if (!TARGET_PT_FIXED)
10351 return 100;
10354 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10355 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10356 return 4;
10358 if (TARGET_SHMEDIA
10359 || (TARGET_FMOVD
10360 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10361 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10362 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10364 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10367 static rtx emit_load_ptr (rtx, rtx);
10369 static rtx
10370 emit_load_ptr (rtx reg, rtx addr)
10372 rtx mem = gen_const_mem (ptr_mode, addr);
10374 if (Pmode != ptr_mode)
10375 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10376 return emit_move_insn (reg, mem);
10379 static void
10380 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10381 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10382 tree function)
10384 CUMULATIVE_ARGS cum;
10385 int structure_value_byref = 0;
10386 rtx this, this_value, sibcall, insns, funexp;
10387 tree funtype = TREE_TYPE (function);
10388 int simple_add = CONST_OK_FOR_ADD (delta);
10389 int did_load = 0;
10390 rtx scratch0, scratch1, scratch2;
10391 unsigned i;
10393 reload_completed = 1;
10394 epilogue_completed = 1;
10395 current_function_uses_only_leaf_regs = 1;
10397 emit_note (NOTE_INSN_PROLOGUE_END);
10399 /* Find the "this" pointer. We have such a wide range of ABIs for the
10400 SH that it's best to do this completely machine independently.
10401 "this" is passed as first argument, unless a structure return pointer
10402 comes first, in which case "this" comes second. */
10403 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10404 #ifndef PCC_STATIC_STRUCT_RETURN
10405 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10406 structure_value_byref = 1;
10407 #endif /* not PCC_STATIC_STRUCT_RETURN */
10408 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10410 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10412 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10414 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10416 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10417 static chain pointer (even if you can't have nested virtual functions
10418 right now, someone might implement them sometime), and the rest of the
10419 registers are used for argument passing, are callee-saved, or reserved. */
10420 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
10421 -ffixed-reg has been used. */
10422 if (! call_used_regs[0] || fixed_regs[0])
10423 error ("r0 needs to be available as a call-clobbered register");
10424 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10425 if (! TARGET_SH5)
10427 if (call_used_regs[1] && ! fixed_regs[1])
10428 scratch1 = gen_rtx_REG (ptr_mode, 1);
10429 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10430 pointing where to return struct values. */
10431 if (call_used_regs[3] && ! fixed_regs[3])
10432 scratch2 = gen_rtx_REG (Pmode, 3);
10434 else if (TARGET_SHMEDIA)
10436 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10437 if (i != REGNO (scratch0) &&
10438 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10440 scratch1 = gen_rtx_REG (ptr_mode, i);
10441 break;
10443 if (scratch1 == scratch0)
10444 error ("Need a second call-clobbered general purpose register");
10445 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10446 if (call_used_regs[i] && ! fixed_regs[i])
10448 scratch2 = gen_rtx_REG (Pmode, i);
10449 break;
10451 if (scratch2 == scratch0)
10452 error ("Need a call-clobbered target register");
10455 this_value = plus_constant (this, delta);
10456 if (vcall_offset
10457 && (simple_add || scratch0 != scratch1)
10458 && strict_memory_address_p (ptr_mode, this_value))
10460 emit_load_ptr (scratch0, this_value);
10461 did_load = 1;
10464 if (!delta)
10465 ; /* Do nothing. */
10466 else if (simple_add)
10467 emit_move_insn (this, this_value);
10468 else
10470 emit_move_insn (scratch1, GEN_INT (delta));
10471 emit_insn (gen_add2_insn (this, scratch1));
10474 if (vcall_offset)
10476 rtx offset_addr;
10478 if (!did_load)
10479 emit_load_ptr (scratch0, this);
10481 offset_addr = plus_constant (scratch0, vcall_offset);
10482 if (strict_memory_address_p (ptr_mode, offset_addr))
10483 ; /* Do nothing. */
10484 else if (! TARGET_SH5 && scratch0 != scratch1)
10486 /* scratch0 != scratch1, and we have indexed loads. Get better
10487 schedule by loading the offset into r1 and using an indexed
10488 load - then the load of r1 can issue before the load from
10489 (this + delta) finishes. */
10490 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10491 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10493 else if (CONST_OK_FOR_ADD (vcall_offset))
10495 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10496 offset_addr = scratch0;
10498 else if (scratch0 != scratch1)
10500 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10501 emit_insn (gen_add2_insn (scratch0, scratch1));
10502 offset_addr = scratch0;
10504 else
10505 gcc_unreachable (); /* FIXME */
10506 emit_load_ptr (scratch0, offset_addr);
10508 if (Pmode != ptr_mode)
10509 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10510 emit_insn (gen_add2_insn (this, scratch0));
10513 /* Generate a tail call to the target function. */
10514 if (! TREE_USED (function))
10516 assemble_external (function);
10517 TREE_USED (function) = 1;
10519 funexp = XEXP (DECL_RTL (function), 0);
10520 /* If the function is overridden, so is the thunk, hence we don't
10521 need GOT addressing even if this is a public symbol. */
10522 #if 0
10523 if (TARGET_SH1 && ! flag_weak)
10524 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10525 else
10526 #endif
10527 if (TARGET_SH2 && flag_pic)
10529 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10530 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10532 else
10534 if (TARGET_SHMEDIA && flag_pic)
10536 funexp = gen_sym2PIC (funexp);
10537 PUT_MODE (funexp, Pmode);
10539 emit_move_insn (scratch2, funexp);
10540 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10541 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10543 sibcall = emit_call_insn (sibcall);
10544 SIBLING_CALL_P (sibcall) = 1;
10545 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10546 emit_barrier ();
10548 /* Run just enough of rest_of_compilation to do scheduling and get
10549 the insns emitted. Note that use_thunk calls
10550 assemble_start_function and assemble_end_function. */
10552 insn_locators_alloc ();
10553 insns = get_insns ();
10555 #if 0
10556 if (optimize > 0)
10558 /* Initialize the bitmap obstacks. */
10559 bitmap_obstack_initialize (NULL);
10560 bitmap_obstack_initialize (&reg_obstack);
10561 if (! cfun->cfg)
10562 init_flow ();
10563 rtl_register_cfg_hooks ();
10564 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10565 init_rtl_bb_info (EXIT_BLOCK_PTR);
10566 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10567 EXIT_BLOCK_PTR->flags |= BB_RTL;
10568 find_basic_blocks (insns);
10570 if (flag_schedule_insns_after_reload)
10572 life_analysis (PROP_FINAL);
10574 split_all_insns (1);
10576 schedule_insns ();
10578 /* We must split jmp insn in PIC case. */
10579 else if (flag_pic)
10580 split_all_insns_noflow ();
10582 #else
10583 if (optimize > 0)
10585 if (! cfun->cfg)
10586 init_flow (cfun);
10587 split_all_insns_noflow ();
10589 #endif
10591 sh_reorg ();
10593 if (optimize > 0 && flag_delayed_branch)
10594 dbr_schedule (insns);
10596 shorten_branches (insns);
10597 final_start_function (insns, file, 1);
10598 final (insns, file, 1);
10599 final_end_function ();
10600 free_after_compilation (cfun);
10602 reload_completed = 0;
10603 epilogue_completed = 0;
10607 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10609 rtx sym;
10611 /* If this is not an ordinary function, the name usually comes from a
10612 string literal or an sprintf buffer. Make sure we use the same
10613 string consistently, so that cse will be able to unify address loads. */
10614 if (kind != FUNCTION_ORDINARY)
10615 name = IDENTIFIER_POINTER (get_identifier (name));
10616 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10617 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10618 if (flag_pic)
10619 switch (kind)
10621 case FUNCTION_ORDINARY:
10622 break;
10623 case SFUNC_GOT:
10625 rtx reg = target ? target : gen_reg_rtx (Pmode);
10627 emit_insn (gen_symGOT2reg (reg, sym));
10628 sym = reg;
10629 break;
10631 case SFUNC_STATIC:
10633 /* ??? To allow cse to work, we use GOTOFF relocations.
10634 we could add combiner patterns to transform this into
10635 straight pc-relative calls with sym2PIC / bsrf when
10636 label load and function call are still 1:1 and in the
10637 same basic block during combine. */
10638 rtx reg = target ? target : gen_reg_rtx (Pmode);
10640 emit_insn (gen_symGOTOFF2reg (reg, sym));
10641 sym = reg;
10642 break;
10645 if (target && sym != target)
10647 emit_move_insn (target, sym);
10648 return target;
10650 return sym;
10653 /* Find the number of a general purpose register in S. */
10654 static int
10655 scavenge_reg (HARD_REG_SET *s)
10657 int r;
10658 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10659 if (TEST_HARD_REG_BIT (*s, r))
10660 return r;
10661 return -1;
10665 sh_get_pr_initial_val (void)
10667 rtx val;
10669 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10670 PR register on SHcompact, because it might be clobbered by the prologue.
10671 We check first if that is known to be the case. */
10672 if (TARGET_SHCOMPACT
10673 && ((crtl->args.info.call_cookie
10674 & ~ CALL_COOKIE_RET_TRAMP (1))
10675 || crtl->saves_all_registers))
10676 return gen_frame_mem (SImode, return_address_pointer_rtx);
10678 /* If we haven't finished rtl generation, there might be a nonlocal label
10679 that we haven't seen yet.
10680 ??? get_hard_reg_initial_val fails if it is called after register
10681 allocation has started, unless it has been called before for the
10682 same register. And even then, we end in trouble if we didn't use
10683 the register in the same basic block before. So call
10684 get_hard_reg_initial_val now and wrap it in an unspec if we might
10685 need to replace it. */
10686 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10687 combine can put the pseudo returned by get_hard_reg_initial_val into
10688 instructions that need a general purpose registers, which will fail to
10689 be recognized when the pseudo becomes allocated to PR. */
10691 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10692 if (TARGET_SH1)
10693 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10694 return val;
10698 sh_expand_t_scc (enum rtx_code code, rtx target)
10700 rtx result = target;
10701 HOST_WIDE_INT val;
10703 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10704 || GET_CODE (sh_compare_op1) != CONST_INT)
10705 return 0;
10706 if (GET_CODE (result) != REG)
10707 result = gen_reg_rtx (SImode);
10708 val = INTVAL (sh_compare_op1);
10709 if ((code == EQ && val == 1) || (code == NE && val == 0))
10710 emit_insn (gen_movt (result));
10711 else if (TARGET_SH2A && ((code == EQ && val == 0)
10712 || (code == NE && val == 1)))
10713 emit_insn (gen_movrt (result));
10714 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10716 emit_clobber (result);
10717 emit_insn (gen_subc (result, result, result));
10718 emit_insn (gen_addsi3 (result, result, const1_rtx));
10720 else if (code == EQ || code == NE)
10721 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10722 else
10723 return 0;
10724 if (result != target)
10725 emit_move_insn (target, result);
10726 return 1;
10729 /* INSN is an sfunc; return the rtx that describes the address used. */
10730 static rtx
10731 extract_sfunc_addr (rtx insn)
10733 rtx pattern, part = NULL_RTX;
10734 int len, i;
10736 pattern = PATTERN (insn);
10737 len = XVECLEN (pattern, 0);
10738 for (i = 0; i < len; i++)
10740 part = XVECEXP (pattern, 0, i);
10741 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10742 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10743 return XEXP (part, 0);
10745 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10746 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10749 /* Verify that the register in use_sfunc_addr still agrees with the address
10750 used in the sfunc. This prevents fill_slots_from_thread from changing
10751 use_sfunc_addr.
10752 INSN is the use_sfunc_addr instruction, and REG is the register it
10753 guards. */
10755 check_use_sfunc_addr (rtx insn, rtx reg)
10757 /* Search for the sfunc. It should really come right after INSN. */
10758 while ((insn = NEXT_INSN (insn)))
10760 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10761 break;
10762 if (! INSN_P (insn))
10763 continue;
10765 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10766 insn = XVECEXP (PATTERN (insn), 0, 0);
10767 if (GET_CODE (PATTERN (insn)) != PARALLEL
10768 || get_attr_type (insn) != TYPE_SFUNC)
10769 continue;
10770 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10772 gcc_unreachable ();
10775 /* This function returns a constant rtx that represents pi / 2**15 in
10776 SFmode. it's used to scale SFmode angles, in radians, to a
10777 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10778 maps to 0x10000). */
10780 static GTY(()) rtx sh_fsca_sf2int_rtx;
10783 sh_fsca_sf2int (void)
10785 if (! sh_fsca_sf2int_rtx)
10787 REAL_VALUE_TYPE rv;
10789 real_from_string (&rv, "10430.378350470453");
10790 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10793 return sh_fsca_sf2int_rtx;
10796 /* This function returns a constant rtx that represents pi / 2**15 in
10797 DFmode. it's used to scale DFmode angles, in radians, to a
10798 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10799 maps to 0x10000). */
10801 static GTY(()) rtx sh_fsca_df2int_rtx;
10804 sh_fsca_df2int (void)
10806 if (! sh_fsca_df2int_rtx)
10808 REAL_VALUE_TYPE rv;
10810 real_from_string (&rv, "10430.378350470453");
10811 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10814 return sh_fsca_df2int_rtx;
10817 /* This function returns a constant rtx that represents 2**15 / pi in
10818 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10819 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10820 2*pi). */
10822 static GTY(()) rtx sh_fsca_int2sf_rtx;
10825 sh_fsca_int2sf (void)
10827 if (! sh_fsca_int2sf_rtx)
10829 REAL_VALUE_TYPE rv;
10831 real_from_string (&rv, "9.587379924285257e-5");
10832 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10835 return sh_fsca_int2sf_rtx;
10838 /* Initialize the CUMULATIVE_ARGS structure. */
10840 void
10841 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10842 tree fntype,
10843 rtx libname ATTRIBUTE_UNUSED,
10844 tree fndecl,
10845 signed int n_named_args,
10846 enum machine_mode mode)
10848 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10849 pcum->free_single_fp_reg = 0;
10850 pcum->stack_regs = 0;
10851 pcum->byref_regs = 0;
10852 pcum->byref = 0;
10853 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10855 /* XXX - Should we check TARGET_HITACHI here ??? */
10856 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10858 if (fntype)
10860 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10861 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10862 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10863 pcum->arg_count [(int) SH_ARG_INT]
10864 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10866 pcum->call_cookie
10867 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10868 && pcum->arg_count [(int) SH_ARG_INT] == 0
10869 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10870 ? int_size_in_bytes (TREE_TYPE (fntype))
10871 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10872 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10873 == FIRST_RET_REG));
10875 else
10877 pcum->arg_count [(int) SH_ARG_INT] = 0;
10878 pcum->prototype_p = FALSE;
10879 if (mode != VOIDmode)
10881 pcum->call_cookie =
10882 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10883 && GET_MODE_SIZE (mode) > 4
10884 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10886 /* If the default ABI is the Renesas ABI then all library
10887 calls must assume that the library will be using the
10888 Renesas ABI. So if the function would return its result
10889 in memory then we must force the address of this memory
10890 block onto the stack. Ideally we would like to call
10891 targetm.calls.return_in_memory() here but we do not have
10892 the TYPE or the FNDECL available so we synthesize the
10893 contents of that function as best we can. */
10894 pcum->force_mem =
10895 (TARGET_DEFAULT & MASK_HITACHI)
10896 && (mode == BLKmode
10897 || (GET_MODE_SIZE (mode) > 4
10898 && !(mode == DFmode
10899 && TARGET_FPU_DOUBLE)));
10901 else
10903 pcum->call_cookie = 0;
10904 pcum->force_mem = FALSE;
10909 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10910 not enter into CONST_DOUBLE for the replace.
10912 Note that copying is not done so X must not be shared unless all copies
10913 are to be modified.
10915 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10916 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10917 replacements[n*2+1] - and that we take mode changes into account.
10919 If a replacement is ambiguous, return NULL_RTX.
10921 If MODIFY is zero, don't modify any rtl in place,
10922 just return zero or nonzero for failure / success. */
10925 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10927 int i, j;
10928 const char *fmt;
10930 /* The following prevents loops occurrence when we change MEM in
10931 CONST_DOUBLE onto the same CONST_DOUBLE. */
10932 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10933 return x;
10935 for (i = n_replacements - 1; i >= 0 ; i--)
10936 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10937 return replacements[i*2+1];
10939 /* Allow this function to make replacements in EXPR_LISTs. */
10940 if (x == 0)
10941 return 0;
10943 if (GET_CODE (x) == SUBREG)
10945 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10946 n_replacements, modify);
10948 if (GET_CODE (new) == CONST_INT)
10950 x = simplify_subreg (GET_MODE (x), new,
10951 GET_MODE (SUBREG_REG (x)),
10952 SUBREG_BYTE (x));
10953 if (! x)
10954 abort ();
10956 else if (modify)
10957 SUBREG_REG (x) = new;
10959 return x;
10961 else if (GET_CODE (x) == REG)
10963 unsigned regno = REGNO (x);
10964 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10965 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10966 rtx result = NULL_RTX;
10968 for (i = n_replacements - 1; i >= 0; i--)
10970 rtx from = replacements[i*2];
10971 rtx to = replacements[i*2+1];
10972 unsigned from_regno, from_nregs, to_regno, new_regno;
10974 if (GET_CODE (from) != REG)
10975 continue;
10976 from_regno = REGNO (from);
10977 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10978 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10979 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10981 if (regno < from_regno
10982 || regno + nregs > from_regno + nregs
10983 || GET_CODE (to) != REG
10984 || result)
10985 return NULL_RTX;
10986 to_regno = REGNO (to);
10987 if (to_regno < FIRST_PSEUDO_REGISTER)
10989 new_regno = regno + to_regno - from_regno;
10990 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10991 != nregs)
10992 return NULL_RTX;
10993 result = gen_rtx_REG (GET_MODE (x), new_regno);
10995 else if (GET_MODE (x) <= GET_MODE (to))
10996 result = gen_lowpart_common (GET_MODE (x), to);
10997 else
10998 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11001 return result ? result : x;
11003 else if (GET_CODE (x) == ZERO_EXTEND)
11005 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11006 n_replacements, modify);
11008 if (GET_CODE (new) == CONST_INT)
11010 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11011 new, GET_MODE (XEXP (x, 0)));
11012 if (! x)
11013 abort ();
11015 else if (modify)
11016 XEXP (x, 0) = new;
11018 return x;
11021 fmt = GET_RTX_FORMAT (GET_CODE (x));
11022 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11024 rtx new;
11026 if (fmt[i] == 'e')
11028 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11029 n_replacements, modify);
11030 if (!new)
11031 return NULL_RTX;
11032 if (modify)
11033 XEXP (x, i) = new;
11035 else if (fmt[i] == 'E')
11036 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11038 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11039 n_replacements, modify);
11040 if (!new)
11041 return NULL_RTX;
11042 if (modify)
11043 XVECEXP (x, i, j) = new;
11047 return x;
11051 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11053 enum rtx_code code = TRUNCATE;
11055 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11057 rtx inner = XEXP (x, 0);
11058 enum machine_mode inner_mode = GET_MODE (inner);
11060 if (inner_mode == mode)
11061 return inner;
11062 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11063 x = inner;
11064 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11065 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11067 code = GET_CODE (x);
11068 x = inner;
11071 return gen_rtx_fmt_e (code, mode, x);
11074 /* called via for_each_rtx after reload, to clean up truncates of
11075 registers that span multiple actual hard registers. */
11077 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11079 rtx x = *p, reg;
11081 if (GET_CODE (x) != TRUNCATE)
11082 return 0;
11083 reg = XEXP (x, 0);
11084 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11086 enum machine_mode reg_mode = GET_MODE (reg);
11087 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11088 subreg_lowpart_offset (DImode, reg_mode));
11089 *(int*) n_changes += 1;
11090 return -1;
11092 return 0;
11095 /* Load and store depend on the highpart of the address. However,
11096 set_attr_alternative does not give well-defined results before reload,
11097 so we must look at the rtl ourselves to see if any of the feeding
11098 registers is used in a memref. */
11100 /* Called by sh_contains_memref_p via for_each_rtx. */
11101 static int
11102 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11104 return (GET_CODE (*loc) == MEM);
11107 /* Return nonzero iff INSN contains a MEM. */
11109 sh_contains_memref_p (rtx insn)
11111 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11114 /* Return nonzero iff INSN loads a banked register. */
11116 sh_loads_bankedreg_p (rtx insn)
11118 if (GET_CODE (PATTERN (insn)) == SET)
11120 rtx op = SET_DEST (PATTERN(insn));
11121 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11122 return 1;
11125 return 0;
11128 /* FNADDR is the MEM expression from a call expander. Return an address
11129 to use in an SHmedia insn pattern. */
11131 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11133 int is_sym;
11135 fnaddr = XEXP (fnaddr, 0);
11136 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11137 if (flag_pic && is_sym)
11139 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11141 rtx reg = gen_reg_rtx (Pmode);
11143 /* We must not use GOTPLT for sibcalls, because PIC_REG
11144 must be restored before the PLT code gets to run. */
11145 if (is_sibcall)
11146 emit_insn (gen_symGOT2reg (reg, fnaddr));
11147 else
11148 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11149 fnaddr = reg;
11151 else
11153 fnaddr = gen_sym2PIC (fnaddr);
11154 PUT_MODE (fnaddr, Pmode);
11157 /* If ptabs might trap, make this visible to the rest of the compiler.
11158 We generally assume that symbols pertain to valid locations, but
11159 it is possible to generate invalid symbols with asm or linker tricks.
11160 In a list of functions where each returns its successor, an invalid
11161 symbol might denote an empty list. */
11162 if (!TARGET_PT_FIXED
11163 && (!is_sym || TARGET_INVALID_SYMBOLS)
11164 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11166 rtx tr = gen_reg_rtx (PDImode);
11168 emit_insn (gen_ptabs (tr, fnaddr));
11169 fnaddr = tr;
11171 else if (! target_reg_operand (fnaddr, Pmode))
11172 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11173 return fnaddr;
11176 enum reg_class
11177 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
11178 enum machine_mode mode, secondary_reload_info *sri)
11180 if (in_p)
11182 if (REGCLASS_HAS_FP_REG (class)
11183 && ! TARGET_SHMEDIA
11184 && immediate_operand ((x), mode)
11185 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11186 && mode == SFmode && fldi_ok ()))
11187 switch (mode)
11189 case SFmode:
11190 sri->icode = CODE_FOR_reload_insf__frn;
11191 return NO_REGS;
11192 case DFmode:
11193 sri->icode = CODE_FOR_reload_indf__frn;
11194 return NO_REGS;
11195 case SImode:
11196 /* ??? If we knew that we are in the appropriate mode -
11197 single precision - we could use a reload pattern directly. */
11198 return FPUL_REGS;
11199 default:
11200 abort ();
11202 if (class == FPUL_REGS
11203 && ((GET_CODE (x) == REG
11204 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11205 || REGNO (x) == T_REG))
11206 || GET_CODE (x) == PLUS))
11207 return GENERAL_REGS;
11208 if (class == FPUL_REGS && immediate_operand (x, mode))
11210 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11211 return GENERAL_REGS;
11212 else if (mode == SFmode)
11213 return FP_REGS;
11214 sri->icode = CODE_FOR_reload_insi__i_fpul;
11215 return NO_REGS;
11217 if (class == FPSCR_REGS
11218 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11219 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11220 return GENERAL_REGS;
11221 if (REGCLASS_HAS_FP_REG (class)
11222 && TARGET_SHMEDIA
11223 && immediate_operand (x, mode)
11224 && x != CONST0_RTX (GET_MODE (x))
11225 && GET_MODE (x) != V4SFmode)
11226 return GENERAL_REGS;
11227 if ((mode == QImode || mode == HImode)
11228 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11230 sri->icode = ((mode == QImode)
11231 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11232 return NO_REGS;
11234 if (TARGET_SHMEDIA && class == GENERAL_REGS
11235 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
11236 return TARGET_REGS;
11237 } /* end of input-only processing. */
11239 if (((REGCLASS_HAS_FP_REG (class)
11240 && (GET_CODE (x) == REG
11241 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11242 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11243 && TARGET_FMOVD))))
11244 || (REGCLASS_HAS_GENERAL_REG (class)
11245 && GET_CODE (x) == REG
11246 && FP_REGISTER_P (REGNO (x))))
11247 && ! TARGET_SHMEDIA
11248 && (mode == SFmode || mode == SImode))
11249 return FPUL_REGS;
11250 if ((class == FPUL_REGS
11251 || (REGCLASS_HAS_FP_REG (class)
11252 && ! TARGET_SHMEDIA && mode == SImode))
11253 && (GET_CODE (x) == MEM
11254 || (GET_CODE (x) == REG
11255 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11256 || REGNO (x) == T_REG
11257 || system_reg_operand (x, VOIDmode)))))
11259 if (class == FPUL_REGS)
11260 return GENERAL_REGS;
11261 return FPUL_REGS;
11263 if ((class == TARGET_REGS
11264 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
11265 && !satisfies_constraint_Csy (x)
11266 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11267 return GENERAL_REGS;
11268 if ((class == MAC_REGS || class == PR_REGS)
11269 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11270 && class != REGNO_REG_CLASS (REGNO (x)))
11271 return GENERAL_REGS;
11272 if (class != GENERAL_REGS && GET_CODE (x) == REG
11273 && TARGET_REGISTER_P (REGNO (x)))
11274 return GENERAL_REGS;
11275 return NO_REGS;
11278 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11280 #include "gt-sh.h"