1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "c-pragma.h"
41 #include "integrate.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Saved operands from the last compare to use when we generate an scc
110 or bcc insn. */
112 rtx sh_compare_op0;
113 rtx sh_compare_op1;
115 /* Provides the class number of the smallest class containing
116 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
168 int assembler_dialect;
170 static bool shmedia_space_reserved_for_target_registers;
172 static bool sh_handle_option (size_t, const char *, int);
173 static void split_branches (rtx);
174 static int branch_dest (rtx);
175 static void force_into (rtx, rtx);
176 static void print_slot (rtx);
177 static rtx add_constant (rtx, enum machine_mode, rtx);
178 static void dump_table (rtx, rtx);
179 static int hi_const (rtx);
180 static int broken_move (rtx);
181 static int mova_p (rtx);
182 static rtx find_barrier (int, rtx, rtx);
183 static int noncall_uses_reg (rtx, rtx, rtx *);
184 static rtx gen_block_redirect (rtx, int, int);
185 static void sh_reorg (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
187 static rtx frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET *, int);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static rtx mark_constant_pool_use (rtx);
194 const struct attribute_spec sh_attribute_table[];
195 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
196 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static int find_r0_life_regions (basic_block);
213 static void sh_md_init_global (FILE *, int, int);
214 static void sh_md_finish_global (FILE *, int);
215 static int rank_for_reorder (const void *, const void *);
216 static void swap_reorder (rtx *, int);
217 static void ready_reorder (rtx *, int);
218 static short high_pressure (enum machine_mode);
219 static int sh_reorder (FILE *, int, rtx *, int *, int);
220 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
221 static void sh_md_init (FILE *, int, int);
222 static int sh_variable_issue (FILE *, int, rtx, int);
224 static bool sh_function_ok_for_sibcall (tree, tree);
226 static bool sh_cannot_modify_jumps_p (void);
227 static int sh_target_reg_class (void);
228 static bool sh_optimize_target_register_callee_saved (bool);
229 static bool sh_ms_bitfield_layout_p (const_tree);
231 static void sh_init_builtins (void);
232 static void sh_media_init_builtins (void);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *);
245 static int sh_address_cost (rtx);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
249 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
250 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
251 static int scavenge_reg (HARD_REG_SET *s);
252 struct save_schedule_s;
253 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
254 struct save_schedule_s *, int);
256 static rtx sh_struct_value_rtx (tree, int);
257 static bool sh_return_in_memory (const_tree, const_tree);
258 static rtx sh_builtin_saveregs (void);
259 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
260 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
261 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
262 static tree sh_build_builtin_va_list (void);
263 static void sh_va_start (tree, rtx);
264 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 const_tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 const_tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static bool sh_scalar_mode_supported_p (enum machine_mode);
272 static int sh_dwarf_calling_convention (const_tree);
273 static void sh_encode_section_info (tree, rtx, int);
274 static int sh2a_function_vector_p (tree);
277 /* Initialize the GCC target structure. */
278 #undef TARGET_ATTRIBUTE_TABLE
279 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
281 /* The next two are used for debug info when compiling with -gdwarf. */
282 #undef TARGET_ASM_UNALIGNED_HI_OP
283 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
284 #undef TARGET_ASM_UNALIGNED_SI_OP
285 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
287 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
288 #undef TARGET_ASM_UNALIGNED_DI_OP
289 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
290 #undef TARGET_ASM_ALIGNED_DI_OP
291 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
293 #undef TARGET_ASM_FUNCTION_EPILOGUE
294 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
296 #undef TARGET_ASM_OUTPUT_MI_THUNK
297 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
302 #undef TARGET_ASM_FILE_START
303 #define TARGET_ASM_FILE_START sh_file_start
304 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
305 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
307 #undef TARGET_DEFAULT_TARGET_FLAGS
308 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
309 #undef TARGET_HANDLE_OPTION
310 #define TARGET_HANDLE_OPTION sh_handle_option
312 #undef TARGET_INSERT_ATTRIBUTES
313 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
318 #undef TARGET_SCHED_ISSUE_RATE
319 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
321 /* The next 5 hooks have been implemented for reenabling sched1. With the
322 help of these macros we are limiting the movement of insns in sched1 to
323 reduce the register pressure. The overall idea is to keep count of SImode
324 and SFmode regs required by already scheduled insns. When these counts
325 cross some threshold values, give priority to insns that free registers.
326 The insn that frees registers is most likely to be the insn with the lowest
327 LUID (original insn order), but such an insn might be sitting in the stalled
328 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
329 up to a maximum of 8, so that such insns may move from Q -> R.
331 The descriptions of the hooks are given below:
333 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
334 scheduler; it is called inside the sched_init function just after
335 the find_insn_reg_weights function call. It is used to calculate the SImode
336 and SFmode weights of the insns of basic blocks, much like what
337 find_insn_reg_weights does.
338 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
340 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
341 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
342 (Q)->(R).
344 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
345 high, reorder the ready queue so that the insn with the lowest LUID will be
346 issued next.
348 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
349 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
351 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
352 can be returned from TARGET_SCHED_REORDER2.
354 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
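/* A minimal sketch of the reordering idea described above, not the actual
   sh_reorder implementation: when pressure is high, bubble the ready insn
   with the lowest LUID (original program order) to the tail of the ready
   list, which is where the scheduler picks the next insn from.  The helper
   name is hypothetical and the block is disabled; see sh_reorder,
   ready_reorder and swap_reorder for the real code.  */
#if 0
static void
example_bubble_lowest_luid (rtx *ready, int n_ready)
{
  int i, best = 0;

  /* Find the ready insn that comes earliest in the original insn order.  */
  for (i = 1; i < n_ready; i++)
    if (INSN_LUID (ready[i]) < INSN_LUID (ready[best]))
      best = i;

  /* Swap the chosen insn into the last slot of the ready array.  */
  if (best != n_ready - 1)
    {
      rtx tmp = ready[n_ready - 1];
      ready[n_ready - 1] = ready[best];
      ready[best] = tmp;
    }
}
#endif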
356 #undef TARGET_SCHED_DFA_NEW_CYCLE
357 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
359 #undef TARGET_SCHED_INIT_GLOBAL
360 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
362 #undef TARGET_SCHED_FINISH_GLOBAL
363 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
365 #undef TARGET_SCHED_VARIABLE_ISSUE
366 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
368 #undef TARGET_SCHED_REORDER
369 #define TARGET_SCHED_REORDER sh_reorder
371 #undef TARGET_SCHED_REORDER2
372 #define TARGET_SCHED_REORDER2 sh_reorder2
374 #undef TARGET_SCHED_INIT
375 #define TARGET_SCHED_INIT sh_md_init
377 #undef TARGET_CANNOT_MODIFY_JUMPS_P
378 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
379 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
380 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
381 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
382 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
383 sh_optimize_target_register_callee_saved
385 #undef TARGET_MS_BITFIELD_LAYOUT_P
386 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
388 #undef TARGET_INIT_BUILTINS
389 #define TARGET_INIT_BUILTINS sh_init_builtins
390 #undef TARGET_EXPAND_BUILTIN
391 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
393 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
394 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
396 #undef TARGET_CANNOT_COPY_INSN_P
397 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
398 #undef TARGET_RTX_COSTS
399 #define TARGET_RTX_COSTS sh_rtx_costs
400 #undef TARGET_ADDRESS_COST
401 #define TARGET_ADDRESS_COST sh_address_cost
402 #undef TARGET_ALLOCATE_INITIAL_VALUE
403 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
405 #undef TARGET_MACHINE_DEPENDENT_REORG
406 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #ifdef HAVE_AS_TLS
409 #undef TARGET_HAVE_TLS
410 #define TARGET_HAVE_TLS true
411 #endif
413 #undef TARGET_PROMOTE_PROTOTYPES
414 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_ARGS
416 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
417 #undef TARGET_PROMOTE_FUNCTION_RETURN
418 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
420 #undef TARGET_STRUCT_VALUE_RTX
421 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
422 #undef TARGET_RETURN_IN_MEMORY
423 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
425 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
426 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
427 #undef TARGET_SETUP_INCOMING_VARARGS
428 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
429 #undef TARGET_STRICT_ARGUMENT_NAMING
430 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
431 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
432 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
433 #undef TARGET_MUST_PASS_IN_STACK
434 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
435 #undef TARGET_PASS_BY_REFERENCE
436 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
437 #undef TARGET_CALLEE_COPIES
438 #define TARGET_CALLEE_COPIES sh_callee_copies
439 #undef TARGET_ARG_PARTIAL_BYTES
440 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_EXPAND_BUILTIN_VA_START
445 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
446 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
447 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
449 #undef TARGET_SCALAR_MODE_SUPPORTED_P
450 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
451 #undef TARGET_VECTOR_MODE_SUPPORTED_P
452 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
454 #undef TARGET_CHECK_PCH_TARGET_FLAGS
455 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
457 #undef TARGET_DWARF_CALLING_CONVENTION
458 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
460 /* Return regmode weight for insn. */
461 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
463 /* Return current register pressure for regmode. */
464 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
469 #ifdef SYMBIAN
471 #undef TARGET_ENCODE_SECTION_INFO
472 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
475 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
476 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
478 #endif /* SYMBIAN */
480 #undef TARGET_SECONDARY_RELOAD
481 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
483 /* Machine-specific symbol_ref flags. */
484 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
486 struct gcc_target targetm = TARGET_INITIALIZER;
488 /* Implement TARGET_HANDLE_OPTION. */
490 static bool
491 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
492 int value ATTRIBUTE_UNUSED)
494 switch (code)
496 case OPT_m1:
497 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
498 return true;
500 case OPT_m2:
501 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
502 return true;
504 case OPT_m2a:
505 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
506 return true;
508 case OPT_m2a_nofpu:
509 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
510 return true;
512 case OPT_m2a_single:
513 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
514 return true;
516 case OPT_m2a_single_only:
517 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
518 return true;
520 case OPT_m2e:
521 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
522 return true;
524 case OPT_m3:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
526 return true;
528 case OPT_m3e:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
530 return true;
532 case OPT_m4:
533 case OPT_m4_100:
534 case OPT_m4_200:
535 case OPT_m4_300:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
537 return true;
539 case OPT_m4_nofpu:
540 case OPT_m4_100_nofpu:
541 case OPT_m4_200_nofpu:
542 case OPT_m4_300_nofpu:
543 case OPT_m4_340:
544 case OPT_m4_400:
545 case OPT_m4_500:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
547 return true;
549 case OPT_m4_single:
550 case OPT_m4_100_single:
551 case OPT_m4_200_single:
552 case OPT_m4_300_single:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
554 return true;
556 case OPT_m4_single_only:
557 case OPT_m4_100_single_only:
558 case OPT_m4_200_single_only:
559 case OPT_m4_300_single_only:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
561 return true;
563 case OPT_m4a:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
565 return true;
567 case OPT_m4a_nofpu:
568 case OPT_m4al:
569 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
570 return true;
572 case OPT_m4a_single:
573 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
574 return true;
576 case OPT_m4a_single_only:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
578 return true;
580 case OPT_m5_32media:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
582 return true;
584 case OPT_m5_32media_nofpu:
585 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
586 return true;
588 case OPT_m5_64media:
589 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
590 return true;
592 case OPT_m5_64media_nofpu:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
594 return true;
596 case OPT_m5_compact:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
598 return true;
600 case OPT_m5_compact_nofpu:
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
602 return true;
604 default:
605 return true;
609 /* Print the operand address in x to the stream. */
611 void
612 print_operand_address (FILE *stream, rtx x)
614 switch (GET_CODE (x))
616 case REG:
617 case SUBREG:
618 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
619 break;
621 case PLUS:
623 rtx base = XEXP (x, 0);
624 rtx index = XEXP (x, 1);
626 switch (GET_CODE (index))
628 case CONST_INT:
629 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
630 reg_names[true_regnum (base)]);
631 break;
633 case REG:
634 case SUBREG:
636 int base_num = true_regnum (base);
637 int index_num = true_regnum (index);
639 fprintf (stream, "@(r0,%s)",
640 reg_names[MAX (base_num, index_num)]);
641 break;
644 default:
645 gcc_unreachable ();
648 break;
650 case PRE_DEC:
651 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
652 break;
654 case POST_INC:
655 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
656 break;
658 default:
659 x = mark_constant_pool_use (x);
660 output_addr_const (stream, x);
661 break;
665 /* Print operand x (an rtx) in assembler syntax to file stream
666 according to modifier code.
668 '.' print a .s if insn needs delay slot
669 ',' print LOCAL_LABEL_PREFIX
670 '@' print trap, rte or rts depending upon pragma interruptness
671 '#' output a nop if there is nothing to put in the delay slot
672 ''' print likelihood suffix (/u for unlikely).
673 '>' print branch target if -fverbose-asm
674 'O' print a constant without the #
675 'R' print the LSW of a dp value - changes if in little endian
676 'S' print the MSW of a dp value - changes if in little endian
677 'T' print the next word of a dp value - same as 'R' in big endian mode.
678 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
679 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
680 'N' print 'r63' if the operand is (const_int 0).
681 'd' print a V2SF reg as dN instead of fpN.
682 'm' print a pair `base,offset' or `base,index', for LD and ST.
683 'U' Likewise for {LD,ST}{HI,LO}.
684 'V' print the position of a single bit set.
685 'W' print the position of a single bit cleared.
686 't' print a memory address which is a register.
687 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
688 'o' output an operator. */
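/* For example, output_movedouble below emits "sts mach,%S0\n\tsts macl,%R0":
   %S0 prints the register holding the most significant word of operand 0 and
   %R0 the least significant word, so the word order follows the MSW/LSW
   macros and hence the target endianness.  */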
690 void
691 print_operand (FILE *stream, rtx x, int code)
693 int regno;
694 enum machine_mode mode;
696 switch (code)
698 tree trapa_attr;
700 case '.':
701 if (final_sequence
702 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
703 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
704 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
705 break;
706 case ',':
707 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
708 break;
709 case '@':
710 trapa_attr = lookup_attribute ("trap_exit",
711 DECL_ATTRIBUTES (current_function_decl));
712 if (trapa_attr)
713 fprintf (stream, "trapa #%ld",
714 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
715 else if (sh_cfun_interrupt_handler_p ())
717 if (sh_cfun_resbank_handler_p ())
718 fprintf (stream, "resbank\n");
719 fprintf (stream, "rte");
721 else
722 fprintf (stream, "rts");
723 break;
724 case '#':
725 /* Output a nop if there's nothing in the delay slot. */
726 if (dbr_sequence_length () == 0)
727 fprintf (stream, "\n\tnop");
728 break;
729 case '\'':
731 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
733 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
734 fputs ("/u", stream);
735 break;
737 case '>':
738 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
740 fputs ("\t! target: ", stream);
741 output_addr_const (stream, JUMP_LABEL (current_output_insn));
743 break;
744 case 'O':
745 x = mark_constant_pool_use (x);
746 output_addr_const (stream, x);
747 break;
748 /* N.B.: %R / %S / %T adjust memory addresses by four.
749 For SHMEDIA, that means they can be used to access the first and
750 second 32 bit part of a 64 bit (or larger) value that
751 might be held in floating point registers or memory.
752 While they can be used to access 64 bit parts of a larger value
753 held in general purpose registers, that won't work with memory -
754 neither for fp registers, since the frxx names are used. */
755 case 'R':
756 if (REG_P (x) || GET_CODE (x) == SUBREG)
758 regno = true_regnum (x);
759 regno += FP_REGISTER_P (regno) ? 1 : LSW;
760 fputs (reg_names[regno], (stream));
762 else if (MEM_P (x))
764 x = adjust_address (x, SImode, 4 * LSW);
765 print_operand_address (stream, XEXP (x, 0));
767 else
769 rtx sub = NULL_RTX;
771 mode = GET_MODE (x);
772 if (mode == VOIDmode)
773 mode = DImode;
774 if (GET_MODE_SIZE (mode) >= 8)
775 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
776 if (sub)
777 print_operand (stream, sub, 0);
778 else
779 output_operand_lossage ("invalid operand to %%R");
781 break;
782 case 'S':
783 if (REG_P (x) || GET_CODE (x) == SUBREG)
785 regno = true_regnum (x);
786 regno += FP_REGISTER_P (regno) ? 0 : MSW;
787 fputs (reg_names[regno], (stream));
789 else if (MEM_P (x))
791 x = adjust_address (x, SImode, 4 * MSW);
792 print_operand_address (stream, XEXP (x, 0));
794 else
796 rtx sub = NULL_RTX;
798 mode = GET_MODE (x);
799 if (mode == VOIDmode)
800 mode = DImode;
801 if (GET_MODE_SIZE (mode) >= 8)
802 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
803 if (sub)
804 print_operand (stream, sub, 0);
805 else
806 output_operand_lossage ("invalid operand to %%S");
808 break;
809 case 'T':
810 /* Next word of a double. */
811 switch (GET_CODE (x))
813 case REG:
814 fputs (reg_names[REGNO (x) + 1], (stream));
815 break;
816 case MEM:
817 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
818 && GET_CODE (XEXP (x, 0)) != POST_INC)
819 x = adjust_address (x, SImode, 4);
820 print_operand_address (stream, XEXP (x, 0));
821 break;
822 default:
823 break;
825 break;
827 case 't':
828 gcc_assert (GET_CODE (x) == MEM);
829 x = XEXP (x, 0);
830 switch (GET_CODE (x))
832 case REG:
833 case SUBREG:
834 print_operand (stream, x, 0);
835 break;
836 default:
837 break;
839 break;
841 case 'o':
842 switch (GET_CODE (x))
844 case PLUS: fputs ("add", stream); break;
845 case MINUS: fputs ("sub", stream); break;
846 case MULT: fputs ("mul", stream); break;
847 case DIV: fputs ("div", stream); break;
848 case EQ: fputs ("eq", stream); break;
849 case NE: fputs ("ne", stream); break;
850 case GT: case LT: fputs ("gt", stream); break;
851 case GE: case LE: fputs ("ge", stream); break;
852 case GTU: case LTU: fputs ("gtu", stream); break;
853 case GEU: case LEU: fputs ("geu", stream); break;
854 default:
855 break;
857 break;
858 case 'M':
859 if (TARGET_SHMEDIA)
861 if (GET_CODE (x) == MEM
862 && GET_CODE (XEXP (x, 0)) == PLUS
863 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
864 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
865 fputc ('x', stream);
867 else
869 if (GET_CODE (x) == MEM)
871 switch (GET_MODE (x))
873 case QImode: fputs (".b", stream); break;
874 case HImode: fputs (".w", stream); break;
875 case SImode: fputs (".l", stream); break;
876 case SFmode: fputs (".s", stream); break;
877 case DFmode: fputs (".d", stream); break;
878 default: gcc_unreachable ();
882 break;
884 case 'm':
885 gcc_assert (GET_CODE (x) == MEM);
886 x = XEXP (x, 0);
887 /* Fall through. */
888 case 'U':
889 switch (GET_CODE (x))
891 case REG:
892 case SUBREG:
893 print_operand (stream, x, 0);
894 fputs (", 0", stream);
895 break;
897 case PLUS:
898 print_operand (stream, XEXP (x, 0), 0);
899 fputs (", ", stream);
900 print_operand (stream, XEXP (x, 1), 0);
901 break;
903 default:
904 gcc_unreachable ();
906 break;
908 case 'V':
910 int num = exact_log2 (INTVAL (x));
911 gcc_assert (num >= 0);
912 fprintf (stream, "#%d", num);
914 break;
916 case 'W':
918 int num = exact_log2 (~INTVAL (x));
919 gcc_assert (num >= 0);
920 fprintf (stream, "#%d", num);
922 break;
924 case 'd':
925 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
927 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
928 break;
930 case 'N':
931 if (x == CONST0_RTX (GET_MODE (x)))
933 fprintf ((stream), "r63");
934 break;
936 goto default_output;
937 case 'u':
938 if (GET_CODE (x) == CONST_INT)
940 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
941 break;
943 /* Fall through. */
945 default_output:
946 default:
947 regno = 0;
948 mode = GET_MODE (x);
950 switch (GET_CODE (x))
952 case TRUNCATE:
954 rtx inner = XEXP (x, 0);
955 int offset = 0;
956 enum machine_mode inner_mode;
958 /* We might see SUBREGs with vector mode registers inside. */
959 if (GET_CODE (inner) == SUBREG
960 && (GET_MODE_SIZE (GET_MODE (inner))
961 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
962 && subreg_lowpart_p (inner))
963 inner = SUBREG_REG (inner);
964 if (GET_CODE (inner) == CONST_INT)
966 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
967 goto default_output;
969 inner_mode = GET_MODE (inner);
970 if (GET_CODE (inner) == SUBREG
971 && (GET_MODE_SIZE (GET_MODE (inner))
972 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
973 && GET_CODE (SUBREG_REG (inner)) == REG)
975 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
976 GET_MODE (SUBREG_REG (inner)),
977 SUBREG_BYTE (inner),
978 GET_MODE (inner));
979 inner = SUBREG_REG (inner);
981 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
982 abort ();
983 /* Floating point register pairs are always big endian;
984 general purpose registers are 64 bit wide. */
985 regno = REGNO (inner);
986 regno = (HARD_REGNO_NREGS (regno, inner_mode)
987 - HARD_REGNO_NREGS (regno, mode))
988 + offset;
989 x = inner;
990 goto reg;
992 case SIGN_EXTEND:
993 x = XEXP (x, 0);
994 goto reg;
995 /* FIXME: We need this on SHmedia32 because reload generates
996 some sign-extended HI or QI loads into DImode registers
997 but, because Pmode is SImode, the address ends up with a
998 subreg:SI of the DImode register. Maybe reload should be
999 fixed so as to apply alter_subreg to such loads? */
1000 case IF_THEN_ELSE:
1001 gcc_assert (trapping_target_operand (x, VOIDmode));
1002 x = XEXP (XEXP (x, 2), 0);
1003 goto default_output;
1004 case SUBREG:
1005 gcc_assert (SUBREG_BYTE (x) == 0
1006 && GET_CODE (SUBREG_REG (x)) == REG);
1008 x = SUBREG_REG (x);
1009 /* Fall through. */
1011 reg:
1012 case REG:
1013 regno += REGNO (x);
1014 if (FP_REGISTER_P (regno)
1015 && mode == V16SFmode)
1016 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1017 else if (FP_REGISTER_P (REGNO (x))
1018 && mode == V4SFmode)
1019 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1020 else if (GET_CODE (x) == REG
1021 && mode == V2SFmode)
1022 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1023 else if (FP_REGISTER_P (REGNO (x))
1024 && GET_MODE_SIZE (mode) > 4)
1025 fprintf ((stream), "d%s", reg_names[regno] + 1);
1026 else
1027 fputs (reg_names[regno], (stream));
1028 break;
1030 case MEM:
1031 output_address (XEXP (x, 0));
1032 break;
1034 case CONST:
1035 if (TARGET_SHMEDIA
1036 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
1037 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
1038 && (GET_MODE (XEXP (x, 0)) == DImode
1039 || GET_MODE (XEXP (x, 0)) == SImode)
1040 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
1041 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
1043 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
1044 rtx val2 = val;
1045 bool nested_expr = false;
1047 fputc ('(', stream);
1048 if (GET_CODE (val) == ASHIFTRT)
1050 fputc ('(', stream);
1051 val2 = XEXP (val, 0);
1053 if (GET_CODE (val2) == CONST
1054 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1056 fputc ('(', stream);
1057 nested_expr = true;
1059 output_addr_const (stream, val2);
1060 if (nested_expr)
1061 fputc (')', stream);
1062 if (GET_CODE (val) == ASHIFTRT)
1064 fputs (" >> ", stream);
1065 output_addr_const (stream, XEXP (val, 1));
1066 fputc (')', stream);
1068 fputs (" & 65535)", stream);
1069 break;
1072 /* Fall through. */
1073 default:
1074 if (TARGET_SH1)
1075 fputc ('#', stream);
1076 output_addr_const (stream, x);
1077 break;
1079 break;
1084 /* Encode symbol attributes of a SYMBOL_REF into its
1085 SYMBOL_REF_FLAGS. */
1086 static void
1087 sh_encode_section_info (tree decl, rtx rtl, int first)
1089 default_encode_section_info (decl, rtl, first);
1091 if (TREE_CODE (decl) == FUNCTION_DECL
1092 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1093 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1096 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1097 static void
1098 force_into (rtx value, rtx target)
1100 value = force_operand (value, target);
1101 if (! rtx_equal_p (value, target))
1102 emit_insn (gen_move_insn (target, value));
1105 /* Emit code to perform a block move. Choose the best method.
1107 OPERANDS[0] is the destination.
1108 OPERANDS[1] is the source.
1109 OPERANDS[2] is the size.
1110 OPERANDS[3] is the alignment safe to use. */
1112 int
1113 expand_block_move (rtx *operands)
1115 int align = INTVAL (operands[3]);
1116 int constp = (GET_CODE (operands[2]) == CONST_INT);
1117 int bytes = (constp ? INTVAL (operands[2]) : 0);
1119 if (! constp)
1120 return 0;
1122 /* If we could use mov.l to move words and dest is word-aligned, we
1123 can use movua.l for loads and still generate a relatively short
1124 and efficient sequence. */
1125 if (TARGET_SH4A_ARCH && align < 4
1126 && MEM_ALIGN (operands[0]) >= 32
1127 && can_move_by_pieces (bytes, 32))
1129 rtx dest = copy_rtx (operands[0]);
1130 rtx src = copy_rtx (operands[1]);
1131 /* We could use different pseudos for each copied word, but
1132 since movua can only load into r0, it's kind of
1133 pointless. */
1134 rtx temp = gen_reg_rtx (SImode);
1135 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1136 int copied = 0;
1138 while (copied + 4 <= bytes)
1140 rtx to = adjust_address (dest, SImode, copied);
1141 rtx from = adjust_automodify_address (src, BLKmode,
1142 src_addr, copied);
1144 set_mem_size (from, GEN_INT (4));
1145 emit_insn (gen_movua (temp, from));
1146 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1147 emit_move_insn (to, temp);
1148 copied += 4;
1151 if (copied < bytes)
1152 move_by_pieces (adjust_address (dest, BLKmode, copied),
1153 adjust_automodify_address (src, BLKmode,
1154 src_addr, copied),
1155 bytes - copied, align, 0);
1157 return 1;
1160 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1161 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1162 if (align < 4 || (bytes % 4 != 0))
1163 return 0;
1165 if (TARGET_HARD_SH4)
1167 if (bytes < 12)
1168 return 0;
1169 else if (bytes == 12)
1171 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1172 rtx r4 = gen_rtx_REG (SImode, 4);
1173 rtx r5 = gen_rtx_REG (SImode, 5);
1175 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1176 force_into (XEXP (operands[0], 0), r4);
1177 force_into (XEXP (operands[1], 0), r5);
1178 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1179 return 1;
1181 else if (! TARGET_SMALLCODE)
1183 const char *entry_name;
1184 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1185 int dwords;
1186 rtx r4 = gen_rtx_REG (SImode, 4);
1187 rtx r5 = gen_rtx_REG (SImode, 5);
1188 rtx r6 = gen_rtx_REG (SImode, 6);
1190 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1191 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1192 force_into (XEXP (operands[0], 0), r4);
1193 force_into (XEXP (operands[1], 0), r5);
1195 dwords = bytes >> 3;
1196 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1197 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1198 return 1;
1200 else
1201 return 0;
1203 if (bytes < 64)
1205 char entry[30];
1206 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1207 rtx r4 = gen_rtx_REG (SImode, 4);
1208 rtx r5 = gen_rtx_REG (SImode, 5);
1210 sprintf (entry, "__movmemSI%d", bytes);
1211 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1212 force_into (XEXP (operands[0], 0), r4);
1213 force_into (XEXP (operands[1], 0), r5);
1214 emit_insn (gen_block_move_real (func_addr_rtx));
1215 return 1;
1218 /* This is the same number of bytes as a memcpy call, but to a different
1219 less common function name, so this will occasionally use more space. */
1220 if (! TARGET_SMALLCODE)
1222 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1223 int final_switch, while_loop;
1224 rtx r4 = gen_rtx_REG (SImode, 4);
1225 rtx r5 = gen_rtx_REG (SImode, 5);
1226 rtx r6 = gen_rtx_REG (SImode, 6);
1228 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1229 force_into (XEXP (operands[0], 0), r4);
1230 force_into (XEXP (operands[1], 0), r5);
1232 /* r6 controls the size of the move. 16 is decremented from it
1233 for each 64 bytes moved. Then the negative bit left over is used
1234 as an index into a list of move instructions. e.g., a 72 byte move
1235 would be set up with size(r6) = 14, for one iteration through the
1236 big while loop, and a switch of -2 for the last part. */
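  /* Worked check of the example above: for bytes = 72, bytes / 4 = 18, so
     final_switch = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16 = 0;
     r6 starts at 14, and one trip through the loop subtracts 16, leaving the
     -2 switch value.  */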
1238 final_switch = 16 - ((bytes / 4) % 16);
1239 while_loop = ((bytes / 4) / 16 - 1) * 16;
1240 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1241 emit_insn (gen_block_lump_real (func_addr_rtx));
1242 return 1;
1245 return 0;
1248 /* Prepare operands for a move define_expand; specifically, one of the
1249 operands must be in a register. */
1251 int
1252 prepare_move_operands (rtx operands[], enum machine_mode mode)
1254 if ((mode == SImode || mode == DImode)
1255 && flag_pic
1256 && ! ((mode == Pmode || mode == ptr_mode)
1257 && tls_symbolic_operand (operands[1], Pmode) != 0))
1259 rtx temp;
1260 if (SYMBOLIC_CONST_P (operands[1]))
1262 if (GET_CODE (operands[0]) == MEM)
1263 operands[1] = force_reg (Pmode, operands[1]);
1264 else if (TARGET_SHMEDIA
1265 && GET_CODE (operands[1]) == LABEL_REF
1266 && target_reg_operand (operands[0], mode))
1267 /* It's ok. */;
1268 else
1270 temp = (!can_create_pseudo_p ()
1271 ? operands[0]
1272 : gen_reg_rtx (Pmode));
1273 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1276 else if (GET_CODE (operands[1]) == CONST
1277 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1278 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1280 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1281 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1282 mode, temp);
1283 operands[1] = expand_binop (mode, add_optab, temp,
1284 XEXP (XEXP (operands[1], 0), 1),
1285 (!can_create_pseudo_p ()
1286 ? temp
1287 : gen_reg_rtx (Pmode)),
1288 0, OPTAB_LIB_WIDEN);
1292 if (! reload_in_progress && ! reload_completed)
1294 /* Copy the source to a register if both operands aren't registers. */
1295 if (! register_operand (operands[0], mode)
1296 && ! sh_register_operand (operands[1], mode))
1297 operands[1] = copy_to_mode_reg (mode, operands[1]);
1299 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1301 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1302 except that we can't use that function because it is static. */
1303 rtx new = change_address (operands[0], mode, 0);
1304 MEM_COPY_ATTRIBUTES (new, operands[0]);
1305 operands[0] = new;
1308 /* This case can happen while generating code to move the result
1309 of a library call to the target. Reject `st r0,@(rX,rY)' because
1310 reload will fail to find a spill register for rX, since r0 is already
1311 being used for the source. */
1312 else if (TARGET_SH1
1313 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1314 && GET_CODE (operands[0]) == MEM
1315 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1316 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1317 operands[1] = copy_to_mode_reg (mode, operands[1]);
1320 if (mode == Pmode || mode == ptr_mode)
1322 rtx op0, op1, opc;
1323 enum tls_model tls_kind;
1325 op0 = operands[0];
1326 op1 = operands[1];
1327 if (GET_CODE (op1) == CONST
1328 && GET_CODE (XEXP (op1, 0)) == PLUS
1329 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1331 opc = XEXP (XEXP (op1, 0), 1);
1332 op1 = XEXP (XEXP (op1, 0), 0);
1334 else
1335 opc = NULL_RTX;
1337 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1339 rtx tga_op1, tga_ret, tmp, tmp2;
1341 switch (tls_kind)
1343 case TLS_MODEL_GLOBAL_DYNAMIC:
1344 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1345 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1346 op1 = tga_ret;
1347 break;
1349 case TLS_MODEL_LOCAL_DYNAMIC:
1350 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1351 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1353 tmp = gen_reg_rtx (Pmode);
1354 emit_move_insn (tmp, tga_ret);
1356 if (register_operand (op0, Pmode))
1357 tmp2 = op0;
1358 else
1359 tmp2 = gen_reg_rtx (Pmode);
1361 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1362 op1 = tmp2;
1363 break;
1365 case TLS_MODEL_INITIAL_EXEC:
1366 if (! flag_pic)
1368 /* Don't schedule insns for getting GOT address when
1369 the first scheduling is enabled, to avoid spill
1370 failures for R0. */
1371 if (flag_schedule_insns)
1372 emit_insn (gen_blockage ());
1373 emit_insn (gen_GOTaddr2picreg ());
1374 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1375 PIC_REG)));
1376 if (flag_schedule_insns)
1377 emit_insn (gen_blockage ());
1379 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1380 tmp = gen_sym2GOTTPOFF (op1);
1381 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1382 op1 = tga_op1;
1383 break;
1385 case TLS_MODEL_LOCAL_EXEC:
1386 tmp2 = gen_reg_rtx (Pmode);
1387 emit_insn (gen_load_gbr (tmp2));
1388 tmp = gen_reg_rtx (Pmode);
1389 emit_insn (gen_symTPOFF2reg (tmp, op1));
1391 if (register_operand (op0, Pmode))
1392 op1 = op0;
1393 else
1394 op1 = gen_reg_rtx (Pmode);
1396 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1397 break;
1399 default:
1400 gcc_unreachable ();
1402 if (opc)
1403 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1404 operands[1] = op1;
1408 return 0;
1411 enum rtx_code
1412 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1413 enum rtx_code comparison)
1415 rtx op1;
1416 rtx scratch = NULL_RTX;
1418 if (comparison == CODE_FOR_nothing)
1419 comparison = GET_CODE (operands[0]);
1420 else
1421 scratch = operands[4];
1422 if (GET_CODE (operands[1]) == CONST_INT
1423 && GET_CODE (operands[2]) != CONST_INT)
1425 rtx tmp = operands[1];
1427 operands[1] = operands[2];
1428 operands[2] = tmp;
1429 comparison = swap_condition (comparison);
1431 if (GET_CODE (operands[2]) == CONST_INT)
1433 HOST_WIDE_INT val = INTVAL (operands[2]);
1434 if ((val == -1 || val == -0x81)
1435 && (comparison == GT || comparison == LE))
1437 comparison = (comparison == GT) ? GE : LT;
1438 operands[2] = gen_int_mode (val + 1, mode);
1440 else if ((val == 1 || val == 0x80)
1441 && (comparison == GE || comparison == LT))
1443 comparison = (comparison == GE) ? GT : LE;
1444 operands[2] = gen_int_mode (val - 1, mode);
1446 else if (val == 1 && (comparison == GEU || comparison == LTU))
1448 comparison = (comparison == GEU) ? NE : EQ;
1449 operands[2] = CONST0_RTX (mode);
1451 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1453 comparison = (comparison == GEU) ? GTU : LEU;
1454 operands[2] = gen_int_mode (val - 1, mode);
1456 else if (val == 0 && (comparison == GTU || comparison == LEU))
1457 comparison = (comparison == GTU) ? NE : EQ;
1458 else if (mode == SImode
1459 && ((val == 0x7fffffff
1460 && (comparison == GTU || comparison == LEU))
1461 || ((unsigned HOST_WIDE_INT) val
1462 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1463 && (comparison == GEU || comparison == LTU))))
1465 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1466 operands[2] = CONST0_RTX (mode);
1469 op1 = operands[1];
1470 if (can_create_pseudo_p ())
1471 operands[1] = force_reg (mode, op1);
1472 /* When we are handling DImode comparisons, we want to keep constants so
1473 that we can optimize the component comparisons; however, memory loads
1474 are better issued as a whole so that they can be scheduled well.
1475 SImode equality comparisons allow I08 constants, but only when they
1476 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1477 into a register, that register might as well be r0, and we allow the
1478 constant. If it is already in a register, this is likely to be
1479 allocated to a different hard register, thus we load the constant into
1480 a register unless it is zero. */
1481 if (!REG_P (operands[2])
1482 && (GET_CODE (operands[2]) != CONST_INT
1483 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1484 && ((comparison != EQ && comparison != NE)
1485 || (REG_P (op1) && REGNO (op1) != R0_REG)
1486 || !satisfies_constraint_I08 (operands[2])))))
1488 if (scratch && GET_MODE (scratch) == mode)
1490 emit_move_insn (scratch, operands[2]);
1491 operands[2] = scratch;
1493 else if (can_create_pseudo_p ())
1494 operands[2] = force_reg (mode, operands[2]);
1496 return comparison;
1499 void
1500 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1502 rtx (*branch_expander) (rtx) = gen_branch_true;
1503 rtx jump;
1505 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1506 switch (comparison)
1508 case NE: case LT: case LE: case LTU: case LEU:
1509 comparison = reverse_condition (comparison);
1510 branch_expander = gen_branch_false;
1511 default: ;
1513 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1514 gen_rtx_fmt_ee (comparison, SImode,
1515 operands[1], operands[2])));
1516 jump = emit_jump_insn (branch_expander (operands[3]));
1517 if (probability >= 0)
1518 REG_NOTES (jump)
1519 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1520 REG_NOTES (jump));
1524 /* ??? How should we distribute probabilities when more than one branch
1525 is generated?  So far we only have some ad-hoc observations:
1526 - If the operands are random, they are likely to differ in both parts.
1527 - If comparing items in a hash chain, the operands are random or equal;
1528 operation should be EQ or NE.
1529 - If items are searched in an ordered tree from the root, we can expect
1530 the highpart to be unequal about half of the time; operation should be
1531 an inequality comparison, operands non-constant, and overall probability
1532 about 50%. Likewise for quicksort.
1533 - Range checks will often be made against constants. Even if we assume for
1534 simplicity an even distribution of the non-constant operand over a
1535 sub-range here, the same probability could be generated with differently
1536 wide sub-ranges - as long as the ratio of the part of the subrange that
1537 is before the threshold to the part that comes after the threshold stays
1538 the same. Thus, we can't really tell anything here;
1539 assuming random distribution is at least simple.  */
1542 bool
1543 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1545 enum rtx_code msw_taken, msw_skip, lsw_taken;
1546 rtx skip_label = NULL_RTX;
1547 rtx op1h, op1l, op2h, op2l;
1548 int num_branches;
1549 int prob, rev_prob;
1550 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1551 rtx scratch = operands[4];
1553 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1554 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1555 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1556 op1l = gen_lowpart (SImode, operands[1]);
1557 op2l = gen_lowpart (SImode, operands[2]);
1558 msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1559 prob = split_branch_probability;
1560 rev_prob = REG_BR_PROB_BASE - prob;
1561 switch (comparison)
1563 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1564 That costs 1 cycle more when the first branch can be predicted taken,
1565 but saves us mispredicts because only one branch needs prediction.
1566 It also enables generating the cmpeqdi_t-1 pattern. */
1567 case EQ:
1568 if (TARGET_CMPEQDI_T)
1570 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1571 emit_jump_insn (gen_branch_true (operands[3]));
1572 return true;
1574 msw_skip = NE;
1575 lsw_taken = EQ;
1576 if (prob >= 0)
1578 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1580 msw_skip_prob = rev_prob;
1581 if (REG_BR_PROB_BASE <= 65535)
1582 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1583 else
1585 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1586 lsw_taken_prob
1587 = (prob
1588 ? (REG_BR_PROB_BASE
1589 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1590 / ((HOST_WIDEST_INT) prob << 32)))
1591 : 0);
1594 break;
1595 case NE:
1596 if (TARGET_CMPEQDI_T)
1598 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1599 emit_jump_insn (gen_branch_false (operands[3]));
1600 return true;
1602 msw_taken = NE;
1603 msw_taken_prob = prob;
1604 lsw_taken = NE;
1605 lsw_taken_prob = 0;
1606 break;
1607 case GTU: case GT:
1608 msw_taken = comparison;
1609 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1610 break;
1611 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1612 msw_skip = swap_condition (msw_taken);
1613 lsw_taken = GTU;
1614 break;
1615 case GEU: case GE:
1616 if (op2l == CONST0_RTX (SImode))
1617 msw_taken = comparison;
1618 else
1620 msw_taken = comparison == GE ? GT : GTU;
1621 msw_skip = swap_condition (msw_taken);
1622 lsw_taken = GEU;
1624 break;
1625 case LTU: case LT:
1626 msw_taken = comparison;
1627 if (op2l == CONST0_RTX (SImode))
1628 break;
1629 msw_skip = swap_condition (msw_taken);
1630 lsw_taken = LTU;
1631 break;
1632 case LEU: case LE:
1633 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1634 msw_taken = comparison;
1635 else
1637 lsw_taken = LEU;
1638 if (comparison == LE)
1639 msw_taken = LT;
1640 else if (op2h != CONST0_RTX (SImode))
1641 msw_taken = LTU;
1642 else
1643 break;
1644 msw_skip = swap_condition (msw_taken);
1646 break;
1647 default: return false;
1649 num_branches = ((msw_taken != CODE_FOR_nothing)
1650 + (msw_skip != CODE_FOR_nothing)
1651 + (lsw_taken != CODE_FOR_nothing));
1652 if (comparison != EQ && comparison != NE && num_branches > 1)
1654 if (!CONSTANT_P (operands[2])
1655 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1656 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1658 msw_taken_prob = prob / 2U;
1659 msw_skip_prob
1660 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1661 lsw_taken_prob = prob;
1663 else
1665 msw_taken_prob = prob;
1666 msw_skip_prob = REG_BR_PROB_BASE;
1667 /* ??? If we have a constant op2h, should we use that when
1668 calculating lsw_taken_prob? */
1669 lsw_taken_prob = prob;
1672 operands[1] = op1h;
1673 operands[2] = op2h;
1674 operands[4] = NULL_RTX;
1675 if (reload_completed
1676 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1677 && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
1679 emit_move_insn (scratch, operands[2]);
1680 operands[2] = scratch;
1682 if (msw_taken != CODE_FOR_nothing)
1683 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1684 if (msw_skip != CODE_FOR_nothing)
1686 rtx taken_label = operands[3];
1688 /* Operands were possibly modified, but msw_skip doesn't expect this.
1689 Always use the original ones. */
1690 if (msw_taken != CODE_FOR_nothing)
1692 operands[1] = op1h;
1693 operands[2] = op2h;
1696 operands[3] = skip_label = gen_label_rtx ();
1697 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1698 operands[3] = taken_label;
1700 operands[1] = op1l;
1701 operands[2] = op2l;
1702 if (lsw_taken != CODE_FOR_nothing)
1704 if (reload_completed
1705 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1706 operands[4] = scratch;
1707 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1709 if (msw_skip != CODE_FOR_nothing)
1710 emit_label (skip_label);
1711 return true;
1714 /* Prepare the operands for an scc instruction; make sure that the
1715 compare has been done. */
1716 rtx
1717 prepare_scc_operands (enum rtx_code code)
1719 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1720 enum rtx_code oldcode = code;
1721 enum machine_mode mode;
1723 /* First need a compare insn. */
1724 switch (code)
1726 case NE:
1727 /* It isn't possible to handle this case. */
1728 gcc_unreachable ();
1729 case LT:
1730 code = GT;
1731 break;
1732 case LE:
1733 code = GE;
1734 break;
1735 case LTU:
1736 code = GTU;
1737 break;
1738 case LEU:
1739 code = GEU;
1740 break;
1741 default:
1742 break;
1744 if (code != oldcode)
1746 rtx tmp = sh_compare_op0;
1747 sh_compare_op0 = sh_compare_op1;
1748 sh_compare_op1 = tmp;
1751 mode = GET_MODE (sh_compare_op0);
1752 if (mode == VOIDmode)
1753 mode = GET_MODE (sh_compare_op1);
1755 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1756 if ((code != EQ && code != NE
1757 && (sh_compare_op1 != const0_rtx
1758 || code == GTU || code == GEU || code == LTU || code == LEU))
1759 || (mode == DImode && sh_compare_op1 != const0_rtx)
1760 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1761 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1763 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1764 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1765 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1766 gen_rtx_SET (VOIDmode, t_reg,
1767 gen_rtx_fmt_ee (code, SImode,
1768 sh_compare_op0, sh_compare_op1)),
1769 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1770 else
1771 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1772 gen_rtx_fmt_ee (code, SImode,
1773 sh_compare_op0, sh_compare_op1)));
1775 return t_reg;
1778 /* Called from the md file, set up the operands of a compare instruction. */
1780 void
1781 from_compare (rtx *operands, int code)
1783 enum machine_mode mode = GET_MODE (sh_compare_op0);
1784 rtx insn;
1785 if (mode == VOIDmode)
1786 mode = GET_MODE (sh_compare_op1);
1787 if (code != EQ
1788 || mode == DImode
1789 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1791 /* Force args into regs, since we can't use constants here. */
1792 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1793 if (sh_compare_op1 != const0_rtx
1794 || code == GTU || code == GEU
1795 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1796 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1798 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1800 from_compare (operands, GT);
1801 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1803 else
1804 insn = gen_rtx_SET (VOIDmode,
1805 gen_rtx_REG (SImode, T_REG),
1806 gen_rtx_fmt_ee (code, SImode,
1807 sh_compare_op0, sh_compare_op1));
1808 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1810 insn = gen_rtx_PARALLEL (VOIDmode,
1811 gen_rtvec (2, insn,
1812 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1813 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1815 else
1816 emit_insn (insn);
1819 /* Functions to output assembly code. */
1821 /* Return a sequence of instructions to perform DI or DF move.
1823 Since the SH cannot move a DI or DF in one instruction, we have
1824 to take care when we see overlapping source and dest registers. */
1826 const char *
1827 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1828 enum machine_mode mode)
1830 rtx dst = operands[0];
1831 rtx src = operands[1];
1833 if (GET_CODE (dst) == MEM
1834 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1835 return "mov.l %T1,%0\n\tmov.l %1,%0";
1837 if (register_operand (dst, mode)
1838 && register_operand (src, mode))
1840 if (REGNO (src) == MACH_REG)
1841 return "sts mach,%S0\n\tsts macl,%R0";
1843 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1844 when mov.d r1,r0 do r1->r0 then r2->r1. */
1846 if (REGNO (src) + 1 == REGNO (dst))
1847 return "mov %T1,%T0\n\tmov %1,%0";
1848 else
1849 return "mov %1,%0\n\tmov %T1,%T0";
1851 else if (GET_CODE (src) == CONST_INT)
1853 if (INTVAL (src) < 0)
1854 output_asm_insn ("mov #-1,%S0", operands);
1855 else
1856 output_asm_insn ("mov #0,%S0", operands);
1858 return "mov %1,%R0";
1860 else if (GET_CODE (src) == MEM)
1862 int ptrreg = -1;
1863 int dreg = REGNO (dst);
1864 rtx inside = XEXP (src, 0);
1866 switch (GET_CODE (inside))
1868 case REG:
1869 ptrreg = REGNO (inside);
1870 break;
1872 case SUBREG:
1873 ptrreg = subreg_regno (inside);
1874 break;
1876 case PLUS:
1877 ptrreg = REGNO (XEXP (inside, 0));
1878 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1879 an offsettable address. Unfortunately, offsettable addresses use
1880 QImode to check the offset, and a QImode offsettable address
1881 requires r0 for the other operand, which is not currently
1882 supported, so we can't use the 'o' constraint.
1883 Thus we must check for and handle r0+REG addresses here.
1884 We punt for now, since this is likely very rare. */
1885 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1886 break;
1888 case LABEL_REF:
1889 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1890 case POST_INC:
1891 return "mov.l %1,%0\n\tmov.l %1,%T0";
1892 default:
1893 gcc_unreachable ();
1896 /* Work out the safe way to copy. Copy into the second half first. */
1897 if (dreg == ptrreg)
1898 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1901 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1904 /* Print an instruction which would have gone into a delay slot after
1905 another instruction, but couldn't because the other instruction expanded
1906 into a sequence where putting the slot insn at the end wouldn't work. */
1908 static void
1909 print_slot (rtx insn)
1911 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1913 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1916 const char *
1917 output_far_jump (rtx insn, rtx op)
1919 struct { rtx lab, reg, op; } this;
1920 rtx braf_base_lab = NULL_RTX;
1921 const char *jump;
1922 int far;
1923 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1924 rtx prev;
1926 this.lab = gen_label_rtx ();
1928 if (TARGET_SH2
1929 && offset >= -32764
1930 && offset - get_attr_length (insn) <= 32766)
1932 far = 0;
1933 jump = "mov.w %O0,%1; braf %1";
1935 else
1937 far = 1;
1938 if (flag_pic)
1940 if (TARGET_SH2)
1941 jump = "mov.l %O0,%1; braf %1";
1942 else
1943 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1945 else
1946 jump = "mov.l %O0,%1; jmp @%1";
1948 /* If we have a scratch register available, use it. */
1949 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1950 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1952 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1953 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1954 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1955 output_asm_insn (jump, &this.lab);
1956 if (dbr_sequence_length ())
1957 print_slot (final_sequence);
1958 else
1959 output_asm_insn ("nop", 0);
1961 else
1963 /* Output the delay slot insn first if any. */
1964 if (dbr_sequence_length ())
1965 print_slot (final_sequence);
1967 this.reg = gen_rtx_REG (SImode, 13);
1968 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1969 Fortunately, MACL is fixed and call-clobbered, and we never
1970 need its value across jumps, so save r13 in it instead of in
1971 the stack. */
1972 if (TARGET_SH5)
1973 output_asm_insn ("lds r13, macl", 0);
1974 else
1975 output_asm_insn ("mov.l r13,@-r15", 0);
1976 output_asm_insn (jump, &this.lab);
1977 if (TARGET_SH5)
1978 output_asm_insn ("sts macl, r13", 0);
1979 else
1980 output_asm_insn ("mov.l @r15+,r13", 0);
1982 if (far && flag_pic && TARGET_SH2)
1984 braf_base_lab = gen_label_rtx ();
1985 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1986 CODE_LABEL_NUMBER (braf_base_lab));
1988 if (far)
1989 output_asm_insn (".align 2", 0);
1990 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1991 this.op = op;
1992 if (far && flag_pic)
1994 if (TARGET_SH2)
1995 this.lab = braf_base_lab;
1996 output_asm_insn (".long %O2-%O0", &this.lab);
1998 else
1999 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
2000 return "";
2003 /* Local label counter, used for constants in the pool and inside
2004 pattern branches. */
2006 static int lf = 100;
2008 /* Output code for ordinary branches. */
2010 const char *
2011 output_branch (int logic, rtx insn, rtx *operands)
2013 switch (get_attr_length (insn))
2015 case 6:
2016 /* This can happen if filling the delay slot has caused a forward
2017 branch to exceed its range (we could reverse it, but only
2018 when we know we won't overextend other branches; this should
2019 best be handled by relaxation).
2020 It can also happen when other condbranches hoist a delay slot insn
2021 from their destination, thus leading to code size increase.
2022 But the branch will still be in the range -4092..+4098 bytes. */
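/* Sketch of what the non-relaxing path below emits: the condition is
   inverted so that a short branch skips over an unconditional jump to the
   original target, roughly

	bf.s	LF<n>		! or bf, bt.s, bt, depending on LOGIC and on
	 <delay slot insn>	! whether a usable delay slot insn exists
	bra	<original target>
	nop
   LF<n>:

   (exact label and suffix spelling depend on ASSEMBLER_DIALECT and
   LOCAL_LABEL_PREFIX).  */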
2024 if (! TARGET_RELAX)
2026 int label = lf++;
2027 /* The call to print_slot will clobber the operands. */
2028 rtx op0 = operands[0];
2030 /* If the instruction in the delay slot is annulled (true), then
2031 there is no delay slot where we can put it now. The only safe
2032 place for it is after the label. final will do that by default. */
2034 if (final_sequence
2035 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2036 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2038 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2039 ASSEMBLER_DIALECT ? "/" : ".", label);
2040 print_slot (final_sequence);
2042 else
2043 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2045 output_asm_insn ("bra\t%l0", &op0);
2046 fprintf (asm_out_file, "\tnop\n");
2047 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2049 return "";
2051 /* When relaxing, handle this like a short branch. The linker
2052 will fix it up if it still doesn't fit after relaxation. */
2053 case 2:
2054 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2056 /* These are for SH2e, in which we have to account for the
2057 extra nop because of the hardware bug in annulled branches. */
2058 case 8:
2059 if (! TARGET_RELAX)
2061 int label = lf++;
2063 gcc_assert (!final_sequence
2064 || !(INSN_ANNULLED_BRANCH_P
2065 (XVECEXP (final_sequence, 0, 0))));
2066 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2067 logic ? "f" : "t",
2068 ASSEMBLER_DIALECT ? "/" : ".", label);
2069 fprintf (asm_out_file, "\tnop\n");
2070 output_asm_insn ("bra\t%l0", operands);
2071 fprintf (asm_out_file, "\tnop\n");
2072 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2074 return "";
2076 /* When relaxing, fall through. */
2077 case 4:
2079 char buffer[10];
2081 sprintf (buffer, "b%s%ss\t%%l0",
2082 logic ? "t" : "f",
2083 ASSEMBLER_DIALECT ? "/" : ".");
2084 output_asm_insn (buffer, &operands[0]);
2085 return "nop";
2088 default:
2089 /* There should be no longer branches now - that would
2090 indicate that something has destroyed the branches set
2091 up in machine_dependent_reorg. */
2092 gcc_unreachable ();
2096 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2097 fill in operand 9 as a label to the successor insn.
2098 We try to use jump threading where possible.
2099 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2100 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2101 follow jmp and bt, if the address is in range. */
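/* For a usage example, see output_ieee_ccmpeq further down, which passes
   NE together with the two-instruction template "bt\t%l9\n\tfcmp/eq\t%1,%0"
   and lets operand 9 be threaded to the following jump where possible.  */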
2102 const char *
2103 output_branchy_insn (enum rtx_code code, const char *template,
2104 rtx insn, rtx *operands)
2106 rtx next_insn = NEXT_INSN (insn);
2108 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2110 rtx src = SET_SRC (PATTERN (next_insn));
2111 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2113 /* Following branch not taken */
2114 operands[9] = gen_label_rtx ();
2115 emit_label_after (operands[9], next_insn);
2116 INSN_ADDRESSES_NEW (operands[9],
2117 INSN_ADDRESSES (INSN_UID (next_insn))
2118 + get_attr_length (next_insn));
2119 return template;
2121 else
2123 int offset = (branch_dest (next_insn)
2124 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2125 if (offset >= -252 && offset <= 258)
2127 if (GET_CODE (src) == IF_THEN_ELSE)
2128 /* branch_true */
2129 src = XEXP (src, 1);
2130 operands[9] = src;
2131 return template;
2135 operands[9] = gen_label_rtx ();
2136 emit_label_after (operands[9], insn);
2137 INSN_ADDRESSES_NEW (operands[9],
2138 INSN_ADDRESSES (INSN_UID (insn))
2139 + get_attr_length (insn));
2140 return template;
2143 const char *
2144 output_ieee_ccmpeq (rtx insn, rtx *operands)
2146 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2147 insn, operands);
2150 /* Output the start of the assembler file. */
2152 static void
2153 sh_file_start (void)
2155 default_file_start ();
2157 #ifdef SYMBIAN
2158 /* Declare the .directive section before it is used. */
2159 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2160 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2161 #endif
2163 if (TARGET_ELF)
2164 /* We need to show the text section with the proper
2165 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2166 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2167 will complain. We can teach GAS specifically about the
2168 default attributes for our choice of text section, but
2169 then we would have to change GAS again if/when we change
2170 the text section name. */
2171 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2172 else
2173 /* Switch to the data section so that the coffsem symbol
2174 isn't in the text section. */
2175 switch_to_section (data_section);
2177 if (TARGET_LITTLE_ENDIAN)
2178 fputs ("\t.little\n", asm_out_file);
2180 if (!TARGET_ELF)
2182 if (TARGET_SHCOMPACT)
2183 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2184 else if (TARGET_SHMEDIA)
2185 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2186 TARGET_SHMEDIA64 ? 64 : 32);
2190 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2192 static bool
2193 unspec_caller_rtx_p (rtx pat)
2195 switch (GET_CODE (pat))
2197 case CONST:
2198 return unspec_caller_rtx_p (XEXP (pat, 0));
2199 case PLUS:
2200 case MINUS:
2201 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2202 return true;
2203 return unspec_caller_rtx_p (XEXP (pat, 1));
2204 case UNSPEC:
2205 if (XINT (pat, 1) == UNSPEC_CALLER)
2206 return true;
2207 default:
2208 break;
2211 return false;
2214 /* Indicate that INSN cannot be duplicated. This is true for an insn
2215 that generates a unique label. */
2217 static bool
2218 sh_cannot_copy_insn_p (rtx insn)
2220 rtx pat;
2222 if (!reload_completed || !flag_pic)
2223 return false;
2225 if (GET_CODE (insn) != INSN)
2226 return false;
2227 if (asm_noperands (insn) >= 0)
2228 return false;
2230 pat = PATTERN (insn);
2231 if (GET_CODE (pat) != SET)
2232 return false;
2233 pat = SET_SRC (pat);
2235 if (unspec_caller_rtx_p (pat))
2236 return true;
2238 return false;
2241 /* Actual number of instructions used to make a shift by N. */
2242 static const char ashiftrt_insns[] =
2243 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2245 /* Left shift and logical right shift are the same. */
2246 static const char shift_insns[] =
2247 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2249 /* Individual shift amounts needed to get the above length sequences.
2250 One bit right shifts clobber the T bit, so when possible, put one bit
2251 shifts in the middle of the sequence, so the ends are eligible for
2252 branch delay slots. */
2253 static const short shift_amounts[32][5] = {
2254 {0}, {1}, {2}, {2, 1},
2255 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2256 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2257 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2258 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2259 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2260 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2261 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
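/* Worked example, reading the two tables together: a constant shift by 7
   costs shift_insns[7] == 4 insns, using the amounts {2, 2, 1, 2}; a shift
   by 30 costs 3 insns via {16, -2, 16}, where the negative entry is emitted
   as a 2-bit shift in the opposite direction (see gen_ashift below).  */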
2263 /* Likewise, but for shift amounts < 16, up to three highmost bits
2264 might be clobbered. This is typically used when combined with some
2265 kind of sign or zero extension. */
2267 static const char ext_shift_insns[] =
2268 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2270 static const short ext_shift_amounts[32][4] = {
2271 {0}, {1}, {2}, {2, 1},
2272 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2273 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2274 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2275 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2276 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2277 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2278 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2280 /* Assuming we have a value that has been sign-extended by at least one bit,
2281 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2282 to shift it by N without data loss, and quicker than by other means? */
2283 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
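/* In other words, EXT_SHIFT_SIGNED holds exactly for N == 7 and N == 15:
   the two entries above whose sequences end in a single one-bit right shift
   ({8, -1} and {16, -1}), which is the shift that gets turned into an
   arithmetic shift.  */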
2285 /* This is used in length attributes in sh.md to help compute the length
2286 of arbitrary constant shift instructions. */
2289 shift_insns_rtx (rtx insn)
2291 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2292 int shift_count = INTVAL (XEXP (set_src, 1));
2293 enum rtx_code shift_code = GET_CODE (set_src);
2295 switch (shift_code)
2297 case ASHIFTRT:
2298 return ashiftrt_insns[shift_count];
2299 case LSHIFTRT:
2300 case ASHIFT:
2301 return shift_insns[shift_count];
2302 default:
2303 gcc_unreachable ();
2307 /* Return the cost of a shift. */
2309 static inline int
2310 shiftcosts (rtx x)
2312 int value;
2314 if (TARGET_SHMEDIA)
2315 return 1;
2317 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2319 if (GET_MODE (x) == DImode
2320 && GET_CODE (XEXP (x, 1)) == CONST_INT
2321 && INTVAL (XEXP (x, 1)) == 1)
2322 return 2;
2324 /* Everything else is invalid, because there is no pattern for it. */
2325 return MAX_COST;
2327 /* If the shift is by a non-constant amount, then this will be expensive. */
2328 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2329 return SH_DYNAMIC_SHIFT_COST;
2331 value = INTVAL (XEXP (x, 1));
2333 /* Otherwise, return the true cost in instructions. */
2334 if (GET_CODE (x) == ASHIFTRT)
2336 int cost = ashiftrt_insns[value];
2337 /* If SH3, then we put the constant in a reg and use shad. */
2338 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2339 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2340 return cost;
2342 else
2343 return shift_insns[value];
2346 /* Return the cost of an AND operation. */
2348 static inline int
2349 andcosts (rtx x)
2351 int i;
2353 /* Anding with a register is a single cycle and instruction. */
2354 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2355 return 1;
2357 i = INTVAL (XEXP (x, 1));
2359 if (TARGET_SHMEDIA)
2361 if (satisfies_constraint_I10 (XEXP (x, 1))
2362 || satisfies_constraint_J16 (XEXP (x, 1)))
2363 return 1;
2364 else
2365 return 1 + rtx_cost (XEXP (x, 1), AND);
2368 /* These constants are single cycle extu.[bw] instructions. */
2369 if (i == 0xff || i == 0xffff)
2370 return 1;
2371 /* Constants that can be used in an and immediate instruction in a single
2372 cycle, but this requires r0, so make it a little more expensive. */
2373 if (CONST_OK_FOR_K08 (i))
2374 return 2;
2375 /* Constants that can be loaded with a mov immediate and an and.
2376 This case is probably unnecessary. */
2377 if (CONST_OK_FOR_I08 (i))
2378 return 2;
2379 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2380 This case is probably unnecessary. */
2381 return 3;
2384 /* Return the cost of an addition or a subtraction. */
2386 static inline int
2387 addsubcosts (rtx x)
2389 /* Adding a register is a single cycle insn. */
2390 if (GET_CODE (XEXP (x, 1)) == REG
2391 || GET_CODE (XEXP (x, 1)) == SUBREG)
2392 return 1;
2394 /* Likewise for small constants. */
2395 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2396 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2397 return 1;
2399 if (TARGET_SHMEDIA)
2400 switch (GET_CODE (XEXP (x, 1)))
2402 case CONST:
2403 case LABEL_REF:
2404 case SYMBOL_REF:
2405 return TARGET_SHMEDIA64 ? 5 : 3;
2407 case CONST_INT:
2408 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2409 return 2;
2410 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2411 return 3;
2412 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2413 return 4;
2415 /* Fall through. */
2416 default:
2417 return 5;
2420 /* Any other constant requires a 2 cycle pc-relative load plus an
2421 addition. */
2422 return 3;
2425 /* Return the cost of a multiply. */
2426 static inline int
2427 multcosts (rtx x ATTRIBUTE_UNUSED)
2429 if (sh_multcost >= 0)
2430 return sh_multcost;
2431 if (TARGET_SHMEDIA)
2432 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2433 accept constants. Ideally, we would use a cost of one or two and
2434 add the cost of the operand, but disregard the latter when inside loops
2435 and loop invariant code motion is still to follow.
2436 Using a multiply first and splitting it later if it's a loss
2437 doesn't work because of different sign / zero extension semantics
2438 of multiplies vs. shifts. */
2439 return TARGET_SMALLCODE ? 2 : 3;
2441 if (TARGET_SH2)
2443 /* We have a mul insn, so we can never take more than the mul and the
2444 read of the mac reg, but count more because of the latency and extra
2445 reg usage. */
2446 if (TARGET_SMALLCODE)
2447 return 2;
2448 return 3;
2451 /* If we're aiming at small code, then just count the number of
2452 insns in a multiply call sequence. */
2453 if (TARGET_SMALLCODE)
2454 return 5;
2456 /* Otherwise count all the insns in the routine we'd be calling too. */
2457 return 20;
2460 /* Compute a (partial) cost for rtx X. Return true if the complete
2461 cost has been computed, and false if subexpressions should be
2462 scanned. In either case, *TOTAL contains the cost result. */
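/* Two illustrative data points for the CONST_INT case below (non-SHmedia):
   a small constant such as 5 satisfies CONST_OK_FOR_I08 and is costed at 0,
   while a constant like 0x12345 that falls outside all the special ranges
   gets the fallback cost of 8, reflecting the constant-pool load it will
   typically need.  */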
2464 static bool
2465 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2467 switch (code)
2469 case CONST_INT:
2470 if (TARGET_SHMEDIA)
2472 if (INTVAL (x) == 0)
2473 *total = 0;
2474 else if (outer_code == AND && and_operand ((x), DImode))
2475 *total = 0;
2476 else if ((outer_code == IOR || outer_code == XOR
2477 || outer_code == PLUS)
2478 && CONST_OK_FOR_I10 (INTVAL (x)))
2479 *total = 0;
2480 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2481 *total = COSTS_N_INSNS (outer_code != SET);
2482 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2483 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2484 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2485 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2486 else
2487 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2488 return true;
2490 if (CONST_OK_FOR_I08 (INTVAL (x)))
2491 *total = 0;
2492 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2493 && CONST_OK_FOR_K08 (INTVAL (x)))
2494 *total = 1;
2495 /* prepare_cmp_insn will force costly constants into registers before
2496 the cbranch[sd]i4 patterns can see them, so preserve potentially
2497 interesting ones not covered by I08 above. */
2498 else if (outer_code == COMPARE
2499 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2500 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2501 || INTVAL (x) == 0x7fffffff
2502 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2503 *total = 1;
2504 else
2505 *total = 8;
2506 return true;
2508 case CONST:
2509 case LABEL_REF:
2510 case SYMBOL_REF:
2511 if (TARGET_SHMEDIA64)
2512 *total = COSTS_N_INSNS (4);
2513 else if (TARGET_SHMEDIA32)
2514 *total = COSTS_N_INSNS (2);
2515 else
2516 *total = 5;
2517 return true;
2519 case CONST_DOUBLE:
2520 if (TARGET_SHMEDIA)
2521 *total = COSTS_N_INSNS (4);
2522 /* prepare_cmp_insn will force costly constants into registers before
2523 the cbranchdi4 pattern can see them, so preserve potentially
2524 interesting ones. */
2525 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2526 *total = 1;
2527 else
2528 *total = 10;
2529 return true;
2530 case CONST_VECTOR:
2531 if (x == CONST0_RTX (GET_MODE (x)))
2532 *total = 0;
2533 else if (sh_1el_vec (x, VOIDmode))
2534 *total = outer_code != SET;
2535 else if (sh_rep_vec (x, VOIDmode))
2536 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2537 + (outer_code != SET));
2538 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2539 return true;
2541 case PLUS:
2542 case MINUS:
2543 *total = COSTS_N_INSNS (addsubcosts (x));
2544 return true;
2546 case AND:
2547 *total = COSTS_N_INSNS (andcosts (x));
2548 return true;
2550 case MULT:
2551 *total = COSTS_N_INSNS (multcosts (x));
2552 return true;
2554 case ASHIFT:
2555 case ASHIFTRT:
2556 case LSHIFTRT:
2557 *total = COSTS_N_INSNS (shiftcosts (x));
2558 return true;
2560 case DIV:
2561 case UDIV:
2562 case MOD:
2563 case UMOD:
2564 *total = COSTS_N_INSNS (20);
2565 return true;
2567 case PARALLEL:
2568 if (sh_1el_vec (x, VOIDmode))
2569 *total = outer_code != SET;
2570 else if (sh_rep_vec (x, VOIDmode))
2571 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2572 + (outer_code != SET));
2573 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2574 return true;
2576 case FLOAT:
2577 case FIX:
2578 *total = 100;
2579 return true;
2581 default:
2582 return false;
2586 /* Compute the cost of an address. For the SH, all valid addresses are
2587 the same cost. Use a slightly higher cost for reg + reg addressing,
2588 since it increases pressure on r0. */
2590 static int
2591 sh_address_cost (rtx X)
2593 return (GET_CODE (X) == PLUS
2594 && ! CONSTANT_P (XEXP (X, 1))
2595 && ! TARGET_SHMEDIA ? 1 : 0);
2598 /* Code to expand a shift. */
2600 void
2601 gen_ashift (int type, int n, rtx reg)
2603 /* Negative values here come from the shift_amounts array. */
2604 if (n < 0)
2606 if (type == ASHIFT)
2607 type = LSHIFTRT;
2608 else
2609 type = ASHIFT;
2610 n = -n;
2613 switch (type)
2615 case ASHIFTRT:
2616 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2617 break;
2618 case LSHIFTRT:
2619 if (n == 1)
2620 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2621 else
2622 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2623 break;
2624 case ASHIFT:
2625 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2626 break;
2630 /* Same for HImode */
2632 void
2633 gen_ashift_hi (int type, int n, rtx reg)
2635 /* Negative values here come from the shift_amounts array. */
2636 if (n < 0)
2638 if (type == ASHIFT)
2639 type = LSHIFTRT;
2640 else
2641 type = ASHIFT;
2642 n = -n;
2645 switch (type)
2647 case ASHIFTRT:
2648 case LSHIFTRT:
2649 /* We don't have HImode right shift operations because using the
2650 ordinary 32 bit shift instructions for that doesn't generate proper
2651 zero/sign extension.
2652 gen_ashift_hi is only called in contexts where we know that the
2653 sign extension works out correctly. */
2655 int offset = 0;
2656 if (GET_CODE (reg) == SUBREG)
2658 offset = SUBREG_BYTE (reg);
2659 reg = SUBREG_REG (reg);
2661 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2662 break;
2664 case ASHIFT:
2665 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2666 break;
2670 /* Output RTL to split a constant shift into its component SH constant
2671 shift instructions. */
2673 void
2674 gen_shifty_op (int code, rtx *operands)
2676 int value = INTVAL (operands[2]);
2677 int max, i;
2679 /* Truncate the shift count in case it is out of bounds. */
2680 value = value & 0x1f;
2682 if (value == 31)
2684 if (code == LSHIFTRT)
2686 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2687 emit_insn (gen_movt (operands[0]));
2688 return;
2690 else if (code == ASHIFT)
2692 /* There is a two instruction sequence for 31 bit left shifts,
2693 but it requires r0. */
2694 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2696 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2697 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2698 return;
2702 else if (value == 0)
2704 /* This can happen even when optimizing, if there were subregs before
2705 reload. Don't output a nop here, as this is never optimized away;
2706 use a no-op move instead. */
2707 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2708 return;
2711 max = shift_insns[value];
2712 for (i = 0; i < max; i++)
2713 gen_ashift (code, shift_amounts[value][i], operands[0]);
2716 /* Same as above, but optimized for values where the topmost bits don't
2717 matter. */
2719 void
2720 gen_shifty_hi_op (int code, rtx *operands)
2722 int value = INTVAL (operands[2]);
2723 int max, i;
2724 void (*gen_fun) (int, int, rtx);
2726 /* This operation is used by and_shl for SImode values with a few
2727 high bits known to be cleared. */
2728 value &= 31;
2729 if (value == 0)
2731 emit_insn (gen_nop ());
2732 return;
2735 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2736 if (code == ASHIFT)
2738 max = ext_shift_insns[value];
2739 for (i = 0; i < max; i++)
2740 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2742 else
2743 /* When shifting right, emit the shifts in reverse order, so that
2744 solitary negative values come first. */
2745 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2746 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2749 /* Output RTL for an arithmetic right shift. */
2751 /* ??? Rewrite to use super-optimizer sequences. */
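/* For example (ignoring the TARGET_SH3 dynamic-shift path): a constant
   arithmetic shift right by 18 is expanded as ashrsi2_16 plus two
   single-bit shifts, while a shift by 10 falls through to the tail below,
   which loads r4 and calls the __ashiftrt_r4_10 helper.  */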
2754 expand_ashiftrt (rtx *operands)
2756 rtx wrk;
2757 char func[18];
2758 int value;
2760 if (TARGET_SH3)
2762 if (GET_CODE (operands[2]) != CONST_INT)
2764 rtx count = copy_to_mode_reg (SImode, operands[2]);
2765 emit_insn (gen_negsi2 (count, count));
2766 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2767 return 1;
2769 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2770 > 1 + SH_DYNAMIC_SHIFT_COST)
2772 rtx count
2773 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2774 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2775 return 1;
2778 if (GET_CODE (operands[2]) != CONST_INT)
2779 return 0;
2781 value = INTVAL (operands[2]) & 31;
2783 if (value == 31)
2785 /* If we are called from abs expansion, arrange things so that
2786 we can use a single MT instruction that doesn't clobber the source,
2787 if LICM can hoist out the load of the constant zero. */
2788 if (currently_expanding_to_rtl)
2790 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2791 operands[1]));
2792 emit_insn (gen_mov_neg_si_t (operands[0]));
2793 return 1;
2795 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2796 return 1;
2798 else if (value >= 16 && value <= 19)
2800 wrk = gen_reg_rtx (SImode);
2801 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2802 value -= 16;
2803 while (value--)
2804 gen_ashift (ASHIFTRT, 1, wrk);
2805 emit_move_insn (operands[0], wrk);
2806 return 1;
2808 /* Expand a short sequence inline; for a longer one, call a magic routine. */
2809 else if (value <= 5)
2811 wrk = gen_reg_rtx (SImode);
2812 emit_move_insn (wrk, operands[1]);
2813 while (value--)
2814 gen_ashift (ASHIFTRT, 1, wrk);
2815 emit_move_insn (operands[0], wrk);
2816 return 1;
2819 wrk = gen_reg_rtx (Pmode);
2821 /* Load the value into an arg reg and call a helper. */
2822 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2823 sprintf (func, "__ashiftrt_r4_%d", value);
2824 function_symbol (wrk, func, SFUNC_STATIC);
2825 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2826 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2827 return 1;
2831 sh_dynamicalize_shift_p (rtx count)
2833 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2836 /* Try to find a good way to implement the combiner pattern
2837 [(set (match_operand:SI 0 "register_operand" "r")
2838 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2839 (match_operand:SI 2 "const_int_operand" "n"))
2840 (match_operand:SI 3 "const_int_operand" "n"))) .
2841 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2842 return 0 for simple right / left or left/right shift combination.
2843 return 1 for a combination of shifts with zero_extend.
2844 return 2 for a combination of shifts with an AND that needs r0.
2845 return 3 for a combination of shifts with an AND that needs an extra
2846 scratch register, when the three highmost bits of the AND mask are clear.
2847 return 4 for a combination of shifts with an AND that needs an extra
2848 scratch register, when any of the three highmost bits of the AND mask
2849 is set.
2850 If ATTRP is set, store an initial right shift width in ATTRP[0],
2851 and the instruction length in ATTRP[1] . These values are not valid
2852 when returning 0.
2853 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2854 shift_amounts for the last shift value that is to be used before the
2855 sign extend. */
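/* Worked example: LEFT == 1 and MASK == 0x1FE, i.e. (x << 1) & 0x1FE.
   The mask shifted right by LEFT is 0xFF, so the zero-extend alternative
   wins: the function returns 1 and reports a length of 2 in ATTRP[1],
   matching a byte zero-extend followed by a single left shift.  */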
2857 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2859 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2860 int left = INTVAL (left_rtx), right;
2861 int best = 0;
2862 int cost, best_cost = 10000;
2863 int best_right = 0, best_len = 0;
2864 int i;
2865 int can_ext;
2867 if (left < 0 || left > 31)
2868 return 0;
2869 if (GET_CODE (mask_rtx) == CONST_INT)
2870 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2871 else
2872 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2873 /* Can this be expressed as a right shift / left shift pair? */
2874 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2875 right = exact_log2 (lsb);
2876 mask2 = ~(mask + lsb - 1);
2877 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2878 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2879 if (! mask2)
2880 best_cost = shift_insns[right] + shift_insns[right + left];
2881 /* mask has no trailing zeroes <==> ! right */
2882 else if (! right && mask2 == ~(lsb2 - 1))
2884 int late_right = exact_log2 (lsb2);
2885 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2887 /* Try to use zero extend. */
2888 if (mask2 == ~(lsb2 - 1))
2890 int width, first;
2892 for (width = 8; width <= 16; width += 8)
2894 /* Can we zero-extend right away? */
2895 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2897 cost
2898 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2899 if (cost < best_cost)
2901 best = 1;
2902 best_cost = cost;
2903 best_right = right;
2904 best_len = cost;
2905 if (attrp)
2906 attrp[2] = -1;
2908 continue;
2910 /* ??? Could try to put zero extend into initial right shift,
2911 or even shift a bit left before the right shift. */
2912 /* Determine value of first part of left shift, to get to the
2913 zero extend cut-off point. */
2914 first = width - exact_log2 (lsb2) + right;
2915 if (first >= 0 && right + left - first >= 0)
2917 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2918 + ext_shift_insns[right + left - first];
2919 if (cost < best_cost)
2921 best = 1;
2922 best_cost = cost;
2923 best_right = right;
2924 best_len = cost;
2925 if (attrp)
2926 attrp[2] = first;
2931 /* Try to use r0 AND pattern */
2932 for (i = 0; i <= 2; i++)
2934 if (i > right)
2935 break;
2936 if (! CONST_OK_FOR_K08 (mask >> i))
2937 continue;
2938 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2939 if (cost < best_cost)
2941 best = 2;
2942 best_cost = cost;
2943 best_right = i;
2944 best_len = cost - 1;
2947 /* Try to use a scratch register to hold the AND operand. */
2948 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2949 for (i = 0; i <= 2; i++)
2951 if (i > right)
2952 break;
2953 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2954 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2955 if (cost < best_cost)
2957 best = 4 - can_ext;
2958 best_cost = cost;
2959 best_right = i;
2960 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2964 if (attrp)
2966 attrp[0] = best_right;
2967 attrp[1] = best_len;
2969 return best;
2972 /* This is used in length attributes of the unnamed instructions
2973 corresponding to shl_and_kind return values of 1 and 2. */
2975 shl_and_length (rtx insn)
2977 rtx set_src, left_rtx, mask_rtx;
2978 int attributes[3];
2980 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2981 left_rtx = XEXP (XEXP (set_src, 0), 1);
2982 mask_rtx = XEXP (set_src, 1);
2983 shl_and_kind (left_rtx, mask_rtx, attributes);
2984 return attributes[1];
2987 /* This is used in length attribute of the and_shl_scratch instruction. */
2990 shl_and_scr_length (rtx insn)
2992 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2993 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2994 rtx op = XEXP (set_src, 0);
2995 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2996 op = XEXP (XEXP (op, 0), 0);
2997 return len + shift_insns[INTVAL (XEXP (op, 1))];
3000 /* Generate rtl for instructions for which shl_and_kind advised a particular
3001 method of generating them, i.e. returned nonzero. */
3004 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3006 int attributes[3];
3007 unsigned HOST_WIDE_INT mask;
3008 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3009 int right, total_shift;
3010 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3012 right = attributes[0];
3013 total_shift = INTVAL (left_rtx) + right;
3014 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3015 switch (kind)
3017 default:
3018 return -1;
3019 case 1:
3021 int first = attributes[2];
3022 rtx operands[3];
3024 if (first < 0)
3026 emit_insn ((mask << right) <= 0xff
3027 ? gen_zero_extendqisi2 (dest,
3028 gen_lowpart (QImode, source))
3029 : gen_zero_extendhisi2 (dest,
3030 gen_lowpart (HImode, source)));
3031 source = dest;
3033 if (source != dest)
3034 emit_insn (gen_movsi (dest, source));
3035 operands[0] = dest;
3036 if (right)
3038 operands[2] = GEN_INT (right);
3039 gen_shifty_hi_op (LSHIFTRT, operands);
3041 if (first > 0)
3043 operands[2] = GEN_INT (first);
3044 gen_shifty_hi_op (ASHIFT, operands);
3045 total_shift -= first;
3046 mask <<= first;
3048 if (first >= 0)
3049 emit_insn (mask <= 0xff
3050 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3051 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3052 if (total_shift > 0)
3054 operands[2] = GEN_INT (total_shift);
3055 gen_shifty_hi_op (ASHIFT, operands);
3057 break;
3059 case 4:
3060 shift_gen_fun = gen_shifty_op;
3061 case 3:
3062 /* If the topmost bit that matters is set, set the topmost bits
3063 that don't matter. This way, we might be able to get a shorter
3064 signed constant. */
3065 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3066 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3067 case 2:
3068 /* Don't expand fine-grained when combining, because that will
3069 make the pattern fail. */
3070 if (currently_expanding_to_rtl
3071 || reload_in_progress || reload_completed)
3073 rtx operands[3];
3075 /* Cases 3 and 4 should be handled by this split
3076 only while combining */
3077 gcc_assert (kind <= 2);
3078 if (right)
3080 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3081 source = dest;
3083 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3084 if (total_shift)
3086 operands[0] = dest;
3087 operands[1] = dest;
3088 operands[2] = GEN_INT (total_shift);
3089 shift_gen_fun (ASHIFT, operands);
3091 break;
3093 else
3095 int neg = 0;
3096 if (kind != 4 && total_shift < 16)
3098 neg = -ext_shift_amounts[total_shift][1];
3099 if (neg > 0)
3100 neg -= ext_shift_amounts[total_shift][2];
3101 else
3102 neg = 0;
3104 emit_insn (gen_and_shl_scratch (dest, source,
3105 GEN_INT (right),
3106 GEN_INT (mask),
3107 GEN_INT (total_shift + neg),
3108 GEN_INT (neg)));
3109 emit_insn (gen_movsi (dest, dest));
3110 break;
3113 return 0;
3116 /* Try to find a good way to implement the combiner pattern
3117 [(set (match_operand:SI 0 "register_operand" "=r")
3118 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3119 (match_operand:SI 2 "const_int_operand" "n")
3120 (match_operand:SI 3 "const_int_operand" "n")
3121 (const_int 0)))
3122 (clobber (reg:SI T_REG))]
3123 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3124 return 0 for simple left / right shift combination.
3125 return 1 for left shift / 8 bit sign extend / left shift.
3126 return 2 for left shift / 16 bit sign extend / left shift.
3127 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3128 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3129 return 5 for left shift / 16 bit sign extend / right shift
3130 return 6 for < 8 bit sign extend / left shift.
3131 return 7 for < 8 bit sign extend / left shift / single right shift.
3132 If COSTP is nonzero, assign the calculated cost to *COSTP. */
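/* Worked example: LEFT == 16 and SIZE == 24 (so insize is 8).  A plain
   left/right shift pair would cost shift_insns[24] + ashiftrt_insns[8]
   == 10 insns, but kind 1 -- an 8-bit sign extend followed by a 16-bit
   left shift (exts.b, shll16) -- does it in 2, so that is what is chosen.  */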
3135 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3137 int left, size, insize, ext;
3138 int cost = 0, best_cost;
3139 int kind;
3141 left = INTVAL (left_rtx);
3142 size = INTVAL (size_rtx);
3143 insize = size - left;
3144 gcc_assert (insize > 0);
3145 /* Default to left / right shift. */
3146 kind = 0;
3147 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3148 if (size <= 16)
3150 /* 16 bit shift / sign extend / 16 bit shift */
3151 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3152 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3153 below, by alternative 3 or something even better. */
3154 if (cost < best_cost)
3156 kind = 5;
3157 best_cost = cost;
3160 /* Try a plain sign extend between two shifts. */
3161 for (ext = 16; ext >= insize; ext -= 8)
3163 if (ext <= size)
3165 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3166 if (cost < best_cost)
3168 kind = ext / (unsigned) 8;
3169 best_cost = cost;
3172 /* Check if we can do a sloppy shift with a final signed shift
3173 restoring the sign. */
3174 if (EXT_SHIFT_SIGNED (size - ext))
3175 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3176 /* If not, maybe it's still cheaper to do the second shift sloppy,
3177 and do a final sign extend? */
3178 else if (size <= 16)
3179 cost = ext_shift_insns[ext - insize] + 1
3180 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3181 else
3182 continue;
3183 if (cost < best_cost)
3185 kind = ext / (unsigned) 8 + 2;
3186 best_cost = cost;
3189 /* Check if we can sign extend in r0 */
3190 if (insize < 8)
3192 cost = 3 + shift_insns[left];
3193 if (cost < best_cost)
3195 kind = 6;
3196 best_cost = cost;
3198 /* Try the same with a final signed shift. */
3199 if (left < 31)
3201 cost = 3 + ext_shift_insns[left + 1] + 1;
3202 if (cost < best_cost)
3204 kind = 7;
3205 best_cost = cost;
3209 if (TARGET_SH3)
3211 /* Try to use a dynamic shift. */
3212 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3213 if (cost < best_cost)
3215 kind = 0;
3216 best_cost = cost;
3219 if (costp)
3220 *costp = cost;
3221 return kind;
3224 /* Function to be used in the length attribute of the instructions
3225 implementing this pattern. */
3228 shl_sext_length (rtx insn)
3230 rtx set_src, left_rtx, size_rtx;
3231 int cost;
3233 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3234 left_rtx = XEXP (XEXP (set_src, 0), 1);
3235 size_rtx = XEXP (set_src, 1);
3236 shl_sext_kind (left_rtx, size_rtx, &cost);
3237 return cost;
3240 /* Generate rtl for this pattern */
3243 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3245 int kind;
3246 int left, size, insize, cost;
3247 rtx operands[3];
3249 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3250 left = INTVAL (left_rtx);
3251 size = INTVAL (size_rtx);
3252 insize = size - left;
3253 switch (kind)
3255 case 1:
3256 case 2:
3257 case 3:
3258 case 4:
3260 int ext = kind & 1 ? 8 : 16;
3261 int shift2 = size - ext;
3263 /* Don't expand fine-grained when combining, because that will
3264 make the pattern fail. */
3265 if (! currently_expanding_to_rtl
3266 && ! reload_in_progress && ! reload_completed)
3268 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3269 emit_insn (gen_movsi (dest, source));
3270 break;
3272 if (dest != source)
3273 emit_insn (gen_movsi (dest, source));
3274 operands[0] = dest;
3275 if (ext - insize)
3277 operands[2] = GEN_INT (ext - insize);
3278 gen_shifty_hi_op (ASHIFT, operands);
3280 emit_insn (kind & 1
3281 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3282 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3283 if (kind <= 2)
3285 if (shift2)
3287 operands[2] = GEN_INT (shift2);
3288 gen_shifty_op (ASHIFT, operands);
3291 else
3293 if (shift2 > 0)
3295 if (EXT_SHIFT_SIGNED (shift2))
3297 operands[2] = GEN_INT (shift2 + 1);
3298 gen_shifty_op (ASHIFT, operands);
3299 operands[2] = const1_rtx;
3300 gen_shifty_op (ASHIFTRT, operands);
3301 break;
3303 operands[2] = GEN_INT (shift2);
3304 gen_shifty_hi_op (ASHIFT, operands);
3306 else if (shift2)
3308 operands[2] = GEN_INT (-shift2);
3309 gen_shifty_hi_op (LSHIFTRT, operands);
3311 emit_insn (size <= 8
3312 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3313 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3315 break;
3317 case 5:
3319 int i = 16 - size;
3320 if (! currently_expanding_to_rtl
3321 && ! reload_in_progress && ! reload_completed)
3322 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3323 else
3325 operands[0] = dest;
3326 operands[2] = GEN_INT (16 - insize);
3327 gen_shifty_hi_op (ASHIFT, operands);
3328 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3330 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3331 while (--i >= 0)
3332 gen_ashift (ASHIFTRT, 1, dest);
3333 break;
3335 case 6:
3336 case 7:
3337 /* Don't expand fine-grained when combining, because that will
3338 make the pattern fail. */
3339 if (! currently_expanding_to_rtl
3340 && ! reload_in_progress && ! reload_completed)
3342 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3343 emit_insn (gen_movsi (dest, source));
3344 break;
3346 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3347 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3348 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3349 operands[0] = dest;
3350 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3351 gen_shifty_op (ASHIFT, operands);
3352 if (kind == 7)
3353 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3354 break;
3355 default:
3356 return -1;
3358 return 0;
3361 /* Prefix a symbol_ref name with "datalabel". */
3364 gen_datalabel_ref (rtx sym)
3366 const char *str;
3368 if (GET_CODE (sym) == LABEL_REF)
3369 return gen_rtx_CONST (GET_MODE (sym),
3370 gen_rtx_UNSPEC (GET_MODE (sym),
3371 gen_rtvec (1, sym),
3372 UNSPEC_DATALABEL));
3374 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3376 str = XSTR (sym, 0);
3377 /* Share all SYMBOL_REF strings with the same value - that is important
3378 for cse. */
3379 str = IDENTIFIER_POINTER (get_identifier (str));
3380 XSTR (sym, 0) = str;
3382 return sym;
3386 static alloc_pool label_ref_list_pool;
3388 typedef struct label_ref_list_d
3390 rtx label;
3391 struct label_ref_list_d *next;
3392 } *label_ref_list_t;
3394 /* The SH cannot load a large constant into a register, constants have to
3395 come from a pc relative load. The reference of a pc relative load
3396 instruction must be less than 1k in front of the instruction. This
3397 means that we often have to dump a constant inside a function, and
3398 generate code to branch around it.
3400 It is important to minimize this, since the branches will slow things
3401 down and make things bigger.
3403 Worst case code looks like:
3405 mov.l L1,rn
3406 bra L2
3408 align
3409 L1: .long value
3413 mov.l L3,rn
3414 bra L4
3416 align
3417 L3: .long value
3421 We fix this by performing a scan before scheduling, which notices which
3422 instructions need to have their operands fetched from the constant table
3423 and builds the table.
3425 The algorithm is:
3427 scan, find an instruction which needs a pcrel move. Look forward, find the
3428 last barrier which is within MAX_COUNT bytes of the requirement.
3429 If there isn't one, make one. Process all the instructions between
3430 the find and the barrier.
3432 In the above example, we can tell that L3 is within 1k of L1, so
3433 the first move can be shrunk from the 3 insn+constant sequence into
3434 just 1 insn, and the constant moved to L3 to make:
3436 mov.l L1,rn
3438 mov.l L3,rn
3439 bra L4
3441 align
3442 L3:.long value
3443 L4:.long value
3445 Then the second move becomes the target for the shortening process. */
3447 typedef struct
3449 rtx value; /* Value in table. */
3450 rtx label; /* Label of value. */
3451 label_ref_list_t wend; /* End of window. */
3452 enum machine_mode mode; /* Mode of value. */
3454 /* True if this constant is accessed as part of a post-increment
3455 sequence. Note that HImode constants are never accessed in this way. */
3456 bool part_of_sequence_p;
3457 } pool_node;
3459 /* The maximum number of constants that can fit into one pool, since
3460 constants in the range 0..510 are at least 2 bytes long, and in the
3461 range from there to 1018 at least 4 bytes. */
3463 #define MAX_POOL_SIZE 372
3464 static pool_node pool_vector[MAX_POOL_SIZE];
3465 static int pool_size;
3466 static rtx pool_window_label;
3467 static int pool_window_last;
3469 static int max_labelno_before_reorg;
3471 /* ??? If we need a constant in HImode which is the truncated value of a
3472 constant we need in SImode, we could combine the two entries thus saving
3473 two bytes. Is this common enough to be worth the effort of implementing
3474 it? */
3476 /* ??? This stuff should be done at the same time that we shorten branches.
3477 As it is now, we must assume that all branches are the maximum size, and
3478 this causes us to almost always output constant pools sooner than
3479 necessary. */
3481 /* Add a constant to the pool and return its label. */
3483 static rtx
3484 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3486 int i;
3487 rtx lab, new;
3488 label_ref_list_t ref, newref;
3490 /* First see if we've already got it. */
3491 for (i = 0; i < pool_size; i++)
3493 if (x->code == pool_vector[i].value->code
3494 && mode == pool_vector[i].mode)
3496 if (x->code == CODE_LABEL)
3498 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3499 continue;
3501 if (rtx_equal_p (x, pool_vector[i].value))
3503 lab = new = 0;
3504 if (! last_value
3505 || ! i
3506 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3508 new = gen_label_rtx ();
3509 LABEL_REFS (new) = pool_vector[i].label;
3510 pool_vector[i].label = lab = new;
3512 if (lab && pool_window_label)
3514 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3515 newref->label = pool_window_label;
3516 ref = pool_vector[pool_window_last].wend;
3517 newref->next = ref;
3518 pool_vector[pool_window_last].wend = newref;
3520 if (new)
3521 pool_window_label = new;
3522 pool_window_last = i;
3523 return lab;
3528 /* Need a new one. */
3529 pool_vector[pool_size].value = x;
3530 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3532 lab = 0;
3533 pool_vector[pool_size - 1].part_of_sequence_p = true;
3535 else
3536 lab = gen_label_rtx ();
3537 pool_vector[pool_size].mode = mode;
3538 pool_vector[pool_size].label = lab;
3539 pool_vector[pool_size].wend = NULL;
3540 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3541 if (lab && pool_window_label)
3543 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3544 newref->label = pool_window_label;
3545 ref = pool_vector[pool_window_last].wend;
3546 newref->next = ref;
3547 pool_vector[pool_window_last].wend = newref;
3549 if (lab)
3550 pool_window_label = lab;
3551 pool_window_last = pool_size;
3552 pool_size++;
3553 return lab;
3556 /* Output the literal table. START, if nonzero, is the first instruction
3557 this table is needed for, and also indicates that there is at least one
3558 casesi_worker_2 instruction; We have to emit the operand3 labels from
3559 these insns at a 4-byte aligned position. BARRIER is the barrier
3560 after which we are to place the table. */
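/* Rough picture of the table the code below emits: HImode entries first,
   after a 2-byte alignment; then 4-byte aligned SImode/SFmode and
   DImode/DFmode entries; when TARGET_FMOVD && TARGET_ALIGN_DOUBLE and a
   DFmode entry is present, the entries are instead interleaved so that the
   DFmode values land on 8-byte aligned addresses.  */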
3562 static void
3563 dump_table (rtx start, rtx barrier)
3565 rtx scan = barrier;
3566 int i;
3567 int need_align = 1;
3568 rtx lab;
3569 label_ref_list_t ref;
3570 int have_df = 0;
3572 /* Do two passes, first time dump out the HI sized constants. */
3574 for (i = 0; i < pool_size; i++)
3576 pool_node *p = &pool_vector[i];
3578 if (p->mode == HImode)
3580 if (need_align)
3582 scan = emit_insn_after (gen_align_2 (), scan);
3583 need_align = 0;
3585 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3586 scan = emit_label_after (lab, scan);
3587 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3588 scan);
3589 for (ref = p->wend; ref; ref = ref->next)
3591 lab = ref->label;
3592 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3595 else if (p->mode == DFmode)
3596 have_df = 1;
3599 need_align = 1;
3601 if (start)
3603 scan = emit_insn_after (gen_align_4 (), scan);
3604 need_align = 0;
3605 for (; start != barrier; start = NEXT_INSN (start))
3606 if (GET_CODE (start) == INSN
3607 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3609 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3610 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3612 scan = emit_label_after (lab, scan);
3615 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3617 rtx align_insn = NULL_RTX;
3619 scan = emit_label_after (gen_label_rtx (), scan);
3620 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3621 need_align = 0;
3623 for (i = 0; i < pool_size; i++)
3625 pool_node *p = &pool_vector[i];
3627 switch (p->mode)
3629 case HImode:
3630 break;
3631 case SImode:
3632 case SFmode:
3633 if (align_insn && !p->part_of_sequence_p)
3635 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3636 emit_label_before (lab, align_insn);
3637 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3638 align_insn);
3639 for (ref = p->wend; ref; ref = ref->next)
3641 lab = ref->label;
3642 emit_insn_before (gen_consttable_window_end (lab),
3643 align_insn);
3645 delete_insn (align_insn);
3646 align_insn = NULL_RTX;
3647 continue;
3649 else
3651 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3652 scan = emit_label_after (lab, scan);
3653 scan = emit_insn_after (gen_consttable_4 (p->value,
3654 const0_rtx), scan);
3655 need_align = ! need_align;
3657 break;
3658 case DFmode:
3659 if (need_align)
3661 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3662 align_insn = scan;
3663 need_align = 0;
3665 case DImode:
3666 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3667 scan = emit_label_after (lab, scan);
3668 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3669 scan);
3670 break;
3671 default:
3672 gcc_unreachable ();
3675 if (p->mode != HImode)
3677 for (ref = p->wend; ref; ref = ref->next)
3679 lab = ref->label;
3680 scan = emit_insn_after (gen_consttable_window_end (lab),
3681 scan);
3686 pool_size = 0;
3689 for (i = 0; i < pool_size; i++)
3691 pool_node *p = &pool_vector[i];
3693 switch (p->mode)
3695 case HImode:
3696 break;
3697 case SImode:
3698 case SFmode:
3699 if (need_align)
3701 need_align = 0;
3702 scan = emit_label_after (gen_label_rtx (), scan);
3703 scan = emit_insn_after (gen_align_4 (), scan);
3705 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3706 scan = emit_label_after (lab, scan);
3707 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3708 scan);
3709 break;
3710 case DFmode:
3711 case DImode:
3712 if (need_align)
3714 need_align = 0;
3715 scan = emit_label_after (gen_label_rtx (), scan);
3716 scan = emit_insn_after (gen_align_4 (), scan);
3718 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3719 scan = emit_label_after (lab, scan);
3720 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3721 scan);
3722 break;
3723 default:
3724 gcc_unreachable ();
3727 if (p->mode != HImode)
3729 for (ref = p->wend; ref; ref = ref->next)
3731 lab = ref->label;
3732 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3737 scan = emit_insn_after (gen_consttable_end (), scan);
3738 scan = emit_barrier_after (scan);
3739 pool_size = 0;
3740 pool_window_label = NULL_RTX;
3741 pool_window_last = 0;
3744 /* Return nonzero if constant would be an ok source for a
3745 mov.w instead of a mov.l. */
3747 static int
3748 hi_const (rtx src)
3750 return (GET_CODE (src) == CONST_INT
3751 && INTVAL (src) >= -32768
3752 && INTVAL (src) <= 32767);
3755 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3757 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3759 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3760 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3761 need to fix it if the input value is CONST_OK_FOR_I08. */
3763 static int
3764 broken_move (rtx insn)
3766 if (GET_CODE (insn) == INSN)
3768 rtx pat = PATTERN (insn);
3769 if (GET_CODE (pat) == PARALLEL)
3770 pat = XVECEXP (pat, 0, 0);
3771 if (GET_CODE (pat) == SET
3772 /* We can load any 8-bit value if we don't care what the high
3773 order bits end up as. */
3774 && GET_MODE (SET_DEST (pat)) != QImode
3775 && (CONSTANT_P (SET_SRC (pat))
3776 /* Match mova_const. */
3777 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3778 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3779 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3780 && ! (TARGET_SH2E
3781 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3782 && (fp_zero_operand (SET_SRC (pat))
3783 || fp_one_operand (SET_SRC (pat)))
3784 /* ??? If this is a -m4 or -m4-single compilation, in general
3785 we don't know the current setting of fpscr, so disable fldi.
3786 There is an exception if this was a register-register move
3787 before reload - and hence it was ascertained that we have
3788 single precision setting - and in a post-reload optimization
3789 we changed this to do a constant load. In that case
3790 we don't have an r0 clobber, hence we must use fldi. */
3791 && (! TARGET_SH4 || TARGET_FMOVD
3792 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3793 == SCRATCH))
3794 && GET_CODE (SET_DEST (pat)) == REG
3795 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3796 && ! (TARGET_SH2A
3797 && GET_MODE (SET_DEST (pat)) == SImode
3798 && (satisfies_constraint_I20 (SET_SRC (pat))
3799 || satisfies_constraint_I28 (SET_SRC (pat))))
3800 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3801 return 1;
3804 return 0;
3807 static int
3808 mova_p (rtx insn)
3810 return (GET_CODE (insn) == INSN
3811 && GET_CODE (PATTERN (insn)) == SET
3812 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3813 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3814 /* Don't match mova_const. */
3815 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3818 /* Fix up a mova from a switch that went out of range. */
3819 static void
3820 fixup_mova (rtx mova)
3822 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3823 if (! flag_pic)
3825 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3826 INSN_CODE (mova) = -1;
3828 else
3830 rtx worker = mova;
3831 rtx lab = gen_label_rtx ();
3832 rtx wpat, wpat0, wpat1, wsrc, diff;
3836 worker = NEXT_INSN (worker);
3837 gcc_assert (worker
3838 && GET_CODE (worker) != CODE_LABEL
3839 && GET_CODE (worker) != JUMP_INSN);
3840 } while (GET_CODE (worker) == NOTE
3841 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3842 wpat = PATTERN (worker);
3843 wpat0 = XVECEXP (wpat, 0, 0);
3844 wpat1 = XVECEXP (wpat, 0, 1);
3845 wsrc = SET_SRC (wpat0);
3846 PATTERN (worker) = (gen_casesi_worker_2
3847 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3848 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3849 XEXP (wpat1, 0)));
3850 INSN_CODE (worker) = -1;
3851 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3852 gen_rtx_LABEL_REF (Pmode, lab));
3853 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3854 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3855 INSN_CODE (mova) = -1;
3859 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3860 *num_mova, and check if the new mova is not nested within the first one.
3861 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3862 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3863 static int
3864 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3866 int n_addr = 0; /* Initialization to shut up spurious warning. */
3867 int f_target, n_target = 0; /* Likewise. */
3869 if (optimize)
3871 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3872 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3873 if (n_addr > n_target || n_addr + 1022 < n_target)
3875 /* Change the mova into a load.
3876 broken_move will then return true for it. */
3877 fixup_mova (new_mova);
3878 return 1;
3881 if (!(*num_mova)++)
3883 *first_mova = new_mova;
3884 return 2;
3886 if (!optimize
3887 || ((f_target
3888 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3889 >= n_target))
3890 return -1;
3892 (*num_mova)--;
3893 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3894 > n_target - n_addr)
3896 fixup_mova (*first_mova);
3897 return 0;
3899 else
3901 fixup_mova (new_mova);
3902 return 1;
3906 /* Find the last barrier from insn FROM which is close enough to hold the
3907 constant pool. If we can't find one, then create one near the end of
3908 the range. */
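/* Illustrative note (assumption-based): the constant pool must sit after a
   BARRIER, i.e. at a spot that can only be reached as data, so that the
   constant words are never executed as instructions.  When no suitable
   barrier exists within the addressing range, the code near the end of this
   function manufactures one by emitting a jump around the future table,
   followed by a barrier and a label.  */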
3910 static rtx
3911 find_barrier (int num_mova, rtx mova, rtx from)
3913 int count_si = 0;
3914 int count_hi = 0;
3915 int found_hi = 0;
3916 int found_si = 0;
3917 int found_di = 0;
3918 int hi_align = 2;
3919 int si_align = 2;
3920 int leading_mova = num_mova;
3921 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3922 int si_limit;
3923 int hi_limit;
3924 rtx orig = from;
3926 /* For HImode: range is 510, add 4 because pc counts from address of
3927 second instruction after this one, subtract 2 for the jump instruction
3928 that we may need to emit before the table, subtract 2 for the instruction
3929 that fills the jump delay slot (in very rare cases, reorg will take an
3930 instruction from after the constant pool or will leave the delay slot
3931 empty). This gives 510.
3932 For SImode: range is 1020, add 4 because pc counts from address of
3933 second instruction after this one, subtract 2 in case pc is 2 byte
3934 aligned, subtract 2 for the jump instruction that we may need to emit
3935 before the table, subtract 2 for the instruction that fills the jump
3936 delay slot. This gives 1018. */
3938 /* The branch will always be shortened now that the reference address for
3939 forward branches is the successor address, thus we need no longer make
3940 adjustments to the [sh]i_limit for -O0. */
3942 si_limit = 1018;
3943 hi_limit = 510;
3945 while (from && count_si < si_limit && count_hi < hi_limit)
3947 int inc = get_attr_length (from);
3948 int new_align = 1;
3950 /* If this is a label that existed at the time of the compute_alignments
3951 call, determine the alignment. N.B. When find_barrier recurses for
3952 an out-of-reach mova, we might see labels at the start of previously
3953 inserted constant tables. */
3954 if (GET_CODE (from) == CODE_LABEL
3955 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3957 if (optimize)
3958 new_align = 1 << label_to_alignment (from);
3959 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3960 new_align = 1 << barrier_align (from);
3961 else
3962 new_align = 1;
3963 inc = 0;
3965 /* In case we are scanning a constant table because of recursion, check
3966 for explicit alignments. If the table is long, we might be forced
3967 to emit the new table in front of it; the length of the alignment
3968 might be the last straw. */
3969 else if (GET_CODE (from) == INSN
3970 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3971 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3972 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3973 /* When we find the end of a constant table, paste the new constant
3974 at the end. That is better than putting it in front because
3975 this way, we don't need extra alignment for adding a 4-byte-aligned
3976 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3977 else if (GET_CODE (from) == INSN
3978 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3979 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3980 return from;
3982 if (GET_CODE (from) == BARRIER)
3984 rtx next;
3986 found_barrier = from;
3988 /* If we are at the end of the function, or in front of an alignment
3989 instruction, we need not insert an extra alignment. We prefer
3990 this kind of barrier. */
3991 if (barrier_align (from) > 2)
3992 good_barrier = from;
3994 /* If we are at the end of a hot/cold block, dump the constants
3995 here. */
3996 next = NEXT_INSN (from);
3997 if (next
3998 && NOTE_P (next)
3999 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4000 break;
4003 if (broken_move (from))
4005 rtx pat, src, dst;
4006 enum machine_mode mode;
4008 pat = PATTERN (from);
4009 if (GET_CODE (pat) == PARALLEL)
4010 pat = XVECEXP (pat, 0, 0);
4011 src = SET_SRC (pat);
4012 dst = SET_DEST (pat);
4013 mode = GET_MODE (dst);
4015 /* We must explicitly check the mode, because sometimes the
4016 front end will generate code to load unsigned constants into
4017 HImode targets without properly sign extending them. */
4018 if (mode == HImode
4019 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4021 found_hi += 2;
4022 /* We put the short constants before the long constants, so
4023 we must count the length of short constants in the range
4024 for the long constants. */
4025 /* ??? This isn't optimal, but is easy to do. */
4026 si_limit -= 2;
4028 else
4030 /* We dump DF/DI constants before SF/SI ones, because
4031 the limit is the same, but the alignment requirements
4032 are higher. We may waste up to 4 additional bytes
4033 for alignment, and the DF/DI constant may have
4034 another SF/SI constant placed before it. */
4035 if (TARGET_SHCOMPACT
4036 && ! found_di
4037 && (mode == DFmode || mode == DImode))
4039 found_di = 1;
4040 si_limit -= 8;
4042 while (si_align > 2 && found_si + si_align - 2 > count_si)
4043 si_align >>= 1;
4044 if (found_si > count_si)
4045 count_si = found_si;
4046 found_si += GET_MODE_SIZE (mode);
4047 if (num_mova)
4048 si_limit -= GET_MODE_SIZE (mode);
4052 if (mova_p (from))
4054 switch (untangle_mova (&num_mova, &mova, from))
4056 case 0: return find_barrier (0, 0, mova);
4057 case 2:
4059 leading_mova = 0;
4060 barrier_before_mova
4061 = good_barrier ? good_barrier : found_barrier;
4063 default: break;
4065 if (found_si > count_si)
4066 count_si = found_si;
4068 else if (GET_CODE (from) == JUMP_INSN
4069 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4070 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4072 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4073 || (num_mova
4074 && (prev_nonnote_insn (from)
4075 == XEXP (MOVA_LABELREF (mova), 0))))
4076 num_mova--;
4077 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4079 /* We have just passed the barrier in front of the
4080 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4081 the ADDR_DIFF_VEC is accessed as data, just like our pool
4082 constants, this is a good opportunity to accommodate what
4083 we have gathered so far.
4084 If we waited any longer, we could end up at a barrier in
4085 front of code, which gives worse cache usage for separated
4086 instruction / data caches. */
4087 good_barrier = found_barrier;
4088 break;
4090 else
4092 rtx body = PATTERN (from);
4093 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4096 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4097 else if (GET_CODE (from) == JUMP_INSN
4098 && ! TARGET_SH2
4099 && ! TARGET_SMALLCODE)
4100 new_align = 4;
4102 if (found_si)
4104 count_si += inc;
4105 if (new_align > si_align)
4107 si_limit -= (count_si - 1) & (new_align - si_align);
4108 si_align = new_align;
4110 count_si = (count_si + new_align - 1) & -new_align;
4112 if (found_hi)
4114 count_hi += inc;
4115 if (new_align > hi_align)
4117 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4118 hi_align = new_align;
4120 count_hi = (count_hi + new_align - 1) & -new_align;
4122 from = NEXT_INSN (from);
4125 if (num_mova)
4127 if (leading_mova)
4129 /* Try as we might, the leading mova is out of range. Change
4130 it into a load (which will become a pcload) and retry. */
4131 fixup_mova (mova);
4132 return find_barrier (0, 0, mova);
4134 else
4136 /* Insert the constant pool table before the mova instruction,
4137 to prevent the mova label reference from going out of range. */
4138 from = mova;
4139 good_barrier = found_barrier = barrier_before_mova;
4143 if (found_barrier)
4145 if (good_barrier && next_real_insn (found_barrier))
4146 found_barrier = good_barrier;
4148 else
4150 /* We didn't find a barrier in time to dump our stuff,
4151 so we'll make one. */
4152 rtx label = gen_label_rtx ();
4154 /* If we exceeded the range, then we must back up over the last
4155 instruction we looked at. Otherwise, we just need to undo the
4156 NEXT_INSN at the end of the loop. */
4157 if (PREV_INSN (from) != orig
4158 && (count_hi > hi_limit || count_si > si_limit))
4159 from = PREV_INSN (PREV_INSN (from));
4160 else
4161 from = PREV_INSN (from);
4163 /* Walk back to be just before any jump or label.
4164 Putting it before a label reduces the number of times the branch
4165 around the constant pool table will be hit. Putting it before
4166 a jump makes it more likely that the bra delay slot will be
4167 filled. */
4168 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4169 || GET_CODE (from) == CODE_LABEL)
4170 from = PREV_INSN (from);
4172 from = emit_jump_insn_after (gen_jump (label), from);
4173 JUMP_LABEL (from) = label;
4174 LABEL_NUSES (label) = 1;
4175 found_barrier = emit_barrier_after (from);
4176 emit_label_after (label, found_barrier);
4179 return found_barrier;
4182 /* If the instruction INSN is implemented by a special function, and we can
4183 positively find the register that is used to call the sfunc, and this
4184 register is not used anywhere else in this instruction - except as the
4185 destination of a set, return this register; else, return 0. */
4187 sfunc_uses_reg (rtx insn)
4189 int i;
4190 rtx pattern, part, reg_part, reg;
4192 if (GET_CODE (insn) != INSN)
4193 return 0;
4194 pattern = PATTERN (insn);
4195 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4196 return 0;
4198 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4200 part = XVECEXP (pattern, 0, i);
4201 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4202 reg_part = part;
4204 if (! reg_part)
4205 return 0;
4206 reg = XEXP (reg_part, 0);
4207 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4209 part = XVECEXP (pattern, 0, i);
4210 if (part == reg_part || GET_CODE (part) == CLOBBER)
4211 continue;
4212 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4213 && GET_CODE (SET_DEST (part)) == REG)
4214 ? SET_SRC (part) : part)))
4215 return 0;
4217 return reg;
4220 /* See if the only way in which INSN uses REG is by calling it, or by
4221 setting it while calling it. Set *SET to a SET rtx if the register
4222 is set by INSN. */
4224 static int
4225 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4227 rtx pattern, reg2;
4229 *set = NULL_RTX;
4231 reg2 = sfunc_uses_reg (insn);
4232 if (reg2 && REGNO (reg2) == REGNO (reg))
4234 pattern = single_set (insn);
4235 if (pattern
4236 && GET_CODE (SET_DEST (pattern)) == REG
4237 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4238 *set = pattern;
4239 return 0;
4241 if (GET_CODE (insn) != CALL_INSN)
4243 /* We don't use rtx_equal_p because we don't care if the mode is
4244 different. */
4245 pattern = single_set (insn);
4246 if (pattern
4247 && GET_CODE (SET_DEST (pattern)) == REG
4248 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4250 rtx par, part;
4251 int i;
4253 *set = pattern;
4254 par = PATTERN (insn);
4255 if (GET_CODE (par) == PARALLEL)
4256 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4258 part = XVECEXP (par, 0, i);
4259 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4260 return 1;
4262 return reg_mentioned_p (reg, SET_SRC (pattern));
4265 return 1;
4268 pattern = PATTERN (insn);
4270 if (GET_CODE (pattern) == PARALLEL)
4272 int i;
4274 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4275 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4276 return 1;
4277 pattern = XVECEXP (pattern, 0, 0);
4280 if (GET_CODE (pattern) == SET)
4282 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4284 /* We don't use rtx_equal_p, because we don't care if the
4285 mode is different. */
4286 if (GET_CODE (SET_DEST (pattern)) != REG
4287 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4288 return 1;
4290 *set = pattern;
4293 pattern = SET_SRC (pattern);
4296 if (GET_CODE (pattern) != CALL
4297 || GET_CODE (XEXP (pattern, 0)) != MEM
4298 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4299 return 1;
4301 return 0;
4304 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4305 general registers. Bits 0..15 mean that the respective registers
4306 are used as inputs in the instruction. Bits 16..31 mean that the
4307 registers 0..15, respectively, are used as outputs, or are clobbered.
4308 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
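/* Illustrative example (an assumption-based sketch, taking SImode to occupy
   a single hard register): for
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   we get regs_used (SET_SRC, 0) | regs_used (SET_DEST, 16)
   = 0x0000000c | 0x00020000 = 0x0002000c,
   i.e. r2 and r3 are inputs and r1 is written.  */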
4310 regs_used (rtx x, int is_dest)
4312 enum rtx_code code;
4313 const char *fmt;
4314 int i, used = 0;
4316 if (! x)
4317 return used;
4318 code = GET_CODE (x);
4319 switch (code)
4321 case REG:
4322 if (REGNO (x) < 16)
4323 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4324 << (REGNO (x) + is_dest));
4325 return 0;
4326 case SUBREG:
4328 rtx y = SUBREG_REG (x);
4330 if (GET_CODE (y) != REG)
4331 break;
4332 if (REGNO (y) < 16)
4333 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4334 << (REGNO (y) +
4335 subreg_regno_offset (REGNO (y),
4336 GET_MODE (y),
4337 SUBREG_BYTE (x),
4338 GET_MODE (x)) + is_dest));
4339 return 0;
4341 case SET:
4342 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4343 case RETURN:
4344 /* If there was a return value, it must have been indicated with USE. */
4345 return 0x00ffff00;
4346 case CLOBBER:
4347 is_dest = 1;
4348 break;
4349 case MEM:
4350 is_dest = 0;
4351 break;
4352 case CALL:
4353 used |= 0x00ff00f0;
4354 break;
4355 default:
4356 break;
4359 fmt = GET_RTX_FORMAT (code);
4361 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4363 if (fmt[i] == 'E')
4365 register int j;
4366 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4367 used |= regs_used (XVECEXP (x, i, j), is_dest);
4369 else if (fmt[i] == 'e')
4370 used |= regs_used (XEXP (x, i), is_dest);
4372 return used;
4375 /* Create an instruction that prevents redirection of a conditional branch
4376 to the destination of the JUMP with address ADDR.
4377 If the branch needs to be implemented as an indirect jump, try to find
4378 a scratch register for it.
4379 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4380 If any preceding insn that doesn't fit into a delay slot is good enough,
4381 pass 1. Pass 2 if a definite blocking insn is needed.
4382 -1 is used internally to avoid deep recursion.
4383 If a blocking instruction is made or recognized, return it. */
4385 static rtx
4386 gen_block_redirect (rtx jump, int addr, int need_block)
4388 int dead = 0;
4389 rtx prev = prev_nonnote_insn (jump);
4390 rtx dest;
4392 /* First, check if we already have an instruction that satisfies our need. */
4393 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4395 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4396 return prev;
4397 if (GET_CODE (PATTERN (prev)) == USE
4398 || GET_CODE (PATTERN (prev)) == CLOBBER
4399 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4400 prev = jump;
4401 else if ((need_block &= ~1) < 0)
4402 return prev;
4403 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4404 need_block = 0;
4406 if (GET_CODE (PATTERN (jump)) == RETURN)
4408 if (! need_block)
4409 return prev;
4410 /* Reorg even does nasty things with return insns that cause branches
4411 to go out of range - see find_end_label and callers. */
4412 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4414 /* We can't use JUMP_LABEL here because it might be undefined
4415 when not optimizing. */
4416 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4417 /* If the branch is out of range, try to find a scratch register for it. */
4418 if (optimize
4419 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4420 > 4092 + 4098))
4422 rtx scan;
4423 /* Don't look for the stack pointer as a scratch register;
4424 it would cause trouble if an interrupt occurred. */
4425 unsigned try = 0x7fff, used;
4426 int jump_left = flag_expensive_optimizations + 1;
4428 /* It is likely that the most recent eligible instruction is wanted for
4429 the delay slot. Therefore, find out which registers it uses, and
4430 try to avoid using them. */
4432 for (scan = jump; (scan = PREV_INSN (scan)); )
4434 enum rtx_code code;
4436 if (INSN_DELETED_P (scan))
4437 continue;
4438 code = GET_CODE (scan);
4439 if (code == CODE_LABEL || code == JUMP_INSN)
4440 break;
4441 if (code == INSN
4442 && GET_CODE (PATTERN (scan)) != USE
4443 && GET_CODE (PATTERN (scan)) != CLOBBER
4444 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4446 try &= ~regs_used (PATTERN (scan), 0);
4447 break;
4450 for (used = dead = 0, scan = JUMP_LABEL (jump);
4451 (scan = NEXT_INSN (scan)); )
4453 enum rtx_code code;
4455 if (INSN_DELETED_P (scan))
4456 continue;
4457 code = GET_CODE (scan);
4458 if (INSN_P (scan))
4460 used |= regs_used (PATTERN (scan), 0);
4461 if (code == CALL_INSN)
4462 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4463 dead |= (used >> 16) & ~used;
4464 if (dead & try)
4466 dead &= try;
4467 break;
4469 if (code == JUMP_INSN)
4471 if (jump_left-- && simplejump_p (scan))
4472 scan = JUMP_LABEL (scan);
4473 else
4474 break;
4478 /* Mask out the stack pointer again, in case it was
4479 the only 'free' register we have found. */
4480 dead &= 0x7fff;
4482 /* If the immediate destination is still in range, check for possible
4483 threading with a jump beyond the delay slot insn.
4484 Don't check if we are called recursively; the jump has been or will be
4485 checked in a different invocation. */
4487 else if (optimize && need_block >= 0)
4489 rtx next = next_active_insn (next_active_insn (dest));
4490 if (next && GET_CODE (next) == JUMP_INSN
4491 && GET_CODE (PATTERN (next)) == SET
4492 && recog_memoized (next) == CODE_FOR_jump_compact)
4494 dest = JUMP_LABEL (next);
4495 if (dest
4496 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4497 > 4092 + 4098))
4498 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4502 if (dead)
4504 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4506 /* It would be nice if we could convert the jump into an indirect
4507 jump / far branch right now, thus exposing all constituent
4508 instructions to further optimization. However, reorg uses
4509 simplejump_p to determine if there is an unconditional jump where
4510 it should try to schedule instructions from the target of the
4511 branch; simplejump_p fails for indirect jumps even if they have
4512 a JUMP_LABEL. */
4513 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4514 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4515 , jump);
4516 /* ??? We would like this to have the scope of the jump, but that
4517 scope will change when a delay slot insn of an inner scope is added.
4518 Hence, after delay slot scheduling, we'll have to expect
4519 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4520 the jump. */
4522 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4523 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4524 return insn;
4526 else if (need_block)
4527 /* We can't use JUMP_LABEL here because it might be undefined
4528 when not optimizing. */
4529 return emit_insn_before (gen_block_branch_redirect
4530 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4531 , jump);
4532 return prev;
4535 #define CONDJUMP_MIN -252
4536 #define CONDJUMP_MAX 262
4537 struct far_branch
4539 /* A label (to be placed) in front of the jump
4540 that jumps to our ultimate destination. */
4541 rtx near_label;
4542 /* Where we are going to insert it if we cannot move the jump any farther,
4543 or the jump itself if we have picked up an existing jump. */
4544 rtx insert_place;
4545 /* The ultimate destination. */
4546 rtx far_label;
4547 struct far_branch *prev;
4548 /* If the branch has already been created, its address;
4549 else the address of its first prospective user. */
4550 int address;
4553 static void gen_far_branch (struct far_branch *);
4554 enum mdep_reorg_phase_e mdep_reorg_phase;
4555 static void
4556 gen_far_branch (struct far_branch *bp)
4558 rtx insn = bp->insert_place;
4559 rtx jump;
4560 rtx label = gen_label_rtx ();
4561 int ok;
4563 emit_label_after (label, insn);
4564 if (bp->far_label)
4566 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4567 LABEL_NUSES (bp->far_label)++;
4569 else
4570 jump = emit_jump_insn_after (gen_return (), insn);
4571 /* Emit a barrier so that reorg knows that any following instructions
4572 are not reachable via a fall-through path.
4573 But don't do this when not optimizing, since we wouldn't suppress the
4574 alignment for the barrier then, and could end up with out-of-range
4575 pc-relative loads. */
4576 if (optimize)
4577 emit_barrier_after (jump);
4578 emit_label_after (bp->near_label, insn);
4579 JUMP_LABEL (jump) = bp->far_label;
4580 ok = invert_jump (insn, label, 1);
4581 gcc_assert (ok);
4583 /* If we are branching around a jump (rather than a return), prevent
4584 reorg from using an insn from the jump target as the delay slot insn -
4585 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4586 and it could cause branches to go out of range. */
4587 if (bp->far_label)
4588 (emit_insn_after
4589 (gen_stuff_delay_slot
4590 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4591 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4592 insn));
4593 /* Prevent reorg from undoing our splits. */
4594 gen_block_redirect (jump, bp->address += 2, 2);
4597 /* Fix up ADDR_DIFF_VECs. */
4598 void
4599 fixup_addr_diff_vecs (rtx first)
4601 rtx insn;
4603 for (insn = first; insn; insn = NEXT_INSN (insn))
4605 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4607 if (GET_CODE (insn) != JUMP_INSN
4608 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4609 continue;
4610 pat = PATTERN (insn);
4611 vec_lab = XEXP (XEXP (pat, 0), 0);
4613 /* Search the matching casesi_jump_2. */
4614 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4616 if (GET_CODE (prev) != JUMP_INSN)
4617 continue;
4618 prevpat = PATTERN (prev);
4619 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4620 continue;
4621 x = XVECEXP (prevpat, 0, 1);
4622 if (GET_CODE (x) != USE)
4623 continue;
4624 x = XEXP (x, 0);
4625 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4626 break;
4628 /* FIXME: This is a bug in the optimizer, but it seems harmless
4629 to just avoid panicking. */
4630 if (!prev)
4631 continue;
4633 /* Emit the reference label of the braf where it belongs, right after
4634 the casesi_jump_2 (i.e. braf). */
4635 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4636 emit_label_after (braf_label, prev);
4638 /* Fix up the ADDR_DIFF_VEC to be relative
4639 to the reference address of the braf. */
4640 XEXP (XEXP (pat, 0), 0) = braf_label;
4644 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4645 a barrier. Return the base 2 logarithm of the desired alignment. */
4647 barrier_align (rtx barrier_or_label)
4649 rtx next = next_real_insn (barrier_or_label), pat, prev;
4650 int slot, credit, jump_to_next = 0;
4652 if (! next)
4653 return 0;
4655 pat = PATTERN (next);
4657 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4658 return 2;
4660 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4661 /* This is a barrier in front of a constant table. */
4662 return 0;
4664 prev = prev_real_insn (barrier_or_label);
4665 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4667 pat = PATTERN (prev);
4668 /* If this is a very small table, we want to keep the alignment after
4669 the table to the minimum for proper code alignment. */
4670 return ((TARGET_SMALLCODE
4671 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4672 <= (unsigned) 1 << (CACHE_LOG - 2)))
4673 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4676 if (TARGET_SMALLCODE)
4677 return 0;
4679 if (! TARGET_SH2 || ! optimize)
4680 return align_jumps_log;
4682 /* When fixing up pcloads, a constant table might be inserted just before
4683 the basic block that ends with the barrier. Thus, we can't trust the
4684 instruction lengths before that. */
4685 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4687 /* Check if there is an immediately preceding branch to the insn beyond
4688 the barrier. We must weigh the cost of discarding useful information
4689 from the current cache line when executing this branch and there is
4690 an alignment, against that of fetching unneeded insns in front of the
4691 branch target when there is no alignment. */
4693 /* There are two delay_slot cases to consider. One is the simple case
4694 where the preceding branch is to the insn beyond the barrier (simple
4695 delay slot filling), and the other is where the preceding branch has
4696 a delay slot that is a duplicate of the insn after the barrier
4697 (fill_eager_delay_slots) and the branch is to the insn after the insn
4698 after the barrier. */
4700 /* PREV is presumed to be the JUMP_INSN for the barrier under
4701 investigation. Skip to the insn before it. */
4702 prev = prev_real_insn (prev);
4704 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4705 credit >= 0 && prev && GET_CODE (prev) == INSN;
4706 prev = prev_real_insn (prev))
4708 jump_to_next = 0;
4709 if (GET_CODE (PATTERN (prev)) == USE
4710 || GET_CODE (PATTERN (prev)) == CLOBBER)
4711 continue;
4712 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4714 prev = XVECEXP (PATTERN (prev), 0, 1);
4715 if (INSN_UID (prev) == INSN_UID (next))
4717 /* Delay slot was filled with insn at jump target. */
4718 jump_to_next = 1;
4719 continue;
4723 if (slot &&
4724 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4725 slot = 0;
4726 credit -= get_attr_length (prev);
4728 if (prev
4729 && GET_CODE (prev) == JUMP_INSN
4730 && JUMP_LABEL (prev))
4732 rtx x;
4733 if (jump_to_next
4734 || next_real_insn (JUMP_LABEL (prev)) == next
4735 /* If relax_delay_slots() decides NEXT was redundant
4736 with some previous instruction, it will have
4737 redirected PREV's jump to the following insn. */
4738 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4739 /* There is no upper bound on redundant instructions
4740 that might have been skipped, but we must not put an
4741 alignment where none had been before. */
4742 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4743 (INSN_P (x)
4744 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4745 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4746 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4748 rtx pat = PATTERN (prev);
4749 if (GET_CODE (pat) == PARALLEL)
4750 pat = XVECEXP (pat, 0, 0);
4751 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4752 return 0;
4757 return align_jumps_log;
4760 /* If we are inside a phony loop, almost any kind of label can turn up as the
4761 first one in the loop. Aligning a braf label causes incorrect switch
4762 destination addresses; we can detect braf labels because they are
4763 followed by a BARRIER.
4764 Applying loop alignment to small constant or switch tables is a waste
4765 of space, so we suppress this too. */
4767 sh_loop_align (rtx label)
4769 rtx next = label;
4772 next = next_nonnote_insn (next);
4773 while (next && GET_CODE (next) == CODE_LABEL);
4775 if (! next
4776 || ! INSN_P (next)
4777 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4778 || recog_memoized (next) == CODE_FOR_consttable_2)
4779 return 0;
4781 return align_loops_log;
4784 /* Do a final pass over the function, just before delayed branch
4785 scheduling. */
4787 static void
4788 sh_reorg (void)
4790 rtx first, insn, mova = NULL_RTX;
4791 int num_mova;
4792 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4793 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4795 first = get_insns ();
4796 max_labelno_before_reorg = max_label_num ();
4798 /* We must split call insns before introducing `mova's. If we're
4799 optimizing, they'll have already been split. Otherwise, make
4800 sure we don't split them too late. */
4801 if (! optimize)
4802 split_all_insns_noflow ();
4804 if (TARGET_SHMEDIA)
4805 return;
4807 /* If relaxing, generate pseudo-ops to associate function calls with
4808 the symbols they call. It does no harm to not generate these
4809 pseudo-ops. However, when we can generate them, it enables the
4810 linker to potentially relax the jsr to a bsr, and eliminate the
4811 register load and, possibly, the constant pool entry. */
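/* Illustrative sketch of the resulting assembly (label and register names
   are made up; the actual output is produced by final_prescan_insn below):

   L42:                     ! label emitted in front of the register load
        mov.l   .Lconst,r1  ! r1 <- address of the callee
        ...
        .uses   L42         ! emitted in front of the call, telling the
        jsr     @r1         ! linker which load pairs with this jsr so it
         nop                ! may relax the jsr into a bsr and drop the load
 */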
4813 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4814 if (TARGET_RELAX)
4816 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4817 own purposes. This works because none of the remaining passes
4818 need to look at them.
4820 ??? But it may break in the future. We should use a machine
4821 dependent REG_NOTE, or some other approach entirely. */
4822 for (insn = first; insn; insn = NEXT_INSN (insn))
4824 if (INSN_P (insn))
4826 rtx note;
4828 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4829 NULL_RTX)) != 0)
4830 remove_note (insn, note);
4834 for (insn = first; insn; insn = NEXT_INSN (insn))
4836 rtx pattern, reg, link, set, scan, dies, label;
4837 int rescan = 0, foundinsn = 0;
4839 if (GET_CODE (insn) == CALL_INSN)
4841 pattern = PATTERN (insn);
4843 if (GET_CODE (pattern) == PARALLEL)
4844 pattern = XVECEXP (pattern, 0, 0);
4845 if (GET_CODE (pattern) == SET)
4846 pattern = SET_SRC (pattern);
4848 if (GET_CODE (pattern) != CALL
4849 || GET_CODE (XEXP (pattern, 0)) != MEM)
4850 continue;
4852 reg = XEXP (XEXP (pattern, 0), 0);
4854 else
4856 reg = sfunc_uses_reg (insn);
4857 if (! reg)
4858 continue;
4861 if (GET_CODE (reg) != REG)
4862 continue;
4864 /* Try scanning backward to find where the register is set. */
4865 link = NULL;
4866 for (scan = PREV_INSN (insn);
4867 scan && GET_CODE (scan) != CODE_LABEL;
4868 scan = PREV_INSN (scan))
4870 if (! INSN_P (scan))
4871 continue;
4873 if (! reg_mentioned_p (reg, scan))
4874 continue;
4876 if (noncall_uses_reg (reg, scan, &set))
4877 break;
4879 if (set)
4881 link = scan;
4882 break;
4886 if (! link)
4887 continue;
4889 /* The register is set at LINK. */
4891 /* We can only optimize the function call if the register is
4892 being set to a symbol. In theory, we could sometimes
4893 optimize calls to a constant location, but the assembler
4894 and linker do not support that at present. */
4895 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4896 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4897 continue;
4899 /* Scan forward from LINK to the place where REG dies, and
4900 make sure that the only insns which use REG are
4901 themselves function calls. */
4903 /* ??? This doesn't work for call targets that were allocated
4904 by reload, since there may not be a REG_DEAD note for the
4905 register. */
4907 dies = NULL_RTX;
4908 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4910 rtx scanset;
4912 /* Don't try to trace forward past a CODE_LABEL if we haven't
4913 seen INSN yet. Ordinarily, we will only find the setting insn
4914 if it is in the same basic block. However,
4915 cross-jumping can insert code labels in between the load and
4916 the call, and can result in situations where a single call
4917 insn may have two targets depending on where we came from. */
4919 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4920 break;
4922 if (! INSN_P (scan))
4923 continue;
4925 /* Don't try to trace forward past a JUMP. To optimize
4926 safely, we would have to check that all the
4927 instructions at the jump destination did not use REG. */
4929 if (GET_CODE (scan) == JUMP_INSN)
4930 break;
4932 if (! reg_mentioned_p (reg, scan))
4933 continue;
4935 if (noncall_uses_reg (reg, scan, &scanset))
4936 break;
4938 if (scan == insn)
4939 foundinsn = 1;
4941 if (scan != insn
4942 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4944 /* There is a function call to this register other
4945 than the one we are checking. If we optimize
4946 this call, we need to rescan again below. */
4947 rescan = 1;
4950 /* ??? We shouldn't have to worry about SCANSET here.
4951 We should just be able to check for a REG_DEAD note
4952 on a function call. However, the REG_DEAD notes are
4953 apparently not dependable around libcalls; c-torture
4954 execute/920501-2 is a test case. If SCANSET is set,
4955 then this insn sets the register, so it must have
4956 died earlier. Unfortunately, this will only handle
4957 the cases in which the register is, in fact, set in a
4958 later insn. */
4960 /* ??? We shouldn't have to use FOUNDINSN here.
4961 This dates back to when we used LOG_LINKS to find
4962 the most recent insn which sets the register. */
4964 if (foundinsn
4965 && (scanset
4966 || find_reg_note (scan, REG_DEAD, reg)))
4968 dies = scan;
4969 break;
4973 if (! dies)
4975 /* Either there was a branch, or some insn used REG
4976 other than as a function call address. */
4977 continue;
4980 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4981 on the insn which sets the register, and on each call insn
4982 which uses the register. In final_prescan_insn we look for
4983 the REG_LABEL_OPERAND notes, and output the appropriate label
4984 or pseudo-op. */
4986 label = gen_label_rtx ();
4987 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4988 REG_NOTES (link));
4989 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4990 REG_NOTES (insn));
4991 if (rescan)
4993 scan = link;
4996 rtx reg2;
4998 scan = NEXT_INSN (scan);
4999 if (scan != insn
5000 && ((GET_CODE (scan) == CALL_INSN
5001 && reg_mentioned_p (reg, scan))
5002 || ((reg2 = sfunc_uses_reg (scan))
5003 && REGNO (reg2) == REGNO (reg))))
5004 REG_NOTES (scan)
5005 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
5006 REG_NOTES (scan));
5008 while (scan != dies);
5013 if (TARGET_SH2)
5014 fixup_addr_diff_vecs (first);
5016 if (optimize)
5018 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5019 shorten_branches (first);
5022 /* Scan the function looking for move instructions which have to be
5023 changed to pc-relative loads and insert the literal tables. */
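/* Illustrative sketch (assumption-based, names made up) of the rewrite this
   pass performs for an out-of-range constant:

        mov.l   .L100,r1        ! pc-relative load replacing the constant move
        ...                     ! code continues; a branch skips the table
        .align  2
   .L100:
        .long   123456789       ! literal-table (constant pool) entry
 */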
5024 label_ref_list_pool = create_alloc_pool ("label references list",
5025 sizeof (struct label_ref_list_d),
5026 30);
5027 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5028 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5030 if (mova_p (insn))
5032 /* ??? basic block reordering can move a switch table dispatch
5033 below the switch table. Check if that has happened.
5034 We only have the addresses available when optimizing; but then,
5035 this check shouldn't be needed when not optimizing. */
5036 if (!untangle_mova (&num_mova, &mova, insn))
5038 insn = mova;
5039 num_mova = 0;
5042 else if (GET_CODE (insn) == JUMP_INSN
5043 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5044 && num_mova
5045 /* ??? loop invariant motion can also move a mova out of a
5046 loop. Since loop does this code motion anyway, maybe we
5047 should wrap UNSPEC_MOVA into a CONST, so that reload can
5048 move it back. */
5049 && ((num_mova > 1
5050 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5051 || (prev_nonnote_insn (insn)
5052 == XEXP (MOVA_LABELREF (mova), 0))))
5054 rtx scan;
5055 int total;
5057 num_mova--;
5059 /* Some code might have been inserted between the mova and
5060 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5061 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5062 total += get_attr_length (scan);
5064 /* Range of mova is 1020, add 4 because pc counts from address of
5065 second instruction after this one, subtract 2 in case pc is 2
5066 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5067 cancels out with alignment effects of the mova itself. */
5068 if (total > 1022)
5070 /* Change the mova into a load, and restart scanning
5071 there. broken_move will then return true for mova. */
5072 fixup_mova (mova);
5073 insn = mova;
5076 if (broken_move (insn)
5077 || (GET_CODE (insn) == INSN
5078 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5080 rtx scan;
5081 /* Scan ahead looking for a barrier to stick the constant table
5082 behind. */
5083 rtx barrier = find_barrier (num_mova, mova, insn);
5084 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5085 int need_aligned_label = 0;
5087 if (num_mova && ! mova_p (mova))
5089 /* find_barrier had to change the first mova into a
5090 pcload; thus, we have to start with this new pcload. */
5091 insn = mova;
5092 num_mova = 0;
5094 /* Now find all the moves between the points and modify them. */
5095 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5097 if (GET_CODE (scan) == CODE_LABEL)
5098 last_float = 0;
5099 if (GET_CODE (scan) == INSN
5100 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5101 need_aligned_label = 1;
5102 if (broken_move (scan))
5104 rtx *patp = &PATTERN (scan), pat = *patp;
5105 rtx src, dst;
5106 rtx lab;
5107 rtx newsrc;
5108 enum machine_mode mode;
5110 if (GET_CODE (pat) == PARALLEL)
5111 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5112 src = SET_SRC (pat);
5113 dst = SET_DEST (pat);
5114 mode = GET_MODE (dst);
5116 if (mode == SImode && hi_const (src)
5117 && REGNO (dst) != FPUL_REG)
5119 int offset = 0;
5121 mode = HImode;
5122 while (GET_CODE (dst) == SUBREG)
5124 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5125 GET_MODE (SUBREG_REG (dst)),
5126 SUBREG_BYTE (dst),
5127 GET_MODE (dst));
5128 dst = SUBREG_REG (dst);
5130 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5132 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5134 /* This must be an insn that clobbers r0. */
5135 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5136 XVECLEN (PATTERN (scan), 0)
5137 - 1);
5138 rtx clobber = *clobberp;
5140 gcc_assert (GET_CODE (clobber) == CLOBBER
5141 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5143 if (last_float
5144 && reg_set_between_p (r0_rtx, last_float_move, scan))
5145 last_float = 0;
5146 if (last_float
5147 && TARGET_SHCOMPACT
5148 && GET_MODE_SIZE (mode) != 4
5149 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5150 last_float = 0;
5151 lab = add_constant (src, mode, last_float);
5152 if (lab)
5153 emit_insn_before (gen_mova (lab), scan);
5154 else
5156 /* There will be a REG_UNUSED note for r0 on
5157 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5158 otherwise reorg:mark_target_live_regs will not
5159 consider r0 to be used, and we end up with a delay
5160 slot insn in front of SCAN that clobbers r0. */
5161 rtx note
5162 = find_regno_note (last_float_move, REG_UNUSED, 0);
5164 /* If we are not optimizing, then there may not be
5165 a note. */
5166 if (note)
5167 PUT_MODE (note, REG_INC);
5169 *last_float_addr = r0_inc_rtx;
5171 last_float_move = scan;
5172 last_float = src;
5173 newsrc = gen_const_mem (mode,
5174 (((TARGET_SH4 && ! TARGET_FMOVD)
5175 || REGNO (dst) == FPUL_REG)
5176 ? r0_inc_rtx
5177 : r0_rtx));
5178 last_float_addr = &XEXP (newsrc, 0);
5180 /* Remove the clobber of r0. */
5181 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5182 gen_rtx_SCRATCH (Pmode));
5184 /* This is a mova needing a label. Create it. */
5185 else if (GET_CODE (src) == UNSPEC
5186 && XINT (src, 1) == UNSPEC_MOVA
5187 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5189 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5190 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5191 newsrc = gen_rtx_UNSPEC (SImode,
5192 gen_rtvec (1, newsrc),
5193 UNSPEC_MOVA);
5195 else
5197 lab = add_constant (src, mode, 0);
5198 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5199 newsrc = gen_const_mem (mode, newsrc);
5201 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5202 INSN_CODE (scan) = -1;
5205 dump_table (need_aligned_label ? insn : 0, barrier);
5206 insn = barrier;
5209 free_alloc_pool (label_ref_list_pool);
5210 for (insn = first; insn; insn = NEXT_INSN (insn))
5211 PUT_MODE (insn, VOIDmode);
5213 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5214 INSN_ADDRESSES_FREE ();
5215 split_branches (first);
5217 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5218 also has an effect on the register that holds the address of the sfunc.
5219 Insert an extra dummy insn in front of each sfunc that pretends to
5220 use this register. */
5221 if (flag_delayed_branch)
5223 for (insn = first; insn; insn = NEXT_INSN (insn))
5225 rtx reg = sfunc_uses_reg (insn);
5227 if (! reg)
5228 continue;
5229 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5232 #if 0
5233 /* fpscr is not actually a user variable, but we pretend it is for the
5234 sake of the previous optimization passes, since we want it handled like
5235 one. However, we don't have any debugging information for it, so turn
5236 it into a non-user variable now. */
5237 if (TARGET_SH4)
5238 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5239 #endif
5240 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5244 get_dest_uid (rtx label, int max_uid)
5246 rtx dest = next_real_insn (label);
5247 int dest_uid;
5248 if (! dest)
5249 /* This can happen for an undefined label. */
5250 return 0;
5251 dest_uid = INSN_UID (dest);
5252 /* If this is a newly created branch redirection blocking instruction,
5253 we cannot index the branch_uid or insn_addresses arrays with its
5254 uid. But then, we won't need to, because the actual destination is
5255 the following branch. */
5256 while (dest_uid >= max_uid)
5258 dest = NEXT_INSN (dest);
5259 dest_uid = INSN_UID (dest);
5261 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5262 return 0;
5263 return dest_uid;
5266 /* Split condbranches that are out of range. Also add clobbers for
5267 scratch registers that are needed in far jumps.
5268 We do this before delay slot scheduling, so that it can take our
5269 newly created instructions into account. It also allows us to
5270 find branches with common targets more easily. */
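/* Illustrative sketch (assembly is schematic, labels made up): a conditional
   branch whose target lies outside the CONDJUMP_MIN..CONDJUMP_MAX window is
   rewritten roughly as

        bt      .Lfar           ! out of range
   =>
        bf      .Lskip          ! condition inverted, short branch
        bra     .Lfar           ! unconditional branch with a larger reach
         nop
   .Lskip:

   gen_far_branch above performs the inversion and emits the bra (or return)
   together with the labels it needs.  */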
5272 static void
5273 split_branches (rtx first)
5275 rtx insn;
5276 struct far_branch **uid_branch, *far_branch_list = 0;
5277 int max_uid = get_max_uid ();
5278 int ok;
5280 /* Find out which branches are out of range. */
5281 shorten_branches (first);
5283 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5284 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5286 for (insn = first; insn; insn = NEXT_INSN (insn))
5287 if (! INSN_P (insn))
5288 continue;
5289 else if (INSN_DELETED_P (insn))
5291 /* Shorten_branches would split this instruction again,
5292 so transform it into a note. */
5293 SET_INSN_DELETED (insn);
5295 else if (GET_CODE (insn) == JUMP_INSN
5296 /* Don't mess with ADDR_DIFF_VEC */
5297 && (GET_CODE (PATTERN (insn)) == SET
5298 || GET_CODE (PATTERN (insn)) == RETURN))
5300 enum attr_type type = get_attr_type (insn);
5301 if (type == TYPE_CBRANCH)
5303 rtx next, beyond;
5305 if (get_attr_length (insn) > 4)
5307 rtx src = SET_SRC (PATTERN (insn));
5308 rtx olabel = XEXP (XEXP (src, 1), 0);
5309 int addr = INSN_ADDRESSES (INSN_UID (insn));
5310 rtx label = 0;
5311 int dest_uid = get_dest_uid (olabel, max_uid);
5312 struct far_branch *bp = uid_branch[dest_uid];
5314 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5315 the label if the LABEL_NUSES count drops to zero. There is
5316 always a jump_optimize pass that sets these values, but it
5317 proceeds to delete unreferenced code, and then if not
5318 optimizing, to un-delete the deleted instructions, thus
5319 leaving labels with use counts that are too low. */
5320 if (! optimize)
5322 JUMP_LABEL (insn) = olabel;
5323 LABEL_NUSES (olabel)++;
5325 if (! bp)
5327 bp = (struct far_branch *) alloca (sizeof *bp);
5328 uid_branch[dest_uid] = bp;
5329 bp->prev = far_branch_list;
5330 far_branch_list = bp;
5331 bp->far_label
5332 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5333 LABEL_NUSES (bp->far_label)++;
5335 else
5337 label = bp->near_label;
5338 if (! label && bp->address - addr >= CONDJUMP_MIN)
5340 rtx block = bp->insert_place;
5342 if (GET_CODE (PATTERN (block)) == RETURN)
5343 block = PREV_INSN (block);
5344 else
5345 block = gen_block_redirect (block,
5346 bp->address, 2);
5347 label = emit_label_after (gen_label_rtx (),
5348 PREV_INSN (block));
5349 bp->near_label = label;
5351 else if (label && ! NEXT_INSN (label))
5353 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5354 bp->insert_place = insn;
5355 else
5356 gen_far_branch (bp);
5359 if (! label
5360 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5362 bp->near_label = label = gen_label_rtx ();
5363 bp->insert_place = insn;
5364 bp->address = addr;
5366 ok = redirect_jump (insn, label, 0);
5367 gcc_assert (ok);
5369 else
5371 /* get_attr_length (insn) == 2 */
5372 /* Check if we have a pattern where reorg wants to redirect
5373 the branch to a label from an unconditional branch that
5374 is too far away. */
5375 /* We can't use JUMP_LABEL here because it might be undefined
5376 when not optimizing. */
5377 /* A syntax error might cause beyond to be NULL_RTX. */
5378 beyond
5379 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5380 0));
5382 if (beyond
5383 && (GET_CODE (beyond) == JUMP_INSN
5384 || ((beyond = next_active_insn (beyond))
5385 && GET_CODE (beyond) == JUMP_INSN))
5386 && GET_CODE (PATTERN (beyond)) == SET
5387 && recog_memoized (beyond) == CODE_FOR_jump_compact
5388 && ((INSN_ADDRESSES
5389 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5390 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5391 > 252 + 258 + 2))
5392 gen_block_redirect (beyond,
5393 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5396 next = next_active_insn (insn);
5398 if ((GET_CODE (next) == JUMP_INSN
5399 || ((next = next_active_insn (next))
5400 && GET_CODE (next) == JUMP_INSN))
5401 && GET_CODE (PATTERN (next)) == SET
5402 && recog_memoized (next) == CODE_FOR_jump_compact
5403 && ((INSN_ADDRESSES
5404 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5405 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5406 > 252 + 258 + 2))
5407 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5409 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5411 int addr = INSN_ADDRESSES (INSN_UID (insn));
5412 rtx far_label = 0;
5413 int dest_uid = 0;
5414 struct far_branch *bp;
5416 if (type == TYPE_JUMP)
5418 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5419 dest_uid = get_dest_uid (far_label, max_uid);
5420 if (! dest_uid)
5422 /* Parse errors can lead to labels outside
5423 the insn stream. */
5424 if (! NEXT_INSN (far_label))
5425 continue;
5427 if (! optimize)
5429 JUMP_LABEL (insn) = far_label;
5430 LABEL_NUSES (far_label)++;
5432 redirect_jump (insn, NULL_RTX, 1);
5433 far_label = 0;
5436 bp = uid_branch[dest_uid];
5437 if (! bp)
5439 bp = (struct far_branch *) alloca (sizeof *bp);
5440 uid_branch[dest_uid] = bp;
5441 bp->prev = far_branch_list;
5442 far_branch_list = bp;
5443 bp->near_label = 0;
5444 bp->far_label = far_label;
5445 if (far_label)
5446 LABEL_NUSES (far_label)++;
5448 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5449 if (addr - bp->address <= CONDJUMP_MAX)
5450 emit_label_after (bp->near_label, PREV_INSN (insn));
5451 else
5453 gen_far_branch (bp);
5454 bp->near_label = 0;
5456 else
5457 bp->near_label = 0;
5458 bp->address = addr;
5459 bp->insert_place = insn;
5460 if (! far_label)
5461 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5462 else
5463 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5466 /* Generate all pending far branches,
5467 and free our references to the far labels. */
5468 while (far_branch_list)
5470 if (far_branch_list->near_label
5471 && ! NEXT_INSN (far_branch_list->near_label))
5472 gen_far_branch (far_branch_list);
5473 if (optimize
5474 && far_branch_list->far_label
5475 && ! --LABEL_NUSES (far_branch_list->far_label))
5476 delete_insn (far_branch_list->far_label);
5477 far_branch_list = far_branch_list->prev;
5480 /* Instruction length information is no longer valid due to the new
5481 instructions that have been generated. */
5482 init_insn_lengths ();
5485 /* Dump out instruction addresses, which is useful for debugging the
5486 constant pool table stuff.
5488 If relaxing, output the label and pseudo-ops used to link together
5489 calls and the instruction which set the registers. */
5491 /* ??? The addresses printed by this routine for insns are nonsense for
5492 insns which are inside of a sequence where none of the inner insns have
5493 variable length. This is because the second pass of shorten_branches
5494 does not bother to update them. */
5496 void
5497 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5498 int noperands ATTRIBUTE_UNUSED)
5500 if (TARGET_DUMPISIZE)
5501 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5503 if (TARGET_RELAX)
5505 rtx note;
5507 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5508 if (note)
5510 rtx pattern;
5512 pattern = PATTERN (insn);
5513 if (GET_CODE (pattern) == PARALLEL)
5514 pattern = XVECEXP (pattern, 0, 0);
5515 switch (GET_CODE (pattern))
5517 case SET:
5518 if (GET_CODE (SET_SRC (pattern)) != CALL
5519 && get_attr_type (insn) != TYPE_SFUNC)
5521 targetm.asm_out.internal_label
5522 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5523 break;
5525 /* else FALLTHROUGH */
5526 case CALL:
5527 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5528 CODE_LABEL_NUMBER (XEXP (note, 0)));
5529 break;
5531 default:
5532 gcc_unreachable ();
5538 /* Dump out any constants accumulated in the final pass. These will
5539 only be labels. */
5541 const char *
5542 output_jump_label_table (void)
5544 int i;
5546 if (pool_size)
5548 fprintf (asm_out_file, "\t.align 2\n");
5549 for (i = 0; i < pool_size; i++)
5551 pool_node *p = &pool_vector[i];
5553 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5554 CODE_LABEL_NUMBER (p->label));
5555 output_asm_insn (".long %O0", &p->value);
5557 pool_size = 0;
5560 return "";
5563 /* A full frame looks like:
5565 arg-5
5566 arg-4
5567 [ if current_function_anonymous_args
5568 arg-3
5569 arg-2
5570 arg-1
5571 arg-0 ]
5572 saved-fp
5573 saved-r10
5574 saved-r11
5575 saved-r12
5576 saved-pr
5577 local-n
5579 local-1
5580 local-0 <- fp points here. */
5582 /* Number of bytes pushed for anonymous args, used to pass information
5583 between expand_prologue and expand_epilogue. */
5585 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5586 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5587 for an epilogue and a negative value means that it's for a sibcall
5588 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5589 all the registers that are about to be restored, and hence dead. */
5591 static void
5592 output_stack_adjust (int size, rtx reg, int epilogue_p,
5593 HARD_REG_SET *live_regs_mask)
5595 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5596 if (size)
5598 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5600 /* This test is bogus, as output_stack_adjust is used to re-align the
5601 stack. */
5602 #if 0
5603 gcc_assert (!(size % align));
5604 #endif
5606 if (CONST_OK_FOR_ADD (size))
5607 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5608 /* Try to do it with two partial adjustments; however, we must make
5609 sure that the stack is properly aligned at all times, in case
5610 an interrupt occurs between the two partial adjustments. */
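/* Illustrative arithmetic (an assumption-based sketch, taking I08 to be the
   signed 8-bit add-immediate range and align = 4): for size = 200, the whole
   adjustment does not fit in I08, but size / 2 & -align = 100 does, so we
   emit "add #100" twice and the stack stays 4-byte aligned after each of the
   two steps.  */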
5611 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5612 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5614 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5615 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5617 else
5619 rtx const_reg;
5620 rtx insn;
5621 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5622 int i;
5624 /* If TEMP is invalid, we could temporarily save a general
5625 register to MACL. However, there is currently no need
5626 to handle this case, so just die when we see it. */
5627 if (epilogue_p < 0
5628 || current_function_interrupt
5629 || ! call_really_used_regs[temp] || fixed_regs[temp])
5630 temp = -1;
5631 if (temp < 0 && ! current_function_interrupt
5632 && (TARGET_SHMEDIA || epilogue_p >= 0))
5634 HARD_REG_SET temps;
5635 COPY_HARD_REG_SET (temps, call_used_reg_set);
5636 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5637 if (epilogue_p > 0)
5639 int nreg = 0;
5640 if (crtl->return_rtx)
5642 enum machine_mode mode;
5643 mode = GET_MODE (crtl->return_rtx);
5644 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5645 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5647 for (i = 0; i < nreg; i++)
5648 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5649 if (crtl->calls_eh_return)
5651 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5652 for (i = 0; i <= 3; i++)
5653 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5656 if (TARGET_SHMEDIA && epilogue_p < 0)
5657 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5658 CLEAR_HARD_REG_BIT (temps, i);
5659 if (epilogue_p <= 0)
5661 for (i = FIRST_PARM_REG;
5662 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5663 CLEAR_HARD_REG_BIT (temps, i);
5664 if (cfun->static_chain_decl != NULL)
5665 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5667 temp = scavenge_reg (&temps);
5669 if (temp < 0 && live_regs_mask)
5671 HARD_REG_SET temps;
5673 COPY_HARD_REG_SET (temps, *live_regs_mask);
5674 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5675 temp = scavenge_reg (&temps);
5677 if (temp < 0)
5679 rtx adj_reg, tmp_reg, mem;
5681 /* If we reached here, the most likely case is the (sibcall)
5682 epilogue for non-SHmedia. Put a special push/pop sequence
5683 for such a case as a last resort. This looks lengthy, but
5684 it should not be a problem because it seems to be very
5685 rare. */
5687 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5690 /* ??? There is still the slight possibility that r4 or
5691 r5 have been reserved as fixed registers or assigned
5692 as global registers, and they change during an
5693 interrupt. There are possible ways to handle this:
5695 - If we are adjusting the frame pointer (r14), we can do
5696 with a single temp register and an ordinary push / pop
5697 on the stack.
5698 - Grab any call-used or call-saved registers (i.e. not
5699 fixed or globals) for the temps we need. We might
5700 also grab r14 if we are adjusting the stack pointer.
5701 If we can't find enough available registers, issue
5702 a diagnostic and die - the user must have reserved
5703 way too many registers.
5704 But since all this is rather unlikely to happen and
5705 would require extra testing, we just die if r4 / r5
5706 are not available. */
5707 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5708 && !global_regs[4] && !global_regs[5]);
5710 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5711 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5712 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5713 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5714 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5715 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5716 emit_move_insn (mem, tmp_reg);
5717 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5718 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5719 emit_move_insn (mem, tmp_reg);
5720 emit_move_insn (reg, adj_reg);
5721 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5722 emit_move_insn (adj_reg, mem);
5723 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5724 emit_move_insn (tmp_reg, mem);
5725 /* Tell flow the insns that pop r4/r5 aren't dead. */
5726 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5727 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5728 return;
5730 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5732 /* If SIZE is negative, subtract the positive value.
5733 This sometimes allows a constant pool entry to be shared
5734 between prologue and epilogue code. */
5735 if (size < 0)
5737 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5738 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5740 else
5742 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5743 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5745 if (! epilogue_p)
5746 REG_NOTES (insn)
5747 = (gen_rtx_EXPR_LIST
5748 (REG_FRAME_RELATED_EXPR,
5749 gen_rtx_SET (VOIDmode, reg,
5750 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5751 REG_NOTES (insn)));
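/* Emit the insn X, mark it as frame related so that the DWARF CFI
   machinery sees it, and return the emitted insn.  */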
5756 static rtx
5757 frame_insn (rtx x)
5759 x = emit_insn (x);
5760 RTX_FRAME_RELATED_P (x) = 1;
5761 return x;
5764 /* Output RTL to push register RN onto the stack. */
5766 static rtx
5767 push (int rn)
5769 rtx x;
5770 if (rn == FPUL_REG)
5771 x = gen_push_fpul ();
5772 else if (rn == FPSCR_REG)
5773 x = gen_push_fpscr ();
5774 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5775 && FP_OR_XD_REGISTER_P (rn))
5777 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5778 return NULL_RTX;
5779 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5781 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5782 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5783 else
5784 x = gen_push (gen_rtx_REG (SImode, rn));
5786 x = frame_insn (x);
5787 REG_NOTES (x)
5788 = gen_rtx_EXPR_LIST (REG_INC,
5789 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5790 return x;
5793 /* Output RTL to pop register RN from the stack. */
5795 static void
5796 pop (int rn)
5798 rtx x;
5799 if (rn == FPUL_REG)
5800 x = gen_pop_fpul ();
5801 else if (rn == FPSCR_REG)
5802 x = gen_pop_fpscr ();
5803 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5804 && FP_OR_XD_REGISTER_P (rn))
5806 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5807 return;
5808 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5810 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5811 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5812 else
5813 x = gen_pop (gen_rtx_REG (SImode, rn));
5815 x = emit_insn (x);
5816 REG_NOTES (x)
5817 = gen_rtx_EXPR_LIST (REG_INC,
5818 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5821 /* Generate code to push the regs specified in the mask. */
5823 static void
5824 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5826 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5827 int skip_fpscr = 0;
5829 /* Push PR last; this gives better latencies after the prologue, and
5830 provides candidates for the return delay slot when there are no general
5831 registers pushed. */
5832 for (; i < FIRST_PSEUDO_REGISTER; i++)
5834 /* If this is an interrupt handler, and the SZ bit varies,
5835 and we have to push any floating point register, we need
5836 to switch to the correct precision first. */
5837 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5838 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5840 HARD_REG_SET unsaved;
5842 push (FPSCR_REG);
5843 COMPL_HARD_REG_SET (unsaved, *mask);
5844 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5845 skip_fpscr = 1;
5847 if (i != PR_REG
5848 && (i != FPSCR_REG || ! skip_fpscr)
5849 && TEST_HARD_REG_BIT (*mask, i))
5851 /* If the ISR has the RESBANK attribute assigned, don't push any of
5852 the following registers: R0-R14, MACH, MACL and GBR. */
5853 if (! (sh_cfun_resbank_handler_p ()
5854 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5855 || i == MACH_REG
5856 || i == MACL_REG
5857 || i == GBR_REG)))
5858 push (i);
5862 /* Push banked registers last to improve delay slot opportunities. */
5863 if (interrupt_handler)
5864 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5865 if (TEST_HARD_REG_BIT (*mask, i))
5866 push (i);
5868 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
5869 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5870 push (PR_REG);
5873 /* Calculate how much extra space is needed to save all callee-saved
5874 target registers.
5875 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5877 static int
5878 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5880 int reg;
5881 int stack_space = 0;
5882 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5884 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5885 if ((! call_really_used_regs[reg] || interrupt_handler)
5886 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5887 /* Leave space to save this target register on the stack,
5888 in case target register allocation wants to use it. */
5889 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5890 return stack_space;
5893 /* Decide whether we should reserve space for callee-save target registers,
5894 in case target register allocation wants to use them. REGS_SAVED is
5895 the space, in bytes, that is already required for register saves.
5896 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5898 static int
5899 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5900 HARD_REG_SET *live_regs_mask)
5902 if (optimize_size)
5903 return 0;
5904 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5907 /* Decide how much space to reserve for callee-save target registers
5908 in case target register allocation wants to use them.
5909 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5911 static int
5912 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5914 if (shmedia_space_reserved_for_target_registers)
5915 return shmedia_target_regs_stack_space (live_regs_mask);
5916 else
5917 return 0;
5920 /* Work out the registers which need to be saved, both as a mask and a
5921 count of the bytes needed to save them. Return the count.
5923 If doing a pragma interrupt function, then push all regs used by the
5924 function, and if we call another function (we can tell by looking at PR),
5925 make sure that all the regs it clobbers are safe too. */
5927 static int
5928 calc_live_regs (HARD_REG_SET *live_regs_mask)
5930 unsigned int reg;
5931 int count;
5932 tree attrs;
5933 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5934 bool nosave_low_regs;
5935 int pr_live, has_call;
5937 attrs = DECL_ATTRIBUTES (current_function_decl);
5938 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5939 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5940 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5941 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5943 CLEAR_HARD_REG_SET (*live_regs_mask);
5944 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5945 && df_regs_ever_live_p (FPSCR_REG))
5946 target_flags &= ~MASK_FPU_SINGLE;
5947 /* If we can avoid a lot of saves by switching to double mode, do that. */
5948 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5949 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5950 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5951 && (! call_really_used_regs[reg]
5952 || interrupt_handler)
5953 && ++count > 2)
5955 target_flags &= ~MASK_FPU_SINGLE;
5956 break;
5958 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5959 knows how to use it. That means the pseudo originally allocated for
5960 the initial value can become the PR_MEDIA_REG hard register, as seen for
5961 execute/20010122-1.c:test9. */
5962 if (TARGET_SHMEDIA)
5963 /* ??? this function is called from initial_elimination_offset, hence we
5964 can't use the result of sh_media_register_for_return here. */
5965 pr_live = sh_pr_n_sets ();
5966 else
5968 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5969 pr_live = (pr_initial
5970 ? (GET_CODE (pr_initial) != REG
5971 || REGNO (pr_initial) != (PR_REG))
5972 : df_regs_ever_live_p (PR_REG));
5973 /* For SHcompact, if not optimizing, we end up with a memory reference
5974 using the return address pointer for __builtin_return_address even
5975 though there is no actual need to put the PR register on the stack. */
5976 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5978 /* Force PR to be live if the prologue has to call the SHmedia
5979 argument decoder or register saver. */
5980 if (TARGET_SHCOMPACT
5981 && ((crtl->args.info.call_cookie
5982 & ~ CALL_COOKIE_RET_TRAMP (1))
5983 || crtl->saves_all_registers))
5984 pr_live = 1;
5985 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5986 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5988 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5989 ? pr_live
5990 : interrupt_handler
5991 ? (/* Need to save all the regs ever live. */
5992 (df_regs_ever_live_p (reg)
5993 || (call_really_used_regs[reg]
5994 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5995 || reg == PIC_OFFSET_TABLE_REGNUM)
5996 && has_call)
5997 || (TARGET_SHMEDIA && has_call
5998 && REGISTER_NATURAL_MODE (reg) == SImode
5999 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6000 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6001 && reg != RETURN_ADDRESS_POINTER_REGNUM
6002 && reg != T_REG && reg != GBR_REG
6003 /* Push fpscr only on targets which have an FPU. */
6004 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6005 : (/* Only push those regs which are used and need to be saved. */
6006 (TARGET_SHCOMPACT
6007 && flag_pic
6008 && crtl->args.info.call_cookie
6009 && reg == PIC_OFFSET_TABLE_REGNUM)
6010 || (df_regs_ever_live_p (reg)
6011 && (!call_really_used_regs[reg]
6012 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6013 || (crtl->calls_eh_return
6014 && (reg == EH_RETURN_DATA_REGNO (0)
6015 || reg == EH_RETURN_DATA_REGNO (1)
6016 || reg == EH_RETURN_DATA_REGNO (2)
6017 || reg == EH_RETURN_DATA_REGNO (3)))
6018 || ((reg == MACL_REG || reg == MACH_REG)
6019 && df_regs_ever_live_p (reg)
6020 && sh_cfun_attr_renesas_p ())
6023 SET_HARD_REG_BIT (*live_regs_mask, reg);
6024 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6026 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6027 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6029 if (FP_REGISTER_P (reg))
6031 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6033 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6034 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6037 else if (XD_REGISTER_P (reg))
6039 /* Must switch to double mode to access these registers. */
6040 target_flags &= ~MASK_FPU_SINGLE;
6044 if (nosave_low_regs && reg == R8_REG)
6045 break;
6047 /* If we have a target register optimization pass after prologue / epilogue
6048 threading, we need to assume all target registers will be live even if
6049 they aren't now. */
6050 if (flag_branch_target_load_optimize2
6051 && TARGET_SAVE_ALL_TARGET_REGS
6052 && shmedia_space_reserved_for_target_registers)
6053 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6054 if ((! call_really_used_regs[reg] || interrupt_handler)
6055 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6057 SET_HARD_REG_BIT (*live_regs_mask, reg);
6058 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6060 /* If this is an interrupt handler, we don't have any call-clobbered
6061 registers we can conveniently use for target register save/restore.
6062 Make sure we save at least one general purpose register when we need
6063 to save target registers. */
6064 if (interrupt_handler
6065 && hard_reg_set_intersect_p (*live_regs_mask,
6066 reg_class_contents[TARGET_REGS])
6067 && ! hard_reg_set_intersect_p (*live_regs_mask,
6068 reg_class_contents[GENERAL_REGS]))
6070 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6071 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6074 return count;
6077 /* Code to generate prologue and epilogue sequences */
6079 /* PUSHED is the number of bytes that are being pushed on the
6080 stack for register saves. Return the frame size, padded
6081 appropriately so that the stack stays properly aligned. */
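/* For example, with STACK_BOUNDARY == 64 (an 8-byte alignment), PUSHED == 20
   and a frame size of 10, this returns ((10 + 20 + 7) & -8) - 20 == 12, so
   pushes plus frame come to 32 bytes and the stack stays 8-byte aligned.  */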
6082 static HOST_WIDE_INT
6083 rounded_frame_size (int pushed)
6085 HOST_WIDE_INT size = get_frame_size ();
6086 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6088 return ((size + pushed + align - 1) & -align) - pushed;
6091 /* Choose a call-clobbered target-branch register that remains
6092 unchanged along the whole function. We set it up as the return
6093 value in the prologue. */
6095 sh_media_register_for_return (void)
6097 int regno;
6098 int tr0_used;
6100 if (! current_function_is_leaf)
6101 return -1;
6102 if (lookup_attribute ("interrupt_handler",
6103 DECL_ATTRIBUTES (current_function_decl)))
6104 return -1;
6105 if (sh_cfun_interrupt_handler_p ())
6106 return -1;
6108 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6110 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6111 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6112 return regno;
6114 return -1;
6117 /* The maximum registers we need to save are:
6118 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6119 - 32 floating point registers (for each pair, we save none,
6120 one single precision value, or a double precision value).
6121 - 8 target registers
6122 - add 1 entry for a delimiter. */
6123 #define MAX_SAVED_REGS (62+32+8)
6125 typedef struct save_entry_s
6127 unsigned char reg;
6128 unsigned char mode;
6129 short offset;
6130 } save_entry;
6132 #define MAX_TEMPS 4
6134 /* There will be a delimiter entry with VOIDmode both at the start and the
6135 end of a filled in schedule. The end delimiter has the offset of the
6136 save with the smallest (i.e. most negative) offset. */
6137 typedef struct save_schedule_s
6139 save_entry entries[MAX_SAVED_REGS + 2];
6140 int temps[MAX_TEMPS+1];
6141 } save_schedule;
6143 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6144 use reverse order. Returns the last entry written to (not counting
6145 the delimiter). OFFSET_BASE is a number to be added to all offset
6146 entries. */
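/* The SCHEDULE->temps array is also filled in: it lists scratch registers
   (terminated by -1) that the prologue and epilogue code use as
   intermediaries for registers that cannot be stored or loaded directly.  */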
6148 static save_entry *
6149 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6150 int offset_base)
6152 int align, i;
6153 save_entry *entry = schedule->entries;
6154 int tmpx = 0;
6155 int offset;
6157 if (! current_function_interrupt)
6158 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6159 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6160 && ! FUNCTION_ARG_REGNO_P (i)
6161 && i != FIRST_RET_REG
6162 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6163 && ! (crtl->calls_eh_return
6164 && (i == EH_RETURN_STACKADJ_REGNO
6165 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6166 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6167 schedule->temps[tmpx++] = i;
6168 entry->reg = -1;
6169 entry->mode = VOIDmode;
6170 entry->offset = offset_base;
6171 entry++;
6172 /* We loop twice: first, we save 8-byte aligned registers at the
6173 higher addresses, which are known to be aligned. Then, we
6174 proceed to saving 32-bit registers that don't need 8-byte
6175 alignment.
6176 If this is an interrupt function, all registers that need saving
6177 need to be saved in full. Moreover, we need to postpone saving
6178 target registers until we have saved some general purpose registers
6179 we can then use as scratch registers. */
6180 offset = offset_base;
6181 for (align = 1; align >= 0; align--)
6183 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6184 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6186 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6187 int reg = i;
6189 if (current_function_interrupt)
6191 if (TARGET_REGISTER_P (i))
6192 continue;
6193 if (GENERAL_REGISTER_P (i))
6194 mode = DImode;
6196 if (mode == SFmode && (i % 2) == 1
6197 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6198 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6200 mode = DFmode;
6201 i--;
6202 reg--;
6205 /* If we're doing the aligned pass and this is not aligned,
6206 or we're doing the unaligned pass and this is aligned,
6207 skip it. */
6208 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6209 != align)
6210 continue;
6212 if (current_function_interrupt
6213 && GENERAL_REGISTER_P (i)
6214 && tmpx < MAX_TEMPS)
6215 schedule->temps[tmpx++] = i;
6217 offset -= GET_MODE_SIZE (mode);
6218 entry->reg = i;
6219 entry->mode = mode;
6220 entry->offset = offset;
6221 entry++;
6223 if (align && current_function_interrupt)
6224 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6225 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6227 offset -= GET_MODE_SIZE (DImode);
6228 entry->reg = i;
6229 entry->mode = DImode;
6230 entry->offset = offset;
6231 entry++;
6234 entry->reg = -1;
6235 entry->mode = VOIDmode;
6236 entry->offset = offset;
6237 schedule->temps[tmpx] = -1;
6238 return entry - 1;
6241 void
6242 sh_expand_prologue (void)
6244 HARD_REG_SET live_regs_mask;
6245 int d, i;
6246 int d_rounding = 0;
6247 int save_flags = target_flags;
6248 int pretend_args;
6249 tree sp_switch_attr
6250 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6252 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6254 /* We have pretend args if we had an object sent partially in registers
6255 and partially on the stack, e.g. a large structure. */
6256 pretend_args = crtl->args.pretend_args_size;
6257 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6258 && (NPARM_REGS(SImode)
6259 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6260 pretend_args = 0;
6261 output_stack_adjust (-pretend_args
6262 - crtl->args.info.stack_regs * 8,
6263 stack_pointer_rtx, 0, NULL);
6265 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6266 /* We're going to use the PIC register to load the address of the
6267 incoming-argument decoder and/or of the return trampoline from
6268 the GOT, so make sure the PIC register is preserved and
6269 initialized. */
6270 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6272 if (TARGET_SHCOMPACT
6273 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6275 int reg;
6277 /* First, make all registers with incoming arguments that will
6278 be pushed onto the stack live, so that register renaming
6279 doesn't overwrite them. */
6280 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6281 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6282 >= NPARM_REGS (SImode) - reg)
6283 for (; reg < NPARM_REGS (SImode); reg++)
6284 emit_insn (gen_shcompact_preserve_incoming_args
6285 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6286 else if (CALL_COOKIE_INT_REG_GET
6287 (crtl->args.info.call_cookie, reg) == 1)
6288 emit_insn (gen_shcompact_preserve_incoming_args
6289 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6291 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6292 stack_pointer_rtx);
6293 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6294 GEN_INT (crtl->args.info.call_cookie));
6295 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6296 gen_rtx_REG (SImode, R0_REG));
6298 else if (TARGET_SHMEDIA)
6300 int tr = sh_media_register_for_return ();
6302 if (tr >= 0)
6303 emit_move_insn (gen_rtx_REG (DImode, tr),
6304 gen_rtx_REG (DImode, PR_MEDIA_REG));
6307 /* Emit the code for SETUP_VARARGS. */
6308 if (cfun->stdarg)
6310 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6312 /* Push arg regs as if they'd been provided by the caller on the stack. */
6313 for (i = 0; i < NPARM_REGS(SImode); i++)
6315 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6316 rtx insn;
6318 if (i >= (NPARM_REGS(SImode)
6319 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6321 break;
6322 insn = push (rn);
6327 /* If we're supposed to switch stacks at function entry, do so now. */
6328 if (sp_switch_attr)
6330 /* The argument specifies a variable holding the address of the
6331 stack the interrupt function should switch to/from at entry/exit. */
6332 const char *s
6333 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6334 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6336 emit_insn (gen_sp_switch_1 (sp_switch));
6339 d = calc_live_regs (&live_regs_mask);
6340 /* ??? Maybe we could save some switching if we can move a mode switch
6341 that already happens to be at the function start into the prologue. */
6342 if (target_flags != save_flags && ! current_function_interrupt)
6343 emit_insn (gen_toggle_sz ());
6345 if (TARGET_SH5)
6347 int offset_base, offset;
6348 rtx r0 = NULL_RTX;
6349 int offset_in_r0 = -1;
6350 int sp_in_r0 = 0;
6351 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6352 int total_size, save_size;
6353 save_schedule schedule;
6354 save_entry *entry;
6355 int *tmp_pnt;
6357 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6358 && ! current_function_interrupt)
6359 r0 = gen_rtx_REG (Pmode, R0_REG);
6361 /* D is the actual number of bytes that we need for saving registers;
6362 however, in initial_elimination_offset we have committed to using
6363 an additional TREGS_SPACE amount of bytes. In order to keep both
6364 the addresses of arguments supplied by the caller and local variables
6365 valid, we must keep this gap. Place it between the incoming
6366 arguments and the actually saved registers in a bid to optimize
6367 locality of reference. */
6368 total_size = d + tregs_space;
6369 total_size += rounded_frame_size (total_size);
6370 save_size = total_size - rounded_frame_size (d);
6371 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6372 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6373 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6375 /* If adjusting the stack in a single step costs nothing extra, do so.
6376 I.e. either if a single addi is enough, or we need a movi anyway,
6377 and we don't exceed the maximum offset range (the test for the
6378 latter is conservative for simplicity). */
6379 if (TARGET_SHMEDIA
6380 && (CONST_OK_FOR_I10 (-total_size)
6381 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6382 && total_size <= 2044)))
6383 d_rounding = total_size - save_size;
6385 offset_base = d + d_rounding;
6387 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6388 0, NULL);
6390 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6391 tmp_pnt = schedule.temps;
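/* Walk the save schedule (skipping the leading delimiter entry) and emit
   a store for each register, falling back to an r0-based address with
   pre-decrement when the plain sp+offset address is not legitimate.  */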
6392 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6394 enum machine_mode mode = entry->mode;
6395 unsigned int reg = entry->reg;
6396 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6397 rtx orig_reg_rtx;
6399 offset = entry->offset;
6401 reg_rtx = gen_rtx_REG (mode, reg);
6403 mem_rtx = gen_frame_mem (mode,
6404 gen_rtx_PLUS (Pmode,
6405 stack_pointer_rtx,
6406 GEN_INT (offset)));
6408 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6410 gcc_assert (r0);
6411 mem_rtx = NULL_RTX;
6413 try_pre_dec:
6415 if (HAVE_PRE_DECREMENT
6416 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6417 || mem_rtx == NULL_RTX
6418 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6420 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6422 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6423 pre_dec_ok);
6425 pre_dec = NULL_RTX;
6427 break;
6429 pre_dec_ok:
6430 mem_rtx = NULL_RTX;
6431 offset += GET_MODE_SIZE (mode);
6433 while (0);
6435 if (mem_rtx != NULL_RTX)
6436 goto addr_ok;
6438 if (offset_in_r0 == -1)
6440 emit_move_insn (r0, GEN_INT (offset));
6441 offset_in_r0 = offset;
6443 else if (offset != offset_in_r0)
6445 emit_move_insn (r0,
6446 gen_rtx_PLUS
6447 (Pmode, r0,
6448 GEN_INT (offset - offset_in_r0)));
6449 offset_in_r0 += offset - offset_in_r0;
6452 if (pre_dec != NULL_RTX)
6454 if (! sp_in_r0)
6456 emit_move_insn (r0,
6457 gen_rtx_PLUS
6458 (Pmode, r0, stack_pointer_rtx));
6459 sp_in_r0 = 1;
6462 offset -= GET_MODE_SIZE (mode);
6463 offset_in_r0 -= GET_MODE_SIZE (mode);
6465 mem_rtx = pre_dec;
6467 else if (sp_in_r0)
6468 mem_rtx = gen_frame_mem (mode, r0);
6469 else
6470 mem_rtx = gen_frame_mem (mode,
6471 gen_rtx_PLUS (Pmode,
6472 stack_pointer_rtx,
6473 r0));
6475 /* We must not use an r0-based address for target-branch
6476 registers or for special registers without pre-dec
6477 memory addresses, since we store their values in r0
6478 first. */
6479 gcc_assert (!TARGET_REGISTER_P (reg)
6480 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6481 || mem_rtx == pre_dec));
6483 addr_ok:
6484 orig_reg_rtx = reg_rtx;
6485 if (TARGET_REGISTER_P (reg)
6486 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6487 && mem_rtx != pre_dec))
6489 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6491 emit_move_insn (tmp_reg, reg_rtx);
6493 if (REGNO (tmp_reg) == R0_REG)
6495 offset_in_r0 = -1;
6496 sp_in_r0 = 0;
6497 gcc_assert (!refers_to_regno_p
6498 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6501 if (*++tmp_pnt <= 0)
6502 tmp_pnt = schedule.temps;
6504 reg_rtx = tmp_reg;
6507 rtx insn;
6509 /* Mark as interesting for the DWARF CFI generator. */
6510 insn = emit_move_insn (mem_rtx, reg_rtx);
6511 RTX_FRAME_RELATED_P (insn) = 1;
6512 /* If we use an intermediate register for the save, we can't
6513 describe this exactly in cfi as a copy of the to-be-saved
6514 register into the temporary register and then the temporary
6515 register on the stack, because the temporary register can
6516 have a different natural size than the to-be-saved register.
6517 Thus, we gloss over the intermediate copy and pretend we do
6518 a direct save from the to-be-saved register. */
6519 if (REGNO (reg_rtx) != reg)
6521 rtx set, note_rtx;
6523 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6524 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6525 REG_NOTES (insn));
6526 REG_NOTES (insn) = note_rtx;
6529 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6531 rtx reg_rtx = gen_rtx_REG (mode, reg);
6532 rtx set, note_rtx;
6533 rtx mem_rtx = gen_frame_mem (mode,
6534 gen_rtx_PLUS (Pmode,
6535 stack_pointer_rtx,
6536 GEN_INT (offset)));
6538 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6539 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6540 REG_NOTES (insn));
6541 REG_NOTES (insn) = note_rtx;
6546 gcc_assert (entry->offset == d_rounding);
6548 else
6549 push_regs (&live_regs_mask, current_function_interrupt);
6551 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6552 emit_insn (gen_GOTaddr2picreg ());
6554 if (SHMEDIA_REGS_STACK_ADJUST ())
6556 /* This must NOT go through the PLT, otherwise mach and macl
6557 may be clobbered. */
6558 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6559 (TARGET_FPU_ANY
6560 ? "__GCC_push_shmedia_regs"
6561 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6562 emit_insn (gen_shmedia_save_restore_regs_compact
6563 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6566 if (target_flags != save_flags && ! current_function_interrupt)
6567 emit_insn (gen_toggle_sz ());
6569 target_flags = save_flags;
6571 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6572 stack_pointer_rtx, 0, NULL);
6574 if (frame_pointer_needed)
6575 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6577 if (TARGET_SHCOMPACT
6578 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6580 /* This must NOT go through the PLT, otherwise mach and macl
6581 may be clobbered. */
6582 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6583 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6584 emit_insn (gen_shcompact_incoming_args ());
6588 void
6589 sh_expand_epilogue (bool sibcall_p)
6591 HARD_REG_SET live_regs_mask;
6592 int d, i;
6593 int d_rounding = 0;
6595 int save_flags = target_flags;
6596 int frame_size, save_size;
6597 int fpscr_deferred = 0;
6598 int e = sibcall_p ? -1 : 1;
6600 d = calc_live_regs (&live_regs_mask);
6602 save_size = d;
6603 frame_size = rounded_frame_size (d);
6605 if (TARGET_SH5)
6607 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6608 int total_size;
6609 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6610 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6611 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6613 total_size = d + tregs_space;
6614 total_size += rounded_frame_size (total_size);
6615 save_size = total_size - frame_size;
6617 /* If adjusting the stack in a single step costs nothing extra, do so.
6618 I.e. either if a single addi is enough, or we need a movi anyway,
6619 and we don't exceed the maximum offset range (the test for the
6620 latter is conservative for simplicity). */
6621 if (TARGET_SHMEDIA
6622 && ! frame_pointer_needed
6623 && (CONST_OK_FOR_I10 (total_size)
6624 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6625 && total_size <= 2044)))
6626 d_rounding = frame_size;
6628 frame_size -= d_rounding;
6631 if (frame_pointer_needed)
6633 /* We must avoid scheduling the epilogue with previous basic blocks
6634 when exception handling is enabled. See PR/18032. */
6635 if (flag_exceptions)
6636 emit_insn (gen_blockage ());
6637 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6638 &live_regs_mask);
6640 /* We must avoid moving the stack pointer adjustment past code
6641 which reads from the local frame, else an interrupt could
6642 occur after the SP adjustment and clobber data in the local
6643 frame. */
6644 emit_insn (gen_blockage ());
6645 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6647 else if (frame_size)
6649 /* We must avoid moving the stack pointer adjustment past code
6650 which reads from the local frame, else an interrupt could
6651 occur after the SP adjustment and clobber data in the local
6652 frame. */
6653 emit_insn (gen_blockage ());
6654 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6657 if (SHMEDIA_REGS_STACK_ADJUST ())
6659 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6660 (TARGET_FPU_ANY
6661 ? "__GCC_pop_shmedia_regs"
6662 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6663 /* This must NOT go through the PLT, otherwise mach and macl
6664 may be clobbered. */
6665 emit_insn (gen_shmedia_save_restore_regs_compact
6666 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6669 /* Pop all the registers. */
6671 if (target_flags != save_flags && ! current_function_interrupt)
6672 emit_insn (gen_toggle_sz ());
6673 if (TARGET_SH5)
6675 int offset_base, offset;
6676 int offset_in_r0 = -1;
6677 int sp_in_r0 = 0;
6678 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6679 save_schedule schedule;
6680 save_entry *entry;
6681 int *tmp_pnt;
6683 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6684 offset_base = -entry[1].offset + d_rounding;
6685 tmp_pnt = schedule.temps;
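/* Walk the save schedule in reverse and reload each register, using an
   r0-based post-increment address where the plain sp+offset address is
   not legitimate.  */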
6686 for (; entry->mode != VOIDmode; entry--)
6688 enum machine_mode mode = entry->mode;
6689 int reg = entry->reg;
6690 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6692 offset = offset_base + entry->offset;
6693 reg_rtx = gen_rtx_REG (mode, reg);
6695 mem_rtx = gen_frame_mem (mode,
6696 gen_rtx_PLUS (Pmode,
6697 stack_pointer_rtx,
6698 GEN_INT (offset)));
6700 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6702 mem_rtx = NULL_RTX;
6704 try_post_inc:
6706 if (HAVE_POST_INCREMENT
6707 && (offset == offset_in_r0
6708 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6709 && mem_rtx == NULL_RTX)
6710 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6712 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6714 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6715 post_inc_ok);
6717 post_inc = NULL_RTX;
6719 break;
6721 post_inc_ok:
6722 mem_rtx = NULL_RTX;
6724 while (0);
6726 if (mem_rtx != NULL_RTX)
6727 goto addr_ok;
6729 if (offset_in_r0 == -1)
6731 emit_move_insn (r0, GEN_INT (offset));
6732 offset_in_r0 = offset;
6734 else if (offset != offset_in_r0)
6736 emit_move_insn (r0,
6737 gen_rtx_PLUS
6738 (Pmode, r0,
6739 GEN_INT (offset - offset_in_r0)));
6740 offset_in_r0 += offset - offset_in_r0;
6743 if (post_inc != NULL_RTX)
6745 if (! sp_in_r0)
6747 emit_move_insn (r0,
6748 gen_rtx_PLUS
6749 (Pmode, r0, stack_pointer_rtx));
6750 sp_in_r0 = 1;
6753 mem_rtx = post_inc;
6755 offset_in_r0 += GET_MODE_SIZE (mode);
6757 else if (sp_in_r0)
6758 mem_rtx = gen_frame_mem (mode, r0);
6759 else
6760 mem_rtx = gen_frame_mem (mode,
6761 gen_rtx_PLUS (Pmode,
6762 stack_pointer_rtx,
6763 r0));
6765 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6766 || mem_rtx == post_inc);
6768 addr_ok:
6769 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6770 && mem_rtx != post_inc)
6772 insn = emit_move_insn (r0, mem_rtx);
6773 mem_rtx = r0;
6775 else if (TARGET_REGISTER_P (reg))
6777 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6779 /* Give the scheduler a bit of freedom by using up to
6780 MAX_TEMPS registers in a round-robin fashion. */
6781 insn = emit_move_insn (tmp_reg, mem_rtx);
6782 mem_rtx = tmp_reg;
6783 if (*++tmp_pnt < 0)
6784 tmp_pnt = schedule.temps;
6787 insn = emit_move_insn (reg_rtx, mem_rtx);
6790 gcc_assert (entry->offset + offset_base == d + d_rounding);
6792 else /* ! TARGET_SH5 */
6794 int last_reg;
6796 save_size = 0;
6797 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
6798 register. */
6799 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6800 && !sh_cfun_resbank_handler_p ())
6802 if (!frame_pointer_needed)
6803 emit_insn (gen_blockage ());
6804 pop (PR_REG);
6807 /* Banked registers are popped first to avoid being scheduled in the
6808 delay slot. RTE switches banks before the ds instruction. */
6809 if (current_function_interrupt)
6811 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6812 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6813 pop (LAST_BANKED_REG - i);
6815 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6817 else
6818 last_reg = FIRST_PSEUDO_REGISTER;
6820 for (i = 0; i < last_reg; i++)
6822 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6824 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6825 && hard_reg_set_intersect_p (live_regs_mask,
6826 reg_class_contents[DF_REGS]))
6827 fpscr_deferred = 1;
6828 /* For an ISR with the RESBANK attribute assigned, don't pop the
6829 following registers: R0-R14, MACH, MACL and GBR. */
6830 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6831 && ! (sh_cfun_resbank_handler_p ()
6832 && ((j >= FIRST_GENERAL_REG
6833 && j < LAST_GENERAL_REG)
6834 || j == MACH_REG
6835 || j == MACL_REG
6836 || j == GBR_REG)))
6837 pop (j);
6839 if (j == FIRST_FP_REG && fpscr_deferred)
6840 pop (FPSCR_REG);
6843 if (target_flags != save_flags && ! current_function_interrupt)
6844 emit_insn (gen_toggle_sz ());
6845 target_flags = save_flags;
6847 output_stack_adjust (crtl->args.pretend_args_size
6848 + save_size + d_rounding
6849 + crtl->args.info.stack_regs * 8,
6850 stack_pointer_rtx, e, NULL);
6852 if (crtl->calls_eh_return)
6853 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6854 EH_RETURN_STACKADJ_RTX));
6856 /* Switch back to the normal stack if necessary. */
6857 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6858 emit_insn (gen_sp_switch_2 ());
6860 /* Tell flow the insn that pops PR isn't dead. */
6861 /* PR_REG will never be live in SHmedia mode, and we don't need to
6862 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6863 by the return pattern. */
6864 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6865 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6868 static int sh_need_epilogue_known = 0;
6871 sh_need_epilogue (void)
6873 if (! sh_need_epilogue_known)
6875 rtx epilogue;
6877 start_sequence ();
6878 sh_expand_epilogue (0);
6879 epilogue = get_insns ();
6880 end_sequence ();
6881 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6883 return sh_need_epilogue_known > 0;
6886 /* Emit code to change the current function's return address to RA.
6887 TMP is available as a scratch register, if needed. */
6889 void
6890 sh_set_return_address (rtx ra, rtx tmp)
6892 HARD_REG_SET live_regs_mask;
6893 int d;
6894 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6895 int pr_offset;
6897 d = calc_live_regs (&live_regs_mask);
6899 /* If pr_reg isn't live, we can set it (or the register given in
6900 sh_media_register_for_return) directly. */
6901 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6903 rtx rr;
6905 if (TARGET_SHMEDIA)
6907 int rr_regno = sh_media_register_for_return ();
6909 if (rr_regno < 0)
6910 rr_regno = pr_reg;
6912 rr = gen_rtx_REG (DImode, rr_regno);
6914 else
6915 rr = gen_rtx_REG (SImode, pr_reg);
6917 emit_insn (GEN_MOV (rr, ra));
6918 /* Tell flow the register for return isn't dead. */
6919 emit_insn (gen_rtx_USE (VOIDmode, rr));
6920 return;
6923 if (TARGET_SH5)
6925 int offset;
6926 save_schedule schedule;
6927 save_entry *entry;
6929 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6930 offset = entry[1].offset;
6931 for (; entry->mode != VOIDmode; entry--)
6932 if (entry->reg == pr_reg)
6933 goto found;
6935 /* We can't find the PR register. */
6936 gcc_unreachable ();
6938 found:
6939 offset = entry->offset - offset;
6940 pr_offset = (rounded_frame_size (d) + offset
6941 + SHMEDIA_REGS_STACK_ADJUST ());
6943 else
6944 pr_offset = rounded_frame_size (d);
6946 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6947 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6949 tmp = gen_frame_mem (Pmode, tmp);
6950 emit_insn (GEN_MOV (tmp, ra));
6953 /* Clear variables at function end. */
6955 static void
6956 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6957 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6959 sh_need_epilogue_known = 0;
6962 static rtx
6963 sh_builtin_saveregs (void)
6965 /* First unnamed integer register. */
6966 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6967 /* Number of integer registers we need to save. */
6968 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6969 /* First unnamed SFmode float reg */
6970 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6971 /* Number of SFmode float regs to save. */
6972 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6973 rtx regbuf, fpregs;
6974 int bufsize, regno;
6975 alias_set_type alias_set;
6977 if (TARGET_SH5)
6979 if (n_intregs)
6981 int pushregs = n_intregs;
6983 while (pushregs < NPARM_REGS (SImode) - 1
6984 && (CALL_COOKIE_INT_REG_GET
6985 (crtl->args.info.call_cookie,
6986 NPARM_REGS (SImode) - pushregs)
6987 == 1))
6989 crtl->args.info.call_cookie
6990 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6991 - pushregs, 1);
6992 pushregs++;
6995 if (pushregs == NPARM_REGS (SImode))
6996 crtl->args.info.call_cookie
6997 |= (CALL_COOKIE_INT_REG (0, 1)
6998 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6999 else
7000 crtl->args.info.call_cookie
7001 |= CALL_COOKIE_STACKSEQ (pushregs);
7003 crtl->args.pretend_args_size += 8 * n_intregs;
7005 if (TARGET_SHCOMPACT)
7006 return const0_rtx;
7009 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7011 error ("__builtin_saveregs not supported by this subtarget");
7012 return const0_rtx;
7015 if (TARGET_SHMEDIA)
7016 n_floatregs = 0;
7018 /* Allocate block of memory for the regs. */
7019 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7020 Or can assign_stack_local accept a 0 SIZE argument? */
7021 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
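/* The buffer laid out below puts the unnamed float argument registers in
   the first n_floatregs words and the unnamed integer argument registers
   in the following n_intregs words.  */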
7023 if (TARGET_SHMEDIA)
7024 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7025 else if (n_floatregs & 1)
7027 rtx addr;
7029 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7030 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7031 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7032 regbuf = change_address (regbuf, BLKmode, addr);
7034 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7036 rtx addr, mask;
7038 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7039 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7040 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7041 emit_insn (gen_andsi3 (addr, addr, mask));
7042 regbuf = change_address (regbuf, BLKmode, addr);
7044 else
7045 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7046 alias_set = get_varargs_alias_set ();
7047 set_mem_alias_set (regbuf, alias_set);
7049 /* Save int args.
7050 This is optimized to only save the regs that are necessary. Explicitly
7051 named args need not be saved. */
7052 if (n_intregs > 0)
7053 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7054 adjust_address (regbuf, BLKmode,
7055 n_floatregs * UNITS_PER_WORD),
7056 n_intregs);
7058 if (TARGET_SHMEDIA)
7059 /* Return the address of the regbuf. */
7060 return XEXP (regbuf, 0);
7062 /* Save float args.
7063 This is optimized to only save the regs that are necessary. Explicitly
7064 named args need not be saved.
7065 We explicitly build a pointer to the buffer because it halves the insn
7066 count when not optimizing (otherwise the pointer is built for each reg
7067 saved).
7068 We emit the moves in reverse order so that we can use predecrement. */
7070 fpregs = copy_to_mode_reg (Pmode,
7071 plus_constant (XEXP (regbuf, 0),
7072 n_floatregs * UNITS_PER_WORD));
7073 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7075 rtx mem;
7076 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7078 emit_insn (gen_addsi3 (fpregs, fpregs,
7079 GEN_INT (-2 * UNITS_PER_WORD)));
7080 mem = change_address (regbuf, DFmode, fpregs);
7081 emit_move_insn (mem,
7082 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7084 regno = first_floatreg;
7085 if (regno & 1)
7087 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7088 mem = change_address (regbuf, SFmode, fpregs);
7089 emit_move_insn (mem,
7090 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7091 - (TARGET_LITTLE_ENDIAN != 0)));
7094 else
7095 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7097 rtx mem;
7099 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7100 mem = change_address (regbuf, SFmode, fpregs);
7101 emit_move_insn (mem,
7102 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7105 /* Return the address of the regbuf. */
7106 return XEXP (regbuf, 0);
7109 /* Define the `__builtin_va_list' type for the ABI. */
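/* For SH2E / SH4 with the GCC calling convention this is a record of five
   pointers (__va_next_o, __va_next_o_limit, __va_next_fp, __va_next_fp_limit
   and __va_next_stack); all other configurations use a plain pointer.  */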
7111 static tree
7112 sh_build_builtin_va_list (void)
7114 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7115 tree record;
7117 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7118 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7119 return ptr_type_node;
7121 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7123 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7124 ptr_type_node);
7125 f_next_o_limit = build_decl (FIELD_DECL,
7126 get_identifier ("__va_next_o_limit"),
7127 ptr_type_node);
7128 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7129 ptr_type_node);
7130 f_next_fp_limit = build_decl (FIELD_DECL,
7131 get_identifier ("__va_next_fp_limit"),
7132 ptr_type_node);
7133 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7134 ptr_type_node);
7136 DECL_FIELD_CONTEXT (f_next_o) = record;
7137 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7138 DECL_FIELD_CONTEXT (f_next_fp) = record;
7139 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7140 DECL_FIELD_CONTEXT (f_next_stack) = record;
7142 TYPE_FIELDS (record) = f_next_o;
7143 TREE_CHAIN (f_next_o) = f_next_o_limit;
7144 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7145 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7146 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7148 layout_type (record);
7150 return record;
7153 /* Implement `va_start' for varargs and stdarg. */
7155 static void
7156 sh_va_start (tree valist, rtx nextarg)
7158 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7159 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7160 tree t, u;
7161 int nfp, nint;
7163 if (TARGET_SH5)
7165 expand_builtin_saveregs ();
7166 std_expand_builtin_va_start (valist, nextarg);
7167 return;
7170 if ((! TARGET_SH2E && ! TARGET_SH4)
7171 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7173 std_expand_builtin_va_start (valist, nextarg);
7174 return;
7177 f_next_o = TYPE_FIELDS (va_list_type_node);
7178 f_next_o_limit = TREE_CHAIN (f_next_o);
7179 f_next_fp = TREE_CHAIN (f_next_o_limit);
7180 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7181 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7183 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7184 NULL_TREE);
7185 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7186 valist, f_next_o_limit, NULL_TREE);
7187 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7188 NULL_TREE);
7189 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7190 valist, f_next_fp_limit, NULL_TREE);
7191 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7192 valist, f_next_stack, NULL_TREE);
7194 /* Call __builtin_saveregs. */
7195 u = make_tree (sizetype, expand_builtin_saveregs ());
7196 u = fold_convert (ptr_type_node, u);
7197 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7198 TREE_SIDE_EFFECTS (t) = 1;
7199 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
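/* Leave room for however many of the eight SFmode argument registers were
   not used for named arguments; next_fp_limit points just past that part
   of the register save area.  */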
7201 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7202 if (nfp < 8)
7203 nfp = 8 - nfp;
7204 else
7205 nfp = 0;
7206 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7207 size_int (UNITS_PER_WORD * nfp));
7208 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7209 TREE_SIDE_EFFECTS (t) = 1;
7210 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7212 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7213 TREE_SIDE_EFFECTS (t) = 1;
7214 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7216 nint = crtl->args.info.arg_count[SH_ARG_INT];
7217 if (nint < 4)
7218 nint = 4 - nint;
7219 else
7220 nint = 0;
7221 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7222 size_int (UNITS_PER_WORD * nint));
7223 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7224 TREE_SIDE_EFFECTS (t) = 1;
7225 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7227 u = make_tree (ptr_type_node, nextarg);
7228 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7229 TREE_SIDE_EFFECTS (t) = 1;
7230 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7233 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7234 member, return it. */
7235 static tree
7236 find_sole_member (tree type)
7238 tree field, member = NULL_TREE;
7240 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7242 if (TREE_CODE (field) != FIELD_DECL)
7243 continue;
7244 if (!DECL_SIZE (field))
7245 return NULL_TREE;
7246 if (integer_zerop (DECL_SIZE (field)))
7247 continue;
7248 if (member)
7249 return NULL_TREE;
7250 member = field;
7252 return member;
7254 /* Implement `va_arg'. */
7256 static tree
7257 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7258 tree *post_p ATTRIBUTE_UNUSED)
7260 HOST_WIDE_INT size, rsize;
7261 tree tmp, pptr_type_node;
7262 tree addr, lab_over = NULL, result = NULL;
7263 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7264 tree eff_type;
7266 if (pass_by_ref)
7267 type = build_pointer_type (type);
7269 size = int_size_in_bytes (type);
7270 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7271 pptr_type_node = build_pointer_type (ptr_type_node);
7273 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7274 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7276 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7277 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7278 int pass_as_float;
7279 tree lab_false;
7280 tree member;
7282 f_next_o = TYPE_FIELDS (va_list_type_node);
7283 f_next_o_limit = TREE_CHAIN (f_next_o);
7284 f_next_fp = TREE_CHAIN (f_next_o_limit);
7285 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7286 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7288 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7289 NULL_TREE);
7290 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7291 valist, f_next_o_limit, NULL_TREE);
7292 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7293 valist, f_next_fp, NULL_TREE);
7294 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7295 valist, f_next_fp_limit, NULL_TREE);
7296 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7297 valist, f_next_stack, NULL_TREE);
7299 /* Structures with a single member with a distinct mode are passed
7300 like their member. This is relevant if the latter has a REAL_TYPE
7301 or COMPLEX_TYPE type. */
7302 eff_type = type;
7303 while (TREE_CODE (eff_type) == RECORD_TYPE
7304 && (member = find_sole_member (eff_type))
7305 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7306 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7307 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7309 tree field_type = TREE_TYPE (member);
7311 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7312 eff_type = field_type;
7313 else
7315 gcc_assert ((TYPE_ALIGN (eff_type)
7316 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7317 || (TYPE_ALIGN (eff_type)
7318 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7319 break;
7323 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7325 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7326 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7327 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7328 && size <= 16));
7330 else
7332 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7335 addr = create_tmp_var (pptr_type_node, NULL);
7336 lab_false = create_artificial_label ();
7337 lab_over = create_artificial_label ();
7339 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7341 if (pass_as_float)
7343 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7344 tree cmp;
7345 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7347 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7348 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7349 gimplify_and_add (tmp, pre_p);
7351 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7352 gimplify_and_add (tmp, pre_p);
7353 tmp = next_fp_limit;
7354 if (size > 4 && !is_double)
7355 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7356 size_int (4 - size));
7357 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7358 cmp = build3 (COND_EXPR, void_type_node, tmp,
7359 build1 (GOTO_EXPR, void_type_node, lab_false),
7360 NULL_TREE);
7361 if (!is_double)
7362 gimplify_and_add (cmp, pre_p);
7364 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7365 || (is_double || size == 16))
7367 tmp = fold_convert (sizetype, next_fp_tmp);
7368 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7369 size_int (UNITS_PER_WORD));
7370 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7371 next_fp_tmp, tmp);
7372 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7373 next_fp_tmp, tmp);
7374 gimplify_and_add (tmp, pre_p);
7376 if (is_double)
7377 gimplify_and_add (cmp, pre_p);
7379 #ifdef FUNCTION_ARG_SCmode_WART
7380 if (TYPE_MODE (eff_type) == SCmode
7381 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7383 tree subtype = TREE_TYPE (eff_type);
7384 tree real, imag;
7386 imag
7387 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7388 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7390 real
7391 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7392 real = get_initialized_tmp_var (real, pre_p, NULL);
7394 result = build2 (COMPLEX_EXPR, type, real, imag);
7395 result = get_initialized_tmp_var (result, pre_p, NULL);
7397 #endif /* FUNCTION_ARG_SCmode_WART */
7399 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7400 gimplify_and_add (tmp, pre_p);
7402 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7403 gimplify_and_add (tmp, pre_p);
7405 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7406 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7407 gimplify_and_add (tmp, pre_p);
7408 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7409 gimplify_and_add (tmp, pre_p);
7411 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7412 gimplify_and_add (tmp, post_p);
7413 valist = next_fp_tmp;
7415 else
7417 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7418 size_int (rsize));
7419 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7420 tmp = build3 (COND_EXPR, void_type_node, tmp,
7421 build1 (GOTO_EXPR, void_type_node, lab_false),
7422 NULL_TREE);
7423 gimplify_and_add (tmp, pre_p);
7425 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7426 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7427 gimplify_and_add (tmp, pre_p);
7429 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7430 gimplify_and_add (tmp, pre_p);
7432 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7433 gimplify_and_add (tmp, pre_p);
7435 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7437 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7438 next_o, next_o_limit);
7439 gimplify_and_add (tmp, pre_p);
7442 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7443 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7444 gimplify_and_add (tmp, pre_p);
7447 if (!result)
7449 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7450 gimplify_and_add (tmp, pre_p);
7454 /* ??? In va-sh.h, there had been code to make values larger than
7455 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7457 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7458 if (result)
7460 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7461 gimplify_and_add (tmp, pre_p);
7463 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7464 gimplify_and_add (tmp, pre_p);
7466 else
7467 result = tmp;
7469 if (pass_by_ref)
7470 result = build_va_arg_indirect_ref (result);
7472 return result;
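/* Return true if arguments declared with narrow types should still be
   promoted and passed as int even for prototyped calls.  The Hitachi /
   Renesas ABI does not do this promotion.  */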
7475 bool
7476 sh_promote_prototypes (const_tree type)
7478 if (TARGET_HITACHI)
7479 return 0;
7480 if (! type)
7481 return 1;
7482 return ! sh_attr_renesas_p (type);
7485 /* Whether an argument must be passed by reference. On SHcompact, we
7486 pretend arguments wider than 32 bits that would have been passed in
7487 registers are passed by reference, so that an SHmedia trampoline
7488 loads them into the full 64-bit registers. */
7490 static int
7491 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7492 const_tree type, bool named)
7494 unsigned HOST_WIDE_INT size;
7496 if (type)
7497 size = int_size_in_bytes (type);
7498 else
7499 size = GET_MODE_SIZE (mode);
7501 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7502 && (!named
7503 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7504 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7505 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7506 && size > 4
7507 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7508 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7509 return size;
7510 else
7511 return 0;
7514 static bool
7515 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7516 const_tree type, bool named)
7518 if (targetm.calls.must_pass_in_stack (mode, type))
7519 return true;
7521 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7522 wants to know about pass-by-reference semantics for incoming
7523 arguments. */
7524 if (! cum)
7525 return false;
7527 if (TARGET_SHCOMPACT)
7529 cum->byref = shcompact_byref (cum, mode, type, named);
7530 return cum->byref != 0;
7533 return false;
7536 static bool
7537 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7538 const_tree type, bool named ATTRIBUTE_UNUSED)
7540 /* ??? How can it possibly be correct to return true only on the
7541 caller side of the equation? Is there someplace else in the
7542 sh backend that's magically producing the copies? */
7543 return (cum->outgoing
7544 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7545 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
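/* Return the number of bytes of an argument that are passed in registers
   when the remainder of the argument goes on the stack, or zero if the
   argument is not split between the two.  */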
7548 static int
7549 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7550 tree type, bool named ATTRIBUTE_UNUSED)
7552 int words = 0;
7554 if (!TARGET_SH5
7555 && PASS_IN_REG_P (*cum, mode, type)
7556 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7557 && (ROUND_REG (*cum, mode)
7558 + (mode != BLKmode
7559 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7560 : ROUND_ADVANCE (int_size_in_bytes (type)))
7561 > NPARM_REGS (mode)))
7562 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7564 else if (!TARGET_SHCOMPACT
7565 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7566 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7568 return words * UNITS_PER_WORD;
7572 /* Define where to put the arguments to a function.
7573 Value is zero to push the argument on the stack,
7574 or a hard register in which to store the argument.
7576 MODE is the argument's machine mode.
7577 TYPE is the data type of the argument (as a tree).
7578 This is null for libcalls where that information may
7579 not be available.
7580 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7581 the preceding args and about the function being called.
7582 NAMED is nonzero if this argument is a named parameter
7583 (otherwise it is an extra parameter matching an ellipsis).
7585 On SH the first args are normally in registers
7586 and the rest are pushed. Any arg that starts within the first
7587 NPARM_REGS words is at least partially passed in a register unless
7588 its data type forbids. */
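/* Illustrative example, assuming the usual SH argument registers r4..r7
   and fr4..fr11: for a non-SH5, non-Renesas call such as
       int f (int a, int b, int c, int d, int e);
   the first four integer arguments are passed in r4..r7 and e is pushed
   on the stack; SFmode arguments on SH4 are handled the same way using
   fr4..fr11.  */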
7592 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7593 tree type, int named)
7595 if (! TARGET_SH5 && mode == VOIDmode)
7596 return GEN_INT (ca->renesas_abi ? 1 : 0);
7598 if (! TARGET_SH5
7599 && PASS_IN_REG_P (*ca, mode, type)
7600 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7602 int regno;
7604 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7605 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7607 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7608 gen_rtx_REG (SFmode,
7609 BASE_ARG_REG (mode)
7610 + (ROUND_REG (*ca, mode) ^ 1)),
7611 const0_rtx);
7612 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7613 gen_rtx_REG (SFmode,
7614 BASE_ARG_REG (mode)
7615 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7616 GEN_INT (4));
7617 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7620 /* If the alignment of a DF value causes an SF register to be
7621 skipped, we will use that skipped register for the next SF
7622 value. */
7623 if ((TARGET_HITACHI || ca->renesas_abi)
7624 && ca->free_single_fp_reg
7625 && mode == SFmode)
7626 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7628 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7629 ^ (mode == SFmode && TARGET_SH4
7630 && TARGET_LITTLE_ENDIAN != 0
7631 && ! TARGET_HITACHI && ! ca->renesas_abi);
7632 return gen_rtx_REG (mode, regno);
7636 if (TARGET_SH5)
7638 if (mode == VOIDmode && TARGET_SHCOMPACT)
7639 return GEN_INT (ca->call_cookie);
7641 /* The following test assumes unnamed arguments are promoted to
7642 DFmode. */
7643 if (mode == SFmode && ca->free_single_fp_reg)
7644 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7646 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7647 && (named || ! ca->prototype_p)
7648 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7650 if (! ca->prototype_p && TARGET_SHMEDIA)
7651 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7653 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7654 FIRST_FP_PARM_REG
7655 + ca->arg_count[(int) SH_ARG_FLOAT]);
7658 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7659 && (! TARGET_SHCOMPACT
7660 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7661 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7662 type, named))))
7664 return gen_rtx_REG (mode, (FIRST_PARM_REG
7665 + ca->arg_count[(int) SH_ARG_INT]));
7668 return 0;
7671 return 0;
7674 /* Update the data in CUM to advance over an argument
7675 of mode MODE and data type TYPE.
7676 (TYPE is null for libcalls where that information may not be
7677 available.) */
7679 void
7680 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7681 tree type, int named)
7683 if (ca->force_mem)
7684 ca->force_mem = 0;
7685 else if (TARGET_SH5)
7687 tree type2 = (ca->byref && type
7688 ? TREE_TYPE (type)
7689 : type);
7690 enum machine_mode mode2 = (ca->byref && type
7691 ? TYPE_MODE (type2)
7692 : mode);
7693 int dwords = ((ca->byref
7694 ? ca->byref
7695 : mode2 == BLKmode
7696 ? int_size_in_bytes (type2)
7697 : GET_MODE_SIZE (mode2)) + 7) / 8;
7698 int numregs = MIN (dwords, NPARM_REGS (SImode)
7699 - ca->arg_count[(int) SH_ARG_INT]);
7701 if (numregs)
7703 ca->arg_count[(int) SH_ARG_INT] += numregs;
7704 if (TARGET_SHCOMPACT
7705 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7707 ca->call_cookie
7708 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7709 - numregs, 1);
7710 /* N.B. We want this also for outgoing. */
7711 ca->stack_regs += numregs;
7713 else if (ca->byref)
7715 if (! ca->outgoing)
7716 ca->stack_regs += numregs;
7717 ca->byref_regs += numregs;
7718 ca->byref = 0;
7720 ca->call_cookie
7721 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7722 - numregs, 2);
7723 while (--numregs);
7724 ca->call_cookie
7725 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7726 - 1, 1);
7728 else if (dwords > numregs)
7730 int pushregs = numregs;
7732 if (TARGET_SHCOMPACT)
7733 ca->stack_regs += numregs;
7734 while (pushregs < NPARM_REGS (SImode) - 1
7735 && (CALL_COOKIE_INT_REG_GET
7736 (ca->call_cookie,
7737 NPARM_REGS (SImode) - pushregs)
7738 == 1))
7740 ca->call_cookie
7741 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7742 - pushregs, 1);
7743 pushregs++;
7745 if (numregs == NPARM_REGS (SImode))
7746 ca->call_cookie
7747 |= CALL_COOKIE_INT_REG (0, 1)
7748 | CALL_COOKIE_STACKSEQ (numregs - 1);
7749 else
7750 ca->call_cookie
7751 |= CALL_COOKIE_STACKSEQ (numregs);
7754 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7755 && (named || ! ca->prototype_p))
7757 if (mode2 == SFmode && ca->free_single_fp_reg)
7758 ca->free_single_fp_reg = 0;
7759 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7760 < NPARM_REGS (SFmode))
7762 int numfpregs
7763 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7764 NPARM_REGS (SFmode)
7765 - ca->arg_count[(int) SH_ARG_FLOAT]);
7767 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7769 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7771 if (ca->outgoing && numregs > 0)
7774 ca->call_cookie
7775 |= (CALL_COOKIE_INT_REG
7776 (ca->arg_count[(int) SH_ARG_INT]
7777 - numregs + ((numfpregs - 2) / 2),
7778 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7779 - numfpregs) / 2));
7781 while (numfpregs -= 2);
7783 else if (mode2 == SFmode && (named)
7784 && (ca->arg_count[(int) SH_ARG_FLOAT]
7785 < NPARM_REGS (SFmode)))
7786 ca->free_single_fp_reg
7787 = FIRST_FP_PARM_REG - numfpregs
7788 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7791 return;
7794 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7796 /* Note that we've used the skipped register. */
7797 if (mode == SFmode && ca->free_single_fp_reg)
7799 ca->free_single_fp_reg = 0;
7800 return;
7802 /* When we have a DF after an SF, there's an SF register that gets
7803 skipped in order to align the DF value. We note this skipped
7804 register, because the next SF value will use it, and not the
7805 SF that follows the DF. */
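/* Illustrative example, assuming fr4..fr11 argument registers and
   even-register alignment for DFmode: for f (float a, double b, float c),
   a is passed in fr4, b skips fr5 so that it can start on the even pair
   fr6/fr7, and the next SFmode argument c then reuses the skipped fr5.  */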
7806 if (mode == DFmode
7807 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7809 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7810 + BASE_ARG_REG (mode));
7814 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7815 || PASS_IN_REG_P (*ca, mode, type))
7816 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7817 = (ROUND_REG (*ca, mode)
7818 + (mode == BLKmode
7819 ? ROUND_ADVANCE (int_size_in_bytes (type))
7820 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7823 /* The Renesas calling convention doesn't quite fit into this scheme since
7824 the address is passed like an invisible argument, but one that is always
7825 passed in memory. */
7826 static rtx
7827 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7829 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7830 return 0;
7831 return gen_rtx_REG (Pmode, 2);
7834 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7836 static bool
7837 sh_return_in_memory (const_tree type, const_tree fndecl)
7839 if (TARGET_SH5)
7841 if (TYPE_MODE (type) == BLKmode)
7842 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7843 else
7844 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7846 else
7848 return (TYPE_MODE (type) == BLKmode
7849 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7850 && TREE_CODE (type) == RECORD_TYPE));
7854 /* We actually emit the code in sh_expand_prologue. We used to use
7855 a static variable to flag that we need to emit this code, but that
7856 doesn't work when inlining, when functions are deferred and then emitted
7857 later. Fortunately, we already have two flags that are part of struct
7858 function that tell if a function uses varargs or stdarg. */
7859 static void
7860 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7861 enum machine_mode mode,
7862 tree type,
7863 int *pretend_arg_size,
7864 int second_time ATTRIBUTE_UNUSED)
7866 gcc_assert (cfun->stdarg);
7867 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7869 int named_parm_regs, anon_parm_regs;
7871 named_parm_regs = (ROUND_REG (*ca, mode)
7872 + (mode == BLKmode
7873 ? ROUND_ADVANCE (int_size_in_bytes (type))
7874 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7875 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7876 if (anon_parm_regs > 0)
7877 *pretend_arg_size = anon_parm_regs * 4;
7881 static bool
7882 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7884 return TARGET_SH5;
7887 static bool
7888 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7890 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7894 /* Define the offset between two registers, one to be eliminated, and
7895 the other its replacement, at the start of a routine. */
7898 initial_elimination_offset (int from, int to)
7900 int regs_saved;
7901 int regs_saved_rounding = 0;
7902 int total_saved_regs_space;
7903 int total_auto_space;
7904 int save_flags = target_flags;
7905 int copy_flags;
7906 HARD_REG_SET live_regs_mask;
7908 shmedia_space_reserved_for_target_registers = false;
7909 regs_saved = calc_live_regs (&live_regs_mask);
7910 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7912 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7914 shmedia_space_reserved_for_target_registers = true;
7915 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7918 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7919 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7920 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7922 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7923 copy_flags = target_flags;
7924 target_flags = save_flags;
7926 total_saved_regs_space = regs_saved + regs_saved_rounding;
7928 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7929 return total_saved_regs_space + total_auto_space
7930 + crtl->args.info.byref_regs * 8;
7932 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7933 return total_saved_regs_space + total_auto_space
7934 + crtl->args.info.byref_regs * 8;
7936 /* Initial gap between fp and sp is 0. */
7937 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7938 return 0;
7940 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7941 return rounded_frame_size (0);
7943 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7944 return rounded_frame_size (0);
7946 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7947 && (to == HARD_FRAME_POINTER_REGNUM
7948 || to == STACK_POINTER_REGNUM));
7949 if (TARGET_SH5)
7951 int n = total_saved_regs_space;
7952 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7953 save_schedule schedule;
7954 save_entry *entry;
7956 n += total_auto_space;
7958 /* If it wasn't saved, there's not much we can do. */
7959 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7960 return n;
7962 target_flags = copy_flags;
7964 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7965 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7966 if (entry->reg == pr_reg)
7968 target_flags = save_flags;
7969 return entry->offset;
7971 gcc_unreachable ();
7973 else
7974 return total_auto_space;
7977 /* Parse the -mfixed-range= option string. */
7978 void
7979 sh_fix_range (const char *const_str)
7981 int i, first, last;
7982 char *str, *dash, *comma;
7984 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
7985 REG2 are either register names or register numbers. The effect
7986 of this option is to mark the registers in the range from REG1 to
7987 REG2 as ``fixed'' so they won't be used by the compiler. */
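/* For example (hypothetical option value), -mfixed-range=r10-r12,r14-r14
   would mark r10, r11, r12 and r14 as fixed and call-used, so the
   register allocator never uses them.  */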
7989 i = strlen (const_str);
7990 str = (char *) alloca (i + 1);
7991 memcpy (str, const_str, i + 1);
7993 while (1)
7995 dash = strchr (str, '-');
7996 if (!dash)
7998 warning (0, "value of -mfixed-range must have form REG1-REG2");
7999 return;
8001 *dash = '\0';
8002 comma = strchr (dash + 1, ',');
8003 if (comma)
8004 *comma = '\0';
8006 first = decode_reg_name (str);
8007 if (first < 0)
8009 warning (0, "unknown register name: %s", str);
8010 return;
8013 last = decode_reg_name (dash + 1);
8014 if (last < 0)
8016 warning (0, "unknown register name: %s", dash + 1);
8017 return;
8020 *dash = '-';
8022 if (first > last)
8024 warning (0, "%s-%s is an empty range", str, dash + 1);
8025 return;
8028 for (i = first; i <= last; ++i)
8029 fixed_regs[i] = call_used_regs[i] = 1;
8031 if (!comma)
8032 break;
8034 *comma = ',';
8035 str = comma + 1;
8039 /* Insert any deferred function attributes from earlier pragmas. */
8040 static void
8041 sh_insert_attributes (tree node, tree *attributes)
8043 tree attrs;
8045 if (TREE_CODE (node) != FUNCTION_DECL)
8046 return;
8048 /* We are only interested in declarations. */
8049 if (!DECL_P (node))
8050 return;
8052 /* Append the attributes to the deferred attributes. */
8053 *sh_deferred_function_attributes_tail = *attributes;
8054 attrs = sh_deferred_function_attributes;
8055 if (!attrs)
8056 return;
8058 /* Some attributes imply or require the interrupt attribute. */
8059 if (!lookup_attribute ("interrupt_handler", attrs)
8060 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8062 /* If we have a trapa_handler, but no interrupt_handler attribute,
8063 insert an interrupt_handler attribute. */
8064 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8065 /* We can't use sh_pr_interrupt here because that's not in the
8066 java frontend. */
8067 attrs
8068 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8069 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8070 if the interrupt attribute is missing, we ignore the attribute
8071 and warn. */
8072 else if (lookup_attribute ("sp_switch", attrs)
8073 || lookup_attribute ("trap_exit", attrs)
8074 || lookup_attribute ("nosave_low_regs", attrs)
8075 || lookup_attribute ("resbank", attrs))
8077 tree *tail;
8079 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8081 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8082 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8083 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8084 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8085 warning (OPT_Wattributes,
8086 "%qs attribute only applies to interrupt functions",
8087 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8088 else
8090 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8091 NULL_TREE);
8092 tail = &TREE_CHAIN (*tail);
8095 attrs = *attributes;
8099 /* Install the processed list. */
8100 *attributes = attrs;
8102 /* Clear deferred attributes. */
8103 sh_deferred_function_attributes = NULL_TREE;
8104 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8106 return;
8109 /* Supported attributes:
8111 interrupt_handler -- specifies this function is an interrupt handler.
8113 trapa_handler - like above, but don't save all registers.
8115 sp_switch -- specifies an alternate stack for an interrupt handler
8116 to run on.
8118 trap_exit -- use a trapa to exit an interrupt function instead of
8119 an rte instruction.
8121 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8122 This is useful on the SH3 and upwards,
8123 which have a separate set of low regs for User and Supervisor modes.
8124 This should only be used for the lowest level of interrupts. Higher levels
8125 of interrupts must save the registers in case they themselves are
8126 interrupted.
8128 renesas -- use Renesas calling/layout conventions (functions and
8129 structures).
8131 resbank -- In case of an ISR, use a register bank to save registers
8132 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
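/* Illustrative usage of the attributes above (the trap number, vector
   number and stack variable name are arbitrary example values):

     void isr1 (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void isr3 (void) __attribute__ ((interrupt_handler, resbank));
     void fv (void) __attribute__ ((function_vector (18)));

   sp_switch names a variable that holds the address of the alternate
   stack, trap_exit gives the trapa number used to leave the handler, and
   function_vector (SH2A only) takes a vector number between 0 and 255.  */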
8135 const struct attribute_spec sh_attribute_table[] =
8137 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8138 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8139 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8140 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8141 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8142 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8143 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8144 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8145 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8146 #ifdef SYMBIAN
8147 /* Symbian support adds two new attributes:
8148 dllexport - for exporting a function/variable that will live in a dll
8149 dllimport - for importing a function/variable from a dll
8151 Microsoft allows multiple declspecs in one __declspec, separating
8152 them with spaces. We do NOT support this. Instead, use __declspec
8153 multiple times. */
8154 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8155 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8156 #endif
8157 { NULL, 0, 0, false, false, false, NULL }
8160 /* Handle a 'resbank' attribute. */
8161 static tree
8162 sh_handle_resbank_handler_attribute (tree * node, tree name,
8163 tree args ATTRIBUTE_UNUSED,
8164 int flags ATTRIBUTE_UNUSED,
8165 bool * no_add_attrs)
8167 if (!TARGET_SH2A)
8169 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8170 IDENTIFIER_POINTER (name));
8171 *no_add_attrs = true;
8173 if (TREE_CODE (*node) != FUNCTION_DECL)
8175 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8176 IDENTIFIER_POINTER (name));
8177 *no_add_attrs = true;
8180 return NULL_TREE;
8183 /* Handle an "interrupt_handler" attribute; arguments as in
8184 struct attribute_spec.handler. */
8185 static tree
8186 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8187 tree args ATTRIBUTE_UNUSED,
8188 int flags ATTRIBUTE_UNUSED,
8189 bool *no_add_attrs)
8191 if (TREE_CODE (*node) != FUNCTION_DECL)
8193 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8194 IDENTIFIER_POINTER (name));
8195 *no_add_attrs = true;
8197 else if (TARGET_SHCOMPACT)
8199 error ("attribute interrupt_handler is not compatible with -m5-compact");
8200 *no_add_attrs = true;
8203 return NULL_TREE;
8206 /* Handle a 'function_vector' attribute; arguments as in
8207 struct attribute_spec.handler. */
8208 static tree
8209 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8210 tree args ATTRIBUTE_UNUSED,
8211 int flags ATTRIBUTE_UNUSED,
8212 bool * no_add_attrs)
8214 if (!TARGET_SH2A)
8216 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8217 IDENTIFIER_POINTER (name));
8218 *no_add_attrs = true;
8220 else if (TREE_CODE (*node) != FUNCTION_DECL)
8222 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8223 IDENTIFIER_POINTER (name));
8224 *no_add_attrs = true;
8226 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8228 /* The argument must be a constant integer. */
8229 warning (OPT_Wattributes,
8230 "`%s' attribute argument not an integer constant",
8231 IDENTIFIER_POINTER (name));
8232 *no_add_attrs = true;
8234 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8236 /* The argument value must be between 0 and 255. */
8237 warning (OPT_Wattributes,
8238 "`%s' attribute argument should be between 0 and 255",
8239 IDENTIFIER_POINTER (name));
8240 *no_add_attrs = true;
8242 return NULL_TREE;
8245 /* Returns 1 if current function has been assigned the attribute
8246 'function_vector'. */
8248 sh2a_is_function_vector_call (rtx x)
8250 if (GET_CODE (x) == SYMBOL_REF
8251 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8253 tree tr = SYMBOL_REF_DECL (x);
8255 if (sh2a_function_vector_p (tr))
8256 return 1;
8259 return 0;
8262 /* Returns the function vector number, if the attribute
8263 'function_vector' is assigned, otherwise returns zero. */
8265 sh2a_get_function_vector_number (rtx x)
8267 int num;
8268 tree list, t;
8270 if ((GET_CODE (x) == SYMBOL_REF)
8271 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8273 t = SYMBOL_REF_DECL (x);
8275 if (TREE_CODE (t) != FUNCTION_DECL)
8276 return 0;
8278 list = SH_ATTRIBUTES (t);
8279 while (list)
8281 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8283 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8284 return num;
8287 list = TREE_CHAIN (list);
8290 return 0;
8292 else
8293 return 0;
8296 /* Handle an "sp_switch" attribute; arguments as in
8297 struct attribute_spec.handler. */
8298 static tree
8299 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8300 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8302 if (TREE_CODE (*node) != FUNCTION_DECL)
8304 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8305 IDENTIFIER_POINTER (name));
8306 *no_add_attrs = true;
8308 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8310 /* The argument must be a constant string. */
8311 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8312 IDENTIFIER_POINTER (name));
8313 *no_add_attrs = true;
8316 return NULL_TREE;
8319 /* Handle a "trap_exit" attribute; arguments as in
8320 struct attribute_spec.handler. */
8321 static tree
8322 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8323 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8325 if (TREE_CODE (*node) != FUNCTION_DECL)
8327 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8328 IDENTIFIER_POINTER (name));
8329 *no_add_attrs = true;
8331 /* The argument specifies a trap number to be used in a trapa instruction
8332 at function exit (instead of an rte instruction). */
8333 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8335 /* The argument must be a constant integer. */
8336 warning (OPT_Wattributes, "%qs attribute argument not an "
8337 "integer constant", IDENTIFIER_POINTER (name));
8338 *no_add_attrs = true;
8341 return NULL_TREE;
8344 static tree
8345 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8346 tree name ATTRIBUTE_UNUSED,
8347 tree args ATTRIBUTE_UNUSED,
8348 int flags ATTRIBUTE_UNUSED,
8349 bool *no_add_attrs ATTRIBUTE_UNUSED)
8351 return NULL_TREE;
8354 /* True if __attribute__((renesas)) or -mrenesas. */
8356 sh_attr_renesas_p (const_tree td)
8358 if (TARGET_HITACHI)
8359 return 1;
8360 if (td == 0)
8361 return 0;
8362 if (DECL_P (td))
8363 td = TREE_TYPE (td);
8364 if (td == error_mark_node)
8365 return 0;
8366 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8367 != NULL_TREE);
8370 /* True if __attribute__((renesas)) or -mrenesas, for the current
8371 function. */
8373 sh_cfun_attr_renesas_p (void)
8375 return sh_attr_renesas_p (current_function_decl);
8379 sh_cfun_interrupt_handler_p (void)
8381 return (lookup_attribute ("interrupt_handler",
8382 DECL_ATTRIBUTES (current_function_decl))
8383 != NULL_TREE);
8386 /* Returns 1 if FUNC has been assigned the attribute
8387 "function_vector". */
8389 sh2a_function_vector_p (tree func)
8391 tree list;
8392 if (TREE_CODE (func) != FUNCTION_DECL)
8393 return 0;
8395 list = SH_ATTRIBUTES (func);
8396 while (list)
8398 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8399 return 1;
8401 list = TREE_CHAIN (list);
8403 return 0;
8406 /* Returns TRUE if the current function has the "resbank" attribute. */
8409 sh_cfun_resbank_handler_p (void)
8411 return ((lookup_attribute ("resbank",
8412 DECL_ATTRIBUTES (current_function_decl))
8413 != NULL_TREE)
8414 && (lookup_attribute ("interrupt_handler",
8415 DECL_ATTRIBUTES (current_function_decl))
8416 != NULL_TREE) && TARGET_SH2A);
8419 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8421 static const char *
8422 sh_check_pch_target_flags (int old_flags)
8424 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8425 | MASK_SH_E | MASK_HARD_SH4
8426 | MASK_FPU_SINGLE | MASK_SH4))
8427 return _("created and used with different architectures / ABIs");
8428 if ((old_flags ^ target_flags) & MASK_HITACHI)
8429 return _("created and used with different ABIs");
8430 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8431 return _("created and used with different endianness");
8432 return NULL;
8435 /* Predicates used by the templates. */
8437 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8438 Used only in general_movsrc_operand. */
8441 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8443 switch (REGNO (op))
8445 case PR_REG:
8446 case MACL_REG:
8447 case MACH_REG:
8448 return 1;
8450 return 0;
8453 /* Nonzero if OP is a floating point value with value 0.0. */
8456 fp_zero_operand (rtx op)
8458 REAL_VALUE_TYPE r;
8460 if (GET_MODE (op) != SFmode)
8461 return 0;
8463 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8464 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8467 /* Nonzero if OP is a floating point value with value 1.0. */
8470 fp_one_operand (rtx op)
8472 REAL_VALUE_TYPE r;
8474 if (GET_MODE (op) != SFmode)
8475 return 0;
8477 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8478 return REAL_VALUES_EQUAL (r, dconst1);
8481 /* For -m4 and -m4-single-only, mode switching is used. If we are
8482 compiling without -mfmovd, movsf_ie isn't taken into account for
8483 mode switching. We could check in machine_dependent_reorg for
8484 cases where we know we are in single precision mode, but there is no
8485 interface to find that out during reload, so we must avoid
8486 choosing an fldi alternative during reload and thus failing to
8487 allocate a scratch register for the constant loading. */
8489 fldi_ok (void)
8491 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8495 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8497 enum rtx_code code = GET_CODE (op);
8498 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8501 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8503 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8505 if (GET_CODE (op) != SYMBOL_REF)
8506 return 0;
8507 return SYMBOL_REF_TLS_MODEL (op);
8510 /* Return the destination address of a branch. */
8512 static int
8513 branch_dest (rtx branch)
8515 rtx dest = SET_SRC (PATTERN (branch));
8516 int dest_uid;
8518 if (GET_CODE (dest) == IF_THEN_ELSE)
8519 dest = XEXP (dest, 1);
8520 dest = XEXP (dest, 0);
8521 dest_uid = INSN_UID (dest);
8522 return INSN_ADDRESSES (dest_uid);
8525 /* Return nonzero if REG is not used after INSN.
8526 We assume REG is a reload reg, and therefore does
8527 not live past labels. It may live past calls or jumps though. */
8529 reg_unused_after (rtx reg, rtx insn)
8531 enum rtx_code code;
8532 rtx set;
8534 /* If the reg is set by this instruction, then it is safe for our
8535 case. Disregard the case where this is a store to memory, since
8536 we are checking a register used in the store address. */
8537 set = single_set (insn);
8538 if (set && GET_CODE (SET_DEST (set)) != MEM
8539 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8540 return 1;
8542 while ((insn = NEXT_INSN (insn)))
8544 rtx set;
8545 if (!INSN_P (insn))
8546 continue;
8548 code = GET_CODE (insn);
8550 #if 0
8551 /* If this is a label that existed before reload, then the register
8552 is dead here. However, if this is a label added by reorg, then
8553 the register may still be live here. We can't tell the difference,
8554 so we just ignore labels completely. */
8555 if (code == CODE_LABEL)
8556 return 1;
8557 /* else */
8558 #endif
8560 if (code == JUMP_INSN)
8561 return 0;
8563 /* If this is a sequence, we must handle them all at once.
8564 We could have for instance a call that sets the target register,
8565 and an insn in a delay slot that uses the register. In this case,
8566 we must return 0. */
8567 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8569 int i;
8570 int retval = 0;
8572 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8574 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8575 rtx set = single_set (this_insn);
8577 if (GET_CODE (this_insn) == CALL_INSN)
8578 code = CALL_INSN;
8579 else if (GET_CODE (this_insn) == JUMP_INSN)
8581 if (INSN_ANNULLED_BRANCH_P (this_insn))
8582 return 0;
8583 code = JUMP_INSN;
8586 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8587 return 0;
8588 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8590 if (GET_CODE (SET_DEST (set)) != MEM)
8591 retval = 1;
8592 else
8593 return 0;
8595 if (set == 0
8596 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8597 return 0;
8599 if (retval == 1)
8600 return 1;
8601 else if (code == JUMP_INSN)
8602 return 0;
8605 set = single_set (insn);
8606 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8607 return 0;
8608 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8609 return GET_CODE (SET_DEST (set)) != MEM;
8610 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8611 return 0;
8613 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8614 return 1;
8616 return 1;
8619 #include "ggc.h"
8621 static GTY(()) rtx fpscr_rtx;
8623 get_fpscr_rtx (void)
8625 if (! fpscr_rtx)
8627 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8628 REG_USERVAR_P (fpscr_rtx) = 1;
8629 mark_user_reg (fpscr_rtx);
8631 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8632 mark_user_reg (fpscr_rtx);
8633 return fpscr_rtx;
8636 static GTY(()) tree fpscr_values;
8638 static void
8639 emit_fpu_switch (rtx scratch, int index)
8641 rtx dst, src;
8643 if (fpscr_values == NULL)
8645 tree t;
8647 t = build_index_type (integer_one_node);
8648 t = build_array_type (integer_type_node, t);
8649 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8650 DECL_ARTIFICIAL (t) = 1;
8651 DECL_IGNORED_P (t) = 1;
8652 DECL_EXTERNAL (t) = 1;
8653 TREE_STATIC (t) = 1;
8654 TREE_PUBLIC (t) = 1;
8655 TREE_USED (t) = 1;
8657 fpscr_values = t;
8660 src = DECL_RTL (fpscr_values);
8661 if (!can_create_pseudo_p ())
8663 emit_move_insn (scratch, XEXP (src, 0));
8664 if (index != 0)
8665 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8666 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8668 else
8669 src = adjust_address (src, PSImode, index * 4);
8671 dst = get_fpscr_rtx ();
8672 emit_move_insn (dst, src);
8675 void
8676 emit_sf_insn (rtx pat)
8678 emit_insn (pat);
8681 void
8682 emit_df_insn (rtx pat)
8684 emit_insn (pat);
8687 void
8688 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8690 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8693 void
8694 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8696 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8697 get_fpscr_rtx ()));
8700 void
8701 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8703 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8706 void
8707 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8709 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8710 get_fpscr_rtx ()));
8713 static rtx get_free_reg (HARD_REG_SET);
8715 /* This function returns a register to use to load the address to load
8716 the fpscr from. Currently it always returns r1 or r7, but when we are
8717 able to use pseudo registers after combine, or have a better mechanism
8718 for choosing a register, it should be done here. */
8719 /* REGS_LIVE is the liveness information for the point for which we
8720 need this allocation. In some bare-bones exit blocks, r1 is live at the
8721 start. We can even have all of r0..r3 being live:
8722 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8723 The INSN before which the new insns are placed will clobber the register
8724 we return. If a basic block consists only of setting the return value
8725 register to a pseudo and using that register, the return value is not
8726 live before or after this block, yet we'll insert our insns right in
8727 the middle. */
8729 static rtx
8730 get_free_reg (HARD_REG_SET regs_live)
8732 if (! TEST_HARD_REG_BIT (regs_live, 1))
8733 return gen_rtx_REG (Pmode, 1);
8735 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8736 there shouldn't be anything but a jump before the function end. */
8737 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8738 return gen_rtx_REG (Pmode, 7);
8741 /* This function will set the fpscr from memory.
8742 MODE is the mode we are setting it to. */
8743 void
8744 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8746 enum attr_fp_mode fp_mode = mode;
8747 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8748 rtx addr_reg;
8750 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8751 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8754 /* Is the given character a logical line separator for the assembler? */
8755 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8756 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8757 #endif
8760 sh_insn_length_adjustment (rtx insn)
8762 /* Instructions with unfilled delay slots take up an extra two bytes for
8763 the nop in the delay slot. */
8764 if (((GET_CODE (insn) == INSN
8765 && GET_CODE (PATTERN (insn)) != USE
8766 && GET_CODE (PATTERN (insn)) != CLOBBER)
8767 || GET_CODE (insn) == CALL_INSN
8768 || (GET_CODE (insn) == JUMP_INSN
8769 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8770 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8771 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8772 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8773 return 2;
8775 /* SH2e has a bug that prevents the use of annulled branches, so if
8776 the delay slot is not filled, we'll have to put a NOP in it. */
8777 if (sh_cpu == CPU_SH2E
8778 && GET_CODE (insn) == JUMP_INSN
8779 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8780 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8781 && get_attr_type (insn) == TYPE_CBRANCH
8782 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8783 return 2;
8785 /* sh-dsp parallel processing insns take four bytes instead of two. */
8787 if (GET_CODE (insn) == INSN)
8789 int sum = 0;
8790 rtx body = PATTERN (insn);
8791 const char *template;
8792 char c;
8793 int maybe_label = 1;
8795 if (GET_CODE (body) == ASM_INPUT)
8796 template = XSTR (body, 0);
8797 else if (asm_noperands (body) >= 0)
8798 template
8799 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8800 else
8801 return 0;
8804 int ppi_adjust = 0;
8807 c = *template++;
8808 while (c == ' ' || c == '\t');
8809 /* all sh-dsp parallel-processing insns start with p.
8810 The only non-ppi sh insn starting with p is pref.
8811 The only ppi starting with pr is prnd. */
8812 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8813 ppi_adjust = 2;
8814 /* The repeat pseudo-insn expands to three insns, a total of
8815 six bytes in size. */
8816 else if ((c == 'r' || c == 'R')
8817 && ! strncasecmp ("epeat", template, 5))
8818 ppi_adjust = 4;
8819 while (c && c != '\n'
8820 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8822 /* If this is a label, it is obviously not a ppi insn. */
8823 if (c == ':' && maybe_label)
8825 ppi_adjust = 0;
8826 break;
8828 else if (c == '\'' || c == '"')
8829 maybe_label = 0;
8830 c = *template++;
8832 sum += ppi_adjust;
8833 maybe_label = c != ':';
8835 while (c);
8836 return sum;
8838 return 0;
8841 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8842 isn't protected by a PIC unspec. */
8844 nonpic_symbol_mentioned_p (rtx x)
8846 register const char *fmt;
8847 register int i;
8849 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8850 || GET_CODE (x) == PC)
8851 return 1;
8853 /* We don't want to look into the possible MEM location of a
8854 CONST_DOUBLE, since we're not going to use it, in general. */
8855 if (GET_CODE (x) == CONST_DOUBLE)
8856 return 0;
8858 if (GET_CODE (x) == UNSPEC
8859 && (XINT (x, 1) == UNSPEC_PIC
8860 || XINT (x, 1) == UNSPEC_GOT
8861 || XINT (x, 1) == UNSPEC_GOTOFF
8862 || XINT (x, 1) == UNSPEC_GOTPLT
8863 || XINT (x, 1) == UNSPEC_GOTTPOFF
8864 || XINT (x, 1) == UNSPEC_DTPOFF
8865 || XINT (x, 1) == UNSPEC_PLT))
8866 return 0;
8868 fmt = GET_RTX_FORMAT (GET_CODE (x));
8869 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8871 if (fmt[i] == 'E')
8873 register int j;
8875 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8876 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8877 return 1;
8879 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8880 return 1;
8883 return 0;
8886 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8887 @GOTOFF in `reg'. */
8889 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8890 rtx reg)
8892 if (tls_symbolic_operand (orig, Pmode))
8893 return orig;
8895 if (GET_CODE (orig) == LABEL_REF
8896 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8898 if (reg == 0)
8899 reg = gen_reg_rtx (Pmode);
8901 emit_insn (gen_symGOTOFF2reg (reg, orig));
8902 return reg;
8904 else if (GET_CODE (orig) == SYMBOL_REF)
8906 if (reg == 0)
8907 reg = gen_reg_rtx (Pmode);
8909 emit_insn (gen_symGOT2reg (reg, orig));
8910 return reg;
8912 return orig;
8915 /* Mark the use of a constant in the literal table. If the constant
8916 has multiple labels, make it unique. */
8917 static rtx
8918 mark_constant_pool_use (rtx x)
8920 rtx insn, lab, pattern;
8922 if (x == NULL)
8923 return x;
8925 switch (GET_CODE (x))
8927 case LABEL_REF:
8928 x = XEXP (x, 0);
8929 case CODE_LABEL:
8930 break;
8931 default:
8932 return x;
8935 /* Get the first label in the list of labels for the same constant
8936 and delete the other labels in the list. */
8937 lab = x;
8938 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8940 if (GET_CODE (insn) != CODE_LABEL
8941 || LABEL_REFS (insn) != NEXT_INSN (insn))
8942 break;
8943 lab = insn;
8946 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8947 INSN_DELETED_P (insn) = 1;
8949 /* Mark constants in a window. */
8950 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8952 if (GET_CODE (insn) != INSN)
8953 continue;
8955 pattern = PATTERN (insn);
8956 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8957 continue;
8959 switch (XINT (pattern, 1))
8961 case UNSPECV_CONST2:
8962 case UNSPECV_CONST4:
8963 case UNSPECV_CONST8:
8964 XVECEXP (pattern, 0, 1) = const1_rtx;
8965 break;
8966 case UNSPECV_WINDOW_END:
8967 if (XVECEXP (pattern, 0, 0) == x)
8968 return lab;
8969 break;
8970 case UNSPECV_CONST_END:
8971 return lab;
8972 default:
8973 break;
8977 return lab;
8980 /* Return true if it's possible to redirect BRANCH1 to the destination
8981 of an unconditional jump BRANCH2. We only want to do this if the
8982 resulting branch will have a short displacement. */
8984 sh_can_redirect_branch (rtx branch1, rtx branch2)
8986 if (flag_expensive_optimizations && simplejump_p (branch2))
8988 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8989 rtx insn;
8990 int distance;
8992 for (distance = 0, insn = NEXT_INSN (branch1);
8993 insn && distance < 256;
8994 insn = PREV_INSN (insn))
8996 if (insn == dest)
8997 return 1;
8998 else
8999 distance += get_attr_length (insn);
9001 for (distance = 0, insn = NEXT_INSN (branch1);
9002 insn && distance < 256;
9003 insn = NEXT_INSN (insn))
9005 if (insn == dest)
9006 return 1;
9007 else
9008 distance += get_attr_length (insn);
9011 return 0;
9014 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9016 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9017 unsigned int new_reg)
9019 /* Interrupt functions can only use registers that have already been
9020 saved by the prologue, even if they would normally be
9021 call-clobbered. */
9023 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9024 return 0;
9026 return 1;
9029 /* Function to update the integer COST
9030 based on the relationship between INSN that is dependent on
9031 DEP_INSN through the dependence LINK. The default is to make no
9032 adjustment to COST. This can be used for example to specify to
9033 the scheduler that an output- or anti-dependence does not incur
9034 the same cost as a data-dependence. The return value should be
9035 the new value for COST. */
9036 static int
9037 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9039 rtx reg, use_pat;
9041 if (TARGET_SHMEDIA)
9043 /* On SHmedia, if the dependence is an anti-dependence or
9044 output-dependence, there is no cost. */
9045 if (REG_NOTE_KIND (link) != 0)
9047 /* However, dependencies between target register loads and
9048 uses of the register in a subsequent block that are separated
9049 by a conditional branch are not modelled - we have to make do with
9050 the anti-dependency between the target register load and the
9051 conditional branch that ends the current block. */
9052 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9053 && GET_CODE (PATTERN (dep_insn)) == SET
9054 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9055 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9056 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9058 int orig_cost = cost;
9059 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9060 rtx target = ((! note
9061 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9062 ? insn : JUMP_LABEL (insn));
9063 /* On the likely path, the branch costs 1, on the unlikely path,
9064 it costs 3. */
9065 cost--;
9067 target = next_active_insn (target);
9068 while (target && ! flow_dependent_p (target, dep_insn)
9069 && --cost > 0);
9070 /* If two branches are executed in immediate succession, with the
9071 first branch properly predicted, this causes a stall at the
9072 second branch, hence we won't need the target for the
9073 second branch for two cycles after the launch of the first
9074 branch. */
9075 if (cost > orig_cost - 2)
9076 cost = orig_cost - 2;
9078 else
9079 cost = 0;
9082 else if (get_attr_is_mac_media (insn)
9083 && get_attr_is_mac_media (dep_insn))
9084 cost = 1;
9086 else if (! reload_completed
9087 && GET_CODE (PATTERN (insn)) == SET
9088 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9089 && GET_CODE (PATTERN (dep_insn)) == SET
9090 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9091 && cost < 4)
9092 cost = 4;
9093 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9094 that is needed at the target. */
9095 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9096 && ! flow_dependent_p (insn, dep_insn))
9097 cost--;
9099 else if (REG_NOTE_KIND (link) == 0)
9101 enum attr_type type;
9102 rtx dep_set;
9104 if (recog_memoized (insn) < 0
9105 || recog_memoized (dep_insn) < 0)
9106 return cost;
9108 dep_set = single_set (dep_insn);
9110 /* The latency that we specify in the scheduling description refers
9111 to the actual output, not to an auto-increment register; for that,
9112 the latency is one. */
9113 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9115 rtx set = single_set (insn);
9117 if (set
9118 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9119 && (!MEM_P (SET_DEST (set))
9120 || !reg_mentioned_p (SET_DEST (dep_set),
9121 XEXP (SET_DEST (set), 0))))
9122 cost = 1;
9124 /* The only input for a call that is timing-critical is the
9125 function's address. */
9126 if (GET_CODE (insn) == CALL_INSN)
9128 rtx call = PATTERN (insn);
9130 if (GET_CODE (call) == PARALLEL)
9131 call = XVECEXP (call, 0 ,0);
9132 if (GET_CODE (call) == SET)
9133 call = SET_SRC (call);
9134 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9135 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9136 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9137 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9138 cost -= TARGET_SH4_300 ? 3 : 6;
9140 /* Likewise, the most timing-critical input for an sfunc call
9141 is the function address. However, sfuncs typically start
9142 using their arguments pretty quickly.
9143 Assume a four cycle delay for SH4 before they are needed.
9144 Cached ST40-300 calls are quicker, so assume only a one
9145 cycle delay there.
9146 ??? Maybe we should encode the delays till input registers
9147 are needed by sfuncs into the sfunc call insn. */
9148 /* All sfunc calls are parallels with at least four components.
9149 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9150 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9151 && XVECLEN (PATTERN (insn), 0) >= 4
9152 && (reg = sfunc_uses_reg (insn)))
9154 if (! reg_set_p (reg, dep_insn))
9155 cost -= TARGET_SH4_300 ? 1 : 4;
9157 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9159 enum attr_type dep_type = get_attr_type (dep_insn);
9161 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9162 cost--;
9163 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9164 && (type = get_attr_type (insn)) != TYPE_CALL
9165 && type != TYPE_SFUNC)
9166 cost--;
9167 /* When the preceding instruction loads the shift amount of
9168 the following SHAD/SHLD, the latency of the load is increased
9169 by 1 cycle. */
9170 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9171 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9172 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9173 XEXP (SET_SRC (single_set (insn)),
9174 1)))
9175 cost++;
9176 /* When an LS group instruction with a latency of less than
9177 3 cycles is followed by a double-precision floating-point
9178 instruction, FIPR, or FTRV, the latency of the first
9179 instruction is increased to 3 cycles. */
9180 else if (cost < 3
9181 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9182 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9183 cost = 3;
9184 /* The lsw register of a double-precision computation is ready one
9185 cycle earlier. */
9186 else if (reload_completed
9187 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9188 && (use_pat = single_set (insn))
9189 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9190 SET_SRC (use_pat)))
9191 cost -= 1;
9193 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9194 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9195 cost -= 1;
9197 else if (TARGET_SH4_300)
9199 /* Stores need their input register two cycles later. */
9200 if (dep_set && cost >= 1
9201 && ((type = get_attr_type (insn)) == TYPE_STORE
9202 || type == TYPE_PSTORE
9203 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9205 rtx set = single_set (insn);
9207 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9208 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9210 cost -= 2;
9211 /* But don't reduce the cost below 1 if the address depends
9212 on a side effect of dep_insn. */
9213 if (cost < 1
9214 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9215 cost = 1;
9220 /* An anti-dependence penalty of two applies if the first insn is a double
9221 precision fadd / fsub / fmul. */
9222 else if (!TARGET_SH4_300
9223 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9224 && recog_memoized (dep_insn) >= 0
9225 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9226 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9227 /* A lot of alleged anti-flow dependences are fake,
9228 so check this one is real. */
9229 && flow_dependent_p (dep_insn, insn))
9230 cost = 2;
9232 return cost;
9235 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9236 if DEP_INSN is anti-flow dependent on INSN. */
9237 static int
9238 flow_dependent_p (rtx insn, rtx dep_insn)
9240 rtx tmp = PATTERN (insn);
9242 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9243 return tmp == NULL_RTX;
9246 /* A helper function for flow_dependent_p called through note_stores. */
9247 static void
9248 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9250 rtx * pinsn = (rtx *) data;
9252 if (*pinsn && reg_referenced_p (x, *pinsn))
9253 *pinsn = NULL_RTX;
9256 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9257 'special function' patterns (type sfunc) that clobber pr, but that
9258 do not look like function calls to leaf_function_p. Hence we must
9259 do this extra check. */
9260 static int
9261 sh_pr_n_sets (void)
9263 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9266 /* Return where to allocate pseudo for a given hard register initial
9267 value. */
9268 static rtx
9269 sh_allocate_initial_value (rtx hard_reg)
9271 rtx x;
9273 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9275 if (current_function_is_leaf
9276 && ! sh_pr_n_sets ()
9277 && ! (TARGET_SHCOMPACT
9278 && ((crtl->args.info.call_cookie
9279 & ~ CALL_COOKIE_RET_TRAMP (1))
9280 || crtl->saves_all_registers)))
9281 x = hard_reg;
9282 else
9283 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9285 else
9286 x = NULL_RTX;
9288 return x;
9291 /* This function returns "2" to indicate dual issue for the SH4
9292 processor. To be used by the DFA pipeline description. */
9293 static int
9294 sh_issue_rate (void)
9296 if (TARGET_SUPERSCALAR)
9297 return 2;
9298 else
9299 return 1;
9302 /* Functions for ready queue reordering for sched1. */
9304 /* Get weight for mode for a set x. */
9305 static short
9306 find_set_regmode_weight (rtx x, enum machine_mode mode)
9308 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9309 return 1;
9310 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9312 if (GET_CODE (SET_DEST (x)) == REG)
9314 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9315 return 1;
9316 else
9317 return 0;
9319 return 1;
9321 return 0;
9324 /* Get regmode weight for insn. */
9325 static short
9326 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9328 short reg_weight = 0;
9329 rtx x;
9331 /* Increment weight for each register born here. */
9332 x = PATTERN (insn);
9333 reg_weight += find_set_regmode_weight (x, mode);
9334 if (GET_CODE (x) == PARALLEL)
9336 int j;
9337 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9339 x = XVECEXP (PATTERN (insn), 0, j);
9340 reg_weight += find_set_regmode_weight (x, mode);
9343 /* Decrement weight for each register that dies here. */
9344 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9346 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9348 rtx note = XEXP (x, 0);
9349 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9350 reg_weight--;
9353 return reg_weight;
9356 /* Calculate regmode weights for all insns of a basic block. */
9357 static void
9358 find_regmode_weight (basic_block b, enum machine_mode mode)
9360 rtx insn, next_tail, head, tail;
9362 get_ebb_head_tail (b, b, &head, &tail);
9363 next_tail = NEXT_INSN (tail);
9365 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9367 /* Handle register life information. */
9368 if (!INSN_P (insn))
9369 continue;
9371 if (mode == SFmode)
9372 INSN_REGMODE_WEIGHT (insn, mode) =
9373 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9374 else if (mode == SImode)
9375 INSN_REGMODE_WEIGHT (insn, mode) =
9376 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9380 /* Comparison function for ready queue sorting. */
9381 static int
9382 rank_for_reorder (const void *x, const void *y)
9384 rtx tmp = *(const rtx *) y;
9385 rtx tmp2 = *(const rtx *) x;
9387 /* The insn in a schedule group should be issued first. */
9388 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9389 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9391 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9392 minimizes instruction movement, thus minimizing sched's effect on
9393 register pressure. */
9394 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9397 /* Resort the array A in which only element at index N may be out of order. */
9398 static void
9399 swap_reorder (rtx *a, int n)
9401 rtx insn = a[n - 1];
9402 int i = n - 2;
9404 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9406 a[i + 1] = a[i];
9407 i -= 1;
9409 a[i + 1] = insn;
9412 #define SCHED_REORDER(READY, N_READY) \
9413 do \
9415 if ((N_READY) == 2) \
9416 swap_reorder (READY, N_READY); \
9417 else if ((N_READY) > 2) \
9418 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9420 while (0)
9422 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9423 macro. */
9424 static void
9425 ready_reorder (rtx *ready, int nready)
9427 SCHED_REORDER (ready, nready);
9430 /* Count life regions of r0 for a block. */
9431 static int
9432 find_r0_life_regions (basic_block b)
9434 rtx end, insn;
9435 rtx pset;
9436 rtx r0_reg;
9437 int live;
9438 int set;
9439 int death = 0;
9441 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9443 set = 1;
9444 live = 1;
9446 else
9448 set = 0;
9449 live = 0;
9452 insn = BB_HEAD (b);
9453 end = BB_END (b);
9454 r0_reg = gen_rtx_REG (SImode, R0_REG);
9455 while (1)
9457 if (INSN_P (insn))
9459 if (find_regno_note (insn, REG_DEAD, R0_REG))
9461 death++;
9462 live = 0;
9464 if (!live
9465 && (pset = single_set (insn))
9466 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9467 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9469 set++;
9470 live = 1;
9473 if (insn == end)
9474 break;
9475 insn = NEXT_INSN (insn);
9477 return set - death;
9480 /* Calculate regmode weights for all insns of all basic blocks. */
9481 static void
9482 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9483 int verbose ATTRIBUTE_UNUSED,
9484 int old_max_uid)
9486 basic_block b;
9488 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9489 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9490 r0_life_regions = 0;
9492 FOR_EACH_BB_REVERSE (b)
9494 find_regmode_weight (b, SImode);
9495 find_regmode_weight (b, SFmode);
9496 if (!reload_completed)
9497 r0_life_regions += find_r0_life_regions (b);
9500 CURR_REGMODE_PRESSURE (SImode) = 0;
9501 CURR_REGMODE_PRESSURE (SFmode) = 0;
9505 /* Cleanup. */
9506 static void
9507 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9508 int verbose ATTRIBUTE_UNUSED)
9510 if (regmode_weight[0])
9512 free (regmode_weight[0]);
9513 regmode_weight[0] = NULL;
9515 if (regmode_weight[1])
9517 free (regmode_weight[1]);
9518 regmode_weight[1] = NULL;
9522 /* The scalar modes supported differ from the default version in TImode
9523 for 32-bit SHMEDIA. */
9524 static bool
9525 sh_scalar_mode_supported_p (enum machine_mode mode)
9527 if (TARGET_SHMEDIA32 && mode == TImode)
9528 return false;
9530 return default_scalar_mode_supported_p (mode);
9533 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9534 keep count of register pressures on SImode and SFmode. */
9535 static int
9536 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9537 int sched_verbose ATTRIBUTE_UNUSED,
9538 rtx insn,
9539 int can_issue_more)
9541 if (GET_CODE (PATTERN (insn)) != USE
9542 && GET_CODE (PATTERN (insn)) != CLOBBER)
9543 cached_can_issue_more = can_issue_more - 1;
9544 else
9545 cached_can_issue_more = can_issue_more;
9547 if (reload_completed)
9548 return cached_can_issue_more;
9550 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9551 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9553 return cached_can_issue_more;
9556 static void
9557 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9558 int verbose ATTRIBUTE_UNUSED,
9559 int veclen ATTRIBUTE_UNUSED)
9561 CURR_REGMODE_PRESSURE (SImode) = 0;
9562 CURR_REGMODE_PRESSURE (SFmode) = 0;
9565 /* Some magic numbers. */
9566 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9567 functions that already have high pressure on r0. */
9568 #define R0_MAX_LIFE_REGIONS 2
9569 /* Register Pressure thresholds for SImode and SFmode registers. */
9570 #define SIMODE_MAX_WEIGHT 5
9571 #define SFMODE_MAX_WEIGHT 10
9573 /* Return true if the pressure is high for MODE. */
9574 static short
9575 high_pressure (enum machine_mode mode)
9577 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9578 functions that already have high pressure on r0. */
9579 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9580 return 1;
9582 if (mode == SFmode)
9583 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9584 else
9585 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9588 /* Reorder ready queue if register pressure is high. */
9589 static int
9590 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9591 int sched_verbose ATTRIBUTE_UNUSED,
9592 rtx *ready,
9593 int *n_readyp,
9594 int clock_var ATTRIBUTE_UNUSED)
9596 if (reload_completed)
9597 return sh_issue_rate ();
9599 if (high_pressure (SFmode) || high_pressure (SImode))
9601 ready_reorder (ready, *n_readyp);
9604 return sh_issue_rate ();
9607 /* Skip cycles if the current register pressure is high. */
9608 static int
9609 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9610 int sched_verbose ATTRIBUTE_UNUSED,
9611 rtx *ready ATTRIBUTE_UNUSED,
9612 int *n_readyp ATTRIBUTE_UNUSED,
9613 int clock_var ATTRIBUTE_UNUSED)
9615 if (reload_completed)
9616 return cached_can_issue_more;
9618 if (high_pressure(SFmode) || high_pressure (SImode))
9619 skip_cycles = 1;
9621 return cached_can_issue_more;
9624 /* Skip cycles without sorting the ready queue. This will move insns from
9625 Q->R. If this is the last cycle we are skipping, allow sorting of the
9626 ready queue by sh_reorder. */
9628 /* Generally, skipping this many cycles is sufficient for all insns to move
9629 from Q -> R. */
9630 #define MAX_SKIPS 8
9632 static int
9633 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9634 int sched_verbose ATTRIBUTE_UNUSED,
9635 rtx insn ATTRIBUTE_UNUSED,
9636 int last_clock_var,
9637 int clock_var,
9638 int *sort_p)
9640 if (reload_completed)
9641 return 0;
9643 if (skip_cycles)
9645 if ((clock_var - last_clock_var) < MAX_SKIPS)
9647 *sort_p = 0;
9648 return 1;
9650 /* If this is the last cycle we are skipping, allow reordering of R. */
9651 if ((clock_var - last_clock_var) == MAX_SKIPS)
9653 *sort_p = 1;
9654 return 1;
9658 skip_cycles = 0;
9660 return 0;
9663 /* SHmedia requires registers for branches, so we can't generate new
9664 branches past reload. */
9665 static bool
9666 sh_cannot_modify_jumps_p (void)
9668 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
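/* Return the class of registers usable as branch targets: the dedicated
target registers on SHmedia, otherwise NO_REGS.  */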
9671 static int
9672 sh_target_reg_class (void)
9674 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
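/* Return nonzero if it is worthwhile to treat branch target registers as
callee-saved: space must have been reserved for them, and enough
registers must be live to make the optimization pay off.  */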
9677 static bool
9678 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9680 HARD_REG_SET dummy;
9681 #if 0
9682 rtx insn;
9683 #endif
9685 if (! shmedia_space_reserved_for_target_registers)
9686 return 0;
9687 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9688 return 0;
9689 if (calc_live_regs (&dummy) >= 6 * 8)
9690 return 1;
9691 return 0;
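/* Return true if the Microsoft-style bit-field layout should be used;
this is the case for SH5 and for the Renesas / Hitachi ABIs.  */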
9694 static bool
9695 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9697 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9701 On the SH1..SH4, the trampoline looks like
9702 2 0002 D202 mov.l l2,r2
9703 1 0000 D301 mov.l l1,r3
9704 3 0004 422B jmp @r2
9705 4 0006 0009 nop
9706 5 0008 00000000 l1: .long area
9707 6 000c 00000000 l2: .long function
9709 SH5 (compact) uses r1 instead of r3 for the static chain. */
9712 /* Emit RTL insns to initialize the variable parts of a trampoline.
9713 FNADDR is an RTX for the address of the function's pure code.
9714 CXT is an RTX for the static chain value for the function. */
9716 void
9717 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9719 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9721 if (TARGET_SHMEDIA64)
9723 rtx tramp_templ;
9724 int fixed_len;
9726 rtx movi1 = GEN_INT (0xcc000010);
9727 rtx shori1 = GEN_INT (0xc8000010);
9728 rtx src, dst;
9730 /* The following trampoline works within a +- 128 KB range for cxt:
9731 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9732 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9733 gettr tr1,r1; blink tr0,r63 */
9734 /* Address rounding makes it hard to compute the exact bounds of the
9735 offset for this trampoline, but we have a rather generous offset
9736 range, so frame_offset should do fine as an upper bound. */
9737 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9739 /* ??? could optimize this trampoline initialization
9740 by writing DImode words with two insns each. */
9741 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9742 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9743 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9744 insn = gen_rtx_AND (DImode, insn, mask);
9745 /* Or in ptb/u .,tr1 pattern */
9746 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9747 insn = force_operand (insn, NULL_RTX);
9748 insn = gen_lowpart (SImode, insn);
9749 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9750 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9751 insn = gen_rtx_AND (DImode, insn, mask);
9752 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9753 insn = gen_lowpart (SImode, insn);
9754 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9755 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9756 insn = gen_rtx_AND (DImode, insn, mask);
9757 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9758 insn = gen_lowpart (SImode, insn);
9759 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9760 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9761 insn = gen_rtx_AND (DImode, insn, mask);
9762 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9763 insn = gen_lowpart (SImode, insn);
9764 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9765 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9766 insn = gen_rtx_AND (DImode, insn, mask);
9767 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9768 insn = gen_lowpart (SImode, insn);
9769 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9770 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9771 GEN_INT (0x6bf10600));
9772 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9773 GEN_INT (0x4415fc10));
9774 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9775 GEN_INT (0x4401fff0));
9776 emit_insn (gen_ic_invalidate_line (tramp));
9777 return;
9779 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9780 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9782 tramp_templ = gen_datalabel_ref (tramp_templ);
9783 dst = tramp_mem;
9784 src = gen_const_mem (BLKmode, tramp_templ);
9785 set_mem_align (dst, 256);
9786 set_mem_align (src, 64);
9787 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9789 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9790 emit_move_insn (adjust_address (tramp_mem, Pmode,
9791 fixed_len + GET_MODE_SIZE (Pmode)),
9792 cxt);
9793 emit_insn (gen_ic_invalidate_line (tramp));
9794 return;
9796 else if (TARGET_SHMEDIA)
9798 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9799 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9800 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9801 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9802 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9803 rotated right by 10, with the high 16 bits of every 32 selected. */
9804 rtx movishori
9805 = force_reg (V2HImode, (simplify_gen_subreg
9806 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9807 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9808 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9810 tramp = force_reg (Pmode, tramp);
9811 fnaddr = force_reg (SImode, fnaddr);
9812 cxt = force_reg (SImode, cxt);
9813 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9814 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9815 movishori));
9816 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9817 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9818 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9819 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9820 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9821 gen_rtx_SUBREG (V2HImode, cxt, 0),
9822 movishori));
9823 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9824 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9825 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9826 if (TARGET_LITTLE_ENDIAN)
9828 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9829 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9831 else
9833 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9834 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9836 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9837 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9838 emit_insn (gen_ic_invalidate_line (tramp));
9839 return;
9841 else if (TARGET_SHCOMPACT)
9843 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9844 return;
9846 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9847 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9848 SImode));
9849 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9850 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9851 SImode));
9852 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9853 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9854 if (TARGET_HARVARD)
9856 if (!TARGET_INLINE_IC_INVALIDATE
9857 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9858 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9859 FUNCTION_ORDINARY),
9860 0, VOIDmode, 1, tramp, SImode);
9861 else
9862 emit_insn (gen_ic_invalidate_line (tramp));
9866 /* FIXME: This is overly conservative. A SHcompact function that
9867 receives arguments ``by reference'' will have them stored in its
9868 own stack frame, so it must not pass pointers or references to
9869 these arguments to other functions by means of sibling calls. */
9870 /* If PIC, we cannot make sibling calls to global functions
9871 because the PLT requires r12 to be live. */
9872 static bool
9873 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9875 return (1
9876 && (! TARGET_SHCOMPACT
9877 || crtl->args.info.stack_regs == 0)
9878 && ! sh_cfun_interrupt_handler_p ()
9879 && (! flag_pic
9880 || (decl && ! TREE_PUBLIC (decl))
9881 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9884 /* Machine specific built-in functions. */
9886 struct builtin_description
9888 const enum insn_code icode;
9889 const char *const name;
9890 int signature;
9893 /* Describe the number and signedness of the arguments; arg[0] == result
9894 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9895 /* 9: 64-bit pointer, 10: 32-bit pointer */
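/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: the result and the
first argument have don't-care signedness, and the second argument
(a shift count or control value) is unsigned.  */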
9896 static const char signature_args[][4] =
9898 #define SH_BLTIN_V2SI2 0
9899 { 4, 4 },
9900 #define SH_BLTIN_V4HI2 1
9901 { 4, 4 },
9902 #define SH_BLTIN_V2SI3 2
9903 { 4, 4, 4 },
9904 #define SH_BLTIN_V4HI3 3
9905 { 4, 4, 4 },
9906 #define SH_BLTIN_V8QI3 4
9907 { 4, 4, 4 },
9908 #define SH_BLTIN_MAC_HISI 5
9909 { 1, 4, 4, 1 },
9910 #define SH_BLTIN_SH_HI 6
9911 { 4, 4, 1 },
9912 #define SH_BLTIN_SH_SI 7
9913 { 4, 4, 1 },
9914 #define SH_BLTIN_V4HI2V2SI 8
9915 { 4, 4, 4 },
9916 #define SH_BLTIN_V4HI2V8QI 9
9917 { 4, 4, 4 },
9918 #define SH_BLTIN_SISF 10
9919 { 4, 2 },
9920 #define SH_BLTIN_LDUA_L 11
9921 { 2, 10 },
9922 #define SH_BLTIN_LDUA_Q 12
9923 { 1, 10 },
9924 #define SH_BLTIN_STUA_L 13
9925 { 0, 10, 2 },
9926 #define SH_BLTIN_STUA_Q 14
9927 { 0, 10, 1 },
9928 #define SH_BLTIN_LDUA_L64 15
9929 { 2, 9 },
9930 #define SH_BLTIN_LDUA_Q64 16
9931 { 1, 9 },
9932 #define SH_BLTIN_STUA_L64 17
9933 { 0, 9, 2 },
9934 #define SH_BLTIN_STUA_Q64 18
9935 { 0, 9, 1 },
9936 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9937 #define SH_BLTIN_2 19
9938 #define SH_BLTIN_SU 19
9939 { 1, 2 },
9940 #define SH_BLTIN_3 20
9941 #define SH_BLTIN_SUS 20
9942 { 2, 2, 1 },
9943 #define SH_BLTIN_PSSV 21
9944 { 0, 8, 2, 2 },
9945 #define SH_BLTIN_XXUU 22
9946 #define SH_BLTIN_UUUU 22
9947 { 1, 1, 1, 1 },
9948 #define SH_BLTIN_PV 23
9949 { 0, 8 },
9951 /* mcmv: operands considered unsigned. */
9952 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9953 /* mperm: control value considered unsigned int. */
9954 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9955 /* mshards_q: returns signed short. */
9956 /* nsb: takes long long arg, returns unsigned char. */
9957 static const struct builtin_description bdesc[] =
9959 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9960 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9961 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9962 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9963 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9964 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9965 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9966 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9967 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9968 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9969 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9970 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9971 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9972 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9973 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9974 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9975 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9976 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9977 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9978 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9979 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9980 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9981 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9982 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9983 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9984 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9985 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9986 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9987 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9988 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9989 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9990 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9991 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9992 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9993 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9994 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9995 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9996 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9997 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9998 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9999 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10000 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10001 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10002 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10003 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10004 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10005 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10006 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10007 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10008 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10009 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10010 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10011 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10012 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10013 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10014 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10015 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10016 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10017 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10018 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10019 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10020 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10021 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10022 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10023 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10024 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10025 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10026 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10027 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10028 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10029 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10030 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10031 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10032 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10033 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10034 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10035 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10036 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10037 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10038 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10039 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10040 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10041 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10042 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
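/* Register the SHmedia builtins listed in bdesc[], constructing each
function type from the corresponding signature_args entry.  */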
10045 static void
10046 sh_media_init_builtins (void)
10048 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10049 const struct builtin_description *d;
10051 memset (shared, 0, sizeof shared);
10052 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10054 tree type, arg_type = 0;
10055 int signature = d->signature;
10056 int i;
10058 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10059 type = shared[signature];
10060 else
10062 int has_result = signature_args[signature][0] != 0;
10064 if ((signature_args[signature][1] & 8)
10065 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10066 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10067 continue;
10068 if (! TARGET_FPU_ANY
10069 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10070 continue;
10071 type = void_list_node;
10072 for (i = 3; ; i--)
10074 int arg = signature_args[signature][i];
10075 int opno = i - 1 + has_result;
10077 if (arg & 8)
10078 arg_type = ptr_type_node;
10079 else if (arg)
10080 arg_type = (*lang_hooks.types.type_for_mode)
10081 (insn_data[d->icode].operand[opno].mode,
10082 (arg & 1));
10083 else if (i)
10084 continue;
10085 else
10086 arg_type = void_type_node;
10087 if (i == 0)
10088 break;
10089 type = tree_cons (NULL_TREE, arg_type, type);
10091 type = build_function_type (arg_type, type);
10092 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10093 shared[signature] = type;
10095 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10096 NULL, NULL_TREE);
10100 /* Implements target hook vector_mode_supported_p. */
10101 bool
10102 sh_vector_mode_supported_p (enum machine_mode mode)
10104 if (TARGET_FPU_ANY
10105 && ((mode == V2SFmode)
10106 || (mode == V4SFmode)
10107 || (mode == V16SFmode)))
10108 return true;
10110 else if (TARGET_SHMEDIA
10111 && ((mode == V8QImode)
10112 || (mode == V2HImode)
10113 || (mode == V4HImode)
10114 || (mode == V2SImode)))
10115 return true;
10117 return false;
10120 /* Implements target hook dwarf_calling_convention. Return an enum
10121 of dwarf_calling_convention. */
10123 sh_dwarf_calling_convention (const_tree func)
10125 if (sh_attr_renesas_p (func))
10126 return DW_CC_GNU_renesas_sh;
10128 return DW_CC_normal;
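/* Set up the machine-specific builtins; only SHmedia defines any.  */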
10131 static void
10132 sh_init_builtins (void)
10134 if (TARGET_SHMEDIA)
10135 sh_media_init_builtins ();
10138 /* Expand an expression EXP that calls a built-in function,
10139 with result going to TARGET if that's convenient
10140 (and in mode MODE if that's convenient).
10141 SUBTARGET may be used as the target for computing one of EXP's operands.
10142 IGNORE is nonzero if the value is to be ignored. */
10144 static rtx
10145 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10146 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10148 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10149 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10150 const struct builtin_description *d = &bdesc[fcode];
10151 enum insn_code icode = d->icode;
10152 int signature = d->signature;
10153 enum machine_mode tmode = VOIDmode;
10154 int nop = 0, i;
10155 rtx op[4];
10156 rtx pat = 0;
10158 if (signature_args[signature][0])
10160 if (ignore)
10161 return 0;
10163 tmode = insn_data[icode].operand[0].mode;
10164 if (! target
10165 || GET_MODE (target) != tmode
10166 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10167 target = gen_reg_rtx (tmode);
10168 op[nop++] = target;
10170 else
10171 target = 0;
10173 for (i = 1; i <= 3; i++, nop++)
10175 tree arg;
10176 enum machine_mode opmode, argmode;
10177 tree optype;
10179 if (! signature_args[signature][i])
10180 break;
10181 arg = CALL_EXPR_ARG (exp, i - 1);
10182 if (arg == error_mark_node)
10183 return const0_rtx;
10184 if (signature_args[signature][i] & 8)
10186 opmode = ptr_mode;
10187 optype = ptr_type_node;
10189 else
10191 opmode = insn_data[icode].operand[nop].mode;
10192 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10194 argmode = TYPE_MODE (TREE_TYPE (arg));
10195 if (argmode != opmode)
10196 arg = build1 (NOP_EXPR, optype, arg);
10197 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
10198 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10199 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10202 switch (nop)
10204 case 1:
10205 pat = (*insn_data[d->icode].genfun) (op[0]);
10206 break;
10207 case 2:
10208 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10209 break;
10210 case 3:
10211 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10212 break;
10213 case 4:
10214 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10215 break;
10216 default:
10217 gcc_unreachable ();
10219 if (! pat)
10220 return 0;
10221 emit_insn (pat);
10222 return target;
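/* Expand a V2SF unary operation CODE by applying the SFmode operation
to each of the two elements of OP1 and storing the results in OP0.  */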
10225 void
10226 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10228 rtx sel0 = const0_rtx;
10229 rtx sel1 = const1_rtx;
10230 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10231 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10233 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10234 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
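/* Expand a V2SF binary operation CODE element-wise on OP1 and OP2,
storing the results in OP0.  */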
10237 void
10238 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10240 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10242 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10243 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10246 /* Return the class of registers for which a mode change from FROM to TO
10247 is invalid. */
10248 bool
10249 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10250 enum reg_class class)
10252 /* We want to enable the use of SUBREGs as a means to
10253 VEC_SELECT a single element of a vector. */
10254 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10255 return (reg_classes_intersect_p (GENERAL_REGS, class));
10257 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10259 if (TARGET_LITTLE_ENDIAN)
10261 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10262 return reg_classes_intersect_p (DF_REGS, class);
10264 else
10266 if (GET_MODE_SIZE (from) < 8)
10267 return reg_classes_intersect_p (DF_HI_REGS, class);
10270 return 0;
10274 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10275 that label is used. */
10277 void
10278 sh_mark_label (rtx address, int nuses)
10280 if (GOTOFF_P (address))
10282 /* Extract the label or symbol. */
10283 address = XEXP (address, 0);
10284 if (GET_CODE (address) == PLUS)
10285 address = XEXP (address, 0);
10286 address = XVECEXP (address, 0, 0);
10288 if (GET_CODE (address) == LABEL_REF
10289 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10290 LABEL_NUSES (XEXP (address, 0)) += nuses;
10293 /* Compute extra cost of moving data between one register class
10294 and another. */
10296 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10297 uses this information. Hence, the general register <-> floating point
10298 register information here is not used for SFmode. */
10301 sh_register_move_cost (enum machine_mode mode,
10302 enum reg_class srcclass, enum reg_class dstclass)
10304 if (dstclass == T_REGS || dstclass == PR_REGS)
10305 return 10;
10307 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10308 return 4;
10310 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10311 && REGCLASS_HAS_FP_REG (srcclass)
10312 && REGCLASS_HAS_FP_REG (dstclass))
10313 return 4;
10315 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10316 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10318 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10319 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10320 return 9;
10322 if ((REGCLASS_HAS_FP_REG (dstclass)
10323 && REGCLASS_HAS_GENERAL_REG (srcclass))
10324 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10325 && REGCLASS_HAS_FP_REG (srcclass)))
10326 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10327 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10329 if ((dstclass == FPUL_REGS
10330 && REGCLASS_HAS_GENERAL_REG (srcclass))
10331 || (srcclass == FPUL_REGS
10332 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10333 return 5;
10335 if ((dstclass == FPUL_REGS
10336 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10337 || (srcclass == FPUL_REGS
10338 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10339 return 7;
10341 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10342 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10343 return 20;
10345 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10346 if (TARGET_SHMEDIA
10347 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10349 if (sh_gettrcost >= 0)
10350 return sh_gettrcost;
10351 else if (!TARGET_PT_FIXED)
10352 return 100;
10355 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10356 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10357 return 4;
10359 if (TARGET_SHMEDIA
10360 || (TARGET_FMOVD
10361 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10362 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10363 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10365 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10368 static rtx emit_load_ptr (rtx, rtx);
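/* Load a ptr_mode value from ADDR into REG, sign-extending it to Pmode
if the two modes differ.  */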
10370 static rtx
10371 emit_load_ptr (rtx reg, rtx addr)
10373 rtx mem = gen_const_mem (ptr_mode, addr);
10375 if (Pmode != ptr_mode)
10376 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10377 return emit_move_insn (reg, mem);
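/* Output the thunk for FUNCTION: adjust the incoming this pointer by
DELTA and, if VCALL_OFFSET is nonzero, by the value found at that
offset in the vtable, then tail-call FUNCTION.  */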
10380 static void
10381 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10382 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10383 tree function)
10385 CUMULATIVE_ARGS cum;
10386 int structure_value_byref = 0;
10387 rtx this, this_value, sibcall, insns, funexp;
10388 tree funtype = TREE_TYPE (function);
10389 int simple_add = CONST_OK_FOR_ADD (delta);
10390 int did_load = 0;
10391 rtx scratch0, scratch1, scratch2;
10392 unsigned i;
10394 reload_completed = 1;
10395 epilogue_completed = 1;
10396 current_function_uses_only_leaf_regs = 1;
10398 emit_note (NOTE_INSN_PROLOGUE_END);
10400 /* Find the "this" pointer. We have such a wide range of ABIs for the
10401 SH that it's best to do this completely machine independently.
10402 "this" is passed as first argument, unless a structure return pointer
10403 comes first, in which case "this" comes second. */
10404 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10405 #ifndef PCC_STATIC_STRUCT_RETURN
10406 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10407 structure_value_byref = 1;
10408 #endif /* not PCC_STATIC_STRUCT_RETURN */
10409 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10411 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10413 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10415 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10417 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10418 static chain pointer (even if you can't have nested virtual functions
10419 right now, someone might implement them sometime), and the rest of the
10420 registers are used for argument passing, are callee-saved, or reserved. */
10421 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10422 -ffixed-reg has been used. */
10423 if (! call_used_regs[0] || fixed_regs[0])
10424 error ("r0 needs to be available as a call-clobbered register");
10425 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10426 if (! TARGET_SH5)
10428 if (call_used_regs[1] && ! fixed_regs[1])
10429 scratch1 = gen_rtx_REG (ptr_mode, 1);
10430 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10431 to the location where struct values are to be returned. */
10432 if (call_used_regs[3] && ! fixed_regs[3])
10433 scratch2 = gen_rtx_REG (Pmode, 3);
10435 else if (TARGET_SHMEDIA)
10437 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10438 if (i != REGNO (scratch0) &&
10439 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10441 scratch1 = gen_rtx_REG (ptr_mode, i);
10442 break;
10444 if (scratch1 == scratch0)
10445 error ("Need a second call-clobbered general purpose register");
10446 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10447 if (call_used_regs[i] && ! fixed_regs[i])
10449 scratch2 = gen_rtx_REG (Pmode, i);
10450 break;
10452 if (scratch2 == scratch0)
10453 error ("Need a call-clobbered target register");
10456 this_value = plus_constant (this, delta);
10457 if (vcall_offset
10458 && (simple_add || scratch0 != scratch1)
10459 && strict_memory_address_p (ptr_mode, this_value))
10461 emit_load_ptr (scratch0, this_value);
10462 did_load = 1;
10465 if (!delta)
10466 ; /* Do nothing. */
10467 else if (simple_add)
10468 emit_move_insn (this, this_value);
10469 else
10471 emit_move_insn (scratch1, GEN_INT (delta));
10472 emit_insn (gen_add2_insn (this, scratch1));
10475 if (vcall_offset)
10477 rtx offset_addr;
10479 if (!did_load)
10480 emit_load_ptr (scratch0, this);
10482 offset_addr = plus_constant (scratch0, vcall_offset);
10483 if (strict_memory_address_p (ptr_mode, offset_addr))
10484 ; /* Do nothing. */
10485 else if (! TARGET_SH5 && scratch0 != scratch1)
10487 /* scratch0 != scratch1, and we have indexed loads. Get better
10488 schedule by loading the offset into r1 and using an indexed
10489 load - then the load of r1 can issue before the load from
10490 (this + delta) finishes. */
10491 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10492 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10494 else if (CONST_OK_FOR_ADD (vcall_offset))
10496 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10497 offset_addr = scratch0;
10499 else if (scratch0 != scratch1)
10501 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10502 emit_insn (gen_add2_insn (scratch0, scratch1));
10503 offset_addr = scratch0;
10505 else
10506 gcc_unreachable (); /* FIXME */
10507 emit_load_ptr (scratch0, offset_addr);
10509 if (Pmode != ptr_mode)
10510 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10511 emit_insn (gen_add2_insn (this, scratch0));
10514 /* Generate a tail call to the target function. */
10515 if (! TREE_USED (function))
10517 assemble_external (function);
10518 TREE_USED (function) = 1;
10520 funexp = XEXP (DECL_RTL (function), 0);
10521 /* If the function is overridden, so is the thunk, hence we don't
10522 need GOT addressing even if this is a public symbol. */
10523 #if 0
10524 if (TARGET_SH1 && ! flag_weak)
10525 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10526 else
10527 #endif
10528 if (TARGET_SH2 && flag_pic)
10530 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10531 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10533 else
10535 if (TARGET_SHMEDIA && flag_pic)
10537 funexp = gen_sym2PIC (funexp);
10538 PUT_MODE (funexp, Pmode);
10540 emit_move_insn (scratch2, funexp);
10541 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10542 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10544 sibcall = emit_call_insn (sibcall);
10545 SIBLING_CALL_P (sibcall) = 1;
10546 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10547 emit_barrier ();
10549 /* Run just enough of rest_of_compilation to do scheduling and get
10550 the insns emitted. Note that use_thunk calls
10551 assemble_start_function and assemble_end_function. */
10553 insn_locators_alloc ();
10554 insns = get_insns ();
10556 #if 0
10557 if (optimize > 0)
10559 /* Initialize the bitmap obstacks. */
10560 bitmap_obstack_initialize (NULL);
10561 bitmap_obstack_initialize (&reg_obstack);
10562 if (! cfun->cfg)
10563 init_flow ();
10564 rtl_register_cfg_hooks ();
10565 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10566 init_rtl_bb_info (EXIT_BLOCK_PTR);
10567 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10568 EXIT_BLOCK_PTR->flags |= BB_RTL;
10569 find_basic_blocks (insns);
10571 if (flag_schedule_insns_after_reload)
10573 life_analysis (PROP_FINAL);
10575 split_all_insns (1);
10577 schedule_insns ();
10579 /* We must split jmp insn in PIC case. */
10580 else if (flag_pic)
10581 split_all_insns_noflow ();
10583 #else
10584 if (optimize > 0)
10586 if (! cfun->cfg)
10587 init_flow (cfun);
10588 split_all_insns_noflow ();
10590 #endif
10592 sh_reorg ();
10594 if (optimize > 0 && flag_delayed_branch)
10595 dbr_schedule (insns);
10597 shorten_branches (insns);
10598 final_start_function (insns, file, 1);
10599 final (insns, file, 1);
10600 final_end_function ();
10601 free_after_compilation (cfun);
10603 reload_completed = 0;
10604 epilogue_completed = 0;
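/* Return an rtx for the address of the function NAME of kind KIND.
Under PIC, a non-ordinary function's address is loaded through the GOT
or as a GOTOFF value; TARGET, if nonnull, receives the result.  */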
10608 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10610 rtx sym;
10612 /* If this is not an ordinary function, the name usually comes from a
10613 string literal or an sprintf buffer. Make sure we use the same
10614 string consistently, so that cse will be able to unify address loads. */
10615 if (kind != FUNCTION_ORDINARY)
10616 name = IDENTIFIER_POINTER (get_identifier (name));
10617 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10618 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10619 if (flag_pic)
10620 switch (kind)
10622 case FUNCTION_ORDINARY:
10623 break;
10624 case SFUNC_GOT:
10626 rtx reg = target ? target : gen_reg_rtx (Pmode);
10628 emit_insn (gen_symGOT2reg (reg, sym));
10629 sym = reg;
10630 break;
10632 case SFUNC_STATIC:
10634 /* ??? To allow cse to work, we use GOTOFF relocations.
10635 We could add combiner patterns to transform this into
10636 straight pc-relative calls with sym2PIC / bsrf when
10637 label load and function call are still 1:1 and in the
10638 same basic block during combine. */
10639 rtx reg = target ? target : gen_reg_rtx (Pmode);
10641 emit_insn (gen_symGOTOFF2reg (reg, sym));
10642 sym = reg;
10643 break;
10646 if (target && sym != target)
10648 emit_move_insn (target, sym);
10649 return target;
10651 return sym;
10654 /* Find the number of a general purpose register in S. */
10655 static int
10656 scavenge_reg (HARD_REG_SET *s)
10658 int r;
10659 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10660 if (TEST_HARD_REG_BIT (*s, r))
10661 return r;
10662 return -1;
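/* Return an rtx giving the value the PR (return address) register had
on entry to the current function.  */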
10666 sh_get_pr_initial_val (void)
10668 rtx val;
10670 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10671 PR register on SHcompact, because it might be clobbered by the prologue.
10672 We check first if that is known to be the case. */
10673 if (TARGET_SHCOMPACT
10674 && ((crtl->args.info.call_cookie
10675 & ~ CALL_COOKIE_RET_TRAMP (1))
10676 || crtl->saves_all_registers))
10677 return gen_frame_mem (SImode, return_address_pointer_rtx);
10679 /* If we haven't finished rtl generation, there might be a nonlocal label
10680 that we haven't seen yet.
10681 ??? get_hard_reg_initial_val fails if it is called after register
10682 allocation has started, unless it has been called before for the
10683 same register. And even then, we end in trouble if we didn't use
10684 the register in the same basic block before. So call
10685 get_hard_reg_initial_val now and wrap it in an unspec if we might
10686 need to replace it. */
10687 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10688 combine can put the pseudo returned by get_hard_reg_initial_val into
10689 instructions that need a general purpose register, which will fail to
10690 be recognized when the pseudo becomes allocated to PR. */
10692 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10693 if (TARGET_SH1)
10694 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10695 return val;
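/* Try to expand a comparison of the T register against a constant into
a store of the condition result in TARGET.  Return 1 on success and 0
if the case is not handled here.  */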
10699 sh_expand_t_scc (enum rtx_code code, rtx target)
10701 rtx result = target;
10702 HOST_WIDE_INT val;
10704 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10705 || GET_CODE (sh_compare_op1) != CONST_INT)
10706 return 0;
10707 if (GET_CODE (result) != REG)
10708 result = gen_reg_rtx (SImode);
10709 val = INTVAL (sh_compare_op1);
10710 if ((code == EQ && val == 1) || (code == NE && val == 0))
10711 emit_insn (gen_movt (result));
10712 else if (TARGET_SH2A && ((code == EQ && val == 0)
10713 || (code == NE && val == 1)))
10714 emit_insn (gen_movrt (result));
10715 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10717 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10718 emit_insn (gen_subc (result, result, result));
10719 emit_insn (gen_addsi3 (result, result, const1_rtx));
10721 else if (code == EQ || code == NE)
10722 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10723 else
10724 return 0;
10725 if (result != target)
10726 emit_move_insn (target, result);
10727 return 1;
10730 /* INSN is an sfunc; return the rtx that describes the address used. */
10731 static rtx
10732 extract_sfunc_addr (rtx insn)
10734 rtx pattern, part = NULL_RTX;
10735 int len, i;
10737 pattern = PATTERN (insn);
10738 len = XVECLEN (pattern, 0);
10739 for (i = 0; i < len; i++)
10741 part = XVECEXP (pattern, 0, i);
10742 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10743 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10744 return XEXP (part, 0);
10746 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10747 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10750 /* Verify that the register in use_sfunc_addr still agrees with the address
10751 used in the sfunc. This prevents fill_slots_from_thread from changing
10752 use_sfunc_addr.
10753 INSN is the use_sfunc_addr instruction, and REG is the register it
10754 guards. */
10756 check_use_sfunc_addr (rtx insn, rtx reg)
10758 /* Search for the sfunc. It should really come right after INSN. */
10759 while ((insn = NEXT_INSN (insn)))
10761 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10762 break;
10763 if (! INSN_P (insn))
10764 continue;
10766 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10767 insn = XVECEXP (PATTERN (insn), 0, 0);
10768 if (GET_CODE (PATTERN (insn)) != PARALLEL
10769 || get_attr_type (insn) != TYPE_SFUNC)
10770 continue;
10771 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10773 gcc_unreachable ();
10776 /* This function returns a constant rtx that represents 2**15 / pi in
10777 SFmode. It's used to scale SFmode angles, in radians, to a
10778 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10779 maps to 0x10000). */
10781 static GTY(()) rtx sh_fsca_sf2int_rtx;
10784 sh_fsca_sf2int (void)
10786 if (! sh_fsca_sf2int_rtx)
10788 REAL_VALUE_TYPE rv;
10790 real_from_string (&rv, "10430.378350470453");
10791 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10794 return sh_fsca_sf2int_rtx;
10797 /* This function returns a constant rtx that represents 2**15 / pi in
10798 DFmode. It's used to scale DFmode angles, in radians, to a
10799 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10800 maps to 0x10000). */
10802 static GTY(()) rtx sh_fsca_df2int_rtx;
10805 sh_fsca_df2int (void)
10807 if (! sh_fsca_df2int_rtx)
10809 REAL_VALUE_TYPE rv;
10811 real_from_string (&rv, "10430.378350470453");
10812 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10815 return sh_fsca_df2int_rtx;
10818 /* This function returns a constant rtx that represents pi / 2**15 in
10819 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10820 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10821 2*pi). */
10823 static GTY(()) rtx sh_fsca_int2sf_rtx;
10826 sh_fsca_int2sf (void)
10828 if (! sh_fsca_int2sf_rtx)
10830 REAL_VALUE_TYPE rv;
10832 real_from_string (&rv, "9.587379924285257e-5");
10833 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10836 return sh_fsca_int2sf_rtx;
10839 /* Initialize the CUMULATIVE_ARGS structure. */
10841 void
10842 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10843 tree fntype,
10844 rtx libname ATTRIBUTE_UNUSED,
10845 tree fndecl,
10846 signed int n_named_args,
10847 enum machine_mode mode)
10849 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10850 pcum->free_single_fp_reg = 0;
10851 pcum->stack_regs = 0;
10852 pcum->byref_regs = 0;
10853 pcum->byref = 0;
10854 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10856 /* XXX - Should we check TARGET_HITACHI here ??? */
10857 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10859 if (fntype)
10861 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10862 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10863 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10864 pcum->arg_count [(int) SH_ARG_INT]
10865 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10867 pcum->call_cookie
10868 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10869 && pcum->arg_count [(int) SH_ARG_INT] == 0
10870 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10871 ? int_size_in_bytes (TREE_TYPE (fntype))
10872 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10873 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10874 == FIRST_RET_REG));
10876 else
10878 pcum->arg_count [(int) SH_ARG_INT] = 0;
10879 pcum->prototype_p = FALSE;
10880 if (mode != VOIDmode)
10882 pcum->call_cookie =
10883 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10884 && GET_MODE_SIZE (mode) > 4
10885 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10887 /* If the default ABI is the Renesas ABI then all library
10888 calls must assume that the library will be using the
10889 Renesas ABI. So if the function would return its result
10890 in memory then we must force the address of this memory
10891 block onto the stack. Ideally we would like to call
10892 targetm.calls.return_in_memory() here but we do not have
10893 the TYPE or the FNDECL available so we synthesize the
10894 contents of that function as best we can. */
10895 pcum->force_mem =
10896 (TARGET_DEFAULT & MASK_HITACHI)
10897 && (mode == BLKmode
10898 || (GET_MODE_SIZE (mode) > 4
10899 && !(mode == DFmode
10900 && TARGET_FPU_DOUBLE)));
10902 else
10904 pcum->call_cookie = 0;
10905 pcum->force_mem = FALSE;
10910 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10911 not enter into CONST_DOUBLE for the replace.
10913 Note that copying is not done so X must not be shared unless all copies
10914 are to be modified.
10916 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10917 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10918 replacements[n*2+1] - and that we take mode changes into account.
10920 If a replacement is ambiguous, return NULL_RTX.
10922 If MODIFY is zero, don't modify any rtl in place,
10923 just return zero or nonzero for failure / success. */
10926 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10928 int i, j;
10929 const char *fmt;
10931 /* The following prevents a loop when we change a MEM inside a
10932 CONST_DOUBLE into the same CONST_DOUBLE. */
10933 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10934 return x;
10936 for (i = n_replacements - 1; i >= 0 ; i--)
10937 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10938 return replacements[i*2+1];
10940 /* Allow this function to make replacements in EXPR_LISTs. */
10941 if (x == 0)
10942 return 0;
10944 if (GET_CODE (x) == SUBREG)
10946 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10947 n_replacements, modify);
10949 if (GET_CODE (new) == CONST_INT)
10951 x = simplify_subreg (GET_MODE (x), new,
10952 GET_MODE (SUBREG_REG (x)),
10953 SUBREG_BYTE (x));
10954 if (! x)
10955 abort ();
10957 else if (modify)
10958 SUBREG_REG (x) = new;
10960 return x;
10962 else if (GET_CODE (x) == REG)
10964 unsigned regno = REGNO (x);
10965 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10966 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10967 rtx result = NULL_RTX;
10969 for (i = n_replacements - 1; i >= 0; i--)
10971 rtx from = replacements[i*2];
10972 rtx to = replacements[i*2+1];
10973 unsigned from_regno, from_nregs, to_regno, new_regno;
10975 if (GET_CODE (from) != REG)
10976 continue;
10977 from_regno = REGNO (from);
10978 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10979 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10980 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10982 if (regno < from_regno
10983 || regno + nregs > from_regno + nregs
10984 || GET_CODE (to) != REG
10985 || result)
10986 return NULL_RTX;
10987 to_regno = REGNO (to);
10988 if (to_regno < FIRST_PSEUDO_REGISTER)
10990 new_regno = regno + to_regno - from_regno;
10991 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10992 != nregs)
10993 return NULL_RTX;
10994 result = gen_rtx_REG (GET_MODE (x), new_regno);
10996 else if (GET_MODE (x) <= GET_MODE (to))
10997 result = gen_lowpart_common (GET_MODE (x), to);
10998 else
10999 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11002 return result ? result : x;
11004 else if (GET_CODE (x) == ZERO_EXTEND)
11006 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11007 n_replacements, modify);
11009 if (GET_CODE (new) == CONST_INT)
11011 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11012 new, GET_MODE (XEXP (x, 0)));
11013 if (! x)
11014 abort ();
11016 else if (modify)
11017 XEXP (x, 0) = new;
11019 return x;
11022 fmt = GET_RTX_FORMAT (GET_CODE (x));
11023 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11025 rtx new;
11027 if (fmt[i] == 'e')
11029 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11030 n_replacements, modify);
11031 if (!new)
11032 return NULL_RTX;
11033 if (modify)
11034 XEXP (x, i) = new;
11036 else if (fmt[i] == 'E')
11037 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11039 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11040 n_replacements, modify);
11041 if (!new)
11042 return NULL_RTX;
11043 if (modify)
11044 XVECEXP (x, i, j) = new;
11048 return x;
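/* Return rtl that converts X to MODE: normally a TRUNCATE, but an inner
ZERO_EXTEND or SIGN_EXTEND of X is looked through or reused where
possible.  NEED_SIGN_EXT restricts that reuse to sign extensions.  */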
11052 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11054 enum rtx_code code = TRUNCATE;
11056 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11058 rtx inner = XEXP (x, 0);
11059 enum machine_mode inner_mode = GET_MODE (inner);
11061 if (inner_mode == mode)
11062 return inner;
11063 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11064 x = inner;
11065 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11066 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11068 code = GET_CODE (x);
11069 x = inner;
11072 return gen_rtx_fmt_e (code, mode, x);
11075 /* Called via for_each_rtx after reload, to clean up truncates of
11076 registers that span multiple actual hard registers. */
11078 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11080 rtx x = *p, reg;
11082 if (GET_CODE (x) != TRUNCATE)
11083 return 0;
11084 reg = XEXP (x, 0);
11085 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11087 enum machine_mode reg_mode = GET_MODE (reg);
11088 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11089 subreg_lowpart_offset (DImode, reg_mode));
11090 *(int*) n_changes += 1;
11091 return -1;
11093 return 0;
11096 /* Loads and stores depend on the highpart of the address. However,
11097 set_attr_alternative does not give well-defined results before reload,
11098 so we must look at the rtl ourselves to see if any of the feeding
11099 registers is used in a memref. */
11101 /* Called by sh_contains_memref_p via for_each_rtx. */
11102 static int
11103 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11105 return (GET_CODE (*loc) == MEM);
11108 /* Return nonzero iff INSN contains a MEM. */
11110 sh_contains_memref_p (rtx insn)
11112 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11115 /* Return nonzero iff INSN loads a banked register. */
11117 sh_loads_bankedreg_p (rtx insn)
11119 if (GET_CODE (PATTERN (insn)) == SET)
11121 rtx op = SET_DEST (PATTERN(insn));
11122 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11123 return 1;
11126 return 0;
11129 /* FNADDR is the MEM expression from a call expander. Return an address
11130 to use in an SHmedia insn pattern. */
11132 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11134 int is_sym;
11136 fnaddr = XEXP (fnaddr, 0);
11137 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11138 if (flag_pic && is_sym)
11140 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11142 rtx reg = gen_reg_rtx (Pmode);
11144 /* We must not use GOTPLT for sibcalls, because PIC_REG
11145 must be restored before the PLT code gets to run. */
11146 if (is_sibcall)
11147 emit_insn (gen_symGOT2reg (reg, fnaddr));
11148 else
11149 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11150 fnaddr = reg;
11152 else
11154 fnaddr = gen_sym2PIC (fnaddr);
11155 PUT_MODE (fnaddr, Pmode);
11158 /* If ptabs might trap, make this visible to the rest of the compiler.
11159 We generally assume that symbols pertain to valid locations, but
11160 it is possible to generate invalid symbols with asm or linker tricks.
11161 In a list of functions where each returns its successor, an invalid
11162 symbol might denote an empty list. */
11163 if (!TARGET_PT_FIXED
11164 && (!is_sym || TARGET_INVALID_SYMBOLS)
11165 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11167 rtx tr = gen_reg_rtx (PDImode);
11169 emit_insn (gen_ptabs (tr, fnaddr));
11170 fnaddr = tr;
11172 else if (! target_reg_operand (fnaddr, Pmode))
11173 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11174 return fnaddr;
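/* Implement the secondary reload hook: return the register class needed
to move X of mode MODE into (IN_P nonzero) or out of a register of
class CLASS, or set SRI->icode to a reload pattern and return NO_REGS.  */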
11177 enum reg_class
11178 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
11179 enum machine_mode mode, secondary_reload_info *sri)
11181 if (in_p)
11183 if (REGCLASS_HAS_FP_REG (class)
11184 && ! TARGET_SHMEDIA
11185 && immediate_operand ((x), mode)
11186 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11187 && mode == SFmode && fldi_ok ()))
11188 switch (mode)
11190 case SFmode:
11191 sri->icode = CODE_FOR_reload_insf__frn;
11192 return NO_REGS;
11193 case DFmode:
11194 sri->icode = CODE_FOR_reload_indf__frn;
11195 return NO_REGS;
11196 case SImode:
11197 /* ??? If we knew that we are in the appropriate mode -
11198 single precision - we could use a reload pattern directly. */
11199 return FPUL_REGS;
11200 default:
11201 abort ();
11203 if (class == FPUL_REGS
11204 && ((GET_CODE (x) == REG
11205 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11206 || REGNO (x) == T_REG))
11207 || GET_CODE (x) == PLUS))
11208 return GENERAL_REGS;
11209 if (class == FPUL_REGS && immediate_operand (x, mode))
11211 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11212 return GENERAL_REGS;
11213 else if (mode == SFmode)
11214 return FP_REGS;
11215 sri->icode = CODE_FOR_reload_insi__i_fpul;
11216 return NO_REGS;
11218 if (class == FPSCR_REGS
11219 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11220 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11221 return GENERAL_REGS;
11222 if (REGCLASS_HAS_FP_REG (class)
11223 && TARGET_SHMEDIA
11224 && immediate_operand (x, mode)
11225 && x != CONST0_RTX (GET_MODE (x))
11226 && GET_MODE (x) != V4SFmode)
11227 return GENERAL_REGS;
11228 if ((mode == QImode || mode == HImode)
11229 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11231 sri->icode = ((mode == QImode)
11232 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11233 return NO_REGS;
11235 if (TARGET_SHMEDIA && class == GENERAL_REGS
11236 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
11237 return TARGET_REGS;
11238 } /* end of input-only processing. */
11240 if (((REGCLASS_HAS_FP_REG (class)
11241 && (GET_CODE (x) == REG
11242 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11243 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11244 && TARGET_FMOVD))))
11245 || (REGCLASS_HAS_GENERAL_REG (class)
11246 && GET_CODE (x) == REG
11247 && FP_REGISTER_P (REGNO (x))))
11248 && ! TARGET_SHMEDIA
11249 && (mode == SFmode || mode == SImode))
11250 return FPUL_REGS;
11251 if ((class == FPUL_REGS
11252 || (REGCLASS_HAS_FP_REG (class)
11253 && ! TARGET_SHMEDIA && mode == SImode))
11254 && (GET_CODE (x) == MEM
11255 || (GET_CODE (x) == REG
11256 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11257 || REGNO (x) == T_REG
11258 || system_reg_operand (x, VOIDmode)))))
11260 if (class == FPUL_REGS)
11261 return GENERAL_REGS;
11262 return FPUL_REGS;
11264 if ((class == TARGET_REGS
11265 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
11266 && !satisfies_constraint_Csy (x)
11267 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11268 return GENERAL_REGS;
11269 if ((class == MAC_REGS || class == PR_REGS)
11270 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11271 && class != REGNO_REG_CLASS (REGNO (x)))
11272 return GENERAL_REGS;
11273 if (class != GENERAL_REGS && GET_CODE (x) == REG
11274 && TARGET_REGISTER_P (REGNO (x)))
11275 return GENERAL_REGS;
11276 return NO_REGS;
11279 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11281 #include "gt-sh.h"