1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "toplev.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
115 a given reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
195 tree, int, bool *);
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
271 enum machine_mode,
272 int *punsignedp,
273 const_tree funtype,
274 int for_return);
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
276 const_tree, bool);
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
278 const_tree, bool);
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
280 tree, bool);
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
288 static const struct attribute_spec sh_attribute_table[] =
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
299 #ifdef SYMBIAN
300 /* Symbian support adds three new attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
306 multiple times. */
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
309 #endif
310 { NULL, 0, 0, false, false, false, NULL }
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
357 /* The next 5 hooks have been implemented for re-enabling sched1. With the
358 help of these macros we are limiting the movement of insns in sched1 to
359 reduce the register pressure. The overall idea is to keep count of the SImode
360 and SFmode regs required by already scheduled insns. When these counts
361 cross certain threshold values, we give priority to insns that free registers.
362 The insn that frees registers is most likely to be the one with the lowest
363 LUID (original insn order); but such an insn might be sitting in the stalled
364 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
365 up to a maximum of 8 so that such insns may move from Q -> R.
367 The descriptions of the hooks are as follows:
369 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
370 scheduler; it is called inside the sched_init function just after the
371 find_insn_reg_weights function call. It is used to calculate the SImode
372 and SFmode weights of insns in basic blocks, much like what
373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
378 (Q)->(R).
380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
381 high; reorder the ready queue so that the insn with lowest LUID will be
382 issued next.
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
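/* As a rough illustration (a sketch only, not code from this port, using the
   declarations above): when pressure is high, the reorder hooks effectively
   do something like

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   so that the insn with the lowest LUID is issued first, while
   sh_dfa_new_cycle may skip up to 8 cycles (see skip_cycles above) to let
   stalled insns migrate from Q to R.  */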
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
452 #ifdef HAVE_AS_TLS
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
455 #endif
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
506 /* Return regmode weight for insn. */
507 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
509 /* Return current register pressure for regmode. */
510 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
512 #undef TARGET_ENCODE_SECTION_INFO
513 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
515 #ifdef SYMBIAN
517 #undef TARGET_ENCODE_SECTION_INFO
518 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
519 #undef TARGET_STRIP_NAME_ENCODING
520 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
521 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
522 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
524 #endif /* SYMBIAN */
526 #undef TARGET_SECONDARY_RELOAD
527 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
529 #undef TARGET_LEGITIMATE_ADDRESS_P
530 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
532 #undef TARGET_TRAMPOLINE_INIT
533 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
534 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
535 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
537 /* Machine-specific symbol_ref flags. */
538 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
540 struct gcc_target targetm = TARGET_INITIALIZER;
542 /* Implement TARGET_HANDLE_OPTION. */
544 static bool
545 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
546 int value ATTRIBUTE_UNUSED)
548 switch (code)
550 case OPT_m1:
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
552 return true;
554 case OPT_m2:
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
556 return true;
558 case OPT_m2a:
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
560 return true;
562 case OPT_m2a_nofpu:
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
564 return true;
566 case OPT_m2a_single:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
568 return true;
570 case OPT_m2a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
572 return true;
574 case OPT_m2e:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
576 return true;
578 case OPT_m3:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
580 return true;
582 case OPT_m3e:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
584 return true;
586 case OPT_m4:
587 case OPT_m4_100:
588 case OPT_m4_200:
589 case OPT_m4_300:
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
591 return true;
593 case OPT_m4_nofpu:
594 case OPT_m4_100_nofpu:
595 case OPT_m4_200_nofpu:
596 case OPT_m4_300_nofpu:
597 case OPT_m4_340:
598 case OPT_m4_400:
599 case OPT_m4_500:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
601 return true;
603 case OPT_m4_single:
604 case OPT_m4_100_single:
605 case OPT_m4_200_single:
606 case OPT_m4_300_single:
607 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
608 return true;
610 case OPT_m4_single_only:
611 case OPT_m4_100_single_only:
612 case OPT_m4_200_single_only:
613 case OPT_m4_300_single_only:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
615 return true;
617 case OPT_m4a:
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
619 return true;
621 case OPT_m4a_nofpu:
622 case OPT_m4al:
623 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
624 return true;
626 case OPT_m4a_single:
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
628 return true;
630 case OPT_m4a_single_only:
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
632 return true;
634 case OPT_m5_32media:
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
636 return true;
638 case OPT_m5_32media_nofpu:
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
640 return true;
642 case OPT_m5_64media:
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
644 return true;
646 case OPT_m5_64media_nofpu:
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
648 return true;
650 case OPT_m5_compact:
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
652 return true;
654 case OPT_m5_compact_nofpu:
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
656 return true;
658 default:
659 return true;
663 /* Set default optimization options. */
664 void
665 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
667 if (level)
669 flag_omit_frame_pointer = 2;
670 if (!size)
671 sh_div_str = "inv:minlat";
673 if (size)
675 target_flags |= MASK_SMALLCODE;
676 sh_div_str = SH_DIV_STR_FOR_SIZE ;
678 else
679 TARGET_CBRANCHDI4 = 1;
680 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
681 haven't been parsed yet, hence we'd read only the default.
682 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
683 it's OK to always set flag_branch_target_load_optimize. */
684 if (level > 1)
686 flag_branch_target_load_optimize = 1;
687 if (!size)
688 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
690 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
691 here, so leave it to OVERRIDE_OPTIONS to set
692 flag_finite_math_only. We set it to 2 here so we know if the user
693 explicitly requested this to be on or off. */
694 flag_finite_math_only = 2;
695 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
696 the user explicitly requested this to be on or off. */
697 if (flag_schedule_insns > 0)
698 flag_schedule_insns = 2;
700 set_param_value ("simultaneous-prefetches", 2);
703 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
704 options, and do some machine dependent initialization. */
705 void
706 sh_override_options (void)
708 int regno;
710 SUBTARGET_OVERRIDE_OPTIONS;
711 if (flag_finite_math_only == 2)
712 flag_finite_math_only
713 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
714 if (TARGET_SH2E && !flag_finite_math_only)
715 target_flags |= MASK_IEEE;
716 sh_cpu = PROCESSOR_SH1;
717 assembler_dialect = 0;
718 if (TARGET_SH2)
719 sh_cpu = PROCESSOR_SH2;
720 if (TARGET_SH2E)
721 sh_cpu = PROCESSOR_SH2E;
722 if (TARGET_SH2A)
723 sh_cpu = PROCESSOR_SH2A;
724 if (TARGET_SH3)
725 sh_cpu = PROCESSOR_SH3;
726 if (TARGET_SH3E)
727 sh_cpu = PROCESSOR_SH3E;
728 if (TARGET_SH4)
730 assembler_dialect = 1;
731 sh_cpu = PROCESSOR_SH4;
733 if (TARGET_SH4A_ARCH)
735 assembler_dialect = 1;
736 sh_cpu = PROCESSOR_SH4A;
738 if (TARGET_SH5)
740 sh_cpu = PROCESSOR_SH5;
741 target_flags |= MASK_ALIGN_DOUBLE;
742 if (TARGET_SHMEDIA_FPU)
743 target_flags |= MASK_FMOVD;
744 if (TARGET_SHMEDIA)
746 /* There are no delay slots on SHmedia. */
747 flag_delayed_branch = 0;
748 /* Relaxation isn't yet supported for SHmedia */
749 target_flags &= ~MASK_RELAX;
750 /* After reload, if-conversion does little good but can cause
751 ICEs:
752 - find_if_block doesn't do anything for SH because we don't
753 have conditional execution patterns. (We use conditional
754 move patterns, which are handled differently, and only
755 before reload).
756 - find_cond_trap doesn't do anything for the SH because we
757 don't have conditional traps.
758 - find_if_case_1 uses redirect_edge_and_branch_force in
759 the only path that does an optimization, and this causes
760 an ICE when branch targets are in registers.
761 - find_if_case_2 doesn't do anything for the SHmedia after
762 reload except when it can redirect a tablejump - and
763 that's rather rare. */
764 flag_if_conversion2 = 0;
765 if (! strcmp (sh_div_str, "call"))
766 sh_div_strategy = SH_DIV_CALL;
767 else if (! strcmp (sh_div_str, "call2"))
768 sh_div_strategy = SH_DIV_CALL2;
769 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
770 sh_div_strategy = SH_DIV_FP;
771 else if (! strcmp (sh_div_str, "inv"))
772 sh_div_strategy = SH_DIV_INV;
773 else if (! strcmp (sh_div_str, "inv:minlat"))
774 sh_div_strategy = SH_DIV_INV_MINLAT;
775 else if (! strcmp (sh_div_str, "inv20u"))
776 sh_div_strategy = SH_DIV_INV20U;
777 else if (! strcmp (sh_div_str, "inv20l"))
778 sh_div_strategy = SH_DIV_INV20L;
779 else if (! strcmp (sh_div_str, "inv:call2"))
780 sh_div_strategy = SH_DIV_INV_CALL2;
781 else if (! strcmp (sh_div_str, "inv:call"))
782 sh_div_strategy = SH_DIV_INV_CALL;
783 else if (! strcmp (sh_div_str, "inv:fp"))
785 if (TARGET_FPU_ANY)
786 sh_div_strategy = SH_DIV_INV_FP;
787 else
788 sh_div_strategy = SH_DIV_INV;
790 TARGET_CBRANCHDI4 = 0;
791 /* Assembler CFI isn't yet fully supported for SHmedia. */
792 flag_dwarf2_cfi_asm = 0;
795 else
797 /* Only the sh64-elf assembler supports .quad properly. */
798 targetm.asm_out.aligned_op.di = NULL;
799 targetm.asm_out.unaligned_op.di = NULL;
801 if (TARGET_SH1)
803 if (! strcmp (sh_div_str, "call-div1"))
804 sh_div_strategy = SH_DIV_CALL_DIV1;
805 else if (! strcmp (sh_div_str, "call-fp")
806 && (TARGET_FPU_DOUBLE
807 || (TARGET_HARD_SH4 && TARGET_SH2E)
808 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
809 sh_div_strategy = SH_DIV_CALL_FP;
810 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
811 sh_div_strategy = SH_DIV_CALL_TABLE;
812 else
813 /* Pick one that makes most sense for the target in general.
814 It is not much good to use different functions depending
815 on -Os, since then we'll end up with two different functions
816 when some of the code is compiled for size, and some for
817 speed. */
819 /* SH4 tends to emphasize speed. */
820 if (TARGET_HARD_SH4)
821 sh_div_strategy = SH_DIV_CALL_TABLE;
822 /* These have their own way of doing things. */
823 else if (TARGET_SH2A)
824 sh_div_strategy = SH_DIV_INTRINSIC;
825 /* ??? Should we use the integer SHmedia function instead? */
826 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
827 sh_div_strategy = SH_DIV_CALL_FP;
828 /* SH1 .. SH3 cores often go into small-footprint systems, so
829 default to the smallest implementation available. */
830 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
831 sh_div_strategy = SH_DIV_CALL_TABLE;
832 else
833 sh_div_strategy = SH_DIV_CALL_DIV1;
835 if (!TARGET_SH1)
836 TARGET_PRETEND_CMOVE = 0;
837 if (sh_divsi3_libfunc[0])
838 ; /* User supplied - leave it alone. */
839 else if (TARGET_DIVIDE_CALL_FP)
840 sh_divsi3_libfunc = "__sdivsi3_i4";
841 else if (TARGET_DIVIDE_CALL_TABLE)
842 sh_divsi3_libfunc = "__sdivsi3_i4i";
843 else if (TARGET_SH5)
844 sh_divsi3_libfunc = "__sdivsi3_1";
845 else
846 sh_divsi3_libfunc = "__sdivsi3";
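/* Note: the conditional below parses as
   TARGET_SH5 ? 1 : ((! TARGET_SH2 || TARGET_HARD_SH4) ? 2 : 1),
   i.e. branch cost 1 for SH5, 2 for SH1 and for hard SH4, and 1 for the
   remaining SH2/SH3 class cores.  */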
847 if (sh_branch_cost == -1)
848 sh_branch_cost
849 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
852 if (! VALID_REGISTER_P (regno))
853 sh_register_names[regno][0] = '\0';
855 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
856 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
857 sh_additional_register_names[regno][0] = '\0';
859 if (flag_omit_frame_pointer == 2)
861 /* The debugging information is sufficient,
862 but gdb doesn't implement this yet */
863 if (0)
864 flag_omit_frame_pointer
865 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
866 else
867 flag_omit_frame_pointer = 0;
870 if ((flag_pic && ! TARGET_PREFERGOT)
871 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
872 flag_no_function_cse = 1;
874 if (SMALL_REGISTER_CLASSES)
876 /* Never run scheduling before reload, since that can
877 break global alloc, and generate slower code anyway due
878 to the pressure on R0. */
879 /* Enable sched1 for SH4 if the user explicitly requests it.
880 When sched1 is enabled, the ready queue will be reordered by
881 the target hooks if pressure is high. We cannot do this for
882 PIC, or for SH3 and lower, as they give spill failures for R0. */
883 if (!TARGET_HARD_SH4 || flag_pic)
884 flag_schedule_insns = 0;
885 /* ??? Current exception handling places basic block boundaries
886 after call_insns. This causes high pressure on R0 and gives
887 spill failures for R0 in reload. See PR 22553 and the thread
888 on gcc-patches
889 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
890 else if (flag_exceptions)
892 if (flag_schedule_insns == 1)
893 warning (0, "ignoring -fschedule-insns because of exception handling bug");
894 flag_schedule_insns = 0;
896 else if (flag_schedule_insns == 2)
897 flag_schedule_insns = 0;
900 /* Unwinding with -freorder-blocks-and-partition does not work on this
901 architecture, because it requires far jumps to labels crossing between
902 hot/cold sections, which are rejected on this architecture. */
903 if (flag_reorder_blocks_and_partition)
905 if (flag_exceptions)
907 inform (input_location,
908 "-freorder-blocks-and-partition does not work with "
909 "exceptions on this architecture");
910 flag_reorder_blocks_and_partition = 0;
911 flag_reorder_blocks = 1;
913 else if (flag_unwind_tables)
915 inform (input_location,
916 "-freorder-blocks-and-partition does not support unwind "
917 "info on this architecture");
918 flag_reorder_blocks_and_partition = 0;
919 flag_reorder_blocks = 1;
923 if (align_loops == 0)
924 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
925 if (align_jumps == 0)
926 align_jumps = 1 << CACHE_LOG;
927 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
928 align_jumps = TARGET_SHMEDIA ? 4 : 2;
930 /* Allocation boundary (in *bytes*) for the code of a function.
931 SH1: 32 bit alignment is faster, because instructions are always
932 fetched as a pair from a longword boundary.
933 SH2 .. SH5: align to cache line start. */
934 if (align_functions == 0)
935 align_functions
936 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
937 /* The linker relaxation code breaks when a function contains
938 alignments that are larger than that at the start of a
939 compilation unit. */
940 if (TARGET_RELAX)
942 int min_align
943 = align_loops > align_jumps ? align_loops : align_jumps;
945 /* Also take possible .long constants / mova tables into account. */
946 if (min_align < 4)
947 min_align = 4;
948 if (align_functions < min_align)
949 align_functions = min_align;
952 if (sh_fixed_range_str)
953 sh_fix_range (sh_fixed_range_str);
956 /* Print the operand address in x to the stream. */
958 void
959 print_operand_address (FILE *stream, rtx x)
961 switch (GET_CODE (x))
963 case REG:
964 case SUBREG:
965 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
966 break;
968 case PLUS:
970 rtx base = XEXP (x, 0);
971 rtx index = XEXP (x, 1);
973 switch (GET_CODE (index))
975 case CONST_INT:
976 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
977 reg_names[true_regnum (base)]);
978 break;
980 case REG:
981 case SUBREG:
983 int base_num = true_regnum (base);
984 int index_num = true_regnum (index);
986 fprintf (stream, "@(r0,%s)",
987 reg_names[MAX (base_num, index_num)]);
988 break;
991 default:
992 gcc_unreachable ();
995 break;
997 case PRE_DEC:
998 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
999 break;
1001 case POST_INC:
1002 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1003 break;
1005 default:
1006 x = mark_constant_pool_use (x);
1007 output_addr_const (stream, x);
1008 break;
1012 /* Print operand x (an rtx) in assembler syntax to file stream
1013 according to modifier code.
1015 '.' print a .s if insn needs delay slot
1016 ',' print LOCAL_LABEL_PREFIX
1017 '@' print trap, rte or rts depending upon pragma interruptness
1018 '#' output a nop if there is nothing to put in the delay slot
1019 ''' print likelihood suffix (/u for unlikely).
1020 '>' print branch target if -fverbose-asm
1021 'O' print a constant without the #
1022 'R' print the LSW of a dp value - changes if in little endian
1023 'S' print the MSW of a dp value - changes if in little endian
1024 'T' print the next word of a dp value - same as 'R' in big endian mode.
1025 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1026 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1027 'N' print 'r63' if the operand is (const_int 0).
1028 'd' print a V2SF reg as dN instead of fpN.
1029 'm' print a pair `base,offset' or `base,index', for LD and ST.
1030 'U' Likewise for {LD,ST}{HI,LO}.
1031 'V' print the position of a single bit set.
1032 'W' print the position of a single bit cleared.
1033 't' print a memory address which is a register.
1034 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1035 'o' output an operator. */
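/* Example (illustrative only, using the MSW/LSW macros defined above):
   for a DImode value held in the general register pair r4/r5 on a
   little-endian target (LSW == 0, MSW == 1), %R prints "r4" and %S
   prints "r5"; for a floating point register pair the layout is always
   big endian, so %R adds 1 and %S adds 0 to the register number.  */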
1037 void
1038 print_operand (FILE *stream, rtx x, int code)
1040 int regno;
1041 enum machine_mode mode;
1043 switch (code)
1045 tree trapa_attr;
1047 case '.':
1048 if (final_sequence
1049 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1050 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1051 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1052 break;
1053 case ',':
1054 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1055 break;
1056 case '@':
1057 trapa_attr = lookup_attribute ("trap_exit",
1058 DECL_ATTRIBUTES (current_function_decl));
1059 if (trapa_attr)
1060 fprintf (stream, "trapa #%ld",
1061 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1062 else if (sh_cfun_interrupt_handler_p ())
1064 if (sh_cfun_resbank_handler_p ())
1065 fprintf (stream, "resbank\n");
1066 fprintf (stream, "rte");
1068 else
1069 fprintf (stream, "rts");
1070 break;
1071 case '#':
1072 /* Output a nop if there's nothing in the delay slot. */
1073 if (dbr_sequence_length () == 0)
1074 fprintf (stream, "\n\tnop");
1075 break;
1076 case '\'':
1078 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1080 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1081 fputs ("/u", stream);
1082 break;
1084 case '>':
1085 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1087 fputs ("\t! target: ", stream);
1088 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1090 break;
1091 case 'O':
1092 x = mark_constant_pool_use (x);
1093 output_addr_const (stream, x);
1094 break;
1095 /* N.B.: %R / %S / %T adjust memory addresses by four.
1096 For SHMEDIA, that means they can be used to access the first and
1097 second 32 bit part of a 64 bit (or larger) value that
1098 might be held in floating point registers or memory.
1099 While they can be used to access 64 bit parts of a larger value
1100 held in general purpose registers, that won't work with memory,
1101 nor with fp registers, since the frxx names are used. */
1102 case 'R':
1103 if (REG_P (x) || GET_CODE (x) == SUBREG)
1105 regno = true_regnum (x);
1106 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1107 fputs (reg_names[regno], (stream));
1109 else if (MEM_P (x))
1111 x = adjust_address (x, SImode, 4 * LSW);
1112 print_operand_address (stream, XEXP (x, 0));
1114 else
1116 rtx sub = NULL_RTX;
1118 mode = GET_MODE (x);
1119 if (mode == VOIDmode)
1120 mode = DImode;
1121 if (GET_MODE_SIZE (mode) >= 8)
1122 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1123 if (sub)
1124 print_operand (stream, sub, 0);
1125 else
1126 output_operand_lossage ("invalid operand to %%R");
1128 break;
1129 case 'S':
1130 if (REG_P (x) || GET_CODE (x) == SUBREG)
1132 regno = true_regnum (x);
1133 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1134 fputs (reg_names[regno], (stream));
1136 else if (MEM_P (x))
1138 x = adjust_address (x, SImode, 4 * MSW);
1139 print_operand_address (stream, XEXP (x, 0));
1141 else
1143 rtx sub = NULL_RTX;
1145 mode = GET_MODE (x);
1146 if (mode == VOIDmode)
1147 mode = DImode;
1148 if (GET_MODE_SIZE (mode) >= 8)
1149 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1150 if (sub)
1151 print_operand (stream, sub, 0);
1152 else
1153 output_operand_lossage ("invalid operand to %%S");
1155 break;
1156 case 'T':
1157 /* Next word of a double. */
1158 switch (GET_CODE (x))
1160 case REG:
1161 fputs (reg_names[REGNO (x) + 1], (stream));
1162 break;
1163 case MEM:
1164 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1165 && GET_CODE (XEXP (x, 0)) != POST_INC)
1166 x = adjust_address (x, SImode, 4);
1167 print_operand_address (stream, XEXP (x, 0));
1168 break;
1169 default:
1170 break;
1172 break;
1174 case 't':
1175 gcc_assert (MEM_P (x));
1176 x = XEXP (x, 0);
1177 switch (GET_CODE (x))
1179 case REG:
1180 case SUBREG:
1181 print_operand (stream, x, 0);
1182 break;
1183 default:
1184 break;
1186 break;
1188 case 'o':
1189 switch (GET_CODE (x))
1191 case PLUS: fputs ("add", stream); break;
1192 case MINUS: fputs ("sub", stream); break;
1193 case MULT: fputs ("mul", stream); break;
1194 case DIV: fputs ("div", stream); break;
1195 case EQ: fputs ("eq", stream); break;
1196 case NE: fputs ("ne", stream); break;
1197 case GT: case LT: fputs ("gt", stream); break;
1198 case GE: case LE: fputs ("ge", stream); break;
1199 case GTU: case LTU: fputs ("gtu", stream); break;
1200 case GEU: case LEU: fputs ("geu", stream); break;
1201 default:
1202 break;
1204 break;
1205 case 'M':
1206 if (TARGET_SHMEDIA)
1208 if (MEM_P (x)
1209 && GET_CODE (XEXP (x, 0)) == PLUS
1210 && (REG_P (XEXP (XEXP (x, 0), 1))
1211 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1212 fputc ('x', stream);
1214 else
1216 if (MEM_P (x))
1218 switch (GET_MODE (x))
1220 case QImode: fputs (".b", stream); break;
1221 case HImode: fputs (".w", stream); break;
1222 case SImode: fputs (".l", stream); break;
1223 case SFmode: fputs (".s", stream); break;
1224 case DFmode: fputs (".d", stream); break;
1225 default: gcc_unreachable ();
1229 break;
1231 case 'm':
1232 gcc_assert (MEM_P (x));
1233 x = XEXP (x, 0);
1234 /* Fall through. */
1235 case 'U':
1236 switch (GET_CODE (x))
1238 case REG:
1239 case SUBREG:
1240 print_operand (stream, x, 0);
1241 fputs (", 0", stream);
1242 break;
1244 case PLUS:
1245 print_operand (stream, XEXP (x, 0), 0);
1246 fputs (", ", stream);
1247 print_operand (stream, XEXP (x, 1), 0);
1248 break;
1250 default:
1251 gcc_unreachable ();
1253 break;
1255 case 'V':
1257 int num = exact_log2 (INTVAL (x));
1258 gcc_assert (num >= 0);
1259 fprintf (stream, "#%d", num);
1261 break;
1263 case 'W':
1265 int num = exact_log2 (~INTVAL (x));
1266 gcc_assert (num >= 0);
1267 fprintf (stream, "#%d", num);
1269 break;
1271 case 'd':
1272 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1274 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1275 break;
1277 case 'N':
1278 if (x == CONST0_RTX (GET_MODE (x)))
1280 fprintf ((stream), "r63");
1281 break;
1283 goto default_output;
1284 case 'u':
1285 if (CONST_INT_P (x))
1287 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1288 break;
1290 /* Fall through. */
1292 default_output:
1293 default:
1294 regno = 0;
1295 mode = GET_MODE (x);
1297 switch (GET_CODE (x))
1299 case TRUNCATE:
1301 rtx inner = XEXP (x, 0);
1302 int offset = 0;
1303 enum machine_mode inner_mode;
1305 /* We might see SUBREGs with vector mode registers inside. */
1306 if (GET_CODE (inner) == SUBREG
1307 && (GET_MODE_SIZE (GET_MODE (inner))
1308 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1309 && subreg_lowpart_p (inner))
1310 inner = SUBREG_REG (inner);
1311 if (CONST_INT_P (inner))
1313 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1314 goto default_output;
1316 inner_mode = GET_MODE (inner);
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && REG_P (SUBREG_REG (inner)))
1322 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1323 GET_MODE (SUBREG_REG (inner)),
1324 SUBREG_BYTE (inner),
1325 GET_MODE (inner));
1326 inner = SUBREG_REG (inner);
1328 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1329 abort ();
1330 /* Floating point register pairs are always big endian;
1331 general purpose registers are 64 bit wide. */
1332 regno = REGNO (inner);
1333 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1334 - HARD_REGNO_NREGS (regno, mode))
1335 + offset;
1336 x = inner;
1337 goto reg;
1339 case SIGN_EXTEND:
1340 x = XEXP (x, 0);
1341 goto reg;
1342 /* FIXME: We need this on SHmedia32 because reload generates
1343 some sign-extended HI or QI loads into DImode registers
1344 but, because Pmode is SImode, the address ends up with a
1345 subreg:SI of the DImode register. Maybe reload should be
1346 fixed so as to apply alter_subreg to such loads? */
1347 case IF_THEN_ELSE:
1348 gcc_assert (trapping_target_operand (x, VOIDmode));
1349 x = XEXP (XEXP (x, 2), 0);
1350 goto default_output;
1351 case SUBREG:
1352 gcc_assert (SUBREG_BYTE (x) == 0
1353 && REG_P (SUBREG_REG (x)));
1355 x = SUBREG_REG (x);
1356 /* Fall through. */
1358 reg:
1359 case REG:
1360 regno += REGNO (x);
1361 if (FP_REGISTER_P (regno)
1362 && mode == V16SFmode)
1363 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1364 else if (FP_REGISTER_P (REGNO (x))
1365 && mode == V4SFmode)
1366 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1367 else if (REG_P (x)
1368 && mode == V2SFmode)
1369 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1370 else if (FP_REGISTER_P (REGNO (x))
1371 && GET_MODE_SIZE (mode) > 4)
1372 fprintf ((stream), "d%s", reg_names[regno] + 1);
1373 else
1374 fputs (reg_names[regno], (stream));
1375 break;
1377 case MEM:
1378 output_address (XEXP (x, 0));
1379 break;
1381 default:
1382 if (TARGET_SH1)
1383 fputc ('#', stream);
1384 output_addr_const (stream, x);
1385 break;
1387 break;
1392 /* Encode symbol attributes of a SYMBOL_REF into its
1393 SYMBOL_REF_FLAGS. */
1394 static void
1395 sh_encode_section_info (tree decl, rtx rtl, int first)
1397 default_encode_section_info (decl, rtl, first);
1399 if (TREE_CODE (decl) == FUNCTION_DECL
1400 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1401 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1404 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1405 static void
1406 force_into (rtx value, rtx target)
1408 value = force_operand (value, target);
1409 if (! rtx_equal_p (value, target))
1410 emit_insn (gen_move_insn (target, value));
1413 /* Emit code to perform a block move. Choose the best method.
1415 OPERANDS[0] is the destination.
1416 OPERANDS[1] is the source.
1417 OPERANDS[2] is the size.
1418 OPERANDS[3] is the alignment safe to use. */
1421 expand_block_move (rtx *operands)
1423 int align = INTVAL (operands[3]);
1424 int constp = (CONST_INT_P (operands[2]));
1425 int bytes = (constp ? INTVAL (operands[2]) : 0);
1427 if (! constp)
1428 return 0;
1430 /* If we could use mov.l to move words and dest is word-aligned, we
1431 can use movua.l for loads and still generate a relatively short
1432 and efficient sequence. */
1433 if (TARGET_SH4A_ARCH && align < 4
1434 && MEM_ALIGN (operands[0]) >= 32
1435 && can_move_by_pieces (bytes, 32))
1437 rtx dest = copy_rtx (operands[0]);
1438 rtx src = copy_rtx (operands[1]);
1439 /* We could use different pseudos for each copied word, but
1440 since movua can only load into r0, it's kind of
1441 pointless. */
1442 rtx temp = gen_reg_rtx (SImode);
1443 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1444 int copied = 0;
1446 while (copied + 4 <= bytes)
1448 rtx to = adjust_address (dest, SImode, copied);
1449 rtx from = adjust_automodify_address (src, BLKmode,
1450 src_addr, copied);
1452 set_mem_size (from, GEN_INT (4));
1453 emit_insn (gen_movua (temp, from));
1454 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1455 emit_move_insn (to, temp);
1456 copied += 4;
1459 if (copied < bytes)
1460 move_by_pieces (adjust_address (dest, BLKmode, copied),
1461 adjust_automodify_address (src, BLKmode,
1462 src_addr, copied),
1463 bytes - copied, align, 0);
1465 return 1;
1468 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1469 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1470 if (align < 4 || (bytes % 4 != 0))
1471 return 0;
1473 if (TARGET_HARD_SH4)
1475 if (bytes < 12)
1476 return 0;
1477 else if (bytes == 12)
1479 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1480 rtx r4 = gen_rtx_REG (SImode, 4);
1481 rtx r5 = gen_rtx_REG (SImode, 5);
1483 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1484 force_into (XEXP (operands[0], 0), r4);
1485 force_into (XEXP (operands[1], 0), r5);
1486 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1487 return 1;
1489 else if (! TARGET_SMALLCODE)
1491 const char *entry_name;
1492 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1493 int dwords;
1494 rtx r4 = gen_rtx_REG (SImode, 4);
1495 rtx r5 = gen_rtx_REG (SImode, 5);
1496 rtx r6 = gen_rtx_REG (SImode, 6);
1498 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1499 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1500 force_into (XEXP (operands[0], 0), r4);
1501 force_into (XEXP (operands[1], 0), r5);
1503 dwords = bytes >> 3;
1504 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1505 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1506 return 1;
1508 else
1509 return 0;
1511 if (bytes < 64)
1513 char entry[30];
1514 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1515 rtx r4 = gen_rtx_REG (SImode, 4);
1516 rtx r5 = gen_rtx_REG (SImode, 5);
1518 sprintf (entry, "__movmemSI%d", bytes);
1519 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1520 force_into (XEXP (operands[0], 0), r4);
1521 force_into (XEXP (operands[1], 0), r5);
1522 emit_insn (gen_block_move_real (func_addr_rtx));
1523 return 1;
1526 /* This is the same number of bytes as a memcpy call, but to a different
1527 less common function name, so this will occasionally use more space. */
1528 if (! TARGET_SMALLCODE)
1530 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1531 int final_switch, while_loop;
1532 rtx r4 = gen_rtx_REG (SImode, 4);
1533 rtx r5 = gen_rtx_REG (SImode, 5);
1534 rtx r6 = gen_rtx_REG (SImode, 6);
1536 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1537 force_into (XEXP (operands[0], 0), r4);
1538 force_into (XEXP (operands[1], 0), r5);
1540 /* r6 controls the size of the move. 16 is decremented from it
1541 for each 64 bytes moved. Then the negative value left over is used
1542 as an index into a list of move instructions. e.g., a 72 byte move
1543 would be set up with size(r6) = 14, for one iteration through the
1544 big while loop, and a switch of -2 for the last part. */
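/* Worked example (follows directly from the computation below): for the
   72 byte move mentioned above, bytes / 4 == 18, so
   final_switch = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16 = 0,
   giving r6 = 14; one pass through the 64 byte loop subtracts 16, leaving
   the -2 switch index for the remaining 8 bytes.  */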
1546 final_switch = 16 - ((bytes / 4) % 16);
1547 while_loop = ((bytes / 4) / 16 - 1) * 16;
1548 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1549 emit_insn (gen_block_lump_real (func_addr_rtx));
1550 return 1;
1553 return 0;
1556 /* Prepare operands for a move define_expand; specifically, one of the
1557 operands must be in a register. */
1560 prepare_move_operands (rtx operands[], enum machine_mode mode)
1562 if ((mode == SImode || mode == DImode)
1563 && flag_pic
1564 && ! ((mode == Pmode || mode == ptr_mode)
1565 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1567 rtx temp;
1568 if (SYMBOLIC_CONST_P (operands[1]))
1570 if (MEM_P (operands[0]))
1571 operands[1] = force_reg (Pmode, operands[1]);
1572 else if (TARGET_SHMEDIA
1573 && GET_CODE (operands[1]) == LABEL_REF
1574 && target_reg_operand (operands[0], mode))
1575 /* It's ok. */;
1576 else
1578 temp = (!can_create_pseudo_p ()
1579 ? operands[0]
1580 : gen_reg_rtx (Pmode));
1581 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1584 else if (GET_CODE (operands[1]) == CONST
1585 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1586 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1588 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1589 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1590 mode, temp);
1591 operands[1] = expand_binop (mode, add_optab, temp,
1592 XEXP (XEXP (operands[1], 0), 1),
1593 (!can_create_pseudo_p ()
1594 ? temp
1595 : gen_reg_rtx (Pmode)),
1596 0, OPTAB_LIB_WIDEN);
1600 if (! reload_in_progress && ! reload_completed)
1602 /* Copy the source to a register if neither operand is a register. */
1603 if (! register_operand (operands[0], mode)
1604 && ! sh_register_operand (operands[1], mode))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1607 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1609 /* This is like change_address_1 (operands[0], mode, 0, 1),
1610 except that we can't use that function because it is static. */
1611 rtx new_rtx = change_address (operands[0], mode, 0);
1612 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1613 operands[0] = new_rtx;
1616 /* This case can happen while generating code to move the result
1617 of a library call to the target. Reject `st r0,@(rX,rY)' because
1618 reload will fail to find a spill register for rX, since r0 is already
1619 being used for the source. */
1620 else if (TARGET_SH1
1621 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1622 && MEM_P (operands[0])
1623 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1624 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1625 operands[1] = copy_to_mode_reg (mode, operands[1]);
1628 if (mode == Pmode || mode == ptr_mode)
1630 rtx op0, op1, opc;
1631 enum tls_model tls_kind;
1633 op0 = operands[0];
1634 op1 = operands[1];
1635 if (GET_CODE (op1) == CONST
1636 && GET_CODE (XEXP (op1, 0)) == PLUS
1637 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1638 != TLS_MODEL_NONE))
1640 opc = XEXP (XEXP (op1, 0), 1);
1641 op1 = XEXP (XEXP (op1, 0), 0);
1643 else
1644 opc = NULL_RTX;
1646 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1648 rtx tga_op1, tga_ret, tmp, tmp2;
1650 switch (tls_kind)
1652 case TLS_MODEL_GLOBAL_DYNAMIC:
1653 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1654 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1655 op1 = tga_ret;
1656 break;
1658 case TLS_MODEL_LOCAL_DYNAMIC:
1659 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1660 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1662 tmp = gen_reg_rtx (Pmode);
1663 emit_move_insn (tmp, tga_ret);
1665 if (register_operand (op0, Pmode))
1666 tmp2 = op0;
1667 else
1668 tmp2 = gen_reg_rtx (Pmode);
1670 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1671 op1 = tmp2;
1672 break;
1674 case TLS_MODEL_INITIAL_EXEC:
1675 if (! flag_pic)
1677 /* Don't schedule insns for getting the GOT address when
1678 the first scheduling pass is enabled, to avoid spill
1679 failures for R0. */
1680 if (flag_schedule_insns)
1681 emit_insn (gen_blockage ());
1682 emit_insn (gen_GOTaddr2picreg ());
1683 emit_use (gen_rtx_REG (SImode, PIC_REG));
1684 if (flag_schedule_insns)
1685 emit_insn (gen_blockage ());
1687 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1688 tmp = gen_sym2GOTTPOFF (op1);
1689 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1690 op1 = tga_op1;
1691 break;
1693 case TLS_MODEL_LOCAL_EXEC:
1694 tmp2 = gen_reg_rtx (Pmode);
1695 emit_insn (gen_load_gbr (tmp2));
1696 tmp = gen_reg_rtx (Pmode);
1697 emit_insn (gen_symTPOFF2reg (tmp, op1));
1699 if (register_operand (op0, Pmode))
1700 op1 = op0;
1701 else
1702 op1 = gen_reg_rtx (Pmode);
1704 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1705 break;
1707 default:
1708 gcc_unreachable ();
1710 if (opc)
1711 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1712 operands[1] = op1;
1716 return 0;
1719 enum rtx_code
1720 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1721 enum rtx_code comparison)
1723 rtx op1;
1724 rtx scratch = NULL_RTX;
1726 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1727 comparison = GET_CODE (operands[0]);
1728 else
1729 scratch = operands[4];
1730 if (CONST_INT_P (operands[1])
1731 && !CONST_INT_P (operands[2]))
1733 rtx tmp = operands[1];
1735 operands[1] = operands[2];
1736 operands[2] = tmp;
1737 comparison = swap_condition (comparison);
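/* Canonicalize comparisons against constants so that the constant either
   disappears or fits a cheaper pattern; this is readable directly from the
   cases below: e.g. (x > -1) becomes (x >= 0), (x > -0x81) becomes
   (x >= -0x80), (x >= 1) becomes (x > 0), unsigned (x >= 1) becomes
   (x != 0), and an unsigned SImode compare against 0x80000000 turns into a
   signed test of the sign bit.  */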
1739 if (CONST_INT_P (operands[2]))
1741 HOST_WIDE_INT val = INTVAL (operands[2]);
1742 if ((val == -1 || val == -0x81)
1743 && (comparison == GT || comparison == LE))
1745 comparison = (comparison == GT) ? GE : LT;
1746 operands[2] = gen_int_mode (val + 1, mode);
1748 else if ((val == 1 || val == 0x80)
1749 && (comparison == GE || comparison == LT))
1751 comparison = (comparison == GE) ? GT : LE;
1752 operands[2] = gen_int_mode (val - 1, mode);
1754 else if (val == 1 && (comparison == GEU || comparison == LTU))
1756 comparison = (comparison == GEU) ? NE : EQ;
1757 operands[2] = CONST0_RTX (mode);
1759 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1761 comparison = (comparison == GEU) ? GTU : LEU;
1762 operands[2] = gen_int_mode (val - 1, mode);
1764 else if (val == 0 && (comparison == GTU || comparison == LEU))
1765 comparison = (comparison == GTU) ? NE : EQ;
1766 else if (mode == SImode
1767 && ((val == 0x7fffffff
1768 && (comparison == GTU || comparison == LEU))
1769 || ((unsigned HOST_WIDE_INT) val
1770 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1771 && (comparison == GEU || comparison == LTU))))
1773 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1774 operands[2] = CONST0_RTX (mode);
1777 op1 = operands[1];
1778 if (can_create_pseudo_p ())
1779 operands[1] = force_reg (mode, op1);
1780 /* When we are handling DImode comparisons, we want to keep constants so
1781 that we can optimize the component comparisons; however, memory loads
1782 are better issued as a whole so that they can be scheduled well.
1783 SImode equality comparisons allow I08 constants, but only when they
1784 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1785 into a register, that register might as well be r0, and we allow the
1786 constant. If it is already in a register, this is likely to be
1787 allocated to a different hard register, thus we load the constant into
1788 a register unless it is zero. */
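/* A concrete illustration (illustrative constants, SH1 compare forms):
"cmp/eq #42,r0" is a single insn, but there is no immediate form of cmp/gt
and no "cmp/eq #42,r1", so any such constant must first be loaded into a
register before the compare. */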
1789 if (!REG_P (operands[2])
1790 && (!CONST_INT_P (operands[2])
1791 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1792 && ((comparison != EQ && comparison != NE)
1793 || (REG_P (op1) && REGNO (op1) != R0_REG)
1794 || !satisfies_constraint_I08 (operands[2])))))
1796 if (scratch && GET_MODE (scratch) == mode)
1798 emit_move_insn (scratch, operands[2]);
1799 operands[2] = scratch;
1801 else if (can_create_pseudo_p ())
1802 operands[2] = force_reg (mode, operands[2]);
1804 return comparison;
1807 void
1808 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1810 rtx (*branch_expander) (rtx) = gen_branch_true;
1811 rtx jump;
1813 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1814 switch (comparison)
1816 case NE: case LT: case LE: case LTU: case LEU:
1817 comparison = reverse_condition (comparison);
1818 branch_expander = gen_branch_false;
1819 default: ;
1821 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1822 gen_rtx_fmt_ee (comparison, SImode,
1823 operands[1], operands[2])));
1824 jump = emit_jump_insn (branch_expander (operands[3]));
1825 if (probability >= 0)
1826 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1830 /* ??? How should we distribute probabilities when more than one branch
1831 is generated? So far we only have some ad-hoc observations:
1832 - If the operands are random, they are likely to differ in both parts.
1833 - If comparing items in a hash chain, the operands are random or equal;
1834 operation should be EQ or NE.
1835 - If items are searched in an ordered tree from the root, we can expect
1836 the highpart to be unequal about half of the time; operation should be
1837 an inequality comparison, operands non-constant, and overall probability
1838 about 50%. Likewise for quicksort.
1839 - Range checks will often be made against constants. Even if we assume for
1840 simplicity an even distribution of the non-constant operand over a
1841 sub-range here, the same probability could be generated with differently
1842 wide sub-ranges - as long as the ratio of the part of the subrange that
1843 is before the threshold to the part that comes after the threshold stays
1844 the same. Thus, we can't really tell anything here;
1845 assuming random distribution is at least simple.
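/* A rough numeric illustration of the EQ case below (assuming the usual
REG_BR_PROB_BASE of 10000): if the whole 64-bit equality is predicted taken
with prob == 6000, the high-word NE branch (msw_skip) gets the full
rev_prob == 4000, since values that differ are assumed to almost always
differ in the high word; once the high words have matched, the low-word EQ
branch is treated as (nearly) always taken, which is why lsw_taken_prob
collapses to REG_BR_PROB_BASE whenever prob is nonzero. */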
1848 bool
1849 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1851 enum rtx_code msw_taken, msw_skip, lsw_taken;
1852 rtx skip_label = NULL_RTX;
1853 rtx op1h, op1l, op2h, op2l;
1854 int num_branches;
1855 int prob, rev_prob;
1856 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1857 rtx scratch = operands[4];
1859 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1860 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1861 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1862 op1l = gen_lowpart (SImode, operands[1]);
1863 op2l = gen_lowpart (SImode, operands[2]);
1864 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1865 prob = split_branch_probability;
1866 rev_prob = REG_BR_PROB_BASE - prob;
1867 switch (comparison)
1869 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1870 That costs 1 cycle more when the first branch can be predicted taken,
1871 but saves us mispredicts because only one branch needs prediction.
1872 It also enables generating the cmpeqdi_t-1 pattern. */
1873 case EQ:
1874 if (TARGET_CMPEQDI_T)
1876 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1877 emit_jump_insn (gen_branch_true (operands[3]));
1878 return true;
1880 msw_skip = NE;
1881 lsw_taken = EQ;
1882 if (prob >= 0)
1884 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1886 msw_skip_prob = rev_prob;
1887 if (REG_BR_PROB_BASE <= 65535)
1888 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1889 else
1891 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1892 lsw_taken_prob
1893 = (prob
1894 ? (REG_BR_PROB_BASE
1895 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1896 / ((HOST_WIDEST_INT) prob << 32)))
1897 : 0);
1900 break;
1901 case NE:
1902 if (TARGET_CMPEQDI_T)
1904 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1905 emit_jump_insn (gen_branch_false (operands[3]));
1906 return true;
1908 msw_taken = NE;
1909 msw_taken_prob = prob;
1910 lsw_taken = NE;
1911 lsw_taken_prob = 0;
1912 break;
1913 case GTU: case GT:
1914 msw_taken = comparison;
1915 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1916 break;
1917 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1918 msw_skip = swap_condition (msw_taken);
1919 lsw_taken = GTU;
1920 break;
1921 case GEU: case GE:
1922 if (op2l == CONST0_RTX (SImode))
1923 msw_taken = comparison;
1924 else
1926 msw_taken = comparison == GE ? GT : GTU;
1927 msw_skip = swap_condition (msw_taken);
1928 lsw_taken = GEU;
1930 break;
1931 case LTU: case LT:
1932 msw_taken = comparison;
1933 if (op2l == CONST0_RTX (SImode))
1934 break;
1935 msw_skip = swap_condition (msw_taken);
1936 lsw_taken = LTU;
1937 break;
1938 case LEU: case LE:
1939 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1940 msw_taken = comparison;
1941 else
1943 lsw_taken = LEU;
1944 if (comparison == LE)
1945 msw_taken = LT;
1946 else if (op2h != CONST0_RTX (SImode))
1947 msw_taken = LTU;
1948 else
1949 break;
1950 msw_skip = swap_condition (msw_taken);
1952 break;
1953 default: return false;
1955 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1956 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1957 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1958 if (comparison != EQ && comparison != NE && num_branches > 1)
1960 if (!CONSTANT_P (operands[2])
1961 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1962 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1964 msw_taken_prob = prob / 2U;
1965 msw_skip_prob
1966 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1967 lsw_taken_prob = prob;
1969 else
1971 msw_taken_prob = prob;
1972 msw_skip_prob = REG_BR_PROB_BASE;
1973 /* ??? If we have a constant op2h, should we use that when
1974 calculating lsw_taken_prob? */
1975 lsw_taken_prob = prob;
1978 operands[1] = op1h;
1979 operands[2] = op2h;
1980 operands[4] = NULL_RTX;
1981 if (reload_completed
1982 && ! arith_reg_or_0_operand (op2h, SImode)
1983 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1984 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1985 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1987 emit_move_insn (scratch, operands[2]);
1988 operands[2] = scratch;
1990 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1991 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1992 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1994 rtx taken_label = operands[3];
1996 /* Operands were possibly modified, but msw_skip doesn't expect this.
1997 Always use the original ones. */
1998 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2000 operands[1] = op1h;
2001 operands[2] = op2h;
2004 operands[3] = skip_label = gen_label_rtx ();
2005 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2006 operands[3] = taken_label;
2008 operands[1] = op1l;
2009 operands[2] = op2l;
2010 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2012 if (reload_completed
2013 && ! arith_reg_or_0_operand (op2l, SImode)
2014 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2016 emit_move_insn (scratch, operands[2]);
2017 operands[2] = scratch;
2019 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2021 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2022 emit_label (skip_label);
2023 return true;
2026 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2028 static void
2029 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2031 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2033 insn = gen_rtx_PARALLEL (VOIDmode,
2034 gen_rtvec (2, insn,
2035 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2036 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2038 else
2039 emit_insn (insn);
2042 /* Prepare the operands for an scc instruction; make sure that the
2043 compare has been done and the result is in T_REG. */
2044 void
2045 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2047 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2048 enum rtx_code oldcode = code;
2049 enum machine_mode mode;
2051 /* First need a compare insn. */
2052 switch (code)
2054 case NE:
2055 /* It isn't possible to handle this case. */
2056 gcc_unreachable ();
2057 case LT:
2058 code = GT;
2059 break;
2060 case LE:
2061 code = GE;
2062 break;
2063 case LTU:
2064 code = GTU;
2065 break;
2066 case LEU:
2067 code = GEU;
2068 break;
2069 default:
2070 break;
2072 if (code != oldcode)
2074 rtx tmp = op0;
2075 op0 = op1;
2076 op1 = tmp;
2079 mode = GET_MODE (op0);
2080 if (mode == VOIDmode)
2081 mode = GET_MODE (op1);
2083 op0 = force_reg (mode, op0);
2084 if ((code != EQ && code != NE
2085 && (op1 != const0_rtx
2086 || code == GTU || code == GEU || code == LTU || code == LEU))
2087 || (mode == DImode && op1 != const0_rtx)
2088 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2089 op1 = force_reg (mode, op1);
2091 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2092 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2093 mode);
2097 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2098 rtx op0, rtx op1)
2100 rtx target = gen_reg_rtx (SImode);
2101 rtx tmp;
2103 gcc_assert (TARGET_SHMEDIA);
2104 switch (code)
2106 case EQ:
2107 case GT:
2108 case LT:
2109 case UNORDERED:
2110 case GTU:
2111 case LTU:
2112 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2113 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2114 code = NE;
2115 break;
2117 case NE:
2118 case GE:
2119 case LE:
2120 case ORDERED:
2121 case GEU:
2122 case LEU:
2123 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2125 code = EQ;
2126 break;
2128 case UNEQ:
2129 case UNGE:
2130 case UNGT:
2131 case UNLE:
2132 case UNLT:
2133 case LTGT:
2134 return NULL_RTX;
2136 default:
2137 gcc_unreachable ();
2140 if (mode == DImode)
2142 rtx t2 = gen_reg_rtx (DImode);
2143 emit_insn (gen_extendsidi2 (t2, target));
2144 target = t2;
2147 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2150 /* Called from the md file, set up the operands of a compare instruction. */
2152 void
2153 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2155 enum rtx_code code = GET_CODE (operands[0]);
2156 enum rtx_code branch_code;
2157 rtx op0 = operands[1];
2158 rtx op1 = operands[2];
2159 rtx insn, tem;
2160 bool need_ccmpeq = false;
2162 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2164 op0 = force_reg (mode, op0);
2165 op1 = force_reg (mode, op1);
2167 else
2169 if (code != EQ || mode == DImode)
2171 /* Force args into regs, since we can't use constants here. */
2172 op0 = force_reg (mode, op0);
2173 if (op1 != const0_rtx || code == GTU || code == GEU)
2174 op1 = force_reg (mode, op1);
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2180 if (code == LT
2181 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2182 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2184 tem = op0, op0 = op1, op1 = tem;
2185 code = swap_condition (code);
2188 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2189 if (code == GE)
2191 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2192 need_ccmpeq = true;
2193 code = GT;
2196 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2197 to EQ/GT respectively. */
2198 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2201 switch (code)
2203 case EQ:
2204 case GT:
2205 case GE:
2206 case GTU:
2207 case GEU:
2208 branch_code = code;
2209 break;
2210 case NE:
2211 case LT:
2212 case LE:
2213 case LTU:
2214 case LEU:
2215 branch_code = reverse_condition (code);
2216 break;
2217 default:
2218 gcc_unreachable ();
2221 insn = gen_rtx_SET (VOIDmode,
2222 gen_rtx_REG (SImode, T_REG),
2223 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2225 sh_emit_set_t_insn (insn, mode);
2226 if (need_ccmpeq)
2227 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2229 if (branch_code == code)
2230 emit_jump_insn (gen_branch_true (operands[3]));
2231 else
2232 emit_jump_insn (gen_branch_false (operands[3]));
2235 void
2236 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2238 enum rtx_code code = GET_CODE (operands[1]);
2239 rtx op0 = operands[2];
2240 rtx op1 = operands[3];
2241 rtx lab = NULL_RTX;
2242 bool invert = false;
2243 rtx tem;
2245 op0 = force_reg (mode, op0);
2246 if ((code != EQ && code != NE
2247 && (op1 != const0_rtx
2248 || code == GTU || code == GEU || code == LTU || code == LEU))
2249 || (mode == DImode && op1 != const0_rtx)
2250 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2251 op1 = force_reg (mode, op1);
2253 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2255 if (code == LT || code == LE)
2257 code = swap_condition (code);
2258 tem = op0, op0 = op1, op1 = tem;
2260 if (code == GE)
2262 if (TARGET_IEEE)
2264 lab = gen_label_rtx ();
2265 sh_emit_scc_to_t (EQ, op0, op1);
2266 emit_jump_insn (gen_branch_true (lab));
2267 code = GT;
2269 else
2271 code = LT;
2272 invert = true;
2277 if (code == NE)
2279 code = EQ;
2280 invert = true;
2283 sh_emit_scc_to_t (code, op0, op1);
2284 if (lab)
2285 emit_label (lab);
2286 if (invert)
2287 emit_insn (gen_movnegt (operands[0]));
2288 else
2289 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2292 /* Functions to output assembly code. */
2294 /* Return a sequence of instructions to perform DI or DF move.
2296 Since the SH cannot move a DI or DF in one instruction, we have
2297 to take care when we see overlapping source and dest registers. */
2299 const char *
2300 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2301 enum machine_mode mode)
2303 rtx dst = operands[0];
2304 rtx src = operands[1];
2306 if (MEM_P (dst)
2307 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2308 return "mov.l %T1,%0\n\tmov.l %1,%0";
2310 if (register_operand (dst, mode)
2311 && register_operand (src, mode))
2313 if (REGNO (src) == MACH_REG)
2314 return "sts mach,%S0\n\tsts macl,%R0";
2316 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2317 when mov.d r1,r0 do r1->r0 then r2->r1. */
2319 if (REGNO (src) + 1 == REGNO (dst))
2320 return "mov %T1,%T0\n\tmov %1,%0";
2321 else
2322 return "mov %1,%0\n\tmov %T1,%T0";
2324 else if (CONST_INT_P (src))
2326 if (INTVAL (src) < 0)
2327 output_asm_insn ("mov #-1,%S0", operands);
2328 else
2329 output_asm_insn ("mov #0,%S0", operands);
2331 return "mov %1,%R0";
2333 else if (MEM_P (src))
2335 int ptrreg = -1;
2336 int dreg = REGNO (dst);
2337 rtx inside = XEXP (src, 0);
2339 switch (GET_CODE (inside))
2341 case REG:
2342 ptrreg = REGNO (inside);
2343 break;
2345 case SUBREG:
2346 ptrreg = subreg_regno (inside);
2347 break;
2349 case PLUS:
2350 ptrreg = REGNO (XEXP (inside, 0));
2351 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2352 an offsettable address. Unfortunately, offsettable addresses use
2353 QImode to check the offset, and a QImode offsettable address
2354 requires r0 for the other operand, which is not currently
2355 supported, so we can't use the 'o' constraint.
2356 Thus we must check for and handle r0+REG addresses here.
2357 We punt for now, since this is likely very rare. */
2358 gcc_assert (!REG_P (XEXP (inside, 1)));
2359 break;
2361 case LABEL_REF:
2362 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2363 case POST_INC:
2364 return "mov.l %1,%0\n\tmov.l %1,%T0";
2365 default:
2366 gcc_unreachable ();
2369 /* Work out the safe way to copy. Copy into the second half first. */
2370 if (dreg == ptrreg)
2371 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2374 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2377 /* Print an instruction which would have gone into a delay slot after
2378 another instruction, but couldn't because the other instruction expanded
2379 into a sequence where putting the slot insn at the end wouldn't work. */
2381 static void
2382 print_slot (rtx insn)
2384 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2386 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2389 const char *
2390 output_far_jump (rtx insn, rtx op)
2392 struct { rtx lab, reg, op; } this_jmp;
2393 rtx braf_base_lab = NULL_RTX;
2394 const char *jump;
2395 int far;
2396 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2397 rtx prev;
2399 this_jmp.lab = gen_label_rtx ();
2401 if (TARGET_SH2
2402 && offset >= -32764
2403 && offset - get_attr_length (insn) <= 32766)
2405 far = 0;
2406 jump = "mov.w %O0,%1; braf %1";
2408 else
2410 far = 1;
2411 if (flag_pic)
2413 if (TARGET_SH2)
2414 jump = "mov.l %O0,%1; braf %1";
2415 else
2416 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2418 else
2419 jump = "mov.l %O0,%1; jmp @%1";
2421 /* If we have a scratch register available, use it. */
2422 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2423 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2425 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2426 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2427 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2428 output_asm_insn (jump, &this_jmp.lab);
2429 if (dbr_sequence_length ())
2430 print_slot (final_sequence);
2431 else
2432 output_asm_insn ("nop", 0);
2434 else
2436 /* Output the delay slot insn first if any. */
2437 if (dbr_sequence_length ())
2438 print_slot (final_sequence);
2440 this_jmp.reg = gen_rtx_REG (SImode, 13);
2441 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2442 Fortunately, MACL is fixed and call-clobbered, and we never
2443 need its value across jumps, so save r13 in it instead of in
2444 the stack. */
2445 if (TARGET_SH5)
2446 output_asm_insn ("lds r13, macl", 0);
2447 else
2448 output_asm_insn ("mov.l r13,@-r15", 0);
2449 output_asm_insn (jump, &this_jmp.lab);
2450 if (TARGET_SH5)
2451 output_asm_insn ("sts macl, r13", 0);
2452 else
2453 output_asm_insn ("mov.l @r15+,r13", 0);
2455 if (far && flag_pic && TARGET_SH2)
2457 braf_base_lab = gen_label_rtx ();
2458 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2459 CODE_LABEL_NUMBER (braf_base_lab));
2461 if (far)
2462 output_asm_insn (".align 2", 0);
2463 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2464 this_jmp.op = op;
2465 if (far && flag_pic)
2467 if (TARGET_SH2)
2468 this_jmp.lab = braf_base_lab;
2469 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2471 else
2472 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2473 return "";
2476 /* Local label counter, used for constants in the pool and inside
2477 pattern branches. */
2479 static int lf = 100;
2481 /* Output code for ordinary branches. */
2483 const char *
2484 output_branch (int logic, rtx insn, rtx *operands)
2486 switch (get_attr_length (insn))
2488 case 6:
2489 /* This can happen if filling the delay slot has caused a forward
2490 branch to exceed its range (we could reverse it, but only
2491 when we know we won't overextend other branches; this should
2492 best be handled by relaxation).
2493 It can also happen when other condbranches hoist delay slot insn
2494 from their destination, thus leading to code size increase.
2495 But the branch will still be in the range -4092..+4098 bytes. */
2497 if (! TARGET_RELAX)
2499 int label = lf++;
2500 /* The call to print_slot will clobber the operands. */
2501 rtx op0 = operands[0];
2503 /* If the instruction in the delay slot is annulled (true), then
2504 there is no delay slot where we can put it now. The only safe
2505 place for it is after the label. final will do that by default. */
2507 if (final_sequence
2508 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2509 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2511 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2512 ASSEMBLER_DIALECT ? "/" : ".", label);
2513 print_slot (final_sequence);
2515 else
2516 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2518 output_asm_insn ("bra\t%l0", &op0);
2519 fprintf (asm_out_file, "\tnop\n");
2520 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2522 return "";
2524 /* When relaxing, handle this like a short branch. The linker
2525 will fix it up if it still doesn't fit after relaxation. */
2526 case 2:
2527 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2529 /* These are for SH2e, in which we have to account for the
2530 extra nop because of the hardware bug in annulled branches. */
2531 case 8:
2532 if (! TARGET_RELAX)
2534 int label = lf++;
2536 gcc_assert (!final_sequence
2537 || !(INSN_ANNULLED_BRANCH_P
2538 (XVECEXP (final_sequence, 0, 0))));
2539 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2540 logic ? "f" : "t",
2541 ASSEMBLER_DIALECT ? "/" : ".", label);
2542 fprintf (asm_out_file, "\tnop\n");
2543 output_asm_insn ("bra\t%l0", operands);
2544 fprintf (asm_out_file, "\tnop\n");
2545 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2547 return "";
2549 /* When relaxing, fall through. */
2550 case 4:
2552 char buffer[10];
2554 sprintf (buffer, "b%s%ss\t%%l0",
2555 logic ? "t" : "f",
2556 ASSEMBLER_DIALECT ? "/" : ".");
2557 output_asm_insn (buffer, &operands[0]);
2558 return "nop";
2561 default:
2562 /* There should be no longer branches now - that would
2563 indicate that something has destroyed the branches set
2564 up in machine_dependent_reorg. */
2565 gcc_unreachable ();
2569 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2570 fill in operands 9 as a label to the successor insn.
2571 We try to use jump threading where possible.
2572 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2573 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2574 follow jmp and bt, if the address is in range. */
2575 const char *
2576 output_branchy_insn (enum rtx_code code, const char *templ,
2577 rtx insn, rtx *operands)
2579 rtx next_insn = NEXT_INSN (insn);
2581 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2583 rtx src = SET_SRC (PATTERN (next_insn));
2584 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2586 /* Following branch not taken */
2587 operands[9] = gen_label_rtx ();
2588 emit_label_after (operands[9], next_insn);
2589 INSN_ADDRESSES_NEW (operands[9],
2590 INSN_ADDRESSES (INSN_UID (next_insn))
2591 + get_attr_length (next_insn));
2592 return templ;
2594 else
2596 int offset = (branch_dest (next_insn)
2597 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2598 if (offset >= -252 && offset <= 258)
2600 if (GET_CODE (src) == IF_THEN_ELSE)
2601 /* branch_true */
2602 src = XEXP (src, 1);
2603 operands[9] = src;
2604 return templ;
2608 operands[9] = gen_label_rtx ();
2609 emit_label_after (operands[9], insn);
2610 INSN_ADDRESSES_NEW (operands[9],
2611 INSN_ADDRESSES (INSN_UID (insn))
2612 + get_attr_length (insn));
2613 return templ;
2616 const char *
2617 output_ieee_ccmpeq (rtx insn, rtx *operands)
2619 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2620 insn, operands);
2623 /* Output the start of the assembler file. */
2625 static void
2626 sh_file_start (void)
2628 default_file_start ();
2630 #ifdef SYMBIAN
2631 /* Declare the .directive section before it is used. */
2632 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2633 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2634 #endif
2636 if (TARGET_ELF)
2637 /* We need to show the text section with the proper
2638 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2639 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2640 will complain. We can teach GAS specifically about the
2641 default attributes for our choice of text section, but
2642 then we would have to change GAS again if/when we change
2643 the text section name. */
2644 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2645 else
2646 /* Switch to the data section so that the coffsem symbol
2647 isn't in the text section. */
2648 switch_to_section (data_section);
2650 if (TARGET_LITTLE_ENDIAN)
2651 fputs ("\t.little\n", asm_out_file);
2653 if (!TARGET_ELF)
2655 if (TARGET_SHCOMPACT)
2656 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2657 else if (TARGET_SHMEDIA)
2658 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2659 TARGET_SHMEDIA64 ? 64 : 32);
2663 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2665 static bool
2666 unspec_caller_rtx_p (rtx pat)
2668 rtx base, offset;
2669 int i;
2671 split_const (pat, &base, &offset);
2672 if (GET_CODE (base) == UNSPEC)
2674 if (XINT (base, 1) == UNSPEC_CALLER)
2675 return true;
2676 for (i = 0; i < XVECLEN (base, 0); i++)
2677 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2678 return true;
2680 return false;
2683 /* Indicate that INSN cannot be duplicated. This is true for insns
2684 that generate a unique label. */
2686 static bool
2687 sh_cannot_copy_insn_p (rtx insn)
2689 rtx pat;
2691 if (!reload_completed || !flag_pic)
2692 return false;
2694 if (!NONJUMP_INSN_P (insn))
2695 return false;
2696 if (asm_noperands (insn) >= 0)
2697 return false;
2699 pat = PATTERN (insn);
2700 if (GET_CODE (pat) != SET)
2701 return false;
2702 pat = SET_SRC (pat);
2704 if (unspec_caller_rtx_p (pat))
2705 return true;
2707 return false;
2710 /* Actual number of instructions used to make a shift by N. */
2711 static const char ashiftrt_insns[] =
2712 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2714 /* Left shift and logical right shift are the same. */
2715 static const char shift_insns[] =
2716 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2718 /* Individual shift amounts needed to get the above length sequences.
2719 One bit right shifts clobber the T bit, so when possible, put one bit
2720 shifts in the middle of the sequence, so the ends are eligible for
2721 branch delay slots. */
2722 static const short shift_amounts[32][5] = {
2723 {0}, {1}, {2}, {2, 1},
2724 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2725 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2726 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2727 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2728 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2729 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2730 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
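/* For illustration, reading the tables above: a constant left shift by 13
costs shift_insns[13] == 4 insns and is emitted from shift_amounts[13]
== {8, 2, 1, 2} as "shll8; shll2; shll; shll2" -- the single-bit shll,
which clobbers T, sits in the middle so that the first and last insns
remain delay-slot candidates. A shift by 14 uses the negative entry
{8, -2, 8}, i.e. "shll8; shlr2; shll8", trading a short right shift for
a shorter overall sequence. */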
2732 /* Likewise, but for shift amounts < 16, up to three highmost bits
2733 might be clobbered. This is typically used when combined with some
2734 kind of sign or zero extension. */
2736 static const char ext_shift_insns[] =
2737 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2739 static const short ext_shift_amounts[32][4] = {
2740 {0}, {1}, {2}, {2, 1},
2741 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2742 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2743 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2744 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2745 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2746 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2747 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
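/* Illustration: when the high bits are about to be thrown away by a
following extension, ext_shift_amounts[6] == {8, -2} ("shll8; shlr2")
performs a left shift by 6 in ext_shift_insns[6] == 2 insns instead of
the 3 insns a clean shift needs, at the price of zeroing the two
highmost result bits. */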
2749 /* Assuming we have a value that has been sign-extended by at least one bit,
2750 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2751 to shift it by N without data loss, and quicker than by other means? */
2752 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
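/* ((n) | 8) == 15 holds exactly for N == 7 and N == 15: for those counts a
sign-extended value can be shifted left by N+1 with a single shll8 or
shll16 and then corrected with one arithmetic right shift, which is how
gen_shl_sext uses this macro in its kind 3 / kind 4 sequences. */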
2754 /* This is used in length attributes in sh.md to help compute the length
2755 of arbitrary constant shift instructions. */
2758 shift_insns_rtx (rtx insn)
2760 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2761 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2762 enum rtx_code shift_code = GET_CODE (set_src);
2764 switch (shift_code)
2766 case ASHIFTRT:
2767 return ashiftrt_insns[shift_count];
2768 case LSHIFTRT:
2769 case ASHIFT:
2770 return shift_insns[shift_count];
2771 default:
2772 gcc_unreachable ();
2776 /* Return the cost of a shift. */
2778 static inline int
2779 shiftcosts (rtx x)
2781 int value;
2783 if (TARGET_SHMEDIA)
2784 return 1;
2786 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2788 if (GET_MODE (x) == DImode
2789 && CONST_INT_P (XEXP (x, 1))
2790 && INTVAL (XEXP (x, 1)) == 1)
2791 return 2;
2793 /* Everything else is invalid, because there is no pattern for it. */
2794 return MAX_COST;
2796 /* If shifting by a non-constant amount, this will be expensive. */
2797 if (!CONST_INT_P (XEXP (x, 1)))
2798 return SH_DYNAMIC_SHIFT_COST;
2800 /* Otherwise, return the true cost in instructions. Cope with out of range
2801 shift counts more or less arbitrarily. */
2802 value = INTVAL (XEXP (x, 1)) & 31;
2804 if (GET_CODE (x) == ASHIFTRT)
2806 int cost = ashiftrt_insns[value];
2807 /* If SH3, then we put the constant in a reg and use shad. */
2808 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2809 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2810 return cost;
2812 else
2813 return shift_insns[value];
2816 /* Return the cost of an AND operation. */
2818 static inline int
2819 andcosts (rtx x)
2821 int i;
2823 /* Anding with a register is a single cycle and instruction. */
2824 if (!CONST_INT_P (XEXP (x, 1)))
2825 return 1;
2827 i = INTVAL (XEXP (x, 1));
2829 if (TARGET_SHMEDIA)
2831 if (satisfies_constraint_I10 (XEXP (x, 1))
2832 || satisfies_constraint_J16 (XEXP (x, 1)))
2833 return 1;
2834 else
2835 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2838 /* These constants are single cycle extu.[bw] instructions. */
2839 if (i == 0xff || i == 0xffff)
2840 return 1;
2841 /* Constants that can be used in an and immediate instruction in a single
2842 cycle, but this requires r0, so make it a little more expensive. */
2843 if (CONST_OK_FOR_K08 (i))
2844 return 2;
2845 /* Constants that can be loaded with a mov immediate and an and.
2846 This case is probably unnecessary. */
2847 if (CONST_OK_FOR_I08 (i))
2848 return 2;
2849 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2850 This case is probably unnecessary. */
2851 return 3;
2854 /* Return the cost of an addition or a subtraction. */
2856 static inline int
2857 addsubcosts (rtx x)
2859 /* Adding a register is a single cycle insn. */
2860 if (REG_P (XEXP (x, 1))
2861 || GET_CODE (XEXP (x, 1)) == SUBREG)
2862 return 1;
2864 /* Likewise for small constants. */
2865 if (CONST_INT_P (XEXP (x, 1))
2866 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2867 return 1;
2869 if (TARGET_SHMEDIA)
2870 switch (GET_CODE (XEXP (x, 1)))
2872 case CONST:
2873 case LABEL_REF:
2874 case SYMBOL_REF:
2875 return TARGET_SHMEDIA64 ? 5 : 3;
2877 case CONST_INT:
2878 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2879 return 2;
2880 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2881 return 3;
2882 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2883 return 4;
2885 /* Fall through. */
2886 default:
2887 return 5;
2890 /* Any other constant requires a 2 cycle pc-relative load plus an
2891 addition. */
2892 return 3;
2895 /* Return the cost of a multiply. */
2896 static inline int
2897 multcosts (rtx x ATTRIBUTE_UNUSED)
2899 if (sh_multcost >= 0)
2900 return sh_multcost;
2901 if (TARGET_SHMEDIA)
2902 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2903 accept constants. Ideally, we would use a cost of one or two and
2904 add the cost of the operand, but disregard the latter when inside loops
2905 and loop invariant code motion is still to follow.
2906 Using a multiply first and splitting it later if it's a loss
2907 doesn't work because of different sign / zero extension semantics
2908 of multiplies vs. shifts. */
2909 return TARGET_SMALLCODE ? 2 : 3;
2911 if (TARGET_SH2)
2913 /* We have a mul insn, so we can never take more than the mul and the
2914 read of the mac reg, but count more because of the latency and extra
2915 reg usage. */
2916 if (TARGET_SMALLCODE)
2917 return 2;
2918 return 3;
2921 /* If we're aiming at small code, then just count the number of
2922 insns in a multiply call sequence. */
2923 if (TARGET_SMALLCODE)
2924 return 5;
2926 /* Otherwise count all the insns in the routine we'd be calling too. */
2927 return 20;
2930 /* Compute a (partial) cost for rtx X. Return true if the complete
2931 cost has been computed, and false if subexpressions should be
2932 scanned. In either case, *TOTAL contains the cost result. */
2934 static bool
2935 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2936 bool speed ATTRIBUTE_UNUSED)
2938 switch (code)
2940 case CONST_INT:
2941 if (TARGET_SHMEDIA)
2943 if (INTVAL (x) == 0)
2944 *total = 0;
2945 else if (outer_code == AND && and_operand ((x), DImode))
2946 *total = 0;
2947 else if ((outer_code == IOR || outer_code == XOR
2948 || outer_code == PLUS)
2949 && CONST_OK_FOR_I10 (INTVAL (x)))
2950 *total = 0;
2951 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2952 *total = COSTS_N_INSNS (outer_code != SET);
2953 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2954 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2955 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2956 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2957 else
2958 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2959 return true;
2961 if (CONST_OK_FOR_I08 (INTVAL (x)))
2962 *total = 0;
2963 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2964 && CONST_OK_FOR_K08 (INTVAL (x)))
2965 *total = 1;
2966 /* prepare_cmp_insn will force costly constants into registers before
2967 the cbranch[sd]i4 patterns can see them, so preserve potentially
2968 interesting ones not covered by I08 above. */
2969 else if (outer_code == COMPARE
2970 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2971 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2972 || INTVAL (x) == 0x7fffffff
2973 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2974 *total = 1;
2975 else
2976 *total = 8;
2977 return true;
2979 case CONST:
2980 case LABEL_REF:
2981 case SYMBOL_REF:
2982 if (TARGET_SHMEDIA64)
2983 *total = COSTS_N_INSNS (4);
2984 else if (TARGET_SHMEDIA32)
2985 *total = COSTS_N_INSNS (2);
2986 else
2987 *total = 5;
2988 return true;
2990 case CONST_DOUBLE:
2991 if (TARGET_SHMEDIA)
2992 *total = COSTS_N_INSNS (4);
2993 /* prepare_cmp_insn will force costly constants into registers before
2994 the cbranchdi4 pattern can see them, so preserve potentially
2995 interesting ones. */
2996 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2997 *total = 1;
2998 else
2999 *total = 10;
3000 return true;
3001 case CONST_VECTOR:
3002 if (x == CONST0_RTX (GET_MODE (x)))
3003 *total = 0;
3004 else if (sh_1el_vec (x, VOIDmode))
3005 *total = outer_code != SET;
3006 if (sh_rep_vec (x, VOIDmode))
3007 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3008 + (outer_code != SET));
3009 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3010 return true;
3012 case PLUS:
3013 case MINUS:
3014 *total = COSTS_N_INSNS (addsubcosts (x));
3015 return true;
3017 case AND:
3018 *total = COSTS_N_INSNS (andcosts (x));
3019 return true;
3021 case MULT:
3022 *total = COSTS_N_INSNS (multcosts (x));
3023 return true;
3025 case ASHIFT:
3026 case ASHIFTRT:
3027 case LSHIFTRT:
3028 *total = COSTS_N_INSNS (shiftcosts (x));
3029 return true;
3031 case DIV:
3032 case UDIV:
3033 case MOD:
3034 case UMOD:
3035 *total = COSTS_N_INSNS (20);
3036 return true;
3038 case PARALLEL:
3039 if (sh_1el_vec (x, VOIDmode))
3040 *total = outer_code != SET;
3041 if (sh_rep_vec (x, VOIDmode))
3042 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3043 + (outer_code != SET));
3044 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3045 return true;
3047 case FLOAT:
3048 case FIX:
3049 *total = 100;
3050 return true;
3052 default:
3053 return false;
3057 /* Compute the cost of an address. For the SH, all valid addresses are
3058 the same cost. Use a slightly higher cost for reg + reg addressing,
3059 since it increases pressure on r0. */
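/* For example (illustrative operands): "mov.l @(r0,r4),r1" is the only
reg + reg form -- the index must be r0 -- whereas "mov.l @(4,r4),r1"
leaves r0 free, which is why the reg + reg case gets the higher cost
below. */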
3061 static int
3062 sh_address_cost (rtx X,
3063 bool speed ATTRIBUTE_UNUSED)
3065 return (GET_CODE (X) == PLUS
3066 && ! CONSTANT_P (XEXP (X, 1))
3067 && ! TARGET_SHMEDIA ? 1 : 0);
3070 /* Code to expand a shift. */
3072 void
3073 gen_ashift (int type, int n, rtx reg)
3075 /* Negative values here come from the shift_amounts array. */
3076 if (n < 0)
3078 if (type == ASHIFT)
3079 type = LSHIFTRT;
3080 else
3081 type = ASHIFT;
3082 n = -n;
3085 switch (type)
3087 case ASHIFTRT:
3088 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3089 break;
3090 case LSHIFTRT:
3091 if (n == 1)
3092 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3093 else
3094 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3095 break;
3096 case ASHIFT:
3097 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3098 break;
3102 /* Same for HImode */
3104 void
3105 gen_ashift_hi (int type, int n, rtx reg)
3107 /* Negative values here come from the shift_amounts array. */
3108 if (n < 0)
3110 if (type == ASHIFT)
3111 type = LSHIFTRT;
3112 else
3113 type = ASHIFT;
3114 n = -n;
3117 switch (type)
3119 case ASHIFTRT:
3120 case LSHIFTRT:
3121 /* We don't have HImode right shift operations because using the
3122 ordinary 32 bit shift instructions for that doesn't generate proper
3123 zero/sign extension.
3124 gen_ashift_hi is only called in contexts where we know that the
3125 sign extension works out correctly. */
3127 int offset = 0;
3128 if (GET_CODE (reg) == SUBREG)
3130 offset = SUBREG_BYTE (reg);
3131 reg = SUBREG_REG (reg);
3133 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3134 break;
3136 case ASHIFT:
3137 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3138 break;
3142 /* Output RTL to split a constant shift into its component SH constant
3143 shift instructions. */
3145 void
3146 gen_shifty_op (int code, rtx *operands)
3148 int value = INTVAL (operands[2]);
3149 int max, i;
3151 /* Truncate the shift count in case it is out of bounds. */
3152 value = value & 31;
3154 if (value == 31)
3156 if (code == LSHIFTRT)
3158 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3159 emit_insn (gen_movt (operands[0]));
3160 return;
3162 else if (code == ASHIFT)
3164 /* There is a two instruction sequence for 31 bit left shifts,
3165 but it requires r0. */
3166 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3168 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3169 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3170 return;
3174 else if (value == 0)
3176 /* This can happen even when optimizing, if there were subregs before
3177 reload. Don't output a nop here, as this is never optimized away;
3178 use a no-op move instead. */
3179 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3180 return;
3183 max = shift_insns[value];
3184 for (i = 0; i < max; i++)
3185 gen_ashift (code, shift_amounts[value][i], operands[0]);
3188 /* Same as above, but optimized for values where the topmost bits don't
3189 matter. */
3191 void
3192 gen_shifty_hi_op (int code, rtx *operands)
3194 int value = INTVAL (operands[2]);
3195 int max, i;
3196 void (*gen_fun) (int, int, rtx);
3198 /* This operation is used by and_shl for SImode values with a few
3199 high bits known to be cleared. */
3200 value &= 31;
3201 if (value == 0)
3203 emit_insn (gen_nop ());
3204 return;
3207 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3208 if (code == ASHIFT)
3210 max = ext_shift_insns[value];
3211 for (i = 0; i < max; i++)
3212 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3214 else
3215 /* When shifting right, emit the shifts in reverse order, so that
3216 solitary negative values come first. */
3217 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3218 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3221 /* Output RTL for an arithmetic right shift. */
3223 /* ??? Rewrite to use super-optimizer sequences. */
3226 expand_ashiftrt (rtx *operands)
3228 rtx wrk;
3229 char func[18];
3230 int value;
3232 if (TARGET_SH3)
3234 if (!CONST_INT_P (operands[2]))
3236 rtx count = copy_to_mode_reg (SImode, operands[2]);
3237 emit_insn (gen_negsi2 (count, count));
3238 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3239 return 1;
3241 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3242 > 1 + SH_DYNAMIC_SHIFT_COST)
3244 rtx count
3245 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3246 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3247 return 1;
3250 if (!CONST_INT_P (operands[2]))
3251 return 0;
3253 value = INTVAL (operands[2]) & 31;
3255 if (value == 31)
3257 /* If we are called from abs expansion, arrange things so that we
3258 can use a single MT instruction that doesn't clobber the source,
3259 if LICM can hoist out the load of the constant zero. */
3260 if (currently_expanding_to_rtl)
3262 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3263 operands[1]));
3264 emit_insn (gen_mov_neg_si_t (operands[0]));
3265 return 1;
3267 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3268 return 1;
3270 else if (value >= 16 && value <= 19)
3272 wrk = gen_reg_rtx (SImode);
3273 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3274 value -= 16;
3275 while (value--)
3276 gen_ashift (ASHIFTRT, 1, wrk);
3277 emit_move_insn (operands[0], wrk);
3278 return 1;
3280 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3281 else if (value <= 5)
3283 wrk = gen_reg_rtx (SImode);
3284 emit_move_insn (wrk, operands[1]);
3285 while (value--)
3286 gen_ashift (ASHIFTRT, 1, wrk);
3287 emit_move_insn (operands[0], wrk);
3288 return 1;
3291 wrk = gen_reg_rtx (Pmode);
3293 /* Load the value into an arg reg and call a helper. */
3294 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3295 sprintf (func, "__ashiftrt_r4_%d", value);
3296 function_symbol (wrk, func, SFUNC_STATIC);
3297 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3298 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3299 return 1;
3303 sh_dynamicalize_shift_p (rtx count)
3305 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3308 /* Try to find a good way to implement the combiner pattern
3309 [(set (match_operand:SI 0 "register_operand" "r")
3310 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3311 (match_operand:SI 2 "const_int_operand" "n"))
3312 (match_operand:SI 3 "const_int_operand" "n"))) .
3313 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3314 return 0 for simple right / left or left/right shift combination.
3315 return 1 for a combination of shifts with zero_extend.
3316 return 2 for a combination of shifts with an AND that needs r0.
3317 return 3 for a combination of shifts with an AND that needs an extra
3318 scratch register, when the three highmost bits of the AND mask are clear.
3319 return 4 for a combination of shifts with an AND that needs an extra
3320 scratch register, when any of the three highmost bits of the AND mask
3321 is set.
3322 If ATTRP is set, store an initial right shift width in ATTRP[0],
3323 and the instruction length in ATTRP[1] . These values are not valid
3324 when returning 0.
3325 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3326 shift_amounts for the last shift value that is to be used before the
3327 sign extend. */
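/* Worked example (illustrative): for LEFT == 8 and MASK == 0x00ffff00 the
masked value is just a 16-bit zero extension shifted up by 8, so
shl_and_kind returns 1 and gen_shl_and emits "extu.w; shll8" -- two insns
instead of the three that a plain right/left shift pair would need. */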
3329 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3331 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3332 int left = INTVAL (left_rtx), right;
3333 int best = 0;
3334 int cost, best_cost = 10000;
3335 int best_right = 0, best_len = 0;
3336 int i;
3337 int can_ext;
3339 if (left < 0 || left > 31)
3340 return 0;
3341 if (CONST_INT_P (mask_rtx))
3342 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3343 else
3344 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3345 /* Can this be expressed as a right shift / left shift pair? */
3346 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3347 right = exact_log2 (lsb);
3348 mask2 = ~(mask + lsb - 1);
3349 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3350 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3351 if (! mask2)
3352 best_cost = shift_insns[right] + shift_insns[right + left];
3353 /* mask has no trailing zeroes <==> ! right */
3354 else if (! right && mask2 == ~(lsb2 - 1))
3356 int late_right = exact_log2 (lsb2);
3357 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3359 /* Try to use zero extend. */
3360 if (mask2 == ~(lsb2 - 1))
3362 int width, first;
3364 for (width = 8; width <= 16; width += 8)
3366 /* Can we zero-extend right away? */
3367 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3369 cost
3370 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3371 if (cost < best_cost)
3373 best = 1;
3374 best_cost = cost;
3375 best_right = right;
3376 best_len = cost;
3377 if (attrp)
3378 attrp[2] = -1;
3380 continue;
3382 /* ??? Could try to put zero extend into initial right shift,
3383 or even shift a bit left before the right shift. */
3384 /* Determine value of first part of left shift, to get to the
3385 zero extend cut-off point. */
3386 first = width - exact_log2 (lsb2) + right;
3387 if (first >= 0 && right + left - first >= 0)
3389 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3390 + ext_shift_insns[right + left - first];
3391 if (cost < best_cost)
3393 best = 1;
3394 best_cost = cost;
3395 best_right = right;
3396 best_len = cost;
3397 if (attrp)
3398 attrp[2] = first;
3403 /* Try to use r0 AND pattern */
3404 for (i = 0; i <= 2; i++)
3406 if (i > right)
3407 break;
3408 if (! CONST_OK_FOR_K08 (mask >> i))
3409 continue;
3410 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3411 if (cost < best_cost)
3413 best = 2;
3414 best_cost = cost;
3415 best_right = i;
3416 best_len = cost - 1;
3419 /* Try to use a scratch register to hold the AND operand. */
3420 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3421 for (i = 0; i <= 2; i++)
3423 if (i > right)
3424 break;
3425 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3426 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3427 if (cost < best_cost)
3429 best = 4 - can_ext;
3430 best_cost = cost;
3431 best_right = i;
3432 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3436 if (attrp)
3438 attrp[0] = best_right;
3439 attrp[1] = best_len;
3441 return best;
3444 /* This is used in length attributes of the unnamed instructions
3445 corresponding to shl_and_kind return values of 1 and 2. */
3447 shl_and_length (rtx insn)
3449 rtx set_src, left_rtx, mask_rtx;
3450 int attributes[3];
3452 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3453 left_rtx = XEXP (XEXP (set_src, 0), 1);
3454 mask_rtx = XEXP (set_src, 1);
3455 shl_and_kind (left_rtx, mask_rtx, attributes);
3456 return attributes[1];
3459 /* This is used in length attribute of the and_shl_scratch instruction. */
3462 shl_and_scr_length (rtx insn)
3464 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3466 rtx op = XEXP (set_src, 0);
3467 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3468 op = XEXP (XEXP (op, 0), 0);
3469 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3472 /* Generate rtl for instructions for which shl_and_kind advised a particular
3473 method of generating them, i.e. returned nonzero. */
3476 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3478 int attributes[3];
3479 unsigned HOST_WIDE_INT mask;
3480 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3481 int right, total_shift;
3482 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3484 right = attributes[0];
3485 total_shift = INTVAL (left_rtx) + right;
3486 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3487 switch (kind)
3489 default:
3490 return -1;
3491 case 1:
3493 int first = attributes[2];
3494 rtx operands[3];
3496 if (first < 0)
3498 emit_insn ((mask << right) <= 0xff
3499 ? gen_zero_extendqisi2 (dest,
3500 gen_lowpart (QImode, source))
3501 : gen_zero_extendhisi2 (dest,
3502 gen_lowpart (HImode, source)));
3503 source = dest;
3505 if (source != dest)
3506 emit_insn (gen_movsi (dest, source));
3507 operands[0] = dest;
3508 if (right)
3510 operands[2] = GEN_INT (right);
3511 gen_shifty_hi_op (LSHIFTRT, operands);
3513 if (first > 0)
3515 operands[2] = GEN_INT (first);
3516 gen_shifty_hi_op (ASHIFT, operands);
3517 total_shift -= first;
3518 mask <<= first;
3520 if (first >= 0)
3521 emit_insn (mask <= 0xff
3522 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3523 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3524 if (total_shift > 0)
3526 operands[2] = GEN_INT (total_shift);
3527 gen_shifty_hi_op (ASHIFT, operands);
3529 break;
3531 case 4:
3532 shift_gen_fun = gen_shifty_op;
3533 case 3:
3534 /* If the topmost bit that matters is set, set the topmost bits
3535 that don't matter. This way, we might be able to get a shorter
3536 signed constant. */
3537 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3538 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3539 case 2:
3540 /* Don't expand fine-grained when combining, because that will
3541 make the pattern fail. */
3542 if (currently_expanding_to_rtl
3543 || reload_in_progress || reload_completed)
3545 rtx operands[3];
3547 /* Cases 3 and 4 should be handled by this split
3548 only while combining */
3549 gcc_assert (kind <= 2);
3550 if (right)
3552 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3553 source = dest;
3555 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3556 if (total_shift)
3558 operands[0] = dest;
3559 operands[1] = dest;
3560 operands[2] = GEN_INT (total_shift);
3561 shift_gen_fun (ASHIFT, operands);
3563 break;
3565 else
3567 int neg = 0;
3568 if (kind != 4 && total_shift < 16)
3570 neg = -ext_shift_amounts[total_shift][1];
3571 if (neg > 0)
3572 neg -= ext_shift_amounts[total_shift][2];
3573 else
3574 neg = 0;
3576 emit_insn (gen_and_shl_scratch (dest, source,
3577 GEN_INT (right),
3578 GEN_INT (mask),
3579 GEN_INT (total_shift + neg),
3580 GEN_INT (neg)));
3581 emit_insn (gen_movsi (dest, dest));
3582 break;
3585 return 0;
3588 /* Try to find a good way to implement the combiner pattern
3589 [(set (match_operand:SI 0 "register_operand" "=r")
3590 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3591 (match_operand:SI 2 "const_int_operand" "n")
3592 (match_operand:SI 3 "const_int_operand" "n")
3593 (const_int 0)))
3594 (clobber (reg:SI T_REG))]
3595 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3596 return 0 for simple left / right shift combination.
3597 return 1 for left shift / 8 bit sign extend / left shift.
3598 return 2 for left shift / 16 bit sign extend / left shift.
3599 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3600 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3601 return 5 for left shift / 16 bit sign extend / right shift
3602 return 6 for < 8 bit sign extend / left shift.
3603 return 7 for < 8 bit sign extend / left shift / single right shift.
3604 If COSTP is nonzero, assign the calculated cost to *COSTP. */
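/* Worked example (illustrative): for LEFT == 8 and SIZE == 16 (an 8-bit
quantity sign-extracted after shifting it up by 8), the default left/right
shift pair would take 4 insns, but alternative 5 does it in 2:
"shll8; exts.w". */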
3607 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3609 int left, size, insize, ext;
3610 int cost = 0, best_cost;
3611 int kind;
3613 left = INTVAL (left_rtx);
3614 size = INTVAL (size_rtx);
3615 insize = size - left;
3616 gcc_assert (insize > 0);
3617 /* Default to left / right shift. */
3618 kind = 0;
3619 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3620 if (size <= 16)
3622 /* 16 bit shift / sign extend / 16 bit shift */
3623 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3624 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3625 below, by alternative 3 or something even better. */
3626 if (cost < best_cost)
3628 kind = 5;
3629 best_cost = cost;
3632 /* Try a plain sign extend between two shifts. */
3633 for (ext = 16; ext >= insize; ext -= 8)
3635 if (ext <= size)
3637 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3638 if (cost < best_cost)
3640 kind = ext / (unsigned) 8;
3641 best_cost = cost;
3644 /* Check if we can do a sloppy shift with a final signed shift
3645 restoring the sign. */
3646 if (EXT_SHIFT_SIGNED (size - ext))
3647 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3648 /* If not, maybe it's still cheaper to do the second shift sloppy,
3649 and do a final sign extend? */
3650 else if (size <= 16)
3651 cost = ext_shift_insns[ext - insize] + 1
3652 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3653 else
3654 continue;
3655 if (cost < best_cost)
3657 kind = ext / (unsigned) 8 + 2;
3658 best_cost = cost;
3661 /* Check if we can sign extend in r0 */
3662 if (insize < 8)
3664 cost = 3 + shift_insns[left];
3665 if (cost < best_cost)
3667 kind = 6;
3668 best_cost = cost;
3670 /* Try the same with a final signed shift. */
3671 if (left < 31)
3673 cost = 3 + ext_shift_insns[left + 1] + 1;
3674 if (cost < best_cost)
3676 kind = 7;
3677 best_cost = cost;
3681 if (TARGET_SH3)
3683 /* Try to use a dynamic shift. */
3684 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3685 if (cost < best_cost)
3687 kind = 0;
3688 best_cost = cost;
3691 if (costp)
3692 *costp = cost;
3693 return kind;
3696 /* Function to be used in the length attribute of the instructions
3697 implementing this pattern. */
3700 shl_sext_length (rtx insn)
3702 rtx set_src, left_rtx, size_rtx;
3703 int cost;
3705 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3706 left_rtx = XEXP (XEXP (set_src, 0), 1);
3707 size_rtx = XEXP (set_src, 1);
3708 shl_sext_kind (left_rtx, size_rtx, &cost);
3709 return cost;
3712 /* Generate rtl for this pattern */
3715 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3717 int kind;
3718 int left, size, insize, cost;
3719 rtx operands[3];
3721 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3722 left = INTVAL (left_rtx);
3723 size = INTVAL (size_rtx);
3724 insize = size - left;
3725 switch (kind)
3727 case 1:
3728 case 2:
3729 case 3:
3730 case 4:
3732 int ext = kind & 1 ? 8 : 16;
3733 int shift2 = size - ext;
3735 /* Don't expand fine-grained when combining, because that will
3736 make the pattern fail. */
3737 if (! currently_expanding_to_rtl
3738 && ! reload_in_progress && ! reload_completed)
3740 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3741 emit_insn (gen_movsi (dest, source));
3742 break;
3744 if (dest != source)
3745 emit_insn (gen_movsi (dest, source));
3746 operands[0] = dest;
3747 if (ext - insize)
3749 operands[2] = GEN_INT (ext - insize);
3750 gen_shifty_hi_op (ASHIFT, operands);
3752 emit_insn (kind & 1
3753 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3754 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3755 if (kind <= 2)
3757 if (shift2)
3759 operands[2] = GEN_INT (shift2);
3760 gen_shifty_op (ASHIFT, operands);
3763 else
3765 if (shift2 > 0)
3767 if (EXT_SHIFT_SIGNED (shift2))
3769 operands[2] = GEN_INT (shift2 + 1);
3770 gen_shifty_op (ASHIFT, operands);
3771 operands[2] = const1_rtx;
3772 gen_shifty_op (ASHIFTRT, operands);
3773 break;
3775 operands[2] = GEN_INT (shift2);
3776 gen_shifty_hi_op (ASHIFT, operands);
3778 else if (shift2)
3780 operands[2] = GEN_INT (-shift2);
3781 gen_shifty_hi_op (LSHIFTRT, operands);
3783 emit_insn (size <= 8
3784 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3785 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3787 break;
3789 case 5:
3791 int i = 16 - size;
3792 if (! currently_expanding_to_rtl
3793 && ! reload_in_progress && ! reload_completed)
3794 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3795 else
3797 operands[0] = dest;
3798 operands[2] = GEN_INT (16 - insize);
3799 gen_shifty_hi_op (ASHIFT, operands);
3800 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3802 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3803 while (--i >= 0)
3804 gen_ashift (ASHIFTRT, 1, dest);
3805 break;
3807 case 6:
3808 case 7:
3809 /* Don't expand fine-grained when combining, because that will
3810 make the pattern fail. */
3811 if (! currently_expanding_to_rtl
3812 && ! reload_in_progress && ! reload_completed)
3814 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3815 emit_insn (gen_movsi (dest, source));
3816 break;
3818 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3819 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3820 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3821 operands[0] = dest;
3822 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3823 gen_shifty_op (ASHIFT, operands);
3824 if (kind == 7)
3825 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3826 break;
3827 default:
3828 return -1;
3830 return 0;
3833 /* Prefix a symbol_ref name with "datalabel". */
3836 gen_datalabel_ref (rtx sym)
3838 const char *str;
3840 if (GET_CODE (sym) == LABEL_REF)
3841 return gen_rtx_CONST (GET_MODE (sym),
3842 gen_rtx_UNSPEC (GET_MODE (sym),
3843 gen_rtvec (1, sym),
3844 UNSPEC_DATALABEL));
3846 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3848 str = XSTR (sym, 0);
3849 /* Share all SYMBOL_REF strings with the same value - that is important
3850 for cse. */
3851 str = IDENTIFIER_POINTER (get_identifier (str));
3852 XSTR (sym, 0) = str;
3854 return sym;
3858 static alloc_pool label_ref_list_pool;
3860 typedef struct label_ref_list_d
3862 rtx label;
3863 struct label_ref_list_d *next;
3864 } *label_ref_list_t;
3866 /* The SH cannot load a large constant into a register; constants have to
3867 come from a pc-relative load. The reference of a pc-relative load
3868 instruction must be less than 1k in front of the instruction. This
3869 means that we often have to dump a constant inside a function, and
3870 generate code to branch around it.
3872 It is important to minimize this, since the branches will slow things
3873 down and make things bigger.
3875 Worst case code looks like:
3877 mov.l L1,rn
3878 bra L2
3880 align
3881 L1: .long value
3885 mov.l L3,rn
3886 bra L4
3888 align
3889 L3: .long value
3893 We fix this by performing a scan before scheduling, which notices which
3894 instructions need to have their operands fetched from the constant table
3895 and builds the table.
3897 The algorithm is:
3899 scan, find an instruction which needs a pcrel move. Look forward, find the
3900 last barrier which is within MAX_COUNT bytes of the requirement.
3901 If there isn't one, make one. Process all the instructions between
3902 the found instruction and the barrier.
3904 In the above example, we can tell that L3 is within 1k of L1, so
3905 the first move can be shrunk from the 3 insn+constant sequence into
3906 just 1 insn, and the constant moved to L3 to make:
3908 mov.l L1,rn
3910 mov.l L3,rn
3911 bra L4
3913 align
3914 L3:.long value
3915 L4:.long value
3917 Then the second move becomes the target for the shortening process. */
3919 typedef struct
3921 rtx value; /* Value in table. */
3922 rtx label; /* Label of value. */
3923 label_ref_list_t wend; /* End of window. */
3924 enum machine_mode mode; /* Mode of value. */
3926 /* True if this constant is accessed as part of a post-increment
3927 sequence. Note that HImode constants are never accessed in this way. */
3928 bool part_of_sequence_p;
3929 } pool_node;
3931 /* The maximum number of constants that can fit into one pool, since
3932 constants in the range 0..510 are at least 2 bytes long, and in the
3933 range from there to 1018 at least 4 bytes. */
3935 #define MAX_POOL_SIZE 372
3936 static pool_node pool_vector[MAX_POOL_SIZE];
3937 static int pool_size;
3938 static rtx pool_window_label;
3939 static int pool_window_last;
3941 static int max_labelno_before_reorg;
3943 /* ??? If we need a constant in HImode which is the truncated value of a
3944 constant we need in SImode, we could combine the two entries thus saving
3945 two bytes. Is this common enough to be worth the effort of implementing
3946 it? */
3948 /* ??? This stuff should be done at the same time that we shorten branches.
3949 As it is now, we must assume that all branches are the maximum size, and
3950 this causes us to almost always output constant pools sooner than
3951 necessary. */
3953 /* Add a constant to the pool and return its label. */
3955 static rtx
3956 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3958 int i;
3959 rtx lab, new_rtx;
3960 label_ref_list_t ref, newref;
3962 /* First see if we've already got it. */
3963 for (i = 0; i < pool_size; i++)
3965 if (x->code == pool_vector[i].value->code
3966 && mode == pool_vector[i].mode)
3968 if (x->code == CODE_LABEL)
3970 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3971 continue;
3973 if (rtx_equal_p (x, pool_vector[i].value))
3975 lab = new_rtx = 0;
3976 if (! last_value
3977 || ! i
3978 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3980 new_rtx = gen_label_rtx ();
3981 LABEL_REFS (new_rtx) = pool_vector[i].label;
3982 pool_vector[i].label = lab = new_rtx;
3984 if (lab && pool_window_label)
3986 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3987 newref->label = pool_window_label;
3988 ref = pool_vector[pool_window_last].wend;
3989 newref->next = ref;
3990 pool_vector[pool_window_last].wend = newref;
3992 if (new_rtx)
3993 pool_window_label = new_rtx;
3994 pool_window_last = i;
3995 return lab;
4000 /* Need a new one. */
4001 pool_vector[pool_size].value = x;
4002 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4004 lab = 0;
4005 pool_vector[pool_size - 1].part_of_sequence_p = true;
4007 else
4008 lab = gen_label_rtx ();
4009 pool_vector[pool_size].mode = mode;
4010 pool_vector[pool_size].label = lab;
4011 pool_vector[pool_size].wend = NULL;
4012 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4013 if (lab && pool_window_label)
4015 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4016 newref->label = pool_window_label;
4017 ref = pool_vector[pool_window_last].wend;
4018 newref->next = ref;
4019 pool_vector[pool_window_last].wend = newref;
4021 if (lab)
4022 pool_window_label = lab;
4023 pool_window_last = pool_size;
4024 pool_size++;
4025 return lab;
4028 /* Output the literal table. START, if nonzero, is the first instruction
4029 this table is needed for, and also indicates that there is at least one
4030 casesi_worker_2 instruction; we have to emit the operand3 labels from
4031 these insns at a 4-byte aligned position. BARRIER is the barrier
4032 after which we are to place the table. */
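/* Illustrative shape of an emitted table (schematic only; the real
   directives come from the consttable_* and align_* patterns in sh.md):
   HImode constants are dumped in a first pass behind a 2-byte alignment;
   the remaining SImode/SFmode/DImode/DFmode entries follow behind a
   4-byte (or, for doubles with TARGET_ALIGN_DOUBLE, 8-byte) alignment.
   Each entry is preceded by the labels that reference it and may be
   followed by consttable_window_end markers.  */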
4034 static void
4035 dump_table (rtx start, rtx barrier)
4037 rtx scan = barrier;
4038 int i;
4039 int need_align = 1;
4040 rtx lab;
4041 label_ref_list_t ref;
4042 int have_df = 0;
4044 /* Do two passes, first time dump out the HI sized constants. */
4046 for (i = 0; i < pool_size; i++)
4048 pool_node *p = &pool_vector[i];
4050 if (p->mode == HImode)
4052 if (need_align)
4054 scan = emit_insn_after (gen_align_2 (), scan);
4055 need_align = 0;
4057 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4058 scan = emit_label_after (lab, scan);
4059 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4060 scan);
4061 for (ref = p->wend; ref; ref = ref->next)
4063 lab = ref->label;
4064 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4067 else if (p->mode == DFmode)
4068 have_df = 1;
4071 need_align = 1;
4073 if (start)
4075 scan = emit_insn_after (gen_align_4 (), scan);
4076 need_align = 0;
4077 for (; start != barrier; start = NEXT_INSN (start))
4078 if (NONJUMP_INSN_P (start)
4079 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4081 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4082 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4084 scan = emit_label_after (lab, scan);
4087 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4089 rtx align_insn = NULL_RTX;
4091 scan = emit_label_after (gen_label_rtx (), scan);
4092 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4093 need_align = 0;
4095 for (i = 0; i < pool_size; i++)
4097 pool_node *p = &pool_vector[i];
4099 switch (p->mode)
4101 case HImode:
4102 break;
4103 case SImode:
4104 case SFmode:
4105 if (align_insn && !p->part_of_sequence_p)
4107 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4108 emit_label_before (lab, align_insn);
4109 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4110 align_insn);
4111 for (ref = p->wend; ref; ref = ref->next)
4113 lab = ref->label;
4114 emit_insn_before (gen_consttable_window_end (lab),
4115 align_insn);
4117 delete_insn (align_insn);
4118 align_insn = NULL_RTX;
4119 continue;
4121 else
4123 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4124 scan = emit_label_after (lab, scan);
4125 scan = emit_insn_after (gen_consttable_4 (p->value,
4126 const0_rtx), scan);
4127 need_align = ! need_align;
4129 break;
4130 case DFmode:
4131 if (need_align)
4133 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4134 align_insn = scan;
4135 need_align = 0;
4137 case DImode:
4138 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4139 scan = emit_label_after (lab, scan);
4140 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4141 scan);
4142 break;
4143 default:
4144 gcc_unreachable ();
4147 if (p->mode != HImode)
4149 for (ref = p->wend; ref; ref = ref->next)
4151 lab = ref->label;
4152 scan = emit_insn_after (gen_consttable_window_end (lab),
4153 scan);
4158 pool_size = 0;
4161 for (i = 0; i < pool_size; i++)
4163 pool_node *p = &pool_vector[i];
4165 switch (p->mode)
4167 case HImode:
4168 break;
4169 case SImode:
4170 case SFmode:
4171 if (need_align)
4173 need_align = 0;
4174 scan = emit_label_after (gen_label_rtx (), scan);
4175 scan = emit_insn_after (gen_align_4 (), scan);
4177 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4178 scan = emit_label_after (lab, scan);
4179 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4180 scan);
4181 break;
4182 case DFmode:
4183 case DImode:
4184 if (need_align)
4186 need_align = 0;
4187 scan = emit_label_after (gen_label_rtx (), scan);
4188 scan = emit_insn_after (gen_align_4 (), scan);
4190 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4191 scan = emit_label_after (lab, scan);
4192 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4193 scan);
4194 break;
4195 default:
4196 gcc_unreachable ();
4199 if (p->mode != HImode)
4201 for (ref = p->wend; ref; ref = ref->next)
4203 lab = ref->label;
4204 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4209 scan = emit_insn_after (gen_consttable_end (), scan);
4210 scan = emit_barrier_after (scan);
4211 pool_size = 0;
4212 pool_window_label = NULL_RTX;
4213 pool_window_last = 0;
4216 /* Return nonzero if constant would be an ok source for a
4217 mov.w instead of a mov.l. */
4219 static int
4220 hi_const (rtx src)
4222 return (CONST_INT_P (src)
4223 && INTVAL (src) >= -32768
4224 && INTVAL (src) <= 32767);
4227 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
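/* For reference, a mova insn has the rough shape
     (set (reg) (unspec [(label_ref L)] UNSPEC_MOVA))
   (or a CONST operand instead of the label_ref in the mova_const case),
   so MOVA_LABELREF picks out that first operand of the UNSPEC.  */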
4229 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4231 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4232 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4233 need to fix it if the input value is CONST_OK_FOR_I08. */
4235 static int
4236 broken_move (rtx insn)
4238 if (NONJUMP_INSN_P (insn))
4240 rtx pat = PATTERN (insn);
4241 if (GET_CODE (pat) == PARALLEL)
4242 pat = XVECEXP (pat, 0, 0);
4243 if (GET_CODE (pat) == SET
4244 /* We can load any 8-bit value if we don't care what the high
4245 order bits end up as. */
4246 && GET_MODE (SET_DEST (pat)) != QImode
4247 && (CONSTANT_P (SET_SRC (pat))
4248 /* Match mova_const. */
4249 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4250 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4251 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4252 && ! (TARGET_SH2E
4253 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4254 && (fp_zero_operand (SET_SRC (pat))
4255 || fp_one_operand (SET_SRC (pat)))
4256 /* In general we don't know the current setting of fpscr, so disable fldi.
4257 There is an exception if this was a register-register move
4258 before reload - and hence it was ascertained that we have
4259 single precision setting - and in a post-reload optimization
4260 we changed this to do a constant load. In that case
4261 we don't have an r0 clobber, hence we must use fldi. */
4262 && (TARGET_FMOVD
4263 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4264 == SCRATCH))
4265 && REG_P (SET_DEST (pat))
4266 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4267 && ! (TARGET_SH2A
4268 && GET_MODE (SET_DEST (pat)) == SImode
4269 && (satisfies_constraint_I20 (SET_SRC (pat))
4270 || satisfies_constraint_I28 (SET_SRC (pat))))
4271 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4272 return 1;
4275 return 0;
4278 static int
4279 mova_p (rtx insn)
4281 return (NONJUMP_INSN_P (insn)
4282 && GET_CODE (PATTERN (insn)) == SET
4283 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4284 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4285 /* Don't match mova_const. */
4286 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4289 /* Fix up a mova from a switch that went out of range. */
4290 static void
4291 fixup_mova (rtx mova)
4293 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4294 if (! flag_pic)
4296 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4297 INSN_CODE (mova) = -1;
4299 else
4301 rtx worker = mova;
4302 rtx lab = gen_label_rtx ();
4303 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4307 worker = NEXT_INSN (worker);
4308 gcc_assert (worker
4309 && !LABEL_P (worker)
4310 && !JUMP_P (worker));
4311 } while (NOTE_P (worker)
4312 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4313 wpat = PATTERN (worker);
4314 wpat0 = XVECEXP (wpat, 0, 0);
4315 wpat1 = XVECEXP (wpat, 0, 1);
4316 wsrc = SET_SRC (wpat0);
4317 PATTERN (worker) = (gen_casesi_worker_2
4318 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4319 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4320 XEXP (wpat1, 0)));
4321 INSN_CODE (worker) = -1;
4322 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4323 base = gen_rtx_LABEL_REF (Pmode, lab);
4324 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4325 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4326 INSN_CODE (mova) = -1;
4330 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4331 *num_mova, and check if the new mova is not nested within the first one.
4332 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4333 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4334 static int
4335 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4337 int n_addr = 0; /* Initialization to shut up spurious warning. */
4338 int f_target, n_target = 0; /* Likewise. */
4340 if (optimize)
4342 /* If NEW_MOVA has no address yet, it will be handled later. */
4343 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4344 return -1;
4346 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4347 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4348 if (n_addr > n_target || n_addr + 1022 < n_target)
4350 /* Change the mova into a load.
4351 broken_move will then return true for it. */
4352 fixup_mova (new_mova);
4353 return 1;
4356 if (!(*num_mova)++)
4358 *first_mova = new_mova;
4359 return 2;
4361 if (!optimize
4362 || ((f_target
4363 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4364 >= n_target))
4365 return -1;
4367 (*num_mova)--;
4368 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4369 > n_target - n_addr)
4371 fixup_mova (*first_mova);
4372 return 0;
4374 else
4376 fixup_mova (new_mova);
4377 return 1;
4381 /* Find the last barrier from insn FROM which is close enough to hold the
4382 constant pool. If we can't find one, then create one near the end of
4383 the range. */
4385 static rtx
4386 find_barrier (int num_mova, rtx mova, rtx from)
4388 int count_si = 0;
4389 int count_hi = 0;
4390 int found_hi = 0;
4391 int found_si = 0;
4392 int found_di = 0;
4393 int hi_align = 2;
4394 int si_align = 2;
4395 int leading_mova = num_mova;
4396 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4397 int si_limit;
4398 int hi_limit;
4399 rtx orig = from;
4400 rtx last_got = NULL_RTX;
4402 /* For HImode: range is 510, add 4 because pc counts from address of
4403 second instruction after this one, subtract 2 for the jump instruction
4404 that we may need to emit before the table, subtract 2 for the instruction
4405 that fills the jump delay slot (in very rare cases, reorg will take an
4406 instruction from after the constant pool or will leave the delay slot
4407 empty). This gives 510.
4408 For SImode: range is 1020, add 4 because pc counts from address of
4409 second instruction after this one, subtract 2 in case pc is 2 byte
4410 aligned, subtract 2 for the jump instruction that we may need to emit
4411 before the table, subtract 2 for the instruction that fills the jump
4412 delay slot. This gives 1018. */
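/* In other words: hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, the values assigned below.  */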
4414 /* The branch will always be shortened now that the reference address for
4415 forward branches is the successor address, thus we need no longer make
4416 adjustments to the [sh]i_limit for -O0. */
4418 si_limit = 1018;
4419 hi_limit = 510;
4421 while (from && count_si < si_limit && count_hi < hi_limit)
4423 int inc = get_attr_length (from);
4424 int new_align = 1;
4426 /* If this is a label that existed at the time of the compute_alignments
4427 call, determine the alignment. N.B. When find_barrier recurses for
4428 an out-of-reach mova, we might see labels at the start of previously
4429 inserted constant tables. */
4430 if (LABEL_P (from)
4431 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4433 if (optimize)
4434 new_align = 1 << label_to_alignment (from);
4435 else if (BARRIER_P (prev_nonnote_insn (from)))
4436 new_align = 1 << barrier_align (from);
4437 else
4438 new_align = 1;
4439 inc = 0;
4441 /* In case we are scanning a constant table because of recursion, check
4442 for explicit alignments. If the table is long, we might be forced
4443 to emit the new table in front of it; the length of the alignment
4444 might be the last straw. */
4445 else if (NONJUMP_INSN_P (from)
4446 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4447 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4448 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4449 /* When we find the end of a constant table, paste the new constant
4450 at the end. That is better than putting it in front because
4451 this way, we don't need extra alignment for adding a 4-byte-aligned
4452 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4453 else if (NONJUMP_INSN_P (from)
4454 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4455 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4456 return from;
4458 if (BARRIER_P (from))
4460 rtx next;
4462 found_barrier = from;
4464 /* If we are at the end of the function, or in front of an alignment
4465 instruction, we need not insert an extra alignment. We prefer
4466 this kind of barrier. */
4467 if (barrier_align (from) > 2)
4468 good_barrier = from;
4470 /* If we are at the end of a hot/cold block, dump the constants
4471 here. */
4472 next = NEXT_INSN (from);
4473 if (next
4474 && NOTE_P (next)
4475 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4476 break;
4479 if (broken_move (from))
4481 rtx pat, src, dst;
4482 enum machine_mode mode;
4484 pat = PATTERN (from);
4485 if (GET_CODE (pat) == PARALLEL)
4486 pat = XVECEXP (pat, 0, 0);
4487 src = SET_SRC (pat);
4488 dst = SET_DEST (pat);
4489 mode = GET_MODE (dst);
4491 /* A GOT pc-relative setting comes as a pair of
4492 mova .L8,r0
4493 mov.l .L8,r12
4494 instructions (plus an add r0,r12).
4495 Remember if we see one without the other. */
4496 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4497 last_got = last_got ? NULL_RTX : from;
4498 else if (PIC_ADDR_P (src))
4499 last_got = last_got ? NULL_RTX : from;
4501 /* We must explicitly check the mode, because sometimes the
4502 front end will generate code to load unsigned constants into
4503 HImode targets without properly sign extending them. */
4504 if (mode == HImode
4505 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4507 found_hi += 2;
4508 /* We put the short constants before the long constants, so
4509 we must count the length of short constants in the range
4510 for the long constants. */
4511 /* ??? This isn't optimal, but is easy to do. */
4512 si_limit -= 2;
4514 else
4516 /* We dump DF/DI constants before SF/SI ones, because
4517 the limit is the same, but the alignment requirements
4518 are higher. We may waste up to 4 additional bytes
4519 for alignment, and the DF/DI constant may have
4520 another SF/SI constant placed before it. */
4521 if (TARGET_SHCOMPACT
4522 && ! found_di
4523 && (mode == DFmode || mode == DImode))
4525 found_di = 1;
4526 si_limit -= 8;
4528 while (si_align > 2 && found_si + si_align - 2 > count_si)
4529 si_align >>= 1;
4530 if (found_si > count_si)
4531 count_si = found_si;
4532 found_si += GET_MODE_SIZE (mode);
4533 if (num_mova)
4534 si_limit -= GET_MODE_SIZE (mode);
4538 if (mova_p (from))
4540 switch (untangle_mova (&num_mova, &mova, from))
4542 case 0: return find_barrier (0, 0, mova);
4543 case 2:
4545 leading_mova = 0;
4546 barrier_before_mova
4547 = good_barrier ? good_barrier : found_barrier;
4549 default: break;
4551 if (found_si > count_si)
4552 count_si = found_si;
4554 else if (JUMP_TABLE_DATA_P (from))
4556 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4557 || (num_mova
4558 && (prev_nonnote_insn (from)
4559 == XEXP (MOVA_LABELREF (mova), 0))))
4560 num_mova--;
4561 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4563 /* We have just passed the barrier in front of the
4564 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4565 the ADDR_DIFF_VEC is accessed as data, just like our pool
4566 constants, this is a good opportunity to accommodate what
4567 we have gathered so far.
4568 If we waited any longer, we could end up at a barrier in
4569 front of code, which gives worse cache usage for separated
4570 instruction / data caches. */
4571 good_barrier = found_barrier;
4572 break;
4574 else
4576 rtx body = PATTERN (from);
4577 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4580 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4581 else if (JUMP_P (from)
4582 && ! TARGET_SH2
4583 && ! TARGET_SMALLCODE)
4584 new_align = 4;
4586 /* There is a possibility that a bf is transformed into a bf/s by the
4587 delay slot scheduler. */
4588 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4589 && get_attr_type (from) == TYPE_CBRANCH
4590 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4591 inc += 2;
4593 if (found_si)
4595 count_si += inc;
4596 if (new_align > si_align)
4598 si_limit -= (count_si - 1) & (new_align - si_align);
4599 si_align = new_align;
4601 count_si = (count_si + new_align - 1) & -new_align;
4603 if (found_hi)
4605 count_hi += inc;
4606 if (new_align > hi_align)
4608 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4609 hi_align = new_align;
4611 count_hi = (count_hi + new_align - 1) & -new_align;
4613 from = NEXT_INSN (from);
4616 if (num_mova)
4618 if (leading_mova)
4620 /* Try as we might, the leading mova is out of range. Change
4621 it into a load (which will become a pcload) and retry. */
4622 fixup_mova (mova);
4623 return find_barrier (0, 0, mova);
4625 else
4627 /* Insert the constant pool table before the mova instruction,
4628 to prevent the mova label reference from going out of range. */
4629 from = mova;
4630 good_barrier = found_barrier = barrier_before_mova;
4634 if (found_barrier)
4636 if (good_barrier && next_real_insn (found_barrier))
4637 found_barrier = good_barrier;
4639 else
4641 /* We didn't find a barrier in time to dump our stuff,
4642 so we'll make one. */
4643 rtx label = gen_label_rtx ();
4645 /* If we exceeded the range, then we must back up over the last
4646 instruction we looked at. Otherwise, we just need to undo the
4647 NEXT_INSN at the end of the loop. */
4648 if (PREV_INSN (from) != orig
4649 && (count_hi > hi_limit || count_si > si_limit))
4650 from = PREV_INSN (PREV_INSN (from));
4651 else
4652 from = PREV_INSN (from);
4654 /* Don't emit a constant table in the middle of global pointer setting,
4655 since that would move the addressing base GOT into another table.
4656 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4657 in the pool anyway, so just move up the whole constant pool. */
4658 if (last_got)
4659 from = PREV_INSN (last_got);
4661 /* Don't insert the constant pool table at the position which
4662 may be the landing pad. */
4663 if (flag_exceptions
4664 && CALL_P (from)
4665 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4666 from = PREV_INSN (from);
4668 /* Walk back to be just before any jump or label.
4669 Putting it before a label reduces the number of times the branch
4670 around the constant pool table will be hit. Putting it before
4671 a jump makes it more likely that the bra delay slot will be
4672 filled. */
4673 while (NOTE_P (from) || JUMP_P (from)
4674 || LABEL_P (from))
4675 from = PREV_INSN (from);
4677 from = emit_jump_insn_after (gen_jump (label), from);
4678 JUMP_LABEL (from) = label;
4679 LABEL_NUSES (label) = 1;
4680 found_barrier = emit_barrier_after (from);
4681 emit_label_after (label, found_barrier);
4684 return found_barrier;
4687 /* If the instruction INSN is implemented by a special function, and we can
4688 positively find the register that is used to call the sfunc, and this
4689 register is not used anywhere else in this instruction - except as the
4690 destination of a set, return this register; else, return 0. */
4692 sfunc_uses_reg (rtx insn)
4694 int i;
4695 rtx pattern, part, reg_part, reg;
4697 if (!NONJUMP_INSN_P (insn))
4698 return 0;
4699 pattern = PATTERN (insn);
4700 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4701 return 0;
4703 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4705 part = XVECEXP (pattern, 0, i);
4706 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4707 reg_part = part;
4709 if (! reg_part)
4710 return 0;
4711 reg = XEXP (reg_part, 0);
4712 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4714 part = XVECEXP (pattern, 0, i);
4715 if (part == reg_part || GET_CODE (part) == CLOBBER)
4716 continue;
4717 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4718 && REG_P (SET_DEST (part)))
4719 ? SET_SRC (part) : part)))
4720 return 0;
4722 return reg;
4725 /* See if the only way in which INSN uses REG is by calling it, or by
4726 setting it while calling it. Set *SET to a SET rtx if the register
4727 is set by INSN. */
4729 static int
4730 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4732 rtx pattern, reg2;
4734 *set = NULL_RTX;
4736 reg2 = sfunc_uses_reg (insn);
4737 if (reg2 && REGNO (reg2) == REGNO (reg))
4739 pattern = single_set (insn);
4740 if (pattern
4741 && REG_P (SET_DEST (pattern))
4742 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4743 *set = pattern;
4744 return 0;
4746 if (!CALL_P (insn))
4748 /* We don't use rtx_equal_p because we don't care if the mode is
4749 different. */
4750 pattern = single_set (insn);
4751 if (pattern
4752 && REG_P (SET_DEST (pattern))
4753 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4755 rtx par, part;
4756 int i;
4758 *set = pattern;
4759 par = PATTERN (insn);
4760 if (GET_CODE (par) == PARALLEL)
4761 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4763 part = XVECEXP (par, 0, i);
4764 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4765 return 1;
4767 return reg_mentioned_p (reg, SET_SRC (pattern));
4770 return 1;
4773 pattern = PATTERN (insn);
4775 if (GET_CODE (pattern) == PARALLEL)
4777 int i;
4779 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4780 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4781 return 1;
4782 pattern = XVECEXP (pattern, 0, 0);
4785 if (GET_CODE (pattern) == SET)
4787 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4789 /* We don't use rtx_equal_p, because we don't care if the
4790 mode is different. */
4791 if (!REG_P (SET_DEST (pattern))
4792 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4793 return 1;
4795 *set = pattern;
4798 pattern = SET_SRC (pattern);
4801 if (GET_CODE (pattern) != CALL
4802 || !MEM_P (XEXP (pattern, 0))
4803 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4804 return 1;
4806 return 0;
4809 /* Given X, a pattern of an insn or a part of it, return a mask of used
4810 general registers. Bits 0..15 mean that the respective registers
4811 are used as inputs in the instruction. Bits 16..31 mean that the
4812 registers 0..15, respectively, are used as outputs, or are clobbered.
4813 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
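/* For example, under this encoding (set (reg:SI r1) (reg:SI r2)) yields
   0x00020004: bit 2 marks the r2 input and bit 17 (= 16 + 1) marks the
   r1 output.  */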
4815 regs_used (rtx x, int is_dest)
4817 enum rtx_code code;
4818 const char *fmt;
4819 int i, used = 0;
4821 if (! x)
4822 return used;
4823 code = GET_CODE (x);
4824 switch (code)
4826 case REG:
4827 if (REGNO (x) < 16)
4828 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4829 << (REGNO (x) + is_dest));
4830 return 0;
4831 case SUBREG:
4833 rtx y = SUBREG_REG (x);
4835 if (!REG_P (y))
4836 break;
4837 if (REGNO (y) < 16)
4838 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4839 << (REGNO (y) +
4840 subreg_regno_offset (REGNO (y),
4841 GET_MODE (y),
4842 SUBREG_BYTE (x),
4843 GET_MODE (x)) + is_dest));
4844 return 0;
4846 case SET:
4847 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4848 case RETURN:
4849 /* If there was a return value, it must have been indicated with USE. */
4850 return 0x00ffff00;
4851 case CLOBBER:
4852 is_dest = 1;
4853 break;
4854 case MEM:
4855 is_dest = 0;
4856 break;
4857 case CALL:
4858 used |= 0x00ff00f0;
4859 break;
4860 default:
4861 break;
4864 fmt = GET_RTX_FORMAT (code);
4866 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4868 if (fmt[i] == 'E')
4870 register int j;
4871 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4872 used |= regs_used (XVECEXP (x, i, j), is_dest);
4874 else if (fmt[i] == 'e')
4875 used |= regs_used (XEXP (x, i), is_dest);
4877 return used;
4880 /* Create an instruction that prevents redirection of a conditional branch
4881 to the destination of the JUMP with address ADDR.
4882 If the branch needs to be implemented as an indirect jump, try to find
4883 a scratch register for it.
4884 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4885 If any preceding insn that doesn't fit into a delay slot is good enough,
4886 pass 1. Pass 2 if a definite blocking insn is needed.
4887 -1 is used internally to avoid deep recursion.
4888 If a blocking instruction is made or recognized, return it. */
4890 static rtx
4891 gen_block_redirect (rtx jump, int addr, int need_block)
4893 int dead = 0;
4894 rtx prev = prev_nonnote_insn (jump);
4895 rtx dest;
4897 /* First, check if we already have an instruction that satisfies our need. */
4898 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4900 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4901 return prev;
4902 if (GET_CODE (PATTERN (prev)) == USE
4903 || GET_CODE (PATTERN (prev)) == CLOBBER
4904 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4905 prev = jump;
4906 else if ((need_block &= ~1) < 0)
4907 return prev;
4908 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4909 need_block = 0;
4911 if (GET_CODE (PATTERN (jump)) == RETURN)
4913 if (! need_block)
4914 return prev;
4915 /* Reorg even does nasty things with return insns that cause branches
4916 to go out of range - see find_end_label and callers. */
4917 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4919 /* We can't use JUMP_LABEL here because it might be undefined
4920 when not optimizing. */
4921 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4922 /* If the branch is out of range, try to find a scratch register for it. */
4923 if (optimize
4924 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4925 > 4092 + 4098))
4927 rtx scan;
4928 /* Don't look for the stack pointer as a scratch register,
4929 it would cause trouble if an interrupt occurred. */
4930 unsigned attempt = 0x7fff, used;
4931 int jump_left = flag_expensive_optimizations + 1;
4933 /* It is likely that the most recent eligible instruction is wanted for
4934 the delay slot. Therefore, find out which registers it uses, and
4935 try to avoid using them. */
4937 for (scan = jump; (scan = PREV_INSN (scan)); )
4939 enum rtx_code code;
4941 if (INSN_DELETED_P (scan))
4942 continue;
4943 code = GET_CODE (scan);
4944 if (code == CODE_LABEL || code == JUMP_INSN)
4945 break;
4946 if (code == INSN
4947 && GET_CODE (PATTERN (scan)) != USE
4948 && GET_CODE (PATTERN (scan)) != CLOBBER
4949 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4951 attempt &= ~regs_used (PATTERN (scan), 0);
4952 break;
4955 for (used = dead = 0, scan = JUMP_LABEL (jump);
4956 (scan = NEXT_INSN (scan)); )
4958 enum rtx_code code;
4960 if (INSN_DELETED_P (scan))
4961 continue;
4962 code = GET_CODE (scan);
4963 if (INSN_P (scan))
4965 used |= regs_used (PATTERN (scan), 0);
4966 if (code == CALL_INSN)
4967 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4968 dead |= (used >> 16) & ~used;
4969 if (dead & attempt)
4971 dead &= attempt;
4972 break;
4974 if (code == JUMP_INSN)
4976 if (jump_left-- && simplejump_p (scan))
4977 scan = JUMP_LABEL (scan);
4978 else
4979 break;
4983 /* Mask out the stack pointer again, in case it was
4984 the only 'free' register we have found. */
4985 dead &= 0x7fff;
4987 /* If the immediate destination is still in range, check for possible
4988 threading with a jump beyond the delay slot insn.
4989 Don't check if we are called recursively; the jump has been or will be
4990 checked in a different invocation then. */
4992 else if (optimize && need_block >= 0)
4994 rtx next = next_active_insn (next_active_insn (dest));
4995 if (next && JUMP_P (next)
4996 && GET_CODE (PATTERN (next)) == SET
4997 && recog_memoized (next) == CODE_FOR_jump_compact)
4999 dest = JUMP_LABEL (next);
5000 if (dest
5001 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5002 > 4092 + 4098))
5003 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5007 if (dead)
5009 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5011 /* It would be nice if we could convert the jump into an indirect
5012 jump / far branch right now, and thus exposing all constituent
5013 instructions to further optimization. However, reorg uses
5014 simplejump_p to determine if there is an unconditional jump where
5015 it should try to schedule instructions from the target of the
5016 branch; simplejump_p fails for indirect jumps even if they have
5017 a JUMP_LABEL. */
5018 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5019 (reg, GEN_INT (unspec_bbr_uid++)),
5020 jump);
5021 /* ??? We would like this to have the scope of the jump, but that
5022 scope will change when a delay slot insn of an inner scope is added.
5023 Hence, after delay slot scheduling, we'll have to expect
5024 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5025 the jump. */
5027 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5028 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5029 return insn;
5031 else if (need_block)
5032 /* We can't use JUMP_LABEL here because it might be undefined
5033 when not optimizing. */
5034 return emit_insn_before (gen_block_branch_redirect
5035 (GEN_INT (unspec_bbr_uid++)),
5036 jump);
5037 return prev;
5040 #define CONDJUMP_MIN -252
5041 #define CONDJUMP_MAX 262
5042 struct far_branch
5044 /* A label (to be placed) in front of the jump
5045 that jumps to our ultimate destination. */
5046 rtx near_label;
5047 /* Where we are going to insert it if we cannot move the jump any farther,
5048 or the jump itself if we have picked up an existing jump. */
5049 rtx insert_place;
5050 /* The ultimate destination. */
5051 rtx far_label;
5052 struct far_branch *prev;
5053 /* If the branch has already been created, its address;
5054 else the address of its first prospective user. */
5055 int address;
5058 static void gen_far_branch (struct far_branch *);
5059 enum mdep_reorg_phase_e mdep_reorg_phase;
5060 static void
5061 gen_far_branch (struct far_branch *bp)
5063 rtx insn = bp->insert_place;
5064 rtx jump;
5065 rtx label = gen_label_rtx ();
5066 int ok;
5068 emit_label_after (label, insn);
5069 if (bp->far_label)
5071 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5072 LABEL_NUSES (bp->far_label)++;
5074 else
5075 jump = emit_jump_insn_after (gen_return (), insn);
5076 /* Emit a barrier so that reorg knows that any following instructions
5077 are not reachable via a fall-through path.
5078 But don't do this when not optimizing, since we wouldn't suppress the
5079 alignment for the barrier then, and could end up with out-of-range
5080 pc-relative loads. */
5081 if (optimize)
5082 emit_barrier_after (jump);
5083 emit_label_after (bp->near_label, insn);
5084 JUMP_LABEL (jump) = bp->far_label;
5085 ok = invert_jump (insn, label, 1);
5086 gcc_assert (ok);
5088 /* If we are branching around a jump (rather than a return), prevent
5089 reorg from using an insn from the jump target as the delay slot insn -
5090 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5091 and it could cause branches to go out of range. */
5092 if (bp->far_label)
5093 (emit_insn_after
5094 (gen_stuff_delay_slot
5095 (GEN_INT (unspec_bbr_uid++),
5096 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5097 insn));
5098 /* Prevent reorg from undoing our splits. */
5099 gen_block_redirect (jump, bp->address += 2, 2);
5102 /* Fix up ADDR_DIFF_VECs. */
5103 void
5104 fixup_addr_diff_vecs (rtx first)
5106 rtx insn;
5108 for (insn = first; insn; insn = NEXT_INSN (insn))
5110 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5112 if (!JUMP_P (insn)
5113 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5114 continue;
5115 pat = PATTERN (insn);
5116 vec_lab = XEXP (XEXP (pat, 0), 0);
5118 /* Search the matching casesi_jump_2. */
5119 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5121 if (!JUMP_P (prev))
5122 continue;
5123 prevpat = PATTERN (prev);
5124 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5125 continue;
5126 x = XVECEXP (prevpat, 0, 1);
5127 if (GET_CODE (x) != USE)
5128 continue;
5129 x = XEXP (x, 0);
5130 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5131 break;
5133 /* FIXME: This is a bug in the optimizer, but it seems harmless
5134 to just avoid panicking. */
5135 if (!prev)
5136 continue;
5138 /* Emit the reference label of the braf where it belongs, right after
5139 the casesi_jump_2 (i.e. braf). */
5140 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5141 emit_label_after (braf_label, prev);
5143 /* Fix up the ADDR_DIFF_VEC to be relative
5144 to the reference address of the braf. */
5145 XEXP (XEXP (pat, 0), 0) = braf_label;
5149 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5150 a barrier. Return the base 2 logarithm of the desired alignment. */
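/* E.g. a return value of 2 asks for 1 << 2 = 4 byte alignment, while 0
   means no extra alignment is wanted.  */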
5152 barrier_align (rtx barrier_or_label)
5154 rtx next = next_real_insn (barrier_or_label), pat, prev;
5155 int slot, credit, jump_to_next = 0;
5157 if (! next)
5158 return 0;
5160 pat = PATTERN (next);
5162 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5163 return 2;
5165 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5166 /* This is a barrier in front of a constant table. */
5167 return 0;
5169 prev = prev_real_insn (barrier_or_label);
5170 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5172 pat = PATTERN (prev);
5173 /* If this is a very small table, we want to keep the alignment after
5174 the table to the minimum for proper code alignment. */
5175 return ((TARGET_SMALLCODE
5176 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5177 <= (unsigned) 1 << (CACHE_LOG - 2)))
5178 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5181 if (TARGET_SMALLCODE)
5182 return 0;
5184 if (! TARGET_SH2 || ! optimize)
5185 return align_jumps_log;
5187 /* When fixing up pcloads, a constant table might be inserted just before
5188 the basic block that ends with the barrier. Thus, we can't trust the
5189 instruction lengths before that. */
5190 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5192 /* Check if there is an immediately preceding branch to the insn beyond
5193 the barrier. We must weight the cost of discarding useful information
5194 from the current cache line when executing this branch and there is
5195 an alignment, against that of fetching unneeded insn in front of the
5196 branch target when there is no alignment. */
5198 /* There are two delay_slot cases to consider. One is the simple case
5199 where the preceding branch is to the insn beyond the barrier (simple
5200 delay slot filling), and the other is where the preceding branch has
5201 a delay slot that is a duplicate of the insn after the barrier
5202 (fill_eager_delay_slots) and the branch is to the insn after the insn
5203 after the barrier. */
5205 /* PREV is presumed to be the JUMP_INSN for the barrier under
5206 investigation. Skip to the insn before it. */
5207 prev = prev_real_insn (prev);
5209 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5210 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5211 prev = prev_real_insn (prev))
5213 jump_to_next = 0;
5214 if (GET_CODE (PATTERN (prev)) == USE
5215 || GET_CODE (PATTERN (prev)) == CLOBBER)
5216 continue;
5217 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5219 prev = XVECEXP (PATTERN (prev), 0, 1);
5220 if (INSN_UID (prev) == INSN_UID (next))
5222 /* Delay slot was filled with insn at jump target. */
5223 jump_to_next = 1;
5224 continue;
5228 if (slot
5229 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5230 slot = 0;
5231 credit -= get_attr_length (prev);
5233 if (prev
5234 && JUMP_P (prev)
5235 && JUMP_LABEL (prev))
5237 rtx x;
5238 if (jump_to_next
5239 || next_real_insn (JUMP_LABEL (prev)) == next
5240 /* If relax_delay_slots() decides NEXT was redundant
5241 with some previous instruction, it will have
5242 redirected PREV's jump to the following insn. */
5243 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5244 /* There is no upper bound on redundant instructions
5245 that might have been skipped, but we must not put an
5246 alignment where none had been before. */
5247 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5248 (INSN_P (x)
5249 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5250 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5251 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5253 rtx pat = PATTERN (prev);
5254 if (GET_CODE (pat) == PARALLEL)
5255 pat = XVECEXP (pat, 0, 0);
5256 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5257 return 0;
5262 return align_jumps_log;
5265 /* If we are inside a phony loop, almost any kind of label can turn up as the
5266 first one in the loop. Aligning a braf label causes incorrect switch
5267 destination addresses; we can detect braf labels because they are
5268 followed by a BARRIER.
5269 Applying loop alignment to small constant or switch tables is a waste
5270 of space, so we suppress this too. */
5272 sh_loop_align (rtx label)
5274 rtx next = label;
5277 next = next_nonnote_insn (next);
5278 while (next && LABEL_P (next));
5280 if (! next
5281 || ! INSN_P (next)
5282 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5283 || recog_memoized (next) == CODE_FOR_consttable_2)
5284 return 0;
5286 return align_loops_log;
5289 /* Do a final pass over the function, just before delayed branch
5290 scheduling. */
5292 static void
5293 sh_reorg (void)
5295 rtx first, insn, mova = NULL_RTX;
5296 int num_mova;
5297 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5298 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5300 first = get_insns ();
5301 max_labelno_before_reorg = max_label_num ();
5303 /* We must split call insns before introducing `mova's. If we're
5304 optimizing, they'll have already been split. Otherwise, make
5305 sure we don't split them too late. */
5306 if (! optimize)
5307 split_all_insns_noflow ();
5309 if (TARGET_SHMEDIA)
5310 return;
5312 /* If relaxing, generate pseudo-ops to associate function calls with
5313 the symbols they call. It does no harm to not generate these
5314 pseudo-ops. However, when we can generate them, it enables the
5315 linker to potentially relax the jsr to a bsr, and eliminate the
5316 register load and, possibly, the constant pool entry. */
5318 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5319 if (TARGET_RELAX)
5321 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5322 own purposes. This works because none of the remaining passes
5323 need to look at them.
5325 ??? But it may break in the future. We should use a machine
5326 dependent REG_NOTE, or some other approach entirely. */
5327 for (insn = first; insn; insn = NEXT_INSN (insn))
5329 if (INSN_P (insn))
5331 rtx note;
5333 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5334 NULL_RTX)) != 0)
5335 remove_note (insn, note);
5339 for (insn = first; insn; insn = NEXT_INSN (insn))
5341 rtx pattern, reg, link, set, scan, dies, label;
5342 int rescan = 0, foundinsn = 0;
5344 if (CALL_P (insn))
5346 pattern = PATTERN (insn);
5348 if (GET_CODE (pattern) == PARALLEL)
5349 pattern = XVECEXP (pattern, 0, 0);
5350 if (GET_CODE (pattern) == SET)
5351 pattern = SET_SRC (pattern);
5353 if (GET_CODE (pattern) != CALL
5354 || !MEM_P (XEXP (pattern, 0)))
5355 continue;
5357 reg = XEXP (XEXP (pattern, 0), 0);
5359 else
5361 reg = sfunc_uses_reg (insn);
5362 if (! reg)
5363 continue;
5366 if (!REG_P (reg))
5367 continue;
5369 /* Try scanning backward to find where the register is set. */
5370 link = NULL;
5371 for (scan = PREV_INSN (insn);
5372 scan && !LABEL_P (scan);
5373 scan = PREV_INSN (scan))
5375 if (! INSN_P (scan))
5376 continue;
5378 if (! reg_mentioned_p (reg, scan))
5379 continue;
5381 if (noncall_uses_reg (reg, scan, &set))
5382 break;
5384 if (set)
5386 link = scan;
5387 break;
5391 if (! link)
5392 continue;
5394 /* The register is set at LINK. */
5396 /* We can only optimize the function call if the register is
5397 being set to a symbol. In theory, we could sometimes
5398 optimize calls to a constant location, but the assembler
5399 and linker do not support that at present. */
5400 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5401 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5402 continue;
5404 /* Scan forward from LINK to the place where REG dies, and
5405 make sure that the only insns which use REG are
5406 themselves function calls. */
5408 /* ??? This doesn't work for call targets that were allocated
5409 by reload, since there may not be a REG_DEAD note for the
5410 register. */
5412 dies = NULL_RTX;
5413 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5415 rtx scanset;
5417 /* Don't try to trace forward past a CODE_LABEL if we haven't
5418 seen INSN yet. Ordinarily, we will only find the setting insn
5419 if it is in the same basic block. However,
5420 cross-jumping can insert code labels in between the load and
5421 the call, and can result in situations where a single call
5422 insn may have two targets depending on where we came from. */
5424 if (LABEL_P (scan) && ! foundinsn)
5425 break;
5427 if (! INSN_P (scan))
5428 continue;
5430 /* Don't try to trace forward past a JUMP. To optimize
5431 safely, we would have to check that all the
5432 instructions at the jump destination did not use REG. */
5434 if (JUMP_P (scan))
5435 break;
5437 if (! reg_mentioned_p (reg, scan))
5438 continue;
5440 if (noncall_uses_reg (reg, scan, &scanset))
5441 break;
5443 if (scan == insn)
5444 foundinsn = 1;
5446 if (scan != insn
5447 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5449 /* There is a function call to this register other
5450 than the one we are checking. If we optimize
5451 this call, we need to rescan again below. */
5452 rescan = 1;
5455 /* ??? We shouldn't have to worry about SCANSET here.
5456 We should just be able to check for a REG_DEAD note
5457 on a function call. However, the REG_DEAD notes are
5458 apparently not dependable around libcalls; c-torture
5459 execute/920501-2 is a test case. If SCANSET is set,
5460 then this insn sets the register, so it must have
5461 died earlier. Unfortunately, this will only handle
5462 the cases in which the register is, in fact, set in a
5463 later insn. */
5465 /* ??? We shouldn't have to use FOUNDINSN here.
5466 This dates back to when we used LOG_LINKS to find
5467 the most recent insn which sets the register. */
5469 if (foundinsn
5470 && (scanset
5471 || find_reg_note (scan, REG_DEAD, reg)))
5473 dies = scan;
5474 break;
5478 if (! dies)
5480 /* Either there was a branch, or some insn used REG
5481 other than as a function call address. */
5482 continue;
5485 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5486 on the insn which sets the register, and on each call insn
5487 which uses the register. In final_prescan_insn we look for
5488 the REG_LABEL_OPERAND notes, and output the appropriate label
5489 or pseudo-op. */
5491 label = gen_label_rtx ();
5492 add_reg_note (link, REG_LABEL_OPERAND, label);
5493 add_reg_note (insn, REG_LABEL_OPERAND, label);
5494 if (rescan)
5496 scan = link;
5499 rtx reg2;
5501 scan = NEXT_INSN (scan);
5502 if (scan != insn
5503 && ((CALL_P (scan)
5504 && reg_mentioned_p (reg, scan))
5505 || ((reg2 = sfunc_uses_reg (scan))
5506 && REGNO (reg2) == REGNO (reg))))
5507 add_reg_note (scan, REG_LABEL_OPERAND, label);
5509 while (scan != dies);
5514 if (TARGET_SH2)
5515 fixup_addr_diff_vecs (first);
5517 if (optimize)
5519 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5520 shorten_branches (first);
5523 /* Scan the function looking for move instructions which have to be
5524 changed to pc-relative loads and insert the literal tables. */
5525 label_ref_list_pool = create_alloc_pool ("label references list",
5526 sizeof (struct label_ref_list_d),
5527 30);
5528 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5529 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5531 if (mova_p (insn))
5533 /* ??? basic block reordering can move a switch table dispatch
5534 below the switch table. Check if that has happened.
5535 We only have the addresses available when optimizing; but then,
5536 this check shouldn't be needed when not optimizing. */
5537 if (!untangle_mova (&num_mova, &mova, insn))
5539 insn = mova;
5540 num_mova = 0;
5543 else if (JUMP_P (insn)
5544 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5545 && num_mova
5546 /* ??? loop invariant motion can also move a mova out of a
5547 loop. Since loop does this code motion anyway, maybe we
5548 should wrap UNSPEC_MOVA into a CONST, so that reload can
5549 move it back. */
5550 && ((num_mova > 1
5551 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5552 || (prev_nonnote_insn (insn)
5553 == XEXP (MOVA_LABELREF (mova), 0))))
5555 rtx scan;
5556 int total;
5558 num_mova--;
5560 /* Some code might have been inserted between the mova and
5561 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5562 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5563 total += get_attr_length (scan);
5565 /* The range of mova is 1020, add 4 because pc counts from address of
5566 second instruction after this one, subtract 2 in case pc is 2
5567 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5568 cancels out with alignment effects of the mova itself. */
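/* That is, 1020 + 4 - 2 = 1022, which is the bound tested below.  */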
5569 if (total > 1022)
5571 /* Change the mova into a load, and restart scanning
5572 there. broken_move will then return true for mova. */
5573 fixup_mova (mova);
5574 insn = mova;
5577 if (broken_move (insn)
5578 || (NONJUMP_INSN_P (insn)
5579 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5581 rtx scan;
5582 /* Scan ahead looking for a barrier to stick the constant table
5583 behind. */
5584 rtx barrier = find_barrier (num_mova, mova, insn);
5585 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5586 int need_aligned_label = 0;
5588 if (num_mova && ! mova_p (mova))
5590 /* find_barrier had to change the first mova into a
5591 pcload; thus, we have to start with this new pcload. */
5592 insn = mova;
5593 num_mova = 0;
5595 /* Now find all the moves between the points and modify them. */
5596 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5598 if (LABEL_P (scan))
5599 last_float = 0;
5600 if (NONJUMP_INSN_P (scan)
5601 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5602 need_aligned_label = 1;
5603 if (broken_move (scan))
5605 rtx *patp = &PATTERN (scan), pat = *patp;
5606 rtx src, dst;
5607 rtx lab;
5608 rtx newsrc;
5609 enum machine_mode mode;
5611 if (GET_CODE (pat) == PARALLEL)
5612 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5613 src = SET_SRC (pat);
5614 dst = SET_DEST (pat);
5615 mode = GET_MODE (dst);
5617 if (mode == SImode && hi_const (src)
5618 && REGNO (dst) != FPUL_REG)
5620 int offset = 0;
5622 mode = HImode;
5623 while (GET_CODE (dst) == SUBREG)
5625 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5626 GET_MODE (SUBREG_REG (dst)),
5627 SUBREG_BYTE (dst),
5628 GET_MODE (dst));
5629 dst = SUBREG_REG (dst);
5631 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5633 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5635 /* This must be an insn that clobbers r0. */
5636 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5637 XVECLEN (PATTERN (scan), 0)
5638 - 1);
5639 rtx clobber = *clobberp;
5641 gcc_assert (GET_CODE (clobber) == CLOBBER
5642 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5644 if (last_float
5645 && reg_set_between_p (r0_rtx, last_float_move, scan))
5646 last_float = 0;
5647 if (last_float
5648 && TARGET_SHCOMPACT
5649 && GET_MODE_SIZE (mode) != 4
5650 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5651 last_float = 0;
5652 lab = add_constant (src, mode, last_float);
5653 if (lab)
5654 emit_insn_before (gen_mova (lab), scan);
5655 else
5657 /* There will be a REG_UNUSED note for r0 on
5658 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5659 lest reorg:mark_target_live_regs will not
5660 consider r0 to be used, and we end up with delay
5661 slot insn in front of SCAN that clobbers r0. */
5662 rtx note
5663 = find_regno_note (last_float_move, REG_UNUSED, 0);
5665 /* If we are not optimizing, then there may not be
5666 a note. */
5667 if (note)
5668 PUT_REG_NOTE_KIND (note, REG_INC);
5670 *last_float_addr = r0_inc_rtx;
5672 last_float_move = scan;
5673 last_float = src;
5674 newsrc = gen_const_mem (mode,
5675 (((TARGET_SH4 && ! TARGET_FMOVD)
5676 || REGNO (dst) == FPUL_REG)
5677 ? r0_inc_rtx
5678 : r0_rtx));
5679 last_float_addr = &XEXP (newsrc, 0);
5681 /* Remove the clobber of r0. */
5682 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5683 gen_rtx_SCRATCH (Pmode));
5685 /* This is a mova needing a label. Create it. */
5686 else if (GET_CODE (src) == UNSPEC
5687 && XINT (src, 1) == UNSPEC_MOVA
5688 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5690 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5691 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5692 newsrc = gen_rtx_UNSPEC (SImode,
5693 gen_rtvec (1, newsrc),
5694 UNSPEC_MOVA);
5696 else
5698 lab = add_constant (src, mode, 0);
5699 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5700 newsrc = gen_const_mem (mode, newsrc);
5702 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5703 INSN_CODE (scan) = -1;
5706 dump_table (need_aligned_label ? insn : 0, barrier);
5707 insn = barrier;
5710 free_alloc_pool (label_ref_list_pool);
5711 for (insn = first; insn; insn = NEXT_INSN (insn))
5712 PUT_MODE (insn, VOIDmode);
5714 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5715 INSN_ADDRESSES_FREE ();
5716 split_branches (first);
5718 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5719 also has an effect on the register that holds the address of the sfunc.
5720 Insert an extra dummy insn in front of each sfunc that pretends to
5721 use this register. */
5722 if (flag_delayed_branch)
5724 for (insn = first; insn; insn = NEXT_INSN (insn))
5726 rtx reg = sfunc_uses_reg (insn);
5728 if (! reg)
5729 continue;
5730 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5733 #if 0
5734 /* fpscr is not actually a user variable, but we pretend it is for the
5735 sake of the previous optimization passes, since we want it handled like
5736 one. However, we don't have any debugging information for it, so turn
5737 it into a non-user variable now. */
5738 if (TARGET_SH4)
5739 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5740 #endif
5741 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5744 static int
5745 get_dest_uid (rtx label, int max_uid)
5747 rtx dest = next_real_insn (label);
5748 int dest_uid;
5749 if (! dest)
5750 /* This can happen for an undefined label. */
5751 return 0;
5752 dest_uid = INSN_UID (dest);
5753 /* If this is a newly created branch redirection blocking instruction,
5754 we cannot index the branch_uid or insn_addresses arrays with its
5755 uid. But then, we won't need to, because the actual destination is
5756 the following branch. */
5757 while (dest_uid >= max_uid)
5759 dest = NEXT_INSN (dest);
5760 dest_uid = INSN_UID (dest);
5762 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5763 return 0;
5764 return dest_uid;
5767 /* Split condbranches that are out of range. Also add clobbers for
5768 scratch registers that are needed in far jumps.
5769 We do this before delay slot scheduling, so that it can take our
5770 newly created instructions into account. It also allows us to
5771 find branches with common targets more easily. */
5773 static void
5774 split_branches (rtx first)
5776 rtx insn;
5777 struct far_branch **uid_branch, *far_branch_list = 0;
5778 int max_uid = get_max_uid ();
5779 int ok;
5781 /* Find out which branches are out of range. */
5782 shorten_branches (first);
5784 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5785 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 if (! INSN_P (insn))
5789 continue;
5790 else if (INSN_DELETED_P (insn))
5792 /* Shorten_branches would split this instruction again,
5793 so transform it into a note. */
5794 SET_INSN_DELETED (insn);
5796 else if (JUMP_P (insn)
5797 /* Don't mess with ADDR_DIFF_VEC */
5798 && (GET_CODE (PATTERN (insn)) == SET
5799 || GET_CODE (PATTERN (insn)) == RETURN))
5801 enum attr_type type = get_attr_type (insn);
5802 if (type == TYPE_CBRANCH)
5804 rtx next, beyond;
5806 if (get_attr_length (insn) > 4)
5808 rtx src = SET_SRC (PATTERN (insn));
5809 rtx olabel = XEXP (XEXP (src, 1), 0);
5810 int addr = INSN_ADDRESSES (INSN_UID (insn));
5811 rtx label = 0;
5812 int dest_uid = get_dest_uid (olabel, max_uid);
5813 struct far_branch *bp = uid_branch[dest_uid];
5815 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5816 the label if the LABEL_NUSES count drops to zero. There is
5817 always a jump_optimize pass that sets these values, but it
5818 proceeds to delete unreferenced code, and then if not
5819 optimizing, to un-delete the deleted instructions, thus
5820 leaving labels with use counts that are too low. */
5821 if (! optimize)
5823 JUMP_LABEL (insn) = olabel;
5824 LABEL_NUSES (olabel)++;
5826 if (! bp)
5828 bp = (struct far_branch *) alloca (sizeof *bp);
5829 uid_branch[dest_uid] = bp;
5830 bp->prev = far_branch_list;
5831 far_branch_list = bp;
5832 bp->far_label
5833 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5834 LABEL_NUSES (bp->far_label)++;
5836 else
5838 label = bp->near_label;
5839 if (! label && bp->address - addr >= CONDJUMP_MIN)
5841 rtx block = bp->insert_place;
5843 if (GET_CODE (PATTERN (block)) == RETURN)
5844 block = PREV_INSN (block);
5845 else
5846 block = gen_block_redirect (block,
5847 bp->address, 2);
5848 label = emit_label_after (gen_label_rtx (),
5849 PREV_INSN (block));
5850 bp->near_label = label;
5852 else if (label && ! NEXT_INSN (label))
5854 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5855 bp->insert_place = insn;
5856 else
5857 gen_far_branch (bp);
5860 if (! label
5861 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5863 bp->near_label = label = gen_label_rtx ();
5864 bp->insert_place = insn;
5865 bp->address = addr;
5867 ok = redirect_jump (insn, label, 0);
5868 gcc_assert (ok);
5870 else
5872 /* get_attr_length (insn) == 2 */
5873 /* Check if we have a pattern where reorg wants to redirect
5874 the branch to a label from an unconditional branch that
5875 is too far away. */
5876 /* We can't use JUMP_LABEL here because it might be undefined
5877 when not optimizing. */
5878 /* A syntax error might cause beyond to be NULL_RTX. */
5879 beyond
5880 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5881 0));
5883 if (beyond
5884 && (JUMP_P (beyond)
5885 || ((beyond = next_active_insn (beyond))
5886 && JUMP_P (beyond)))
5887 && GET_CODE (PATTERN (beyond)) == SET
5888 && recog_memoized (beyond) == CODE_FOR_jump_compact
5889 && ((INSN_ADDRESSES
5890 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5891 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5892 > 252 + 258 + 2))
5893 gen_block_redirect (beyond,
5894 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5897 next = next_active_insn (insn);
5899 if (next
5900 && (JUMP_P (next)
5901 || ((next = next_active_insn (next))
5902 && JUMP_P (next)))
5903 && GET_CODE (PATTERN (next)) == SET
5904 && recog_memoized (next) == CODE_FOR_jump_compact
5905 && ((INSN_ADDRESSES
5906 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5907 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5908 > 252 + 258 + 2))
5909 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5911 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5913 int addr = INSN_ADDRESSES (INSN_UID (insn));
5914 rtx far_label = 0;
5915 int dest_uid = 0;
5916 struct far_branch *bp;
5918 if (type == TYPE_JUMP)
5920 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5921 dest_uid = get_dest_uid (far_label, max_uid);
5922 if (! dest_uid)
5924 /* Parse errors can lead to labels outside
5925 the insn stream. */
5926 if (! NEXT_INSN (far_label))
5927 continue;
5929 if (! optimize)
5931 JUMP_LABEL (insn) = far_label;
5932 LABEL_NUSES (far_label)++;
5934 redirect_jump (insn, NULL_RTX, 1);
5935 far_label = 0;
5938 bp = uid_branch[dest_uid];
5939 if (! bp)
5941 bp = (struct far_branch *) alloca (sizeof *bp);
5942 uid_branch[dest_uid] = bp;
5943 bp->prev = far_branch_list;
5944 far_branch_list = bp;
5945 bp->near_label = 0;
5946 bp->far_label = far_label;
5947 if (far_label)
5948 LABEL_NUSES (far_label)++;
5950 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5951 if (addr - bp->address <= CONDJUMP_MAX)
5952 emit_label_after (bp->near_label, PREV_INSN (insn));
5953 else
5955 gen_far_branch (bp);
5956 bp->near_label = 0;
5958 else
5959 bp->near_label = 0;
5960 bp->address = addr;
5961 bp->insert_place = insn;
5962 if (! far_label)
5963 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5964 else
5965 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5968 /* Generate all pending far branches,
5969 and free our references to the far labels. */
5970 while (far_branch_list)
5972 if (far_branch_list->near_label
5973 && ! NEXT_INSN (far_branch_list->near_label))
5974 gen_far_branch (far_branch_list);
5975 if (optimize
5976 && far_branch_list->far_label
5977 && ! --LABEL_NUSES (far_branch_list->far_label))
5978 delete_insn (far_branch_list->far_label);
5979 far_branch_list = far_branch_list->prev;
5982 /* Instruction length information is no longer valid due to the new
5983 instructions that have been generated. */
5984 init_insn_lengths ();
5987 /* Dump out instruction addresses, which is useful for debugging the
5988 constant pool table stuff.
5990 If relaxing, output the label and pseudo-ops used to link together
5991 calls and the instruction which set the registers. */
5993 /* ??? The addresses printed by this routine for insns are nonsense for
5994 insns which are inside of a sequence where none of the inner insns have
5995 variable length. This is because the second pass of shorten_branches
5996 does not bother to update them. */
5998 void
5999 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6000 int noperands ATTRIBUTE_UNUSED)
6002 if (TARGET_DUMPISIZE)
6003 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6005 if (TARGET_RELAX)
6007 rtx note;
6009 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6010 if (note)
6012 rtx pattern;
6014 pattern = PATTERN (insn);
6015 if (GET_CODE (pattern) == PARALLEL)
6016 pattern = XVECEXP (pattern, 0, 0);
6017 switch (GET_CODE (pattern))
6019 case SET:
6020 if (GET_CODE (SET_SRC (pattern)) != CALL
6021 && get_attr_type (insn) != TYPE_SFUNC)
6023 targetm.asm_out.internal_label
6024 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6025 break;
6027 /* else FALLTHROUGH */
6028 case CALL:
6029 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6030 CODE_LABEL_NUMBER (XEXP (note, 0)));
6031 break;
6033 default:
6034 gcc_unreachable ();
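/* Editorial sketch (not part of the original source): under TARGET_RELAX the
   note handling above ties each call to the insn that loaded the callee's
   address, producing assembly along the lines of

   .L5:                            label printed before the address-setting insn
           mov.l   .L7,r1
           ...
           .uses   .L5             printed before the call / sfunc insn

   so the assembler and linker relaxation machinery can treat the pair as a
   unit.  Under TARGET_DUMPISIZE, a "! at 01a4" style address annotation is
   printed before each insn.  Label names and addresses here are invented
   for illustration.  */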
6040 /* Dump out any constants accumulated in the final pass. These will
6041 only be labels. */
6043 const char *
6044 output_jump_label_table (void)
6046 int i;
6048 if (pool_size)
6050 fprintf (asm_out_file, "\t.align 2\n");
6051 for (i = 0; i < pool_size; i++)
6053 pool_node *p = &pool_vector[i];
6055 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6056 CODE_LABEL_NUMBER (p->label));
6057 output_asm_insn (".long %O0", &p->value);
6059 pool_size = 0;
6062 return "";
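/* Editorial sketch (not part of the original source): at this point every
   pooled entry is a label, so the loop above emits something like

       .align 2
   .L27:
       .long   .L54

   i.e. an aligned address constant naming the jump target.  The exact label
   spelling depends on the target's internal_label hook; the numbers are
   made up.  */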
6065 /* A full frame looks like:
6067 arg-5
6068 arg-4
6069 [ if current_function_anonymous_args
6070 arg-3
6071 arg-2
6072 arg-1
6073 arg-0 ]
6074 saved-fp
6075 saved-r10
6076 saved-r11
6077 saved-r12
6078 saved-pr
6079 local-n
6081 local-1
6082 local-0 <- fp points here. */
6084 /* Number of bytes pushed for anonymous args, used to pass information
6085 between expand_prologue and expand_epilogue. */
6087 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6088 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6089 for an epilogue and a negative value means that it's for a sibcall
6090 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6091 all the registers that are about to be restored, and hence dead. */
6093 static void
6094 output_stack_adjust (int size, rtx reg, int epilogue_p,
6095 HARD_REG_SET *live_regs_mask, bool frame_p)
6097 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6098 if (size)
6100 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6102 /* This test is bogus, as output_stack_adjust is used to re-align the
6103 stack. */
6104 #if 0
6105 gcc_assert (!(size % align));
6106 #endif
6108 if (CONST_OK_FOR_ADD (size))
6109 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6110 /* Try to do it with two partial adjustments; however, we must make
6111 sure that the stack is properly aligned at all times, in case
6112 an interrupt occurs between the two partial adjustments. */
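/* Illustrative arithmetic (editorial, not part of the original source):
   assuming CONST_OK_FOR_ADD covers roughly a signed 8-bit range and ALIGN
   is 8, an adjustment of size == -240 does not fit in a single add, but

       size / 2 & -align            ==  -120
       size - (size / 2 & -align)   ==  -120

   so two small adds are emitted, and the intermediate stack pointer value
   stays a multiple of ALIGN, keeping the stack aligned if an interrupt
   arrives between the two adjustments.  */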
6113 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6114 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6116 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6117 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6119 else
6121 rtx const_reg;
6122 rtx insn;
6123 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6124 int i;
6126 /* If TEMP is invalid, we could temporarily save a general
6127 register to MACL. However, there is currently no need
6128 to handle this case, so just die when we see it. */
6129 if (epilogue_p < 0
6130 || current_function_interrupt
6131 || ! call_really_used_regs[temp] || fixed_regs[temp])
6132 temp = -1;
6133 if (temp < 0 && ! current_function_interrupt
6134 && (TARGET_SHMEDIA || epilogue_p >= 0))
6136 HARD_REG_SET temps;
6137 COPY_HARD_REG_SET (temps, call_used_reg_set);
6138 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6139 if (epilogue_p > 0)
6141 int nreg = 0;
6142 if (crtl->return_rtx)
6144 enum machine_mode mode;
6145 mode = GET_MODE (crtl->return_rtx);
6146 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6147 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6149 for (i = 0; i < nreg; i++)
6150 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6151 if (crtl->calls_eh_return)
6153 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6154 for (i = 0; i <= 3; i++)
6155 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6158 if (TARGET_SHMEDIA && epilogue_p < 0)
6159 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6160 CLEAR_HARD_REG_BIT (temps, i);
6161 if (epilogue_p <= 0)
6163 for (i = FIRST_PARM_REG;
6164 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6165 CLEAR_HARD_REG_BIT (temps, i);
6166 if (cfun->static_chain_decl != NULL)
6167 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6169 temp = scavenge_reg (&temps);
6171 if (temp < 0 && live_regs_mask)
6173 HARD_REG_SET temps;
6175 COPY_HARD_REG_SET (temps, *live_regs_mask);
6176 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6177 temp = scavenge_reg (&temps);
6179 if (temp < 0)
6181 rtx adj_reg, tmp_reg, mem;
6183 /* If we reached here, the most likely case is the (sibcall)
6184 epilogue for non-SHmedia targets. Put a special push/pop
6185 sequence for such a case as the last resort. This looks
6186 lengthy but should not be a problem, because it seems to be
6187 very rare. */
6189 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6192 /* ??? There is still the slight possibility that r4 or
6193 r5 have been reserved as fixed registers or assigned
6194 as global registers, and they change during an
6195 interrupt. There are possible ways to handle this:
6197 - If we are adjusting the frame pointer (r14), we can do
6198 with a single temp register and an ordinary push / pop
6199 on the stack.
6200 - Grab any call-used or call-saved registers (i.e. not
6201 fixed or globals) for the temps we need. We might
6202 also grab r14 if we are adjusting the stack pointer.
6203 If we can't find enough available registers, issue
6204 a diagnostic and die - the user must have reserved
6205 way too many registers.
6206 But since all this is rather unlikely to happen and
6207 would require extra testing, we just die if r4 / r5
6208 are not available. */
6209 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6210 && !global_regs[4] && !global_regs[5]);
6212 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6213 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6214 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6215 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6216 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6217 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6218 emit_move_insn (mem, tmp_reg);
6219 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6220 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6221 emit_move_insn (mem, tmp_reg);
6222 emit_move_insn (reg, adj_reg);
6223 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6224 emit_move_insn (adj_reg, mem);
6225 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6226 emit_move_insn (tmp_reg, mem);
6227 /* Tell flow the insns that pop r4/r5 aren't dead. */
6228 emit_use (tmp_reg);
6229 emit_use (adj_reg);
6230 return;
6232 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6234 /* If SIZE is negative, subtract the positive value.
6235 This sometimes allows a constant pool entry to be shared
6236 between prologue and epilogue code. */
6237 if (size < 0)
6239 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6240 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6242 else
6244 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6245 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6247 if (! epilogue_p)
6248 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6249 gen_rtx_SET (VOIDmode, reg,
6250 gen_rtx_PLUS (SImode, reg,
6251 GEN_INT (size))));
6256 static rtx
6257 frame_insn (rtx x)
6259 x = emit_insn (x);
6260 RTX_FRAME_RELATED_P (x) = 1;
6261 return x;
6264 /* Output RTL to push register RN onto the stack. */
6266 static rtx
6267 push (int rn)
6269 rtx x;
6270 if (rn == FPUL_REG)
6271 x = gen_push_fpul ();
6272 else if (rn == FPSCR_REG)
6273 x = gen_push_fpscr ();
6274 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6275 && FP_OR_XD_REGISTER_P (rn))
6277 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6278 return NULL_RTX;
6279 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6281 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6282 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6283 else
6284 x = gen_push (gen_rtx_REG (SImode, rn));
6286 x = frame_insn (x);
6287 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6288 return x;
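/* Editorial sketch (not part of the original source): on SH1..SH4 the push
   patterns used above boil down to predecrement stores through r15, e.g.

       mov.l   rN,@-r15         ! general register (gen_push)
       sts.l   fpul,@-r15       ! gen_push_fpul
       sts.l   fpscr,@-r15      ! gen_push_fpscr
       fmov.s  frN,@-r15        ! gen_push_e, single-precision FP

   with gen_push_4 storing a double-precision register pair when FMOVD is in
   effect.  Exact mnemonics vary with the subtarget and assembler dialect;
   the REG_INC note added above records the implicit r15 modification that
   these stores perform.  */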
6291 /* Output RTL to pop register RN from the stack. */
6293 static void
6294 pop (int rn)
6296 rtx x;
6297 if (rn == FPUL_REG)
6298 x = gen_pop_fpul ();
6299 else if (rn == FPSCR_REG)
6300 x = gen_pop_fpscr ();
6301 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6302 && FP_OR_XD_REGISTER_P (rn))
6304 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6305 return;
6306 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6308 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6309 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6310 else
6311 x = gen_pop (gen_rtx_REG (SImode, rn));
6313 x = emit_insn (x);
6314 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6317 /* Generate code to push the regs specified in the mask. */
6319 static void
6320 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6322 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6323 int skip_fpscr = 0;
6325 /* Push PR last; this gives better latencies after the prologue, and
6326 candidates for the return delay slot when there are no general
6327 registers pushed. */
6328 for (; i < FIRST_PSEUDO_REGISTER; i++)
6330 /* If this is an interrupt handler, and the SZ bit varies,
6331 and we have to push any floating point register, we need
6332 to switch to the correct precision first. */
6333 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6334 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6336 HARD_REG_SET unsaved;
6338 push (FPSCR_REG);
6339 COMPL_HARD_REG_SET (unsaved, *mask);
6340 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6341 skip_fpscr = 1;
6343 if (i != PR_REG
6344 && (i != FPSCR_REG || ! skip_fpscr)
6345 && TEST_HARD_REG_BIT (*mask, i))
6347 /* If the ISR has RESBANK attribute assigned, don't push any of
6348 the following registers - R0-R14, MACH, MACL and GBR. */
6349 if (! (sh_cfun_resbank_handler_p ()
6350 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6351 || i == MACH_REG
6352 || i == MACL_REG
6353 || i == GBR_REG)))
6354 push (i);
6358 /* Push banked registers last to improve delay slot opportunities. */
6359 if (interrupt_handler)
6360 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6361 if (TEST_HARD_REG_BIT (*mask, i))
6362 push (i);
6364 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6365 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6366 push (PR_REG);
6369 /* Calculate how much extra space is needed to save all callee-saved
6370 target registers.
6371 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6373 static int
6374 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6376 int reg;
6377 int stack_space = 0;
6378 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6380 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6381 if ((! call_really_used_regs[reg] || interrupt_handler)
6382 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6383 /* Leave space to save this target register on the stack,
6384 in case target register allocation wants to use it. */
6385 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6386 return stack_space;
6389 /* Decide whether we should reserve space for callee-save target registers,
6390 in case target register allocation wants to use them. REGS_SAVED is
6391 the space, in bytes, that is already required for register saves.
6392 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6394 static int
6395 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6396 HARD_REG_SET *live_regs_mask)
6398 if (optimize_size)
6399 return 0;
6400 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6403 /* Decide how much space to reserve for callee-save target registers
6404 in case target register allocation wants to use them.
6405 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6407 static int
6408 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6410 if (shmedia_space_reserved_for_target_registers)
6411 return shmedia_target_regs_stack_space (live_regs_mask);
6412 else
6413 return 0;
6416 /* Work out the registers which need to be saved, both as a mask and a
6417 count of saved words. Return the count.
6419 If doing a pragma interrupt function, then push all regs used by the
6420 function, and if we call another function (we can tell by looking at PR),
6421 make sure that all the regs it clobbers are safe too. */
6423 static int
6424 calc_live_regs (HARD_REG_SET *live_regs_mask)
6426 unsigned int reg;
6427 int count;
6428 tree attrs;
6429 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6430 bool nosave_low_regs;
6431 int pr_live, has_call;
6433 attrs = DECL_ATTRIBUTES (current_function_decl);
6434 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6435 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6436 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6437 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6439 CLEAR_HARD_REG_SET (*live_regs_mask);
6440 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6441 && df_regs_ever_live_p (FPSCR_REG))
6442 target_flags &= ~MASK_FPU_SINGLE;
6443 /* If switching to double mode would save a lot of register saves, do that. */
6444 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6445 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6446 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6447 && (! call_really_used_regs[reg]
6448 || interrupt_handler)
6449 && ++count > 2)
6451 target_flags &= ~MASK_FPU_SINGLE;
6452 break;
6454 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6455 knows how to use it. That means the pseudo originally allocated for
6456 the initial value can become the PR_MEDIA_REG hard register, as seen for
6457 execute/20010122-1.c:test9. */
6458 if (TARGET_SHMEDIA)
6459 /* ??? this function is called from initial_elimination_offset, hence we
6460 can't use the result of sh_media_register_for_return here. */
6461 pr_live = sh_pr_n_sets ();
6462 else
6464 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6465 pr_live = (pr_initial
6466 ? (!REG_P (pr_initial)
6467 || REGNO (pr_initial) != (PR_REG))
6468 : df_regs_ever_live_p (PR_REG));
6469 /* For SHcompact, if not optimizing, we end up with a memory reference
6470 using the return address pointer for __builtin_return_address even
6471 though there is no actual need to put the PR register on the stack. */
6472 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6474 /* Force PR to be live if the prologue has to call the SHmedia
6475 argument decoder or register saver. */
6476 if (TARGET_SHCOMPACT
6477 && ((crtl->args.info.call_cookie
6478 & ~ CALL_COOKIE_RET_TRAMP (1))
6479 || crtl->saves_all_registers))
6480 pr_live = 1;
6481 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6482 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6484 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6485 ? pr_live
6486 : interrupt_handler
6487 ? (/* Need to save all the regs ever live. */
6488 (df_regs_ever_live_p (reg)
6489 || (call_really_used_regs[reg]
6490 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6491 || reg == PIC_OFFSET_TABLE_REGNUM)
6492 && has_call)
6493 || (TARGET_SHMEDIA && has_call
6494 && REGISTER_NATURAL_MODE (reg) == SImode
6495 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6496 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6497 && reg != RETURN_ADDRESS_POINTER_REGNUM
6498 && reg != T_REG && reg != GBR_REG
6499 /* Push fpscr only on targets which have FPU */
6500 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6501 : (/* Only push those regs which are used and need to be saved. */
6502 (TARGET_SHCOMPACT
6503 && flag_pic
6504 && crtl->args.info.call_cookie
6505 && reg == PIC_OFFSET_TABLE_REGNUM)
6506 || (df_regs_ever_live_p (reg)
6507 && ((!call_really_used_regs[reg]
6508 && !(reg != PIC_OFFSET_TABLE_REGNUM
6509 && fixed_regs[reg] && call_used_regs[reg]))
6510 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6511 || (crtl->calls_eh_return
6512 && (reg == EH_RETURN_DATA_REGNO (0)
6513 || reg == EH_RETURN_DATA_REGNO (1)
6514 || reg == EH_RETURN_DATA_REGNO (2)
6515 || reg == EH_RETURN_DATA_REGNO (3)))
6516 || ((reg == MACL_REG || reg == MACH_REG)
6517 && df_regs_ever_live_p (reg)
6518 && sh_cfun_attr_renesas_p ())
6521 SET_HARD_REG_BIT (*live_regs_mask, reg);
6522 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6524 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6525 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6527 if (FP_REGISTER_P (reg))
6529 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6531 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6532 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6535 else if (XD_REGISTER_P (reg))
6537 /* Must switch to double mode to access these registers. */
6538 target_flags &= ~MASK_FPU_SINGLE;
6542 if (nosave_low_regs && reg == R8_REG)
6543 break;
6545 /* If we have a target register optimization pass after prologue / epilogue
6546 threading, we need to assume all target registers will be live even if
6547 they aren't now. */
6548 if (flag_branch_target_load_optimize2
6549 && TARGET_SAVE_ALL_TARGET_REGS
6550 && shmedia_space_reserved_for_target_registers)
6551 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6552 if ((! call_really_used_regs[reg] || interrupt_handler)
6553 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6555 SET_HARD_REG_BIT (*live_regs_mask, reg);
6556 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6558 /* If this is an interrupt handler, we don't have any call-clobbered
6559 registers we can conveniently use for target register save/restore.
6560 Make sure we save at least one general purpose register when we need
6561 to save target registers. */
6562 if (interrupt_handler
6563 && hard_reg_set_intersect_p (*live_regs_mask,
6564 reg_class_contents[TARGET_REGS])
6565 && ! hard_reg_set_intersect_p (*live_regs_mask,
6566 reg_class_contents[GENERAL_REGS]))
6568 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6569 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6572 return count;
6575 /* Code to generate prologue and epilogue sequences */
6577 /* PUSHED is the number of bytes that are being pushed on the
6578 stack for register saves. Return the frame size, padded
6579 appropriately so that the stack stays properly aligned. */
6580 static HOST_WIDE_INT
6581 rounded_frame_size (int pushed)
6583 HOST_WIDE_INT size = get_frame_size ();
6584 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6586 return ((size + pushed + align - 1) & -align) - pushed;
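/* Worked example (editorial, not part of the original source), assuming an
   8-byte stack boundary so ALIGN == 8: with get_frame_size () == 18 and
   PUSHED == 12,

       ((18 + 12 + 8 - 1) & -8) - 12  ==  (37 & -8) - 12  ==  32 - 12  ==  20

   i.e. the 18 bytes of locals are padded to 20 so that PUSHED plus the
   returned frame size, 12 + 20 == 32, stays a multiple of the boundary.  */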
6589 /* Choose a call-clobbered target-branch register that remains
6590 unchanged along the whole function. We set it up as the return
6591 value in the prologue. */
6592 int
6593 sh_media_register_for_return (void)
6595 int regno;
6596 int tr0_used;
6598 if (! current_function_is_leaf)
6599 return -1;
6600 if (lookup_attribute ("interrupt_handler",
6601 DECL_ATTRIBUTES (current_function_decl)))
6602 return -1;
6603 if (sh_cfun_interrupt_handler_p ())
6604 return -1;
6606 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6608 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6609 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6610 return regno;
6612 return -1;
6615 /* The maximum registers we need to save are:
6616 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6617 - 32 floating point registers (for each pair, we save none,
6618 one single precision value, or a double precision value).
6619 - 8 target registers
6620 - add 1 entry for a delimiter. */
6621 #define MAX_SAVED_REGS (62+32+8)
6623 typedef struct save_entry_s
6625 unsigned char reg;
6626 unsigned char mode;
6627 short offset;
6628 } save_entry;
6630 #define MAX_TEMPS 4
6632 /* There will be a delimiter entry with VOIDmode both at the start and the
6633 end of a filled in schedule. The end delimiter has the offset of the
6634 save with the smallest (i.e. most negative) offset. */
6635 typedef struct save_schedule_s
6637 save_entry entries[MAX_SAVED_REGS + 2];
6638 int temps[MAX_TEMPS+1];
6639 } save_schedule;
6641 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6642 use reverse order. Returns the last entry written to (not counting
6643 the delimiter). OFFSET_BASE is a number to be added to all offset
6644 entries. */
6646 static save_entry *
6647 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6648 int offset_base)
6650 int align, i;
6651 save_entry *entry = schedule->entries;
6652 int tmpx = 0;
6653 int offset;
6655 if (! current_function_interrupt)
6656 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6657 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6658 && ! FUNCTION_ARG_REGNO_P (i)
6659 && i != FIRST_RET_REG
6660 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6661 && ! (crtl->calls_eh_return
6662 && (i == EH_RETURN_STACKADJ_REGNO
6663 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6664 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6665 schedule->temps[tmpx++] = i;
6666 entry->reg = -1;
6667 entry->mode = VOIDmode;
6668 entry->offset = offset_base;
6669 entry++;
6670 /* We loop twice: first, we save 8-byte aligned registers at the
6671 higher addresses, which are known to be aligned. Then, we
6672 proceed to saving 32-bit registers that don't need 8-byte
6673 alignment.
6674 If this is an interrupt function, all registers that need saving
6675 need to be saved in full. Moreover, we need to postpone saving
6676 target registers till we have saved some general purpose registers
6677 we can then use as scratch registers. */
6678 offset = offset_base;
6679 for (align = 1; align >= 0; align--)
6681 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6682 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6684 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6685 int reg = i;
6687 if (current_function_interrupt)
6689 if (TARGET_REGISTER_P (i))
6690 continue;
6691 if (GENERAL_REGISTER_P (i))
6692 mode = DImode;
6694 if (mode == SFmode && (i % 2) == 1
6695 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6696 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6698 mode = DFmode;
6699 i--;
6700 reg--;
6703 /* If we're doing the aligned pass and this is not aligned,
6704 or we're doing the unaligned pass and this is aligned,
6705 skip it. */
6706 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6707 != align)
6708 continue;
6710 if (current_function_interrupt
6711 && GENERAL_REGISTER_P (i)
6712 && tmpx < MAX_TEMPS)
6713 schedule->temps[tmpx++] = i;
6715 offset -= GET_MODE_SIZE (mode);
6716 entry->reg = i;
6717 entry->mode = mode;
6718 entry->offset = offset;
6719 entry++;
6721 if (align && current_function_interrupt)
6722 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6723 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6725 offset -= GET_MODE_SIZE (DImode);
6726 entry->reg = i;
6727 entry->mode = DImode;
6728 entry->offset = offset;
6729 entry++;
6732 entry->reg = -1;
6733 entry->mode = VOIDmode;
6734 entry->offset = offset;
6735 schedule->temps[tmpx] = -1;
6736 return entry - 1;
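/* Editorial sketch of a filled-in schedule (not part of the original
   source), assuming OFFSET_BASE == 0, an 8-byte stack boundary, and one
   8-byte plus one 4-byte register to save:

       entries[0]   reg = -1, mode = VOIDmode   offset =   0    start delimiter
       entries[1]   the 8-byte register         offset =  -8    aligned pass
       entries[2]   the 4-byte register         offset = -12    unaligned pass
       entries[3]   reg = -1, mode = VOIDmode   offset = -12    end delimiter

   temps[] lists the call-clobbered general registers available as scratch
   registers, terminated by -1.  */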
6739 void
6740 sh_expand_prologue (void)
6742 HARD_REG_SET live_regs_mask;
6743 int d, i;
6744 int d_rounding = 0;
6745 int save_flags = target_flags;
6746 int pretend_args;
6747 tree sp_switch_attr
6748 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6750 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6752 /* We have pretend args if we had an object sent partially in registers
6753 and partially on the stack, e.g. a large structure. */
6754 pretend_args = crtl->args.pretend_args_size;
6755 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6756 && (NPARM_REGS(SImode)
6757 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6758 pretend_args = 0;
6759 /* Dwarf2 module doesn't expect frame related insns here. */
6760 output_stack_adjust (-pretend_args
6761 - crtl->args.info.stack_regs * 8,
6762 stack_pointer_rtx, 0, NULL, false);
6764 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6765 /* We're going to use the PIC register to load the address of the
6766 incoming-argument decoder and/or of the return trampoline from
6767 the GOT, so make sure the PIC register is preserved and
6768 initialized. */
6769 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6771 if (TARGET_SHCOMPACT
6772 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6774 int reg;
6776 /* First, make all registers with incoming arguments that will
6777 be pushed onto the stack live, so that register renaming
6778 doesn't overwrite them. */
6779 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6780 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6781 >= NPARM_REGS (SImode) - reg)
6782 for (; reg < NPARM_REGS (SImode); reg++)
6783 emit_insn (gen_shcompact_preserve_incoming_args
6784 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6785 else if (CALL_COOKIE_INT_REG_GET
6786 (crtl->args.info.call_cookie, reg) == 1)
6787 emit_insn (gen_shcompact_preserve_incoming_args
6788 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6790 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6791 stack_pointer_rtx);
6792 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6793 GEN_INT (crtl->args.info.call_cookie));
6794 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6795 gen_rtx_REG (SImode, R0_REG));
6797 else if (TARGET_SHMEDIA)
6799 int tr = sh_media_register_for_return ();
6801 if (tr >= 0)
6802 emit_move_insn (gen_rtx_REG (DImode, tr),
6803 gen_rtx_REG (DImode, PR_MEDIA_REG));
6806 /* Emit the code for SETUP_VARARGS. */
6807 if (cfun->stdarg)
6809 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6811 /* Push arg regs as if they'd been provided by the caller on the stack. */
6812 for (i = 0; i < NPARM_REGS(SImode); i++)
6814 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6815 rtx insn;
6817 if (i >= (NPARM_REGS(SImode)
6818 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6819 ))
6820 break;
6821 insn = push (rn);
6826 /* If we're supposed to switch stacks at function entry, do so now. */
6827 if (sp_switch_attr)
6829 rtx lab, newsrc;
6830 /* The argument specifies a variable holding the address of the
6831 stack the interrupt function should switch to/from at entry/exit. */
6832 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6833 const char *s
6834 = ggc_strdup (TREE_STRING_POINTER (arg));
6835 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6837 lab = add_constant (sp_switch, SImode, 0);
6838 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6839 newsrc = gen_const_mem (SImode, newsrc);
6841 emit_insn (gen_sp_switch_1 (newsrc));
6844 d = calc_live_regs (&live_regs_mask);
6845 /* ??? Maybe we could save some switching if we can move a mode switch
6846 that already happens to be at the function start into the prologue. */
6847 if (target_flags != save_flags && ! current_function_interrupt)
6848 emit_insn (gen_toggle_sz ());
6850 if (TARGET_SH5)
6852 int offset_base, offset;
6853 rtx r0 = NULL_RTX;
6854 int offset_in_r0 = -1;
6855 int sp_in_r0 = 0;
6856 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6857 int total_size, save_size;
6858 save_schedule schedule;
6859 save_entry *entry;
6860 int *tmp_pnt;
6862 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6863 && ! current_function_interrupt)
6864 r0 = gen_rtx_REG (Pmode, R0_REG);
6866 /* D is the actual number of bytes that we need for saving registers;
6867 however, in initial_elimination_offset we have committed to using
6868 an additional TREGS_SPACE bytes. In order to keep both the
6869 addresses of arguments supplied by the caller and local variables
6870 valid, we must keep this gap. Place it between the incoming
6871 arguments and the actually saved registers in a bid to optimize
6872 locality of reference. */
6873 total_size = d + tregs_space;
6874 total_size += rounded_frame_size (total_size);
6875 save_size = total_size - rounded_frame_size (d);
6876 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6877 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6878 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6880 /* If adjusting the stack in a single step costs nothing extra, do so.
6881 I.e. either if a single addi is enough, or we need a movi anyway,
6882 and we don't exceed the maximum offset range (the test for the
6883 latter is conservative for simplicity). */
6884 if (TARGET_SHMEDIA
6885 && (CONST_OK_FOR_I10 (-total_size)
6886 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6887 && total_size <= 2044)))
6888 d_rounding = total_size - save_size;
6890 offset_base = d + d_rounding;
6892 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6893 0, NULL, true);
6895 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6896 tmp_pnt = schedule.temps;
6897 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6899 enum machine_mode mode = (enum machine_mode) entry->mode;
6900 unsigned int reg = entry->reg;
6901 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6902 rtx orig_reg_rtx;
6904 offset = entry->offset;
6906 reg_rtx = gen_rtx_REG (mode, reg);
6908 mem_rtx = gen_frame_mem (mode,
6909 gen_rtx_PLUS (Pmode,
6910 stack_pointer_rtx,
6911 GEN_INT (offset)));
6913 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6915 gcc_assert (r0);
6916 mem_rtx = NULL_RTX;
6919 if (HAVE_PRE_DECREMENT
6920 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6921 || mem_rtx == NULL_RTX
6922 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6924 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6926 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6927 pre_dec = NULL_RTX;
6928 else
6930 mem_rtx = NULL_RTX;
6931 offset += GET_MODE_SIZE (mode);
6935 if (mem_rtx != NULL_RTX)
6936 goto addr_ok;
6938 if (offset_in_r0 == -1)
6940 emit_move_insn (r0, GEN_INT (offset));
6941 offset_in_r0 = offset;
6943 else if (offset != offset_in_r0)
6945 emit_move_insn (r0,
6946 gen_rtx_PLUS
6947 (Pmode, r0,
6948 GEN_INT (offset - offset_in_r0)));
6949 offset_in_r0 += offset - offset_in_r0;
6952 if (pre_dec != NULL_RTX)
6954 if (! sp_in_r0)
6956 emit_move_insn (r0,
6957 gen_rtx_PLUS
6958 (Pmode, r0, stack_pointer_rtx));
6959 sp_in_r0 = 1;
6962 offset -= GET_MODE_SIZE (mode);
6963 offset_in_r0 -= GET_MODE_SIZE (mode);
6965 mem_rtx = pre_dec;
6967 else if (sp_in_r0)
6968 mem_rtx = gen_frame_mem (mode, r0);
6969 else
6970 mem_rtx = gen_frame_mem (mode,
6971 gen_rtx_PLUS (Pmode,
6972 stack_pointer_rtx,
6973 r0));
6975 /* We must not use an r0-based address for target-branch
6976 registers or for special registers without pre-dec
6977 memory addresses, since we store their values in r0
6978 first. */
6979 gcc_assert (!TARGET_REGISTER_P (reg)
6980 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6981 || mem_rtx == pre_dec));
6983 addr_ok:
6984 orig_reg_rtx = reg_rtx;
6985 if (TARGET_REGISTER_P (reg)
6986 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6987 && mem_rtx != pre_dec))
6989 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6991 emit_move_insn (tmp_reg, reg_rtx);
6993 if (REGNO (tmp_reg) == R0_REG)
6995 offset_in_r0 = -1;
6996 sp_in_r0 = 0;
6997 gcc_assert (!refers_to_regno_p
6998 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7001 if (*++tmp_pnt <= 0)
7002 tmp_pnt = schedule.temps;
7004 reg_rtx = tmp_reg;
7007 rtx insn;
7009 /* Mark as interesting for dwarf cfi generator */
7010 insn = emit_move_insn (mem_rtx, reg_rtx);
7011 RTX_FRAME_RELATED_P (insn) = 1;
7012 /* If we use an intermediate register for the save, we can't
7013 describe this exactly in CFI as a copy of the to-be-saved
7014 register into the temporary register followed by a store of the
7015 temporary register to the stack, because the temporary register
7016 can have a different natural size than the to-be-saved register.
7017 Thus, we gloss over the intermediate copy and pretend we do
7018 a direct save from the to-be-saved register. */
7019 if (REGNO (reg_rtx) != reg)
7021 rtx set;
7023 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7024 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7027 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7029 rtx reg_rtx = gen_rtx_REG (mode, reg);
7030 rtx set;
7031 rtx mem_rtx = gen_frame_mem (mode,
7032 gen_rtx_PLUS (Pmode,
7033 stack_pointer_rtx,
7034 GEN_INT (offset)));
7036 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7042 gcc_assert (entry->offset == d_rounding);
7044 else
7045 push_regs (&live_regs_mask, current_function_interrupt);
7047 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7048 emit_insn (gen_GOTaddr2picreg ());
7050 if (SHMEDIA_REGS_STACK_ADJUST ())
7052 /* This must NOT go through the PLT, otherwise mach and macl
7053 may be clobbered. */
7054 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7055 (TARGET_FPU_ANY
7056 ? "__GCC_push_shmedia_regs"
7057 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7058 emit_insn (gen_shmedia_save_restore_regs_compact
7059 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7062 if (target_flags != save_flags && ! current_function_interrupt)
7063 emit_insn (gen_toggle_sz ());
7065 target_flags = save_flags;
7067 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7068 stack_pointer_rtx, 0, NULL, true);
7070 if (frame_pointer_needed)
7071 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7073 if (TARGET_SHCOMPACT
7074 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7076 /* This must NOT go through the PLT, otherwise mach and macl
7077 may be clobbered. */
7078 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7079 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7080 emit_insn (gen_shcompact_incoming_args ());
7084 void
7085 sh_expand_epilogue (bool sibcall_p)
7087 HARD_REG_SET live_regs_mask;
7088 int d, i;
7089 int d_rounding = 0;
7091 int save_flags = target_flags;
7092 int frame_size, save_size;
7093 int fpscr_deferred = 0;
7094 int e = sibcall_p ? -1 : 1;
7096 d = calc_live_regs (&live_regs_mask);
7098 save_size = d;
7099 frame_size = rounded_frame_size (d);
7101 if (TARGET_SH5)
7103 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7104 int total_size;
7105 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7106 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7107 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7109 total_size = d + tregs_space;
7110 total_size += rounded_frame_size (total_size);
7111 save_size = total_size - frame_size;
7113 /* If adjusting the stack in a single step costs nothing extra, do so.
7114 I.e. either if a single addi is enough, or we need a movi anyway,
7115 and we don't exceed the maximum offset range (the test for the
7116 latter is conservative for simplicity). */
7117 if (TARGET_SHMEDIA
7118 && ! frame_pointer_needed
7119 && (CONST_OK_FOR_I10 (total_size)
7120 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7121 && total_size <= 2044)))
7122 d_rounding = frame_size;
7124 frame_size -= d_rounding;
7127 if (frame_pointer_needed)
7129 /* We must avoid scheduling the epilogue with previous basic blocks.
7130 See PR/18032 and PR/40313. */
7131 emit_insn (gen_blockage ());
7132 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7133 &live_regs_mask, false);
7135 /* We must avoid moving the stack pointer adjustment past code
7136 which reads from the local frame, else an interrupt could
7137 occur after the SP adjustment and clobber data in the local
7138 frame. */
7139 emit_insn (gen_blockage ());
7140 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7142 else if (frame_size)
7144 /* We must avoid moving the stack pointer adjustment past code
7145 which reads from the local frame, else an interrupt could
7146 occur after the SP adjustment and clobber data in the local
7147 frame. */
7148 emit_insn (gen_blockage ());
7149 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7150 &live_regs_mask, false);
7153 if (SHMEDIA_REGS_STACK_ADJUST ())
7155 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7156 (TARGET_FPU_ANY
7157 ? "__GCC_pop_shmedia_regs"
7158 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7159 /* This must NOT go through the PLT, otherwise mach and macl
7160 may be clobbered. */
7161 emit_insn (gen_shmedia_save_restore_regs_compact
7162 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7165 /* Pop all the registers. */
7167 if (target_flags != save_flags && ! current_function_interrupt)
7168 emit_insn (gen_toggle_sz ());
7169 if (TARGET_SH5)
7171 int offset_base, offset;
7172 int offset_in_r0 = -1;
7173 int sp_in_r0 = 0;
7174 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7175 save_schedule schedule;
7176 save_entry *entry;
7177 int *tmp_pnt;
7179 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7180 offset_base = -entry[1].offset + d_rounding;
7181 tmp_pnt = schedule.temps;
7182 for (; entry->mode != VOIDmode; entry--)
7184 enum machine_mode mode = (enum machine_mode) entry->mode;
7185 int reg = entry->reg;
7186 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7188 offset = offset_base + entry->offset;
7189 reg_rtx = gen_rtx_REG (mode, reg);
7191 mem_rtx = gen_frame_mem (mode,
7192 gen_rtx_PLUS (Pmode,
7193 stack_pointer_rtx,
7194 GEN_INT (offset)));
7196 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7197 mem_rtx = NULL_RTX;
7199 if (HAVE_POST_INCREMENT
7200 && (offset == offset_in_r0
7201 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7202 && mem_rtx == NULL_RTX)
7203 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7205 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7207 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7208 post_inc = NULL_RTX;
7209 else
7210 mem_rtx = NULL_RTX;
7213 if (mem_rtx != NULL_RTX)
7214 goto addr_ok;
7216 if (offset_in_r0 == -1)
7218 emit_move_insn (r0, GEN_INT (offset));
7219 offset_in_r0 = offset;
7221 else if (offset != offset_in_r0)
7223 emit_move_insn (r0,
7224 gen_rtx_PLUS
7225 (Pmode, r0,
7226 GEN_INT (offset - offset_in_r0)));
7227 offset_in_r0 += offset - offset_in_r0;
7230 if (post_inc != NULL_RTX)
7232 if (! sp_in_r0)
7234 emit_move_insn (r0,
7235 gen_rtx_PLUS
7236 (Pmode, r0, stack_pointer_rtx));
7237 sp_in_r0 = 1;
7240 mem_rtx = post_inc;
7242 offset_in_r0 += GET_MODE_SIZE (mode);
7244 else if (sp_in_r0)
7245 mem_rtx = gen_frame_mem (mode, r0);
7246 else
7247 mem_rtx = gen_frame_mem (mode,
7248 gen_rtx_PLUS (Pmode,
7249 stack_pointer_rtx,
7250 r0));
7252 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7253 || mem_rtx == post_inc);
7255 addr_ok:
7256 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7257 && mem_rtx != post_inc)
7259 insn = emit_move_insn (r0, mem_rtx);
7260 mem_rtx = r0;
7262 else if (TARGET_REGISTER_P (reg))
7264 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7266 /* Give the scheduler a bit of freedom by using up to
7267 MAX_TEMPS registers in a round-robin fashion. */
7268 insn = emit_move_insn (tmp_reg, mem_rtx);
7269 mem_rtx = tmp_reg;
7270 if (*++tmp_pnt < 0)
7271 tmp_pnt = schedule.temps;
7274 insn = emit_move_insn (reg_rtx, mem_rtx);
7277 gcc_assert (entry->offset + offset_base == d + d_rounding);
7279 else /* ! TARGET_SH5 */
7281 int last_reg;
7283 save_size = 0;
7284 /* For an ISR with RESBANK attribute assigned, don't pop PR
7285 register. */
7286 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7287 && !sh_cfun_resbank_handler_p ())
7289 if (!frame_pointer_needed)
7290 emit_insn (gen_blockage ());
7291 pop (PR_REG);
7294 /* Banked registers are popped first to avoid being scheduled in the
7295 delay slot. RTE switches banks before executing its delay slot instruction. */
7296 if (current_function_interrupt)
7298 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7299 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7300 pop (i);
7302 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7304 else
7305 last_reg = FIRST_PSEUDO_REGISTER;
7307 for (i = 0; i < last_reg; i++)
7309 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7311 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7312 && hard_reg_set_intersect_p (live_regs_mask,
7313 reg_class_contents[DF_REGS]))
7314 fpscr_deferred = 1;
7315 /* For an ISR with RESBANK attribute assigned, don't pop
7316 the following registers: R0-R14, MACH, MACL and GBR. */
7317 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7318 && ! (sh_cfun_resbank_handler_p ()
7319 && ((j >= FIRST_GENERAL_REG
7320 && j < LAST_GENERAL_REG)
7321 || j == MACH_REG
7322 || j == MACL_REG
7323 || j == GBR_REG)))
7324 pop (j);
7326 if (j == FIRST_FP_REG && fpscr_deferred)
7327 pop (FPSCR_REG);
7330 if (target_flags != save_flags && ! current_function_interrupt)
7331 emit_insn (gen_toggle_sz ());
7332 target_flags = save_flags;
7334 output_stack_adjust (crtl->args.pretend_args_size
7335 + save_size + d_rounding
7336 + crtl->args.info.stack_regs * 8,
7337 stack_pointer_rtx, e, NULL, false);
7339 if (crtl->calls_eh_return)
7340 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7341 EH_RETURN_STACKADJ_RTX));
7343 /* Switch back to the normal stack if necessary. */
7344 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7345 emit_insn (gen_sp_switch_2 ());
7347 /* Tell flow the insn that pops PR isn't dead. */
7348 /* PR_REG will never be live in SHmedia mode, and we don't need to
7349 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7350 by the return pattern. */
7351 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7352 emit_use (gen_rtx_REG (SImode, PR_REG));
7355 static int sh_need_epilogue_known = 0;
7357 int
7358 sh_need_epilogue (void)
7360 if (! sh_need_epilogue_known)
7362 rtx epilogue;
7364 start_sequence ();
7365 sh_expand_epilogue (0);
7366 epilogue = get_insns ();
7367 end_sequence ();
7368 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7370 return sh_need_epilogue_known > 0;
7373 /* Emit code to change the current function's return address to RA.
7374 TEMP is available as a scratch register, if needed. */
7376 void
7377 sh_set_return_address (rtx ra, rtx tmp)
7379 HARD_REG_SET live_regs_mask;
7380 int d;
7381 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7382 int pr_offset;
7384 d = calc_live_regs (&live_regs_mask);
7386 /* If pr_reg isn't live, we can set it (or the register given in
7387 sh_media_register_for_return) directly. */
7388 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7390 rtx rr;
7392 if (TARGET_SHMEDIA)
7394 int rr_regno = sh_media_register_for_return ();
7396 if (rr_regno < 0)
7397 rr_regno = pr_reg;
7399 rr = gen_rtx_REG (DImode, rr_regno);
7401 else
7402 rr = gen_rtx_REG (SImode, pr_reg);
7404 emit_insn (GEN_MOV (rr, ra));
7405 /* Tell flow the register for return isn't dead. */
7406 emit_use (rr);
7407 return;
7410 if (TARGET_SH5)
7412 int offset;
7413 save_schedule schedule;
7414 save_entry *entry;
7416 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7417 offset = entry[1].offset;
7418 for (; entry->mode != VOIDmode; entry--)
7419 if (entry->reg == pr_reg)
7420 goto found;
7422 /* We can't find the PR register. */
7423 gcc_unreachable ();
7425 found:
7426 offset = entry->offset - offset;
7427 pr_offset = (rounded_frame_size (d) + offset
7428 + SHMEDIA_REGS_STACK_ADJUST ());
7430 else
7431 pr_offset = rounded_frame_size (d);
7433 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7434 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7436 tmp = gen_frame_mem (Pmode, tmp);
7437 emit_insn (GEN_MOV (tmp, ra));
7439 /* Tell flow this store isn't dead. */
7439 emit_use (tmp);
7442 /* Clear variables at function end. */
7444 static void
7445 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7446 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7448 sh_need_epilogue_known = 0;
7451 static rtx
7452 sh_builtin_saveregs (void)
7454 /* First unnamed integer register. */
7455 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7456 /* Number of integer registers we need to save. */
7457 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7458 /* First unnamed SFmode float reg */
7459 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7460 /* Number of SFmode float regs to save. */
7461 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7462 rtx regbuf, fpregs;
7463 int bufsize, regno;
7464 alias_set_type alias_set;
7466 if (TARGET_SH5)
7468 if (n_intregs)
7470 int pushregs = n_intregs;
7472 while (pushregs < NPARM_REGS (SImode) - 1
7473 && (CALL_COOKIE_INT_REG_GET
7474 (crtl->args.info.call_cookie,
7475 NPARM_REGS (SImode) - pushregs)
7476 == 1))
7478 crtl->args.info.call_cookie
7479 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7480 - pushregs, 1);
7481 pushregs++;
7484 if (pushregs == NPARM_REGS (SImode))
7485 crtl->args.info.call_cookie
7486 |= (CALL_COOKIE_INT_REG (0, 1)
7487 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7488 else
7489 crtl->args.info.call_cookie
7490 |= CALL_COOKIE_STACKSEQ (pushregs);
7492 crtl->args.pretend_args_size += 8 * n_intregs;
7494 if (TARGET_SHCOMPACT)
7495 return const0_rtx;
7498 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7500 error ("__builtin_saveregs not supported by this subtarget");
7501 return const0_rtx;
7504 if (TARGET_SHMEDIA)
7505 n_floatregs = 0;
7507 /* Allocate block of memory for the regs. */
7508 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7509 Or can assign_stack_local accept a 0 SIZE argument? */
7510 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7512 if (TARGET_SHMEDIA)
7513 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7514 else if (n_floatregs & 1)
7516 rtx addr;
7518 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7519 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7520 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7521 regbuf = change_address (regbuf, BLKmode, addr);
7523 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7525 rtx addr, mask;
7527 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7528 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7529 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7530 emit_insn (gen_andsi3 (addr, addr, mask));
7531 regbuf = change_address (regbuf, BLKmode, addr);
7533 else
7534 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
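/* Editorial note on the two alignment fixups above (a sketch, not part of
   the original source): both aim to make REGBUF plus N_FLOATREGS *
   UNITS_PER_WORD, the address FPREGS is set to below, 8-byte aligned so
   that the DFmode predecrement stores land on doubleword boundaries.  With
   an odd N_FLOATREGS the buffer is over-allocated by one word and its
   address ORed with UNITS_PER_WORD; with STACK_BOUNDARY < 64 the address is
   rounded via (addr + 4) & -8.  This assumes the stack slot itself provides
   at least word alignment.  */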
7535 alias_set = get_varargs_alias_set ();
7536 set_mem_alias_set (regbuf, alias_set);
7538 /* Save int args.
7539 This is optimized to only save the regs that are necessary. Explicitly
7540 named args need not be saved. */
7541 if (n_intregs > 0)
7542 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7543 adjust_address (regbuf, BLKmode,
7544 n_floatregs * UNITS_PER_WORD),
7545 n_intregs);
7547 if (TARGET_SHMEDIA)
7548 /* Return the address of the regbuf. */
7549 return XEXP (regbuf, 0);
7551 /* Save float args.
7552 This is optimized to only save the regs that are necessary. Explicitly
7553 named args need not be saved.
7554 We explicitly build a pointer to the buffer because it halves the insn
7555 count when not optimizing (otherwise the pointer is built for each reg
7556 saved).
7557 We emit the moves in reverse order so that we can use predecrement. */
7559 fpregs = copy_to_mode_reg (Pmode,
7560 plus_constant (XEXP (regbuf, 0),
7561 n_floatregs * UNITS_PER_WORD));
7562 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7564 rtx mem;
7565 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7567 emit_insn (gen_addsi3 (fpregs, fpregs,
7568 GEN_INT (-2 * UNITS_PER_WORD)));
7569 mem = change_address (regbuf, DFmode, fpregs);
7570 emit_move_insn (mem,
7571 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7573 regno = first_floatreg;
7574 if (regno & 1)
7576 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7577 mem = change_address (regbuf, SFmode, fpregs);
7578 emit_move_insn (mem,
7579 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7580 - (TARGET_LITTLE_ENDIAN != 0)));
7583 else
7584 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7586 rtx mem;
7588 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7589 mem = change_address (regbuf, SFmode, fpregs);
7590 emit_move_insn (mem,
7591 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7594 /* Return the address of the regbuf. */
7595 return XEXP (regbuf, 0);
7598 /* Define the `__builtin_va_list' type for the ABI. */
7600 static tree
7601 sh_build_builtin_va_list (void)
7603 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7604 tree record;
7606 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7607 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7608 return ptr_type_node;
7610 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7612 f_next_o = build_decl (BUILTINS_LOCATION,
7613 FIELD_DECL, get_identifier ("__va_next_o"),
7614 ptr_type_node);
7615 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7616 FIELD_DECL,
7617 get_identifier ("__va_next_o_limit"),
7618 ptr_type_node);
7619 f_next_fp = build_decl (BUILTINS_LOCATION,
7620 FIELD_DECL, get_identifier ("__va_next_fp"),
7621 ptr_type_node);
7622 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7623 FIELD_DECL,
7624 get_identifier ("__va_next_fp_limit"),
7625 ptr_type_node);
7626 f_next_stack = build_decl (BUILTINS_LOCATION,
7627 FIELD_DECL, get_identifier ("__va_next_stack"),
7628 ptr_type_node);
7630 DECL_FIELD_CONTEXT (f_next_o) = record;
7631 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7632 DECL_FIELD_CONTEXT (f_next_fp) = record;
7633 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7634 DECL_FIELD_CONTEXT (f_next_stack) = record;
7636 TYPE_FIELDS (record) = f_next_o;
7637 TREE_CHAIN (f_next_o) = f_next_o_limit;
7638 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7639 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7640 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7642 layout_type (record);
7644 return record;
7647 /* Implement `va_start' for varargs and stdarg. */
7649 static void
7650 sh_va_start (tree valist, rtx nextarg)
7652 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7653 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7654 tree t, u;
7655 int nfp, nint;
7657 if (TARGET_SH5)
7659 expand_builtin_saveregs ();
7660 std_expand_builtin_va_start (valist, nextarg);
7661 return;
7664 if ((! TARGET_SH2E && ! TARGET_SH4)
7665 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7667 std_expand_builtin_va_start (valist, nextarg);
7668 return;
7671 f_next_o = TYPE_FIELDS (va_list_type_node);
7672 f_next_o_limit = TREE_CHAIN (f_next_o);
7673 f_next_fp = TREE_CHAIN (f_next_o_limit);
7674 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7675 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7677 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7678 NULL_TREE);
7679 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7680 valist, f_next_o_limit, NULL_TREE);
7681 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7682 NULL_TREE);
7683 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7684 valist, f_next_fp_limit, NULL_TREE);
7685 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7686 valist, f_next_stack, NULL_TREE);
7688 /* Call __builtin_saveregs. */
7689 u = make_tree (sizetype, expand_builtin_saveregs ());
7690 u = fold_convert (ptr_type_node, u);
7691 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7692 TREE_SIDE_EFFECTS (t) = 1;
7693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
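  /* NFP is the number of floating point argument registers that were left
     unused by the named arguments; __builtin_saveregs stored exactly those,
     so the FP save area ends NFP words past next_fp.  */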
7695 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7696 if (nfp < 8)
7697 nfp = 8 - nfp;
7698 else
7699 nfp = 0;
7700 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7701 size_int (UNITS_PER_WORD * nfp));
7702 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7703 TREE_SIDE_EFFECTS (t) = 1;
7704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7706 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7707 TREE_SIDE_EFFECTS (t) = 1;
7708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
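  /* Likewise NINT is the number of unused integer argument registers; their
     save area follows the FP one, so next_o_limit is NINT words past
     next_o.  */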
7710 nint = crtl->args.info.arg_count[SH_ARG_INT];
7711 if (nint < 4)
7712 nint = 4 - nint;
7713 else
7714 nint = 0;
7715 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7716 size_int (UNITS_PER_WORD * nint));
7717 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7718 TREE_SIDE_EFFECTS (t) = 1;
7719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7721 u = make_tree (ptr_type_node, nextarg);
7722 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7723 TREE_SIDE_EFFECTS (t) = 1;
7724 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7727 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7728 member, return it. */
7729 static tree
7730 find_sole_member (tree type)
7732 tree field, member = NULL_TREE;
7734 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7736 if (TREE_CODE (field) != FIELD_DECL)
7737 continue;
7738 if (!DECL_SIZE (field))
7739 return NULL_TREE;
7740 if (integer_zerop (DECL_SIZE (field)))
7741 continue;
7742 if (member)
7743 return NULL_TREE;
7744 member = field;
7746 return member;
7748 /* Implement `va_arg'. */
7750 static tree
7751 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7752 gimple_seq *post_p ATTRIBUTE_UNUSED)
7754 HOST_WIDE_INT size, rsize;
7755 tree tmp, pptr_type_node;
7756 tree addr, lab_over = NULL, result = NULL;
7757 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7758 tree eff_type;
7760 if (pass_by_ref)
7761 type = build_pointer_type (type);
7763 size = int_size_in_bytes (type);
7764 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7765 pptr_type_node = build_pointer_type (ptr_type_node);
7767 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7768 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7770 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7771 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7772 int pass_as_float;
7773 tree lab_false;
7774 tree member;
7776 f_next_o = TYPE_FIELDS (va_list_type_node);
7777 f_next_o_limit = TREE_CHAIN (f_next_o);
7778 f_next_fp = TREE_CHAIN (f_next_o_limit);
7779 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7780 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7782 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7783 NULL_TREE);
7784 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7785 valist, f_next_o_limit, NULL_TREE);
7786 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7787 valist, f_next_fp, NULL_TREE);
7788 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7789 valist, f_next_fp_limit, NULL_TREE);
7790 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7791 valist, f_next_stack, NULL_TREE);
7793 /* Structures with a single member with a distinct mode are passed
7794 like their member. This is relevant if the latter has a REAL_TYPE
7795 or COMPLEX_TYPE type. */
7796 eff_type = type;
7797 while (TREE_CODE (eff_type) == RECORD_TYPE
7798 && (member = find_sole_member (eff_type))
7799 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7800 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7801 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7803 tree field_type = TREE_TYPE (member);
7805 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7806 eff_type = field_type;
7807 else
7809 gcc_assert ((TYPE_ALIGN (eff_type)
7810 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7811 || (TYPE_ALIGN (eff_type)
7812 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7813 break;
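      /* Decide whether the argument is fetched from the FP register save
	 area: with a double-precision FPU (SH4 / SH2A double) that covers
	 real and complex floating types of up to 8 / 16 bytes; otherwise
	 only single-precision floats qualify.  */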
7817 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7819 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7820 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7821 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7822 && size <= 16));
7824 else
7826 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7829 addr = create_tmp_var (pptr_type_node, NULL);
7830 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7831 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7833 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7835 if (pass_as_float)
7837 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7838 tree cmp;
7839 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7841 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7842 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7844 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7845 tmp = next_fp_limit;
7846 if (size > 4 && !is_double)
7847 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7848 unshare_expr (tmp), size_int (4 - size));
7849 tmp = build2 (GE_EXPR, boolean_type_node,
7850 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7851 cmp = build3 (COND_EXPR, void_type_node, tmp,
7852 build1 (GOTO_EXPR, void_type_node,
7853 unshare_expr (lab_false)), NULL_TREE);
7854 if (!is_double)
7855 gimplify_and_add (cmp, pre_p);
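	  /* Doubles and 16 byte complex values need 8 byte alignment in the
	     save area; if next_fp_tmp is only word aligned, skip one SFmode
	     slot to round it up to an 8 byte boundary.  */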
7857 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7858 || (is_double || size == 16))
7860 tmp = fold_convert (sizetype, next_fp_tmp);
7861 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7862 size_int (UNITS_PER_WORD));
7863 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7864 unshare_expr (next_fp_tmp), tmp);
7865 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7867 if (is_double)
7868 gimplify_and_add (cmp, pre_p);
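	  /* Read the two SFmode halves of an SCmode value separately,
	     imaginary part first (matching the swapped register order used
	     for SCmode arguments on little-endian SH4, see
	     FUNCTION_ARG_SCmode_WART), and rebuild the complex value.  */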
7870 #ifdef FUNCTION_ARG_SCmode_WART
7871 if (TYPE_MODE (eff_type) == SCmode
7872 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7874 tree subtype = TREE_TYPE (eff_type);
7875 tree real, imag;
7877 imag
7878 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7879 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7881 real
7882 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7883 real = get_initialized_tmp_var (real, pre_p, NULL);
7885 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7886 if (type != eff_type)
7887 result = build1 (VIEW_CONVERT_EXPR, type, result);
7888 result = get_initialized_tmp_var (result, pre_p, NULL);
7890 #endif /* FUNCTION_ARG_SCmode_WART */
7892 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7893 gimplify_and_add (tmp, pre_p);
7895 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7896 gimplify_and_add (tmp, pre_p);
7898 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7899 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7900 gimplify_assign (unshare_expr (next_fp_tmp),
7901 unshare_expr (valist), pre_p);
7903 gimplify_assign (unshare_expr (valist),
7904 unshare_expr (next_fp_tmp), post_p);
7905 valist = next_fp_tmp;
7907 else
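	  /* General register case: if adding RSIZE would run past
	     next_o_limit the argument lives on the stack (lab_false),
	     otherwise it is fetched from the integer save area at next_o.  */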
7909 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7910 unshare_expr (next_o), size_int (rsize));
7911 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7912 unshare_expr (next_o_limit));
7913 tmp = build3 (COND_EXPR, void_type_node, tmp,
7914 build1 (GOTO_EXPR, void_type_node,
7915 unshare_expr (lab_false)),
7916 NULL_TREE);
7917 gimplify_and_add (tmp, pre_p);
7919 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7920 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7922 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7923 gimplify_and_add (tmp, pre_p);
7925 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7926 gimplify_and_add (tmp, pre_p);
7928 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7929 gimplify_assign (unshare_expr (next_o),
7930 unshare_expr (next_o_limit), pre_p);
7932 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7933 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7936 if (!result)
7938 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7939 gimplify_and_add (tmp, pre_p);
7943 /* ??? In va-sh.h, there had been code to make values larger than
7944 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7946 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7947 if (result)
7949 gimplify_assign (result, tmp, pre_p);
7950 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7951 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7952 gimplify_and_add (tmp, pre_p);
7954 else
7955 result = tmp;
7957 if (pass_by_ref)
7958 result = build_va_arg_indirect_ref (result);
7960 return result;
7963 /* 64 bit floating point memory transfers are paired single precision loads
7964 or stores. So DWARF information needs fixing in little endian mode (unless
7965 PR=SZ=1 in FPSCR). */
7967 sh_dwarf_register_span (rtx reg)
7969 unsigned regno = REGNO (reg);
7971 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7972 return NULL_RTX;
7974 return
7975 gen_rtx_PARALLEL (VOIDmode,
7976 gen_rtvec (2,
7977 gen_rtx_REG (SFmode,
7978 DBX_REGISTER_NUMBER (regno+1)),
7979 gen_rtx_REG (SFmode,
7980 DBX_REGISTER_NUMBER (regno))));
7983 static enum machine_mode
7984 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7985 int *punsignedp, const_tree funtype,
7986 int for_return ATTRIBUTE_UNUSED)
7988 if (sh_promote_prototypes (funtype))
7989 return promote_mode (type, mode, punsignedp);
7990 else
7991 return mode;
7994 static bool
7995 sh_promote_prototypes (const_tree type)
7997 if (TARGET_HITACHI)
7998 return 0;
7999 if (! type)
8000 return 1;
8001 return ! sh_attr_renesas_p (type);
8004 /* Whether an argument must be passed by reference. On SHcompact, we
8005 pretend arguments wider than 32 bits that would have been passed in
8006 registers are passed by reference, so that an SHmedia trampoline
8007 loads them into the full 64-bit registers. */
8009 static int
8010 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8011 const_tree type, bool named)
8013 unsigned HOST_WIDE_INT size;
8015 if (type)
8016 size = int_size_in_bytes (type);
8017 else
8018 size = GET_MODE_SIZE (mode);
8020 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8021 && (!named
8022 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8023 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8024 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8025 && size > 4
8026 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8027 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8028 return size;
8029 else
8030 return 0;
8033 static bool
8034 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8035 const_tree type, bool named)
8037 if (targetm.calls.must_pass_in_stack (mode, type))
8038 return true;
8040 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8041 wants to know about pass-by-reference semantics for incoming
8042 arguments. */
8043 if (! cum)
8044 return false;
8046 if (TARGET_SHCOMPACT)
8048 cum->byref = shcompact_byref (cum, mode, type, named);
8049 return cum->byref != 0;
8052 return false;
8055 static bool
8056 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8057 const_tree type, bool named ATTRIBUTE_UNUSED)
8059 /* ??? How can it possibly be correct to return true only on the
8060 caller side of the equation? Is there someplace else in the
8061 sh backend that's magically producing the copies? */
8062 return (cum->outgoing
8063 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8064 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
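/* Return the number of bytes of an argument that are passed in registers
   when the remainder of the argument is passed on the stack
   (the TARGET_ARG_PARTIAL_BYTES hook).  */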
8067 static int
8068 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8069 tree type, bool named ATTRIBUTE_UNUSED)
8071 int words = 0;
8073 if (!TARGET_SH5
8074 && PASS_IN_REG_P (*cum, mode, type)
8075 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8076 && (ROUND_REG (*cum, mode)
8077 + (mode != BLKmode
8078 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8079 : ROUND_ADVANCE (int_size_in_bytes (type)))
8080 > NPARM_REGS (mode)))
8081 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8083 else if (!TARGET_SHCOMPACT
8084 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8085 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8087 return words * UNITS_PER_WORD;
8091 /* Define where to put the arguments to a function.
8092 Value is zero to push the argument on the stack,
8093 or a hard register in which to store the argument.
8095 MODE is the argument's machine mode.
8096 TYPE is the data type of the argument (as a tree).
8097 This is null for libcalls where that information may
8098 not be available.
8099 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8100 the preceding args and about the function being called.
8101 NAMED is nonzero if this argument is a named parameter
8102 (otherwise it is an extra parameter matching an ellipsis).
8104 On SH the first args are normally in registers
8105 and the rest are pushed. Any arg that starts within the first
8106 NPARM_REGS words is at least partially passed in a register unless
8107 its data type forbids. */
8111 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8112 tree type, int named)
8114 if (! TARGET_SH5 && mode == VOIDmode)
8115 return GEN_INT (ca->renesas_abi ? 1 : 0);
8117 if (! TARGET_SH5
8118 && PASS_IN_REG_P (*ca, mode, type)
8119 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8121 int regno;
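      /* SCmode on little-endian SH4: describe the argument as a PARALLEL of
	 two SFmode registers, with the register pair given in swapped order
	 (the FUNCTION_ARG_SCmode_WART handling).  */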
8123 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8124 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8126 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8127 gen_rtx_REG (SFmode,
8128 BASE_ARG_REG (mode)
8129 + (ROUND_REG (*ca, mode) ^ 1)),
8130 const0_rtx);
8131 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8132 gen_rtx_REG (SFmode,
8133 BASE_ARG_REG (mode)
8134 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8135 GEN_INT (4));
8136 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8139 /* If the alignment of a DF value causes an SF register to be
8140 skipped, we will use that skipped register for the next SF
8141 value. */
8142 if ((TARGET_HITACHI || ca->renesas_abi)
8143 && ca->free_single_fp_reg
8144 && mode == SFmode)
8145 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8147 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8148 ^ (mode == SFmode && TARGET_SH4
8149 && TARGET_LITTLE_ENDIAN != 0
8150 && ! TARGET_HITACHI && ! ca->renesas_abi);
8151 return gen_rtx_REG (mode, regno);
8155 if (TARGET_SH5)
8157 if (mode == VOIDmode && TARGET_SHCOMPACT)
8158 return GEN_INT (ca->call_cookie);
8160 /* The following test assumes unnamed arguments are promoted to
8161 DFmode. */
8162 if (mode == SFmode && ca->free_single_fp_reg)
8163 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8165 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8166 && (named || ! ca->prototype_p)
8167 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8169 if (! ca->prototype_p && TARGET_SHMEDIA)
8170 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8172 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8173 FIRST_FP_PARM_REG
8174 + ca->arg_count[(int) SH_ARG_FLOAT]);
8177 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8178 && (! TARGET_SHCOMPACT
8179 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8180 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8181 type, named))))
8183 return gen_rtx_REG (mode, (FIRST_PARM_REG
8184 + ca->arg_count[(int) SH_ARG_INT]));
8187 return 0;
8190 return 0;
8193 /* Update the data in CUM to advance over an argument
8194 of mode MODE and data type TYPE.
8195 (TYPE is null for libcalls where that information may not be
8196 available.) */
8198 void
8199 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8200 tree type, int named)
8202 if (ca->force_mem)
8203 ca->force_mem = 0;
8204 else if (TARGET_SH5)
8206 tree type2 = (ca->byref && type
8207 ? TREE_TYPE (type)
8208 : type);
8209 enum machine_mode mode2 = (ca->byref && type
8210 ? TYPE_MODE (type2)
8211 : mode);
8212 int dwords = ((ca->byref
8213 ? ca->byref
8214 : mode2 == BLKmode
8215 ? int_size_in_bytes (type2)
8216 : GET_MODE_SIZE (mode2)) + 7) / 8;
8217 int numregs = MIN (dwords, NPARM_REGS (SImode)
8218 - ca->arg_count[(int) SH_ARG_INT]);
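      /* DWORDS is the size of the argument in 8 byte units; NUMREGS is how
	 many of those units still fit in the remaining SH5 integer argument
	 registers.  */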
8220 if (numregs)
8222 ca->arg_count[(int) SH_ARG_INT] += numregs;
8223 if (TARGET_SHCOMPACT
8224 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8226 ca->call_cookie
8227 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8228 - numregs, 1);
8229 /* N.B. We want this also for outgoing. */
8230 ca->stack_regs += numregs;
8232 else if (ca->byref)
8234 if (! ca->outgoing)
8235 ca->stack_regs += numregs;
8236 ca->byref_regs += numregs;
8237 ca->byref = 0;
8239 ca->call_cookie
8240 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8241 - numregs, 2);
8242 while (--numregs);
8243 ca->call_cookie
8244 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8245 - 1, 1);
8247 else if (dwords > numregs)
8249 int pushregs = numregs;
8251 if (TARGET_SHCOMPACT)
8252 ca->stack_regs += numregs;
8253 while (pushregs < NPARM_REGS (SImode) - 1
8254 && (CALL_COOKIE_INT_REG_GET
8255 (ca->call_cookie,
8256 NPARM_REGS (SImode) - pushregs)
8257 == 1))
8259 ca->call_cookie
8260 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8261 - pushregs, 1);
8262 pushregs++;
8264 if (numregs == NPARM_REGS (SImode))
8265 ca->call_cookie
8266 |= CALL_COOKIE_INT_REG (0, 1)
8267 | CALL_COOKIE_STACKSEQ (numregs - 1);
8268 else
8269 ca->call_cookie
8270 |= CALL_COOKIE_STACKSEQ (numregs);
8273 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8274 && (named || ! ca->prototype_p))
8276 if (mode2 == SFmode && ca->free_single_fp_reg)
8277 ca->free_single_fp_reg = 0;
8278 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8279 < NPARM_REGS (SFmode))
8281 int numfpregs
8282 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8283 NPARM_REGS (SFmode)
8284 - ca->arg_count[(int) SH_ARG_FLOAT]);
8286 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8288 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8290 if (ca->outgoing && numregs > 0)
8293 ca->call_cookie
8294 |= (CALL_COOKIE_INT_REG
8295 (ca->arg_count[(int) SH_ARG_INT]
8296 - numregs + ((numfpregs - 2) / 2),
8297 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8298 - numfpregs) / 2));
8300 while (numfpregs -= 2);
8302 else if (mode2 == SFmode && (named)
8303 && (ca->arg_count[(int) SH_ARG_FLOAT]
8304 < NPARM_REGS (SFmode)))
8305 ca->free_single_fp_reg
8306 = FIRST_FP_PARM_REG - numfpregs
8307 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8310 return;
8313 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8315 /* Note that we've used the skipped register. */
8316 if (mode == SFmode && ca->free_single_fp_reg)
8318 ca->free_single_fp_reg = 0;
8319 return;
8321 /* When we have a DF after an SF, there's an SF register that gets
8322 skipped in order to align the DF value. We note this skipped
8323 register, because the next SF value will use it, and not the
8324 SF that follows the DF. */
8325 if (mode == DFmode
8326 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8328 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8329 + BASE_ARG_REG (mode));
8333 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8334 || PASS_IN_REG_P (*ca, mode, type))
8335 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8336 = (ROUND_REG (*ca, mode)
8337 + (mode == BLKmode
8338 ? ROUND_ADVANCE (int_size_in_bytes (type))
8339 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8342 /* The Renesas calling convention doesn't quite fit into this scheme since
8343 the address is passed like an invisible argument, but one that is always
8344 passed in memory. */
8345 static rtx
8346 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8348 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8349 return 0;
8350 return gen_rtx_REG (Pmode, 2);
8353 /* Worker function for TARGET_FUNCTION_VALUE.
8355 For the SH, this is like LIBCALL_VALUE, except that we must change the
8356 mode like PROMOTE_MODE does.
8357 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8358 tested here has to be kept in sync with the one in explow.c:promote_mode.
8361 static rtx
8362 sh_function_value (const_tree valtype,
8363 const_tree fn_decl_or_type,
8364 bool outgoing ATTRIBUTE_UNUSED)
8366 if (fn_decl_or_type
8367 && !DECL_P (fn_decl_or_type))
8368 fn_decl_or_type = NULL;
8370 return gen_rtx_REG (
8371 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8372 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8373 && (TREE_CODE (valtype) == INTEGER_TYPE
8374 || TREE_CODE (valtype) == ENUMERAL_TYPE
8375 || TREE_CODE (valtype) == BOOLEAN_TYPE
8376 || TREE_CODE (valtype) == REAL_TYPE
8377 || TREE_CODE (valtype) == OFFSET_TYPE))
8378 && sh_promote_prototypes (fn_decl_or_type)
8379 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8380 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8383 /* Worker function for TARGET_LIBCALL_VALUE. */
8385 static rtx
8386 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8388 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8391 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8393 bool
8394 sh_function_value_regno_p (const unsigned int regno)
8396 return ((regno) == FIRST_RET_REG
8397 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8398 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8401 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8403 static bool
8404 sh_return_in_memory (const_tree type, const_tree fndecl)
8406 if (TARGET_SH5)
8408 if (TYPE_MODE (type) == BLKmode)
8409 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8410 else
8411 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8413 else
8415 return (TYPE_MODE (type) == BLKmode
8416 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8417 && TREE_CODE (type) == RECORD_TYPE));
8421 /* We actually emit the code in sh_expand_prologue. We used to use
8422 a static variable to flag that we need to emit this code, but that
8423 doesn't work when inlining, when functions are deferred and then emitted
8424 later. Fortunately, we already have two flags that are part of struct
8425 function that tell if a function uses varargs or stdarg. */
8426 static void
8427 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8428 enum machine_mode mode,
8429 tree type,
8430 int *pretend_arg_size,
8431 int second_time ATTRIBUTE_UNUSED)
8433 gcc_assert (cfun->stdarg);
8434 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8436 int named_parm_regs, anon_parm_regs;
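      /* NAMED_PARM_REGS counts the argument registers used up by the named
	 parameters; the remaining ANON_PARM_REGS registers have to be dumped
	 to the stack by the prologue so that va_arg can find them.  */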
8438 named_parm_regs = (ROUND_REG (*ca, mode)
8439 + (mode == BLKmode
8440 ? ROUND_ADVANCE (int_size_in_bytes (type))
8441 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8442 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8443 if (anon_parm_regs > 0)
8444 *pretend_arg_size = anon_parm_regs * 4;
8448 static bool
8449 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8451 return TARGET_SH5;
8454 static bool
8455 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8457 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8461 /* Define the offset between two registers, one to be eliminated, and
8462 the other its replacement, at the start of a routine. */
8465 initial_elimination_offset (int from, int to)
8467 int regs_saved;
8468 int regs_saved_rounding = 0;
8469 int total_saved_regs_space;
8470 int total_auto_space;
8471 int save_flags = target_flags;
8472 int copy_flags;
8473 HARD_REG_SET live_regs_mask;
8475 shmedia_space_reserved_for_target_registers = false;
8476 regs_saved = calc_live_regs (&live_regs_mask);
8477 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8479 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8481 shmedia_space_reserved_for_target_registers = true;
8482 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8485 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8486 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8487 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
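  /* On SH5 the register save area is padded up to the stack boundary; keep
     that padding in regs_saved_rounding so it is accounted together with the
     saved registers.  */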
8489 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8490 copy_flags = target_flags;
8491 target_flags = save_flags;
8493 total_saved_regs_space = regs_saved + regs_saved_rounding;
8495 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8496 return total_saved_regs_space + total_auto_space
8497 + crtl->args.info.byref_regs * 8;
8499 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8500 return total_saved_regs_space + total_auto_space
8501 + crtl->args.info.byref_regs * 8;
8503 /* Initial gap between fp and sp is 0. */
8504 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8505 return 0;
8507 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8508 return rounded_frame_size (0);
8510 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8511 return rounded_frame_size (0);
8513 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8514 && (to == HARD_FRAME_POINTER_REGNUM
8515 || to == STACK_POINTER_REGNUM));
8516 if (TARGET_SH5)
8518 int n = total_saved_regs_space;
8519 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8520 save_schedule schedule;
8521 save_entry *entry;
8523 n += total_auto_space;
8525 /* If it wasn't saved, there's not much we can do. */
8526 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8527 return n;
8529 target_flags = copy_flags;
8531 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8532 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8533 if (entry->reg == pr_reg)
8535 target_flags = save_flags;
8536 return entry->offset;
8538 gcc_unreachable ();
8540 else
8541 return total_auto_space;
8544 /* Parse the -mfixed-range= option string. */
8545 void
8546 sh_fix_range (const char *const_str)
8548 int i, first, last;
8549 char *str, *dash, *comma;
8551 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8552 REG2 are either register names or register numbers. The effect
8553 of this option is to mark the registers in the range from REG1 to
8554 REG2 as ``fixed'' so they won't be used by the compiler. */
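  /* Illustrative example only: -mfixed-range=r8-r10 marks r8, r9 and r10 as
     fixed (and call-used), keeping the register allocator away from them.  */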
8556 i = strlen (const_str);
8557 str = (char *) alloca (i + 1);
8558 memcpy (str, const_str, i + 1);
8560 while (1)
8562 dash = strchr (str, '-');
8563 if (!dash)
8565 warning (0, "value of -mfixed-range must have form REG1-REG2");
8566 return;
8568 *dash = '\0';
8569 comma = strchr (dash + 1, ',');
8570 if (comma)
8571 *comma = '\0';
8573 first = decode_reg_name (str);
8574 if (first < 0)
8576 warning (0, "unknown register name: %s", str);
8577 return;
8580 last = decode_reg_name (dash + 1);
8581 if (last < 0)
8583 warning (0, "unknown register name: %s", dash + 1);
8584 return;
8587 *dash = '-';
8589 if (first > last)
8591 warning (0, "%s-%s is an empty range", str, dash + 1);
8592 return;
8595 for (i = first; i <= last; ++i)
8596 fixed_regs[i] = call_used_regs[i] = 1;
8598 if (!comma)
8599 break;
8601 *comma = ',';
8602 str = comma + 1;
8606 /* Insert any deferred function attributes from earlier pragmas. */
8607 static void
8608 sh_insert_attributes (tree node, tree *attributes)
8610 tree attrs;
8612 if (TREE_CODE (node) != FUNCTION_DECL)
8613 return;
8615 /* We are only interested in fields. */
8616 if (!DECL_P (node))
8617 return;
8619 /* Append the attributes to the deferred attributes. */
8620 *sh_deferred_function_attributes_tail = *attributes;
8621 attrs = sh_deferred_function_attributes;
8622 if (!attrs)
8623 return;
8625 /* Some attributes imply or require the interrupt attribute. */
8626 if (!lookup_attribute ("interrupt_handler", attrs)
8627 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8629 /* If we have a trapa_handler, but no interrupt_handler attribute,
8630 insert an interrupt_handler attribute. */
8631 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8632 /* We can't use sh_pr_interrupt here because that's not in the
8633 java frontend. */
8634 attrs
8635 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8636 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8637 if the interrupt attribute is missing, we ignore the attribute
8638 and warn. */
8639 else if (lookup_attribute ("sp_switch", attrs)
8640 || lookup_attribute ("trap_exit", attrs)
8641 || lookup_attribute ("nosave_low_regs", attrs)
8642 || lookup_attribute ("resbank", attrs))
8644 tree *tail;
8646 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8648 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8649 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8650 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8651 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8652 warning (OPT_Wattributes,
8653 "%qE attribute only applies to interrupt functions",
8654 TREE_PURPOSE (attrs));
8655 else
8657 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8658 NULL_TREE);
8659 tail = &TREE_CHAIN (*tail);
8662 attrs = *attributes;
8666 /* Install the processed list. */
8667 *attributes = attrs;
8669 /* Clear deferred attributes. */
8670 sh_deferred_function_attributes = NULL_TREE;
8671 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8673 return;
8676 /* Supported attributes:
8678 interrupt_handler -- specifies this function is an interrupt handler.
8680 trapa_handler - like above, but don't save all registers.
8682 sp_switch -- specifies an alternate stack for an interrupt handler
8683 to run on.
8685 trap_exit -- use a trapa to exit an interrupt function instead of
8686 an rte instruction.
8688 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8689 This is useful on the SH3 and upwards,
8690 which have a separate set of low regs for User and Supervisor modes.
8691 This should only be used for the lowest level of interrupts. Higher levels
8692 of interrupts must save the registers in case they themselves are
8693 interrupted.
8695 renesas -- use Renesas calling/layout conventions (functions and
8696 structures).
8698 resbank -- In case of an ISR, use a register bank to save registers
8699 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8702 /* Handle a 'resbank' attribute. */
8703 static tree
8704 sh_handle_resbank_handler_attribute (tree * node, tree name,
8705 tree args ATTRIBUTE_UNUSED,
8706 int flags ATTRIBUTE_UNUSED,
8707 bool * no_add_attrs)
8709 if (!TARGET_SH2A)
8711 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8712 name);
8713 *no_add_attrs = true;
8715 if (TREE_CODE (*node) != FUNCTION_DECL)
8717 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8718 name);
8719 *no_add_attrs = true;
8722 return NULL_TREE;
8725 /* Handle an "interrupt_handler" attribute; arguments as in
8726 struct attribute_spec.handler. */
8727 static tree
8728 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8729 tree args ATTRIBUTE_UNUSED,
8730 int flags ATTRIBUTE_UNUSED,
8731 bool *no_add_attrs)
8733 if (TREE_CODE (*node) != FUNCTION_DECL)
8735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8736 name);
8737 *no_add_attrs = true;
8739 else if (TARGET_SHCOMPACT)
8741 error ("attribute interrupt_handler is not compatible with -m5-compact");
8742 *no_add_attrs = true;
8745 return NULL_TREE;
8748 /* Handle a 'function_vector' attribute; arguments as in
8749 struct attribute_spec.handler. */
8750 static tree
8751 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8752 tree args ATTRIBUTE_UNUSED,
8753 int flags ATTRIBUTE_UNUSED,
8754 bool * no_add_attrs)
8756 if (!TARGET_SH2A)
8758 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8759 name);
8760 *no_add_attrs = true;
8762 else if (TREE_CODE (*node) != FUNCTION_DECL)
8764 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8765 name);
8766 *no_add_attrs = true;
8768 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8770 /* The argument must be a constant integer. */
8771 warning (OPT_Wattributes,
8772 "%qE attribute argument not an integer constant",
8773 name);
8774 *no_add_attrs = true;
8776 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8778 /* The argument value must be between 0 and 255. */
8779 warning (OPT_Wattributes,
8780 "%qE attribute argument should be between 0 to 255",
8781 name);
8782 *no_add_attrs = true;
8784 return NULL_TREE;
8787 /* Returns 1 if the given rtx X is a SYMBOL_REF for a function that has
8788 been assigned the attribute 'function_vector'. */
8790 sh2a_is_function_vector_call (rtx x)
8792 if (GET_CODE (x) == SYMBOL_REF
8793 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8795 tree tr = SYMBOL_REF_DECL (x);
8797 if (sh2a_function_vector_p (tr))
8798 return 1;
8801 return 0;
8804 /* Returns the function vector number, if the attribute
8805 'function_vector' is assigned, otherwise returns zero. */
8807 sh2a_get_function_vector_number (rtx x)
8809 int num;
8810 tree list, t;
8812 if ((GET_CODE (x) == SYMBOL_REF)
8813 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8815 t = SYMBOL_REF_DECL (x);
8817 if (TREE_CODE (t) != FUNCTION_DECL)
8818 return 0;
8820 list = SH_ATTRIBUTES (t);
8821 while (list)
8823 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8825 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8826 return num;
8829 list = TREE_CHAIN (list);
8832 return 0;
8834 else
8835 return 0;
8838 /* Handle an "sp_switch" attribute; arguments as in
8839 struct attribute_spec.handler. */
8840 static tree
8841 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8842 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8844 if (TREE_CODE (*node) != FUNCTION_DECL)
8846 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8847 name);
8848 *no_add_attrs = true;
8850 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8852 /* The argument must be a constant string. */
8853 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8854 name);
8855 *no_add_attrs = true;
8858 return NULL_TREE;
8861 /* Handle a "trap_exit" attribute; arguments as in
8862 struct attribute_spec.handler. */
8863 static tree
8864 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8865 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8867 if (TREE_CODE (*node) != FUNCTION_DECL)
8869 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8870 name);
8871 *no_add_attrs = true;
8873 /* The argument specifies a trap number to be used in a trapa instruction
8874 at function exit (instead of an rte instruction). */
8875 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8877 /* The argument must be a constant integer. */
8878 warning (OPT_Wattributes, "%qE attribute argument not an "
8879 "integer constant", name);
8880 *no_add_attrs = true;
8883 return NULL_TREE;
8886 static tree
8887 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8888 tree name ATTRIBUTE_UNUSED,
8889 tree args ATTRIBUTE_UNUSED,
8890 int flags ATTRIBUTE_UNUSED,
8891 bool *no_add_attrs ATTRIBUTE_UNUSED)
8893 return NULL_TREE;
8896 /* True if __attribute__((renesas)) or -mrenesas. */
8898 sh_attr_renesas_p (const_tree td)
8900 if (TARGET_HITACHI)
8901 return 1;
8902 if (td == 0)
8903 return 0;
8904 if (DECL_P (td))
8905 td = TREE_TYPE (td);
8906 if (td == error_mark_node)
8907 return 0;
8908 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8909 != NULL_TREE);
8912 /* True if __attribute__((renesas)) or -mrenesas, for the current
8913 function. */
8915 sh_cfun_attr_renesas_p (void)
8917 return sh_attr_renesas_p (current_function_decl);
8921 sh_cfun_interrupt_handler_p (void)
8923 return (lookup_attribute ("interrupt_handler",
8924 DECL_ATTRIBUTES (current_function_decl))
8925 != NULL_TREE);
8928 /* Returns 1 if FUNC has been assigned the attribute
8929 "function_vector". */
8931 sh2a_function_vector_p (tree func)
8933 tree list;
8934 if (TREE_CODE (func) != FUNCTION_DECL)
8935 return 0;
8937 list = SH_ATTRIBUTES (func);
8938 while (list)
8940 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8941 return 1;
8943 list = TREE_CHAIN (list);
8945 return 0;
8948 /* Returns TRUE if the current function has the "resbank" attribute. */
8951 sh_cfun_resbank_handler_p (void)
8953 return ((lookup_attribute ("resbank",
8954 DECL_ATTRIBUTES (current_function_decl))
8955 != NULL_TREE)
8956 && (lookup_attribute ("interrupt_handler",
8957 DECL_ATTRIBUTES (current_function_decl))
8958 != NULL_TREE) && TARGET_SH2A);
8961 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8963 static const char *
8964 sh_check_pch_target_flags (int old_flags)
8966 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8967 | MASK_SH_E | MASK_HARD_SH4
8968 | MASK_FPU_SINGLE | MASK_SH4))
8969 return _("created and used with different architectures / ABIs");
8970 if ((old_flags ^ target_flags) & MASK_HITACHI)
8971 return _("created and used with different ABIs");
8972 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8973 return _("created and used with different endianness");
8974 return NULL;
8977 /* Predicates used by the templates. */
8979 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8980 Used only in general_movsrc_operand. */
8983 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8985 switch (REGNO (op))
8987 case PR_REG:
8988 case MACL_REG:
8989 case MACH_REG:
8990 return 1;
8992 return 0;
8995 /* Nonzero if OP is a floating point value with value 0.0. */
8998 fp_zero_operand (rtx op)
9000 REAL_VALUE_TYPE r;
9002 if (GET_MODE (op) != SFmode)
9003 return 0;
9005 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9006 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9009 /* Nonzero if OP is a floating point value with value 1.0. */
9012 fp_one_operand (rtx op)
9014 REAL_VALUE_TYPE r;
9016 if (GET_MODE (op) != SFmode)
9017 return 0;
9019 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9020 return REAL_VALUES_EQUAL (r, dconst1);
9023 /* In general mode switching is used. If we are
9024 compiling without -mfmovd, movsf_ie isn't taken into account for
9025 mode switching. We could check in machine_dependent_reorg for
9026 cases where we know we are in single precision mode, but there is
9027 no interface to find that out during reload, so we must avoid
9028 choosing an fldi alternative during reload and thus failing to
9029 allocate a scratch register for the constant loading. */
9031 fldi_ok (void)
9033 return 1;
9037 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9039 enum rtx_code code = GET_CODE (op);
9040 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9043 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
9044 enum tls_model
9045 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9047 if (GET_CODE (op) != SYMBOL_REF)
9048 return TLS_MODEL_NONE;
9049 return SYMBOL_REF_TLS_MODEL (op);
9052 /* Return the destination address of a branch. */
9054 static int
9055 branch_dest (rtx branch)
9057 rtx dest = SET_SRC (PATTERN (branch));
9058 int dest_uid;
9060 if (GET_CODE (dest) == IF_THEN_ELSE)
9061 dest = XEXP (dest, 1);
9062 dest = XEXP (dest, 0);
9063 dest_uid = INSN_UID (dest);
9064 return INSN_ADDRESSES (dest_uid);
9067 /* Return nonzero if REG is not used after INSN.
9068 We assume REG is a reload reg, and therefore does
9069 not live past labels. It may live past calls or jumps though. */
9071 reg_unused_after (rtx reg, rtx insn)
9073 enum rtx_code code;
9074 rtx set;
9076 /* If the reg is set by this instruction, then it is safe for our
9077 case. Disregard the case where this is a store to memory, since
9078 we are checking a register used in the store address. */
9079 set = single_set (insn);
9080 if (set && !MEM_P (SET_DEST (set))
9081 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9082 return 1;
9084 while ((insn = NEXT_INSN (insn)))
9086 rtx set;
9087 if (!INSN_P (insn))
9088 continue;
9090 code = GET_CODE (insn);
9092 #if 0
9093 /* If this is a label that existed before reload, then the register
9094 is dead here. However, if this is a label added by reorg, then
9095 the register may still be live here. We can't tell the difference,
9096 so we just ignore labels completely. */
9097 if (code == CODE_LABEL)
9098 return 1;
9099 /* else */
9100 #endif
9102 if (code == JUMP_INSN)
9103 return 0;
9105 /* If this is a sequence, we must handle them all at once.
9106 We could have for instance a call that sets the target register,
9107 and an insn in a delay slot that uses the register. In this case,
9108 we must return 0. */
9109 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9111 int i;
9112 int retval = 0;
9114 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9116 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9117 rtx set = single_set (this_insn);
9119 if (CALL_P (this_insn))
9120 code = CALL_INSN;
9121 else if (JUMP_P (this_insn))
9123 if (INSN_ANNULLED_BRANCH_P (this_insn))
9124 return 0;
9125 code = JUMP_INSN;
9128 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9129 return 0;
9130 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9132 if (!MEM_P (SET_DEST (set)))
9133 retval = 1;
9134 else
9135 return 0;
9137 if (set == 0
9138 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9139 return 0;
9141 if (retval == 1)
9142 return 1;
9143 else if (code == JUMP_INSN)
9144 return 0;
9147 set = single_set (insn);
9148 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9149 return 0;
9150 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9151 return !MEM_P (SET_DEST (set));
9152 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9153 return 0;
9155 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9156 return 1;
9158 return 1;
9161 #include "ggc.h"
9163 static GTY(()) rtx fpscr_rtx;
9165 get_fpscr_rtx (void)
9167 if (! fpscr_rtx)
9169 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9170 REG_USERVAR_P (fpscr_rtx) = 1;
9171 mark_user_reg (fpscr_rtx);
9173 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9174 mark_user_reg (fpscr_rtx);
9175 return fpscr_rtx;
9178 static GTY(()) tree fpscr_values;
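/* Load FPSCR from element INDEX of the global array __fpscr_values, using
   SCRATCH to form the address when no new pseudos can be created.  */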
9180 static void
9181 emit_fpu_switch (rtx scratch, int index)
9183 rtx dst, src;
9185 if (fpscr_values == NULL)
9187 tree t;
9189 t = build_index_type (integer_one_node);
9190 t = build_array_type (integer_type_node, t);
9191 t = build_decl (BUILTINS_LOCATION,
9192 VAR_DECL, get_identifier ("__fpscr_values"), t);
9193 DECL_ARTIFICIAL (t) = 1;
9194 DECL_IGNORED_P (t) = 1;
9195 DECL_EXTERNAL (t) = 1;
9196 TREE_STATIC (t) = 1;
9197 TREE_PUBLIC (t) = 1;
9198 TREE_USED (t) = 1;
9200 fpscr_values = t;
9203 src = DECL_RTL (fpscr_values);
9204 if (!can_create_pseudo_p ())
9206 emit_move_insn (scratch, XEXP (src, 0));
9207 if (index != 0)
9208 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9209 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9211 else
9212 src = adjust_address (src, PSImode, index * 4);
9214 dst = get_fpscr_rtx ();
9215 emit_move_insn (dst, src);
9218 void
9219 emit_sf_insn (rtx pat)
9221 emit_insn (pat);
9224 void
9225 emit_df_insn (rtx pat)
9227 emit_insn (pat);
9230 void
9231 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9233 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9236 void
9237 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9239 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9240 get_fpscr_rtx ()));
9243 void
9244 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9246 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9249 void
9250 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9252 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9253 get_fpscr_rtx ()));
9256 static rtx get_free_reg (HARD_REG_SET);
9258 /* This function returns a register to use for loading the address from
9259 which to load the fpscr. Currently it always returns r1 or r7, but when we are
9260 able to use pseudo registers after combine, or have a better mechanism
9261 for choosing a register, it should be done here. */
9262 /* REGS_LIVE is the liveness information for the point for which we
9263 need this allocation. In some bare-bones exit blocks, r1 is live at the
9264 start. We can even have all of r0..r3 being live:
9265 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9266 The insn before which the new insns are placed will clobber the register
9267 we return. If a basic block consists only of setting the return value
9268 register to a pseudo and using that register, the return value is not
9269 live before or after this block, yet we'll insert our insns right in
9270 the middle. */
9272 static rtx
9273 get_free_reg (HARD_REG_SET regs_live)
9275 if (! TEST_HARD_REG_BIT (regs_live, 1))
9276 return gen_rtx_REG (Pmode, 1);
9278 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9279 there shouldn't be anything but a jump before the function end. */
9280 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9281 return gen_rtx_REG (Pmode, 7);
9284 /* This function will set the fpscr from memory.
9285 MODE is the mode we are setting it to. */
9286 void
9287 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9289 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9290 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9291 rtx addr_reg;
9293 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9294 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9297 /* Is the given character a logical line separator for the assembler? */
9298 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9299 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9300 #endif
9303 sh_insn_length_adjustment (rtx insn)
9305 /* Instructions with unfilled delay slots take up an extra two bytes for
9306 the nop in the delay slot. */
9307 if (((NONJUMP_INSN_P (insn)
9308 && GET_CODE (PATTERN (insn)) != USE
9309 && GET_CODE (PATTERN (insn)) != CLOBBER)
9310 || CALL_P (insn)
9311 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9312 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9313 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9314 return 2;
9316 /* SH2e has a bug that prevents the use of annulled branches, so if
9317 the delay slot is not filled, we'll have to put a NOP in it. */
9318 if (sh_cpu_attr == CPU_SH2E
9319 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9320 && get_attr_type (insn) == TYPE_CBRANCH
9321 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9322 return 2;
9324 /* sh-dsp parallel processing insns take four bytes instead of two. */
9326 if (NONJUMP_INSN_P (insn))
9328 int sum = 0;
9329 rtx body = PATTERN (insn);
9330 const char *templ;
9331 char c;
9332 int maybe_label = 1;
9334 if (GET_CODE (body) == ASM_INPUT)
9335 templ = XSTR (body, 0);
9336 else if (asm_noperands (body) >= 0)
9337 templ
9338 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9339 else
9340 return 0;
9343 int ppi_adjust = 0;
9346 c = *templ++;
9347 while (c == ' ' || c == '\t');
9348 /* all sh-dsp parallel-processing insns start with p.
9349 The only non-ppi sh insn starting with p is pref.
9350 The only ppi starting with pr is prnd. */
9351 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9352 ppi_adjust = 2;
9353 /* The repeat pseudo-insn expands to three insns, a total of
9354 six bytes in size. */
9355 else if ((c == 'r' || c == 'R')
9356 && ! strncasecmp ("epeat", templ, 5))
9357 ppi_adjust = 4;
9358 while (c && c != '\n'
9359 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9361 /* If this is a label, it is obviously not a ppi insn. */
9362 if (c == ':' && maybe_label)
9364 ppi_adjust = 0;
9365 break;
9367 else if (c == '\'' || c == '"')
9368 maybe_label = 0;
9369 c = *templ++;
9371 sum += ppi_adjust;
9372 maybe_label = c != ':';
9374 while (c);
9375 return sum;
9377 return 0;
9380 /* Return TRUE for a valid displacement for the REG+disp addressing
9381 with MODE. */
9383 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9384 into the FRx registers. We implement this by setting the maximum offset
9385 to zero when the value is SFmode. This also restricts loading of SFmode
9386 values into the integer registers, but that can't be helped. */
9388 /* The SH allows a displacement in a QI or HI amode, but only when the
9389 other operand is R0. GCC doesn't handle this very well, so we forgo
9390 all of that.
9392 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9393 DI can be any number 0..60. */
9395 bool
9396 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9398 if (CONST_INT_P (op))
9400 if (TARGET_SHMEDIA)
9402 int size;
9404 /* Check if this is the address of an unaligned load / store. */
9405 if (mode == VOIDmode)
9406 return CONST_OK_FOR_I06 (INTVAL (op));
9408 size = GET_MODE_SIZE (mode);
9409 return (!(INTVAL (op) & (size - 1))
9410 && INTVAL (op) >= -512 * size
9411 && INTVAL (op) < 512 * size);
9414 if (TARGET_SH2A)
9416 if (GET_MODE_SIZE (mode) == 1
9417 && (unsigned) INTVAL (op) < 4096)
9418 return true;
9421 if ((GET_MODE_SIZE (mode) == 4
9422 && (unsigned) INTVAL (op) < 64
9423 && !(INTVAL (op) & 3)
9424 && !(TARGET_SH2E && mode == SFmode))
9425 || (GET_MODE_SIZE (mode) == 4
9426 && (unsigned) INTVAL (op) < 16383
9427 && !(INTVAL (op) & 3) && TARGET_SH2A))
9428 return true;
9430 if ((GET_MODE_SIZE (mode) == 8
9431 && (unsigned) INTVAL (op) < 60
9432 && !(INTVAL (op) & 3)
9433 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9434 || ((GET_MODE_SIZE (mode)==8)
9435 && (unsigned) INTVAL (op) < 8192
9436 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9437 && (TARGET_SH2A && mode == DFmode)))
9438 return true;
9441 return false;
9444 /* Recognize an RTL expression that is a valid memory address for
9445 an instruction.
9446 The MODE argument is the machine mode for the MEM expression
9447 that wants to use this address.
9448 Allow REG
9449 REG+disp
9450 REG+r0
9451 REG++
9452 --REG */
9454 static bool
9455 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9457 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9458 return true;
9459 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9460 && ! TARGET_SHMEDIA
9461 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9462 return true;
9463 else if (GET_CODE (x) == PLUS
9464 && (mode != PSImode || reload_completed))
9466 rtx xop0 = XEXP (x, 0);
9467 rtx xop1 = XEXP (x, 1);
9469 if (GET_MODE_SIZE (mode) <= 8
9470 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9471 && sh_legitimate_index_p (mode, xop1))
9472 return true;
9474 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9475 || ((xop0 == stack_pointer_rtx
9476 || xop0 == hard_frame_pointer_rtx)
9477 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9478 || ((xop1 == stack_pointer_rtx
9479 || xop1 == hard_frame_pointer_rtx)
9480 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9481 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9482 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9483 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9484 && TARGET_FMOVD && mode == DFmode)))
9486 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9487 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9488 return true;
9489 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9490 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9491 return true;
9495 return false;
9498 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9499 isn't protected by a PIC unspec. */
9501 nonpic_symbol_mentioned_p (rtx x)
9503 register const char *fmt;
9504 register int i;
9506 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9507 || GET_CODE (x) == PC)
9508 return 1;
9510 /* We don't want to look into the possible MEM location of a
9511 CONST_DOUBLE, since we're not going to use it, in general. */
9512 if (GET_CODE (x) == CONST_DOUBLE)
9513 return 0;
9515 if (GET_CODE (x) == UNSPEC
9516 && (XINT (x, 1) == UNSPEC_PIC
9517 || XINT (x, 1) == UNSPEC_GOT
9518 || XINT (x, 1) == UNSPEC_GOTOFF
9519 || XINT (x, 1) == UNSPEC_GOTPLT
9520 || XINT (x, 1) == UNSPEC_GOTTPOFF
9521 || XINT (x, 1) == UNSPEC_DTPOFF
9522 || XINT (x, 1) == UNSPEC_TPOFF
9523 || XINT (x, 1) == UNSPEC_PLT
9524 || XINT (x, 1) == UNSPEC_SYMOFF
9525 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9526 return 0;
9528 fmt = GET_RTX_FORMAT (GET_CODE (x));
9529 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9531 if (fmt[i] == 'E')
9533 register int j;
9535 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9536 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9537 return 1;
9539 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9540 return 1;
9543 return 0;
9546 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9547 @GOTOFF in `reg'. */
9549 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9550 rtx reg)
9552 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9553 return orig;
9555 if (GET_CODE (orig) == LABEL_REF
9556 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9558 if (reg == 0)
9559 reg = gen_reg_rtx (Pmode);
9561 emit_insn (gen_symGOTOFF2reg (reg, orig));
9562 return reg;
9564 else if (GET_CODE (orig) == SYMBOL_REF)
9566 if (reg == 0)
9567 reg = gen_reg_rtx (Pmode);
9569 emit_insn (gen_symGOT2reg (reg, orig));
9570 return reg;
9572 return orig;
9575 /* Try machine-dependent ways of modifying an illegitimate address
9576 to be legitimate. If we find one, return the new, valid address.
9577 Otherwise, return X.
9579 For the SH, if X is almost suitable for indexing, but the offset is
9580 out of range, convert it into a normal form so that CSE has a chance
9581 of reducing the number of address registers used. */
9583 static rtx
9584 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9586 if (flag_pic)
9587 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9589 if (GET_CODE (x) == PLUS
9590 && (GET_MODE_SIZE (mode) == 4
9591 || GET_MODE_SIZE (mode) == 8)
9592 && CONST_INT_P (XEXP (x, 1))
9593 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9594 && ! TARGET_SHMEDIA
9595 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9596 && ! (TARGET_SH2E && mode == SFmode))
9598 rtx index_rtx = XEXP (x, 1);
9599 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9600 rtx sum;
9602 /* On rare occasions, we might get an unaligned pointer
9603 that is indexed in a way to give an aligned address.
9604 Therefore, keep the lower two bits in offset_base. */
9605 /* Instead of offset_base 128..131 use 124..127, so that
9606 simple add suffices. */
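/* For example, with offset 128 the computation below yields
   offset_base = ((128 + 4) & ~60) - 4 = 124, so the address is rebuilt
   as (base + 124) + 4, keeping the residual displacement small.  */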
9607 if (offset > 127)
9608 offset_base = ((offset + 4) & ~60) - 4;
9609 else
9610 offset_base = offset & ~60;
9612 /* Sometimes the normal form does not suit DImode. We
9613 could avoid that by using smaller ranges, but that
9614 would give less optimized code when SImode is
9615 prevalent. */
9616 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9618 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9619 GEN_INT (offset_base), NULL_RTX, 0,
9620 OPTAB_LIB_WIDEN);
9622 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9626 return x;
9629 /* Attempt to replace *P, which is an address that needs reloading, with
9630 a valid memory address for an operand of mode MODE.
9631 Like for sh_legitimize_address, for the SH we try to get a normal form
9632 of the address. That will allow inheritance of the address reloads. */
9634 bool
9635 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9636 int itype)
9638 enum reload_type type = (enum reload_type) itype;
9640 if (GET_CODE (*p) == PLUS
9641 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9642 && CONST_INT_P (XEXP (*p, 1))
9643 && BASE_REGISTER_RTX_P (XEXP (*p, 0))
9644 && ! TARGET_SHMEDIA
9645 && ! (TARGET_SH4 && mode == DFmode)
9646 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9647 && (ALLOW_INDEXED_ADDRESS
9648 || XEXP (*p, 0) == stack_pointer_rtx
9649 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9651 rtx index_rtx = XEXP (*p, 1);
9652 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9653 rtx sum;
9655 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9657 push_reload (*p, NULL_RTX, p, NULL,
9658 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9659 goto win;
9661 if (TARGET_SH2E && mode == SFmode)
9663 *p = copy_rtx (*p);
9664 push_reload (*p, NULL_RTX, p, NULL,
9665 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9666 goto win;
9668 /* Instead of offset_base 128..131 use 124..127, so that
9669 simple add suffices. */
9670 if (offset > 127)
9671 offset_base = ((offset + 4) & ~60) - 4;
9672 else
9673 offset_base = offset & ~60;
9674 /* Sometimes the normal form does not suit DImode. We could avoid
9675 that by using smaller ranges, but that would give less optimized
9676 code when SImode is prevalent. */
9677 if (offset_base != 0
9678 && GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9680 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9681 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9682 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9683 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9684 goto win;
9687 /* We must re-recognize what we created before. */
9688 else if (GET_CODE (*p) == PLUS
9689 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9690 && GET_CODE (XEXP (*p, 0)) == PLUS
9691 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9692 && BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0))
9693 && CONST_INT_P (XEXP (*p, 1))
9694 && ! TARGET_SHMEDIA
9695 && ! (TARGET_SH2E && mode == SFmode))
9697 /* Because this address is so complex, we know it must have
9698 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9699 it is already unshared, and needs no further unsharing. */
9700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9701 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9702 goto win;
9705 return false;
9707 win:
9708 return true;
9711 /* Mark the use of a constant in the literal table. If the constant
9712 has multiple labels, make it unique. */
9713 static rtx
9714 mark_constant_pool_use (rtx x)
9716 rtx insn, lab, pattern;
9718 if (x == NULL)
9719 return x;
9721 switch (GET_CODE (x))
9723 case LABEL_REF:
9724 x = XEXP (x, 0);
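/* Fall through: X is now the CODE_LABEL that the LABEL_REF refers to.  */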
9725 case CODE_LABEL:
9726 break;
9727 default:
9728 return x;
9731 /* Get the first label in the list of labels for the same constant
9732 and delete the other labels in the list. */
9733 lab = x;
9734 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9736 if (!LABEL_P (insn)
9737 || LABEL_REFS (insn) != NEXT_INSN (insn))
9738 break;
9739 lab = insn;
9742 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9743 INSN_DELETED_P (insn) = 1;
9745 /* Mark constants in a window. */
9746 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9748 if (!NONJUMP_INSN_P (insn))
9749 continue;
9751 pattern = PATTERN (insn);
9752 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9753 continue;
9755 switch (XINT (pattern, 1))
9757 case UNSPECV_CONST2:
9758 case UNSPECV_CONST4:
9759 case UNSPECV_CONST8:
9760 XVECEXP (pattern, 0, 1) = const1_rtx;
9761 break;
9762 case UNSPECV_WINDOW_END:
9763 if (XVECEXP (pattern, 0, 0) == x)
9764 return lab;
9765 break;
9766 case UNSPECV_CONST_END:
9767 return lab;
9768 default:
9769 break;
9773 return lab;
9776 /* Return true if it's possible to redirect BRANCH1 to the destination
9777 of an unconditional jump BRANCH2. We only want to do this if the
9778 resulting branch will have a short displacement. */
9780 sh_can_redirect_branch (rtx branch1, rtx branch2)
9782 if (flag_expensive_optimizations && simplejump_p (branch2))
9784 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9785 rtx insn;
9786 int distance;
9788 for (distance = 0, insn = NEXT_INSN (branch1);
9789 insn && distance < 256;
9790 insn = PREV_INSN (insn))
9792 if (insn == dest)
9793 return 1;
9794 else
9795 distance += get_attr_length (insn);
9797 for (distance = 0, insn = NEXT_INSN (branch1);
9798 insn && distance < 256;
9799 insn = NEXT_INSN (insn))
9801 if (insn == dest)
9802 return 1;
9803 else
9804 distance += get_attr_length (insn);
9807 return 0;
9810 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9812 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9813 unsigned int new_reg)
9815 /* Interrupt functions can only use registers that have already been
9816 saved by the prologue, even if they would normally be
9817 call-clobbered. */
9819 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9820 return 0;
9822 return 1;
9825 /* Function to update the integer COST
9826 based on the relationship between INSN that is dependent on
9827 DEP_INSN through the dependence LINK. The default is to make no
9828 adjustment to COST. This can be used for example to specify to
9829 the scheduler that an output- or anti-dependence does not incur
9830 the same cost as a data-dependence. The return value should be
9831 the new value for COST. */
9832 static int
9833 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9835 rtx reg, use_pat;
9837 if (TARGET_SHMEDIA)
9839 /* On SHmedia, if the dependence is an anti-dependence or
9840 output-dependence, there is no cost. */
9841 if (REG_NOTE_KIND (link) != 0)
9843 /* However, dependencies between target register loads and
9844 uses of the register in a subsequent block that are separated
9845 by a conditional branch are not modelled - we have to make do with
9846 the anti-dependency between the target register load and the
9847 conditional branch that ends the current block. */
9848 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9849 && GET_CODE (PATTERN (dep_insn)) == SET
9850 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9851 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9852 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9854 int orig_cost = cost;
9855 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9856 rtx target = ((! note
9857 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9858 ? insn : JUMP_LABEL (insn));
9859 /* On the likely path, the branch costs 1, on the unlikely path,
9860 it costs 3. */
9861 cost--;
9863 target = next_active_insn (target);
9864 while (target && ! flow_dependent_p (target, dep_insn)
9865 && --cost > 0);
9866 /* If two branches are executed in immediate succession, with the
9867 first branch properly predicted, this causes a stall at the
9868 second branch, hence we won't need the target for the
9869 second branch for two cycles after the launch of the first
9870 branch. */
9871 if (cost > orig_cost - 2)
9872 cost = orig_cost - 2;
9874 else
9875 cost = 0;
9878 else if (get_attr_is_mac_media (insn)
9879 && get_attr_is_mac_media (dep_insn))
9880 cost = 1;
9882 else if (! reload_completed
9883 && GET_CODE (PATTERN (insn)) == SET
9884 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9885 && GET_CODE (PATTERN (dep_insn)) == SET
9886 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9887 && cost < 4)
9888 cost = 4;
9889 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9890 that is needed at the target. */
9891 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9892 && ! flow_dependent_p (insn, dep_insn))
9893 cost--;
9895 else if (REG_NOTE_KIND (link) == 0)
9897 enum attr_type type;
9898 rtx dep_set;
9900 if (recog_memoized (insn) < 0
9901 || recog_memoized (dep_insn) < 0)
9902 return cost;
9904 dep_set = single_set (dep_insn);
9906 /* The latency that we specify in the scheduling description refers
9907 to the actual output, not to an auto-increment register; for that,
9908 the latency is one. */
9909 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9911 rtx set = single_set (insn);
9913 if (set
9914 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9915 && (!MEM_P (SET_DEST (set))
9916 || !reg_mentioned_p (SET_DEST (dep_set),
9917 XEXP (SET_DEST (set), 0))))
9918 cost = 1;
9920 /* The only input for a call that is timing-critical is the
9921 function's address. */
9922 if (CALL_P (insn))
9924 rtx call = PATTERN (insn);
9926 if (GET_CODE (call) == PARALLEL)
9927 call = XVECEXP (call, 0, 0);
9928 if (GET_CODE (call) == SET)
9929 call = SET_SRC (call);
9930 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9931 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9932 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9933 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9934 cost -= TARGET_SH4_300 ? 3 : 6;
9936 /* Likewise, the most timing-critical input for an sfunc call
9937 is the function address. However, sfuncs typically start
9938 using their arguments pretty quickly.
9939 Assume a four cycle delay for SH4 before they are needed.
9940 Cached ST40-300 calls are quicker, so assume only a one
9941 cycle delay there.
9942 ??? Maybe we should encode the delays till input registers
9943 are needed by sfuncs into the sfunc call insn. */
9944 /* All sfunc calls are parallels with at least four components.
9945 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9946 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9947 && XVECLEN (PATTERN (insn), 0) >= 4
9948 && (reg = sfunc_uses_reg (insn)))
9950 if (! reg_set_p (reg, dep_insn))
9951 cost -= TARGET_SH4_300 ? 1 : 4;
9953 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9955 enum attr_type dep_type = get_attr_type (dep_insn);
9957 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9958 cost--;
9959 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9960 && (type = get_attr_type (insn)) != TYPE_CALL
9961 && type != TYPE_SFUNC)
9962 cost--;
9963 /* When the preceding instruction loads the shift amount of
9964 the following SHAD/SHLD, the latency of the load is increased
9965 by 1 cycle. */
9966 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9967 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9968 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9969 XEXP (SET_SRC (single_set (insn)),
9970 1)))
9971 cost++;
9972 /* When an LS group instruction with a latency of less than
9973 3 cycles is followed by a double-precision floating-point
9974 instruction, FIPR, or FTRV, the latency of the first
9975 instruction is increased to 3 cycles. */
9976 else if (cost < 3
9977 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9978 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9979 cost = 3;
9980 /* The lsw register of a double-precision computation is ready one
9981 cycle earlier. */
9982 else if (reload_completed
9983 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9984 && (use_pat = single_set (insn))
9985 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9986 SET_SRC (use_pat)))
9987 cost -= 1;
9989 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9990 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9991 cost -= 1;
9993 else if (TARGET_SH4_300)
9995 /* Stores need their input register two cycles later. */
9996 if (dep_set && cost >= 1
9997 && ((type = get_attr_type (insn)) == TYPE_STORE
9998 || type == TYPE_PSTORE
9999 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10001 rtx set = single_set (insn);
10003 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10004 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10006 cost -= 2;
10007 /* But don't reduce the cost below 1 if the address depends
10008 on a side effect of dep_insn. */
10009 if (cost < 1
10010 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10011 cost = 1;
10016 /* An anti-dependence penalty of two applies if the first insn is a double
10017 precision fadd / fsub / fmul. */
10018 else if (!TARGET_SH4_300
10019 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10020 && recog_memoized (dep_insn) >= 0
10021 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10022 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10023 /* A lot of alleged anti-flow dependences are fake,
10024 so check this one is real. */
10025 && flow_dependent_p (dep_insn, insn))
10026 cost = 2;
10028 return cost;
10031 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10032 if DEP_INSN is anti-flow dependent on INSN. */
10033 static int
10034 flow_dependent_p (rtx insn, rtx dep_insn)
10036 rtx tmp = PATTERN (insn);
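/* note_stores invokes flow_dependent_p_1 for each location stored by
   DEP_INSN; the helper clears TMP if that location is referenced by
   INSN's pattern, so TMP == NULL afterwards means INSN uses a value
   that DEP_INSN produces.  */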
10038 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10039 return tmp == NULL_RTX;
10042 /* A helper function for flow_dependent_p called through note_stores. */
10043 static void
10044 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10046 rtx * pinsn = (rtx *) data;
10048 if (*pinsn && reg_referenced_p (x, *pinsn))
10049 *pinsn = NULL_RTX;
10052 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10053 'special function' patterns (type sfunc) that clobber pr, but that
10054 do not look like function calls to leaf_function_p. Hence we must
10055 do this extra check. */
10056 static int
10057 sh_pr_n_sets (void)
10059 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10062 /* Return where to allocate pseudo for a given hard register initial
10063 value. */
10064 static rtx
10065 sh_allocate_initial_value (rtx hard_reg)
10067 rtx x;
10069 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10071 if (current_function_is_leaf
10072 && ! sh_pr_n_sets ()
10073 && ! (TARGET_SHCOMPACT
10074 && ((crtl->args.info.call_cookie
10075 & ~ CALL_COOKIE_RET_TRAMP (1))
10076 || crtl->saves_all_registers)))
10077 x = hard_reg;
10078 else
10079 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10081 else
10082 x = NULL_RTX;
10084 return x;
10087 /* Return the issue rate for the DFA pipeline description: 2 (dual issue)
10088 for superscalar SH processors such as the SH4, 1 otherwise. */
10089 static int
10090 sh_issue_rate (void)
10092 if (TARGET_SUPERSCALAR)
10093 return 2;
10094 else
10095 return 1;
10098 /* Functions for ready queue reordering for sched1. */
10100 /* Get weight for mode for a set x. */
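/* Weighting detail: a CLOBBER of a MODE register counts as one birth; a SET
   counts as one birth unless its destination register also appears in its
   source, in which case no new value is born.  */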
10101 static short
10102 find_set_regmode_weight (rtx x, enum machine_mode mode)
10104 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10105 return 1;
10106 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10108 if (REG_P (SET_DEST (x)))
10110 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10111 return 1;
10112 else
10113 return 0;
10115 return 1;
10117 return 0;
10120 /* Get regmode weight for insn. */
10121 static short
10122 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10124 short reg_weight = 0;
10125 rtx x;
10127 /* Increment weight for each register born here. */
10128 x = PATTERN (insn);
10129 reg_weight += find_set_regmode_weight (x, mode);
10130 if (GET_CODE (x) == PARALLEL)
10132 int j;
10133 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10135 x = XVECEXP (PATTERN (insn), 0, j);
10136 reg_weight += find_set_regmode_weight (x, mode);
10139 /* Decrement weight for each register that dies here. */
10140 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10142 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10144 rtx note = XEXP (x, 0);
10145 if (REG_P (note) && GET_MODE (note) == mode)
10146 reg_weight--;
10149 return reg_weight;
10152 /* Calculate regmode weights for all insns of a basic block. */
10153 static void
10154 find_regmode_weight (basic_block b, enum machine_mode mode)
10156 rtx insn, next_tail, head, tail;
10158 get_ebb_head_tail (b, b, &head, &tail);
10159 next_tail = NEXT_INSN (tail);
10161 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10163 /* Handle register life information. */
10164 if (!INSN_P (insn))
10165 continue;
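/* A DFmode value occupies a pair of SFmode registers and a DImode value a
   pair of SImode registers, so such insns count twice towards the
   corresponding pressure weight.  */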
10167 if (mode == SFmode)
10168 INSN_REGMODE_WEIGHT (insn, mode) =
10169 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10170 else if (mode == SImode)
10171 INSN_REGMODE_WEIGHT (insn, mode) =
10172 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10176 /* Comparison function for ready queue sorting. */
10177 static int
10178 rank_for_reorder (const void *x, const void *y)
10180 rtx tmp = *(const rtx *) y;
10181 rtx tmp2 = *(const rtx *) x;
10183 /* The insn in a schedule group should be issued first. */
10184 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10185 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10187 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10188 minimizes instruction movement, thus minimizing sched's effect on
10189 register pressure. */
10190 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10193 /* Resort the array A, in which only the element at index N may be out of order. */
10194 static void
10195 swap_reorder (rtx *a, int n)
10197 rtx insn = a[n - 1];
10198 int i = n - 2;
10200 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10202 a[i + 1] = a[i];
10203 i -= 1;
10205 a[i + 1] = insn;
10208 #define SCHED_REORDER(READY, N_READY) \
10209 do \
10211 if ((N_READY) == 2) \
10212 swap_reorder (READY, N_READY); \
10213 else if ((N_READY) > 2) \
10214 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10216 while (0)
10218 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10219 macro. */
10220 static void
10221 ready_reorder (rtx *ready, int nready)
10223 SCHED_REORDER (ready, nready);
10226 /* Count life regions of r0 for a block. */
10227 static int
10228 find_r0_life_regions (basic_block b)
10230 rtx end, insn;
10231 rtx pset;
10232 rtx r0_reg;
10233 int live;
10234 int set;
10235 int death = 0;
10237 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10239 set = 1;
10240 live = 1;
10242 else
10244 set = 0;
10245 live = 0;
10248 insn = BB_HEAD (b);
10249 end = BB_END (b);
10250 r0_reg = gen_rtx_REG (SImode, R0_REG);
10251 while (1)
10253 if (INSN_P (insn))
10255 if (find_regno_note (insn, REG_DEAD, R0_REG))
10257 death++;
10258 live = 0;
10260 if (!live
10261 && (pset = single_set (insn))
10262 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10263 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10265 set++;
10266 live = 1;
10269 if (insn == end)
10270 break;
10271 insn = NEXT_INSN (insn);
10273 return set - death;
10276 /* Calculate regmode weights for all insns of all basic blocks. */
10277 static void
10278 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10279 int verbose ATTRIBUTE_UNUSED,
10280 int old_max_uid)
10282 basic_block b;
10284 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10285 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10286 r0_life_regions = 0;
10288 FOR_EACH_BB_REVERSE (b)
10290 find_regmode_weight (b, SImode);
10291 find_regmode_weight (b, SFmode);
10292 if (!reload_completed)
10293 r0_life_regions += find_r0_life_regions (b);
10296 CURR_REGMODE_PRESSURE (SImode) = 0;
10297 CURR_REGMODE_PRESSURE (SFmode) = 0;
10301 /* Cleanup. */
10302 static void
10303 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10304 int verbose ATTRIBUTE_UNUSED)
10306 if (regmode_weight[0])
10308 free (regmode_weight[0]);
10309 regmode_weight[0] = NULL;
10311 if (regmode_weight[1])
10313 free (regmode_weight[1]);
10314 regmode_weight[1] = NULL;
10318 /* The set of supported scalar modes differs from the default only in TImode
10319 for 32-bit SHMEDIA. */
10320 static bool
10321 sh_scalar_mode_supported_p (enum machine_mode mode)
10323 if (TARGET_SHMEDIA32 && mode == TImode)
10324 return false;
10326 return default_scalar_mode_supported_p (mode);
10329 /* Cache can_issue_more so that we can return it from reorder2. Also,
10330 keep count of register pressure for SImode and SFmode. */
10331 static int
10332 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10333 int sched_verbose ATTRIBUTE_UNUSED,
10334 rtx insn,
10335 int can_issue_more)
10337 if (GET_CODE (PATTERN (insn)) != USE
10338 && GET_CODE (PATTERN (insn)) != CLOBBER)
10339 cached_can_issue_more = can_issue_more - 1;
10340 else
10341 cached_can_issue_more = can_issue_more;
10343 if (reload_completed)
10344 return cached_can_issue_more;
10346 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10347 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10349 return cached_can_issue_more;
10352 static void
10353 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10354 int verbose ATTRIBUTE_UNUSED,
10355 int veclen ATTRIBUTE_UNUSED)
10357 CURR_REGMODE_PRESSURE (SImode) = 0;
10358 CURR_REGMODE_PRESSURE (SFmode) = 0;
10361 /* Some magic numbers. */
10362 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10363 functions that already have high pressure on r0. */
10364 #define R0_MAX_LIFE_REGIONS 2
10365 /* Register pressure thresholds for SImode and SFmode registers. */
10366 #define SIMODE_MAX_WEIGHT 5
10367 #define SFMODE_MAX_WEIGHT 10
10369 /* Return true if the pressure is high for MODE. */
10370 static short
10371 high_pressure (enum machine_mode mode)
10373 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10374 functions that already have high pressure on r0. */
10375 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10376 return 1;
10378 if (mode == SFmode)
10379 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10380 else
10381 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10384 /* Reorder ready queue if register pressure is high. */
10385 static int
10386 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10387 int sched_verbose ATTRIBUTE_UNUSED,
10388 rtx *ready,
10389 int *n_readyp,
10390 int clock_var ATTRIBUTE_UNUSED)
10392 if (reload_completed)
10393 return sh_issue_rate ();
10395 if (high_pressure (SFmode) || high_pressure (SImode))
10397 ready_reorder (ready, *n_readyp);
10400 return sh_issue_rate ();
10403 /* Skip cycles if the current register pressure is high. */
10404 static int
10405 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10406 int sched_verbose ATTRIBUTE_UNUSED,
10407 rtx *ready ATTRIBUTE_UNUSED,
10408 int *n_readyp ATTRIBUTE_UNUSED,
10409 int clock_var ATTRIBUTE_UNUSED)
10411 if (reload_completed)
10412 return cached_can_issue_more;
10414 if (high_pressure (SFmode) || high_pressure (SImode))
10415 skip_cycles = 1;
10417 return cached_can_issue_more;
10420 /* Skip cycles without sorting the ready queue. This will move insns from
10421 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10422 ready queue by sh_reorder. */
10424 /* Generally, skipping this many cycles is sufficient for all insns to move
10425 from Q -> R. */
10426 #define MAX_SKIPS 8
10428 static int
10429 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10430 int sched_verbose ATTRIBUTE_UNUSED,
10431 rtx insn ATTRIBUTE_UNUSED,
10432 int last_clock_var,
10433 int clock_var,
10434 int *sort_p)
10436 if (reload_completed)
10437 return 0;
10439 if (skip_cycles)
10441 if ((clock_var - last_clock_var) < MAX_SKIPS)
10443 *sort_p = 0;
10444 return 1;
10446 /* If this is the last cycle we are skipping, allow reordering of R. */
10447 if ((clock_var - last_clock_var) == MAX_SKIPS)
10449 *sort_p = 1;
10450 return 1;
10454 skip_cycles = 0;
10456 return 0;
10459 /* SHmedia requires registers for branches, so we can't generate new
10460 branches past reload. */
10461 static bool
10462 sh_cannot_modify_jumps_p (void)
10464 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10467 static enum reg_class
10468 sh_target_reg_class (void)
10470 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10473 static bool
10474 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10476 HARD_REG_SET dummy;
10477 #if 0
10478 rtx insn;
10479 #endif
10481 if (! shmedia_space_reserved_for_target_registers)
10482 return 0;
10483 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10484 return 0;
10485 if (calc_live_regs (&dummy) >= 6 * 8)
10486 return 1;
10487 return 0;
10490 static bool
10491 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10493 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10497 On the SH1..SH4, the trampoline looks like
10498 2 0002 D202 mov.l l2,r2
10499 1 0000 D301 mov.l l1,r3
10500 3 0004 422B jmp @r2
10501 4 0006 0009 nop
10502 5 0008 00000000 l1: .long area
10503 6 000c 00000000 l2: .long function
10505 SH5 (compact) uses r1 instead of r3 for the static chain. */
10508 /* Emit RTL insns to initialize the variable parts of a trampoline.
10509 FNADDR is an RTX for the address of the function's pure code.
10510 CXT is an RTX for the static chain value for the function. */
10512 static void
10513 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10515 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10516 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10518 if (TARGET_SHMEDIA64)
10520 rtx tramp_templ;
10521 int fixed_len;
10523 rtx movi1 = GEN_INT (0xcc000010);
10524 rtx shori1 = GEN_INT (0xc8000010);
10525 rtx src, dst;
10527 /* The following trampoline works within a +- 128 KB range for cxt:
10528 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10529 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10530 gettr tr1,r1; blink tr0,r63 */
10531 /* Address rounding makes it hard to compute the exact bounds of the
10532 offset for this trampoline, but we have a rather generous offset
10533 range, so frame_offset should do fine as an upper bound. */
10534 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10536 /* ??? We could optimize this trampoline initialization
10537 by writing DImode words with two insns each. */
10538 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10539 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10540 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10541 insn = gen_rtx_AND (DImode, insn, mask);
10542 /* OR in the ptb/u .,tr1 pattern. */
10543 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10544 insn = force_operand (insn, NULL_RTX);
10545 insn = gen_lowpart (SImode, insn);
10546 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10547 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10548 insn = gen_rtx_AND (DImode, insn, mask);
10549 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10550 insn = gen_lowpart (SImode, insn);
10551 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10552 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10553 insn = gen_rtx_AND (DImode, insn, mask);
10554 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10555 insn = gen_lowpart (SImode, insn);
10556 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10557 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10558 insn = gen_rtx_AND (DImode, insn, mask);
10559 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10560 insn = gen_lowpart (SImode, insn);
10561 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10562 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10563 insn = gen_rtx_AND (DImode, insn, mask);
10564 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10565 insn = gen_lowpart (SImode, insn);
10566 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10567 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10568 GEN_INT (0x6bf10600));
10569 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10570 GEN_INT (0x4415fc10));
10571 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10572 GEN_INT (0x4401fff0));
10573 emit_insn (gen_ic_invalidate_line (tramp));
10574 return;
10576 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10577 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10579 tramp_templ = gen_datalabel_ref (tramp_templ);
10580 dst = tramp_mem;
10581 src = gen_const_mem (BLKmode, tramp_templ);
10582 set_mem_align (dst, 256);
10583 set_mem_align (src, 64);
10584 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10586 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10587 emit_move_insn (adjust_address (tramp_mem, Pmode,
10588 fixed_len + GET_MODE_SIZE (Pmode)),
10589 cxt);
10590 emit_insn (gen_ic_invalidate_line (tramp));
10591 return;
10593 else if (TARGET_SHMEDIA)
10595 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10596 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10597 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10598 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10599 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10600 rotated right by 10, with the high 16 bits of every 32 selected. */
10601 rtx movishori
10602 = force_reg (V2HImode, (simplify_gen_subreg
10603 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10604 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10605 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10607 fnaddr = force_reg (SImode, fnaddr);
10608 cxt = force_reg (SImode, cxt);
10609 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10610 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10611 movishori));
10612 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10613 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10614 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10615 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10616 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10617 gen_rtx_SUBREG (V2HImode, cxt, 0),
10618 movishori));
10619 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10620 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10621 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10622 if (TARGET_LITTLE_ENDIAN)
10624 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10625 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10627 else
10629 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10630 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10632 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10633 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10634 emit_insn (gen_ic_invalidate_line (tramp));
10635 return;
10637 else if (TARGET_SHCOMPACT)
10639 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10640 return;
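/* Plain SH1..SH4 trampoline: store the four fixed 16-bit opcodes from the
   layout comment above as two SImode words (the constants differ only by
   halfword order to account for endianness), then the static chain at
   offset 8 and the function address at offset 12; the insn cache is
   flushed afterwards on Harvard variants.  */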
10642 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10643 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10644 SImode));
10645 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10646 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10647 SImode));
10648 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10649 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10650 if (TARGET_HARVARD)
10652 if (!TARGET_INLINE_IC_INVALIDATE
10653 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10654 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10655 FUNCTION_ORDINARY),
10656 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10657 else
10658 emit_insn (gen_ic_invalidate_line (tramp));
10662 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10664 static rtx
10665 sh_trampoline_adjust_address (rtx tramp)
10667 if (TARGET_SHMEDIA)
10668 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10669 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10670 return tramp;
10673 /* FIXME: This is overly conservative. A SHcompact function that
10674 receives arguments ``by reference'' will have them stored in its
10675 own stack frame, so it must not pass pointers or references to
10676 these arguments to other functions by means of sibling calls. */
10677 /* If PIC, we cannot make sibling calls to global functions
10678 because the PLT requires r12 to be live. */
10679 static bool
10680 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10682 return (1
10683 && (! TARGET_SHCOMPACT
10684 || crtl->args.info.stack_regs == 0)
10685 && ! sh_cfun_interrupt_handler_p ()
10686 && (! flag_pic
10687 || (decl && ! TREE_PUBLIC (decl))
10688 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10691 /* Machine-specific built-in functions. */
10693 struct builtin_description
10695 const enum insn_code icode;
10696 const char *const name;
10697 int signature;
10698 tree fndecl;
10701 /* Describe the number and signedness of arguments; arg[0] == result
10702 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10703 /* 9: 64-bit pointer, 10: 32-bit pointer. */
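/* For example, the SH_BLTIN_SH_HI entry { 4, 4, 1 } below describes builtins
   such as __builtin_sh_media_MPERM_W: the result and first argument use the
   operand's mode with signedness treated as don't-care, while the second
   argument (the control / shift count) is unsigned.  */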
10704 static const char signature_args[][4] =
10706 #define SH_BLTIN_V2SI2 0
10707 { 4, 4 },
10708 #define SH_BLTIN_V4HI2 1
10709 { 4, 4 },
10710 #define SH_BLTIN_V2SI3 2
10711 { 4, 4, 4 },
10712 #define SH_BLTIN_V4HI3 3
10713 { 4, 4, 4 },
10714 #define SH_BLTIN_V8QI3 4
10715 { 4, 4, 4 },
10716 #define SH_BLTIN_MAC_HISI 5
10717 { 1, 4, 4, 1 },
10718 #define SH_BLTIN_SH_HI 6
10719 { 4, 4, 1 },
10720 #define SH_BLTIN_SH_SI 7
10721 { 4, 4, 1 },
10722 #define SH_BLTIN_V4HI2V2SI 8
10723 { 4, 4, 4 },
10724 #define SH_BLTIN_V4HI2V8QI 9
10725 { 4, 4, 4 },
10726 #define SH_BLTIN_SISF 10
10727 { 4, 2 },
10728 #define SH_BLTIN_LDUA_L 11
10729 { 2, 10 },
10730 #define SH_BLTIN_LDUA_Q 12
10731 { 1, 10 },
10732 #define SH_BLTIN_STUA_L 13
10733 { 0, 10, 2 },
10734 #define SH_BLTIN_STUA_Q 14
10735 { 0, 10, 1 },
10736 #define SH_BLTIN_LDUA_L64 15
10737 { 2, 9 },
10738 #define SH_BLTIN_LDUA_Q64 16
10739 { 1, 9 },
10740 #define SH_BLTIN_STUA_L64 17
10741 { 0, 9, 2 },
10742 #define SH_BLTIN_STUA_Q64 18
10743 { 0, 9, 1 },
10744 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10745 #define SH_BLTIN_2 19
10746 #define SH_BLTIN_SU 19
10747 { 1, 2 },
10748 #define SH_BLTIN_3 20
10749 #define SH_BLTIN_SUS 20
10750 { 2, 2, 1 },
10751 #define SH_BLTIN_PSSV 21
10752 { 0, 8, 2, 2 },
10753 #define SH_BLTIN_XXUU 22
10754 #define SH_BLTIN_UUUU 22
10755 { 1, 1, 1, 1 },
10756 #define SH_BLTIN_PV 23
10757 { 0, 8 },
10759 /* mcmv: operands considered unsigned. */
10760 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10761 /* mperm: control value considered unsigned int. */
10762 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10763 /* mshards_q: returns signed short. */
10764 /* nsb: takes long long arg, returns unsigned char. */
10765 static struct builtin_description bdesc[] =
10767 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10768 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10769 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10770 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10771 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10772 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10773 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10774 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10775 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10776 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10777 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10778 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10779 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10780 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10781 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10782 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10783 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10784 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10785 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10786 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10787 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10788 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10789 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10790 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10791 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10792 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10793 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10794 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10795 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10796 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10797 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10798 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10799 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10800 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10801 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10802 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10803 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10804 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10805 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10806 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10807 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10808 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10809 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10810 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10811 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10812 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10813 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10814 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10815 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10816 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10817 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10818 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10819 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10820 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10821 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10822 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10823 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10824 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10825 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10826 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10827 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10828 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10829 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10830 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10831 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10832 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10833 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10834 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10835 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10836 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10837 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10838 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10839 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10840 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10841 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10842 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10843 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10844 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10845 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10846 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10847 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10848 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10849 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10850 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10853 static void
10854 sh_media_init_builtins (void)
10856 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10857 struct builtin_description *d;
10859 memset (shared, 0, sizeof shared);
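/* Function types are cached per shared signature (index below
   SH_BLTIN_NUM_SHARED_SIGNATURES) so that builtins with identical
   prototypes reuse a single type node instead of rebuilding it for
   each table entry.  */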
10860 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10862 tree type, arg_type = 0;
10863 int signature = d->signature;
10864 int i;
10866 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10867 type = shared[signature];
10868 else
10870 int has_result = signature_args[signature][0] != 0;
10872 if ((signature_args[signature][1] & 8)
10873 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10874 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10875 continue;
10876 if (! TARGET_FPU_ANY
10877 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10878 continue;
10879 type = void_list_node;
10880 for (i = 3; ; i--)
10882 int arg = signature_args[signature][i];
10883 int opno = i - 1 + has_result;
10885 if (arg & 8)
10886 arg_type = ptr_type_node;
10887 else if (arg)
10888 arg_type = (*lang_hooks.types.type_for_mode)
10889 (insn_data[d->icode].operand[opno].mode,
10890 (arg & 1));
10891 else if (i)
10892 continue;
10893 else
10894 arg_type = void_type_node;
10895 if (i == 0)
10896 break;
10897 type = tree_cons (NULL_TREE, arg_type, type);
10899 type = build_function_type (arg_type, type);
10900 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10901 shared[signature] = type;
10903 d->fndecl =
10904 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10905 NULL, NULL_TREE);
10909 /* Returns the shmedia builtin decl for CODE. */
10911 static tree
10912 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10914 if (code >= ARRAY_SIZE (bdesc))
10915 return error_mark_node;
10917 return bdesc[code].fndecl;
10920 /* Implements target hook vector_mode_supported_p. */
10921 bool
10922 sh_vector_mode_supported_p (enum machine_mode mode)
10924 if (TARGET_FPU_ANY
10925 && ((mode == V2SFmode)
10926 || (mode == V4SFmode)
10927 || (mode == V16SFmode)))
10928 return true;
10930 else if (TARGET_SHMEDIA
10931 && ((mode == V8QImode)
10932 || (mode == V2HImode)
10933 || (mode == V4HImode)
10934 || (mode == V2SImode)))
10935 return true;
10937 return false;
10940 /* Implements target hook dwarf_calling_convention. Return an enum
10941 of dwarf_calling_convention. */
10943 sh_dwarf_calling_convention (const_tree func)
10945 if (sh_attr_renesas_p (func))
10946 return DW_CC_GNU_renesas_sh;
10948 return DW_CC_normal;
10951 static void
10952 sh_init_builtins (void)
10954 if (TARGET_SHMEDIA)
10955 sh_media_init_builtins ();
10958 /* Returns the sh builtin decl for CODE. */
10960 static tree
10961 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10963 if (TARGET_SHMEDIA)
10964 return sh_media_builtin_decl (code, initialize_p);
10966 return error_mark_node;
10969 /* Expand an expression EXP that calls a built-in function,
10970 with result going to TARGET if that's convenient
10971 (and in mode MODE if that's convenient).
10972 SUBTARGET may be used as the target for computing one of EXP's operands.
10973 IGNORE is nonzero if the value is to be ignored. */
10975 static rtx
10976 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10977 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10979 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10980 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10981 const struct builtin_description *d = &bdesc[fcode];
10982 enum insn_code icode = d->icode;
10983 int signature = d->signature;
10984 enum machine_mode tmode = VOIDmode;
10985 int nop = 0, i;
10986 rtx op[4];
10987 rtx pat = 0;
10989 if (signature_args[signature][0])
10991 if (ignore)
10992 return 0;
10994 tmode = insn_data[icode].operand[0].mode;
10995 if (! target
10996 || GET_MODE (target) != tmode
10997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10998 target = gen_reg_rtx (tmode);
10999 op[nop++] = target;
11001 else
11002 target = 0;
11004 for (i = 1; i <= 3; i++, nop++)
11006 tree arg;
11007 enum machine_mode opmode, argmode;
11008 tree optype;
11010 if (! signature_args[signature][i])
11011 break;
11012 arg = CALL_EXPR_ARG (exp, i - 1);
11013 if (arg == error_mark_node)
11014 return const0_rtx;
11015 if (signature_args[signature][i] & 8)
11017 opmode = ptr_mode;
11018 optype = ptr_type_node;
11020 else
11022 opmode = insn_data[icode].operand[nop].mode;
11023 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11025 argmode = TYPE_MODE (TREE_TYPE (arg));
11026 if (argmode != opmode)
11027 arg = build1 (NOP_EXPR, optype, arg);
11028 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11029 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11030 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11033 switch (nop)
11035 case 1:
11036 pat = (*insn_data[d->icode].genfun) (op[0]);
11037 break;
11038 case 2:
11039 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11040 break;
11041 case 3:
11042 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11043 break;
11044 case 4:
11045 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11046 break;
11047 default:
11048 gcc_unreachable ();
11050 if (! pat)
11051 return 0;
11052 emit_insn (pat);
11053 return target;
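/* Expand a V2SFmode unary operation by emitting the scalar SFmode operation
   once per vector lane, using element selectors 0 and 1.  */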
11056 void
11057 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11059 rtx sel0 = const0_rtx;
11060 rtx sel1 = const1_rtx;
11061 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11062 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11064 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11065 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11068 void
11069 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11071 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11073 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11074 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11077 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11078 We can allow any mode in any general register. The special registers
11079 only allow SImode. Don't allow any mode in the PR.
11081 We cannot hold DCmode values in the XD registers because alter_reg
11082 handles subregs of them incorrectly. We could work around this by
11083 spacing the XD registers like the DR registers, but this would require
11084 additional memory in every compilation to hold larger register vectors.
11085 We could hold SFmode / SCmode values in XD registers, but that
11086 would require a tertiary reload when reloading from / to memory,
11087 and a secondary reload to reload from / to general regs; that
11088 seems to be a losing proposition.
11090 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11091 it won't be ferried through GP registers first. */
11093 bool
11094 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11096 if (SPECIAL_REGISTER_P (regno))
11097 return mode == SImode;
11099 if (regno == FPUL_REG)
11100 return (mode == SImode || mode == SFmode);
11102 if (FP_REGISTER_P (regno) && mode == SFmode)
11103 return true;
11105 if (mode == V2SFmode)
11107 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11108 || GENERAL_REGISTER_P (regno)))
11109 return true;
11110 else
11111 return false;
11114 if (mode == V4SFmode)
11116 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11117 || GENERAL_REGISTER_P (regno))
11118 return true;
11119 else
11120 return false;
11123 if (mode == V16SFmode)
11125 if (TARGET_SHMEDIA)
11127 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11128 return true;
11129 else
11130 return false;
11132 else
11133 return regno == FIRST_XD_REG;
11136 if (FP_REGISTER_P (regno))
11138 if (mode == SFmode
11139 || mode == SImode
11140 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11141 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11142 || mode == DCmode
11143 || (TARGET_SHMEDIA
11144 && (mode == DFmode || mode == DImode
11145 || mode == V2SFmode || mode == TImode)))
11146 && ((regno - FIRST_FP_REG) & 1) == 0)
11147 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11148 && ((regno - FIRST_FP_REG) & 3) == 0))
11149 return true;
11150 else
11151 return false;
11154 if (XD_REGISTER_P (regno))
11155 return mode == DFmode;
11157 if (TARGET_REGISTER_P (regno))
11158 return (mode == DImode || mode == SImode || mode == PDImode);
11160 if (regno == PR_REG)
11161 return mode == SImode;
11163 if (regno == FPSCR_REG)
11164 return mode == PSImode;
11166 /* FIXME. This works around PR target/37633 for -O0. */
11167 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11169 unsigned int n = GET_MODE_SIZE (mode) / 8;
11171 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11172 && regno <= FIRST_GENERAL_REG + 14)
11173 return false;
11176 return true;
11179 /* Return true if a mode change from FROM to TO is invalid for registers
11180 in class RCLASS. */
11181 bool
11182 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11183 enum reg_class rclass)
11185 /* We want to enable the use of SUBREGs as a means to
11186 VEC_SELECT a single element of a vector. */
11187 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11188 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11190 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11192 if (TARGET_LITTLE_ENDIAN)
11194 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11195 return reg_classes_intersect_p (DF_REGS, rclass);
11197 else
11199 if (GET_MODE_SIZE (from) < 8)
11200 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11203 return 0;
11207 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11208 that label is used. */
11210 void
11211 sh_mark_label (rtx address, int nuses)
11213 if (GOTOFF_P (address))
11215 /* Extract the label or symbol. */
11216 address = XEXP (address, 0);
11217 if (GET_CODE (address) == PLUS)
11218 address = XEXP (address, 0);
11219 address = XVECEXP (address, 0, 0);
11221 if (GET_CODE (address) == LABEL_REF
11222 && LABEL_P (XEXP (address, 0)))
11223 LABEL_NUSES (XEXP (address, 0)) += nuses;
11226 /* Compute extra cost of moving data between one register class
11227 and another. */
11229 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11230 uses this information. Hence, the general register <-> floating point
11231 register information here is not used for SFmode. */
11234 sh_register_move_cost (enum machine_mode mode,
11235 enum reg_class srcclass, enum reg_class dstclass)
11237 if (dstclass == T_REGS || dstclass == PR_REGS)
11238 return 10;
11240 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11241 return 4;
11243 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11244 && REGCLASS_HAS_FP_REG (srcclass)
11245 && REGCLASS_HAS_FP_REG (dstclass))
11246 return 4;
11248 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11249 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11251 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11252 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11253 return 9;
11255 if ((REGCLASS_HAS_FP_REG (dstclass)
11256 && REGCLASS_HAS_GENERAL_REG (srcclass))
11257 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11258 && REGCLASS_HAS_FP_REG (srcclass)))
11259 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11260 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11262 if ((dstclass == FPUL_REGS
11263 && REGCLASS_HAS_GENERAL_REG (srcclass))
11264 || (srcclass == FPUL_REGS
11265 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11266 return 5;
11268 if ((dstclass == FPUL_REGS
11269 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11270 || (srcclass == FPUL_REGS
11271 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11272 return 7;
11274 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11275 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11276 return 20;
11278 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11279 if (TARGET_SHMEDIA
11280 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11282 if (sh_gettrcost >= 0)
11283 return sh_gettrcost;
11284 else if (!TARGET_PT_FIXED)
11285 return 100;
11288 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11289 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11290 return 4;
11292 if (TARGET_SHMEDIA
11293 || (TARGET_FMOVD
11294 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11295 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11296 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
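/* Otherwise assume the value is moved in SImode-sized (4-byte) chunks,
   at a cost of 2 per chunk.  */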
11298 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11301 static rtx emit_load_ptr (rtx, rtx);
11303 static rtx
11304 emit_load_ptr (rtx reg, rtx addr)
11306 rtx mem = gen_const_mem (ptr_mode, addr);
11308 if (Pmode != ptr_mode)
11309 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11310 return emit_move_insn (reg, mem);
11313 static void
11314 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11315 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11316 tree function)
11318 CUMULATIVE_ARGS cum;
11319 int structure_value_byref = 0;
11320 rtx this_rtx, this_value, sibcall, insns, funexp;
11321 tree funtype = TREE_TYPE (function);
11322 int simple_add = CONST_OK_FOR_ADD (delta);
11323 int did_load = 0;
11324 rtx scratch0, scratch1, scratch2;
11325 unsigned i;
11327 reload_completed = 1;
11328 epilogue_completed = 1;
11329 current_function_uses_only_leaf_regs = 1;
11331 emit_note (NOTE_INSN_PROLOGUE_END);
11333 /* Find the "this" pointer. We have such a wide range of ABIs for the
11334 SH that it's best to do this completely machine independently.
11335 "this" is passed as first argument, unless a structure return pointer
11336 comes first, in which case "this" comes second. */
11337 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11338 #ifndef PCC_STATIC_STRUCT_RETURN
11339 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11340 structure_value_byref = 1;
11341 #endif /* not PCC_STATIC_STRUCT_RETURN */
11342 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11344 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11346 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11348 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11350 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11351 static chain pointer (even if you can't have nested virtual functions
11352 right now, someone might implement them sometime), and the rest of the
11353 registers are used for argument passing, are callee-saved, or reserved. */
11354 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11355 -ffixed-reg has been used. */
11356 if (! call_used_regs[0] || fixed_regs[0])
11357 error ("r0 needs to be available as a call-clobbered register");
11358 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11359 if (! TARGET_SH5)
11361 if (call_used_regs[1] && ! fixed_regs[1])
11362 scratch1 = gen_rtx_REG (ptr_mode, 1);
11363 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11364 to the location where struct values are to be returned. */
11365 if (call_used_regs[3] && ! fixed_regs[3])
11366 scratch2 = gen_rtx_REG (Pmode, 3);
11368 else if (TARGET_SHMEDIA)
11370 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11371 if (i != REGNO (scratch0) &&
11372 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11374 scratch1 = gen_rtx_REG (ptr_mode, i);
11375 break;
11377 if (scratch1 == scratch0)
11378 error ("Need a second call-clobbered general purpose register");
11379 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11380 if (call_used_regs[i] && ! fixed_regs[i])
11382 scratch2 = gen_rtx_REG (Pmode, i);
11383 break;
11385 if (scratch2 == scratch0)
11386 error ("Need a call-clobbered target register");
11389 this_value = plus_constant (this_rtx, delta);
11390 if (vcall_offset
11391 && (simple_add || scratch0 != scratch1)
11392 && strict_memory_address_p (ptr_mode, this_value))
11394 emit_load_ptr (scratch0, this_value);
11395 did_load = 1;
11398 if (!delta)
11399 ; /* Do nothing. */
11400 else if (simple_add)
11401 emit_move_insn (this_rtx, this_value);
11402 else
11404 emit_move_insn (scratch1, GEN_INT (delta));
11405 emit_insn (gen_add2_insn (this_rtx, scratch1));
11408 if (vcall_offset)
11410 rtx offset_addr;
11412 if (!did_load)
11413 emit_load_ptr (scratch0, this_rtx);
11415 offset_addr = plus_constant (scratch0, vcall_offset);
11416 if (strict_memory_address_p (ptr_mode, offset_addr))
11417 ; /* Do nothing. */
11418 else if (! TARGET_SH5 && scratch0 != scratch1)
11420 /* scratch0 != scratch1, and we have indexed loads.  Get a better
11421 schedule by loading the offset into r1 and using an indexed
11422 load - then the load of r1 can issue before the load from
11423 (this_rtx + delta) finishes. */
11424 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11425 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11427 else if (CONST_OK_FOR_ADD (vcall_offset))
11429 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11430 offset_addr = scratch0;
11432 else if (scratch0 != scratch1)
11434 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11435 emit_insn (gen_add2_insn (scratch0, scratch1));
11436 offset_addr = scratch0;
11438 else
11439 gcc_unreachable (); /* FIXME */
11440 emit_load_ptr (scratch0, offset_addr);
11442 if (Pmode != ptr_mode)
11443 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11444 emit_insn (gen_add2_insn (this_rtx, scratch0));
11447 /* Generate a tail call to the target function. */
11448 if (! TREE_USED (function))
11450 assemble_external (function);
11451 TREE_USED (function) = 1;
11453 funexp = XEXP (DECL_RTL (function), 0);
11454 /* If the function is overridden, so is the thunk, hence we don't
11455 need GOT addressing even if this is a public symbol. */
11456 #if 0
11457 if (TARGET_SH1 && ! flag_weak)
11458 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11459 else
11460 #endif
11461 if (TARGET_SH2 && flag_pic)
11463 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11464 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11466 else
11468 if (TARGET_SHMEDIA && flag_pic)
11470 funexp = gen_sym2PIC (funexp);
11471 PUT_MODE (funexp, Pmode);
11473 emit_move_insn (scratch2, funexp);
11474 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11475 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11477 sibcall = emit_call_insn (sibcall);
11478 SIBLING_CALL_P (sibcall) = 1;
11479 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11480 emit_barrier ();
11482 /* Run just enough of rest_of_compilation to do scheduling and get
11483 the insns emitted. Note that use_thunk calls
11484 assemble_start_function and assemble_end_function. */
11486 insn_locators_alloc ();
11487 insns = get_insns ();
11489 if (optimize > 0)
11491 if (! cfun->cfg)
11492 init_flow (cfun);
11493 split_all_insns_noflow ();
11496 sh_reorg ();
11498 if (optimize > 0 && flag_delayed_branch)
11499 dbr_schedule (insns);
11501 shorten_branches (insns);
11502 final_start_function (insns, file, 1);
11503 final (insns, file, 1);
11504 final_end_function ();
11506 reload_completed = 0;
11507 epilogue_completed = 0;
11511 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11513 rtx sym;
11515 /* If this is not an ordinary function, the name usually comes from a
11516 string literal or an sprintf buffer. Make sure we use the same
11517 string consistently, so that cse will be able to unify address loads. */
11518 if (kind != FUNCTION_ORDINARY)
11519 name = IDENTIFIER_POINTER (get_identifier (name));
11520 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11521 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11522 if (flag_pic)
11523 switch (kind)
11525 case FUNCTION_ORDINARY:
11526 break;
11527 case SFUNC_GOT:
11529 rtx reg = target ? target : gen_reg_rtx (Pmode);
11531 emit_insn (gen_symGOT2reg (reg, sym));
11532 sym = reg;
11533 break;
11535 case SFUNC_STATIC:
11537 /* ??? To allow cse to work, we use GOTOFF relocations.
11538 We could add combiner patterns to transform this into
11539 straight pc-relative calls with sym2PIC / bsrf when
11540 label load and function call are still 1:1 and in the
11541 same basic block during combine. */
11542 rtx reg = target ? target : gen_reg_rtx (Pmode);
11544 emit_insn (gen_symGOTOFF2reg (reg, sym));
11545 sym = reg;
11546 break;
11549 if (target && sym != target)
11551 emit_move_insn (target, sym);
11552 return target;
11554 return sym;
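/* For illustration, a hypothetical use of function_symbol (not a call
   taken from the sources), materializing the address of an sfunc helper:

     rtx addr = function_symbol (NULL_RTX, "__udivsi3_i4", SFUNC_STATIC);

   Routing the name through get_identifier above makes repeated requests
   for the same helper share one string, so cse can merge the address
   loads.  */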
11557 /* Find the number of a general purpose register in S. */
11558 static int
11559 scavenge_reg (HARD_REG_SET *s)
11561 int r;
11562 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11563 if (TEST_HARD_REG_BIT (*s, r))
11564 return r;
11565 return -1;
11569 sh_get_pr_initial_val (void)
11571 rtx val;
11573 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11574 PR register on SHcompact, because it might be clobbered by the prologue.
11575 We check first if that is known to be the case. */
11576 if (TARGET_SHCOMPACT
11577 && ((crtl->args.info.call_cookie
11578 & ~ CALL_COOKIE_RET_TRAMP (1))
11579 || crtl->saves_all_registers))
11580 return gen_frame_mem (SImode, return_address_pointer_rtx);
11582 /* If we haven't finished rtl generation, there might be a nonlocal label
11583 that we haven't seen yet.
11584 ??? get_hard_reg_initial_val fails if it is called after register
11585 allocation has started, unless it has been called before for the
11586 same register.  And even then, we end up in trouble if we didn't use
11587 the register in the same basic block before. So call
11588 get_hard_reg_initial_val now and wrap it in an unspec if we might
11589 need to replace it. */
11590 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11591 combine can put the pseudo returned by get_hard_reg_initial_val into
11592 instructions that need a general purpose register, which will fail to
11593 be recognized when the pseudo becomes allocated to PR. */
11594 val
11595 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11596 if (TARGET_SH1)
11597 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11598 return val;
11602 sh_expand_t_scc (rtx operands[])
11604 enum rtx_code code = GET_CODE (operands[1]);
11605 rtx target = operands[0];
11606 rtx op0 = operands[2];
11607 rtx op1 = operands[3];
11608 rtx result = target;
11609 HOST_WIDE_INT val;
11611 if (!REG_P (op0) || REGNO (op0) != T_REG
11612 || !CONST_INT_P (op1))
11613 return 0;
11614 if (!REG_P (result))
11615 result = gen_reg_rtx (SImode);
11616 val = INTVAL (op1);
11617 if ((code == EQ && val == 1) || (code == NE && val == 0))
11618 emit_insn (gen_movt (result));
11619 else if (TARGET_SH2A && ((code == EQ && val == 0)
11620 || (code == NE && val == 1)))
11621 emit_insn (gen_xorsi3_movrt (result));
11622 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11624 emit_clobber (result);
11625 emit_insn (gen_subc (result, result, result));
11626 emit_insn (gen_addsi3 (result, result, const1_rtx));
11628 else if (code == EQ || code == NE)
11629 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11630 else
11631 return 0;
11632 if (result != target)
11633 emit_move_insn (target, result);
11634 return 1;
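/* A note on the clobber/subc/add sequence above, for illustration: subc
   computes result = result - result - T = -T, and adding 1 then yields
   1 - T, i.e. 1 when T is clear and 0 when T is set, which is exactly the
   negated T bit needed for the (EQ, 0) / (NE, 1) case being expanded.  */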
11637 /* INSN is an sfunc; return the rtx that describes the address used. */
11638 static rtx
11639 extract_sfunc_addr (rtx insn)
11641 rtx pattern, part = NULL_RTX;
11642 int len, i;
11644 pattern = PATTERN (insn);
11645 len = XVECLEN (pattern, 0);
11646 for (i = 0; i < len; i++)
11648 part = XVECEXP (pattern, 0, i);
11649 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11650 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11651 return XEXP (part, 0);
11653 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11654 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11657 /* Verify that the register in use_sfunc_addr still agrees with the address
11658 used in the sfunc. This prevents fill_slots_from_thread from changing
11659 use_sfunc_addr.
11660 INSN is the use_sfunc_addr instruction, and REG is the register it
11661 guards. */
11663 check_use_sfunc_addr (rtx insn, rtx reg)
11665 /* Search for the sfunc. It should really come right after INSN. */
11666 while ((insn = NEXT_INSN (insn)))
11668 if (LABEL_P (insn) || JUMP_P (insn))
11669 break;
11670 if (! INSN_P (insn))
11671 continue;
11673 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11674 insn = XVECEXP (PATTERN (insn), 0, 0);
11675 if (GET_CODE (PATTERN (insn)) != PARALLEL
11676 || get_attr_type (insn) != TYPE_SFUNC)
11677 continue;
11678 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11680 gcc_unreachable ();
11683 /* This function returns a constant rtx that represents 2**15 / pi in
11684 SFmode.  It's used to scale SFmode angles, in radians, to a
11685 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11686 maps to 0x10000).  */
11688 static GTY(()) rtx sh_fsca_sf2int_rtx;
11691 sh_fsca_sf2int (void)
11693 if (! sh_fsca_sf2int_rtx)
11695 REAL_VALUE_TYPE rv;
11697 real_from_string (&rv, "10430.378350470453");
11698 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11701 return sh_fsca_sf2int_rtx;
11704 /* This function returns a constant rtx that represents 2**15 / pi in
11705 DFmode.  It's used to scale DFmode angles, in radians, to a
11706 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11707 maps to 0x10000).  */
11709 static GTY(()) rtx sh_fsca_df2int_rtx;
11712 sh_fsca_df2int (void)
11714 if (! sh_fsca_df2int_rtx)
11716 REAL_VALUE_TYPE rv;
11718 real_from_string (&rv, "10430.378350470453");
11719 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11722 return sh_fsca_df2int_rtx;
11725 /* This function returns a constant rtx that represents pi / 2**15 in
11726 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11727 of a full circle back to an SFmode value in radians (i.e., 0x10000
11728 maps to 2*pi).  */
11730 static GTY(()) rtx sh_fsca_int2sf_rtx;
11733 sh_fsca_int2sf (void)
11735 if (! sh_fsca_int2sf_rtx)
11737 REAL_VALUE_TYPE rv;
11739 real_from_string (&rv, "9.587379924285257e-5");
11740 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11743 return sh_fsca_int2sf_rtx;
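/* A worked check of the two scale factors above, for illustration: they
   are reciprocals.  10430.378350470453 ~= 2**15 / pi converts radians to
   fsca units (pi/2 rad * 10430.378... = 16384 = 0x4000, a quarter circle),
   and 9.587379924285257e-5 ~= pi / 2**15 converts fsca units back to
   radians (0x10000 * 9.5873...e-5 ~= 6.2832 ~= 2*pi).  */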
11746 /* Initialize the CUMULATIVE_ARGS structure. */
11748 void
11749 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11750 tree fntype,
11751 rtx libname ATTRIBUTE_UNUSED,
11752 tree fndecl,
11753 signed int n_named_args,
11754 enum machine_mode mode)
11756 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11757 pcum->free_single_fp_reg = 0;
11758 pcum->stack_regs = 0;
11759 pcum->byref_regs = 0;
11760 pcum->byref = 0;
11761 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11763 /* XXX - Should we check TARGET_HITACHI here ??? */
11764 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11766 if (fntype)
11768 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11769 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11770 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11771 pcum->arg_count [(int) SH_ARG_INT]
11772 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11774 pcum->call_cookie
11775 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11776 && pcum->arg_count [(int) SH_ARG_INT] == 0
11777 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11778 ? int_size_in_bytes (TREE_TYPE (fntype))
11779 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11780 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11781 == FIRST_RET_REG));
11783 else
11785 pcum->arg_count [(int) SH_ARG_INT] = 0;
11786 pcum->prototype_p = FALSE;
11787 if (mode != VOIDmode)
11789 pcum->call_cookie =
11790 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11791 && GET_MODE_SIZE (mode) > 4
11792 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11794 /* If the default ABI is the Renesas ABI then all library
11795 calls must assume that the library will be using the
11796 Renesas ABI. So if the function would return its result
11797 in memory then we must force the address of this memory
11798 block onto the stack. Ideally we would like to call
11799 targetm.calls.return_in_memory() here but we do not have
11800 the TYPE or the FNDECL available so we synthesize the
11801 contents of that function as best we can. */
11802 pcum->force_mem =
11803 (TARGET_DEFAULT & MASK_HITACHI)
11804 && (mode == BLKmode
11805 || (GET_MODE_SIZE (mode) > 4
11806 && !(mode == DFmode
11807 && TARGET_FPU_DOUBLE)));
11809 else
11811 pcum->call_cookie = 0;
11812 pcum->force_mem = FALSE;
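/* An example of the Renesas-ABI synthesis above, for illustration: for a
   libcall whose mode is DImode (size 8 > 4, and not DFmode with hardware
   double support), force_mem is set when the default ABI is Renesas, so
   the address of the memory block for the result is forced onto the
   stack, as described in the comment above.  */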
11817 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11818 not recurse into CONST_DOUBLEs when replacing.
11820 Note that copying is not done so X must not be shared unless all copies
11821 are to be modified.
11823 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11824 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11825 replacements[n*2+1] - and that we take mode changes into account.
11827 If a replacement is ambiguous, return NULL_RTX.
11829 If MODIFY is zero, don't modify any rtl in place,
11830 just return zero or nonzero for failure / success. */
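/* For illustration, a hypothetical call (not taken from the sources) that
   rewrites the hard register pair r4/r5 into r6/r7 within PAT, checking
   feasibility before modifying anything in place:

     rtx repl[4] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 6),
                     gen_rtx_REG (SImode, 5), gen_rtx_REG (SImode, 7) };
     if (replace_n_hard_rtx (pat, repl, 2, 0))
       pat = replace_n_hard_rtx (pat, repl, 2, 1);  */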
11833 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11835 int i, j;
11836 const char *fmt;
11838 /* The following prevents infinite loops when we change a MEM inside a
11839 CONST_DOUBLE into the same CONST_DOUBLE. */
11840 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11841 return x;
11843 for (i = n_replacements - 1; i >= 0 ; i--)
11844 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11845 return replacements[i*2+1];
11847 /* Allow this function to make replacements in EXPR_LISTs. */
11848 if (x == 0)
11849 return 0;
11851 if (GET_CODE (x) == SUBREG)
11853 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11854 n_replacements, modify);
11856 if (CONST_INT_P (new_rtx))
11858 x = simplify_subreg (GET_MODE (x), new_rtx,
11859 GET_MODE (SUBREG_REG (x)),
11860 SUBREG_BYTE (x));
11861 if (! x)
11862 abort ();
11864 else if (modify)
11865 SUBREG_REG (x) = new_rtx;
11867 return x;
11869 else if (REG_P (x))
11871 unsigned regno = REGNO (x);
11872 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11873 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11874 rtx result = NULL_RTX;
11876 for (i = n_replacements - 1; i >= 0; i--)
11878 rtx from = replacements[i*2];
11879 rtx to = replacements[i*2+1];
11880 unsigned from_regno, from_nregs, to_regno, new_regno;
11882 if (!REG_P (from))
11883 continue;
11884 from_regno = REGNO (from);
11885 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11886 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11887 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11889 if (regno < from_regno
11890 || regno + nregs > from_regno + from_nregs
11891 || !REG_P (to)
11892 || result)
11893 return NULL_RTX;
11894 to_regno = REGNO (to);
11895 if (to_regno < FIRST_PSEUDO_REGISTER)
11897 new_regno = regno + to_regno - from_regno;
11898 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11899 != nregs)
11900 return NULL_RTX;
11901 result = gen_rtx_REG (GET_MODE (x), new_regno);
11903 else if (GET_MODE (x) <= GET_MODE (to))
11904 result = gen_lowpart_common (GET_MODE (x), to);
11905 else
11906 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11909 return result ? result : x;
11911 else if (GET_CODE (x) == ZERO_EXTEND)
11913 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11914 n_replacements, modify);
11916 if (CONST_INT_P (new_rtx))
11918 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11919 new_rtx, GET_MODE (XEXP (x, 0)));
11920 if (! x)
11921 abort ();
11923 else if (modify)
11924 XEXP (x, 0) = new_rtx;
11926 return x;
11929 fmt = GET_RTX_FORMAT (GET_CODE (x));
11930 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11932 rtx new_rtx;
11934 if (fmt[i] == 'e')
11936 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11937 n_replacements, modify);
11938 if (!new_rtx)
11939 return NULL_RTX;
11940 if (modify)
11941 XEXP (x, i) = new_rtx;
11943 else if (fmt[i] == 'E')
11944 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11946 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11947 n_replacements, modify);
11948 if (!new_rtx)
11949 return NULL_RTX;
11950 if (modify)
11951 XVECEXP (x, i, j) = new_rtx;
11955 return x;
11959 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11961 enum rtx_code code = TRUNCATE;
11963 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11965 rtx inner = XEXP (x, 0);
11966 enum machine_mode inner_mode = GET_MODE (inner);
11968 if (inner_mode == mode)
11969 return inner;
11970 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11971 x = inner;
11972 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11973 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11975 code = GET_CODE (x);
11976 x = inner;
11979 return gen_rtx_fmt_e (code, mode, x);
11982 /* Called via for_each_rtx after reload, to clean up truncates of
11983 registers that span multiple actual hard registers. */
11985 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11987 rtx x = *p, reg;
11989 if (GET_CODE (x) != TRUNCATE)
11990 return 0;
11991 reg = XEXP (x, 0);
11992 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11994 enum machine_mode reg_mode = GET_MODE (reg);
11995 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11996 subreg_lowpart_offset (DImode, reg_mode));
11997 *(int*) n_changes += 1;
11998 return -1;
12000 return 0;
12003 /* Load and store depend on the highpart of the address. However,
12004 set_attr_alternative does not give well-defined results before reload,
12005 so we must look at the rtl ourselves to see if any of the feeding
12006 registers is used in a memref. */
12008 /* Called by sh_contains_memref_p via for_each_rtx. */
12009 static int
12010 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12012 return (MEM_P (*loc));
12015 /* Return nonzero iff INSN contains a MEM. */
12017 sh_contains_memref_p (rtx insn)
12019 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12022 /* Return nonzero iff INSN loads a banked register. */
12024 sh_loads_bankedreg_p (rtx insn)
12026 if (GET_CODE (PATTERN (insn)) == SET)
12028 rtx op = SET_DEST (PATTERN(insn));
12029 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12030 return 1;
12033 return 0;
12036 /* FNADDR is the MEM expression from a call expander. Return an address
12037 to use in an SHmedia insn pattern. */
12039 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12041 int is_sym;
12043 fnaddr = XEXP (fnaddr, 0);
12044 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12045 if (flag_pic && is_sym)
12047 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12049 rtx reg = gen_reg_rtx (Pmode);
12051 /* We must not use GOTPLT for sibcalls, because PIC_REG
12052 must be restored before the PLT code gets to run. */
12053 if (is_sibcall)
12054 emit_insn (gen_symGOT2reg (reg, fnaddr));
12055 else
12056 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12057 fnaddr = reg;
12059 else
12061 fnaddr = gen_sym2PIC (fnaddr);
12062 PUT_MODE (fnaddr, Pmode);
12065 /* If ptabs might trap, make this visible to the rest of the compiler.
12066 We generally assume that symbols pertain to valid locations, but
12067 it is possible to generate invalid symbols with asm or linker tricks.
12068 In a list of functions where each returns its successor, an invalid
12069 symbol might denote an empty list. */
12070 if (!TARGET_PT_FIXED
12071 && (!is_sym || TARGET_INVALID_SYMBOLS)
12072 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12074 rtx tr = gen_reg_rtx (PDImode);
12076 emit_insn (gen_ptabs (tr, fnaddr));
12077 fnaddr = tr;
12079 else if (! target_reg_operand (fnaddr, Pmode))
12080 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12081 return fnaddr;
12084 enum reg_class
12085 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12086 enum machine_mode mode, secondary_reload_info *sri)
12088 if (in_p)
12090 if (REGCLASS_HAS_FP_REG (rclass)
12091 && ! TARGET_SHMEDIA
12092 && immediate_operand ((x), mode)
12093 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12094 && mode == SFmode && fldi_ok ()))
12095 switch (mode)
12097 case SFmode:
12098 sri->icode = CODE_FOR_reload_insf__frn;
12099 return NO_REGS;
12100 case DFmode:
12101 sri->icode = CODE_FOR_reload_indf__frn;
12102 return NO_REGS;
12103 case SImode:
12104 /* ??? If we knew that we are in the appropriate mode -
12105 single precision - we could use a reload pattern directly. */
12106 return FPUL_REGS;
12107 default:
12108 abort ();
12110 if (rclass == FPUL_REGS
12111 && ((REG_P (x)
12112 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12113 || REGNO (x) == T_REG))
12114 || GET_CODE (x) == PLUS))
12115 return GENERAL_REGS;
12116 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12118 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12119 return GENERAL_REGS;
12120 else if (mode == SFmode)
12121 return FP_REGS;
12122 sri->icode = CODE_FOR_reload_insi__i_fpul;
12123 return NO_REGS;
12125 if (rclass == FPSCR_REGS
12126 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12127 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12128 return GENERAL_REGS;
12129 if (REGCLASS_HAS_FP_REG (rclass)
12130 && TARGET_SHMEDIA
12131 && immediate_operand (x, mode)
12132 && x != CONST0_RTX (GET_MODE (x))
12133 && GET_MODE (x) != V4SFmode)
12134 return GENERAL_REGS;
12135 if ((mode == QImode || mode == HImode)
12136 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12138 sri->icode = ((mode == QImode)
12139 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12140 return NO_REGS;
12142 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12143 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12144 return TARGET_REGS;
12145 } /* end of input-only processing. */
12147 if (((REGCLASS_HAS_FP_REG (rclass)
12148 && (REG_P (x)
12149 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12150 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12151 && TARGET_FMOVD))))
12152 || (REGCLASS_HAS_GENERAL_REG (rclass)
12153 && REG_P (x)
12154 && FP_REGISTER_P (REGNO (x))))
12155 && ! TARGET_SHMEDIA
12156 && (mode == SFmode || mode == SImode))
12157 return FPUL_REGS;
12158 if ((rclass == FPUL_REGS
12159 || (REGCLASS_HAS_FP_REG (rclass)
12160 && ! TARGET_SHMEDIA && mode == SImode))
12161 && (MEM_P (x)
12162 || (REG_P (x)
12163 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12164 || REGNO (x) == T_REG
12165 || system_reg_operand (x, VOIDmode)))))
12167 if (rclass == FPUL_REGS)
12168 return GENERAL_REGS;
12169 return FPUL_REGS;
12171 if ((rclass == TARGET_REGS
12172 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12173 && !satisfies_constraint_Csy (x)
12174 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12175 return GENERAL_REGS;
12176 if ((rclass == MAC_REGS || rclass == PR_REGS)
12177 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12178 && rclass != REGNO_REG_CLASS (REGNO (x)))
12179 return GENERAL_REGS;
12180 if (rclass != GENERAL_REGS && REG_P (x)
12181 && TARGET_REGISTER_P (REGNO (x)))
12182 return GENERAL_REGS;
12183 return NO_REGS;
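/* Two examples of what sh_secondary_reload above reports, for
   illustration: reloading an SImode constant that does not satisfy I08
   into FPUL_REGS returns NO_REGS but sets sri->icode to
   CODE_FOR_reload_insi__i_fpul, so reload uses that pattern directly;
   reloading a MEM into FPUL_REGS returns GENERAL_REGS, i.e. the value has
   to be staged through a general register.  */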
12186 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12188 #include "gt-sh.h"