1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "toplev.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
61 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
64 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
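/* For illustration: for a 64 bit value accessed as two SImode words,
   word index LSW holds the least significant 32 bits and word index MSW
   the most significant 32 bits.  On a little-endian target LSW == 0 and
   MSW == 1, so the LSW of a DImode MEM lives at byte offset 4 * LSW == 0
   and the MSW at byte offset 4 * MSW == 4 (see the %R / %S handling in
   sh_print_operand below).  */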
66 /* These are some macros to abstract register modes. */
67 #define CONST_OK_FOR_ADD(size) \
68 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
69 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
70 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
71 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
73 /* Used to simplify the logic below. Find the attributes wherever
74 they may be. */
75 #define SH_ATTRIBUTES(decl) \
76 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
77 : DECL_ATTRIBUTES (decl) \
78 ? (DECL_ATTRIBUTES (decl)) \
79 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
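/* Spelled out: a bare type uses its TYPE_ATTRIBUTES; a decl with its own
   DECL_ATTRIBUTES uses those; a decl without any falls back to the
   attributes of its type.  */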
81 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
82 int current_function_interrupt;
84 tree sh_deferred_function_attributes;
85 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87 /* Global variables for machine-dependent things. */
89 /* Which CPU we are scheduling for.  */
90 enum processor_type sh_cpu;
92 /* Definitions used in ready queue reordering for first scheduling pass. */
94 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
95 static short *regmode_weight[2];
97 /* Total SFmode and SImode weights of scheduled insns. */
98 static int curr_regmode_pressure[2];
100 /* Number of r0 life regions. */
101 static int r0_life_regions;
103 /* If true, skip cycles for Q -> R movement. */
104 static int skip_cycles = 0;
106 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
107 and returned from sh_reorder2. */
108 static short cached_can_issue_more;
110 /* Unique number for UNSPEC_BBR pattern. */
111 static unsigned int unspec_bbr_uid = 1;
113 /* Provides the class number of the smallest class containing
114 a given hard register.  */
116 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
155 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
156 GENERAL_REGS, GENERAL_REGS,
159 char sh_register_names[FIRST_PSEUDO_REGISTER] \
160 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162 char sh_additional_register_names[ADDREGNAMES_SIZE] \
163 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
164 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166 int assembler_dialect;
168 static bool shmedia_space_reserved_for_target_registers;
170 static bool sh_handle_option (size_t, const char *, int);
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
185 static rtx frame_insn (rtx);
186 static rtx push (int);
187 static void pop (int);
188 static void push_regs (HARD_REG_SET *, int);
189 static int calc_live_regs (HARD_REG_SET *);
190 static HOST_WIDE_INT rounded_frame_size (int);
191 static bool sh_frame_pointer_required (void);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
195 tree, int, bool *);
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_print_operand (FILE *, rtx, int);
202 static void sh_print_operand_address (FILE *, rtx);
203 static bool sh_print_operand_punct_valid_p (unsigned char code);
204 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
205 static void sh_insert_attributes (tree, tree *);
206 static const char *sh_check_pch_target_flags (int);
207 static int sh_adjust_cost (rtx, rtx, rtx, int);
208 static int sh_issue_rate (void);
209 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
210 static short find_set_regmode_weight (rtx, enum machine_mode);
211 static short find_insn_regmode_weight (rtx, enum machine_mode);
212 static void find_regmode_weight (basic_block, enum machine_mode);
213 static int find_r0_life_regions (basic_block);
214 static void sh_md_init_global (FILE *, int, int);
215 static void sh_md_finish_global (FILE *, int);
216 static int rank_for_reorder (const void *, const void *);
217 static void swap_reorder (rtx *, int);
218 static void ready_reorder (rtx *, int);
219 static short high_pressure (enum machine_mode);
220 static int sh_reorder (FILE *, int, rtx *, int *, int);
221 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
222 static void sh_md_init (FILE *, int, int);
223 static int sh_variable_issue (FILE *, int, rtx, int);
225 static bool sh_function_ok_for_sibcall (tree, tree);
227 static bool sh_cannot_modify_jumps_p (void);
228 static reg_class_t sh_target_reg_class (void);
229 static bool sh_optimize_target_register_callee_saved (bool);
230 static bool sh_ms_bitfield_layout_p (const_tree);
232 static void sh_init_builtins (void);
233 static tree sh_builtin_decl (unsigned, bool);
234 static void sh_media_init_builtins (void);
235 static tree sh_media_builtin_decl (unsigned, bool);
236 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
237 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
238 static void sh_file_start (void);
239 static int flow_dependent_p (rtx, rtx);
240 static void flow_dependent_p_1 (rtx, const_rtx, void *);
241 static int shiftcosts (rtx);
242 static int andcosts (rtx);
243 static int addsubcosts (rtx);
244 static int multcosts (rtx);
245 static bool unspec_caller_rtx_p (rtx);
246 static bool sh_cannot_copy_insn_p (rtx);
247 static bool sh_rtx_costs (rtx, int, int, int *, bool);
248 static int sh_address_cost (rtx, bool);
249 static int sh_pr_n_sets (void);
250 static rtx sh_allocate_initial_value (rtx);
251 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
252 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
253 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
254 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
255 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
256 static int scavenge_reg (HARD_REG_SET *s);
257 struct save_schedule_s;
258 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
259 struct save_schedule_s *, int);
261 static rtx sh_struct_value_rtx (tree, int);
262 static rtx sh_function_value (const_tree, const_tree, bool);
263 static rtx sh_libcall_value (enum machine_mode, const_rtx);
264 static bool sh_return_in_memory (const_tree, const_tree);
265 static rtx sh_builtin_saveregs (void);
266 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
267 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
268 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
269 static tree sh_build_builtin_va_list (void);
270 static void sh_va_start (tree, rtx);
271 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
272 static bool sh_promote_prototypes (const_tree);
273 static enum machine_mode sh_promote_function_mode (const_tree type,
274 enum machine_mode,
275 int *punsignedp,
276 const_tree funtype,
277 int for_return);
278 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
279 const_tree, bool);
280 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
281 const_tree, bool);
282 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
283 tree, bool);
284 static bool sh_scalar_mode_supported_p (enum machine_mode);
285 static int sh_dwarf_calling_convention (const_tree);
286 static void sh_encode_section_info (tree, rtx, int);
287 static int sh2a_function_vector_p (tree);
288 static void sh_trampoline_init (rtx, tree, rtx);
289 static rtx sh_trampoline_adjust_address (rtx);
291 static const struct attribute_spec sh_attribute_table[] =
293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
294 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
295 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
296 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
297 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
298 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
299 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
300 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
301 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
302 #ifdef SYMBIAN
303 /* Symbian support adds two new attributes:
304 dllexport - for exporting a function/variable that will live in a dll
305 dllimport - for importing a function/variable from a dll
307 Microsoft allows multiple declspecs in one __declspec, separating
308 them with spaces. We do NOT support this. Instead, use __declspec
309 multiple times. */
310 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
311 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
312 #endif
313 { NULL, 0, 0, false, false, false, NULL }
316 /* Initialize the GCC target structure. */
317 #undef TARGET_ATTRIBUTE_TABLE
318 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
320 /* The next two are used for debug info when compiling with -gdwarf. */
321 #undef TARGET_ASM_UNALIGNED_HI_OP
322 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
323 #undef TARGET_ASM_UNALIGNED_SI_OP
324 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
326 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
327 #undef TARGET_ASM_UNALIGNED_DI_OP
328 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
329 #undef TARGET_ASM_ALIGNED_DI_OP
330 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
332 #undef TARGET_PRINT_OPERAND
333 #define TARGET_PRINT_OPERAND sh_print_operand
334 #undef TARGET_PRINT_OPERAND_ADDRESS
335 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
336 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
337 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
339 #undef TARGET_ASM_FUNCTION_EPILOGUE
340 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
342 #undef TARGET_ASM_OUTPUT_MI_THUNK
343 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
345 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
346 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
348 #undef TARGET_ASM_FILE_START
349 #define TARGET_ASM_FILE_START sh_file_start
350 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
351 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
353 #undef TARGET_DEFAULT_TARGET_FLAGS
354 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
355 #undef TARGET_HANDLE_OPTION
356 #define TARGET_HANDLE_OPTION sh_handle_option
358 #undef TARGET_INSERT_ATTRIBUTES
359 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
361 #undef TARGET_SCHED_ADJUST_COST
362 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
364 #undef TARGET_SCHED_ISSUE_RATE
365 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
367 /* The next 5 hooks have been implemented for reenabling sched1. With the
368 help of these macros we are limiting the movement of insns in sched1 to
369 reduce the register pressure. The overall idea is to keep count of SImode
370 and SFmode regs required by already scheduled insns.  When these counts
371 cross certain threshold values, we give priority to insns that free registers.
372 The insn that frees registers is most likely to be the insn with the lowest
373 LUID (original insn order); but such an insn might be sitting in the stalled
374 queue (Q) instead of the ready queue (R).  To solve this, we skip up to a
375 maximum of 8 cycles so that such insns may move from Q -> R.
377 The hooks are described below:
379 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
380 scheduler; it is called inside the sched_init function just after the
381 find_insn_reg_weights function call.  It is used to calculate the SImode
382 and SFmode weights of the insns in each basic block, much like what
383 find_insn_reg_weights does.
384 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
386 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
387 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
388 (Q)->(R).
390 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
391 high; reorder the ready queue so that the insn with lowest LUID will be
392 issued next.
394 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
395 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
397 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
398 can be returned from TARGET_SCHED_REORDER2.
400 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
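/* As a rough sketch (a reader's aid, not the exact control flow), the
   reorder hooks below behave like

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   i.e. when pressure is high the ready list is re-sorted by LUID so the
   oldest insns issue first, and sh_dfa_new_cycle then skips cycles
   (bounded via skip_cycles) so stalled insns can move from Q to R.  */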
402 #undef TARGET_SCHED_DFA_NEW_CYCLE
403 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
405 #undef TARGET_SCHED_INIT_GLOBAL
406 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
408 #undef TARGET_SCHED_FINISH_GLOBAL
409 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
411 #undef TARGET_SCHED_VARIABLE_ISSUE
412 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
414 #undef TARGET_SCHED_REORDER
415 #define TARGET_SCHED_REORDER sh_reorder
417 #undef TARGET_SCHED_REORDER2
418 #define TARGET_SCHED_REORDER2 sh_reorder2
420 #undef TARGET_SCHED_INIT
421 #define TARGET_SCHED_INIT sh_md_init
423 #undef TARGET_LEGITIMIZE_ADDRESS
424 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
426 #undef TARGET_CANNOT_MODIFY_JUMPS_P
427 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
428 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
429 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
430 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
431 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
432 sh_optimize_target_register_callee_saved
434 #undef TARGET_MS_BITFIELD_LAYOUT_P
435 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
437 #undef TARGET_INIT_BUILTINS
438 #define TARGET_INIT_BUILTINS sh_init_builtins
439 #undef TARGET_BUILTIN_DECL
440 #define TARGET_BUILTIN_DECL sh_builtin_decl
441 #undef TARGET_EXPAND_BUILTIN
442 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
444 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
445 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
447 #undef TARGET_CANNOT_COPY_INSN_P
448 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
449 #undef TARGET_RTX_COSTS
450 #define TARGET_RTX_COSTS sh_rtx_costs
451 #undef TARGET_ADDRESS_COST
452 #define TARGET_ADDRESS_COST sh_address_cost
453 #undef TARGET_ALLOCATE_INITIAL_VALUE
454 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
456 #undef TARGET_MACHINE_DEPENDENT_REORG
457 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
459 #undef TARGET_DWARF_REGISTER_SPAN
460 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
462 #ifdef HAVE_AS_TLS
463 #undef TARGET_HAVE_TLS
464 #define TARGET_HAVE_TLS true
465 #endif
467 #undef TARGET_PROMOTE_PROTOTYPES
468 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
469 #undef TARGET_PROMOTE_FUNCTION_MODE
470 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE sh_function_value
474 #undef TARGET_LIBCALL_VALUE
475 #define TARGET_LIBCALL_VALUE sh_libcall_value
476 #undef TARGET_STRUCT_VALUE_RTX
477 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
478 #undef TARGET_RETURN_IN_MEMORY
479 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
481 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
482 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
483 #undef TARGET_SETUP_INCOMING_VARARGS
484 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
485 #undef TARGET_STRICT_ARGUMENT_NAMING
486 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
487 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
488 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
489 #undef TARGET_MUST_PASS_IN_STACK
490 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
491 #undef TARGET_PASS_BY_REFERENCE
492 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
493 #undef TARGET_CALLEE_COPIES
494 #define TARGET_CALLEE_COPIES sh_callee_copies
495 #undef TARGET_ARG_PARTIAL_BYTES
496 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
498 #undef TARGET_BUILD_BUILTIN_VA_LIST
499 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
500 #undef TARGET_EXPAND_BUILTIN_VA_START
501 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
502 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
503 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
505 #undef TARGET_SCALAR_MODE_SUPPORTED_P
506 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
507 #undef TARGET_VECTOR_MODE_SUPPORTED_P
508 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
510 #undef TARGET_CHECK_PCH_TARGET_FLAGS
511 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
513 #undef TARGET_DWARF_CALLING_CONVENTION
514 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
516 #undef TARGET_FRAME_POINTER_REQUIRED
517 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
519 /* Return regmode weight for insn. */
520 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
522 /* Return current register pressure for regmode. */
523 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
525 #undef TARGET_ENCODE_SECTION_INFO
526 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
528 #ifdef SYMBIAN
530 #undef TARGET_ENCODE_SECTION_INFO
531 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
532 #undef TARGET_STRIP_NAME_ENCODING
533 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
534 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
535 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
537 #endif /* SYMBIAN */
539 #undef TARGET_SECONDARY_RELOAD
540 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
542 #undef TARGET_LEGITIMATE_ADDRESS_P
543 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
545 #undef TARGET_TRAMPOLINE_INIT
546 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
547 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
548 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
550 /* Machine-specific symbol_ref flags. */
551 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
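/* This flag is set by sh_encode_section_info below on SH2A function
   symbols that carry the function_vector attribute.  */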
553 struct gcc_target targetm = TARGET_INITIALIZER;
555 /* Implement TARGET_HANDLE_OPTION. */
557 static bool
558 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
559 int value ATTRIBUTE_UNUSED)
561 switch (code)
563 case OPT_m1:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
565 return true;
567 case OPT_m2:
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
569 return true;
571 case OPT_m2a:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
573 return true;
575 case OPT_m2a_nofpu:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
577 return true;
579 case OPT_m2a_single:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
581 return true;
583 case OPT_m2a_single_only:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
585 return true;
587 case OPT_m2e:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
589 return true;
591 case OPT_m3:
592 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
593 return true;
595 case OPT_m3e:
596 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
597 return true;
599 case OPT_m4:
600 case OPT_m4_100:
601 case OPT_m4_200:
602 case OPT_m4_300:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
604 return true;
606 case OPT_m4_nofpu:
607 case OPT_m4_100_nofpu:
608 case OPT_m4_200_nofpu:
609 case OPT_m4_300_nofpu:
610 case OPT_m4_340:
611 case OPT_m4_400:
612 case OPT_m4_500:
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
614 return true;
616 case OPT_m4_single:
617 case OPT_m4_100_single:
618 case OPT_m4_200_single:
619 case OPT_m4_300_single:
620 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
621 return true;
623 case OPT_m4_single_only:
624 case OPT_m4_100_single_only:
625 case OPT_m4_200_single_only:
626 case OPT_m4_300_single_only:
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
628 return true;
630 case OPT_m4a:
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
632 return true;
634 case OPT_m4a_nofpu:
635 case OPT_m4al:
636 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
637 return true;
639 case OPT_m4a_single:
640 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
641 return true;
643 case OPT_m4a_single_only:
644 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
645 return true;
647 case OPT_m5_32media:
648 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
649 return true;
651 case OPT_m5_32media_nofpu:
652 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
653 return true;
655 case OPT_m5_64media:
656 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
657 return true;
659 case OPT_m5_64media_nofpu:
660 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
661 return true;
663 case OPT_m5_compact:
664 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
665 return true;
667 case OPT_m5_compact_nofpu:
668 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
669 return true;
671 default:
672 return true;
676 /* Set default optimization options. */
677 void
678 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
680 if (level)
682 if (!size)
683 sh_div_str = "inv:minlat";
685 if (size)
687 target_flags |= MASK_SMALLCODE;
688 sh_div_str = SH_DIV_STR_FOR_SIZE ;
690 else
691 TARGET_CBRANCHDI4 = 1;
692 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
693 haven't been parsed yet, hence we'd read only the default.
694 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
695 it's OK to always set flag_branch_target_load_optimize. */
696 if (level > 1)
698 flag_branch_target_load_optimize = 1;
699 if (!size)
700 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
702 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
703 here, so leave it to OVERRIDE_OPTIONS to set
704 flag_finite_math_only. We set it to 2 here so we know if the user
705 explicitly requested this to be on or off. */
706 flag_finite_math_only = 2;
707 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
708 the user explicitly requested this to be on or off. */
709 if (flag_schedule_insns > 0)
710 flag_schedule_insns = 2;
712 set_param_value ("simultaneous-prefetches", 2);
715 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
716 options, and do some machine dependent initialization. */
717 void
718 sh_override_options (void)
720 int regno;
722 SUBTARGET_OVERRIDE_OPTIONS;
723 if (flag_finite_math_only == 2)
724 flag_finite_math_only
725 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
726 if (TARGET_SH2E && !flag_finite_math_only)
727 target_flags |= MASK_IEEE;
728 sh_cpu = PROCESSOR_SH1;
729 assembler_dialect = 0;
730 if (TARGET_SH2)
731 sh_cpu = PROCESSOR_SH2;
732 if (TARGET_SH2E)
733 sh_cpu = PROCESSOR_SH2E;
734 if (TARGET_SH2A)
735 sh_cpu = PROCESSOR_SH2A;
736 if (TARGET_SH3)
737 sh_cpu = PROCESSOR_SH3;
738 if (TARGET_SH3E)
739 sh_cpu = PROCESSOR_SH3E;
740 if (TARGET_SH4)
742 assembler_dialect = 1;
743 sh_cpu = PROCESSOR_SH4;
745 if (TARGET_SH4A_ARCH)
747 assembler_dialect = 1;
748 sh_cpu = PROCESSOR_SH4A;
750 if (TARGET_SH5)
752 sh_cpu = PROCESSOR_SH5;
753 target_flags |= MASK_ALIGN_DOUBLE;
754 if (TARGET_SHMEDIA_FPU)
755 target_flags |= MASK_FMOVD;
756 if (TARGET_SHMEDIA)
758 /* There are no delay slots on SHmedia. */
759 flag_delayed_branch = 0;
760 /* Relaxation isn't yet supported for SHmedia.  */
761 target_flags &= ~MASK_RELAX;
762 /* After reload, if-conversion does little good but can cause
763 ICEs:
764 - find_if_block doesn't do anything for SH because we don't
765 have conditional execution patterns. (We use conditional
766 move patterns, which are handled differently, and only
767 before reload).
768 - find_cond_trap doesn't do anything for the SH because we
769 don't have conditional traps.
770 - find_if_case_1 uses redirect_edge_and_branch_force in
771 the only path that does an optimization, and this causes
772 an ICE when branch targets are in registers.
773 - find_if_case_2 doesn't do anything for the SHmedia after
774 reload except when it can redirect a tablejump - and
775 that's rather rare. */
776 flag_if_conversion2 = 0;
777 if (! strcmp (sh_div_str, "call"))
778 sh_div_strategy = SH_DIV_CALL;
779 else if (! strcmp (sh_div_str, "call2"))
780 sh_div_strategy = SH_DIV_CALL2;
781 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
782 sh_div_strategy = SH_DIV_FP;
783 else if (! strcmp (sh_div_str, "inv"))
784 sh_div_strategy = SH_DIV_INV;
785 else if (! strcmp (sh_div_str, "inv:minlat"))
786 sh_div_strategy = SH_DIV_INV_MINLAT;
787 else if (! strcmp (sh_div_str, "inv20u"))
788 sh_div_strategy = SH_DIV_INV20U;
789 else if (! strcmp (sh_div_str, "inv20l"))
790 sh_div_strategy = SH_DIV_INV20L;
791 else if (! strcmp (sh_div_str, "inv:call2"))
792 sh_div_strategy = SH_DIV_INV_CALL2;
793 else if (! strcmp (sh_div_str, "inv:call"))
794 sh_div_strategy = SH_DIV_INV_CALL;
795 else if (! strcmp (sh_div_str, "inv:fp"))
797 if (TARGET_FPU_ANY)
798 sh_div_strategy = SH_DIV_INV_FP;
799 else
800 sh_div_strategy = SH_DIV_INV;
802 TARGET_CBRANCHDI4 = 0;
803 /* Assembler CFI isn't yet fully supported for SHmedia. */
804 flag_dwarf2_cfi_asm = 0;
807 else
809 /* Only the sh64-elf assembler supports .quad properly.  */
810 targetm.asm_out.aligned_op.di = NULL;
811 targetm.asm_out.unaligned_op.di = NULL;
813 if (TARGET_SH1)
815 if (! strcmp (sh_div_str, "call-div1"))
816 sh_div_strategy = SH_DIV_CALL_DIV1;
817 else if (! strcmp (sh_div_str, "call-fp")
818 && (TARGET_FPU_DOUBLE
819 || (TARGET_HARD_SH4 && TARGET_SH2E)
820 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
821 sh_div_strategy = SH_DIV_CALL_FP;
822 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
823 sh_div_strategy = SH_DIV_CALL_TABLE;
824 else
825 /* Pick one that makes most sense for the target in general.
826 It is not much good to use different functions depending
827 on -Os, since then we'll end up with two different functions
828 when some of the code is compiled for size, and some for
829 speed. */
831 /* SH4 tends to emphasize speed. */
832 if (TARGET_HARD_SH4)
833 sh_div_strategy = SH_DIV_CALL_TABLE;
834 /* These have their own way of doing things. */
835 else if (TARGET_SH2A)
836 sh_div_strategy = SH_DIV_INTRINSIC;
837 /* ??? Should we use the integer SHmedia function instead? */
838 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
839 sh_div_strategy = SH_DIV_CALL_FP;
840 /* SH1 .. SH3 cores often go into small-footprint systems, so
841 default to the smallest implementation available. */
842 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
843 sh_div_strategy = SH_DIV_CALL_TABLE;
844 else
845 sh_div_strategy = SH_DIV_CALL_DIV1;
847 if (!TARGET_SH1)
848 TARGET_PRETEND_CMOVE = 0;
849 if (sh_divsi3_libfunc[0])
850 ; /* User supplied - leave it alone. */
851 else if (TARGET_DIVIDE_CALL_FP)
852 sh_divsi3_libfunc = "__sdivsi3_i4";
853 else if (TARGET_DIVIDE_CALL_TABLE)
854 sh_divsi3_libfunc = "__sdivsi3_i4i";
855 else if (TARGET_SH5)
856 sh_divsi3_libfunc = "__sdivsi3_1";
857 else
858 sh_divsi3_libfunc = "__sdivsi3";
859 if (sh_branch_cost == -1)
860 sh_branch_cost
861 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
863 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
864 if (! VALID_REGISTER_P (regno))
865 sh_register_names[regno][0] = '\0';
867 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
868 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
869 sh_additional_register_names[regno][0] = '\0';
871 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
873 if ((flag_pic && ! TARGET_PREFERGOT)
874 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
875 flag_no_function_cse = 1;
877 if (targetm.small_register_classes_for_mode_p (VOIDmode))
879 /* Never run scheduling before reload, since that can
880 break global alloc, and generates slower code anyway due
881 to the pressure on R0. */
882 /* Enable sched1 for SH4 only if the user explicitly requests it.
883 When sched1 is enabled, the ready queue will be reordered by
884 the target hooks if pressure is high.  We cannot do this for
885 PIC, SH3 and lower as they give spill failures for R0.  */
886 if (!TARGET_HARD_SH4 || flag_pic)
887 flag_schedule_insns = 0;
888 /* ??? Current exception handling places basic block boundaries
889 after call_insns.  This causes high pressure on R0 and gives
890 spill failures for R0 in reload. See PR 22553 and the thread
891 on gcc-patches
892 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
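/* Note the interaction with sh_optimization_options above, which records
   a default-enabled first scheduling pass as flag_schedule_insns == 2:
   only an explicit -fschedule-insns (value 1) gets the warning below when
   exceptions are enabled, while a merely default-enabled pass is switched
   off silently.  */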
893 else if (flag_exceptions)
895 if (flag_schedule_insns == 1)
896 warning (0, "ignoring -fschedule-insns because of exception handling bug");
897 flag_schedule_insns = 0;
899 else if (flag_schedule_insns == 2)
900 flag_schedule_insns = 0;
903 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
904 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
906 /* Unwind info is not correct around the CFG unless either a frame
907 pointer is present or M_A_O_A is set. Fixing this requires rewriting
908 unwind info generation to be aware of the CFG and propagating states
909 around edges. */
910 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
911 || flag_exceptions || flag_non_call_exceptions)
912 && flag_omit_frame_pointer
913 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
915 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
916 warning (0, "unwind tables currently require either a frame pointer "
917 "or -maccumulate-outgoing-args for correctness");
918 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
921 /* Unwinding with -freorder-blocks-and-partition does not work on this
922 architecture, because it requires far jumps to labels crossing between
923 hot/cold sections, which are rejected on this architecture.  */
924 if (flag_reorder_blocks_and_partition)
926 if (flag_exceptions)
928 inform (input_location,
929 "-freorder-blocks-and-partition does not work with "
930 "exceptions on this architecture");
931 flag_reorder_blocks_and_partition = 0;
932 flag_reorder_blocks = 1;
934 else if (flag_unwind_tables)
936 inform (input_location,
937 "-freorder-blocks-and-partition does not support unwind "
938 "info on this architecture");
939 flag_reorder_blocks_and_partition = 0;
940 flag_reorder_blocks = 1;
944 if (align_loops == 0)
945 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
946 if (align_jumps == 0)
947 align_jumps = 1 << CACHE_LOG;
948 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
949 align_jumps = TARGET_SHMEDIA ? 4 : 2;
951 /* Allocation boundary (in *bytes*) for the code of a function.
952 SH1: 32 bit alignment is faster, because instructions are always
953 fetched as a pair from a longword boundary.
954 SH2 .. SH5: align to the cache line start.  */
955 if (align_functions == 0)
956 align_functions
957 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
958 /* The linker relaxation code breaks when a function contains
959 alignments that are larger than that at the start of a
960 compilation unit. */
961 if (TARGET_RELAX)
963 int min_align
964 = align_loops > align_jumps ? align_loops : align_jumps;
966 /* Also take possible .long constants / mova tables into account.  */
967 if (min_align < 4)
968 min_align = 4;
969 if (align_functions < min_align)
970 align_functions = min_align;
973 if (sh_fixed_range_str)
974 sh_fix_range (sh_fixed_range_str);
977 /* Print the operand address in x to the stream. */
979 static void
980 sh_print_operand_address (FILE *stream, rtx x)
982 switch (GET_CODE (x))
984 case REG:
985 case SUBREG:
986 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
987 break;
989 case PLUS:
991 rtx base = XEXP (x, 0);
992 rtx index = XEXP (x, 1);
994 switch (GET_CODE (index))
996 case CONST_INT:
997 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
998 reg_names[true_regnum (base)]);
999 break;
1001 case REG:
1002 case SUBREG:
1004 int base_num = true_regnum (base);
1005 int index_num = true_regnum (index);
1007 fprintf (stream, "@(r0,%s)",
1008 reg_names[MAX (base_num, index_num)]);
1009 break;
1012 default:
1013 gcc_unreachable ();
1016 break;
1018 case PRE_DEC:
1019 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1020 break;
1022 case POST_INC:
1023 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1024 break;
1026 default:
1027 x = mark_constant_pool_use (x);
1028 output_addr_const (stream, x);
1029 break;
1033 /* Print operand x (an rtx) in assembler syntax to file stream
1034 according to modifier code.
1036 '.' print a .s if insn needs delay slot
1037 ',' print LOCAL_LABEL_PREFIX
1038 '@' print trap, rte or rts depending on the function's interrupt or trap_exit attributes
1039 '#' output a nop if there is nothing to put in the delay slot
1040 ''' print likelihood suffix (/u for unlikely).
1041 '>' print branch target if -fverbose-asm
1042 'O' print a constant without the #
1043 'R' print the LSW of a dp value - changes if in little endian
1044 'S' print the MSW of a dp value - changes if in little endian
1045 'T' print the next word of a dp value - same as 'R' in big endian mode.
1046 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1047 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1048 'N' print 'r63' if the operand is (const_int 0).
1049 'd' print a V2SF reg as dN instead of fpN.
1050 'm' print a pair `base,offset' or `base,index', for LD and ST.
1051 'U' Likewise for {LD,ST}{HI,LO}.
1052 'V' print the position of a single bit set.
1053 'W' print the position of a single bit cleared.
1054 't' print a memory address which is a register.
1055 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1056 'o' output an operator. */
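/* A couple of illustrative expansions (assuming a little-endian, non-FP
   register operand): for a DImode value in r2/r3, "%S0" prints the more
   significant word register "r3" and "%R0" prints the less significant
   word register "r2"; "%#" emits "\n\tnop" only when the current insn's
   delay slot is empty.  */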
1058 static void
1059 sh_print_operand (FILE *stream, rtx x, int code)
1061 int regno;
1062 enum machine_mode mode;
1064 switch (code)
1066 tree trapa_attr;
1068 case '.':
1069 if (final_sequence
1070 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1071 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1072 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1073 break;
1074 case ',':
1075 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1076 break;
1077 case '@':
1078 trapa_attr = lookup_attribute ("trap_exit",
1079 DECL_ATTRIBUTES (current_function_decl));
1080 if (trapa_attr)
1081 fprintf (stream, "trapa #%ld",
1082 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1083 else if (sh_cfun_interrupt_handler_p ())
1085 if (sh_cfun_resbank_handler_p ())
1086 fprintf (stream, "resbank\n");
1087 fprintf (stream, "rte");
1089 else
1090 fprintf (stream, "rts");
1091 break;
1092 case '#':
1093 /* Output a nop if there's nothing in the delay slot. */
1094 if (dbr_sequence_length () == 0)
1095 fprintf (stream, "\n\tnop");
1096 break;
1097 case '\'':
1099 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1101 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1102 fputs ("/u", stream);
1103 break;
1105 case '>':
1106 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1108 fputs ("\t! target: ", stream);
1109 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1111 break;
1112 case 'O':
1113 x = mark_constant_pool_use (x);
1114 output_addr_const (stream, x);
1115 break;
1116 /* N.B.: %R / %S / %T adjust memory addresses by four.
1117 For SHMEDIA, that means they can be used to access the first and
1118 second 32 bit part of a 64 bit (or larger) value that
1119 might be held in floating point registers or memory.
1120 While they can be used to access 64 bit parts of a larger value
1121 held in general purpose registers, that won't work with memory,
1122 nor for fp registers, since the frxx names are used.  */
1123 case 'R':
1124 if (REG_P (x) || GET_CODE (x) == SUBREG)
1126 regno = true_regnum (x);
1127 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1128 fputs (reg_names[regno], (stream));
1130 else if (MEM_P (x))
1132 x = adjust_address (x, SImode, 4 * LSW);
1133 sh_print_operand_address (stream, XEXP (x, 0));
1135 else
1137 rtx sub = NULL_RTX;
1139 mode = GET_MODE (x);
1140 if (mode == VOIDmode)
1141 mode = DImode;
1142 if (GET_MODE_SIZE (mode) >= 8)
1143 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1144 if (sub)
1145 sh_print_operand (stream, sub, 0);
1146 else
1147 output_operand_lossage ("invalid operand to %%R");
1149 break;
1150 case 'S':
1151 if (REG_P (x) || GET_CODE (x) == SUBREG)
1153 regno = true_regnum (x);
1154 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1155 fputs (reg_names[regno], (stream));
1157 else if (MEM_P (x))
1159 x = adjust_address (x, SImode, 4 * MSW);
1160 sh_print_operand_address (stream, XEXP (x, 0));
1162 else
1164 rtx sub = NULL_RTX;
1166 mode = GET_MODE (x);
1167 if (mode == VOIDmode)
1168 mode = DImode;
1169 if (GET_MODE_SIZE (mode) >= 8)
1170 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1171 if (sub)
1172 sh_print_operand (stream, sub, 0);
1173 else
1174 output_operand_lossage ("invalid operand to %%S");
1176 break;
1177 case 'T':
1178 /* Next word of a double. */
1179 switch (GET_CODE (x))
1181 case REG:
1182 fputs (reg_names[REGNO (x) + 1], (stream));
1183 break;
1184 case MEM:
1185 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1186 && GET_CODE (XEXP (x, 0)) != POST_INC)
1187 x = adjust_address (x, SImode, 4);
1188 sh_print_operand_address (stream, XEXP (x, 0));
1189 break;
1190 default:
1191 break;
1193 break;
1195 case 't':
1196 gcc_assert (MEM_P (x));
1197 x = XEXP (x, 0);
1198 switch (GET_CODE (x))
1200 case REG:
1201 case SUBREG:
1202 sh_print_operand (stream, x, 0);
1203 break;
1204 default:
1205 break;
1207 break;
1209 case 'o':
1210 switch (GET_CODE (x))
1212 case PLUS: fputs ("add", stream); break;
1213 case MINUS: fputs ("sub", stream); break;
1214 case MULT: fputs ("mul", stream); break;
1215 case DIV: fputs ("div", stream); break;
1216 case EQ: fputs ("eq", stream); break;
1217 case NE: fputs ("ne", stream); break;
1218 case GT: case LT: fputs ("gt", stream); break;
1219 case GE: case LE: fputs ("ge", stream); break;
1220 case GTU: case LTU: fputs ("gtu", stream); break;
1221 case GEU: case LEU: fputs ("geu", stream); break;
1222 default:
1223 break;
1225 break;
1226 case 'M':
1227 if (TARGET_SHMEDIA)
1229 if (MEM_P (x)
1230 && GET_CODE (XEXP (x, 0)) == PLUS
1231 && (REG_P (XEXP (XEXP (x, 0), 1))
1232 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1233 fputc ('x', stream);
1235 else
1237 if (MEM_P (x))
1239 switch (GET_MODE (x))
1241 case QImode: fputs (".b", stream); break;
1242 case HImode: fputs (".w", stream); break;
1243 case SImode: fputs (".l", stream); break;
1244 case SFmode: fputs (".s", stream); break;
1245 case DFmode: fputs (".d", stream); break;
1246 default: gcc_unreachable ();
1250 break;
1252 case 'm':
1253 gcc_assert (MEM_P (x));
1254 x = XEXP (x, 0);
1255 /* Fall through. */
1256 case 'U':
1257 switch (GET_CODE (x))
1259 case REG:
1260 case SUBREG:
1261 sh_print_operand (stream, x, 0);
1262 fputs (", 0", stream);
1263 break;
1265 case PLUS:
1266 sh_print_operand (stream, XEXP (x, 0), 0);
1267 fputs (", ", stream);
1268 sh_print_operand (stream, XEXP (x, 1), 0);
1269 break;
1271 default:
1272 gcc_unreachable ();
1274 break;
1276 case 'V':
1278 int num = exact_log2 (INTVAL (x));
1279 gcc_assert (num >= 0);
1280 fprintf (stream, "#%d", num);
1282 break;
1284 case 'W':
1286 int num = exact_log2 (~INTVAL (x));
1287 gcc_assert (num >= 0);
1288 fprintf (stream, "#%d", num);
1290 break;
1292 case 'd':
1293 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1295 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1296 break;
1298 case 'N':
1299 if (x == CONST0_RTX (GET_MODE (x)))
1301 fprintf ((stream), "r63");
1302 break;
1304 goto default_output;
1305 case 'u':
1306 if (CONST_INT_P (x))
1308 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1309 break;
1311 /* Fall through. */
1313 default_output:
1314 default:
1315 regno = 0;
1316 mode = GET_MODE (x);
1318 switch (GET_CODE (x))
1320 case TRUNCATE:
1322 rtx inner = XEXP (x, 0);
1323 int offset = 0;
1324 enum machine_mode inner_mode;
1326 /* We might see SUBREGs with vector mode registers inside. */
1327 if (GET_CODE (inner) == SUBREG
1328 && (GET_MODE_SIZE (GET_MODE (inner))
1329 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1330 && subreg_lowpart_p (inner))
1331 inner = SUBREG_REG (inner);
1332 if (CONST_INT_P (inner))
1334 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1335 goto default_output;
1337 inner_mode = GET_MODE (inner);
1338 if (GET_CODE (inner) == SUBREG
1339 && (GET_MODE_SIZE (GET_MODE (inner))
1340 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1341 && REG_P (SUBREG_REG (inner)))
1343 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1344 GET_MODE (SUBREG_REG (inner)),
1345 SUBREG_BYTE (inner),
1346 GET_MODE (inner));
1347 inner = SUBREG_REG (inner);
1349 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1350 abort ();
1351 /* Floating point register pairs are always big endian;
1352 general purpose registers are 64 bit wide. */
1353 regno = REGNO (inner);
1354 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1355 - HARD_REGNO_NREGS (regno, mode))
1356 + offset;
1357 x = inner;
1358 goto reg;
1360 case SIGN_EXTEND:
1361 x = XEXP (x, 0);
1362 goto reg;
1363 /* FIXME: We need this on SHmedia32 because reload generates
1364 some sign-extended HI or QI loads into DImode registers
1365 but, because Pmode is SImode, the address ends up with a
1366 subreg:SI of the DImode register. Maybe reload should be
1367 fixed so as to apply alter_subreg to such loads? */
1368 case IF_THEN_ELSE:
1369 gcc_assert (trapping_target_operand (x, VOIDmode));
1370 x = XEXP (XEXP (x, 2), 0);
1371 goto default_output;
1372 case SUBREG:
1373 gcc_assert (SUBREG_BYTE (x) == 0
1374 && REG_P (SUBREG_REG (x)));
1376 x = SUBREG_REG (x);
1377 /* Fall through. */
1379 reg:
1380 case REG:
1381 regno += REGNO (x);
1382 if (FP_REGISTER_P (regno)
1383 && mode == V16SFmode)
1384 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1385 else if (FP_REGISTER_P (REGNO (x))
1386 && mode == V4SFmode)
1387 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1388 else if (REG_P (x)
1389 && mode == V2SFmode)
1390 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1391 else if (FP_REGISTER_P (REGNO (x))
1392 && GET_MODE_SIZE (mode) > 4)
1393 fprintf ((stream), "d%s", reg_names[regno] + 1);
1394 else
1395 fputs (reg_names[regno], (stream));
1396 break;
1398 case MEM:
1399 output_address (XEXP (x, 0));
1400 break;
1402 default:
1403 if (TARGET_SH1)
1404 fputc ('#', stream);
1405 output_addr_const (stream, x);
1406 break;
1408 break;
1412 static bool
1413 sh_print_operand_punct_valid_p (unsigned char code)
1415 return (code == '.' || code == '#' || code == '@' || code == ','
1416 || code == '$' || code == '\'' || code == '>');
1420 /* Encode symbol attributes of a SYMBOL_REF into its
1421 SYMBOL_REF_FLAGS. */
1422 static void
1423 sh_encode_section_info (tree decl, rtx rtl, int first)
1425 default_encode_section_info (decl, rtl, first);
1427 if (TREE_CODE (decl) == FUNCTION_DECL
1428 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1429 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1432 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1433 static void
1434 force_into (rtx value, rtx target)
1436 value = force_operand (value, target);
1437 if (! rtx_equal_p (value, target))
1438 emit_insn (gen_move_insn (target, value));
1441 /* Emit code to perform a block move. Choose the best method.
1443 OPERANDS[0] is the destination.
1444 OPERANDS[1] is the source.
1445 OPERANDS[2] is the size.
1446 OPERANDS[3] is the alignment safe to use. */
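/* A sketch of how the cases below dispatch for a constant, 4-aligned
   size with TARGET_HARD_SH4: a 12 byte copy calls the __movmemSI12_i4
   helper with the destination address in r4 and the source address in
   r5; larger copies (when not optimizing for size) use __movmem_i4_even
   or __movmem_i4_odd, additionally passing (bytes >> 3) - 1 in r6.  */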
1449 expand_block_move (rtx *operands)
1451 int align = INTVAL (operands[3]);
1452 int constp = (CONST_INT_P (operands[2]));
1453 int bytes = (constp ? INTVAL (operands[2]) : 0);
1455 if (! constp)
1456 return 0;
1458 /* If we could use mov.l to move words and dest is word-aligned, we
1459 can use movua.l for loads and still generate a relatively short
1460 and efficient sequence. */
1461 if (TARGET_SH4A_ARCH && align < 4
1462 && MEM_ALIGN (operands[0]) >= 32
1463 && can_move_by_pieces (bytes, 32))
1465 rtx dest = copy_rtx (operands[0]);
1466 rtx src = copy_rtx (operands[1]);
1467 /* We could use different pseudos for each copied word, but
1468 since movua can only load into r0, it's kind of
1469 pointless. */
1470 rtx temp = gen_reg_rtx (SImode);
1471 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1472 int copied = 0;
1474 while (copied + 4 <= bytes)
1476 rtx to = adjust_address (dest, SImode, copied);
1477 rtx from = adjust_automodify_address (src, BLKmode,
1478 src_addr, copied);
1480 set_mem_size (from, GEN_INT (4));
1481 emit_insn (gen_movua (temp, from));
1482 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1483 emit_move_insn (to, temp);
1484 copied += 4;
1487 if (copied < bytes)
1488 move_by_pieces (adjust_address (dest, BLKmode, copied),
1489 adjust_automodify_address (src, BLKmode,
1490 src_addr, copied),
1491 bytes - copied, align, 0);
1493 return 1;
1496 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1497 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1498 if (align < 4 || (bytes % 4 != 0))
1499 return 0;
1501 if (TARGET_HARD_SH4)
1503 if (bytes < 12)
1504 return 0;
1505 else if (bytes == 12)
1507 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1508 rtx r4 = gen_rtx_REG (SImode, 4);
1509 rtx r5 = gen_rtx_REG (SImode, 5);
1511 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1512 force_into (XEXP (operands[0], 0), r4);
1513 force_into (XEXP (operands[1], 0), r5);
1514 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1515 return 1;
1517 else if (! TARGET_SMALLCODE)
1519 const char *entry_name;
1520 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1521 int dwords;
1522 rtx r4 = gen_rtx_REG (SImode, 4);
1523 rtx r5 = gen_rtx_REG (SImode, 5);
1524 rtx r6 = gen_rtx_REG (SImode, 6);
1526 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1527 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1528 force_into (XEXP (operands[0], 0), r4);
1529 force_into (XEXP (operands[1], 0), r5);
1531 dwords = bytes >> 3;
1532 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1533 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1534 return 1;
1536 else
1537 return 0;
1539 if (bytes < 64)
1541 char entry[30];
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1546 sprintf (entry, "__movmemSI%d", bytes);
1547 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1548 force_into (XEXP (operands[0], 0), r4);
1549 force_into (XEXP (operands[1], 0), r5);
1550 emit_insn (gen_block_move_real (func_addr_rtx));
1551 return 1;
1554 /* This is the same number of bytes as a memcpy call, but to a different,
1555 less common function name, so this will occasionally use more space. */
1556 if (! TARGET_SMALLCODE)
1558 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1559 int final_switch, while_loop;
1560 rtx r4 = gen_rtx_REG (SImode, 4);
1561 rtx r5 = gen_rtx_REG (SImode, 5);
1562 rtx r6 = gen_rtx_REG (SImode, 6);
1564 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1565 force_into (XEXP (operands[0], 0), r4);
1566 force_into (XEXP (operands[1], 0), r5);
1568 /* r6 controls the size of the move: it is decremented by 16
1569 for each 64 bytes moved.  The negative value left over is then used
1570 as an index into a list of move instructions.  E.g., a 72 byte move
1571 would be set up with size(r6) = 14, for one iteration through the
1572 big while loop, and a switch of -2 for the last part. */
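/* Checking the 72 byte example against the expressions below:
   bytes / 4 = 18, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 0 + 14 = 14,
   which matches the size(r6) = 14 quoted above.  */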
1574 final_switch = 16 - ((bytes / 4) % 16);
1575 while_loop = ((bytes / 4) / 16 - 1) * 16;
1576 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1577 emit_insn (gen_block_lump_real (func_addr_rtx));
1578 return 1;
1581 return 0;
1584 /* Prepare operands for a move define_expand; specifically, one of the
1585 operands must be in a register. */
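/* A sketch of the PIC handling below: with flag_pic, a symbolic source
   being stored to a MEM destination is first forced into a register,
   while other symbolic sources go through legitimize_pic_address, using
   either a fresh pseudo or the destination itself as the temporary when
   new pseudos cannot be created.  */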
1588 prepare_move_operands (rtx operands[], enum machine_mode mode)
1590 if ((mode == SImode || mode == DImode)
1591 && flag_pic
1592 && ! ((mode == Pmode || mode == ptr_mode)
1593 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1595 rtx temp;
1596 if (SYMBOLIC_CONST_P (operands[1]))
1598 if (MEM_P (operands[0]))
1599 operands[1] = force_reg (Pmode, operands[1]);
1600 else if (TARGET_SHMEDIA
1601 && GET_CODE (operands[1]) == LABEL_REF
1602 && target_reg_operand (operands[0], mode))
1603 /* It's ok. */;
1604 else
1606 temp = (!can_create_pseudo_p ()
1607 ? operands[0]
1608 : gen_reg_rtx (Pmode));
1609 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1612 else if (GET_CODE (operands[1]) == CONST
1613 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1614 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1616 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1617 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1618 mode, temp);
1619 operands[1] = expand_binop (mode, add_optab, temp,
1620 XEXP (XEXP (operands[1], 0), 1),
1621 (!can_create_pseudo_p ()
1622 ? temp
1623 : gen_reg_rtx (Pmode)),
1624 0, OPTAB_LIB_WIDEN);
1628 if (! reload_in_progress && ! reload_completed)
1630 /* Copy the source to a register if both operands aren't registers. */
1631 if (! register_operand (operands[0], mode)
1632 && ! sh_register_operand (operands[1], mode))
1633 operands[1] = copy_to_mode_reg (mode, operands[1]);
1635 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1637 /* This is like change_address_1 (operands[0], mode, 0, 1),
1638 except that we can't use that function because it is static. */
1639 rtx new_rtx = change_address (operands[0], mode, 0);
1640 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1641 operands[0] = new_rtx;
1644 /* This case can happen while generating code to move the result
1645 of a library call to the target. Reject `st r0,@(rX,rY)' because
1646 reload will fail to find a spill register for rX, since r0 is already
1647 being used for the source. */
1648 else if (TARGET_SH1
1649 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1650 && MEM_P (operands[0])
1651 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1652 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1653 operands[1] = copy_to_mode_reg (mode, operands[1]);
1656 if (mode == Pmode || mode == ptr_mode)
1658 rtx op0, op1, opc;
1659 enum tls_model tls_kind;
1661 op0 = operands[0];
1662 op1 = operands[1];
1663 if (GET_CODE (op1) == CONST
1664 && GET_CODE (XEXP (op1, 0)) == PLUS
1665 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1666 != TLS_MODEL_NONE))
1668 opc = XEXP (XEXP (op1, 0), 1);
1669 op1 = XEXP (XEXP (op1, 0), 0);
1671 else
1672 opc = NULL_RTX;
1674 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1676 rtx tga_op1, tga_ret, tmp, tmp2;
1678 switch (tls_kind)
1680 case TLS_MODEL_GLOBAL_DYNAMIC:
1681 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1682 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1683 op1 = tga_ret;
1684 break;
1686 case TLS_MODEL_LOCAL_DYNAMIC:
1687 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1688 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1690 tmp = gen_reg_rtx (Pmode);
1691 emit_move_insn (tmp, tga_ret);
1693 if (register_operand (op0, Pmode))
1694 tmp2 = op0;
1695 else
1696 tmp2 = gen_reg_rtx (Pmode);
1698 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1699 op1 = tmp2;
1700 break;
1702 case TLS_MODEL_INITIAL_EXEC:
1703 if (! flag_pic)
1705 /* Don't schedule insns for getting GOT address when
1706 the first scheduling pass is enabled, to avoid spill
1707 failures for R0. */
1708 if (flag_schedule_insns)
1709 emit_insn (gen_blockage ());
1710 emit_insn (gen_GOTaddr2picreg ());
1711 emit_use (gen_rtx_REG (SImode, PIC_REG));
1712 if (flag_schedule_insns)
1713 emit_insn (gen_blockage ());
1715 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1716 tmp = gen_sym2GOTTPOFF (op1);
1717 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1718 op1 = tga_op1;
1719 break;
1721 case TLS_MODEL_LOCAL_EXEC:
1722 tmp2 = gen_reg_rtx (Pmode);
1723 emit_insn (gen_load_gbr (tmp2));
1724 tmp = gen_reg_rtx (Pmode);
1725 emit_insn (gen_symTPOFF2reg (tmp, op1));
1727 if (register_operand (op0, Pmode))
1728 op1 = op0;
1729 else
1730 op1 = gen_reg_rtx (Pmode);
1732 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1733 break;
1735 default:
1736 gcc_unreachable ();
1738 if (opc)
1739 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1740 operands[1] = op1;
1744 return 0;
1747 enum rtx_code
1748 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1749 enum rtx_code comparison)
1751 rtx op1;
1752 rtx scratch = NULL_RTX;
1754 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1755 comparison = GET_CODE (operands[0]);
1756 else
1757 scratch = operands[4];
1758 if (CONST_INT_P (operands[1])
1759 && !CONST_INT_P (operands[2]))
1761 rtx tmp = operands[1];
1763 operands[1] = operands[2];
1764 operands[2] = tmp;
1765 comparison = swap_condition (comparison);
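/* Where possible, adjust the constant so that the comparison below can use
   zero instead; e.g. x > -1 becomes x >= 0, x >= 1 becomes x > 0, and an
   unsigned x < 1 becomes x == 0, saving the load of the constant into a
   register.  */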
1767 if (CONST_INT_P (operands[2]))
1769 HOST_WIDE_INT val = INTVAL (operands[2]);
1770 if ((val == -1 || val == -0x81)
1771 && (comparison == GT || comparison == LE))
1773 comparison = (comparison == GT) ? GE : LT;
1774 operands[2] = gen_int_mode (val + 1, mode);
1776 else if ((val == 1 || val == 0x80)
1777 && (comparison == GE || comparison == LT))
1779 comparison = (comparison == GE) ? GT : LE;
1780 operands[2] = gen_int_mode (val - 1, mode);
1782 else if (val == 1 && (comparison == GEU || comparison == LTU))
1784 comparison = (comparison == GEU) ? NE : EQ;
1785 operands[2] = CONST0_RTX (mode);
1787 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1789 comparison = (comparison == GEU) ? GTU : LEU;
1790 operands[2] = gen_int_mode (val - 1, mode);
1792 else if (val == 0 && (comparison == GTU || comparison == LEU))
1793 comparison = (comparison == GTU) ? NE : EQ;
1794 else if (mode == SImode
1795 && ((val == 0x7fffffff
1796 && (comparison == GTU || comparison == LEU))
1797 || ((unsigned HOST_WIDE_INT) val
1798 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1799 && (comparison == GEU || comparison == LTU))))
1801 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1802 operands[2] = CONST0_RTX (mode);
1805 op1 = operands[1];
1806 if (can_create_pseudo_p ())
1807 operands[1] = force_reg (mode, op1);
1808 /* When we are handling DImode comparisons, we want to keep constants so
1809 that we can optimize the component comparisons; however, memory loads
1810 are better issued as a whole so that they can be scheduled well.
1811 SImode equality comparisons allow I08 constants, but only when they
1812 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1813 into a register, that register might as well be r0, and we allow the
1814 constant. If it is already in a register, this is likely to be
1815 allocated to a different hard register, thus we load the constant into
1816 a register unless it is zero. */
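/* (The only SH compare that takes an immediate is cmp/eq #imm8,r0; all other
   compares are register-register, which is why the constant is forced into a
   register in the remaining cases.)  */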
1817 if (!REG_P (operands[2])
1818 && (!CONST_INT_P (operands[2])
1819 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1820 && ((comparison != EQ && comparison != NE)
1821 || (REG_P (op1) && REGNO (op1) != R0_REG)
1822 || !satisfies_constraint_I08 (operands[2])))))
1824 if (scratch && GET_MODE (scratch) == mode)
1826 emit_move_insn (scratch, operands[2]);
1827 operands[2] = scratch;
1829 else if (can_create_pseudo_p ())
1830 operands[2] = force_reg (mode, operands[2]);
1832 return comparison;
1835 void
1836 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1838 rtx (*branch_expander) (rtx) = gen_branch_true;
1839 rtx jump;
1841 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1842 switch (comparison)
1844 case NE: case LT: case LE: case LTU: case LEU:
1845 comparison = reverse_condition (comparison);
1846 branch_expander = gen_branch_false;
1847 default: ;
1849 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1850 gen_rtx_fmt_ee (comparison, SImode,
1851 operands[1], operands[2])));
1852 jump = emit_jump_insn (branch_expander (operands[3]));
1853 if (probability >= 0)
1854 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1858 /* ??? How should we distribute probabilities when more than one branch
1859 is generated? So far we only have some ad-hoc observations:
1860 - If the operands are random, they are likely to differ in both parts.
1861 - If comparing items in a hash chain, the operands are random or equal;
1862 operation should be EQ or NE.
1863 - If items are searched in an ordered tree from the root, we can expect
1864 the highpart to be unequal about half of the time; operation should be
1865 an inequality comparison, operands non-constant, and overall probability
1866 about 50%. Likewise for quicksort.
1867 - Range checks will be often made against constants. Even if we assume for
1868 simplicity an even distribution of the non-constant operand over a
1869 sub-range here, the same probability could be generated with differently
1870 wide sub-ranges - as long as the ratio of the part of the subrange that
1871 is before the threshold to the part that comes after the threshold stays
1872 the same. Thus, we can't really tell anything here;
1873 assuming random distribution is at least simple.  */
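/* As a worked example of the split used further down: assuming the usual
   REG_BR_PROB_BASE of 10000 and an overall probability of 5000 on a
   non-constant inequality, msw_taken gets 5000 / 2 = 2500, msw_skip gets
   10000 * 5000 / (10000 + 5000) = 3333, and lsw_taken keeps the full
   5000.  */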
1876 bool
1877 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1879 enum rtx_code msw_taken, msw_skip, lsw_taken;
1880 rtx skip_label = NULL_RTX;
1881 rtx op1h, op1l, op2h, op2l;
1882 int num_branches;
1883 int prob, rev_prob;
1884 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1885 rtx scratch = operands[4];
1887 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1888 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1889 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1890 op1l = gen_lowpart (SImode, operands[1]);
1891 op2l = gen_lowpart (SImode, operands[2]);
1892 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
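/* For instance, a signed GT against general operands becomes msw_taken = GT
   (branch to the target if the high words already compare greater),
   msw_skip = LT (branch past the low-word test if they compare less) and
   lsw_taken = GTU (high words equal, so an unsigned low-word compare
   decides).  */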
1893 prob = split_branch_probability;
1894 rev_prob = REG_BR_PROB_BASE - prob;
1895 switch (comparison)
1897 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1898 That costs 1 cycle more when the first branch can be predicted taken,
1899 but saves us mispredicts because only one branch needs prediction.
1900 It also enables generating the cmpeqdi_t-1 pattern. */
1901 case EQ:
1902 if (TARGET_CMPEQDI_T)
1904 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1905 emit_jump_insn (gen_branch_true (operands[3]));
1906 return true;
1908 msw_skip = NE;
1909 lsw_taken = EQ;
1910 if (prob >= 0)
1912 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1914 msw_skip_prob = rev_prob;
1915 if (REG_BR_PROB_BASE <= 65535)
1916 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1917 else
1919 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1920 lsw_taken_prob
1921 = (prob
1922 ? (REG_BR_PROB_BASE
1923 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1924 / ((HOST_WIDEST_INT) prob << 32)))
1925 : 0);
1928 break;
1929 case NE:
1930 if (TARGET_CMPEQDI_T)
1932 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1933 emit_jump_insn (gen_branch_false (operands[3]));
1934 return true;
1936 msw_taken = NE;
1937 msw_taken_prob = prob;
1938 lsw_taken = NE;
1939 lsw_taken_prob = 0;
1940 break;
1941 case GTU: case GT:
1942 msw_taken = comparison;
1943 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1944 break;
1945 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1946 msw_skip = swap_condition (msw_taken);
1947 lsw_taken = GTU;
1948 break;
1949 case GEU: case GE:
1950 if (op2l == CONST0_RTX (SImode))
1951 msw_taken = comparison;
1952 else
1954 msw_taken = comparison == GE ? GT : GTU;
1955 msw_skip = swap_condition (msw_taken);
1956 lsw_taken = GEU;
1958 break;
1959 case LTU: case LT:
1960 msw_taken = comparison;
1961 if (op2l == CONST0_RTX (SImode))
1962 break;
1963 msw_skip = swap_condition (msw_taken);
1964 lsw_taken = LTU;
1965 break;
1966 case LEU: case LE:
1967 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1968 msw_taken = comparison;
1969 else
1971 lsw_taken = LEU;
1972 if (comparison == LE)
1973 msw_taken = LT;
1974 else if (op2h != CONST0_RTX (SImode))
1975 msw_taken = LTU;
1976 else
1977 break;
1978 msw_skip = swap_condition (msw_taken);
1980 break;
1981 default: return false;
1983 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1984 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1985 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1986 if (comparison != EQ && comparison != NE && num_branches > 1)
1988 if (!CONSTANT_P (operands[2])
1989 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1990 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1992 msw_taken_prob = prob / 2U;
1993 msw_skip_prob
1994 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1995 lsw_taken_prob = prob;
1997 else
1999 msw_taken_prob = prob;
2000 msw_skip_prob = REG_BR_PROB_BASE;
2001 /* ??? If we have a constant op2h, should we use that when
2002 calculating lsw_taken_prob? */
2003 lsw_taken_prob = prob;
2006 operands[1] = op1h;
2007 operands[2] = op2h;
2008 operands[4] = NULL_RTX;
2009 if (reload_completed
2010 && ! arith_reg_or_0_operand (op2h, SImode)
2011 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2012 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2013 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2015 emit_move_insn (scratch, operands[2]);
2016 operands[2] = scratch;
2018 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2019 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2020 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2022 rtx taken_label = operands[3];
2024 /* Operands were possibly modified, but msw_skip doesn't expect this.
2025 Always use the original ones. */
2026 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2028 operands[1] = op1h;
2029 operands[2] = op2h;
2032 operands[3] = skip_label = gen_label_rtx ();
2033 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2034 operands[3] = taken_label;
2036 operands[1] = op1l;
2037 operands[2] = op2l;
2038 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2040 if (reload_completed
2041 && ! arith_reg_or_0_operand (op2l, SImode)
2042 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2044 emit_move_insn (scratch, operands[2]);
2045 operands[2] = scratch;
2047 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2049 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2050 emit_label (skip_label);
2051 return true;
2054 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2056 static void
2057 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2059 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2061 insn = gen_rtx_PARALLEL (VOIDmode,
2062 gen_rtvec (2, insn,
2063 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2064 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2066 else
2067 emit_insn (insn);
2070 /* Prepare the operands for an scc instruction; make sure that the
2071 compare has been done and the result is in T_REG. */
2072 void
2073 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2075 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2076 enum rtx_code oldcode = code;
2077 enum machine_mode mode;
2079 /* First need a compare insn. */
2080 switch (code)
2082 case NE:
2083 /* It isn't possible to handle this case. */
2084 gcc_unreachable ();
2085 case LT:
2086 code = GT;
2087 break;
2088 case LE:
2089 code = GE;
2090 break;
2091 case LTU:
2092 code = GTU;
2093 break;
2094 case LEU:
2095 code = GEU;
2096 break;
2097 default:
2098 break;
2100 if (code != oldcode)
2102 rtx tmp = op0;
2103 op0 = op1;
2104 op1 = tmp;
2107 mode = GET_MODE (op0);
2108 if (mode == VOIDmode)
2109 mode = GET_MODE (op1);
2111 op0 = force_reg (mode, op0);
2112 if ((code != EQ && code != NE
2113 && (op1 != const0_rtx
2114 || code == GTU || code == GEU || code == LTU || code == LEU))
2115 || (mode == DImode && op1 != const0_rtx)
2116 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2117 op1 = force_reg (mode, op1);
2119 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2120 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2121 mode);
2125 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2126 rtx op0, rtx op1)
2128 rtx target = gen_reg_rtx (SImode);
2129 rtx tmp;
2131 gcc_assert (TARGET_SHMEDIA);
2132 switch (code)
2134 case EQ:
2135 case GT:
2136 case LT:
2137 case UNORDERED:
2138 case GTU:
2139 case LTU:
2140 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2141 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2142 code = NE;
2143 break;
2145 case NE:
2146 case GE:
2147 case LE:
2148 case ORDERED:
2149 case GEU:
2150 case LEU:
2151 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2152 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2153 code = EQ;
2154 break;
2156 case UNEQ:
2157 case UNGE:
2158 case UNGT:
2159 case UNLE:
2160 case UNLT:
2161 case LTGT:
2162 return NULL_RTX;
2164 default:
2165 gcc_unreachable ();
2168 if (mode == DImode)
2170 rtx t2 = gen_reg_rtx (DImode);
2171 emit_insn (gen_extendsidi2 (t2, target));
2172 target = t2;
2175 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2178 /* Called from the md file, set up the operands of a compare instruction. */
2180 void
2181 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2183 enum rtx_code code = GET_CODE (operands[0]);
2184 enum rtx_code branch_code;
2185 rtx op0 = operands[1];
2186 rtx op1 = operands[2];
2187 rtx insn, tem;
2188 bool need_ccmpeq = false;
2190 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2192 op0 = force_reg (mode, op0);
2193 op1 = force_reg (mode, op1);
2195 else
2197 if (code != EQ || mode == DImode)
2199 /* Force args into regs, since we can't use constants here. */
2200 op0 = force_reg (mode, op0);
2201 if (op1 != const0_rtx || code == GTU || code == GEU)
2202 op1 = force_reg (mode, op1);
2206 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2208 if (code == LT
2209 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2210 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2212 tem = op0, op0 = op1, op1 = tem;
2213 code = swap_condition (code);
2216 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2217 if (code == GE)
2219 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2220 need_ccmpeq = true;
2221 code = GT;
2224 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2225 to EQ/GT respectively. */
2226 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2229 switch (code)
2231 case EQ:
2232 case GT:
2233 case GE:
2234 case GTU:
2235 case GEU:
2236 branch_code = code;
2237 break;
2238 case NE:
2239 case LT:
2240 case LE:
2241 case LTU:
2242 case LEU:
2243 branch_code = reverse_condition (code);
2244 break;
2245 default:
2246 gcc_unreachable ();
2249 insn = gen_rtx_SET (VOIDmode,
2250 gen_rtx_REG (SImode, T_REG),
2251 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2253 sh_emit_set_t_insn (insn, mode);
2254 if (need_ccmpeq)
2255 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2257 if (branch_code == code)
2258 emit_jump_insn (gen_branch_true (operands[3]));
2259 else
2260 emit_jump_insn (gen_branch_false (operands[3]));
2263 void
2264 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2266 enum rtx_code code = GET_CODE (operands[1]);
2267 rtx op0 = operands[2];
2268 rtx op1 = operands[3];
2269 rtx lab = NULL_RTX;
2270 bool invert = false;
2271 rtx tem;
2273 op0 = force_reg (mode, op0);
2274 if ((code != EQ && code != NE
2275 && (op1 != const0_rtx
2276 || code == GTU || code == GEU || code == LTU || code == LEU))
2277 || (mode == DImode && op1 != const0_rtx)
2278 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2279 op1 = force_reg (mode, op1);
2281 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2283 if (code == LT || code == LE)
2285 code = swap_condition (code);
2286 tem = op0, op0 = op1, op1 = tem;
2288 if (code == GE)
2290 if (TARGET_IEEE)
2292 lab = gen_label_rtx ();
2293 sh_emit_scc_to_t (EQ, op0, op1);
2294 emit_jump_insn (gen_branch_true (lab));
2295 code = GT;
2297 else
2299 code = LT;
2300 invert = true;
2305 if (code == NE)
2307 code = EQ;
2308 invert = true;
2311 sh_emit_scc_to_t (code, op0, op1);
2312 if (lab)
2313 emit_label (lab);
2314 if (invert)
2315 emit_insn (gen_movnegt (operands[0]));
2316 else
2317 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2320 /* Functions to output assembly code. */
2322 /* Return a sequence of instructions to perform DI or DF move.
2324 Since the SH cannot move a DI or DF in one instruction, we have
2325 to take care when we see overlapping source and dest registers. */
2327 const char *
2328 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2329 enum machine_mode mode)
2331 rtx dst = operands[0];
2332 rtx src = operands[1];
2334 if (MEM_P (dst)
2335 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2336 return "mov.l %T1,%0\n\tmov.l %1,%0";
2338 if (register_operand (dst, mode)
2339 && register_operand (src, mode))
2341 if (REGNO (src) == MACH_REG)
2342 return "sts mach,%S0\n\tsts macl,%R0";
2344 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2345 when mov.d r1,r0 do r1->r0 then r2->r1. */
2347 if (REGNO (src) + 1 == REGNO (dst))
2348 return "mov %T1,%T0\n\tmov %1,%0";
2349 else
2350 return "mov %1,%0\n\tmov %T1,%T0";
2352 else if (CONST_INT_P (src))
2354 if (INTVAL (src) < 0)
2355 output_asm_insn ("mov #-1,%S0", operands);
2356 else
2357 output_asm_insn ("mov #0,%S0", operands);
2359 return "mov %1,%R0";
2361 else if (MEM_P (src))
2363 int ptrreg = -1;
2364 int dreg = REGNO (dst);
2365 rtx inside = XEXP (src, 0);
2367 switch (GET_CODE (inside))
2369 case REG:
2370 ptrreg = REGNO (inside);
2371 break;
2373 case SUBREG:
2374 ptrreg = subreg_regno (inside);
2375 break;
2377 case PLUS:
2378 ptrreg = REGNO (XEXP (inside, 0));
2379 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2380 an offsettable address. Unfortunately, offsettable addresses use
2381 QImode to check the offset, and a QImode offsettable address
2382 requires r0 for the other operand, which is not currently
2383 supported, so we can't use the 'o' constraint.
2384 Thus we must check for and handle r0+REG addresses here.
2385 We punt for now, since this is likely very rare. */
2386 gcc_assert (!REG_P (XEXP (inside, 1)));
2387 break;
2389 case LABEL_REF:
2390 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2391 case POST_INC:
2392 return "mov.l %1,%0\n\tmov.l %1,%T0";
2393 default:
2394 gcc_unreachable ();
2397 /* Work out the safe way to copy. Copy into the second half first. */
2398 if (dreg == ptrreg)
2399 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2402 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2405 /* Print an instruction which would have gone into a delay slot after
2406 another instruction, but couldn't because the other instruction expanded
2407 into a sequence where putting the slot insn at the end wouldn't work. */
2409 static void
2410 print_slot (rtx insn)
2412 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2414 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2417 const char *
2418 output_far_jump (rtx insn, rtx op)
2420 struct { rtx lab, reg, op; } this_jmp;
2421 rtx braf_base_lab = NULL_RTX;
2422 const char *jump;
2423 int far;
2424 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2425 rtx prev;
2427 this_jmp.lab = gen_label_rtx ();
2429 if (TARGET_SH2
2430 && offset >= -32764
2431 && offset - get_attr_length (insn) <= 32766)
2433 far = 0;
2434 jump = "mov.w %O0,%1; braf %1";
2436 else
2438 far = 1;
2439 if (flag_pic)
2441 if (TARGET_SH2)
2442 jump = "mov.l %O0,%1; braf %1";
2443 else
2444 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2446 else
2447 jump = "mov.l %O0,%1; jmp @%1";
2449 /* If we have a scratch register available, use it. */
2450 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2451 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2453 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2454 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2455 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2456 output_asm_insn (jump, &this_jmp.lab);
2457 if (dbr_sequence_length ())
2458 print_slot (final_sequence);
2459 else
2460 output_asm_insn ("nop", 0);
2462 else
2464 /* Output the delay slot insn first if any. */
2465 if (dbr_sequence_length ())
2466 print_slot (final_sequence);
2468 this_jmp.reg = gen_rtx_REG (SImode, 13);
2469 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2470 Fortunately, MACL is fixed and call-clobbered, and we never
2471 need its value across jumps, so save r13 in it instead of in
2472 the stack. */
2473 if (TARGET_SH5)
2474 output_asm_insn ("lds r13, macl", 0);
2475 else
2476 output_asm_insn ("mov.l r13,@-r15", 0);
2477 output_asm_insn (jump, &this_jmp.lab);
2478 if (TARGET_SH5)
2479 output_asm_insn ("sts macl, r13", 0);
2480 else
2481 output_asm_insn ("mov.l @r15+,r13", 0);
2483 if (far && flag_pic && TARGET_SH2)
2485 braf_base_lab = gen_label_rtx ();
2486 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2487 CODE_LABEL_NUMBER (braf_base_lab));
2489 if (far)
2490 output_asm_insn (".align 2", 0);
2491 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2492 this_jmp.op = op;
2493 if (far && flag_pic)
2495 if (TARGET_SH2)
2496 this_jmp.lab = braf_base_lab;
2497 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2499 else
2500 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2501 return "";
2504 /* Local label counter, used for constants in the pool and inside
2505 pattern branches. */
2507 static int lf = 100;
2509 /* Output code for ordinary branches. */
2511 const char *
2512 output_branch (int logic, rtx insn, rtx *operands)
2514 switch (get_attr_length (insn))
2516 case 6:
2517 /* This can happen if filling the delay slot has caused a forward
2518 branch to exceed its range (we could reverse it, but only
2519 when we know we won't overextend other branches; this should
2520 best be handled by relaxation).
2521 It can also happen when other condbranches hoist delay slot insn
2522 from their destination, thus leading to code size increase.
2523 But the branch will still be in the range -4092..+4098 bytes. */
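/* In the common case where the delay slot insn can stay put, an out-of-range
   bt, for example, is emitted as
	bf/s	.LFn
	<delay slot insn>
	bra	<target>
	nop
   .LFn:
   i.e. the condition is inverted to hop over an unconditional bra with the
   required reach.  */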
2525 if (! TARGET_RELAX)
2527 int label = lf++;
2528 /* The call to print_slot will clobber the operands. */
2529 rtx op0 = operands[0];
2531 /* If the instruction in the delay slot is annulled (true), then
2532 there is no delay slot where we can put it now. The only safe
2533 place for it is after the label. final will do that by default. */
2535 if (final_sequence
2536 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2537 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2539 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2540 ASSEMBLER_DIALECT ? "/" : ".", label);
2541 print_slot (final_sequence);
2543 else
2544 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2546 output_asm_insn ("bra\t%l0", &op0);
2547 fprintf (asm_out_file, "\tnop\n");
2548 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2550 return "";
2552 /* When relaxing, handle this like a short branch. The linker
2553 will fix it up if it still doesn't fit after relaxation. */
2554 case 2:
2555 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2557 /* These are for SH2e, in which we have to account for the
2558 extra nop because of the hardware bug in annulled branches. */
2559 case 8:
2560 if (! TARGET_RELAX)
2562 int label = lf++;
2564 gcc_assert (!final_sequence
2565 || !(INSN_ANNULLED_BRANCH_P
2566 (XVECEXP (final_sequence, 0, 0))));
2567 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2568 logic ? "f" : "t",
2569 ASSEMBLER_DIALECT ? "/" : ".", label);
2570 fprintf (asm_out_file, "\tnop\n");
2571 output_asm_insn ("bra\t%l0", operands);
2572 fprintf (asm_out_file, "\tnop\n");
2573 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2575 return "";
2577 /* When relaxing, fall through. */
2578 case 4:
2580 char buffer[10];
2582 sprintf (buffer, "b%s%ss\t%%l0",
2583 logic ? "t" : "f",
2584 ASSEMBLER_DIALECT ? "/" : ".");
2585 output_asm_insn (buffer, &operands[0]);
2586 return "nop";
2589 default:
2590 /* There should be no longer branches now - that would
2591 indicate that something has destroyed the branches set
2592 up in machine_dependent_reorg. */
2593 gcc_unreachable ();
2597 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2598 fill in operand 9 as a label to the successor insn.
2599 We try to use jump threading where possible.
2600 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2601 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2602 follow jmp and bt, if the address is in range. */
2603 const char *
2604 output_branchy_insn (enum rtx_code code, const char *templ,
2605 rtx insn, rtx *operands)
2607 rtx next_insn = NEXT_INSN (insn);
2609 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2611 rtx src = SET_SRC (PATTERN (next_insn));
2612 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2614 /* Following branch not taken */
2615 operands[9] = gen_label_rtx ();
2616 emit_label_after (operands[9], next_insn);
2617 INSN_ADDRESSES_NEW (operands[9],
2618 INSN_ADDRESSES (INSN_UID (next_insn))
2619 + get_attr_length (next_insn));
2620 return templ;
2622 else
2624 int offset = (branch_dest (next_insn)
2625 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2626 if (offset >= -252 && offset <= 258)
2628 if (GET_CODE (src) == IF_THEN_ELSE)
2629 /* branch_true */
2630 src = XEXP (src, 1);
2631 operands[9] = src;
2632 return templ;
2636 operands[9] = gen_label_rtx ();
2637 emit_label_after (operands[9], insn);
2638 INSN_ADDRESSES_NEW (operands[9],
2639 INSN_ADDRESSES (INSN_UID (insn))
2640 + get_attr_length (insn));
2641 return templ;
2644 const char *
2645 output_ieee_ccmpeq (rtx insn, rtx *operands)
2647 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2648 insn, operands);
2651 /* Output the start of the assembler file. */
2653 static void
2654 sh_file_start (void)
2656 default_file_start ();
2658 #ifdef SYMBIAN
2659 /* Declare the .directive section before it is used. */
2660 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2661 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2662 #endif
2664 if (TARGET_ELF)
2665 /* We need to show the text section with the proper
2666 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2667 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2668 will complain. We can teach GAS specifically about the
2669 default attributes for our choice of text section, but
2670 then we would have to change GAS again if/when we change
2671 the text section name. */
2672 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2673 else
2674 /* Switch to the data section so that the coffsem symbol
2675 isn't in the text section. */
2676 switch_to_section (data_section);
2678 if (TARGET_LITTLE_ENDIAN)
2679 fputs ("\t.little\n", asm_out_file);
2681 if (!TARGET_ELF)
2683 if (TARGET_SHCOMPACT)
2684 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2685 else if (TARGET_SHMEDIA)
2686 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2687 TARGET_SHMEDIA64 ? 64 : 32);
2691 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2693 static bool
2694 unspec_caller_rtx_p (rtx pat)
2696 rtx base, offset;
2697 int i;
2699 split_const (pat, &base, &offset);
2700 if (GET_CODE (base) == UNSPEC)
2702 if (XINT (base, 1) == UNSPEC_CALLER)
2703 return true;
2704 for (i = 0; i < XVECLEN (base, 0); i++)
2705 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2706 return true;
2708 return false;
2711 /* Indicate that INSN cannot be duplicated. This is true for insn
2712 that generates a unique label. */
2714 static bool
2715 sh_cannot_copy_insn_p (rtx insn)
2717 rtx pat;
2719 if (!reload_completed || !flag_pic)
2720 return false;
2722 if (!NONJUMP_INSN_P (insn))
2723 return false;
2724 if (asm_noperands (insn) >= 0)
2725 return false;
2727 pat = PATTERN (insn);
2728 if (GET_CODE (pat) != SET)
2729 return false;
2730 pat = SET_SRC (pat);
2732 if (unspec_caller_rtx_p (pat))
2733 return true;
2735 return false;
2738 /* Actual number of instructions used to make a shift by N. */
2739 static const char ashiftrt_insns[] =
2740 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2742 /* Left shift and logical right shift are the same. */
2743 static const char shift_insns[] =
2744 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2746 /* Individual shift amounts needed to get the above length sequences.
2747 One bit right shifts clobber the T bit, so when possible, put one bit
2748 shifts in the middle of the sequence, so the ends are eligible for
2749 branch delay slots. */
2750 static const short shift_amounts[32][5] = {
2751 {0}, {1}, {2}, {2, 1},
2752 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2753 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2754 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2755 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2756 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2757 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2758 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
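/* For example, a shift by 13 is done as the four insns 8, 2, 1, 2 (matching
   shift_insns[13] == 4), with the single-bit shift kept away from the ends,
   while a left shift by 14 uses {8, -2, 8}: gen_ashift below treats a
   negative amount as a shift in the opposite direction, so this is left 8,
   right 2, left 8.  */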
2760 /* Likewise, but for shift amounts < 16, up to three highmost bits
2761 might be clobbered. This is typically used when combined with some
2762 kind of sign or zero extension. */
2764 static const char ext_shift_insns[] =
2765 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2767 static const short ext_shift_amounts[32][4] = {
2768 {0}, {1}, {2}, {2, 1},
2769 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2770 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2771 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2772 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2773 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2774 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2775 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2777 /* Assuming we have a value that has been sign-extended by at least one bit,
2778 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2779 to shift it by N without data loss, and quicker than by other means? */
2780 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
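/* I.e. EXT_SHIFT_SIGNED holds exactly for N == 7 and N == 15, the {8, -1}
   and {16, -1} entries above, where the trailing single-bit right shift can
   safely be made arithmetic.  */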
2782 /* This is used in length attributes in sh.md to help compute the length
2783 of arbitrary constant shift instructions. */
2786 shift_insns_rtx (rtx insn)
2788 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2789 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2790 enum rtx_code shift_code = GET_CODE (set_src);
2792 switch (shift_code)
2794 case ASHIFTRT:
2795 return ashiftrt_insns[shift_count];
2796 case LSHIFTRT:
2797 case ASHIFT:
2798 return shift_insns[shift_count];
2799 default:
2800 gcc_unreachable ();
2804 /* Return the cost of a shift. */
2806 static inline int
2807 shiftcosts (rtx x)
2809 int value;
2811 if (TARGET_SHMEDIA)
2812 return 1;
2814 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2816 if (GET_MODE (x) == DImode
2817 && CONST_INT_P (XEXP (x, 1))
2818 && INTVAL (XEXP (x, 1)) == 1)
2819 return 2;
2821 /* Everything else is invalid, because there is no pattern for it. */
2822 return MAX_COST;
2824 /* If shift by a non constant, then this will be expensive. */
2825 if (!CONST_INT_P (XEXP (x, 1)))
2826 return SH_DYNAMIC_SHIFT_COST;
2828 /* Otherwise, return the true cost in instructions. Cope with out of range
2829 shift counts more or less arbitrarily. */
2830 value = INTVAL (XEXP (x, 1)) & 31;
2832 if (GET_CODE (x) == ASHIFTRT)
2834 int cost = ashiftrt_insns[value];
2835 /* If SH3, then we put the constant in a reg and use shad. */
2836 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2837 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2838 return cost;
2840 else
2841 return shift_insns[value];
2844 /* Return the cost of an AND operation. */
2846 static inline int
2847 andcosts (rtx x)
2849 int i;
2851 /* ANDing with a register is a single-cycle AND instruction. */
2852 if (!CONST_INT_P (XEXP (x, 1)))
2853 return 1;
2855 i = INTVAL (XEXP (x, 1));
2857 if (TARGET_SHMEDIA)
2859 if (satisfies_constraint_I10 (XEXP (x, 1))
2860 || satisfies_constraint_J16 (XEXP (x, 1)))
2861 return 1;
2862 else
2863 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2866 /* These constants are single cycle extu.[bw] instructions. */
2867 if (i == 0xff || i == 0xffff)
2868 return 1;
2869 /* Constants that can be used in an and immediate instruction in a single
2870 cycle, but this requires r0, so make it a little more expensive. */
2871 if (CONST_OK_FOR_K08 (i))
2872 return 2;
2873 /* Constants that can be loaded with a mov immediate and an and.
2874 This case is probably unnecessary. */
2875 if (CONST_OK_FOR_I08 (i))
2876 return 2;
2877 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2878 This case is probably unnecessary. */
2879 return 3;
2882 /* Return the cost of an addition or a subtraction. */
2884 static inline int
2885 addsubcosts (rtx x)
2887 /* Adding a register is a single cycle insn. */
2888 if (REG_P (XEXP (x, 1))
2889 || GET_CODE (XEXP (x, 1)) == SUBREG)
2890 return 1;
2892 /* Likewise for small constants. */
2893 if (CONST_INT_P (XEXP (x, 1))
2894 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2895 return 1;
2897 if (TARGET_SHMEDIA)
2898 switch (GET_CODE (XEXP (x, 1)))
2900 case CONST:
2901 case LABEL_REF:
2902 case SYMBOL_REF:
2903 return TARGET_SHMEDIA64 ? 5 : 3;
2905 case CONST_INT:
2906 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2907 return 2;
2908 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2909 return 3;
2910 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2911 return 4;
2913 /* Fall through. */
2914 default:
2915 return 5;
2918 /* Any other constant requires a 2 cycle pc-relative load plus an
2919 addition. */
2920 return 3;
2923 /* Return the cost of a multiply. */
2924 static inline int
2925 multcosts (rtx x ATTRIBUTE_UNUSED)
2927 if (sh_multcost >= 0)
2928 return sh_multcost;
2929 if (TARGET_SHMEDIA)
2930 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2931 accept constants. Ideally, we would use a cost of one or two and
2932 add the cost of the operand, but disregard the latter when inside loops
2933 and loop invariant code motion is still to follow.
2934 Using a multiply first and splitting it later if it's a loss
2935 doesn't work because of different sign / zero extension semantics
2936 of multiplies vs. shifts. */
2937 return TARGET_SMALLCODE ? 2 : 3;
2939 if (TARGET_SH2)
2941 /* We have a mul insn, so we can never take more than the mul and the
2942 read of the mac reg, but count more because of the latency and extra
2943 reg usage. */
2944 if (TARGET_SMALLCODE)
2945 return 2;
2946 return 3;
2949 /* If we're aiming at small code, then just count the number of
2950 insns in a multiply call sequence. */
2951 if (TARGET_SMALLCODE)
2952 return 5;
2954 /* Otherwise count all the insns in the routine we'd be calling too. */
2955 return 20;
2958 /* Compute a (partial) cost for rtx X. Return true if the complete
2959 cost has been computed, and false if subexpressions should be
2960 scanned. In either case, *TOTAL contains the cost result. */
2962 static bool
2963 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2964 bool speed ATTRIBUTE_UNUSED)
2966 switch (code)
2968 case CONST_INT:
2969 if (TARGET_SHMEDIA)
2971 if (INTVAL (x) == 0)
2972 *total = 0;
2973 else if (outer_code == AND && and_operand ((x), DImode))
2974 *total = 0;
2975 else if ((outer_code == IOR || outer_code == XOR
2976 || outer_code == PLUS)
2977 && CONST_OK_FOR_I10 (INTVAL (x)))
2978 *total = 0;
2979 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2980 *total = COSTS_N_INSNS (outer_code != SET);
2981 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2982 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2983 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2984 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2985 else
2986 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2987 return true;
2989 if (CONST_OK_FOR_I08 (INTVAL (x)))
2990 *total = 0;
2991 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2992 && CONST_OK_FOR_K08 (INTVAL (x)))
2993 *total = 1;
2994 /* prepare_cmp_insn will force costly constants into registers before
2995 the cbranch[sd]i4 patterns can see them, so preserve potentially
2996 interesting ones not covered by I08 above. */
2997 else if (outer_code == COMPARE
2998 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2999 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3000 || INTVAL (x) == 0x7fffffff
3001 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3002 *total = 1;
3003 else
3004 *total = 8;
3005 return true;
3007 case CONST:
3008 case LABEL_REF:
3009 case SYMBOL_REF:
3010 if (TARGET_SHMEDIA64)
3011 *total = COSTS_N_INSNS (4);
3012 else if (TARGET_SHMEDIA32)
3013 *total = COSTS_N_INSNS (2);
3014 else
3015 *total = 5;
3016 return true;
3018 case CONST_DOUBLE:
3019 if (TARGET_SHMEDIA)
3020 *total = COSTS_N_INSNS (4);
3021 /* prepare_cmp_insn will force costly constants into registers before
3022 the cbranchdi4 pattern can see them, so preserve potentially
3023 interesting ones. */
3024 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3025 *total = 1;
3026 else
3027 *total = 10;
3028 return true;
3029 case CONST_VECTOR:
3030 if (x == CONST0_RTX (GET_MODE (x)))
3031 *total = 0;
3032 else if (sh_1el_vec (x, VOIDmode))
3033 *total = outer_code != SET;
3034 if (sh_rep_vec (x, VOIDmode))
3035 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3036 + (outer_code != SET));
3037 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3038 return true;
3040 case PLUS:
3041 case MINUS:
3042 *total = COSTS_N_INSNS (addsubcosts (x));
3043 return true;
3045 case AND:
3046 *total = COSTS_N_INSNS (andcosts (x));
3047 return true;
3049 case MULT:
3050 *total = COSTS_N_INSNS (multcosts (x));
3051 return true;
3053 case ASHIFT:
3054 case ASHIFTRT:
3055 case LSHIFTRT:
3056 *total = COSTS_N_INSNS (shiftcosts (x));
3057 return true;
3059 case DIV:
3060 case UDIV:
3061 case MOD:
3062 case UMOD:
3063 *total = COSTS_N_INSNS (20);
3064 return true;
3066 case PARALLEL:
3067 if (sh_1el_vec (x, VOIDmode))
3068 *total = outer_code != SET;
3069 if (sh_rep_vec (x, VOIDmode))
3070 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3071 + (outer_code != SET));
3072 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3073 return true;
3075 case FLOAT:
3076 case FIX:
3077 *total = 100;
3078 return true;
3080 default:
3081 return false;
3085 /* Compute the cost of an address. For the SH, all valid addresses are
3086 the same cost. Use a slightly higher cost for reg + reg addressing,
3087 since it increases pressure on r0. */
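/* (The reg + reg form on SH is @(r0,Rn), so every such address ties up
   r0.)  */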
3089 static int
3090 sh_address_cost (rtx X,
3091 bool speed ATTRIBUTE_UNUSED)
3093 return (GET_CODE (X) == PLUS
3094 && ! CONSTANT_P (XEXP (X, 1))
3095 && ! TARGET_SHMEDIA ? 1 : 0);
3098 /* Code to expand a shift. */
3100 void
3101 gen_ashift (int type, int n, rtx reg)
3103 /* Negative values here come from the shift_amounts array. */
3104 if (n < 0)
3106 if (type == ASHIFT)
3107 type = LSHIFTRT;
3108 else
3109 type = ASHIFT;
3110 n = -n;
3113 switch (type)
3115 case ASHIFTRT:
3116 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3117 break;
3118 case LSHIFTRT:
3119 if (n == 1)
3120 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3121 else
3122 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3123 break;
3124 case ASHIFT:
3125 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3126 break;
3130 /* Same for HImode */
3132 void
3133 gen_ashift_hi (int type, int n, rtx reg)
3135 /* Negative values here come from the shift_amounts array. */
3136 if (n < 0)
3138 if (type == ASHIFT)
3139 type = LSHIFTRT;
3140 else
3141 type = ASHIFT;
3142 n = -n;
3145 switch (type)
3147 case ASHIFTRT:
3148 case LSHIFTRT:
3149 /* We don't have HImode right shift operations because using the
3150 ordinary 32 bit shift instructions for that doesn't generate proper
3151 zero/sign extension.
3152 gen_ashift_hi is only called in contexts where we know that the
3153 sign extension works out correctly. */
3155 int offset = 0;
3156 if (GET_CODE (reg) == SUBREG)
3158 offset = SUBREG_BYTE (reg);
3159 reg = SUBREG_REG (reg);
3161 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3162 break;
3164 case ASHIFT:
3165 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3166 break;
3170 /* Output RTL to split a constant shift into its component SH constant
3171 shift instructions. */
3173 void
3174 gen_shifty_op (int code, rtx *operands)
3176 int value = INTVAL (operands[2]);
3177 int max, i;
3179 /* Truncate the shift count in case it is out of bounds. */
3180 value = value & 31;
3182 if (value == 31)
3184 if (code == LSHIFTRT)
3186 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3187 emit_insn (gen_movt (operands[0]));
3188 return;
3190 else if (code == ASHIFT)
3192 /* There is a two instruction sequence for 31 bit left shifts,
3193 but it requires r0. */
3194 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3196 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3197 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3198 return;
3202 else if (value == 0)
3204 /* This can happen even when optimizing, if there were subregs before
3205 reload. Don't output a nop here, as this is never optimized away;
3206 use a no-op move instead. */
3207 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3208 return;
3211 max = shift_insns[value];
3212 for (i = 0; i < max; i++)
3213 gen_ashift (code, shift_amounts[value][i], operands[0]);
3216 /* Same as above, but optimized for values where the topmost bits don't
3217 matter. */
3219 void
3220 gen_shifty_hi_op (int code, rtx *operands)
3222 int value = INTVAL (operands[2]);
3223 int max, i;
3224 void (*gen_fun) (int, int, rtx);
3226 /* This operation is used by and_shl for SImode values with a few
3227 high bits known to be cleared. */
3228 value &= 31;
3229 if (value == 0)
3231 emit_insn (gen_nop ());
3232 return;
3235 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3236 if (code == ASHIFT)
3238 max = ext_shift_insns[value];
3239 for (i = 0; i < max; i++)
3240 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3242 else
3243 /* When shifting right, emit the shifts in reverse order, so that
3244 solitary negative values come first. */
3245 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3246 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3249 /* Output RTL for an arithmetic right shift. */
3251 /* ??? Rewrite to use super-optimizer sequences. */
3254 expand_ashiftrt (rtx *operands)
3256 rtx wrk;
3257 char func[18];
3258 int value;
3260 if (TARGET_SH3)
3262 if (!CONST_INT_P (operands[2]))
3264 rtx count = copy_to_mode_reg (SImode, operands[2]);
3265 emit_insn (gen_negsi2 (count, count));
3266 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3267 return 1;
3269 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3270 > 1 + SH_DYNAMIC_SHIFT_COST)
3272 rtx count
3273 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3274 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3275 return 1;
3278 if (!CONST_INT_P (operands[2]))
3279 return 0;
3281 value = INTVAL (operands[2]) & 31;
3283 if (value == 31)
3285 /* If we are called from abs expansion, arrange things so that
3286 we can use a single MT instruction that doesn't clobber the source,
3287 if LICM can hoist out the load of the constant zero. */
3288 if (currently_expanding_to_rtl)
3290 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3291 operands[1]));
3292 emit_insn (gen_mov_neg_si_t (operands[0]));
3293 return 1;
3295 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3296 return 1;
3298 else if (value >= 16 && value <= 19)
3300 wrk = gen_reg_rtx (SImode);
3301 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3302 value -= 16;
3303 while (value--)
3304 gen_ashift (ASHIFTRT, 1, wrk);
3305 emit_move_insn (operands[0], wrk);
3306 return 1;
3308 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3309 else if (value <= 5)
3311 wrk = gen_reg_rtx (SImode);
3312 emit_move_insn (wrk, operands[1]);
3313 while (value--)
3314 gen_ashift (ASHIFTRT, 1, wrk);
3315 emit_move_insn (operands[0], wrk);
3316 return 1;
3319 wrk = gen_reg_rtx (Pmode);
3321 /* Load the value into an arg reg and call a helper. */
3322 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3323 sprintf (func, "__ashiftrt_r4_%d", value);
3324 function_symbol (wrk, func, SFUNC_STATIC);
3325 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3326 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3327 return 1;
3331 sh_dynamicalize_shift_p (rtx count)
3333 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3336 /* Try to find a good way to implement the combiner pattern
3337 [(set (match_operand:SI 0 "register_operand" "r")
3338 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3339 (match_operand:SI 2 "const_int_operand" "n"))
3340 (match_operand:SI 3 "const_int_operand" "n"))) .
3341 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3342 return 0 for simple right / left or left/right shift combination.
3343 return 1 for a combination of shifts with zero_extend.
3344 return 2 for a combination of shifts with an AND that needs r0.
3345 return 3 for a combination of shifts with an AND that needs an extra
3346 scratch register, when the three highmost bits of the AND mask are clear.
3347 return 4 for a combination of shifts with an AND that needs an extra
3348 scratch register, when any of the three highmost bits of the AND mask
3349 is set.
3350 If ATTRP is set, store an initial right shift width in ATTRP[0],
3351 and the instruction length in ATTRP[1] . These values are not valid
3352 when returning 0.
3353 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3354 shift_amounts for the last shift value that is to be used before the
3355 sign extend. */
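/* In source terms the pattern above is (x << LEFT) & MASK, a masked left
   shift as produced by combine; the kind returned selects among the
   strategies below, costed with the shift tables above.  */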
3357 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3359 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3360 int left = INTVAL (left_rtx), right;
3361 int best = 0;
3362 int cost, best_cost = 10000;
3363 int best_right = 0, best_len = 0;
3364 int i;
3365 int can_ext;
3367 if (left < 0 || left > 31)
3368 return 0;
3369 if (CONST_INT_P (mask_rtx))
3370 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3371 else
3372 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3373 /* Can this be expressed as a right shift / left shift pair? */
3374 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3375 right = exact_log2 (lsb);
3376 mask2 = ~(mask + lsb - 1);
3377 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3378 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3379 if (! mask2)
3380 best_cost = shift_insns[right] + shift_insns[right + left];
3381 /* mask has no trailing zeroes <==> ! right */
3382 else if (! right && mask2 == ~(lsb2 - 1))
3384 int late_right = exact_log2 (lsb2);
3385 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3387 /* Try to use zero extend. */
3388 if (mask2 == ~(lsb2 - 1))
3390 int width, first;
3392 for (width = 8; width <= 16; width += 8)
3394 /* Can we zero-extend right away? */
3395 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3397 cost
3398 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3399 if (cost < best_cost)
3401 best = 1;
3402 best_cost = cost;
3403 best_right = right;
3404 best_len = cost;
3405 if (attrp)
3406 attrp[2] = -1;
3408 continue;
3410 /* ??? Could try to put zero extend into initial right shift,
3411 or even shift a bit left before the right shift. */
3412 /* Determine value of first part of left shift, to get to the
3413 zero extend cut-off point. */
3414 first = width - exact_log2 (lsb2) + right;
3415 if (first >= 0 && right + left - first >= 0)
3417 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3418 + ext_shift_insns[right + left - first];
3419 if (cost < best_cost)
3421 best = 1;
3422 best_cost = cost;
3423 best_right = right;
3424 best_len = cost;
3425 if (attrp)
3426 attrp[2] = first;
3431 /* Try to use r0 AND pattern */
3432 for (i = 0; i <= 2; i++)
3434 if (i > right)
3435 break;
3436 if (! CONST_OK_FOR_K08 (mask >> i))
3437 continue;
3438 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3439 if (cost < best_cost)
3441 best = 2;
3442 best_cost = cost;
3443 best_right = i;
3444 best_len = cost - 1;
3447 /* Try to use a scratch register to hold the AND operand. */
3448 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3449 for (i = 0; i <= 2; i++)
3451 if (i > right)
3452 break;
3453 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3454 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3455 if (cost < best_cost)
3457 best = 4 - can_ext;
3458 best_cost = cost;
3459 best_right = i;
3460 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3464 if (attrp)
3466 attrp[0] = best_right;
3467 attrp[1] = best_len;
3469 return best;
3472 /* This is used in length attributes of the unnamed instructions
3473 corresponding to shl_and_kind return values of 1 and 2. */
3475 shl_and_length (rtx insn)
3477 rtx set_src, left_rtx, mask_rtx;
3478 int attributes[3];
3480 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3481 left_rtx = XEXP (XEXP (set_src, 0), 1);
3482 mask_rtx = XEXP (set_src, 1);
3483 shl_and_kind (left_rtx, mask_rtx, attributes);
3484 return attributes[1];
3487 /* This is used in length attribute of the and_shl_scratch instruction. */
3490 shl_and_scr_length (rtx insn)
3492 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3493 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3494 rtx op = XEXP (set_src, 0);
3495 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3496 op = XEXP (XEXP (op, 0), 0);
3497 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3500 /* Generate rtl for instructions for which shl_and_kind advised a particular
3501 method of generating them, i.e. returned zero. */
3504 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3506 int attributes[3];
3507 unsigned HOST_WIDE_INT mask;
3508 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3509 int right, total_shift;
3510 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3512 right = attributes[0];
3513 total_shift = INTVAL (left_rtx) + right;
3514 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3515 switch (kind)
3517 default:
3518 return -1;
3519 case 1:
3521 int first = attributes[2];
3522 rtx operands[3];
3524 if (first < 0)
3526 emit_insn ((mask << right) <= 0xff
3527 ? gen_zero_extendqisi2 (dest,
3528 gen_lowpart (QImode, source))
3529 : gen_zero_extendhisi2 (dest,
3530 gen_lowpart (HImode, source)));
3531 source = dest;
3533 if (source != dest)
3534 emit_insn (gen_movsi (dest, source));
3535 operands[0] = dest;
3536 if (right)
3538 operands[2] = GEN_INT (right);
3539 gen_shifty_hi_op (LSHIFTRT, operands);
3541 if (first > 0)
3543 operands[2] = GEN_INT (first);
3544 gen_shifty_hi_op (ASHIFT, operands);
3545 total_shift -= first;
3546 mask <<= first;
3548 if (first >= 0)
3549 emit_insn (mask <= 0xff
3550 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3551 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3552 if (total_shift > 0)
3554 operands[2] = GEN_INT (total_shift);
3555 gen_shifty_hi_op (ASHIFT, operands);
3557 break;
3559 case 4:
3560 shift_gen_fun = gen_shifty_op;
3561 case 3:
3562 /* If the topmost bit that matters is set, set the topmost bits
3563 that don't matter. This way, we might be able to get a shorter
3564 signed constant. */
3565 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3566 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3567 case 2:
3568 /* Don't expand fine-grained when combining, because that will
3569 make the pattern fail. */
3570 if (currently_expanding_to_rtl
3571 || reload_in_progress || reload_completed)
3573 rtx operands[3];
3575 /* Cases 3 and 4 should be handled by this split
3576 only while combining */
3577 gcc_assert (kind <= 2);
3578 if (right)
3580 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3581 source = dest;
3583 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3584 if (total_shift)
3586 operands[0] = dest;
3587 operands[1] = dest;
3588 operands[2] = GEN_INT (total_shift);
3589 shift_gen_fun (ASHIFT, operands);
3591 break;
3593 else
3595 int neg = 0;
3596 if (kind != 4 && total_shift < 16)
3598 neg = -ext_shift_amounts[total_shift][1];
3599 if (neg > 0)
3600 neg -= ext_shift_amounts[total_shift][2];
3601 else
3602 neg = 0;
3604 emit_insn (gen_and_shl_scratch (dest, source,
3605 GEN_INT (right),
3606 GEN_INT (mask),
3607 GEN_INT (total_shift + neg),
3608 GEN_INT (neg)));
3609 emit_insn (gen_movsi (dest, dest));
3610 break;
3613 return 0;
3616 /* Try to find a good way to implement the combiner pattern
3617 [(set (match_operand:SI 0 "register_operand" "=r")
3618 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3619 (match_operand:SI 2 "const_int_operand" "n")
3620 (match_operand:SI 3 "const_int_operand" "n")
3621 (const_int 0)))
3622 (clobber (reg:SI T_REG))]
3623 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3624 return 0 for simple left / right shift combination.
3625 return 1 for left shift / 8 bit sign extend / left shift.
3626 return 2 for left shift / 16 bit sign extend / left shift.
3627 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3628 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3629 return 5 for left shift / 16 bit sign extend / right shift
3630 return 6 for < 8 bit sign extend / left shift.
3631 return 7 for < 8 bit sign extend / left shift / single right shift.
3632 If COSTP is nonzero, assign the calculated cost to *COSTP. */
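/* With INSIZE = SIZE - LEFT, the pattern takes the low INSIZE bits of the
   operand as a signed field and leaves them shifted left by LEFT; the
   fallback (kind 0) is simply a left shift by 32 - INSIZE followed by an
   arithmetic right shift by 32 - SIZE.  */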
3635 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3637 int left, size, insize, ext;
3638 int cost = 0, best_cost;
3639 int kind;
3641 left = INTVAL (left_rtx);
3642 size = INTVAL (size_rtx);
3643 insize = size - left;
3644 gcc_assert (insize > 0);
3645 /* Default to left / right shift. */
3646 kind = 0;
3647 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3648 if (size <= 16)
3650 /* 16 bit shift / sign extend / 16 bit shift */
3651 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3652 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3653 below, by alternative 3 or something even better. */
3654 if (cost < best_cost)
3656 kind = 5;
3657 best_cost = cost;
3660 /* Try a plain sign extend between two shifts. */
3661 for (ext = 16; ext >= insize; ext -= 8)
3663 if (ext <= size)
3665 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3666 if (cost < best_cost)
3668 kind = ext / (unsigned) 8;
3669 best_cost = cost;
3672 /* Check if we can do a sloppy shift with a final signed shift
3673 restoring the sign. */
3674 if (EXT_SHIFT_SIGNED (size - ext))
3675 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3676 /* If not, maybe it's still cheaper to do the second shift sloppy,
3677 and do a final sign extend? */
3678 else if (size <= 16)
3679 cost = ext_shift_insns[ext - insize] + 1
3680 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3681 else
3682 continue;
3683 if (cost < best_cost)
3685 kind = ext / (unsigned) 8 + 2;
3686 best_cost = cost;
3689 /* Check if we can sign extend in r0 */
3690 if (insize < 8)
3692 cost = 3 + shift_insns[left];
3693 if (cost < best_cost)
3695 kind = 6;
3696 best_cost = cost;
3698 /* Try the same with a final signed shift. */
3699 if (left < 31)
3701 cost = 3 + ext_shift_insns[left + 1] + 1;
3702 if (cost < best_cost)
3704 kind = 7;
3705 best_cost = cost;
3709 if (TARGET_SH3)
3711 /* Try to use a dynamic shift. */
3712 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3713 if (cost < best_cost)
3715 kind = 0;
3716 best_cost = cost;
3719 if (costp)
3720 *costp = cost;
3721 return kind;
3724 /* Function to be used in the length attribute of the instructions
3725 implementing this pattern. */
3728 shl_sext_length (rtx insn)
3730 rtx set_src, left_rtx, size_rtx;
3731 int cost;
3733 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3734 left_rtx = XEXP (XEXP (set_src, 0), 1);
3735 size_rtx = XEXP (set_src, 1);
3736 shl_sext_kind (left_rtx, size_rtx, &cost);
3737 return cost;
3740 /* Generate rtl for this pattern */
3743 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3745 int kind;
3746 int left, size, insize, cost;
3747 rtx operands[3];
3749 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3750 left = INTVAL (left_rtx);
3751 size = INTVAL (size_rtx);
3752 insize = size - left;
3753 switch (kind)
3755 case 1:
3756 case 2:
3757 case 3:
3758 case 4:
3760 int ext = kind & 1 ? 8 : 16;
3761 int shift2 = size - ext;
3763 /* Don't expand fine-grained when combining, because that will
3764 make the pattern fail. */
3765 if (! currently_expanding_to_rtl
3766 && ! reload_in_progress && ! reload_completed)
3768 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3769 emit_insn (gen_movsi (dest, source));
3770 break;
3772 if (dest != source)
3773 emit_insn (gen_movsi (dest, source));
3774 operands[0] = dest;
3775 if (ext - insize)
3777 operands[2] = GEN_INT (ext - insize);
3778 gen_shifty_hi_op (ASHIFT, operands);
3780 emit_insn (kind & 1
3781 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3782 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3783 if (kind <= 2)
3785 if (shift2)
3787 operands[2] = GEN_INT (shift2);
3788 gen_shifty_op (ASHIFT, operands);
3791 else
3793 if (shift2 > 0)
3795 if (EXT_SHIFT_SIGNED (shift2))
3797 operands[2] = GEN_INT (shift2 + 1);
3798 gen_shifty_op (ASHIFT, operands);
3799 operands[2] = const1_rtx;
3800 gen_shifty_op (ASHIFTRT, operands);
3801 break;
3803 operands[2] = GEN_INT (shift2);
3804 gen_shifty_hi_op (ASHIFT, operands);
3806 else if (shift2)
3808 operands[2] = GEN_INT (-shift2);
3809 gen_shifty_hi_op (LSHIFTRT, operands);
3811 emit_insn (size <= 8
3812 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3813 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3815 break;
3817 case 5:
3819 int i = 16 - size;
3820 if (! currently_expanding_to_rtl
3821 && ! reload_in_progress && ! reload_completed)
3822 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3823 else
3825 operands[0] = dest;
3826 operands[2] = GEN_INT (16 - insize);
3827 gen_shifty_hi_op (ASHIFT, operands);
3828 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3830 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3831 while (--i >= 0)
3832 gen_ashift (ASHIFTRT, 1, dest);
3833 break;
3835 case 6:
3836 case 7:
3837 /* Don't expand fine-grained when combining, because that will
3838 make the pattern fail. */
3839 if (! currently_expanding_to_rtl
3840 && ! reload_in_progress && ! reload_completed)
3842 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3843 emit_insn (gen_movsi (dest, source));
3844 break;
3846 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3847 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3848 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3849 operands[0] = dest;
3850 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3851 gen_shifty_op (ASHIFT, operands);
3852 if (kind == 7)
3853 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3854 break;
3855 default:
3856 return -1;
3858 return 0;
3861 /* Prefix a symbol_ref name with "datalabel". */
3864 gen_datalabel_ref (rtx sym)
3866 const char *str;
3868 if (GET_CODE (sym) == LABEL_REF)
3869 return gen_rtx_CONST (GET_MODE (sym),
3870 gen_rtx_UNSPEC (GET_MODE (sym),
3871 gen_rtvec (1, sym),
3872 UNSPEC_DATALABEL));
3874 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3876 str = XSTR (sym, 0);
3877 /* Share all SYMBOL_REF strings with the same value - that is important
3878 for cse. */
3879 str = IDENTIFIER_POINTER (get_identifier (str));
3880 XSTR (sym, 0) = str;
3882 return sym;
3886 static alloc_pool label_ref_list_pool;
3888 typedef struct label_ref_list_d
3890 rtx label;
3891 struct label_ref_list_d *next;
3892 } *label_ref_list_t;
3894 /* The SH cannot load a large constant into a register; constants have to
3895 come from a pc-relative load. The reference of a pc-relative load
3896 instruction must be less than 1k in front of the instruction. This
3897 means that we often have to dump a constant inside a function, and
3898 generate code to branch around it.
3900 It is important to minimize this, since the branches will slow things
3901 down and make things bigger.
3903 Worst case code looks like:
3905 mov.l L1,rn
3906 bra L2
3908 align
3909 L1: .long value
3913 mov.l L3,rn
3914 bra L4
3916 align
3917 L3: .long value
3921 We fix this by performing a scan before scheduling, which notices which
3922 instructions need to have their operands fetched from the constant table
3923 and builds the table.
3925 The algorithm is:
3927 scan, find an instruction which needs a pcrel move. Look forward, find the
3928 last barrier which is within MAX_COUNT bytes of the requirement.
3929 If there isn't one, make one. Process all the instructions between
3930 the instruction we found and the barrier.
3932 In the above example, we can tell that L3 is within 1k of L1, so
3933 the first move can be shrunk from the 3 insn+constant sequence into
3934 just 1 insn, and the constant moved to L3 to make:
3936 mov.l L1,rn
3938 mov.l L3,rn
3939 bra L4
3941 align
3942 L3:.long value
3943 L4:.long value
3945 Then the second move becomes the target for the shortening process. */
3947 typedef struct
3949 rtx value; /* Value in table. */
3950 rtx label; /* Label of value. */
3951 label_ref_list_t wend; /* End of window. */
3952 enum machine_mode mode; /* Mode of value. */
3954 /* True if this constant is accessed as part of a post-increment
3955 sequence. Note that HImode constants are never accessed in this way. */
3956 bool part_of_sequence_p;
3957 } pool_node;
3959 /* The maximum number of constants that can fit into one pool, since
3960 constants in the range 0..510 are at least 2 bytes long, and in the
3961 range from there to 1018 at least 4 bytes. */
3963 #define MAX_POOL_SIZE 372
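/* Rough arithmetic consistent with the comment above (editorial illustration):
   at most roughly 510/2 = 255 two-byte entries plus (1018 - 510)/4 = 127
   four-byte entries, i.e. about 382 in total; 372 stays below that bound.  */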
3964 static pool_node pool_vector[MAX_POOL_SIZE];
3965 static int pool_size;
3966 static rtx pool_window_label;
3967 static int pool_window_last;
3969 static int max_labelno_before_reorg;
3971 /* ??? If we need a constant in HImode which is the truncated value of a
3972 constant we need in SImode, we could combine the two entries thus saving
3973 two bytes. Is this common enough to be worth the effort of implementing
3974 it? */
3976 /* ??? This stuff should be done at the same time that we shorten branches.
3977 As it is now, we must assume that all branches are the maximum size, and
3978 this causes us to almost always output constant pools sooner than
3979 necessary. */
3981 /* Add a constant to the pool and return its label. */
3983 static rtx
3984 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3986 int i;
3987 rtx lab, new_rtx;
3988 label_ref_list_t ref, newref;
3990 /* First see if we've already got it. */
3991 for (i = 0; i < pool_size; i++)
3993 if (x->code == pool_vector[i].value->code
3994 && mode == pool_vector[i].mode)
3996 if (x->code == CODE_LABEL)
3998 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3999 continue;
4001 if (rtx_equal_p (x, pool_vector[i].value))
4003 lab = new_rtx = 0;
4004 if (! last_value
4005 || ! i
4006 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4008 new_rtx = gen_label_rtx ();
4009 LABEL_REFS (new_rtx) = pool_vector[i].label;
4010 pool_vector[i].label = lab = new_rtx;
4012 if (lab && pool_window_label)
4014 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4015 newref->label = pool_window_label;
4016 ref = pool_vector[pool_window_last].wend;
4017 newref->next = ref;
4018 pool_vector[pool_window_last].wend = newref;
4020 if (new_rtx)
4021 pool_window_label = new_rtx;
4022 pool_window_last = i;
4023 return lab;
4028 /* Need a new one. */
4029 pool_vector[pool_size].value = x;
4030 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4032 lab = 0;
4033 pool_vector[pool_size - 1].part_of_sequence_p = true;
4035 else
4036 lab = gen_label_rtx ();
4037 pool_vector[pool_size].mode = mode;
4038 pool_vector[pool_size].label = lab;
4039 pool_vector[pool_size].wend = NULL;
4040 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4041 if (lab && pool_window_label)
4043 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4044 newref->label = pool_window_label;
4045 ref = pool_vector[pool_window_last].wend;
4046 newref->next = ref;
4047 pool_vector[pool_window_last].wend = newref;
4049 if (lab)
4050 pool_window_label = lab;
4051 pool_window_last = pool_size;
4052 pool_size++;
4053 return lab;
4056 /* Output the literal table. START, if nonzero, is the first instruction
4057 this table is needed for, and also indicates that there is at least one
4058 casesi_worker_2 instruction; we have to emit the operand3 labels from
4059 these insns at a 4-byte aligned position. BARRIER is the barrier
4060 after which we are to place the table. */
4062 static void
4063 dump_table (rtx start, rtx barrier)
4065 rtx scan = barrier;
4066 int i;
4067 int need_align = 1;
4068 rtx lab;
4069 label_ref_list_t ref;
4070 int have_df = 0;
4072 /* Do two passes, first time dump out the HI sized constants. */
4074 for (i = 0; i < pool_size; i++)
4076 pool_node *p = &pool_vector[i];
4078 if (p->mode == HImode)
4080 if (need_align)
4082 scan = emit_insn_after (gen_align_2 (), scan);
4083 need_align = 0;
4085 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4086 scan = emit_label_after (lab, scan);
4087 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4088 scan);
4089 for (ref = p->wend; ref; ref = ref->next)
4091 lab = ref->label;
4092 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4095 else if (p->mode == DFmode)
4096 have_df = 1;
4099 need_align = 1;
4101 if (start)
4103 scan = emit_insn_after (gen_align_4 (), scan);
4104 need_align = 0;
4105 for (; start != barrier; start = NEXT_INSN (start))
4106 if (NONJUMP_INSN_P (start)
4107 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4109 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4110 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4112 scan = emit_label_after (lab, scan);
4115 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4117 rtx align_insn = NULL_RTX;
4119 scan = emit_label_after (gen_label_rtx (), scan);
4120 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4121 need_align = 0;
4123 for (i = 0; i < pool_size; i++)
4125 pool_node *p = &pool_vector[i];
4127 switch (p->mode)
4129 case HImode:
4130 break;
4131 case SImode:
4132 case SFmode:
4133 if (align_insn && !p->part_of_sequence_p)
4135 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4136 emit_label_before (lab, align_insn);
4137 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4138 align_insn);
4139 for (ref = p->wend; ref; ref = ref->next)
4141 lab = ref->label;
4142 emit_insn_before (gen_consttable_window_end (lab),
4143 align_insn);
4145 delete_insn (align_insn);
4146 align_insn = NULL_RTX;
4147 continue;
4149 else
4151 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4152 scan = emit_label_after (lab, scan);
4153 scan = emit_insn_after (gen_consttable_4 (p->value,
4154 const0_rtx), scan);
4155 need_align = ! need_align;
4157 break;
4158 case DFmode:
4159 if (need_align)
4161 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4162 align_insn = scan;
4163 need_align = 0;
4165 case DImode:
4166 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4167 scan = emit_label_after (lab, scan);
4168 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4169 scan);
4170 break;
4171 default:
4172 gcc_unreachable ();
4175 if (p->mode != HImode)
4177 for (ref = p->wend; ref; ref = ref->next)
4179 lab = ref->label;
4180 scan = emit_insn_after (gen_consttable_window_end (lab),
4181 scan);
4186 pool_size = 0;
4189 for (i = 0; i < pool_size; i++)
4191 pool_node *p = &pool_vector[i];
4193 switch (p->mode)
4195 case HImode:
4196 break;
4197 case SImode:
4198 case SFmode:
4199 if (need_align)
4201 need_align = 0;
4202 scan = emit_label_after (gen_label_rtx (), scan);
4203 scan = emit_insn_after (gen_align_4 (), scan);
4205 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4206 scan = emit_label_after (lab, scan);
4207 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4208 scan);
4209 break;
4210 case DFmode:
4211 case DImode:
4212 if (need_align)
4214 need_align = 0;
4215 scan = emit_label_after (gen_label_rtx (), scan);
4216 scan = emit_insn_after (gen_align_4 (), scan);
4218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4219 scan = emit_label_after (lab, scan);
4220 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4221 scan);
4222 break;
4223 default:
4224 gcc_unreachable ();
4227 if (p->mode != HImode)
4229 for (ref = p->wend; ref; ref = ref->next)
4231 lab = ref->label;
4232 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4237 scan = emit_insn_after (gen_consttable_end (), scan);
4238 scan = emit_barrier_after (scan);
4239 pool_size = 0;
4240 pool_window_label = NULL_RTX;
4241 pool_window_last = 0;
4244 /* Return nonzero if the constant would be an ok source for a
4245 mov.w instead of a mov.l. */
4247 static int
4248 hi_const (rtx src)
4250 return (CONST_INT_P (src)
4251 && INTVAL (src) >= -32768
4252 && INTVAL (src) <= 32767);
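/* In other words, the value fits in the signed 16-bit range, so it can be
   placed in the pool as a two-byte entry and loaded with a sign-extending
   mov.w.  */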
4255 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4257 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4259 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4260 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4261 need to fix it if the input value is CONST_OK_FOR_I08. */
4263 static int
4264 broken_move (rtx insn)
4266 if (NONJUMP_INSN_P (insn))
4268 rtx pat = PATTERN (insn);
4269 if (GET_CODE (pat) == PARALLEL)
4270 pat = XVECEXP (pat, 0, 0);
4271 if (GET_CODE (pat) == SET
4272 /* We can load any 8-bit value if we don't care what the high
4273 order bits end up as. */
4274 && GET_MODE (SET_DEST (pat)) != QImode
4275 && (CONSTANT_P (SET_SRC (pat))
4276 /* Match mova_const. */
4277 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4278 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4279 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4280 && ! (TARGET_SH2E
4281 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4282 && (fp_zero_operand (SET_SRC (pat))
4283 || fp_one_operand (SET_SRC (pat)))
4284 /* In general we don't know the current setting of fpscr, so disable fldi.
4285 There is an exception if this was a register-register move
4286 before reload - and hence it was ascertained that we have
4287 single precision setting - and in a post-reload optimization
4288 we changed this to do a constant load. In that case
4289 we don't have an r0 clobber, hence we must use fldi. */
4290 && (TARGET_FMOVD
4291 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4292 == SCRATCH))
4293 && REG_P (SET_DEST (pat))
4294 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4295 && ! (TARGET_SH2A
4296 && GET_MODE (SET_DEST (pat)) == SImode
4297 && (satisfies_constraint_I20 (SET_SRC (pat))
4298 || satisfies_constraint_I28 (SET_SRC (pat))))
4299 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4300 return 1;
4303 return 0;
4306 static int
4307 mova_p (rtx insn)
4309 return (NONJUMP_INSN_P (insn)
4310 && GET_CODE (PATTERN (insn)) == SET
4311 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4312 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4313 /* Don't match mova_const. */
4314 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4317 /* Fix up a mova from a switch that went out of range. */
4318 static void
4319 fixup_mova (rtx mova)
4321 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4322 if (! flag_pic)
4324 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4325 INSN_CODE (mova) = -1;
4327 else
4329 rtx worker = mova;
4330 rtx lab = gen_label_rtx ();
4331 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4335 worker = NEXT_INSN (worker);
4336 gcc_assert (worker
4337 && !LABEL_P (worker)
4338 && !JUMP_P (worker));
4339 } while (NOTE_P (worker)
4340 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4341 wpat = PATTERN (worker);
4342 wpat0 = XVECEXP (wpat, 0, 0);
4343 wpat1 = XVECEXP (wpat, 0, 1);
4344 wsrc = SET_SRC (wpat0);
4345 PATTERN (worker) = (gen_casesi_worker_2
4346 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4347 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4348 XEXP (wpat1, 0)));
4349 INSN_CODE (worker) = -1;
4350 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4351 base = gen_rtx_LABEL_REF (Pmode, lab);
4352 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4353 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4354 INSN_CODE (mova) = -1;
4358 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4359 *num_mova, and check whether the new mova is nested within the first one.
4360 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4361 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4362 static int
4363 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4365 int n_addr = 0; /* Initialization to shut up spurious warning. */
4366 int f_target, n_target = 0; /* Likewise. */
4368 if (optimize)
4370 /* If NEW_MOVA has no address yet, it will be handled later. */
4371 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4372 return -1;
4374 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4375 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4376 if (n_addr > n_target || n_addr + 1022 < n_target)
4378 /* Change the mova into a load.
4379 broken_move will then return true for it. */
4380 fixup_mova (new_mova);
4381 return 1;
4384 if (!(*num_mova)++)
4386 *first_mova = new_mova;
4387 return 2;
4389 if (!optimize
4390 || ((f_target
4391 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4392 >= n_target))
4393 return -1;
4395 (*num_mova)--;
4396 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4397 > n_target - n_addr)
4399 fixup_mova (*first_mova);
4400 return 0;
4402 else
4404 fixup_mova (new_mova);
4405 return 1;
4409 /* Find the last barrier from insn FROM which is close enough to hold the
4410 constant pool. If we can't find one, then create one near the end of
4411 the range. */
4413 static rtx
4414 find_barrier (int num_mova, rtx mova, rtx from)
4416 int count_si = 0;
4417 int count_hi = 0;
4418 int found_hi = 0;
4419 int found_si = 0;
4420 int found_di = 0;
4421 int hi_align = 2;
4422 int si_align = 2;
4423 int leading_mova = num_mova;
4424 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4425 int si_limit;
4426 int hi_limit;
4427 rtx orig = from;
4428 rtx last_got = NULL_RTX;
4429 rtx last_symoff = NULL_RTX;
4431 /* For HImode: range is 510, add 4 because pc counts from address of
4432 second instruction after this one, subtract 2 for the jump instruction
4433 that we may need to emit before the table, subtract 2 for the instruction
4434 that fills the jump delay slot (in very rare cases, reorg will take an
4435 instruction from after the constant pool or will leave the delay slot
4436 empty). This gives 510.
4437 For SImode: range is 1020, add 4 because pc counts from address of
4438 second instruction after this one, subtract 2 in case pc is 2 byte
4439 aligned, subtract 2 for the jump instruction that we may need to emit
4440 before the table, subtract 2 for the instruction that fills the jump
4441 delay slot. This gives 1018. */
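/* Spelled out, the arithmetic above gives hi_limit = 510 + 4 - 2 - 2 = 510
   and si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the assignments
   below.  */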
4443 /* The branch will always be shortened now that the reference address for
4444 forward branches is the successor address, thus we need no longer make
4445 adjustments to the [sh]i_limit for -O0. */
4447 si_limit = 1018;
4448 hi_limit = 510;
4450 while (from && count_si < si_limit && count_hi < hi_limit)
4452 int inc = get_attr_length (from);
4453 int new_align = 1;
4455 /* If this is a label that existed at the time of the compute_alignments
4456 call, determine the alignment. N.B. When find_barrier recurses for
4457 an out-of-reach mova, we might see labels at the start of previously
4458 inserted constant tables. */
4459 if (LABEL_P (from)
4460 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4462 if (optimize)
4463 new_align = 1 << label_to_alignment (from);
4464 else if (BARRIER_P (prev_nonnote_insn (from)))
4465 new_align = 1 << barrier_align (from);
4466 else
4467 new_align = 1;
4468 inc = 0;
4470 /* In case we are scanning a constant table because of recursion, check
4471 for explicit alignments. If the table is long, we might be forced
4472 to emit the new table in front of it; the length of the alignment
4473 might be the last straw. */
4474 else if (NONJUMP_INSN_P (from)
4475 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4476 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4477 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4478 /* When we find the end of a constant table, paste the new constant
4479 at the end. That is better than putting it in front because
4480 this way, we don't need extra alignment for adding a 4-byte-aligned
4481 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4482 else if (NONJUMP_INSN_P (from)
4483 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4484 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4485 return from;
4487 if (BARRIER_P (from))
4489 rtx next;
4491 found_barrier = from;
4493 /* If we are at the end of the function, or in front of an alignment
4494 instruction, we need not insert an extra alignment. We prefer
4495 this kind of barrier. */
4496 if (barrier_align (from) > 2)
4497 good_barrier = from;
4499 /* If we are at the end of a hot/cold block, dump the constants
4500 here. */
4501 next = NEXT_INSN (from);
4502 if (next
4503 && NOTE_P (next)
4504 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4505 break;
4508 if (broken_move (from))
4510 rtx pat, src, dst;
4511 enum machine_mode mode;
4513 pat = PATTERN (from);
4514 if (GET_CODE (pat) == PARALLEL)
4515 pat = XVECEXP (pat, 0, 0);
4516 src = SET_SRC (pat);
4517 dst = SET_DEST (pat);
4518 mode = GET_MODE (dst);
4520 /* A GOT pc-relative setting comes as a pair of
4521 mova .L8,r0
4522 mov.l .L8,r12
4523 instructions. (plus add r0,r12).
4524 Remember if we see one without the other. */
4525 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4526 last_got = last_got ? NULL_RTX : from;
4527 else if (PIC_ADDR_P (src))
4528 last_got = last_got ? NULL_RTX : from;
4530 /* We must explicitly check the mode, because sometimes the
4531 front end will generate code to load unsigned constants into
4532 HImode targets without properly sign extending them. */
4533 if (mode == HImode
4534 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4536 found_hi += 2;
4537 /* We put the short constants before the long constants, so
4538 we must count the length of short constants in the range
4539 for the long constants. */
4540 /* ??? This isn't optimal, but is easy to do. */
4541 si_limit -= 2;
4543 else
4545 /* We dump DF/DI constants before SF/SI ones, because
4546 the limit is the same, but the alignment requirements
4547 are higher. We may waste up to 4 additional bytes
4548 for alignment, and the DF/DI constant may have
4549 another SF/SI constant placed before it. */
4550 if (TARGET_SHCOMPACT
4551 && ! found_di
4552 && (mode == DFmode || mode == DImode))
4554 found_di = 1;
4555 si_limit -= 8;
4557 while (si_align > 2 && found_si + si_align - 2 > count_si)
4558 si_align >>= 1;
4559 if (found_si > count_si)
4560 count_si = found_si;
4561 found_si += GET_MODE_SIZE (mode);
4562 if (num_mova)
4563 si_limit -= GET_MODE_SIZE (mode);
4567 if (mova_p (from))
4569 switch (untangle_mova (&num_mova, &mova, from))
4571 case 1:
4572 if (flag_pic)
4574 rtx src = SET_SRC (PATTERN (from));
4575 if (GET_CODE (src) == CONST
4576 && GET_CODE (XEXP (src, 0)) == UNSPEC
4577 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4578 last_symoff = from;
4580 break;
4581 case 0: return find_barrier (0, 0, mova);
4582 case 2:
4584 leading_mova = 0;
4585 barrier_before_mova
4586 = good_barrier ? good_barrier : found_barrier;
4588 default: break;
4590 if (found_si > count_si)
4591 count_si = found_si;
4593 else if (JUMP_TABLE_DATA_P (from))
4595 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4596 || (num_mova
4597 && (prev_nonnote_insn (from)
4598 == XEXP (MOVA_LABELREF (mova), 0))))
4599 num_mova--;
4600 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4602 /* We have just passed the barrier in front of the
4603 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4604 the ADDR_DIFF_VEC is accessed as data, just like our pool
4605 constants, this is a good opportunity to accommodate what
4606 we have gathered so far.
4607 If we waited any longer, we could end up at a barrier in
4608 front of code, which gives worse cache usage for separated
4609 instruction / data caches. */
4610 good_barrier = found_barrier;
4611 break;
4613 else
4615 rtx body = PATTERN (from);
4616 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4619 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4620 else if (JUMP_P (from)
4621 && ! TARGET_SH2
4622 && ! TARGET_SMALLCODE)
4623 new_align = 4;
4625 /* There is a possibility that a bf is transformed into a bf/s by the
4626 delay slot scheduler. */
4627 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4628 && get_attr_type (from) == TYPE_CBRANCH
4629 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4630 inc += 2;
4632 if (found_si)
4634 count_si += inc;
4635 if (new_align > si_align)
4637 si_limit -= (count_si - 1) & (new_align - si_align);
4638 si_align = new_align;
4640 count_si = (count_si + new_align - 1) & -new_align;
4642 if (found_hi)
4644 count_hi += inc;
4645 if (new_align > hi_align)
4647 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4648 hi_align = new_align;
4650 count_hi = (count_hi + new_align - 1) & -new_align;
4652 from = NEXT_INSN (from);
4655 if (num_mova)
4657 if (leading_mova)
4659 /* Try as we might, the leading mova is out of range. Change
4660 it into a load (which will become a pcload) and retry. */
4661 fixup_mova (mova);
4662 return find_barrier (0, 0, mova);
4664 else
4666 /* Insert the constant pool table before the mova instruction,
4667 to prevent the mova label reference from going out of range. */
4668 from = mova;
4669 good_barrier = found_barrier = barrier_before_mova;
4673 if (found_barrier)
4675 if (good_barrier && next_real_insn (found_barrier))
4676 found_barrier = good_barrier;
4678 else
4680 /* We didn't find a barrier in time to dump our stuff,
4681 so we'll make one. */
4682 rtx label = gen_label_rtx ();
4684 /* Don't emit a constant table in the middle of insns for
4685 casesi_worker_2. This is a bit of overkill, but it is enough
4686 because casesi_worker_2 does not appear very frequently. */
4687 if (last_symoff)
4688 from = last_symoff;
4690 /* If we exceeded the range, then we must back up over the last
4691 instruction we looked at. Otherwise, we just need to undo the
4692 NEXT_INSN at the end of the loop. */
4693 if (PREV_INSN (from) != orig
4694 && (count_hi > hi_limit || count_si > si_limit))
4695 from = PREV_INSN (PREV_INSN (from));
4696 else
4697 from = PREV_INSN (from);
4699 /* Don't emit a constant table in the middle of global pointer setting,
4700 since that would move the addressing base GOT into another table.
4701 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4702 in the pool anyway, so just move up the whole constant pool. */
4703 if (last_got)
4704 from = PREV_INSN (last_got);
4706 /* Don't insert the constant pool table at the position which
4707 may be the landing pad. */
4708 if (flag_exceptions
4709 && CALL_P (from)
4710 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4711 from = PREV_INSN (from);
4713 /* Walk back to be just before any jump or label.
4714 Putting it before a label reduces the number of times the branch
4715 around the constant pool table will be hit. Putting it before
4716 a jump makes it more likely that the bra delay slot will be
4717 filled. */
4718 while (NOTE_P (from) || JUMP_P (from)
4719 || LABEL_P (from))
4720 from = PREV_INSN (from);
4722 from = emit_jump_insn_after (gen_jump (label), from);
4723 JUMP_LABEL (from) = label;
4724 LABEL_NUSES (label) = 1;
4725 found_barrier = emit_barrier_after (from);
4726 emit_label_after (label, found_barrier);
4729 return found_barrier;
4732 /* If the instruction INSN is implemented by a special function, and we can
4733 positively find the register that is used to call the sfunc, and this
4734 register is not used anywhere else in this instruction - except as the
4735 destination of a set - return this register; else, return 0. */
4737 sfunc_uses_reg (rtx insn)
4739 int i;
4740 rtx pattern, part, reg_part, reg;
4742 if (!NONJUMP_INSN_P (insn))
4743 return 0;
4744 pattern = PATTERN (insn);
4745 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4746 return 0;
4748 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4750 part = XVECEXP (pattern, 0, i);
4751 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4752 reg_part = part;
4754 if (! reg_part)
4755 return 0;
4756 reg = XEXP (reg_part, 0);
4757 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4759 part = XVECEXP (pattern, 0, i);
4760 if (part == reg_part || GET_CODE (part) == CLOBBER)
4761 continue;
4762 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4763 && REG_P (SET_DEST (part)))
4764 ? SET_SRC (part) : part)))
4765 return 0;
4767 return reg;
4770 /* See if the only way in which INSN uses REG is by calling it, or by
4771 setting it while calling it. Set *SET to a SET rtx if the register
4772 is set by INSN. */
4774 static int
4775 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4777 rtx pattern, reg2;
4779 *set = NULL_RTX;
4781 reg2 = sfunc_uses_reg (insn);
4782 if (reg2 && REGNO (reg2) == REGNO (reg))
4784 pattern = single_set (insn);
4785 if (pattern
4786 && REG_P (SET_DEST (pattern))
4787 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4788 *set = pattern;
4789 return 0;
4791 if (!CALL_P (insn))
4793 /* We don't use rtx_equal_p because we don't care if the mode is
4794 different. */
4795 pattern = single_set (insn);
4796 if (pattern
4797 && REG_P (SET_DEST (pattern))
4798 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4800 rtx par, part;
4801 int i;
4803 *set = pattern;
4804 par = PATTERN (insn);
4805 if (GET_CODE (par) == PARALLEL)
4806 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4808 part = XVECEXP (par, 0, i);
4809 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4810 return 1;
4812 return reg_mentioned_p (reg, SET_SRC (pattern));
4815 return 1;
4818 pattern = PATTERN (insn);
4820 if (GET_CODE (pattern) == PARALLEL)
4822 int i;
4824 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4825 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4826 return 1;
4827 pattern = XVECEXP (pattern, 0, 0);
4830 if (GET_CODE (pattern) == SET)
4832 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4834 /* We don't use rtx_equal_p, because we don't care if the
4835 mode is different. */
4836 if (!REG_P (SET_DEST (pattern))
4837 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4838 return 1;
4840 *set = pattern;
4843 pattern = SET_SRC (pattern);
4846 if (GET_CODE (pattern) != CALL
4847 || !MEM_P (XEXP (pattern, 0))
4848 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4849 return 1;
4851 return 0;
4854 /* Given X, a pattern of an insn or a part of it, return a mask of used
4855 general registers. Bits 0..15 mean that the respective registers
4856 are used as inputs in the instruction. Bits 16..31 mean that the
4857 registers 0..15, respectively, are used as outputs, or are clobbered.
4858 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
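/* Illustrative example: for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the result is (1 << (1 + 16)) | (1 << 2) | (1 << 3) = 0x2000c, i.e. r1 is
   written and r2/r3 are read (assuming SImode occupies a single hard reg).  */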
4860 regs_used (rtx x, int is_dest)
4862 enum rtx_code code;
4863 const char *fmt;
4864 int i, used = 0;
4866 if (! x)
4867 return used;
4868 code = GET_CODE (x);
4869 switch (code)
4871 case REG:
4872 if (REGNO (x) < 16)
4873 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4874 << (REGNO (x) + is_dest));
4875 return 0;
4876 case SUBREG:
4878 rtx y = SUBREG_REG (x);
4880 if (!REG_P (y))
4881 break;
4882 if (REGNO (y) < 16)
4883 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4884 << (REGNO (y) +
4885 subreg_regno_offset (REGNO (y),
4886 GET_MODE (y),
4887 SUBREG_BYTE (x),
4888 GET_MODE (x)) + is_dest));
4889 return 0;
4891 case SET:
4892 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4893 case RETURN:
4894 /* If there was a return value, it must have been indicated with USE. */
4895 return 0x00ffff00;
4896 case CLOBBER:
4897 is_dest = 1;
4898 break;
4899 case MEM:
4900 is_dest = 0;
4901 break;
4902 case CALL:
4903 used |= 0x00ff00f0;
4904 break;
4905 default:
4906 break;
4909 fmt = GET_RTX_FORMAT (code);
4911 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4913 if (fmt[i] == 'E')
4915 register int j;
4916 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4917 used |= regs_used (XVECEXP (x, i, j), is_dest);
4919 else if (fmt[i] == 'e')
4920 used |= regs_used (XEXP (x, i), is_dest);
4922 return used;
4925 /* Create an instruction that prevents redirection of a conditional branch
4926 to the destination of the JUMP with address ADDR.
4927 If the branch needs to be implemented as an indirect jump, try to find
4928 a scratch register for it.
4929 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4930 If any preceding insn that doesn't fit into a delay slot is good enough,
4931 pass 1. Pass 2 if a definite blocking insn is needed.
4932 -1 is used internally to avoid deep recursion.
4933 If a blocking instruction is made or recognized, return it. */
4935 static rtx
4936 gen_block_redirect (rtx jump, int addr, int need_block)
4938 int dead = 0;
4939 rtx prev = prev_nonnote_insn (jump);
4940 rtx dest;
4942 /* First, check if we already have an instruction that satisfies our need. */
4943 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4945 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4946 return prev;
4947 if (GET_CODE (PATTERN (prev)) == USE
4948 || GET_CODE (PATTERN (prev)) == CLOBBER
4949 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4950 prev = jump;
4951 else if ((need_block &= ~1) < 0)
4952 return prev;
4953 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4954 need_block = 0;
4956 if (GET_CODE (PATTERN (jump)) == RETURN)
4958 if (! need_block)
4959 return prev;
4960 /* Reorg even does nasty things with return insns that cause branches
4961 to go out of range - see find_end_label and callers. */
4962 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4964 /* We can't use JUMP_LABEL here because it might be undefined
4965 when not optimizing. */
4966 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4967 /* If the branch is out of range, try to find a scratch register for it. */
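/* Here "out of range" means the destination lies more than 4092 bytes before
   or more than 4098 bytes after ADDR, which is what the unsigned comparison
   below tests.  */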
4968 if (optimize
4969 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4970 > 4092 + 4098))
4972 rtx scan;
4973 /* Don't look for the stack pointer as a scratch register,
4974 since it would cause trouble if an interrupt occurred.
4975 unsigned attempt = 0x7fff, used;
4976 int jump_left = flag_expensive_optimizations + 1;
4978 /* It is likely that the most recent eligible instruction is wanted for
4979 the delay slot. Therefore, find out which registers it uses, and
4980 try to avoid using them. */
4982 for (scan = jump; (scan = PREV_INSN (scan)); )
4984 enum rtx_code code;
4986 if (INSN_DELETED_P (scan))
4987 continue;
4988 code = GET_CODE (scan);
4989 if (code == CODE_LABEL || code == JUMP_INSN)
4990 break;
4991 if (code == INSN
4992 && GET_CODE (PATTERN (scan)) != USE
4993 && GET_CODE (PATTERN (scan)) != CLOBBER
4994 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4996 attempt &= ~regs_used (PATTERN (scan), 0);
4997 break;
5000 for (used = dead = 0, scan = JUMP_LABEL (jump);
5001 (scan = NEXT_INSN (scan)); )
5003 enum rtx_code code;
5005 if (INSN_DELETED_P (scan))
5006 continue;
5007 code = GET_CODE (scan);
5008 if (INSN_P (scan))
5010 used |= regs_used (PATTERN (scan), 0);
5011 if (code == CALL_INSN)
5012 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5013 dead |= (used >> 16) & ~used;
5014 if (dead & attempt)
5016 dead &= attempt;
5017 break;
5019 if (code == JUMP_INSN)
5021 if (jump_left-- && simplejump_p (scan))
5022 scan = JUMP_LABEL (scan);
5023 else
5024 break;
5028 /* Mask out the stack pointer again, in case it was
5029 the only 'free' register we have found. */
5030 dead &= 0x7fff;
5032 /* If the immediate destination is still in range, check for possible
5033 threading with a jump beyond the delay slot insn.
5034 Don't check if we are called recursively; the jump has been or will be
5035 checked in a different invocation then. */
5037 else if (optimize && need_block >= 0)
5039 rtx next = next_active_insn (next_active_insn (dest));
5040 if (next && JUMP_P (next)
5041 && GET_CODE (PATTERN (next)) == SET
5042 && recog_memoized (next) == CODE_FOR_jump_compact)
5044 dest = JUMP_LABEL (next);
5045 if (dest
5046 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5047 > 4092 + 4098))
5048 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5052 if (dead)
5054 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5056 /* It would be nice if we could convert the jump into an indirect
5057 jump / far branch right now, thus exposing all constituent
5058 instructions to further optimization. However, reorg uses
5059 simplejump_p to determine if there is an unconditional jump where
5060 it should try to schedule instructions from the target of the
5061 branch; simplejump_p fails for indirect jumps even if they have
5062 a JUMP_LABEL. */
5063 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5064 (reg, GEN_INT (unspec_bbr_uid++)),
5065 jump);
5066 /* ??? We would like this to have the scope of the jump, but that
5067 scope will change when a delay slot insn of an inner scope is added.
5068 Hence, after delay slot scheduling, we'll have to expect
5069 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5070 the jump. */
5072 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5073 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5074 return insn;
5076 else if (need_block)
5077 /* We can't use JUMP_LABEL here because it might be undefined
5078 when not optimizing. */
5079 return emit_insn_before (gen_block_branch_redirect
5080 (GEN_INT (unspec_bbr_uid++)),
5081 jump);
5082 return prev;
5085 #define CONDJUMP_MIN -252
5086 #define CONDJUMP_MAX 262
5087 struct far_branch
5089 /* A label (to be placed) in front of the jump
5090 that jumps to our ultimate destination. */
5091 rtx near_label;
5092 /* Where we are going to insert it if we cannot move the jump any farther,
5093 or the jump itself if we have picked up an existing jump. */
5094 rtx insert_place;
5095 /* The ultimate destination. */
5096 rtx far_label;
5097 struct far_branch *prev;
5098 /* If the branch has already been created, its address;
5099 else the address of its first prospective user. */
5100 int address;
5103 static void gen_far_branch (struct far_branch *);
5104 enum mdep_reorg_phase_e mdep_reorg_phase;
5105 static void
5106 gen_far_branch (struct far_branch *bp)
5108 rtx insn = bp->insert_place;
5109 rtx jump;
5110 rtx label = gen_label_rtx ();
5111 int ok;
5113 emit_label_after (label, insn);
5114 if (bp->far_label)
5116 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5117 LABEL_NUSES (bp->far_label)++;
5119 else
5120 jump = emit_jump_insn_after (gen_return (), insn);
5121 /* Emit a barrier so that reorg knows that any following instructions
5122 are not reachable via a fall-through path.
5123 But don't do this when not optimizing, since we wouldn't suppress the
5124 alignment for the barrier then, and could end up with out-of-range
5125 pc-relative loads. */
5126 if (optimize)
5127 emit_barrier_after (jump);
5128 emit_label_after (bp->near_label, insn);
5129 JUMP_LABEL (jump) = bp->far_label;
5130 ok = invert_jump (insn, label, 1);
5131 gcc_assert (ok);
5133 /* If we are branching around a jump (rather than a return), prevent
5134 reorg from using an insn from the jump target as the delay slot insn -
5135 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5136 and it could cause branches to go out of range. */
5137 if (bp->far_label)
5138 (emit_insn_after
5139 (gen_stuff_delay_slot
5140 (GEN_INT (unspec_bbr_uid++),
5141 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5142 insn));
5143 /* Prevent reorg from undoing our splits. */
5144 gen_block_redirect (jump, bp->address += 2, 2);
5147 /* Fix up ADDR_DIFF_VECs. */
5148 void
5149 fixup_addr_diff_vecs (rtx first)
5151 rtx insn;
5153 for (insn = first; insn; insn = NEXT_INSN (insn))
5155 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5157 if (!JUMP_P (insn)
5158 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5159 continue;
5160 pat = PATTERN (insn);
5161 vec_lab = XEXP (XEXP (pat, 0), 0);
5163 /* Search the matching casesi_jump_2. */
5164 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5166 if (!JUMP_P (prev))
5167 continue;
5168 prevpat = PATTERN (prev);
5169 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5170 continue;
5171 x = XVECEXP (prevpat, 0, 1);
5172 if (GET_CODE (x) != USE)
5173 continue;
5174 x = XEXP (x, 0);
5175 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5176 break;
5178 /* FIXME: This is a bug in the optimizer, but it seems harmless
5179 to just avoid panicking. */
5180 if (!prev)
5181 continue;
5183 /* Emit the reference label of the braf where it belongs, right after
5184 the casesi_jump_2 (i.e. braf). */
5185 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5186 emit_label_after (braf_label, prev);
5188 /* Fix up the ADDR_DIFF_VEC to be relative
5189 to the reference address of the braf. */
5190 XEXP (XEXP (pat, 0), 0) = braf_label;
5194 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5195 a barrier. Return the base 2 logarithm of the desired alignment. */
5197 barrier_align (rtx barrier_or_label)
5199 rtx next = next_real_insn (barrier_or_label), pat, prev;
5200 int slot, credit, jump_to_next = 0;
5202 if (! next)
5203 return 0;
5205 pat = PATTERN (next);
5207 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5208 return 2;
5210 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5211 /* This is a barrier in front of a constant table. */
5212 return 0;
5214 prev = prev_real_insn (barrier_or_label);
5215 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5217 pat = PATTERN (prev);
5218 /* If this is a very small table, we want to keep the alignment after
5219 the table to the minimum for proper code alignment. */
5220 return ((TARGET_SMALLCODE
5221 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5222 <= (unsigned) 1 << (CACHE_LOG - 2)))
5223 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5226 if (TARGET_SMALLCODE)
5227 return 0;
5229 if (! TARGET_SH2 || ! optimize)
5230 return align_jumps_log;
5232 /* When fixing up pcloads, a constant table might be inserted just before
5233 the basic block that ends with the barrier. Thus, we can't trust the
5234 instruction lengths before that. */
5235 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5237 /* Check if there is an immediately preceding branch to the insn beyond
5238 the barrier. We must weigh the cost of discarding useful information
5239 from the current cache line when executing this branch and there is
5240 an alignment, against that of fetching unneeded insns in front of the
5241 branch target when there is no alignment. */
5243 /* There are two delay_slot cases to consider. One is the simple case
5244 where the preceding branch is to the insn beyond the barrier (simple
5245 delay slot filling), and the other is where the preceding branch has
5246 a delay slot that is a duplicate of the insn after the barrier
5247 (fill_eager_delay_slots) and the branch is to the insn after the insn
5248 after the barrier. */
5250 /* PREV is presumed to be the JUMP_INSN for the barrier under
5251 investigation. Skip to the insn before it. */
5252 prev = prev_real_insn (prev);
5254 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5255 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5256 prev = prev_real_insn (prev))
5258 jump_to_next = 0;
5259 if (GET_CODE (PATTERN (prev)) == USE
5260 || GET_CODE (PATTERN (prev)) == CLOBBER)
5261 continue;
5262 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5264 prev = XVECEXP (PATTERN (prev), 0, 1);
5265 if (INSN_UID (prev) == INSN_UID (next))
5267 /* Delay slot was filled with insn at jump target. */
5268 jump_to_next = 1;
5269 continue;
5273 if (slot &&
5274 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5275 slot = 0;
5276 credit -= get_attr_length (prev);
5278 if (prev
5279 && JUMP_P (prev)
5280 && JUMP_LABEL (prev))
5282 rtx x;
5283 if (jump_to_next
5284 || next_real_insn (JUMP_LABEL (prev)) == next
5285 /* If relax_delay_slots() decides NEXT was redundant
5286 with some previous instruction, it will have
5287 redirected PREV's jump to the following insn. */
5288 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5289 /* There is no upper bound on redundant instructions
5290 that might have been skipped, but we must not put an
5291 alignment where none had been before. */
5292 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5293 (INSN_P (x)
5294 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5295 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5296 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5298 rtx pat = PATTERN (prev);
5299 if (GET_CODE (pat) == PARALLEL)
5300 pat = XVECEXP (pat, 0, 0);
5301 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5302 return 0;
5307 return align_jumps_log;
5310 /* If we are inside a phony loop, almost any kind of label can turn up as the
5311 first one in the loop. Aligning a braf label causes incorrect switch
5312 destination addresses; we can detect braf labels because they are
5313 followed by a BARRIER.
5314 Applying loop alignment to small constant or switch tables is a waste
5315 of space, so we suppress this too. */
5317 sh_loop_align (rtx label)
5319 rtx next = label;
5322 next = next_nonnote_insn (next);
5323 while (next && LABEL_P (next));
5325 if (! next
5326 || ! INSN_P (next)
5327 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5328 || recog_memoized (next) == CODE_FOR_consttable_2)
5329 return 0;
5331 return align_loops_log;
5334 /* Do a final pass over the function, just before delayed branch
5335 scheduling. */
5337 static void
5338 sh_reorg (void)
5340 rtx first, insn, mova = NULL_RTX;
5341 int num_mova;
5342 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5343 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5345 first = get_insns ();
5346 max_labelno_before_reorg = max_label_num ();
5348 /* We must split call insns before introducing `mova's. If we're
5349 optimizing, they'll have already been split. Otherwise, make
5350 sure we don't split them too late. */
5351 if (! optimize)
5352 split_all_insns_noflow ();
5354 if (TARGET_SHMEDIA)
5355 return;
5357 /* If relaxing, generate pseudo-ops to associate function calls with
5358 the symbols they call. It does no harm to not generate these
5359 pseudo-ops. However, when we can generate them, it enables the
5360 linker to potentially relax the jsr to a bsr, and eliminate the
5361 register load and, possibly, the constant pool entry. */
5363 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5364 if (TARGET_RELAX)
5366 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5367 own purposes. This works because none of the remaining passes
5368 need to look at them.
5370 ??? But it may break in the future. We should use a machine
5371 dependent REG_NOTE, or some other approach entirely. */
5372 for (insn = first; insn; insn = NEXT_INSN (insn))
5374 if (INSN_P (insn))
5376 rtx note;
5378 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5379 NULL_RTX)) != 0)
5380 remove_note (insn, note);
5384 for (insn = first; insn; insn = NEXT_INSN (insn))
5386 rtx pattern, reg, link, set, scan, dies, label;
5387 int rescan = 0, foundinsn = 0;
5389 if (CALL_P (insn))
5391 pattern = PATTERN (insn);
5393 if (GET_CODE (pattern) == PARALLEL)
5394 pattern = XVECEXP (pattern, 0, 0);
5395 if (GET_CODE (pattern) == SET)
5396 pattern = SET_SRC (pattern);
5398 if (GET_CODE (pattern) != CALL
5399 || !MEM_P (XEXP (pattern, 0)))
5400 continue;
5402 reg = XEXP (XEXP (pattern, 0), 0);
5404 else
5406 reg = sfunc_uses_reg (insn);
5407 if (! reg)
5408 continue;
5411 if (!REG_P (reg))
5412 continue;
5414 /* Try scanning backward to find where the register is set. */
5415 link = NULL;
5416 for (scan = PREV_INSN (insn);
5417 scan && !LABEL_P (scan);
5418 scan = PREV_INSN (scan))
5420 if (! INSN_P (scan))
5421 continue;
5423 if (! reg_mentioned_p (reg, scan))
5424 continue;
5426 if (noncall_uses_reg (reg, scan, &set))
5427 break;
5429 if (set)
5431 link = scan;
5432 break;
5436 if (! link)
5437 continue;
5439 /* The register is set at LINK. */
5441 /* We can only optimize the function call if the register is
5442 being set to a symbol. In theory, we could sometimes
5443 optimize calls to a constant location, but the assembler
5444 and linker do not support that at present. */
5445 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5446 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5447 continue;
5449 /* Scan forward from LINK to the place where REG dies, and
5450 make sure that the only insns which use REG are
5451 themselves function calls. */
5453 /* ??? This doesn't work for call targets that were allocated
5454 by reload, since there may not be a REG_DEAD note for the
5455 register. */
5457 dies = NULL_RTX;
5458 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5460 rtx scanset;
5462 /* Don't try to trace forward past a CODE_LABEL if we haven't
5463 seen INSN yet. Ordinarily, we will only find the setting insn
5464 if it is in the same basic block. However,
5465 cross-jumping can insert code labels in between the load and
5466 the call, and can result in situations where a single call
5467 insn may have two targets depending on where we came from. */
5469 if (LABEL_P (scan) && ! foundinsn)
5470 break;
5472 if (! INSN_P (scan))
5473 continue;
5475 /* Don't try to trace forward past a JUMP. To optimize
5476 safely, we would have to check that all the
5477 instructions at the jump destination did not use REG. */
5479 if (JUMP_P (scan))
5480 break;
5482 if (! reg_mentioned_p (reg, scan))
5483 continue;
5485 if (noncall_uses_reg (reg, scan, &scanset))
5486 break;
5488 if (scan == insn)
5489 foundinsn = 1;
5491 if (scan != insn
5492 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5494 /* There is a function call to this register other
5495 than the one we are checking. If we optimize
5496 this call, we need to rescan again below. */
5497 rescan = 1;
5500 /* ??? We shouldn't have to worry about SCANSET here.
5501 We should just be able to check for a REG_DEAD note
5502 on a function call. However, the REG_DEAD notes are
5503 apparently not dependable around libcalls; c-torture
5504 execute/920501-2 is a test case. If SCANSET is set,
5505 then this insn sets the register, so it must have
5506 died earlier. Unfortunately, this will only handle
5507 the cases in which the register is, in fact, set in a
5508 later insn. */
5510 /* ??? We shouldn't have to use FOUNDINSN here.
5511 This dates back to when we used LOG_LINKS to find
5512 the most recent insn which sets the register. */
5514 if (foundinsn
5515 && (scanset
5516 || find_reg_note (scan, REG_DEAD, reg)))
5518 dies = scan;
5519 break;
5523 if (! dies)
5525 /* Either there was a branch, or some insn used REG
5526 other than as a function call address. */
5527 continue;
5530 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5531 on the insn which sets the register, and on each call insn
5532 which uses the register. In final_prescan_insn we look for
5533 the REG_LABEL_OPERAND notes, and output the appropriate label
5534 or pseudo-op. */
5536 label = gen_label_rtx ();
5537 add_reg_note (link, REG_LABEL_OPERAND, label);
5538 add_reg_note (insn, REG_LABEL_OPERAND, label);
5539 if (rescan)
5541 scan = link;
5544 rtx reg2;
5546 scan = NEXT_INSN (scan);
5547 if (scan != insn
5548 && ((CALL_P (scan)
5549 && reg_mentioned_p (reg, scan))
5550 || ((reg2 = sfunc_uses_reg (scan))
5551 && REGNO (reg2) == REGNO (reg))))
5552 add_reg_note (scan, REG_LABEL_OPERAND, label);
5554 while (scan != dies);
5559 if (TARGET_SH2)
5560 fixup_addr_diff_vecs (first);
5562 if (optimize)
5564 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5565 shorten_branches (first);
5568 /* Scan the function looking for move instructions which have to be
5569 changed to pc-relative loads and insert the literal tables. */
5570 label_ref_list_pool = create_alloc_pool ("label references list",
5571 sizeof (struct label_ref_list_d),
5572 30);
5573 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5574 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5576 if (mova_p (insn))
5578 /* ??? basic block reordering can move a switch table dispatch
5579 below the switch table. Check if that has happened.
5580 We only have the addresses available when optimizing; but then,
5581 this check shouldn't be needed when not optimizing. */
5582 if (!untangle_mova (&num_mova, &mova, insn))
5584 insn = mova;
5585 num_mova = 0;
5588 else if (JUMP_P (insn)
5589 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5590 && num_mova
5591 /* ??? loop invariant motion can also move a mova out of a
5592 loop. Since loop does this code motion anyway, maybe we
5593 should wrap UNSPEC_MOVA into a CONST, so that reload can
5594 move it back. */
5595 && ((num_mova > 1
5596 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5597 || (prev_nonnote_insn (insn)
5598 == XEXP (MOVA_LABELREF (mova), 0))))
5600 rtx scan;
5601 int total;
5603 num_mova--;
5605 /* Some code might have been inserted between the mova and
5606 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5607 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5608 total += get_attr_length (scan);
5610 /* range of mova is 1020, add 4 because pc counts from address of
5611 second instruction after this one, subtract 2 in case pc is 2
5612 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5613 cancels out with alignment effects of the mova itself. */
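/* That is, 1020 + 4 - 2 = 1022 bytes of usable range, the bound tested
   just below.  */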
5614 if (total > 1022)
5616 /* Change the mova into a load, and restart scanning
5617 there. broken_move will then return true for mova. */
5618 fixup_mova (mova);
5619 insn = mova;
5622 if (broken_move (insn)
5623 || (NONJUMP_INSN_P (insn)
5624 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5626 rtx scan;
5627 /* Scan ahead looking for a barrier to stick the constant table
5628 behind. */
5629 rtx barrier = find_barrier (num_mova, mova, insn);
5630 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5631 int need_aligned_label = 0;
5633 if (num_mova && ! mova_p (mova))
5635 /* find_barrier had to change the first mova into a
5636 pcload; thus, we have to start with this new pcload. */
5637 insn = mova;
5638 num_mova = 0;
5640 /* Now find all the moves between the points and modify them. */
5641 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5643 if (LABEL_P (scan))
5644 last_float = 0;
5645 if (NONJUMP_INSN_P (scan)
5646 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5647 need_aligned_label = 1;
5648 if (broken_move (scan))
5650 rtx *patp = &PATTERN (scan), pat = *patp;
5651 rtx src, dst;
5652 rtx lab;
5653 rtx newsrc;
5654 enum machine_mode mode;
5656 if (GET_CODE (pat) == PARALLEL)
5657 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5658 src = SET_SRC (pat);
5659 dst = SET_DEST (pat);
5660 mode = GET_MODE (dst);
5662 if (mode == SImode && hi_const (src)
5663 && REGNO (dst) != FPUL_REG)
5665 int offset = 0;
5667 mode = HImode;
5668 while (GET_CODE (dst) == SUBREG)
5670 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5671 GET_MODE (SUBREG_REG (dst)),
5672 SUBREG_BYTE (dst),
5673 GET_MODE (dst));
5674 dst = SUBREG_REG (dst);
5676 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5678 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5680 /* This must be an insn that clobbers r0. */
5681 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5682 XVECLEN (PATTERN (scan), 0)
5683 - 1);
5684 rtx clobber = *clobberp;
5686 gcc_assert (GET_CODE (clobber) == CLOBBER
5687 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5689 if (last_float
5690 && reg_set_between_p (r0_rtx, last_float_move, scan))
5691 last_float = 0;
5692 if (last_float
5693 && TARGET_SHCOMPACT
5694 && GET_MODE_SIZE (mode) != 4
5695 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5696 last_float = 0;
5697 lab = add_constant (src, mode, last_float);
5698 if (lab)
5699 emit_insn_before (gen_mova (lab), scan);
5700 else
5702 /* There will be a REG_UNUSED note for r0 on
5703 LAST_FLOAT_MOVE; we have to change it to REG_INC;
5704 otherwise reorg:mark_target_live_regs will not
5705 consider r0 to be used, and we end up with a delay
5706 slot insn in front of SCAN that clobbers r0. */
5707 rtx note
5708 = find_regno_note (last_float_move, REG_UNUSED, 0);
5710 /* If we are not optimizing, then there may not be
5711 a note. */
5712 if (note)
5713 PUT_REG_NOTE_KIND (note, REG_INC);
5715 *last_float_addr = r0_inc_rtx;
5717 last_float_move = scan;
5718 last_float = src;
5719 newsrc = gen_const_mem (mode,
5720 (((TARGET_SH4 && ! TARGET_FMOVD)
5721 || REGNO (dst) == FPUL_REG)
5722 ? r0_inc_rtx
5723 : r0_rtx));
5724 last_float_addr = &XEXP (newsrc, 0);
5726 /* Remove the clobber of r0. */
5727 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5728 gen_rtx_SCRATCH (Pmode));
5730 /* This is a mova needing a label. Create it. */
5731 else if (GET_CODE (src) == UNSPEC
5732 && XINT (src, 1) == UNSPEC_MOVA
5733 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5735 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5736 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5737 newsrc = gen_rtx_UNSPEC (SImode,
5738 gen_rtvec (1, newsrc),
5739 UNSPEC_MOVA);
5741 else
5743 lab = add_constant (src, mode, 0);
5744 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5745 newsrc = gen_const_mem (mode, newsrc);
5747 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5748 INSN_CODE (scan) = -1;
5751 dump_table (need_aligned_label ? insn : 0, barrier);
5752 insn = barrier;
5755 free_alloc_pool (label_ref_list_pool);
5756 for (insn = first; insn; insn = NEXT_INSN (insn))
5757 PUT_MODE (insn, VOIDmode);
5759 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5760 INSN_ADDRESSES_FREE ();
5761 split_branches (first);
5763 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5764 also has an effect on the register that holds the address of the sfunc.
5765 Insert an extra dummy insn in front of each sfunc that pretends to
5766 use this register. */
5767 if (flag_delayed_branch)
5769 for (insn = first; insn; insn = NEXT_INSN (insn))
5771 rtx reg = sfunc_uses_reg (insn);
5773 if (! reg)
5774 continue;
5775 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5778 #if 0
5779 /* fpscr is not actually a user variable, but we pretend it is for the
5780 sake of the previous optimization passes, since we want it handled like
5781 one. However, we don't have any debugging information for it, so turn
5782 it into a non-user variable now. */
5783 if (TARGET_SH4)
5784 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5785 #endif
5786 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
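/* Return the UID of the insn that a branch to LABEL will actually reach.
   Newly created branch-redirection insns have UIDs beyond MAX_UID and are
   skipped over.  Return 0 if LABEL is undefined or if the destination
   turns out to be a RETURN.  */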
5790 get_dest_uid (rtx label, int max_uid)
5792 rtx dest = next_real_insn (label);
5793 int dest_uid;
5794 if (! dest)
5795 /* This can happen for an undefined label. */
5796 return 0;
5797 dest_uid = INSN_UID (dest);
5798 /* If this is a newly created branch redirection blocking instruction,
5799 we cannot index the branch_uid or insn_addresses arrays with its
5800 uid. But then, we won't need to, because the actual destination is
5801 the following branch. */
5802 while (dest_uid >= max_uid)
5804 dest = NEXT_INSN (dest);
5805 dest_uid = INSN_UID (dest);
5807 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5808 return 0;
5809 return dest_uid;
5812 /* Split condbranches that are out of range. Also add clobbers for
5813 scratch registers that are needed in far jumps.
5814 We do this before delay slot scheduling, so that it can take our
5815 newly created instructions into account. It also allows us to
5816 find branches with common targets more easily. */
5818 static void
5819 split_branches (rtx first)
5821 rtx insn;
5822 struct far_branch **uid_branch, *far_branch_list = 0;
5823 int max_uid = get_max_uid ();
5824 int ok;
5826 /* Find out which branches are out of range. */
5827 shorten_branches (first);
5829 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5830 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5832 for (insn = first; insn; insn = NEXT_INSN (insn))
5833 if (! INSN_P (insn))
5834 continue;
5835 else if (INSN_DELETED_P (insn))
5837 /* Shorten_branches would split this instruction again,
5838 so transform it into a note. */
5839 SET_INSN_DELETED (insn);
5841 else if (JUMP_P (insn)
5842 /* Don't mess with ADDR_DIFF_VEC */
5843 && (GET_CODE (PATTERN (insn)) == SET
5844 || GET_CODE (PATTERN (insn)) == RETURN))
5846 enum attr_type type = get_attr_type (insn);
5847 if (type == TYPE_CBRANCH)
5849 rtx next, beyond;
5851 if (get_attr_length (insn) > 4)
5853 rtx src = SET_SRC (PATTERN (insn));
5854 rtx olabel = XEXP (XEXP (src, 1), 0);
5855 int addr = INSN_ADDRESSES (INSN_UID (insn));
5856 rtx label = 0;
5857 int dest_uid = get_dest_uid (olabel, max_uid);
5858 struct far_branch *bp = uid_branch[dest_uid];
5860 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5861 the label if the LABEL_NUSES count drops to zero. There is
5862 always a jump_optimize pass that sets these values, but it
5863 proceeds to delete unreferenced code, and then if not
5864 optimizing, to un-delete the deleted instructions, thus
5865 leaving labels with use counts that are too low. */
5866 if (! optimize)
5868 JUMP_LABEL (insn) = olabel;
5869 LABEL_NUSES (olabel)++;
5871 if (! bp)
5873 bp = (struct far_branch *) alloca (sizeof *bp);
5874 uid_branch[dest_uid] = bp;
5875 bp->prev = far_branch_list;
5876 far_branch_list = bp;
5877 bp->far_label
5878 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5879 LABEL_NUSES (bp->far_label)++;
5881 else
5883 label = bp->near_label;
5884 if (! label && bp->address - addr >= CONDJUMP_MIN)
5886 rtx block = bp->insert_place;
5888 if (GET_CODE (PATTERN (block)) == RETURN)
5889 block = PREV_INSN (block);
5890 else
5891 block = gen_block_redirect (block,
5892 bp->address, 2);
5893 label = emit_label_after (gen_label_rtx (),
5894 PREV_INSN (block));
5895 bp->near_label = label;
5897 else if (label && ! NEXT_INSN (label))
5899 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5900 bp->insert_place = insn;
5901 else
5902 gen_far_branch (bp);
5905 if (! label
5906 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5908 bp->near_label = label = gen_label_rtx ();
5909 bp->insert_place = insn;
5910 bp->address = addr;
5912 ok = redirect_jump (insn, label, 0);
5913 gcc_assert (ok);
5915 else
5917 /* get_attr_length (insn) == 2 */
5918 /* Check if we have a pattern where reorg wants to redirect
5919 the branch to a label from an unconditional branch that
5920 is too far away. */
5921 /* We can't use JUMP_LABEL here because it might be undefined
5922 when not optimizing. */
5923 /* A syntax error might cause beyond to be NULL_RTX. */
5924 beyond
5925 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5926 0));
5928 if (beyond
5929 && (JUMP_P (beyond)
5930 || ((beyond = next_active_insn (beyond))
5931 && JUMP_P (beyond)))
5932 && GET_CODE (PATTERN (beyond)) == SET
5933 && recog_memoized (beyond) == CODE_FOR_jump_compact
5934 && ((INSN_ADDRESSES
5935 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5936 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5937 > 252 + 258 + 2))
5938 gen_block_redirect (beyond,
5939 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5942 next = next_active_insn (insn);
5944 if (next
5945 && (JUMP_P (next)
5946 || ((next = next_active_insn (next))
5947 && JUMP_P (next)))
5948 && GET_CODE (PATTERN (next)) == SET
5949 && recog_memoized (next) == CODE_FOR_jump_compact
5950 && ((INSN_ADDRESSES
5951 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5952 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5953 > 252 + 258 + 2))
5954 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5956 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5958 int addr = INSN_ADDRESSES (INSN_UID (insn));
5959 rtx far_label = 0;
5960 int dest_uid = 0;
5961 struct far_branch *bp;
5963 if (type == TYPE_JUMP)
5965 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5966 dest_uid = get_dest_uid (far_label, max_uid);
5967 if (! dest_uid)
5969 /* Parse errors can lead to labels outside
5970 the insn stream. */
5971 if (! NEXT_INSN (far_label))
5972 continue;
5974 if (! optimize)
5976 JUMP_LABEL (insn) = far_label;
5977 LABEL_NUSES (far_label)++;
5979 redirect_jump (insn, NULL_RTX, 1);
5980 far_label = 0;
5983 bp = uid_branch[dest_uid];
5984 if (! bp)
5986 bp = (struct far_branch *) alloca (sizeof *bp);
5987 uid_branch[dest_uid] = bp;
5988 bp->prev = far_branch_list;
5989 far_branch_list = bp;
5990 bp->near_label = 0;
5991 bp->far_label = far_label;
5992 if (far_label)
5993 LABEL_NUSES (far_label)++;
5995 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5996 if (addr - bp->address <= CONDJUMP_MAX)
5997 emit_label_after (bp->near_label, PREV_INSN (insn));
5998 else
6000 gen_far_branch (bp);
6001 bp->near_label = 0;
6003 else
6004 bp->near_label = 0;
6005 bp->address = addr;
6006 bp->insert_place = insn;
6007 if (! far_label)
6008 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6009 else
6010 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6013 /* Generate all pending far branches,
6014 and free our references to the far labels. */
6015 while (far_branch_list)
6017 if (far_branch_list->near_label
6018 && ! NEXT_INSN (far_branch_list->near_label))
6019 gen_far_branch (far_branch_list);
6020 if (optimize
6021 && far_branch_list->far_label
6022 && ! --LABEL_NUSES (far_branch_list->far_label))
6023 delete_insn (far_branch_list->far_label);
6024 far_branch_list = far_branch_list->prev;
6027 /* Instruction length information is no longer valid due to the new
6028 instructions that have been generated. */
6029 init_insn_lengths ();
6032 /* Dump out instruction addresses, which is useful for debugging the
6033 constant pool table stuff.
6035 If relaxing, output the label and pseudo-ops used to link together
6036 calls and the instruction which set the registers. */
6038 /* ??? The addresses printed by this routine for insns are nonsense for
6039 insns which are inside of a sequence where none of the inner insns have
6040 variable length. This is because the second pass of shorten_branches
6041 does not bother to update them. */
6043 void
6044 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6045 int noperands ATTRIBUTE_UNUSED)
6047 if (TARGET_DUMPISIZE)
6048 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6050 if (TARGET_RELAX)
6052 rtx note;
6054 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6055 if (note)
6057 rtx pattern;
6059 pattern = PATTERN (insn);
6060 if (GET_CODE (pattern) == PARALLEL)
6061 pattern = XVECEXP (pattern, 0, 0);
6062 switch (GET_CODE (pattern))
6064 case SET:
6065 if (GET_CODE (SET_SRC (pattern)) != CALL
6066 && get_attr_type (insn) != TYPE_SFUNC)
6068 targetm.asm_out.internal_label
6069 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6070 break;
6072 /* else FALLTHROUGH */
6073 case CALL:
6074 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6075 CODE_LABEL_NUMBER (XEXP (note, 0)));
6076 break;
6078 default:
6079 gcc_unreachable ();
6085 /* Dump out any constants accumulated in the final pass. These will
6086 only be labels. */
6088 const char *
6089 output_jump_label_table (void)
6091 int i;
6093 if (pool_size)
6095 fprintf (asm_out_file, "\t.align 2\n");
6096 for (i = 0; i < pool_size; i++)
6098 pool_node *p = &pool_vector[i];
6100 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6101 CODE_LABEL_NUMBER (p->label));
6102 output_asm_insn (".long %O0", &p->value);
6104 pool_size = 0;
6107 return "";
6110 /* A full frame looks like:
6112 arg-5
6113 arg-4
6114 [ if current_function_anonymous_args
6115 arg-3
6116 arg-2
6117 arg-1
6118 arg-0 ]
6119 saved-fp
6120 saved-r10
6121 saved-r11
6122 saved-r12
6123 saved-pr
6124 local-n
6126 local-1
6127 local-0 <- fp points here. */
6129 /* Number of bytes pushed for anonymous args, used to pass information
6130 between expand_prologue and expand_epilogue. */
6132 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6133 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6134 for an epilogue and a negative value means that it's for a sibcall
6135 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6136 all the registers that are about to be restored, and hence dead. */
6138 static void
6139 output_stack_adjust (int size, rtx reg, int epilogue_p,
6140 HARD_REG_SET *live_regs_mask, bool frame_p)
6142 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6143 if (size)
6145 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6147 /* This test is bogus, as output_stack_adjust is used to re-align the
6148 stack. */
6149 #if 0
6150 gcc_assert (!(size % align));
6151 #endif
6153 if (CONST_OK_FOR_ADD (size))
6154 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6155 /* Try to do it with two partial adjustments; however, we must make
6156 sure that the stack is properly aligned at all times, in case
6157 an interrupt occurs between the two partial adjustments. */
6158 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6159 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6161 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6162 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6164 else
6166 rtx const_reg;
6167 rtx insn;
6168 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6169 int i;
6171 /* If TEMP is invalid, we could temporarily save a general
6172 register to MACL. However, there is currently no need
6173 to handle this case, so just die when we see it. */
6174 if (epilogue_p < 0
6175 || current_function_interrupt
6176 || ! call_really_used_regs[temp] || fixed_regs[temp])
6177 temp = -1;
6178 if (temp < 0 && ! current_function_interrupt
6179 && (TARGET_SHMEDIA || epilogue_p >= 0))
6181 HARD_REG_SET temps;
6182 COPY_HARD_REG_SET (temps, call_used_reg_set);
6183 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6184 if (epilogue_p > 0)
6186 int nreg = 0;
6187 if (crtl->return_rtx)
6189 enum machine_mode mode;
6190 mode = GET_MODE (crtl->return_rtx);
6191 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6192 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6194 for (i = 0; i < nreg; i++)
6195 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6196 if (crtl->calls_eh_return)
6198 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6199 for (i = 0; i <= 3; i++)
6200 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6203 if (TARGET_SHMEDIA && epilogue_p < 0)
6204 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6205 CLEAR_HARD_REG_BIT (temps, i);
6206 if (epilogue_p <= 0)
6208 for (i = FIRST_PARM_REG;
6209 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6210 CLEAR_HARD_REG_BIT (temps, i);
6211 if (cfun->static_chain_decl != NULL)
6212 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6214 temp = scavenge_reg (&temps);
6216 if (temp < 0 && live_regs_mask)
6218 HARD_REG_SET temps;
6220 COPY_HARD_REG_SET (temps, *live_regs_mask);
6221 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6222 temp = scavenge_reg (&temps);
6224 if (temp < 0)
6226 rtx adj_reg, tmp_reg, mem;
6228 /* If we reached here, the most likely case is the (sibcall)
6229 epilogue for non SHmedia. Put a special push/pop sequence
6230 for such a case as a last resort.  This looks lengthy but
6231 should not be a problem because such a case seems to be very
6232 rare. */
6234 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6237 /* ??? There is still the slight possibility that r4 or
6238 r5 have been reserved as fixed registers or assigned
6239 as global registers, and they change during an
6240 interrupt. There are possible ways to handle this:
6242 - If we are adjusting the frame pointer (r14), we can do
6243 with a single temp register and an ordinary push / pop
6244 on the stack.
6245 - Grab any call-used or call-saved registers (i.e. not
6246 fixed or globals) for the temps we need. We might
6247 also grab r14 if we are adjusting the stack pointer.
6248 If we can't find enough available registers, issue
6249 a diagnostic and die - the user must have reserved
6250 way too many registers.
6251 But since all this is rather unlikely to happen and
6252 would require extra testing, we just die if r4 / r5
6253 are not available. */
6254 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6255 && !global_regs[4] && !global_regs[5]);
6257 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6258 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6259 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6260 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6261 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6262 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6263 emit_move_insn (mem, tmp_reg);
6264 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6265 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6266 emit_move_insn (mem, tmp_reg);
6267 emit_move_insn (reg, adj_reg);
6268 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6269 emit_move_insn (adj_reg, mem);
6270 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6271 emit_move_insn (tmp_reg, mem);
6272 /* Tell flow the insns that pop r4/r5 aren't dead. */
6273 emit_use (tmp_reg);
6274 emit_use (adj_reg);
6275 return;
6277 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6279 /* If SIZE is negative, subtract the positive value.
6280 This sometimes allows a constant pool entry to be shared
6281 between prologue and epilogue code. */
6282 if (size < 0)
6284 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6285 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6287 else
6289 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6290 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6292 if (! epilogue_p)
6293 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6294 gen_rtx_SET (VOIDmode, reg,
6295 gen_rtx_PLUS (SImode, reg,
6296 GEN_INT (size))));
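/* Emit pattern X as an insn and mark it frame related, so that the
   DWARF CFI machinery takes note of it.  */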
6301 static rtx
6302 frame_insn (rtx x)
6304 x = emit_insn (x);
6305 RTX_FRAME_RELATED_P (x) = 1;
6306 return x;
6309 /* Output RTL to push register RN onto the stack. */
6311 static rtx
6312 push (int rn)
6314 rtx x;
6315 if (rn == FPUL_REG)
6316 x = gen_push_fpul ();
6317 else if (rn == FPSCR_REG)
6318 x = gen_push_fpscr ();
6319 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6320 && FP_OR_XD_REGISTER_P (rn))
6322 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6323 return NULL_RTX;
6324 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6326 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6327 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6328 else
6329 x = gen_push (gen_rtx_REG (SImode, rn));
6331 x = frame_insn (x);
6332 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6333 return x;
6336 /* Output RTL to pop register RN from the stack. */
6338 static void
6339 pop (int rn)
6341 rtx x;
6342 if (rn == FPUL_REG)
6343 x = gen_pop_fpul ();
6344 else if (rn == FPSCR_REG)
6345 x = gen_pop_fpscr ();
6346 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6347 && FP_OR_XD_REGISTER_P (rn))
6349 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6350 return;
6351 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6353 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6354 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6355 else
6356 x = gen_pop (gen_rtx_REG (SImode, rn));
6358 x = emit_insn (x);
6359 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6362 /* Generate code to push the regs specified in the mask. */
6364 static void
6365 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6367 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6368 int skip_fpscr = 0;
6370 /* Push PR last; this gives better latencies after the prologue, and
6371 candidates for the return delay slot when there are no general
6372 registers pushed. */
6373 for (; i < FIRST_PSEUDO_REGISTER; i++)
6375 /* If this is an interrupt handler, and the SZ bit varies,
6376 and we have to push any floating point register, we need
6377 to switch to the correct precision first. */
6378 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6379 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6381 HARD_REG_SET unsaved;
6383 push (FPSCR_REG);
6384 COMPL_HARD_REG_SET (unsaved, *mask);
6385 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6386 skip_fpscr = 1;
6388 if (i != PR_REG
6389 && (i != FPSCR_REG || ! skip_fpscr)
6390 && TEST_HARD_REG_BIT (*mask, i))
6392 /* If the ISR has RESBANK attribute assigned, don't push any of
6393 the following registers - R0-R14, MACH, MACL and GBR. */
6394 if (! (sh_cfun_resbank_handler_p ()
6395 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6396 || i == MACH_REG
6397 || i == MACL_REG
6398 || i == GBR_REG)))
6399 push (i);
6403 /* Push banked registers last to improve delay slot opportunities. */
6404 if (interrupt_handler)
6405 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6406 if (TEST_HARD_REG_BIT (*mask, i))
6407 push (i);
6409 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6410 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6411 push (PR_REG);
6414 /* Calculate how much extra space is needed to save all callee-saved
6415 target registers.
6416 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6418 static int
6419 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6421 int reg;
6422 int stack_space = 0;
6423 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6425 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6426 if ((! call_really_used_regs[reg] || interrupt_handler)
6427 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6428 /* Leave space to save this target register on the stack,
6429 in case target register allocation wants to use it. */
6430 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6431 return stack_space;
6434 /* Decide whether we should reserve space for callee-save target registers,
6435 in case target register allocation wants to use them. REGS_SAVED is
6436 the space, in bytes, that is already required for register saves.
6437 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6439 static int
6440 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6441 HARD_REG_SET *live_regs_mask)
6443 if (optimize_size)
6444 return 0;
6445 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6448 /* Decide how much space to reserve for callee-save target registers
6449 in case target register allocation wants to use them.
6450 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6452 static int
6453 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6455 if (shmedia_space_reserved_for_target_registers)
6456 return shmedia_target_regs_stack_space (live_regs_mask);
6457 else
6458 return 0;
6461 /* Work out the registers which need to be saved, both as a mask and a
6462 count of saved words. Return the count.
6464 If doing a pragma interrupt function, then push all regs used by the
6465 function, and if we call another function (we can tell by looking at PR),
6466 make sure that all the regs it clobbers are safe too. */
6468 static int
6469 calc_live_regs (HARD_REG_SET *live_regs_mask)
6471 unsigned int reg;
6472 int count;
6473 tree attrs;
6474 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6475 bool nosave_low_regs;
6476 int pr_live, has_call;
6478 attrs = DECL_ATTRIBUTES (current_function_decl);
6479 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6480 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6481 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6482 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6484 CLEAR_HARD_REG_SET (*live_regs_mask);
6485 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6486 && df_regs_ever_live_p (FPSCR_REG))
6487 target_flags &= ~MASK_FPU_SINGLE;
6488 /* If we can avoid a lot of register saves by switching to double mode, do that. */
6489 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6490 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6491 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6492 && (! call_really_used_regs[reg]
6493 || interrupt_handler)
6494 && ++count > 2)
6496 target_flags &= ~MASK_FPU_SINGLE;
6497 break;
6499 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6500 knows how to use it. That means the pseudo originally allocated for
6501 the initial value can become the PR_MEDIA_REG hard register, as seen for
6502 execute/20010122-1.c:test9. */
6503 if (TARGET_SHMEDIA)
6504 /* ??? this function is called from initial_elimination_offset, hence we
6505 can't use the result of sh_media_register_for_return here. */
6506 pr_live = sh_pr_n_sets ();
6507 else
6509 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6510 pr_live = (pr_initial
6511 ? (!REG_P (pr_initial)
6512 || REGNO (pr_initial) != (PR_REG))
6513 : df_regs_ever_live_p (PR_REG));
6514 /* For SHcompact, if not optimizing, we end up with a memory reference
6515 using the return address pointer for __builtin_return_address even
6516 though there is no actual need to put the PR register on the stack. */
6517 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6519 /* Force PR to be live if the prologue has to call the SHmedia
6520 argument decoder or register saver. */
6521 if (TARGET_SHCOMPACT
6522 && ((crtl->args.info.call_cookie
6523 & ~ CALL_COOKIE_RET_TRAMP (1))
6524 || crtl->saves_all_registers))
6525 pr_live = 1;
6526 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
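/* Walk all hard registers from the highest number down; for every register
   that has to be saved, set its bit in LIVE_REGS_MASK and add its size
   to COUNT.  */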
6527 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6529 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6530 ? pr_live
6531 : interrupt_handler
6532 ? (/* Need to save all the regs ever live. */
6533 (df_regs_ever_live_p (reg)
6534 || (call_really_used_regs[reg]
6535 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6536 || reg == PIC_OFFSET_TABLE_REGNUM)
6537 && has_call)
6538 || (TARGET_SHMEDIA && has_call
6539 && REGISTER_NATURAL_MODE (reg) == SImode
6540 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6541 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6542 && reg != RETURN_ADDRESS_POINTER_REGNUM
6543 && reg != T_REG && reg != GBR_REG
6544 /* Push fpscr only on targets which have an FPU.  */
6545 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6546 : (/* Only push those regs which are used and need to be saved. */
6547 (TARGET_SHCOMPACT
6548 && flag_pic
6549 && crtl->args.info.call_cookie
6550 && reg == PIC_OFFSET_TABLE_REGNUM)
6551 || (df_regs_ever_live_p (reg)
6552 && ((!call_really_used_regs[reg]
6553 && !(reg != PIC_OFFSET_TABLE_REGNUM
6554 && fixed_regs[reg] && call_used_regs[reg]))
6555 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6556 || (crtl->calls_eh_return
6557 && (reg == EH_RETURN_DATA_REGNO (0)
6558 || reg == EH_RETURN_DATA_REGNO (1)
6559 || reg == EH_RETURN_DATA_REGNO (2)
6560 || reg == EH_RETURN_DATA_REGNO (3)))
6561 || ((reg == MACL_REG || reg == MACH_REG)
6562 && df_regs_ever_live_p (reg)
6563 && sh_cfun_attr_renesas_p ())
6566 SET_HARD_REG_BIT (*live_regs_mask, reg);
6567 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6569 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6570 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6572 if (FP_REGISTER_P (reg))
6574 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6576 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6577 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6580 else if (XD_REGISTER_P (reg))
6582 /* Must switch to double mode to access these registers. */
6583 target_flags &= ~MASK_FPU_SINGLE;
6587 if (nosave_low_regs && reg == R8_REG)
6588 break;
6590 /* If we have a target register optimization pass after prologue / epilogue
6591 threading, we need to assume all target registers will be live even if
6592 they aren't now. */
6593 if (flag_branch_target_load_optimize2
6594 && TARGET_SAVE_ALL_TARGET_REGS
6595 && shmedia_space_reserved_for_target_registers)
6596 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6597 if ((! call_really_used_regs[reg] || interrupt_handler)
6598 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6600 SET_HARD_REG_BIT (*live_regs_mask, reg);
6601 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6603 /* If this is an interrupt handler, we don't have any call-clobbered
6604 registers we can conveniently use for target register save/restore.
6605 Make sure we save at least one general purpose register when we need
6606 to save target registers. */
6607 if (interrupt_handler
6608 && hard_reg_set_intersect_p (*live_regs_mask,
6609 reg_class_contents[TARGET_REGS])
6610 && ! hard_reg_set_intersect_p (*live_regs_mask,
6611 reg_class_contents[GENERAL_REGS]))
6613 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6614 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6617 return count;
6620 /* Code to generate prologue and epilogue sequences */
6622 /* PUSHED is the number of bytes that are being pushed on the
6623 stack for register saves. Return the frame size, padded
6624 appropriately so that the stack stays properly aligned. */
6625 static HOST_WIDE_INT
6626 rounded_frame_size (int pushed)
6628 HOST_WIDE_INT size = get_frame_size ();
6629 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6631 if (ACCUMULATE_OUTGOING_ARGS)
6632 size += crtl->outgoing_args_size;
6634 return ((size + pushed + align - 1) & -align) - pushed;
6637 /* Choose a call-clobbered target-branch register that remains
6638 unchanged along the whole function. We set it up as the return
6639 value in the prologue. */
6641 sh_media_register_for_return (void)
6643 int regno;
6644 int tr0_used;
6646 if (! current_function_is_leaf)
6647 return -1;
6648 if (lookup_attribute ("interrupt_handler",
6649 DECL_ATTRIBUTES (current_function_decl)))
6650 return -1;
6651 if (sh_cfun_interrupt_handler_p ())
6652 return -1;
6654 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6656 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6657 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6658 return regno;
6660 return -1;
6663 /* The maximum registers we need to save are:
6664 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6665 - 32 floating point registers (for each pair, we save none,
6666 one single precision value, or a double precision value).
6667 - 8 target registers
6668 - add 1 entry for a delimiter. */
6669 #define MAX_SAVED_REGS (62+32+8)
6671 typedef struct save_entry_s
6673 unsigned char reg;
6674 unsigned char mode;
6675 short offset;
6676 } save_entry;
6678 #define MAX_TEMPS 4
6680 /* There will be a delimiter entry with VOIDmode both at the start and the
6681 end of a filled in schedule. The end delimiter has the offset of the
6682 save with the smallest (i.e. most negative) offset. */
6683 typedef struct save_schedule_s
6685 save_entry entries[MAX_SAVED_REGS + 2];
6686 int temps[MAX_TEMPS+1];
6687 } save_schedule;
6689 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6690 use reverse order. Returns the last entry written to (not counting
6691 the delimiter). OFFSET_BASE is a number to be added to all offset
6692 entries. */
6694 static save_entry *
6695 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6696 int offset_base)
6698 int align, i;
6699 save_entry *entry = schedule->entries;
6700 int tmpx = 0;
6701 int offset;
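/* For a non-interrupt function, pre-select up to MAX_TEMPS call-clobbered
   general registers that carry no argument, return value, static chain or
   EH data; they will serve as temporaries while saving and restoring.  */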
6703 if (! current_function_interrupt)
6704 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6705 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6706 && ! FUNCTION_ARG_REGNO_P (i)
6707 && i != FIRST_RET_REG
6708 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6709 && ! (crtl->calls_eh_return
6710 && (i == EH_RETURN_STACKADJ_REGNO
6711 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6712 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6713 schedule->temps[tmpx++] = i;
6714 entry->reg = -1;
6715 entry->mode = VOIDmode;
6716 entry->offset = offset_base;
6717 entry++;
6718 /* We loop twice: first, we save 8-byte aligned registers in the
6719 higher addresses, which are known to be aligned.  Then we
6720 proceed to saving 32-bit registers that don't need 8-byte
6721 alignment.
6722 If this is an interrupt function, all registers that need saving
6723 need to be saved in full.  Moreover, we need to postpone saving
6724 target registers till we have saved some general purpose registers
6725 we can then use as scratch registers. */
6726 offset = offset_base;
6727 for (align = 1; align >= 0; align--)
6729 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6730 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6732 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6733 int reg = i;
6735 if (current_function_interrupt)
6737 if (TARGET_REGISTER_P (i))
6738 continue;
6739 if (GENERAL_REGISTER_P (i))
6740 mode = DImode;
6742 if (mode == SFmode && (i % 2) == 1
6743 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6744 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6746 mode = DFmode;
6747 i--;
6748 reg--;
6751 /* If we're doing the aligned pass and this is not aligned,
6752 or we're doing the unaligned pass and this is aligned,
6753 skip it. */
6754 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6755 != align)
6756 continue;
6758 if (current_function_interrupt
6759 && GENERAL_REGISTER_P (i)
6760 && tmpx < MAX_TEMPS)
6761 schedule->temps[tmpx++] = i;
6763 offset -= GET_MODE_SIZE (mode);
6764 entry->reg = i;
6765 entry->mode = mode;
6766 entry->offset = offset;
6767 entry++;
6769 if (align && current_function_interrupt)
6770 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6771 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6773 offset -= GET_MODE_SIZE (DImode);
6774 entry->reg = i;
6775 entry->mode = DImode;
6776 entry->offset = offset;
6777 entry++;
6780 entry->reg = -1;
6781 entry->mode = VOIDmode;
6782 entry->offset = offset;
6783 schedule->temps[tmpx] = -1;
6784 return entry - 1;
6787 void
6788 sh_expand_prologue (void)
6790 HARD_REG_SET live_regs_mask;
6791 int d, i;
6792 int d_rounding = 0;
6793 int save_flags = target_flags;
6794 int pretend_args;
6795 tree sp_switch_attr
6796 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6798 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6800 /* We have pretend args if we had an object sent partially in registers
6801 and partially on the stack, e.g. a large structure. */
6802 pretend_args = crtl->args.pretend_args_size;
6803 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6804 && (NPARM_REGS(SImode)
6805 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6806 pretend_args = 0;
6807 /* Dwarf2 module doesn't expect frame related insns here. */
6808 output_stack_adjust (-pretend_args
6809 - crtl->args.info.stack_regs * 8,
6810 stack_pointer_rtx, 0, NULL, false);
6812 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6813 /* We're going to use the PIC register to load the address of the
6814 incoming-argument decoder and/or of the return trampoline from
6815 the GOT, so make sure the PIC register is preserved and
6816 initialized. */
6817 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6819 if (TARGET_SHCOMPACT
6820 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6822 int reg;
6824 /* First, make all registers with incoming arguments that will
6825 be pushed onto the stack live, so that register renaming
6826 doesn't overwrite them. */
6827 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6828 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6829 >= NPARM_REGS (SImode) - reg)
6830 for (; reg < NPARM_REGS (SImode); reg++)
6831 emit_insn (gen_shcompact_preserve_incoming_args
6832 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6833 else if (CALL_COOKIE_INT_REG_GET
6834 (crtl->args.info.call_cookie, reg) == 1)
6835 emit_insn (gen_shcompact_preserve_incoming_args
6836 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6838 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6839 stack_pointer_rtx);
6840 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6841 GEN_INT (crtl->args.info.call_cookie));
6842 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6843 gen_rtx_REG (SImode, R0_REG));
6845 else if (TARGET_SHMEDIA)
6847 int tr = sh_media_register_for_return ();
6849 if (tr >= 0)
6850 emit_move_insn (gen_rtx_REG (DImode, tr),
6851 gen_rtx_REG (DImode, PR_MEDIA_REG));
6854 /* Emit the code for SETUP_VARARGS. */
6855 if (cfun->stdarg)
6857 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6859 /* Push arg regs as if they'd been provided by the caller on the stack. */
6860 for (i = 0; i < NPARM_REGS(SImode); i++)
6862 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6863 rtx insn;
6865 if (i >= (NPARM_REGS(SImode)
6866 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6868 break;
6869 insn = push (rn);
6874 /* If we're supposed to switch stacks at function entry, do so now. */
6875 if (sp_switch_attr)
6877 rtx lab, newsrc;
6878 /* The argument specifies a variable holding the address of the
6879 stack the interrupt function should switch to/from at entry/exit. */
6880 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6881 const char *s
6882 = ggc_strdup (TREE_STRING_POINTER (arg));
6883 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6885 lab = add_constant (sp_switch, SImode, 0);
6886 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6887 newsrc = gen_const_mem (SImode, newsrc);
6889 emit_insn (gen_sp_switch_1 (newsrc));
6892 d = calc_live_regs (&live_regs_mask);
6893 /* ??? Maybe we could save some switching if we can move a mode switch
6894 that already happens to be at the function start into the prologue. */
6895 if (target_flags != save_flags && ! current_function_interrupt)
6896 emit_insn (gen_toggle_sz ());
6898 if (TARGET_SH5)
6900 int offset_base, offset;
6901 rtx r0 = NULL_RTX;
6902 int offset_in_r0 = -1;
6903 int sp_in_r0 = 0;
6904 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6905 int total_size, save_size;
6906 save_schedule schedule;
6907 save_entry *entry;
6908 int *tmp_pnt;
6910 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6911 && ! current_function_interrupt)
6912 r0 = gen_rtx_REG (Pmode, R0_REG);
6914 /* D is the actual number of bytes that we need for saving registers;
6915 however, in initial_elimination_offset we have committed to using
6916 an additional TREGS_SPACE amount of bytes - in order to keep both
6917 addresses to arguments supplied by the caller and local variables
6918 valid, we must keep this gap. Place it between the incoming
6919 arguments and the actually saved registers in a bid to optimize
6920 locality of reference. */
6921 total_size = d + tregs_space;
6922 total_size += rounded_frame_size (total_size);
6923 save_size = total_size - rounded_frame_size (d);
6924 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6925 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6926 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6928 /* If adjusting the stack in a single step costs nothing extra, do so.
6929 I.e. either if a single addi is enough, or we need a movi anyway,
6930 and we don't exceed the maximum offset range (the test for the
6931 latter is conservative for simplicity). */
6932 if (TARGET_SHMEDIA
6933 && (CONST_OK_FOR_I10 (-total_size)
6934 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6935 && total_size <= 2044)))
6936 d_rounding = total_size - save_size;
6938 offset_base = d + d_rounding;
6940 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6941 0, NULL, true);
6943 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6944 tmp_pnt = schedule.temps;
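/* Emit the scheduled saves.  When a plain sp+offset address is out of
   range, the offset is built up in r0, and pre-decrement addressing through
   r0 is used where it helps.  Target registers, as well as PR and other
   special registers when pre-decrement cannot be used, are copied through
   one of the scratch temporaries first.  */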
6945 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6947 enum machine_mode mode = (enum machine_mode) entry->mode;
6948 unsigned int reg = entry->reg;
6949 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6950 rtx orig_reg_rtx;
6952 offset = entry->offset;
6954 reg_rtx = gen_rtx_REG (mode, reg);
6956 mem_rtx = gen_frame_mem (mode,
6957 gen_rtx_PLUS (Pmode,
6958 stack_pointer_rtx,
6959 GEN_INT (offset)));
6961 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6963 gcc_assert (r0);
6964 mem_rtx = NULL_RTX;
6967 if (HAVE_PRE_DECREMENT
6968 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6969 || mem_rtx == NULL_RTX
6970 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6972 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6974 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6975 pre_dec = NULL_RTX;
6976 else
6978 mem_rtx = NULL_RTX;
6979 offset += GET_MODE_SIZE (mode);
6983 if (mem_rtx != NULL_RTX)
6984 goto addr_ok;
6986 if (offset_in_r0 == -1)
6988 emit_move_insn (r0, GEN_INT (offset));
6989 offset_in_r0 = offset;
6991 else if (offset != offset_in_r0)
6993 emit_move_insn (r0,
6994 gen_rtx_PLUS
6995 (Pmode, r0,
6996 GEN_INT (offset - offset_in_r0)));
6997 offset_in_r0 += offset - offset_in_r0;
7000 if (pre_dec != NULL_RTX)
7002 if (! sp_in_r0)
7004 emit_move_insn (r0,
7005 gen_rtx_PLUS
7006 (Pmode, r0, stack_pointer_rtx));
7007 sp_in_r0 = 1;
7010 offset -= GET_MODE_SIZE (mode);
7011 offset_in_r0 -= GET_MODE_SIZE (mode);
7013 mem_rtx = pre_dec;
7015 else if (sp_in_r0)
7016 mem_rtx = gen_frame_mem (mode, r0);
7017 else
7018 mem_rtx = gen_frame_mem (mode,
7019 gen_rtx_PLUS (Pmode,
7020 stack_pointer_rtx,
7021 r0));
7023 /* We must not use an r0-based address for target-branch
7024 registers or for special registers without pre-dec
7025 memory addresses, since we store their values in r0
7026 first. */
7027 gcc_assert (!TARGET_REGISTER_P (reg)
7028 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7029 || mem_rtx == pre_dec));
7031 addr_ok:
7032 orig_reg_rtx = reg_rtx;
7033 if (TARGET_REGISTER_P (reg)
7034 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7035 && mem_rtx != pre_dec))
7037 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7039 emit_move_insn (tmp_reg, reg_rtx);
7041 if (REGNO (tmp_reg) == R0_REG)
7043 offset_in_r0 = -1;
7044 sp_in_r0 = 0;
7045 gcc_assert (!refers_to_regno_p
7046 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7049 if (*++tmp_pnt <= 0)
7050 tmp_pnt = schedule.temps;
7052 reg_rtx = tmp_reg;
7055 rtx insn;
7057 /* Mark as interesting for dwarf cfi generator */
7058 insn = emit_move_insn (mem_rtx, reg_rtx);
7059 RTX_FRAME_RELATED_P (insn) = 1;
7060 /* If we use an intermediate register for the save, we can't
7061 describe this exactly in cfi as a copy of the to-be-saved
7062 register into the temporary register and then a store of the
7063 temporary register to the stack, because the temporary register can
7064 have a different natural size than the to-be-saved register.
7065 Thus, we gloss over the intermediate copy and pretend we do
7066 a direct save from the to-be-saved register. */
7067 if (REGNO (reg_rtx) != reg)
7069 rtx set;
7071 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7072 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7075 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7077 rtx reg_rtx = gen_rtx_REG (mode, reg);
7078 rtx set;
7079 rtx mem_rtx = gen_frame_mem (mode,
7080 gen_rtx_PLUS (Pmode,
7081 stack_pointer_rtx,
7082 GEN_INT (offset)));
7084 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7085 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7090 gcc_assert (entry->offset == d_rounding);
7092 else
7093 push_regs (&live_regs_mask, current_function_interrupt);
7095 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7096 emit_insn (gen_GOTaddr2picreg ());
7098 if (SHMEDIA_REGS_STACK_ADJUST ())
7100 /* This must NOT go through the PLT, otherwise mach and macl
7101 may be clobbered. */
7102 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7103 (TARGET_FPU_ANY
7104 ? "__GCC_push_shmedia_regs"
7105 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7106 emit_insn (gen_shmedia_save_restore_regs_compact
7107 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7110 if (target_flags != save_flags && ! current_function_interrupt)
7111 emit_insn (gen_toggle_sz ());
7113 target_flags = save_flags;
7115 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7116 stack_pointer_rtx, 0, NULL, true);
7118 if (frame_pointer_needed)
7119 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7121 if (TARGET_SHCOMPACT
7122 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7124 /* This must NOT go through the PLT, otherwise mach and macl
7125 may be clobbered. */
7126 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7127 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7128 emit_insn (gen_shcompact_incoming_args ());
7132 void
7133 sh_expand_epilogue (bool sibcall_p)
7135 HARD_REG_SET live_regs_mask;
7136 int d, i;
7137 int d_rounding = 0;
7139 int save_flags = target_flags;
7140 int frame_size, save_size;
7141 int fpscr_deferred = 0;
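/* Passed to output_stack_adjust: 1 for a normal epilogue, -1 for a
   sibcall epilogue.  */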
7142 int e = sibcall_p ? -1 : 1;
7144 d = calc_live_regs (&live_regs_mask);
7146 save_size = d;
7147 frame_size = rounded_frame_size (d);
7149 if (TARGET_SH5)
7151 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7152 int total_size;
7153 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7154 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7155 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7157 total_size = d + tregs_space;
7158 total_size += rounded_frame_size (total_size);
7159 save_size = total_size - frame_size;
7161 /* If adjusting the stack in a single step costs nothing extra, do so.
7162 I.e. either if a single addi is enough, or we need a movi anyway,
7163 and we don't exceed the maximum offset range (the test for the
7164 latter is conservative for simplicity). */
7165 if (TARGET_SHMEDIA
7166 && ! frame_pointer_needed
7167 && (CONST_OK_FOR_I10 (total_size)
7168 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7169 && total_size <= 2044)))
7170 d_rounding = frame_size;
7172 frame_size -= d_rounding;
7175 if (frame_pointer_needed)
7177 /* We must avoid scheduling the epilogue with previous basic blocks.
7178 See PR/18032 and PR/40313. */
7179 emit_insn (gen_blockage ());
7180 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7181 &live_regs_mask, false);
7183 /* We must avoid moving the stack pointer adjustment past code
7184 which reads from the local frame, else an interrupt could
7185 occur after the SP adjustment and clobber data in the local
7186 frame. */
7187 emit_insn (gen_blockage ());
7188 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7190 else if (frame_size)
7192 /* We must avoid moving the stack pointer adjustment past code
7193 which reads from the local frame, else an interrupt could
7194 occur after the SP adjustment and clobber data in the local
7195 frame. */
7196 emit_insn (gen_blockage ());
7197 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7198 &live_regs_mask, false);
7201 if (SHMEDIA_REGS_STACK_ADJUST ())
7203 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7204 (TARGET_FPU_ANY
7205 ? "__GCC_pop_shmedia_regs"
7206 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7207 /* This must NOT go through the PLT, otherwise mach and macl
7208 may be clobbered. */
7209 emit_insn (gen_shmedia_save_restore_regs_compact
7210 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7213 /* Pop all the registers. */
7215 if (target_flags != save_flags && ! current_function_interrupt)
7216 emit_insn (gen_toggle_sz ());
7217 if (TARGET_SH5)
7219 int offset_base, offset;
7220 int offset_in_r0 = -1;
7221 int sp_in_r0 = 0;
7222 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7223 save_schedule schedule;
7224 save_entry *entry;
7225 int *tmp_pnt;
7227 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7228 offset_base = -entry[1].offset + d_rounding;
7229 tmp_pnt = schedule.temps;
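/* Restore the registers in the reverse order of the save schedule, again
   using r0 for addresses that sp+offset cannot reach and post-increment
   where possible.  PR and other special registers are reloaded through r0
   when post-increment cannot be used; target registers go through one of
   the scratch temporaries.  */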
7230 for (; entry->mode != VOIDmode; entry--)
7232 enum machine_mode mode = (enum machine_mode) entry->mode;
7233 int reg = entry->reg;
7234 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7236 offset = offset_base + entry->offset;
7237 reg_rtx = gen_rtx_REG (mode, reg);
7239 mem_rtx = gen_frame_mem (mode,
7240 gen_rtx_PLUS (Pmode,
7241 stack_pointer_rtx,
7242 GEN_INT (offset)));
7244 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7245 mem_rtx = NULL_RTX;
7247 if (HAVE_POST_INCREMENT
7248 && (offset == offset_in_r0
7249 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7250 && mem_rtx == NULL_RTX)
7251 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7253 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7255 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7256 post_inc = NULL_RTX;
7257 else
7258 mem_rtx = NULL_RTX;
7261 if (mem_rtx != NULL_RTX)
7262 goto addr_ok;
7264 if (offset_in_r0 == -1)
7266 emit_move_insn (r0, GEN_INT (offset));
7267 offset_in_r0 = offset;
7269 else if (offset != offset_in_r0)
7271 emit_move_insn (r0,
7272 gen_rtx_PLUS
7273 (Pmode, r0,
7274 GEN_INT (offset - offset_in_r0)));
7275 offset_in_r0 += offset - offset_in_r0;
7278 if (post_inc != NULL_RTX)
7280 if (! sp_in_r0)
7282 emit_move_insn (r0,
7283 gen_rtx_PLUS
7284 (Pmode, r0, stack_pointer_rtx));
7285 sp_in_r0 = 1;
7288 mem_rtx = post_inc;
7290 offset_in_r0 += GET_MODE_SIZE (mode);
7292 else if (sp_in_r0)
7293 mem_rtx = gen_frame_mem (mode, r0);
7294 else
7295 mem_rtx = gen_frame_mem (mode,
7296 gen_rtx_PLUS (Pmode,
7297 stack_pointer_rtx,
7298 r0));
7300 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7301 || mem_rtx == post_inc);
7303 addr_ok:
7304 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7305 && mem_rtx != post_inc)
7307 insn = emit_move_insn (r0, mem_rtx);
7308 mem_rtx = r0;
7310 else if (TARGET_REGISTER_P (reg))
7312 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7314 /* Give the scheduler a bit of freedom by using up to
7315 MAX_TEMPS registers in a round-robin fashion. */
7316 insn = emit_move_insn (tmp_reg, mem_rtx);
7317 mem_rtx = tmp_reg;
7318 if (*++tmp_pnt < 0)
7319 tmp_pnt = schedule.temps;
7322 insn = emit_move_insn (reg_rtx, mem_rtx);
7325 gcc_assert (entry->offset + offset_base == d + d_rounding);
7327 else /* ! TARGET_SH5 */
7329 int last_reg;
7331 save_size = 0;
7332 /* For an ISR with RESBANK attribute assigned, don't pop PR
7333 register. */
7334 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7335 && !sh_cfun_resbank_handler_p ())
7337 if (!frame_pointer_needed)
7338 emit_insn (gen_blockage ());
7339 pop (PR_REG);
7342 /* Banked registers are popped first to avoid being scheduled in the
7343 delay slot.  RTE switches banks before the delay-slot instruction. */
7344 if (current_function_interrupt)
7346 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7347 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7348 pop (i);
7350 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7352 else
7353 last_reg = FIRST_PSEUDO_REGISTER;
7355 for (i = 0; i < last_reg; i++)
7357 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7359 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7360 && hard_reg_set_intersect_p (live_regs_mask,
7361 reg_class_contents[DF_REGS]))
7362 fpscr_deferred = 1;
7363 /* For an ISR with RESBANK attribute assigned, don't pop
7364 following registers, R0-R14, MACH, MACL and GBR. */
7365 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7366 && ! (sh_cfun_resbank_handler_p ()
7367 && ((j >= FIRST_GENERAL_REG
7368 && j < LAST_GENERAL_REG)
7369 || j == MACH_REG
7370 || j == MACL_REG
7371 || j == GBR_REG)))
7372 pop (j);
7374 if (j == FIRST_FP_REG && fpscr_deferred)
7375 pop (FPSCR_REG);
7378 if (target_flags != save_flags && ! current_function_interrupt)
7379 emit_insn (gen_toggle_sz ());
7380 target_flags = save_flags;
7382 output_stack_adjust (crtl->args.pretend_args_size
7383 + save_size + d_rounding
7384 + crtl->args.info.stack_regs * 8,
7385 stack_pointer_rtx, e, NULL, false);
7387 if (crtl->calls_eh_return)
7388 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7389 EH_RETURN_STACKADJ_RTX));
7391 /* Switch back to the normal stack if necessary. */
7392 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7393 emit_insn (gen_sp_switch_2 ());
7395 /* Tell flow the insn that pops PR isn't dead. */
7396 /* PR_REG will never be live in SHmedia mode, and we don't need to
7397 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7398 by the return pattern. */
7399 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7400 emit_use (gen_rtx_REG (SImode, PR_REG));
7403 static int sh_need_epilogue_known = 0;
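/* Return nonzero if the current function needs an epilogue.  The answer is
   computed once by expanding the epilogue into a throw-away sequence and is
   cached in sh_need_epilogue_known.  */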
7406 sh_need_epilogue (void)
7408 if (! sh_need_epilogue_known)
7410 rtx epilogue;
7412 start_sequence ();
7413 sh_expand_epilogue (0);
7414 epilogue = get_insns ();
7415 end_sequence ();
7416 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7418 return sh_need_epilogue_known > 0;
7421 /* Emit code to change the current function's return address to RA.
7422 TEMP is available as a scratch register, if needed. */
7424 void
7425 sh_set_return_address (rtx ra, rtx tmp)
7427 HARD_REG_SET live_regs_mask;
7428 int d;
7429 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7430 int pr_offset;
7432 d = calc_live_regs (&live_regs_mask);
7434 /* If pr_reg isn't live, we can set it (or the register given in
7435 sh_media_register_for_return) directly. */
7436 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7438 rtx rr;
7440 if (TARGET_SHMEDIA)
7442 int rr_regno = sh_media_register_for_return ();
7444 if (rr_regno < 0)
7445 rr_regno = pr_reg;
7447 rr = gen_rtx_REG (DImode, rr_regno);
7449 else
7450 rr = gen_rtx_REG (SImode, pr_reg);
7452 emit_insn (GEN_MOV (rr, ra));
7453 /* Tell flow the register for return isn't dead. */
7454 emit_use (rr);
7455 return;
7458 if (TARGET_SH5)
7460 int offset;
7461 save_schedule schedule;
7462 save_entry *entry;
7464 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7465 offset = entry[1].offset;
7466 for (; entry->mode != VOIDmode; entry--)
7467 if (entry->reg == pr_reg)
7468 goto found;
7470 /* We can't find pr register. */
7471 gcc_unreachable ();
7473 found:
7474 offset = entry->offset - offset;
7475 pr_offset = (rounded_frame_size (d) + offset
7476 + SHMEDIA_REGS_STACK_ADJUST ());
7478 else
7479 pr_offset = rounded_frame_size (d);
7481 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7483 if (frame_pointer_needed)
7484 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7485 else
7486 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7488 tmp = gen_frame_mem (Pmode, tmp);
7489 emit_insn (GEN_MOV (tmp, ra));
7491 /* Tell flow this store isn't dead.  */
7491 emit_use (tmp);
7494 /* Clear variables at function end. */
7496 static void
7497 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7498 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7500 sh_need_epilogue_known = 0;
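/* Expand __builtin_saveregs.  Save the unnamed integer and floating point
   argument registers so that va_arg can find them, and return the address
   of the save area (or a dummy value where the ABI does not need one).  */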
7503 static rtx
7504 sh_builtin_saveregs (void)
7506 /* First unnamed integer register. */
7507 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7508 /* Number of integer registers we need to save. */
7509 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7510 /* First unnamed SFmode float reg */
7511 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7512 /* Number of SFmode float regs to save. */
7513 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7514 rtx regbuf, fpregs;
7515 int bufsize, regno;
7516 alias_set_type alias_set;
7518 if (TARGET_SH5)
7520 if (n_intregs)
7522 int pushregs = n_intregs;
7524 while (pushregs < NPARM_REGS (SImode) - 1
7525 && (CALL_COOKIE_INT_REG_GET
7526 (crtl->args.info.call_cookie,
7527 NPARM_REGS (SImode) - pushregs)
7528 == 1))
7530 crtl->args.info.call_cookie
7531 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7532 - pushregs, 1);
7533 pushregs++;
7536 if (pushregs == NPARM_REGS (SImode))
7537 crtl->args.info.call_cookie
7538 |= (CALL_COOKIE_INT_REG (0, 1)
7539 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7540 else
7541 crtl->args.info.call_cookie
7542 |= CALL_COOKIE_STACKSEQ (pushregs);
7544 crtl->args.pretend_args_size += 8 * n_intregs;
7546 if (TARGET_SHCOMPACT)
7547 return const0_rtx;
7550 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7552 error ("__builtin_saveregs not supported by this subtarget");
7553 return const0_rtx;
7556 if (TARGET_SHMEDIA)
7557 n_floatregs = 0;
7559 /* Allocate block of memory for the regs. */
7560 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7561 Or can assign_stack_local accept a 0 SIZE argument? */
7562 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7564 if (TARGET_SHMEDIA)
7565 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7566 else if (n_floatregs & 1)
7568 rtx addr;
7570 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7571 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7572 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7573 regbuf = change_address (regbuf, BLKmode, addr);
7575 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7577 rtx addr, mask;
7579 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7580 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7581 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7582 emit_insn (gen_andsi3 (addr, addr, mask));
7583 regbuf = change_address (regbuf, BLKmode, addr);
7585 else
7586 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7587 alias_set = get_varargs_alias_set ();
7588 set_mem_alias_set (regbuf, alias_set);
7590 /* Save int args.
7591 This is optimized to only save the regs that are necessary. Explicitly
7592 named args need not be saved. */
7593 if (n_intregs > 0)
7594 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7595 adjust_address (regbuf, BLKmode,
7596 n_floatregs * UNITS_PER_WORD),
7597 n_intregs);
7599 if (TARGET_SHMEDIA)
7600 /* Return the address of the regbuf. */
7601 return XEXP (regbuf, 0);
7603 /* Save float args.
7604 This is optimized to only save the regs that are necessary. Explicitly
7605 named args need not be saved.
7606 We explicitly build a pointer to the buffer because it halves the insn
7607 count when not optimizing (otherwise the pointer is built for each reg
7608 saved).
7609 We emit the moves in reverse order so that we can use predecrement. */
7611 fpregs = copy_to_mode_reg (Pmode,
7612 plus_constant (XEXP (regbuf, 0),
7613 n_floatregs * UNITS_PER_WORD));
7614 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7616 rtx mem;
7617 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7619 emit_insn (gen_addsi3 (fpregs, fpregs,
7620 GEN_INT (-2 * UNITS_PER_WORD)));
7621 mem = change_address (regbuf, DFmode, fpregs);
7622 emit_move_insn (mem,
7623 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7625 regno = first_floatreg;
7626 if (regno & 1)
7628 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7629 mem = change_address (regbuf, SFmode, fpregs);
7630 emit_move_insn (mem,
7631 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7632 - (TARGET_LITTLE_ENDIAN != 0)));
7635 else
7636 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7638 rtx mem;
7640 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7641 mem = change_address (regbuf, SFmode, fpregs);
7642 emit_move_insn (mem,
7643 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7646 /* Return the address of the regbuf. */
7647 return XEXP (regbuf, 0);
7650 /* Define the `__builtin_va_list' type for the ABI. */
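/* Illustrative sketch only, not part of the original source: for the
   non-SH5, non-Renesas configurations handled below, the record built
   here behaves roughly like

     struct __va_list_tag
     {
       void *__va_next_o;         -- next integer arg in the reg-save area
       void *__va_next_o_limit;   -- end of the integer reg-save area
       void *__va_next_fp;        -- next FP arg in the reg-save area
       void *__va_next_fp_limit;  -- end of the FP reg-save area
       void *__va_next_stack;     -- next stack-passed argument
     };

   All other configurations simply use a plain pointer (ptr_type_node).  */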
7652 static tree
7653 sh_build_builtin_va_list (void)
7655 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7656 tree record, type_decl;
7658 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7659 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7660 return ptr_type_node;
7662 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7663 type_decl = build_decl (BUILTINS_LOCATION,
7664 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7666 f_next_o = build_decl (BUILTINS_LOCATION,
7667 FIELD_DECL, get_identifier ("__va_next_o"),
7668 ptr_type_node);
7669 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7670 FIELD_DECL,
7671 get_identifier ("__va_next_o_limit"),
7672 ptr_type_node);
7673 f_next_fp = build_decl (BUILTINS_LOCATION,
7674 FIELD_DECL, get_identifier ("__va_next_fp"),
7675 ptr_type_node);
7676 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7677 FIELD_DECL,
7678 get_identifier ("__va_next_fp_limit"),
7679 ptr_type_node);
7680 f_next_stack = build_decl (BUILTINS_LOCATION,
7681 FIELD_DECL, get_identifier ("__va_next_stack"),
7682 ptr_type_node);
7684 DECL_FIELD_CONTEXT (f_next_o) = record;
7685 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7686 DECL_FIELD_CONTEXT (f_next_fp) = record;
7687 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7688 DECL_FIELD_CONTEXT (f_next_stack) = record;
7690 TREE_CHAIN (record) = type_decl;
7691 TYPE_NAME (record) = type_decl;
7692 TYPE_FIELDS (record) = f_next_o;
7693 TREE_CHAIN (f_next_o) = f_next_o_limit;
7694 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7695 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7696 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7698 layout_type (record);
7700 return record;
7703 /* Implement `va_start' for varargs and stdarg. */
7705 static void
7706 sh_va_start (tree valist, rtx nextarg)
7708 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7709 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7710 tree t, u;
7711 int nfp, nint;
7713 if (TARGET_SH5)
7715 expand_builtin_saveregs ();
7716 std_expand_builtin_va_start (valist, nextarg);
7717 return;
7720 if ((! TARGET_SH2E && ! TARGET_SH4)
7721 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7723 std_expand_builtin_va_start (valist, nextarg);
7724 return;
7727 f_next_o = TYPE_FIELDS (va_list_type_node);
7728 f_next_o_limit = TREE_CHAIN (f_next_o);
7729 f_next_fp = TREE_CHAIN (f_next_o_limit);
7730 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7731 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7733 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7734 NULL_TREE);
7735 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7736 valist, f_next_o_limit, NULL_TREE);
7737 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7738 NULL_TREE);
7739 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7740 valist, f_next_fp_limit, NULL_TREE);
7741 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7742 valist, f_next_stack, NULL_TREE);
7744 /* Call __builtin_saveregs. */
7745 u = make_tree (sizetype, expand_builtin_saveregs ());
7746 u = fold_convert (ptr_type_node, u);
7747 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7748 TREE_SIDE_EFFECTS (t) = 1;
7749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7751 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7752 if (nfp < 8)
7753 nfp = 8 - nfp;
7754 else
7755 nfp = 0;
7756 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7757 size_int (UNITS_PER_WORD * nfp));
7758 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7759 TREE_SIDE_EFFECTS (t) = 1;
7760 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7762 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7763 TREE_SIDE_EFFECTS (t) = 1;
7764 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7766 nint = crtl->args.info.arg_count[SH_ARG_INT];
7767 if (nint < 4)
7768 nint = 4 - nint;
7769 else
7770 nint = 0;
7771 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7772 size_int (UNITS_PER_WORD * nint));
7773 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7774 TREE_SIDE_EFFECTS (t) = 1;
7775 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7777 u = make_tree (ptr_type_node, nextarg);
7778 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7779 TREE_SIDE_EFFECTS (t) = 1;
7780 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7783 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7784 member, return it. */
7785 static tree
7786 find_sole_member (tree type)
7788 tree field, member = NULL_TREE;
7790 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7792 if (TREE_CODE (field) != FIELD_DECL)
7793 continue;
7794 if (!DECL_SIZE (field))
7795 return NULL_TREE;
7796 if (integer_zerop (DECL_SIZE (field)))
7797 continue;
7798 if (member)
7799 return NULL_TREE;
7800 member = field;
7802 return member;
7804 /* Implement `va_arg'. */
7806 static tree
7807 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7808 gimple_seq *post_p ATTRIBUTE_UNUSED)
7810 HOST_WIDE_INT size, rsize;
7811 tree tmp, pptr_type_node;
7812 tree addr, lab_over = NULL, result = NULL;
7813 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7814 tree eff_type;
7816 if (pass_by_ref)
7817 type = build_pointer_type (type);
7819 size = int_size_in_bytes (type);
7820 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7821 pptr_type_node = build_pointer_type (ptr_type_node);
7823 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7824 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7826 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7827 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7828 int pass_as_float;
7829 tree lab_false;
7830 tree member;
7832 f_next_o = TYPE_FIELDS (va_list_type_node);
7833 f_next_o_limit = TREE_CHAIN (f_next_o);
7834 f_next_fp = TREE_CHAIN (f_next_o_limit);
7835 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7836 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7838 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7839 NULL_TREE);
7840 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7841 valist, f_next_o_limit, NULL_TREE);
7842 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7843 valist, f_next_fp, NULL_TREE);
7844 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7845 valist, f_next_fp_limit, NULL_TREE);
7846 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7847 valist, f_next_stack, NULL_TREE);
7849 /* Structures with a single member with a distinct mode are passed
7850 like their member. This is relevant if the latter has a REAL_TYPE
7851 or COMPLEX_TYPE type. */
7852 eff_type = type;
7853 while (TREE_CODE (eff_type) == RECORD_TYPE
7854 && (member = find_sole_member (eff_type))
7855 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7856 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7857 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7859 tree field_type = TREE_TYPE (member);
7861 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7862 eff_type = field_type;
7863 else
7865 gcc_assert ((TYPE_ALIGN (eff_type)
7866 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7867 || (TYPE_ALIGN (eff_type)
7868 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7869 break;
7873 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7875 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7876 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7877 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7878 && size <= 16));
7880 else
7882 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7885 addr = create_tmp_var (pptr_type_node, NULL);
7886 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7887 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7889 valist = build_simple_mem_ref (addr);
7891 if (pass_as_float)
7893 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7894 tree cmp;
7895 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7897 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7898 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7900 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7901 tmp = next_fp_limit;
7902 if (size > 4 && !is_double)
7903 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7904 unshare_expr (tmp), size_int (4 - size));
7905 tmp = build2 (GE_EXPR, boolean_type_node,
7906 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7907 cmp = build3 (COND_EXPR, void_type_node, tmp,
7908 build1 (GOTO_EXPR, void_type_node,
7909 unshare_expr (lab_false)), NULL_TREE);
7910 if (!is_double)
7911 gimplify_and_add (cmp, pre_p);
7913 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7914 || (is_double || size == 16))
7916 tmp = fold_convert (sizetype, next_fp_tmp);
7917 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7918 size_int (UNITS_PER_WORD));
7919 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7920 unshare_expr (next_fp_tmp), tmp);
7921 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7923 if (is_double)
7924 gimplify_and_add (cmp, pre_p);
7926 #ifdef FUNCTION_ARG_SCmode_WART
7927 if (TYPE_MODE (eff_type) == SCmode
7928 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7930 tree subtype = TREE_TYPE (eff_type);
7931 tree real, imag;
7933 imag
7934 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7935 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7937 real
7938 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7939 real = get_initialized_tmp_var (real, pre_p, NULL);
7941 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7942 if (type != eff_type)
7943 result = build1 (VIEW_CONVERT_EXPR, type, result);
7944 result = get_initialized_tmp_var (result, pre_p, NULL);
7946 #endif /* FUNCTION_ARG_SCmode_WART */
7948 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7949 gimplify_and_add (tmp, pre_p);
7951 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7952 gimplify_and_add (tmp, pre_p);
7954 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7955 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7956 gimplify_assign (unshare_expr (next_fp_tmp),
7957 unshare_expr (valist), pre_p);
7959 gimplify_assign (unshare_expr (valist),
7960 unshare_expr (next_fp_tmp), post_p);
7961 valist = next_fp_tmp;
7963 else
7965 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7966 unshare_expr (next_o), size_int (rsize));
7967 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7968 unshare_expr (next_o_limit));
7969 tmp = build3 (COND_EXPR, void_type_node, tmp,
7970 build1 (GOTO_EXPR, void_type_node,
7971 unshare_expr (lab_false)),
7972 NULL_TREE);
7973 gimplify_and_add (tmp, pre_p);
7975 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7976 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7978 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7979 gimplify_and_add (tmp, pre_p);
7981 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7982 gimplify_and_add (tmp, pre_p);
7984 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7985 gimplify_assign (unshare_expr (next_o),
7986 unshare_expr (next_o_limit), pre_p);
7988 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7989 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7992 if (!result)
7994 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7995 gimplify_and_add (tmp, pre_p);
7999 /* ??? In va-sh.h, there had been code to make values larger than
8000 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8002 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8003 if (result)
8005 gimplify_assign (result, tmp, pre_p);
8006 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8007 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8008 gimplify_and_add (tmp, pre_p);
8010 else
8011 result = tmp;
8013 if (pass_by_ref)
8014 result = build_va_arg_indirect_ref (result);
8016 return result;
8019 /* 64-bit floating point memory transfers are paired single-precision loads
8020 or stores. So DWARF information needs fixing in little endian (unless
8021 PR=SZ=1 in FPSCR). */
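/* For example (illustration only): with -ml, a DFmode value living in DR0,
   which overlays FR0 and FR1, is described below as
     (parallel [(reg:SF fr1) (reg:SF fr0)])
   so the debugger sees the two SFmode halves rather than one 64-bit
   register.  */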
8023 sh_dwarf_register_span (rtx reg)
8025 unsigned regno = REGNO (reg);
8027 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8028 return NULL_RTX;
8030 return
8031 gen_rtx_PARALLEL (VOIDmode,
8032 gen_rtvec (2,
8033 gen_rtx_REG (SFmode,
8034 DBX_REGISTER_NUMBER (regno+1)),
8035 gen_rtx_REG (SFmode,
8036 DBX_REGISTER_NUMBER (regno))));
8039 static enum machine_mode
8040 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8041 int *punsignedp, const_tree funtype,
8042 int for_return ATTRIBUTE_UNUSED)
8044 if (sh_promote_prototypes (funtype))
8045 return promote_mode (type, mode, punsignedp);
8046 else
8047 return mode;
8050 static bool
8051 sh_promote_prototypes (const_tree type)
8053 if (TARGET_HITACHI)
8054 return 0;
8055 if (! type)
8056 return 1;
8057 return ! sh_attr_renesas_p (type);
8060 /* Whether an argument must be passed by reference. On SHcompact, we
8061 pretend arguments wider than 32 bits that would have been passed in
8062 registers are passed by reference, so that an SHmedia trampoline
8063 loads them into the full 64-bit registers. */
8065 static int
8066 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8067 const_tree type, bool named)
8069 unsigned HOST_WIDE_INT size;
8071 if (type)
8072 size = int_size_in_bytes (type);
8073 else
8074 size = GET_MODE_SIZE (mode);
8076 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8077 && (!named
8078 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8079 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8080 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8081 && size > 4
8082 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8083 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8084 return size;
8085 else
8086 return 0;
8089 static bool
8090 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8091 const_tree type, bool named)
8093 if (targetm.calls.must_pass_in_stack (mode, type))
8094 return true;
8096 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8097 wants to know about pass-by-reference semantics for incoming
8098 arguments. */
8099 if (! cum)
8100 return false;
8102 if (TARGET_SHCOMPACT)
8104 cum->byref = shcompact_byref (cum, mode, type, named);
8105 return cum->byref != 0;
8108 return false;
8111 static bool
8112 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8113 const_tree type, bool named ATTRIBUTE_UNUSED)
8115 /* ??? How can it possibly be correct to return true only on the
8116 caller side of the equation? Is there someplace else in the
8117 sh backend that's magically producing the copies? */
8118 return (cum->outgoing
8119 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8120 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8123 static int
8124 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8125 tree type, bool named ATTRIBUTE_UNUSED)
8127 int words = 0;
8129 if (!TARGET_SH5
8130 && PASS_IN_REG_P (*cum, mode, type)
8131 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8132 && (ROUND_REG (*cum, mode)
8133 + (mode != BLKmode
8134 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8135 : ROUND_ADVANCE (int_size_in_bytes (type)))
8136 > NPARM_REGS (mode)))
8137 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8139 else if (!TARGET_SHCOMPACT
8140 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8141 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8143 return words * UNITS_PER_WORD;
8147 /* Define where to put the arguments to a function.
8148 Value is zero to push the argument on the stack,
8149 or a hard register in which to store the argument.
8151 MODE is the argument's machine mode.
8152 TYPE is the data type of the argument (as a tree).
8153 This is null for libcalls where that information may
8154 not be available.
8155 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8156 the preceding args and about the function being called.
8157 NAMED is nonzero if this argument is a named parameter
8158 (otherwise it is an extra parameter matching an ellipsis).
8160 On SH the first args are normally in registers
8161 and the rest are pushed. Any arg that starts within the first
8162 NPARM_REGS words is at least partially passed in a register unless
8163 its data type forbids. */
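/* Illustrative example (assumes the default SH1..SH4 ABI, not SH5 and not
   the Renesas/Hitachi conventions): for

     int f (int a, int b, int c, int d, int e);

   A..D are passed in r4..r7 and E goes on the stack; with hardware FP
   (e.g. -m4) SFmode arguments are drawn from fr4..fr11 instead.  */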
8167 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8168 tree type, int named)
8170 if (! TARGET_SH5 && mode == VOIDmode)
8171 return GEN_INT (ca->renesas_abi ? 1 : 0);
8173 if (! TARGET_SH5
8174 && PASS_IN_REG_P (*ca, mode, type)
8175 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8177 int regno;
8179 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8180 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8182 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8183 gen_rtx_REG (SFmode,
8184 BASE_ARG_REG (mode)
8185 + (ROUND_REG (*ca, mode) ^ 1)),
8186 const0_rtx);
8187 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8188 gen_rtx_REG (SFmode,
8189 BASE_ARG_REG (mode)
8190 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8191 GEN_INT (4));
8192 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8195 /* If the alignment of a DF value causes an SF register to be
8196 skipped, we will use that skipped register for the next SF
8197 value. */
8198 if ((TARGET_HITACHI || ca->renesas_abi)
8199 && ca->free_single_fp_reg
8200 && mode == SFmode)
8201 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8203 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8204 ^ (mode == SFmode && TARGET_SH4
8205 && TARGET_LITTLE_ENDIAN != 0
8206 && ! TARGET_HITACHI && ! ca->renesas_abi);
8207 return gen_rtx_REG (mode, regno);
8211 if (TARGET_SH5)
8213 if (mode == VOIDmode && TARGET_SHCOMPACT)
8214 return GEN_INT (ca->call_cookie);
8216 /* The following test assumes unnamed arguments are promoted to
8217 DFmode. */
8218 if (mode == SFmode && ca->free_single_fp_reg)
8219 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8221 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8222 && (named || ! ca->prototype_p)
8223 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8225 if (! ca->prototype_p && TARGET_SHMEDIA)
8226 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8228 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8229 FIRST_FP_PARM_REG
8230 + ca->arg_count[(int) SH_ARG_FLOAT]);
8233 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8234 && (! TARGET_SHCOMPACT
8235 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8236 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8237 type, named))))
8239 return gen_rtx_REG (mode, (FIRST_PARM_REG
8240 + ca->arg_count[(int) SH_ARG_INT]));
8243 return 0;
8246 return 0;
8249 /* Update the data in CUM to advance over an argument
8250 of mode MODE and data type TYPE.
8251 (TYPE is null for libcalls where that information may not be
8252 available.) */
8254 void
8255 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8256 tree type, int named)
8258 if (ca->force_mem)
8259 ca->force_mem = 0;
8260 else if (TARGET_SH5)
8262 tree type2 = (ca->byref && type
8263 ? TREE_TYPE (type)
8264 : type);
8265 enum machine_mode mode2 = (ca->byref && type
8266 ? TYPE_MODE (type2)
8267 : mode);
8268 int dwords = ((ca->byref
8269 ? ca->byref
8270 : mode2 == BLKmode
8271 ? int_size_in_bytes (type2)
8272 : GET_MODE_SIZE (mode2)) + 7) / 8;
8273 int numregs = MIN (dwords, NPARM_REGS (SImode)
8274 - ca->arg_count[(int) SH_ARG_INT]);
8276 if (numregs)
8278 ca->arg_count[(int) SH_ARG_INT] += numregs;
8279 if (TARGET_SHCOMPACT
8280 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8282 ca->call_cookie
8283 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8284 - numregs, 1);
8285 /* N.B. We want this also for outgoing. */
8286 ca->stack_regs += numregs;
8288 else if (ca->byref)
8290 if (! ca->outgoing)
8291 ca->stack_regs += numregs;
8292 ca->byref_regs += numregs;
8293 ca->byref = 0;
8295 ca->call_cookie
8296 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8297 - numregs, 2);
8298 while (--numregs);
8299 ca->call_cookie
8300 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8301 - 1, 1);
8303 else if (dwords > numregs)
8305 int pushregs = numregs;
8307 if (TARGET_SHCOMPACT)
8308 ca->stack_regs += numregs;
8309 while (pushregs < NPARM_REGS (SImode) - 1
8310 && (CALL_COOKIE_INT_REG_GET
8311 (ca->call_cookie,
8312 NPARM_REGS (SImode) - pushregs)
8313 == 1))
8315 ca->call_cookie
8316 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8317 - pushregs, 1);
8318 pushregs++;
8320 if (numregs == NPARM_REGS (SImode))
8321 ca->call_cookie
8322 |= CALL_COOKIE_INT_REG (0, 1)
8323 | CALL_COOKIE_STACKSEQ (numregs - 1);
8324 else
8325 ca->call_cookie
8326 |= CALL_COOKIE_STACKSEQ (numregs);
8329 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8330 && (named || ! ca->prototype_p))
8332 if (mode2 == SFmode && ca->free_single_fp_reg)
8333 ca->free_single_fp_reg = 0;
8334 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8335 < NPARM_REGS (SFmode))
8337 int numfpregs
8338 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8339 NPARM_REGS (SFmode)
8340 - ca->arg_count[(int) SH_ARG_FLOAT]);
8342 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8344 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8346 if (ca->outgoing && numregs > 0)
8349 ca->call_cookie
8350 |= (CALL_COOKIE_INT_REG
8351 (ca->arg_count[(int) SH_ARG_INT]
8352 - numregs + ((numfpregs - 2) / 2),
8353 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8354 - numfpregs) / 2));
8356 while (numfpregs -= 2);
8358 else if (mode2 == SFmode && (named)
8359 && (ca->arg_count[(int) SH_ARG_FLOAT]
8360 < NPARM_REGS (SFmode)))
8361 ca->free_single_fp_reg
8362 = FIRST_FP_PARM_REG - numfpregs
8363 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8366 return;
8369 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8371 /* Note that we've used the skipped register. */
8372 if (mode == SFmode && ca->free_single_fp_reg)
8374 ca->free_single_fp_reg = 0;
8375 return;
8377 /* When we have a DF after an SF, there's an SF register that gets
8378 skipped in order to align the DF value. We note this skipped
8379 register, because the next SF value will use it, and not the
8380 SF that follows the DF. */
8381 if (mode == DFmode
8382 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8384 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8385 + BASE_ARG_REG (mode));
8389 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8390 || PASS_IN_REG_P (*ca, mode, type))
8391 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8392 = (ROUND_REG (*ca, mode)
8393 + (mode == BLKmode
8394 ? ROUND_ADVANCE (int_size_in_bytes (type))
8395 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8398 /* The Renesas calling convention doesn't quite fit into this scheme since
8399 the address is passed like an invisible argument, but one that is always
8400 passed in memory. */
8401 static rtx
8402 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8404 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8405 return 0;
8406 return gen_rtx_REG (Pmode, 2);
8409 /* Worker function for TARGET_FUNCTION_VALUE.
8411 For the SH, this is like LIBCALL_VALUE, except that we must change the
8412 mode like PROMOTE_MODE does.
8413 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8414 tested here has to be kept in sync with the one in explow.c:promote_mode.
8417 static rtx
8418 sh_function_value (const_tree valtype,
8419 const_tree fn_decl_or_type,
8420 bool outgoing ATTRIBUTE_UNUSED)
8422 if (fn_decl_or_type
8423 && !DECL_P (fn_decl_or_type))
8424 fn_decl_or_type = NULL;
8426 return gen_rtx_REG (
8427 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8428 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8429 && (TREE_CODE (valtype) == INTEGER_TYPE
8430 || TREE_CODE (valtype) == ENUMERAL_TYPE
8431 || TREE_CODE (valtype) == BOOLEAN_TYPE
8432 || TREE_CODE (valtype) == REAL_TYPE
8433 || TREE_CODE (valtype) == OFFSET_TYPE))
8434 && sh_promote_prototypes (fn_decl_or_type)
8435 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8436 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8439 /* Worker function for TARGET_LIBCALL_VALUE. */
8441 static rtx
8442 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8444 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8447 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8449 bool
8450 sh_function_value_regno_p (const unsigned int regno)
8452 return ((regno) == FIRST_RET_REG
8453 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8454 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8457 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8459 static bool
8460 sh_return_in_memory (const_tree type, const_tree fndecl)
8462 if (TARGET_SH5)
8464 if (TYPE_MODE (type) == BLKmode)
8465 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8466 else
8467 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8469 else
8471 return (TYPE_MODE (type) == BLKmode
8472 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8473 && TREE_CODE (type) == RECORD_TYPE));
8477 /* We actually emit the code in sh_expand_prologue. We used to use
8478 a static variable to flag that we need to emit this code, but that
8479 doesn't work when inlining, when functions are deferred and then emitted
8480 later. Fortunately, we already have two flags that are part of struct
8481 function that tell if a function uses varargs or stdarg. */
8482 static void
8483 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8484 enum machine_mode mode,
8485 tree type,
8486 int *pretend_arg_size,
8487 int second_time ATTRIBUTE_UNUSED)
8489 gcc_assert (cfun->stdarg);
8490 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8492 int named_parm_regs, anon_parm_regs;
8494 named_parm_regs = (ROUND_REG (*ca, mode)
8495 + (mode == BLKmode
8496 ? ROUND_ADVANCE (int_size_in_bytes (type))
8497 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8498 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8499 if (anon_parm_regs > 0)
8500 *pretend_arg_size = anon_parm_regs * 4;
8504 static bool
8505 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8507 return TARGET_SH5;
8510 static bool
8511 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8513 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8517 /* Define the offset between two registers, one to be eliminated, and
8518 the other its replacement, at the start of a routine. */
8521 initial_elimination_offset (int from, int to)
8523 int regs_saved;
8524 int regs_saved_rounding = 0;
8525 int total_saved_regs_space;
8526 int total_auto_space;
8527 int save_flags = target_flags;
8528 int copy_flags;
8529 HARD_REG_SET live_regs_mask;
8531 shmedia_space_reserved_for_target_registers = false;
8532 regs_saved = calc_live_regs (&live_regs_mask);
8533 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8535 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8537 shmedia_space_reserved_for_target_registers = true;
8538 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8541 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8542 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8543 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8545 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8546 copy_flags = target_flags;
8547 target_flags = save_flags;
8549 total_saved_regs_space = regs_saved + regs_saved_rounding;
8551 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8552 return total_saved_regs_space + total_auto_space
8553 + crtl->args.info.byref_regs * 8;
8555 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8556 return total_saved_regs_space + total_auto_space
8557 + crtl->args.info.byref_regs * 8;
8559 /* Initial gap between fp and sp is 0. */
8560 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8561 return 0;
8563 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8564 return rounded_frame_size (0);
8566 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8567 return rounded_frame_size (0);
8569 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8570 && (to == HARD_FRAME_POINTER_REGNUM
8571 || to == STACK_POINTER_REGNUM));
8572 if (TARGET_SH5)
8574 int n = total_saved_regs_space;
8575 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8576 save_schedule schedule;
8577 save_entry *entry;
8579 n += total_auto_space;
8581 /* If it wasn't saved, there's not much we can do. */
8582 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8583 return n;
8585 target_flags = copy_flags;
8587 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8588 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8589 if (entry->reg == pr_reg)
8591 target_flags = save_flags;
8592 return entry->offset;
8594 gcc_unreachable ();
8596 else
8597 return total_auto_space;
8600 /* Parse the -mfixed-range= option string. */
8601 void
8602 sh_fix_range (const char *const_str)
8604 int i, first, last;
8605 char *str, *dash, *comma;
8607 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8608 REG2 are either register names or register numbers. The effect
8609 of this option is to mark the registers in the range from REG1 to
8610 REG2 as ``fixed'' so they won't be used by the compiler. */
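/* For example (illustrative only), an option such as

     -mfixed-range=r8-r11

   marks r8, r9, r10 and r11 as fixed and call-used for the whole
   compilation, exactly as the loop below does for each parsed range.  */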
8612 i = strlen (const_str);
8613 str = (char *) alloca (i + 1);
8614 memcpy (str, const_str, i + 1);
8616 while (1)
8618 dash = strchr (str, '-');
8619 if (!dash)
8621 warning (0, "value of -mfixed-range must have form REG1-REG2");
8622 return;
8624 *dash = '\0';
8625 comma = strchr (dash + 1, ',');
8626 if (comma)
8627 *comma = '\0';
8629 first = decode_reg_name (str);
8630 if (first < 0)
8632 warning (0, "unknown register name: %s", str);
8633 return;
8636 last = decode_reg_name (dash + 1);
8637 if (last < 0)
8639 warning (0, "unknown register name: %s", dash + 1);
8640 return;
8643 *dash = '-';
8645 if (first > last)
8647 warning (0, "%s-%s is an empty range", str, dash + 1);
8648 return;
8651 for (i = first; i <= last; ++i)
8652 fixed_regs[i] = call_used_regs[i] = 1;
8654 if (!comma)
8655 break;
8657 *comma = ',';
8658 str = comma + 1;
8662 /* Insert any deferred function attributes from earlier pragmas. */
8663 static void
8664 sh_insert_attributes (tree node, tree *attributes)
8666 tree attrs;
8668 if (TREE_CODE (node) != FUNCTION_DECL)
8669 return;
8671 /* We are only interested in fields. */
8672 if (!DECL_P (node))
8673 return;
8675 /* Append the attributes to the deferred attributes. */
8676 *sh_deferred_function_attributes_tail = *attributes;
8677 attrs = sh_deferred_function_attributes;
8678 if (!attrs)
8679 return;
8681 /* Some attributes imply or require the interrupt attribute. */
8682 if (!lookup_attribute ("interrupt_handler", attrs)
8683 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8685 /* If we have a trapa_handler, but no interrupt_handler attribute,
8686 insert an interrupt_handler attribute. */
8687 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8688 /* We can't use sh_pr_interrupt here because that's not in the
8689 java frontend. */
8690 attrs
8691 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8692 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8693 if the interrupt attribute is missing, we ignore the attribute
8694 and warn. */
8695 else if (lookup_attribute ("sp_switch", attrs)
8696 || lookup_attribute ("trap_exit", attrs)
8697 || lookup_attribute ("nosave_low_regs", attrs)
8698 || lookup_attribute ("resbank", attrs))
8700 tree *tail;
8702 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8704 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8705 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8706 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8707 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8708 warning (OPT_Wattributes,
8709 "%qE attribute only applies to interrupt functions",
8710 TREE_PURPOSE (attrs));
8711 else
8713 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8714 NULL_TREE);
8715 tail = &TREE_CHAIN (*tail);
8718 attrs = *attributes;
8722 /* Install the processed list. */
8723 *attributes = attrs;
8725 /* Clear deferred attributes. */
8726 sh_deferred_function_attributes = NULL_TREE;
8727 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8729 return;
8732 /* Supported attributes:
8734 interrupt_handler -- specifies this function is an interrupt handler.
8736 trapa_handler - like above, but don't save all registers.
8738 sp_switch -- specifies an alternate stack for an interrupt handler
8739 to run on.
8741 trap_exit -- use a trapa to exit an interrupt function instead of
8742 an rte instruction.
8744 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8745 This is useful on the SH3 and upwards,
8746 which have a separate set of low regs for User and Supervisor modes.
8747 This should only be used for the lowest level of interrupts. Higher levels
8748 of interrupts must save the registers in case they themselves are
8749 interrupted.
8751 renesas -- use Renesas calling/layout conventions (functions and
8752 structures).
8754 resbank -- In case of an ISR, use a register bank to save registers
8755 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets. */
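/* Purely illustrative usage sketch (not part of the original source):

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11)))
     timer_isr (void);

   sp_switch names the variable holding the alternate stack pointer, and
   trap_exit gives the trapa number used instead of rte; the handler
   functions below validate these arguments.  */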
8758 /* Handle a 'resbank' attribute. */
8759 static tree
8760 sh_handle_resbank_handler_attribute (tree * node, tree name,
8761 tree args ATTRIBUTE_UNUSED,
8762 int flags ATTRIBUTE_UNUSED,
8763 bool * no_add_attrs)
8765 if (!TARGET_SH2A)
8767 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8768 name);
8769 *no_add_attrs = true;
8771 if (TREE_CODE (*node) != FUNCTION_DECL)
8773 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8774 name);
8775 *no_add_attrs = true;
8778 return NULL_TREE;
8781 /* Handle an "interrupt_handler" attribute; arguments as in
8782 struct attribute_spec.handler. */
8783 static tree
8784 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8785 tree args ATTRIBUTE_UNUSED,
8786 int flags ATTRIBUTE_UNUSED,
8787 bool *no_add_attrs)
8789 if (TREE_CODE (*node) != FUNCTION_DECL)
8791 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8792 name);
8793 *no_add_attrs = true;
8795 else if (TARGET_SHCOMPACT)
8797 error ("attribute interrupt_handler is not compatible with -m5-compact");
8798 *no_add_attrs = true;
8801 return NULL_TREE;
8804 /* Handle a 'function_vector' attribute; arguments as in
8805 struct attribute_spec.handler. */
8806 static tree
8807 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8808 tree args ATTRIBUTE_UNUSED,
8809 int flags ATTRIBUTE_UNUSED,
8810 bool * no_add_attrs)
8812 if (!TARGET_SH2A)
8814 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8815 name);
8816 *no_add_attrs = true;
8818 else if (TREE_CODE (*node) != FUNCTION_DECL)
8820 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8821 name);
8822 *no_add_attrs = true;
8824 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8826 /* The argument must be a constant integer. */
8827 warning (OPT_Wattributes,
8828 "%qE attribute argument not an integer constant",
8829 name);
8830 *no_add_attrs = true;
8832 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8834 /* The argument value must be between 0 and 255. */
8835 warning (OPT_Wattributes,
8836 "%qE attribute argument should be between 0 and 255",
8837 name);
8838 *no_add_attrs = true;
8840 return NULL_TREE;
8843 /* Returns 1 if the symbol X refers to a function that has been assigned
8844 the attribute 'function_vector'. */
8846 sh2a_is_function_vector_call (rtx x)
8848 if (GET_CODE (x) == SYMBOL_REF
8849 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8851 tree tr = SYMBOL_REF_DECL (x);
8853 if (sh2a_function_vector_p (tr))
8854 return 1;
8857 return 0;
8860 /* Returns the function vector number, if the attribute
8861 'function_vector' is assigned, otherwise returns zero. */
8863 sh2a_get_function_vector_number (rtx x)
8865 int num;
8866 tree list, t;
8868 if ((GET_CODE (x) == SYMBOL_REF)
8869 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8871 t = SYMBOL_REF_DECL (x);
8873 if (TREE_CODE (t) != FUNCTION_DECL)
8874 return 0;
8876 list = SH_ATTRIBUTES (t);
8877 while (list)
8879 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8881 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8882 return num;
8885 list = TREE_CHAIN (list);
8888 return 0;
8890 else
8891 return 0;
8894 /* Handle an "sp_switch" attribute; arguments as in
8895 struct attribute_spec.handler. */
8896 static tree
8897 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8898 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8900 if (TREE_CODE (*node) != FUNCTION_DECL)
8902 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8903 name);
8904 *no_add_attrs = true;
8906 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8908 /* The argument must be a constant string. */
8909 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8910 name);
8911 *no_add_attrs = true;
8914 return NULL_TREE;
8917 /* Handle a "trap_exit" attribute; arguments as in
8918 struct attribute_spec.handler. */
8919 static tree
8920 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8921 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8923 if (TREE_CODE (*node) != FUNCTION_DECL)
8925 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8926 name);
8927 *no_add_attrs = true;
8929 /* The argument specifies a trap number to be used in a trapa instruction
8930 at function exit (instead of an rte instruction). */
8931 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8933 /* The argument must be a constant integer. */
8934 warning (OPT_Wattributes, "%qE attribute argument not an "
8935 "integer constant", name);
8936 *no_add_attrs = true;
8939 return NULL_TREE;
8942 static tree
8943 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8944 tree name ATTRIBUTE_UNUSED,
8945 tree args ATTRIBUTE_UNUSED,
8946 int flags ATTRIBUTE_UNUSED,
8947 bool *no_add_attrs ATTRIBUTE_UNUSED)
8949 return NULL_TREE;
8952 /* True if __attribute__((renesas)) or -mrenesas. */
8954 sh_attr_renesas_p (const_tree td)
8956 if (TARGET_HITACHI)
8957 return 1;
8958 if (td == 0)
8959 return 0;
8960 if (DECL_P (td))
8961 td = TREE_TYPE (td);
8962 if (td == error_mark_node)
8963 return 0;
8964 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8965 != NULL_TREE);
8968 /* True if __attribute__((renesas)) or -mrenesas, for the current
8969 function. */
8971 sh_cfun_attr_renesas_p (void)
8973 return sh_attr_renesas_p (current_function_decl);
8977 sh_cfun_interrupt_handler_p (void)
8979 return (lookup_attribute ("interrupt_handler",
8980 DECL_ATTRIBUTES (current_function_decl))
8981 != NULL_TREE);
8984 /* Returns 1 if FUNC has been assigned the attribute
8985 "function_vector". */
8987 sh2a_function_vector_p (tree func)
8989 tree list;
8990 if (TREE_CODE (func) != FUNCTION_DECL)
8991 return 0;
8993 list = SH_ATTRIBUTES (func);
8994 while (list)
8996 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8997 return 1;
8999 list = TREE_CHAIN (list);
9001 return 0;
9004 /* Returns TRUE if given tree has the "resbank" attribute. */
9007 sh_cfun_resbank_handler_p (void)
9009 return ((lookup_attribute ("resbank",
9010 DECL_ATTRIBUTES (current_function_decl))
9011 != NULL_TREE)
9012 && (lookup_attribute ("interrupt_handler",
9013 DECL_ATTRIBUTES (current_function_decl))
9014 != NULL_TREE) && TARGET_SH2A);
9017 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9019 static const char *
9020 sh_check_pch_target_flags (int old_flags)
9022 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9023 | MASK_SH_E | MASK_HARD_SH4
9024 | MASK_FPU_SINGLE | MASK_SH4))
9025 return _("created and used with different architectures / ABIs");
9026 if ((old_flags ^ target_flags) & MASK_HITACHI)
9027 return _("created and used with different ABIs");
9028 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9029 return _("created and used with different endianness");
9030 return NULL;
9033 /* Predicates used by the templates. */
9035 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9036 Used only in general_movsrc_operand. */
9039 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9041 switch (REGNO (op))
9043 case PR_REG:
9044 case MACL_REG:
9045 case MACH_REG:
9046 return 1;
9048 return 0;
9051 /* Nonzero if OP is a floating point value with value 0.0. */
9054 fp_zero_operand (rtx op)
9056 REAL_VALUE_TYPE r;
9058 if (GET_MODE (op) != SFmode)
9059 return 0;
9061 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9062 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9065 /* Nonzero if OP is a floating point value with value 1.0. */
9068 fp_one_operand (rtx op)
9070 REAL_VALUE_TYPE r;
9072 if (GET_MODE (op) != SFmode)
9073 return 0;
9075 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9076 return REAL_VALUES_EQUAL (r, dconst1);
9079 /* In general mode switching is used. If we are
9080 compiling without -mfmovd, movsf_ie isn't taken into account for
9081 mode switching. We could check in machine_dependent_reorg for
9082 cases where we know we are in single precision mode, but there is
9083 no interface to find that out during reload, so we must avoid
9084 choosing an fldi alternative during reload and thus failing to
9085 allocate a scratch register for the constant loading. */
9087 fldi_ok (void)
9089 return 1;
9093 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9095 enum rtx_code code = GET_CODE (op);
9096 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9099 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
9100 enum tls_model
9101 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9103 if (GET_CODE (op) != SYMBOL_REF)
9104 return TLS_MODEL_NONE;
9105 return SYMBOL_REF_TLS_MODEL (op);
9108 /* Return the destination address of a branch. */
9110 static int
9111 branch_dest (rtx branch)
9113 rtx dest = SET_SRC (PATTERN (branch));
9114 int dest_uid;
9116 if (GET_CODE (dest) == IF_THEN_ELSE)
9117 dest = XEXP (dest, 1);
9118 dest = XEXP (dest, 0);
9119 dest_uid = INSN_UID (dest);
9120 return INSN_ADDRESSES (dest_uid);
9123 /* Return nonzero if REG is not used after INSN.
9124 We assume REG is a reload reg, and therefore does
9125 not live past labels. It may live past calls or jumps though. */
9127 reg_unused_after (rtx reg, rtx insn)
9129 enum rtx_code code;
9130 rtx set;
9132 /* If the reg is set by this instruction, then it is safe for our
9133 case. Disregard the case where this is a store to memory, since
9134 we are checking a register used in the store address. */
9135 set = single_set (insn);
9136 if (set && !MEM_P (SET_DEST (set))
9137 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9138 return 1;
9140 while ((insn = NEXT_INSN (insn)))
9142 rtx set;
9143 if (!INSN_P (insn))
9144 continue;
9146 code = GET_CODE (insn);
9148 #if 0
9149 /* If this is a label that existed before reload, then the register
9150 is dead here. However, if this is a label added by reorg, then
9151 the register may still be live here. We can't tell the difference,
9152 so we just ignore labels completely. */
9153 if (code == CODE_LABEL)
9154 return 1;
9155 /* else */
9156 #endif
9158 if (code == JUMP_INSN)
9159 return 0;
9161 /* If this is a sequence, we must handle them all at once.
9162 We could have for instance a call that sets the target register,
9163 and an insn in a delay slot that uses the register. In this case,
9164 we must return 0. */
9165 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9167 int i;
9168 int retval = 0;
9170 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9172 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9173 rtx set = single_set (this_insn);
9175 if (CALL_P (this_insn))
9176 code = CALL_INSN;
9177 else if (JUMP_P (this_insn))
9179 if (INSN_ANNULLED_BRANCH_P (this_insn))
9180 return 0;
9181 code = JUMP_INSN;
9184 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9185 return 0;
9186 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9188 if (!MEM_P (SET_DEST (set)))
9189 retval = 1;
9190 else
9191 return 0;
9193 if (set == 0
9194 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9195 return 0;
9197 if (retval == 1)
9198 return 1;
9199 else if (code == JUMP_INSN)
9200 return 0;
9203 set = single_set (insn);
9204 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9205 return 0;
9206 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9207 return !MEM_P (SET_DEST (set));
9208 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9209 return 0;
9211 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9212 return 1;
9214 return 1;
9217 #include "ggc.h"
9219 static GTY(()) rtx fpscr_rtx;
9221 get_fpscr_rtx (void)
9223 if (! fpscr_rtx)
9225 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9226 REG_USERVAR_P (fpscr_rtx) = 1;
9227 mark_user_reg (fpscr_rtx);
9229 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9230 mark_user_reg (fpscr_rtx);
9231 return fpscr_rtx;
9234 static GTY(()) tree fpscr_values;
9236 static void
9237 emit_fpu_switch (rtx scratch, int index)
9239 rtx dst, src;
9241 if (fpscr_values == NULL)
9243 tree t;
9245 t = build_index_type (integer_one_node);
9246 t = build_array_type (integer_type_node, t);
9247 t = build_decl (BUILTINS_LOCATION,
9248 VAR_DECL, get_identifier ("__fpscr_values"), t);
9249 DECL_ARTIFICIAL (t) = 1;
9250 DECL_IGNORED_P (t) = 1;
9251 DECL_EXTERNAL (t) = 1;
9252 TREE_STATIC (t) = 1;
9253 TREE_PUBLIC (t) = 1;
9254 TREE_USED (t) = 1;
9256 fpscr_values = t;
9259 src = DECL_RTL (fpscr_values);
9260 if (!can_create_pseudo_p ())
9262 emit_move_insn (scratch, XEXP (src, 0));
9263 if (index != 0)
9264 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9265 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9267 else
9268 src = adjust_address (src, PSImode, index * 4);
9270 dst = get_fpscr_rtx ();
9271 emit_move_insn (dst, src);
9274 void
9275 emit_sf_insn (rtx pat)
9277 emit_insn (pat);
9280 void
9281 emit_df_insn (rtx pat)
9283 emit_insn (pat);
9286 void
9287 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9289 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9292 void
9293 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9295 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9296 get_fpscr_rtx ()));
9299 void
9300 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9302 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9305 void
9306 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9308 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9309 get_fpscr_rtx ()));
9312 static rtx get_free_reg (HARD_REG_SET);
9314 /* This function returns a register to use to load the address to load
9315 the fpscr from. Currently it always returns r1 or r7, but when we are
9316 able to use pseudo registers after combine, or have a better mechanism
9317 for choosing a register, it should be done here. */
9318 /* REGS_LIVE is the liveness information for the point for which we
9319 need this allocation. In some bare-bones exit blocks, r1 is live at the
9320 start. We can even have all of r0..r3 being live:
9321 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9322 The insn before which the new insns are placed will clobber the register
9323 we return. If a basic block consists only of setting the return value
9324 register to a pseudo and using that register, the return value is not
9325 live before or after this block, yet we'll insert our insns right in
9326 the middle. */
9328 static rtx
9329 get_free_reg (HARD_REG_SET regs_live)
9331 if (! TEST_HARD_REG_BIT (regs_live, 1))
9332 return gen_rtx_REG (Pmode, 1);
9334 /* Hard reg 1 is live; since this is a small register classes target,
9335 there shouldn't be anything but a jump before the function end. */
9336 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9337 return gen_rtx_REG (Pmode, 7);
9340 /* This function will set the fpscr from memory.
9341 MODE is the mode we are setting it to. */
9342 void
9343 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9345 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9346 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9347 rtx addr_reg;
9349 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9350 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9353 /* Is the given character a logical line separator for the assembler? */
9354 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9355 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9356 #endif
9359 sh_insn_length_adjustment (rtx insn)
9361 /* Instructions with unfilled delay slots take up an extra two bytes for
9362 the nop in the delay slot. */
9363 if (((NONJUMP_INSN_P (insn)
9364 && GET_CODE (PATTERN (insn)) != USE
9365 && GET_CODE (PATTERN (insn)) != CLOBBER)
9366 || CALL_P (insn)
9367 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9368 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9369 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9370 return 2;
9372 /* SH2e has a bug that prevents the use of annulled branches, so if
9373 the delay slot is not filled, we'll have to put a NOP in it. */
9374 if (sh_cpu_attr == CPU_SH2E
9375 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9376 && get_attr_type (insn) == TYPE_CBRANCH
9377 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9378 return 2;
9380 /* sh-dsp parallel processing insns take four bytes instead of two. */
9382 if (NONJUMP_INSN_P (insn))
9384 int sum = 0;
9385 rtx body = PATTERN (insn);
9386 const char *templ;
9387 char c;
9388 int maybe_label = 1;
9390 if (GET_CODE (body) == ASM_INPUT)
9391 templ = XSTR (body, 0);
9392 else if (asm_noperands (body) >= 0)
9393 templ
9394 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9395 else
9396 return 0;
9399 int ppi_adjust = 0;
9402 c = *templ++;
9403 while (c == ' ' || c == '\t');
9404 /* all sh-dsp parallel-processing insns start with p.
9405 The only non-ppi sh insn starting with p is pref.
9406 The only ppi starting with pr is prnd. */
9407 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9408 ppi_adjust = 2;
9409 /* The repeat pseudo-insn expands to three insns, a total of
9410 six bytes in size. */
9411 else if ((c == 'r' || c == 'R')
9412 && ! strncasecmp ("epeat", templ, 5))
9413 ppi_adjust = 4;
9414 while (c && c != '\n'
9415 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9417 /* If this is a label, it is obviously not a ppi insn. */
9418 if (c == ':' && maybe_label)
9420 ppi_adjust = 0;
9421 break;
9423 else if (c == '\'' || c == '"')
9424 maybe_label = 0;
9425 c = *templ++;
9427 sum += ppi_adjust;
9428 maybe_label = c != ':';
9430 while (c);
9431 return sum;
9433 return 0;
9436 /* Return TRUE for a valid displacement for the REG+disp addressing
9437 with MODE. */
9439 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9440 into the FRx registers. We implement this by setting the maximum offset
9441 to zero when the value is SFmode. This also restricts loading of SFmode
9442 values into the integer registers, but that can't be helped. */
9444 /* The SH allows a displacement in a QI or HI addressing mode, but only
9445 when the other operand is R0. GCC doesn't handle this very well, so we
9446 forgo all of that.
9448 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9449 DI can be any number 0..60. */
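/* Illustrative only: for SImode this corresponds to the 4-bit scaled
   displacement form, e.g.

     mov.l  @(60,r4),r1

   so byte offsets 0, 4, ..., 60 are accepted, and SH2A additionally
   accepts larger 4-byte-aligned offsets through its extended displacement
   encodings, as sh_legitimate_index_p below checks.  */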
9451 bool
9452 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9454 if (CONST_INT_P (op))
9456 if (TARGET_SHMEDIA)
9458 int size;
9460 /* Check if this is the address of an unaligned load / store. */
9461 if (mode == VOIDmode)
9462 return CONST_OK_FOR_I06 (INTVAL (op));
9464 size = GET_MODE_SIZE (mode);
9465 return (!(INTVAL (op) & (size - 1))
9466 && INTVAL (op) >= -512 * size
9467 && INTVAL (op) < 512 * size);
9470 if (TARGET_SH2A)
9472 if (GET_MODE_SIZE (mode) == 1
9473 && (unsigned) INTVAL (op) < 4096)
9474 return true;
9477 if ((GET_MODE_SIZE (mode) == 4
9478 && (unsigned) INTVAL (op) < 64
9479 && !(INTVAL (op) & 3)
9480 && !(TARGET_SH2E && mode == SFmode))
9481 || (GET_MODE_SIZE (mode) == 4
9482 && (unsigned) INTVAL (op) < 16383
9483 && !(INTVAL (op) & 3) && TARGET_SH2A))
9484 return true;
9486 if ((GET_MODE_SIZE (mode) == 8
9487 && (unsigned) INTVAL (op) < 60
9488 && !(INTVAL (op) & 3)
9489 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9490 || ((GET_MODE_SIZE (mode)==8)
9491 && (unsigned) INTVAL (op) < 8192
9492 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9493 && (TARGET_SH2A && mode == DFmode)))
9494 return true;
9497 return false;
9500 /* Recognize an RTL expression that is a valid memory address for
9501 an instruction.
9502 The MODE argument is the machine mode for the MEM expression
9503 that wants to use this address.
9504 Allow REG
9505 REG+disp
9506 REG+r0
9507 REG++
9508 --REG */
9510 static bool
9511 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9513 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9514 return true;
9515 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9516 && ! TARGET_SHMEDIA
9517 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9518 return true;
9519 else if (GET_CODE (x) == PLUS
9520 && (mode != PSImode || reload_completed))
9522 rtx xop0 = XEXP (x, 0);
9523 rtx xop1 = XEXP (x, 1);
9525 if (GET_MODE_SIZE (mode) <= 8
9526 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9527 && sh_legitimate_index_p (mode, xop1))
9528 return true;
9530 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9531 || ((xop0 == stack_pointer_rtx
9532 || xop0 == hard_frame_pointer_rtx)
9533 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9534 || ((xop1 == stack_pointer_rtx
9535 || xop1 == hard_frame_pointer_rtx)
9536 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9537 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9538 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9539 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9540 && TARGET_FMOVD && mode == DFmode)))
9542 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9543 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9544 return true;
9545 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9546 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9547 return true;
9551 return false;
9554 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9555 isn't protected by a PIC unspec. */
9556 int
9557 nonpic_symbol_mentioned_p (rtx x)
9559 register const char *fmt;
9560 register int i;
9562 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9563 || GET_CODE (x) == PC)
9564 return 1;
9566 /* We don't want to look into the possible MEM location of a
9567 CONST_DOUBLE, since we're not going to use it, in general. */
9568 if (GET_CODE (x) == CONST_DOUBLE)
9569 return 0;
9571 if (GET_CODE (x) == UNSPEC
9572 && (XINT (x, 1) == UNSPEC_PIC
9573 || XINT (x, 1) == UNSPEC_GOT
9574 || XINT (x, 1) == UNSPEC_GOTOFF
9575 || XINT (x, 1) == UNSPEC_GOTPLT
9576 || XINT (x, 1) == UNSPEC_GOTTPOFF
9577 || XINT (x, 1) == UNSPEC_DTPOFF
9578 || XINT (x, 1) == UNSPEC_TPOFF
9579 || XINT (x, 1) == UNSPEC_PLT
9580 || XINT (x, 1) == UNSPEC_SYMOFF
9581 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9582 return 0;
9584 fmt = GET_RTX_FORMAT (GET_CODE (x));
9585 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9587 if (fmt[i] == 'E')
9589 register int j;
9591 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9592 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9593 return 1;
9595 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9596 return 1;
9599 return 0;
9602 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9603 @GOTOFF in `reg'. */
9604 rtx
9605 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9606 rtx reg)
9608 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9609 return orig;
9611 if (GET_CODE (orig) == LABEL_REF
9612 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9614 if (reg == 0)
9615 reg = gen_reg_rtx (Pmode);
9617 emit_insn (gen_symGOTOFF2reg (reg, orig));
9618 return reg;
9620 else if (GET_CODE (orig) == SYMBOL_REF)
9622 if (reg == 0)
9623 reg = gen_reg_rtx (Pmode);
9625 emit_insn (gen_symGOT2reg (reg, orig));
9626 return reg;
9628 return orig;
9631 /* Try machine-dependent ways of modifying an illegitimate address
9632 to be legitimate. If we find one, return the new, valid address.
9633 Otherwise, return X.
9635 For the SH, if X is almost suitable for indexing, but the offset is
9636 out of range, convert it into a normal form so that CSE has a chance
9637 of reducing the number of address registers used. */
9639 static rtx
9640 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9642 if (flag_pic)
9643 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9645 if (GET_CODE (x) == PLUS
9646 && (GET_MODE_SIZE (mode) == 4
9647 || GET_MODE_SIZE (mode) == 8)
9648 && CONST_INT_P (XEXP (x, 1))
9649 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9650 && ! TARGET_SHMEDIA
9651 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9652 && ! (TARGET_SH2E && mode == SFmode))
9654 rtx index_rtx = XEXP (x, 1);
9655 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9656 rtx sum;
9658 /* On rare occasions, we might get an unaligned pointer
9659 that is indexed in a way to give an aligned address.
9660 Therefore, keep the lower two bits in offset_base. */
9661 /* Instead of offset_base 128..131 use 124..127, so that
9662 simple add suffices. */
9663 if (offset > 127)
9664 offset_base = ((offset + 4) & ~60) - 4;
9665 else
9666 offset_base = offset & ~60;
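/* Worked example (added): offset 100 gives offset_base 100 & ~60 = 64 and a
   residual displacement of 36; offset 130 gives ((130 + 4) & ~60) - 4 = 126
   and a residual of 4. In both cases the low two bits of the original
   offset are preserved in offset_base. */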
9668 /* Sometimes the normal form does not suit DImode. We
9669 could avoid that by using smaller ranges, but that
9670 would give less optimized code when SImode is
9671 prevalent. */
9672 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9674 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9675 GEN_INT (offset_base), NULL_RTX, 0,
9676 OPTAB_LIB_WIDEN);
9678 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9682 return x;
9685 /* Attempt to replace *P, which is an address that needs reloading, with
9686 a valid memory address for an operand of mode MODE.
9687 Like for sh_legitimize_address, for the SH we try to get a normal form
9688 of the address. That will allow inheritance of the address reloads. */
9690 bool
9691 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9692 int itype)
9694 enum reload_type type = (enum reload_type) itype;
9696 if (GET_CODE (*p) == PLUS
9697 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9698 && CONST_INT_P (XEXP (*p, 1))
9699 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9700 && ! TARGET_SHMEDIA
9701 && ! (TARGET_SH4 && mode == DFmode)
9702 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9703 && (ALLOW_INDEXED_ADDRESS
9704 || XEXP (*p, 0) == stack_pointer_rtx
9705 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9707 rtx index_rtx = XEXP (*p, 1);
9708 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9709 rtx sum;
9711 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9713 push_reload (*p, NULL_RTX, p, NULL,
9714 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9715 goto win;
9717 if (TARGET_SH2E && mode == SFmode)
9719 *p = copy_rtx (*p);
9720 push_reload (*p, NULL_RTX, p, NULL,
9721 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9722 goto win;
9724 /* Instead of offset_base 128..131 use 124..127, so that
9725 simple add suffices. */
9726 if (offset > 127)
9727 offset_base = ((offset + 4) & ~60) - 4;
9728 else
9729 offset_base = offset & ~60;
9730 /* Sometimes the normal form does not suit DImode. We could avoid
9731 that by using smaller ranges, but that would give less optimized
9732 code when SImode is prevalent. */
9733 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9735 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9736 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9737 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9738 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9739 goto win;
9742 /* We must re-recognize what we created before. */
9743 else if (GET_CODE (*p) == PLUS
9744 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9745 && GET_CODE (XEXP (*p, 0)) == PLUS
9746 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9747 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9748 && CONST_INT_P (XEXP (*p, 1))
9749 && ! TARGET_SHMEDIA
9750 && ! (TARGET_SH2E && mode == SFmode))
9752 /* Because this address is so complex, we know it must have
9753 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9754 it is already unshared, and needs no further unsharing. */
9755 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9756 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9757 goto win;
9760 return false;
9762 win:
9763 return true;
9766 /* Mark the use of a constant in the literal table. If the constant
9767 has multiple labels, make it unique. */
9768 static rtx
9769 mark_constant_pool_use (rtx x)
9771 rtx insn, lab, pattern;
9773 if (x == NULL)
9774 return x;
9776 switch (GET_CODE (x))
9778 case LABEL_REF:
9779 x = XEXP (x, 0);
9780 case CODE_LABEL:
9781 break;
9782 default:
9783 return x;
9786 /* Get the first label in the list of labels for the same constant
9787 and delete the other labels in the list. */
9788 lab = x;
9789 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9791 if (!LABEL_P (insn)
9792 || LABEL_REFS (insn) != NEXT_INSN (insn))
9793 break;
9794 lab = insn;
9797 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9798 INSN_DELETED_P (insn) = 1;
9800 /* Mark constants in a window. */
9801 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9803 if (!NONJUMP_INSN_P (insn))
9804 continue;
9806 pattern = PATTERN (insn);
9807 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9808 continue;
9810 switch (XINT (pattern, 1))
9812 case UNSPECV_CONST2:
9813 case UNSPECV_CONST4:
9814 case UNSPECV_CONST8:
9815 XVECEXP (pattern, 0, 1) = const1_rtx;
9816 break;
9817 case UNSPECV_WINDOW_END:
9818 if (XVECEXP (pattern, 0, 0) == x)
9819 return lab;
9820 break;
9821 case UNSPECV_CONST_END:
9822 return lab;
9823 default:
9824 break;
9828 return lab;
9831 /* Return true if it's possible to redirect BRANCH1 to the destination
9832 of an unconditional jump BRANCH2. We only want to do this if the
9833 resulting branch will have a short displacement. */
9834 int
9835 sh_can_redirect_branch (rtx branch1, rtx branch2)
9837 if (flag_expensive_optimizations && simplejump_p (branch2))
9839 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9840 rtx insn;
9841 int distance;
9843 for (distance = 0, insn = NEXT_INSN (branch1);
9844 insn && distance < 256;
9845 insn = PREV_INSN (insn))
9847 if (insn == dest)
9848 return 1;
9849 else
9850 distance += get_attr_length (insn);
9852 for (distance = 0, insn = NEXT_INSN (branch1);
9853 insn && distance < 256;
9854 insn = NEXT_INSN (insn))
9856 if (insn == dest)
9857 return 1;
9858 else
9859 distance += get_attr_length (insn);
9862 return 0;
9865 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9866 int
9867 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9868 unsigned int new_reg)
9870 /* Interrupt functions can only use registers that have already been
9871 saved by the prologue, even if they would normally be
9872 call-clobbered. */
9874 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9875 return 0;
9877 return 1;
9880 /* Function to update the integer COST
9881 based on the relationship between INSN that is dependent on
9882 DEP_INSN through the dependence LINK. The default is to make no
9883 adjustment to COST. This can be used for example to specify to
9884 the scheduler that an output- or anti-dependence does not incur
9885 the same cost as a data-dependence. The return value should be
9886 the new value for COST. */
9887 static int
9888 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9890 rtx reg, use_pat;
9892 if (TARGET_SHMEDIA)
9894 /* On SHmedia, if the dependence is an anti-dependence or
9895 output-dependence, there is no cost. */
9896 if (REG_NOTE_KIND (link) != 0)
9898 /* However, dependencies between target register loads and
9899 uses of the register in a subsequent block that are separated
9900 by a conditional branch are not modelled - we have to make do with
9901 the anti-dependency between the target register load and the
9902 conditional branch that ends the current block. */
9903 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9904 && GET_CODE (PATTERN (dep_insn)) == SET
9905 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9906 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9907 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9909 int orig_cost = cost;
9910 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9911 rtx target = ((! note
9912 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9913 ? insn : JUMP_LABEL (insn));
9914 /* On the likely path, the branch costs 1, on the unlikely path,
9915 it costs 3. */
9916 cost--;
9918 target = next_active_insn (target);
9919 while (target && ! flow_dependent_p (target, dep_insn)
9920 && --cost > 0);
9921 /* If two branches are executed in immediate succession, with the
9922 first branch properly predicted, this causes a stall at the
9923 second branch, hence we won't need the target for the
9924 second branch for two cycles after the launch of the first
9925 branch. */
9926 if (cost > orig_cost - 2)
9927 cost = orig_cost - 2;
9929 else
9930 cost = 0;
9933 else if (get_attr_is_mac_media (insn)
9934 && get_attr_is_mac_media (dep_insn))
9935 cost = 1;
9937 else if (! reload_completed
9938 && GET_CODE (PATTERN (insn)) == SET
9939 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9940 && GET_CODE (PATTERN (dep_insn)) == SET
9941 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9942 && cost < 4)
9943 cost = 4;
9944 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9945 that is needed at the target. */
9946 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9947 && ! flow_dependent_p (insn, dep_insn))
9948 cost--;
9950 else if (REG_NOTE_KIND (link) == 0)
9952 enum attr_type type;
9953 rtx dep_set;
9955 if (recog_memoized (insn) < 0
9956 || recog_memoized (dep_insn) < 0)
9957 return cost;
9959 dep_set = single_set (dep_insn);
9961 /* The latency that we specify in the scheduling description refers
9962 to the actual output, not to an auto-increment register; for that,
9963 the latency is one. */
9964 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9966 rtx set = single_set (insn);
9968 if (set
9969 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9970 && (!MEM_P (SET_DEST (set))
9971 || !reg_mentioned_p (SET_DEST (dep_set),
9972 XEXP (SET_DEST (set), 0))))
9973 cost = 1;
9975 /* The only input for a call that is timing-critical is the
9976 function's address. */
9977 if (CALL_P (insn))
9979 rtx call = PATTERN (insn);
9981 if (GET_CODE (call) == PARALLEL)
9982 call = XVECEXP (call, 0 ,0);
9983 if (GET_CODE (call) == SET)
9984 call = SET_SRC (call);
9985 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9986 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9987 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9988 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9989 cost -= TARGET_SH4_300 ? 3 : 6;
9991 /* Likewise, the most timing critical input for an sfuncs call
9992 is the function address. However, sfuncs typically start
9993 using their arguments pretty quickly.
9994 Assume a four cycle delay for SH4 before they are needed.
9995 Cached ST40-300 calls are quicker, so assume only a one
9996 cycle delay there.
9997 ??? Maybe we should encode the delays till input registers
9998 are needed by sfuncs into the sfunc call insn. */
9999 /* All sfunc calls are parallels with at least four components.
10000 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10001 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10002 && XVECLEN (PATTERN (insn), 0) >= 4
10003 && (reg = sfunc_uses_reg (insn)))
10005 if (! reg_set_p (reg, dep_insn))
10006 cost -= TARGET_SH4_300 ? 1 : 4;
10008 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10010 enum attr_type dep_type = get_attr_type (dep_insn);
10012 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10013 cost--;
10014 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10015 && (type = get_attr_type (insn)) != TYPE_CALL
10016 && type != TYPE_SFUNC)
10017 cost--;
10018 /* When the preceding instruction loads the shift amount of
10019 the following SHAD/SHLD, the latency of the load is increased
10020 by 1 cycle. */
10021 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10022 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10023 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10024 XEXP (SET_SRC (single_set (insn)),
10025 1)))
10026 cost++;
10027 /* When an LS group instruction with a latency of less than
10028 3 cycles is followed by a double-precision floating-point
10029 instruction, FIPR, or FTRV, the latency of the first
10030 instruction is increased to 3 cycles. */
10031 else if (cost < 3
10032 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10033 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10034 cost = 3;
10035 /* The lsw register of a double-precision computation is ready one
10036 cycle earlier. */
10037 else if (reload_completed
10038 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10039 && (use_pat = single_set (insn))
10040 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10041 SET_SRC (use_pat)))
10042 cost -= 1;
10044 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10045 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10046 cost -= 1;
10048 else if (TARGET_SH4_300)
10050 /* Stores need their input register two cycles later. */
10051 if (dep_set && cost >= 1
10052 && ((type = get_attr_type (insn)) == TYPE_STORE
10053 || type == TYPE_PSTORE
10054 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10056 rtx set = single_set (insn);
10058 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10059 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10061 cost -= 2;
10062 /* But don't reduce the cost below 1 if the address depends
10063 on a side effect of dep_insn. */
10064 if (cost < 1
10065 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10066 cost = 1;
10071 /* An anti-dependence penalty of two applies if the first insn is a double
10072 precision fadd / fsub / fmul. */
10073 else if (!TARGET_SH4_300
10074 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10075 && recog_memoized (dep_insn) >= 0
10076 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10077 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10078 /* A lot of alleged anti-flow dependences are fake,
10079 so check this one is real. */
10080 && flow_dependent_p (dep_insn, insn))
10081 cost = 2;
10083 return cost;
10086 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10087 if DEP_INSN is anti-flow dependent on INSN. */
10088 static int
10089 flow_dependent_p (rtx insn, rtx dep_insn)
10091 rtx tmp = PATTERN (insn);
10093 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10094 return tmp == NULL_RTX;
10097 /* A helper function for flow_dependent_p called through note_stores. */
10098 static void
10099 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10101 rtx * pinsn = (rtx *) data;
10103 if (*pinsn && reg_referenced_p (x, *pinsn))
10104 *pinsn = NULL_RTX;
10107 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10108 'special function' patterns (type sfunc) that clobber pr, but that
10109 do not look like function calls to leaf_function_p. Hence we must
10110 do this extra check. */
10111 static int
10112 sh_pr_n_sets (void)
10114 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10117 /* Return where to allocate pseudo for a given hard register initial
10118 value. */
10119 static rtx
10120 sh_allocate_initial_value (rtx hard_reg)
10122 rtx x;
10124 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10126 if (current_function_is_leaf
10127 && ! sh_pr_n_sets ()
10128 && ! (TARGET_SHCOMPACT
10129 && ((crtl->args.info.call_cookie
10130 & ~ CALL_COOKIE_RET_TRAMP (1))
10131 || crtl->saves_all_registers)))
10132 x = hard_reg;
10133 else
10134 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10136 else
10137 x = NULL_RTX;
10139 return x;
10142 /* This function returns "2" to indicate dual issue for the SH4
10143 processor. To be used by the DFA pipeline description. */
10144 static int
10145 sh_issue_rate (void)
10147 if (TARGET_SUPERSCALAR)
10148 return 2;
10149 else
10150 return 1;
10153 /* Functions for ready queue reordering for sched1. */
10155 /* Get weight for mode for a set x. */
10156 static short
10157 find_set_regmode_weight (rtx x, enum machine_mode mode)
10159 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10160 return 1;
10161 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10163 if (REG_P (SET_DEST (x)))
10165 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10166 return 1;
10167 else
10168 return 0;
10170 return 1;
10172 return 0;
10175 /* Get regmode weight for insn. */
10176 static short
10177 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10179 short reg_weight = 0;
10180 rtx x;
10182 /* Increment weight for each register born here. */
10183 x = PATTERN (insn);
10184 reg_weight += find_set_regmode_weight (x, mode);
10185 if (GET_CODE (x) == PARALLEL)
10187 int j;
10188 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10190 x = XVECEXP (PATTERN (insn), 0, j);
10191 reg_weight += find_set_regmode_weight (x, mode);
10194 /* Decrement weight for each register that dies here. */
10195 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10197 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10199 rtx note = XEXP (x, 0);
10200 if (REG_P (note) && GET_MODE (note) == mode)
10201 reg_weight--;
10204 return reg_weight;
10207 /* Calculate regmode weights for all insns of a basic block. */
10208 static void
10209 find_regmode_weight (basic_block b, enum machine_mode mode)
10211 rtx insn, next_tail, head, tail;
10213 get_ebb_head_tail (b, b, &head, &tail);
10214 next_tail = NEXT_INSN (tail);
10216 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10218 /* Handle register life information. */
10219 if (!INSN_P (insn))
10220 continue;
10222 if (mode == SFmode)
10223 INSN_REGMODE_WEIGHT (insn, mode) =
10224 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10225 else if (mode == SImode)
10226 INSN_REGMODE_WEIGHT (insn, mode) =
10227 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10231 /* Comparison function for ready queue sorting. */
10232 static int
10233 rank_for_reorder (const void *x, const void *y)
10235 rtx tmp = *(const rtx *) y;
10236 rtx tmp2 = *(const rtx *) x;
10238 /* The insn in a schedule group should be issued first. */
10239 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10240 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10242 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10243 minimizes instruction movement, thus minimizing sched's effect on
10244 register pressure. */
10245 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10248 /* Resort the array A, in which only the last of its N elements may be out of order. */
10249 static void
10250 swap_reorder (rtx *a, int n)
10252 rtx insn = a[n - 1];
10253 int i = n - 2;
10255 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10257 a[i + 1] = a[i];
10258 i -= 1;
10260 a[i + 1] = insn;
10263 #define SCHED_REORDER(READY, N_READY) \
10264 do \
10266 if ((N_READY) == 2) \
10267 swap_reorder (READY, N_READY); \
10268 else if ((N_READY) > 2) \
10269 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10271 while (0)
10273 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10274 macro. */
10275 static void
10276 ready_reorder (rtx *ready, int nready)
10278 SCHED_REORDER (ready, nready);
10281 /* Count life regions of r0 for a block. */
10282 static int
10283 find_r0_life_regions (basic_block b)
10285 rtx end, insn;
10286 rtx pset;
10287 rtx r0_reg;
10288 int live;
10289 int set;
10290 int death = 0;
10292 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10294 set = 1;
10295 live = 1;
10297 else
10299 set = 0;
10300 live = 0;
10303 insn = BB_HEAD (b);
10304 end = BB_END (b);
10305 r0_reg = gen_rtx_REG (SImode, R0_REG);
10306 while (1)
10308 if (INSN_P (insn))
10310 if (find_regno_note (insn, REG_DEAD, R0_REG))
10312 death++;
10313 live = 0;
10315 if (!live
10316 && (pset = single_set (insn))
10317 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10318 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10320 set++;
10321 live = 1;
10324 if (insn == end)
10325 break;
10326 insn = NEXT_INSN (insn);
10328 return set - death;
10331 /* Calculate regmode weights for all insns of all basic blocks. */
10332 static void
10333 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10334 int verbose ATTRIBUTE_UNUSED,
10335 int old_max_uid)
10337 basic_block b;
10339 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10340 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10341 r0_life_regions = 0;
10343 FOR_EACH_BB_REVERSE (b)
10345 find_regmode_weight (b, SImode);
10346 find_regmode_weight (b, SFmode);
10347 if (!reload_completed)
10348 r0_life_regions += find_r0_life_regions (b);
10351 CURR_REGMODE_PRESSURE (SImode) = 0;
10352 CURR_REGMODE_PRESSURE (SFmode) = 0;
10356 /* Cleanup. */
10357 static void
10358 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10359 int verbose ATTRIBUTE_UNUSED)
10361 if (regmode_weight[0])
10363 free (regmode_weight[0]);
10364 regmode_weight[0] = NULL;
10366 if (regmode_weight[1])
10368 free (regmode_weight[1]);
10369 regmode_weight[1] = NULL;
10373 /* The scalar modes supported differ from the default in that TImode is
10374 not supported for 32-bit SHMEDIA. */
10375 static bool
10376 sh_scalar_mode_supported_p (enum machine_mode mode)
10378 if (TARGET_SHMEDIA32 && mode == TImode)
10379 return false;
10381 return default_scalar_mode_supported_p (mode);
10384 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10385 keep count of register pressures on SImode and SFmode. */
10386 static int
10387 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10388 int sched_verbose ATTRIBUTE_UNUSED,
10389 rtx insn,
10390 int can_issue_more)
10392 if (GET_CODE (PATTERN (insn)) != USE
10393 && GET_CODE (PATTERN (insn)) != CLOBBER)
10394 cached_can_issue_more = can_issue_more - 1;
10395 else
10396 cached_can_issue_more = can_issue_more;
10398 if (reload_completed)
10399 return cached_can_issue_more;
10401 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10402 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10404 return cached_can_issue_more;
10407 static void
10408 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10409 int verbose ATTRIBUTE_UNUSED,
10410 int veclen ATTRIBUTE_UNUSED)
10412 CURR_REGMODE_PRESSURE (SImode) = 0;
10413 CURR_REGMODE_PRESSURE (SFmode) = 0;
10416 /* Some magic numbers. */
10417 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10418 functions that already have high pressure on r0. */
10419 #define R0_MAX_LIFE_REGIONS 2
10420 /* Register Pressure thresholds for SImode and SFmode registers. */
10421 #define SIMODE_MAX_WEIGHT 5
10422 #define SFMODE_MAX_WEIGHT 10
10424 /* Return true if the pressure is high for MODE. */
10425 static short
10426 high_pressure (enum machine_mode mode)
10428 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10429 functions that already have high pressure on r0. */
10430 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10431 return 1;
10433 if (mode == SFmode)
10434 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10435 else
10436 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10439 /* Reorder ready queue if register pressure is high. */
10440 static int
10441 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10442 int sched_verbose ATTRIBUTE_UNUSED,
10443 rtx *ready,
10444 int *n_readyp,
10445 int clock_var ATTRIBUTE_UNUSED)
10447 if (reload_completed)
10448 return sh_issue_rate ();
10450 if (high_pressure (SFmode) || high_pressure (SImode))
10452 ready_reorder (ready, *n_readyp);
10455 return sh_issue_rate ();
10458 /* Skip cycles if the current register pressure is high. */
10459 static int
10460 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10461 int sched_verbose ATTRIBUTE_UNUSED,
10462 rtx *ready ATTRIBUTE_UNUSED,
10463 int *n_readyp ATTRIBUTE_UNUSED,
10464 int clock_var ATTRIBUTE_UNUSED)
10466 if (reload_completed)
10467 return cached_can_issue_more;
10469 if (high_pressure(SFmode) || high_pressure (SImode))
10470 skip_cycles = 1;
10472 return cached_can_issue_more;
10475 /* Skip cycles without sorting the ready queue. This will move insns from
10476 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10477 ready queue by sh_reorder. */
10479 /* Generally, skipping this many cycles is sufficient for all insns to move
10480 from Q -> R. */
10481 #define MAX_SKIPS 8
10483 static int
10484 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10485 int sched_verbose ATTRIBUTE_UNUSED,
10486 rtx insn ATTRIBUTE_UNUSED,
10487 int last_clock_var,
10488 int clock_var,
10489 int *sort_p)
10491 if (reload_completed)
10492 return 0;
10494 if (skip_cycles)
10496 if ((clock_var - last_clock_var) < MAX_SKIPS)
10498 *sort_p = 0;
10499 return 1;
10501 /* If this is the last cycle we are skipping, allow reordering of R. */
10502 if ((clock_var - last_clock_var) == MAX_SKIPS)
10504 *sort_p = 1;
10505 return 1;
10509 skip_cycles = 0;
10511 return 0;
10514 /* SHmedia requires registers for branches, so we can't generate new
10515 branches past reload. */
10516 static bool
10517 sh_cannot_modify_jumps_p (void)
10519 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10522 static reg_class_t
10523 sh_target_reg_class (void)
10525 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10528 static bool
10529 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10531 HARD_REG_SET dummy;
10532 #if 0
10533 rtx insn;
10534 #endif
10536 if (! shmedia_space_reserved_for_target_registers)
10537 return 0;
10538 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10539 return 0;
10540 if (calc_live_regs (&dummy) >= 6 * 8)
10541 return 1;
10542 return 0;
10545 static bool
10546 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10548 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10552 /* On the SH1..SH4, the trampoline looks like
10553 2 0002 D202 mov.l l2,r2
10554 1 0000 D301 mov.l l1,r3
10555 3 0004 422B jmp @r2
10556 4 0006 0009 nop
10557 5 0008 00000000 l1: .long area
10558 6 000c 00000000 l2: .long function
10560 SH5 (compact) uses r1 instead of r3 for the static chain. */
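/* Added note: sh_trampoline_init below fills in this layout for SH1..SH4 by
   storing the two instruction words at offsets 0 and 4, the static chain
   value (l1/area) at offset 8 and the function address (l2) at offset 12. */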
10563 /* Emit RTL insns to initialize the variable parts of a trampoline.
10564 FNADDR is an RTX for the address of the function's pure code.
10565 CXT is an RTX for the static chain value for the function. */
10567 static void
10568 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10570 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10571 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10573 if (TARGET_SHMEDIA64)
10575 rtx tramp_templ;
10576 int fixed_len;
10578 rtx movi1 = GEN_INT (0xcc000010);
10579 rtx shori1 = GEN_INT (0xc8000010);
10580 rtx src, dst;
10582 /* The following trampoline works within a +- 128 KB range for cxt:
10583 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10584 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10585 gettr tr1,r1; blink tr0,r63 */
10586 /* Address rounding makes it hard to compute the exact bounds of the
10587 offset for this trampoline, but we have a rather generous offset
10588 range, so frame_offset should do fine as an upper bound. */
10589 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10591 /* ??? could optimize this trampoline initialization
10592 by writing DImode words with two insns each. */
10593 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10594 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10595 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10596 insn = gen_rtx_AND (DImode, insn, mask);
10597 /* Or in ptb/u .,tr1 pattern */
10598 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10599 insn = force_operand (insn, NULL_RTX);
10600 insn = gen_lowpart (SImode, insn);
10601 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10602 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10603 insn = gen_rtx_AND (DImode, insn, mask);
10604 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10605 insn = gen_lowpart (SImode, insn);
10606 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10607 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10608 insn = gen_rtx_AND (DImode, insn, mask);
10609 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10610 insn = gen_lowpart (SImode, insn);
10611 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10612 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10613 insn = gen_rtx_AND (DImode, insn, mask);
10614 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10615 insn = gen_lowpart (SImode, insn);
10616 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10617 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10618 insn = gen_rtx_AND (DImode, insn, mask);
10619 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10620 insn = gen_lowpart (SImode, insn);
10621 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10622 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10623 GEN_INT (0x6bf10600));
10624 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10625 GEN_INT (0x4415fc10));
10626 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10627 GEN_INT (0x4401fff0));
10628 emit_insn (gen_ic_invalidate_line (tramp));
10629 return;
10631 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10632 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10634 tramp_templ = gen_datalabel_ref (tramp_templ);
10635 dst = tramp_mem;
10636 src = gen_const_mem (BLKmode, tramp_templ);
10637 set_mem_align (dst, 256);
10638 set_mem_align (src, 64);
10639 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10641 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10642 emit_move_insn (adjust_address (tramp_mem, Pmode,
10643 fixed_len + GET_MODE_SIZE (Pmode)),
10644 cxt);
10645 emit_insn (gen_ic_invalidate_line (tramp));
10646 return;
10648 else if (TARGET_SHMEDIA)
10650 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10651 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10652 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10653 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10654 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10655 rotated 10 right, and the higher 16 bits of every 32 selected. */
10656 rtx movishori
10657 = force_reg (V2HImode, (simplify_gen_subreg
10658 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10659 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10660 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10662 fnaddr = force_reg (SImode, fnaddr);
10663 cxt = force_reg (SImode, cxt);
10664 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10665 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10666 movishori));
10667 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10668 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10669 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10670 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10671 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10672 gen_rtx_SUBREG (V2HImode, cxt, 0),
10673 movishori));
10674 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10675 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10676 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10677 if (TARGET_LITTLE_ENDIAN)
10679 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10680 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10682 else
10684 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10685 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10687 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10688 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10689 emit_insn (gen_ic_invalidate_line (tramp));
10690 return;
10692 else if (TARGET_SHCOMPACT)
10694 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10695 return;
10697 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10698 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10699 SImode));
10700 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10701 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10702 SImode));
10703 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10704 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10705 if (TARGET_HARVARD)
10707 if (!TARGET_INLINE_IC_INVALIDATE
10708 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10709 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10710 FUNCTION_ORDINARY),
10711 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10712 else
10713 emit_insn (gen_ic_invalidate_line (tramp));
10717 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10719 static rtx
10720 sh_trampoline_adjust_address (rtx tramp)
10722 if (TARGET_SHMEDIA)
10723 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10724 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10725 return tramp;
10728 /* FIXME: This is overly conservative. A SHcompact function that
10729 receives arguments ``by reference'' will have them stored in its
10730 own stack frame, so it must not pass pointers or references to
10731 these arguments to other functions by means of sibling calls. */
10732 /* If PIC, we cannot make sibling calls to global functions
10733 because the PLT requires r12 to be live. */
10734 static bool
10735 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10737 return (1
10738 && (! TARGET_SHCOMPACT
10739 || crtl->args.info.stack_regs == 0)
10740 && ! sh_cfun_interrupt_handler_p ()
10741 && (! flag_pic
10742 || (decl && ! TREE_PUBLIC (decl))
10743 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10746 /* Machine specific built-in functions. */
10748 struct builtin_description
10750 const enum insn_code icode;
10751 const char *const name;
10752 int signature;
10753 tree fndecl;
10756 /* describe number and signedness of arguments; arg[0] == result
10757 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10758 /* 9: 64-bit pointer, 10: 32-bit pointer */
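/* Illustrative decoding (added): SH_BLTIN_MAC_HISI = { 1, 4, 4, 1 } means the
   result and the last argument are unsigned while the first two arguments
   have don't-care signedness; SH_BLTIN_PV = { 0, 8 } means no result (void)
   and a single pointer argument. */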
10759 static const char signature_args[][4] =
10761 #define SH_BLTIN_V2SI2 0
10762 { 4, 4 },
10763 #define SH_BLTIN_V4HI2 1
10764 { 4, 4 },
10765 #define SH_BLTIN_V2SI3 2
10766 { 4, 4, 4 },
10767 #define SH_BLTIN_V4HI3 3
10768 { 4, 4, 4 },
10769 #define SH_BLTIN_V8QI3 4
10770 { 4, 4, 4 },
10771 #define SH_BLTIN_MAC_HISI 5
10772 { 1, 4, 4, 1 },
10773 #define SH_BLTIN_SH_HI 6
10774 { 4, 4, 1 },
10775 #define SH_BLTIN_SH_SI 7
10776 { 4, 4, 1 },
10777 #define SH_BLTIN_V4HI2V2SI 8
10778 { 4, 4, 4 },
10779 #define SH_BLTIN_V4HI2V8QI 9
10780 { 4, 4, 4 },
10781 #define SH_BLTIN_SISF 10
10782 { 4, 2 },
10783 #define SH_BLTIN_LDUA_L 11
10784 { 2, 10 },
10785 #define SH_BLTIN_LDUA_Q 12
10786 { 1, 10 },
10787 #define SH_BLTIN_STUA_L 13
10788 { 0, 10, 2 },
10789 #define SH_BLTIN_STUA_Q 14
10790 { 0, 10, 1 },
10791 #define SH_BLTIN_LDUA_L64 15
10792 { 2, 9 },
10793 #define SH_BLTIN_LDUA_Q64 16
10794 { 1, 9 },
10795 #define SH_BLTIN_STUA_L64 17
10796 { 0, 9, 2 },
10797 #define SH_BLTIN_STUA_Q64 18
10798 { 0, 9, 1 },
10799 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10800 #define SH_BLTIN_2 19
10801 #define SH_BLTIN_SU 19
10802 { 1, 2 },
10803 #define SH_BLTIN_3 20
10804 #define SH_BLTIN_SUS 20
10805 { 2, 2, 1 },
10806 #define SH_BLTIN_PSSV 21
10807 { 0, 8, 2, 2 },
10808 #define SH_BLTIN_XXUU 22
10809 #define SH_BLTIN_UUUU 22
10810 { 1, 1, 1, 1 },
10811 #define SH_BLTIN_PV 23
10812 { 0, 8 },
10814 /* mcmv: operands considered unsigned. */
10815 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10816 /* mperm: control value considered unsigned int. */
10817 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10818 /* mshards_q: returns signed short. */
10819 /* nsb: takes long long arg, returns unsigned char. */
10820 static struct builtin_description bdesc[] =
10822 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10823 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10824 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10825 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10826 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10827 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10828 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10829 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10830 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10831 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10832 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10833 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10834 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10835 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10836 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10837 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10838 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10839 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10840 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10841 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10842 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10843 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10844 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10845 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10846 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10847 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10848 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10849 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10850 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10851 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10852 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10853 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10854 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10855 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10856 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10857 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10858 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10859 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10860 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10861 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10862 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10863 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10864 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10865 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10866 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10867 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10868 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10869 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10870 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10871 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10872 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10873 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10874 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10875 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10876 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10877 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10878 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10879 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10880 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10881 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10882 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10883 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10884 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10885 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10886 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10887 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10888 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10889 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10890 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10891 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10892 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10893 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10894 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10895 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10896 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10897 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10898 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10899 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10900 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10901 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10902 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10903 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10904 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10905 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10908 static void
10909 sh_media_init_builtins (void)
10911 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10912 struct builtin_description *d;
10914 memset (shared, 0, sizeof shared);
10915 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10917 tree type, arg_type = 0;
10918 int signature = d->signature;
10919 int i;
10921 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10922 type = shared[signature];
10923 else
10925 int has_result = signature_args[signature][0] != 0;
10927 if ((signature_args[signature][1] & 8)
10928 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10929 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10930 continue;
10931 if (! TARGET_FPU_ANY
10932 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10933 continue;
10934 type = void_list_node;
10935 for (i = 3; ; i--)
10937 int arg = signature_args[signature][i];
10938 int opno = i - 1 + has_result;
10940 if (arg & 8)
10941 arg_type = ptr_type_node;
10942 else if (arg)
10943 arg_type = (*lang_hooks.types.type_for_mode)
10944 (insn_data[d->icode].operand[opno].mode,
10945 (arg & 1));
10946 else if (i)
10947 continue;
10948 else
10949 arg_type = void_type_node;
10950 if (i == 0)
10951 break;
10952 type = tree_cons (NULL_TREE, arg_type, type);
10954 type = build_function_type (arg_type, type);
10955 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10956 shared[signature] = type;
10958 d->fndecl =
10959 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10960 NULL, NULL_TREE);
10964 /* Returns the shmedia builtin decl for CODE. */
10966 static tree
10967 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10969 if (code >= ARRAY_SIZE (bdesc))
10970 return error_mark_node;
10972 return bdesc[code].fndecl;
10975 /* Implements target hook vector_mode_supported_p. */
10976 bool
10977 sh_vector_mode_supported_p (enum machine_mode mode)
10979 if (TARGET_FPU_ANY
10980 && ((mode == V2SFmode)
10981 || (mode == V4SFmode)
10982 || (mode == V16SFmode)))
10983 return true;
10985 else if (TARGET_SHMEDIA
10986 && ((mode == V8QImode)
10987 || (mode == V2HImode)
10988 || (mode == V4HImode)
10989 || (mode == V2SImode)))
10990 return true;
10992 return false;
10995 bool
10996 sh_frame_pointer_required (void)
10998 /* If needed override this in other tm.h files to cope with various OS
10999 lossage requiring a frame pointer. */
11000 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11001 return true;
11003 if (crtl->profile)
11004 return true;
11006 return false;
11009 /* Implements target hook dwarf_calling_convention. Return an enum
11010 of dwarf_calling_convention. */
11011 int
11012 sh_dwarf_calling_convention (const_tree func)
11014 if (sh_attr_renesas_p (func))
11015 return DW_CC_GNU_renesas_sh;
11017 return DW_CC_normal;
11020 static void
11021 sh_init_builtins (void)
11023 if (TARGET_SHMEDIA)
11024 sh_media_init_builtins ();
11027 /* Returns the sh builtin decl for CODE. */
11029 static tree
11030 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11032 if (TARGET_SHMEDIA)
11033 return sh_media_builtin_decl (code, initialize_p);
11035 return error_mark_node;
11038 /* Expand an expression EXP that calls a built-in function,
11039 with result going to TARGET if that's convenient
11040 (and in mode MODE if that's convenient).
11041 SUBTARGET may be used as the target for computing one of EXP's operands.
11042 IGNORE is nonzero if the value is to be ignored. */
11044 static rtx
11045 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11046 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11048 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11049 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11050 const struct builtin_description *d = &bdesc[fcode];
11051 enum insn_code icode = d->icode;
11052 int signature = d->signature;
11053 enum machine_mode tmode = VOIDmode;
11054 int nop = 0, i;
11055 rtx op[4];
11056 rtx pat = 0;
11058 if (signature_args[signature][0])
11060 if (ignore)
11061 return 0;
11063 tmode = insn_data[icode].operand[0].mode;
11064 if (! target
11065 || GET_MODE (target) != tmode
11066 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11067 target = gen_reg_rtx (tmode);
11068 op[nop++] = target;
11070 else
11071 target = 0;
11073 for (i = 1; i <= 3; i++, nop++)
11075 tree arg;
11076 enum machine_mode opmode, argmode;
11077 tree optype;
11079 if (! signature_args[signature][i])
11080 break;
11081 arg = CALL_EXPR_ARG (exp, i - 1);
11082 if (arg == error_mark_node)
11083 return const0_rtx;
11084 if (signature_args[signature][i] & 8)
11086 opmode = ptr_mode;
11087 optype = ptr_type_node;
11089 else
11091 opmode = insn_data[icode].operand[nop].mode;
11092 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11094 argmode = TYPE_MODE (TREE_TYPE (arg));
11095 if (argmode != opmode)
11096 arg = build1 (NOP_EXPR, optype, arg);
11097 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11098 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11099 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11102 switch (nop)
11104 case 1:
11105 pat = (*insn_data[d->icode].genfun) (op[0]);
11106 break;
11107 case 2:
11108 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11109 break;
11110 case 3:
11111 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11112 break;
11113 case 4:
11114 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11115 break;
11116 default:
11117 gcc_unreachable ();
11119 if (! pat)
11120 return 0;
11121 emit_insn (pat);
11122 return target;
11125 void
11126 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11128 rtx sel0 = const0_rtx;
11129 rtx sel1 = const1_rtx;
11130 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11131 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11133 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11134 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11137 void
11138 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11140 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11142 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11143 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11146 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11147 We can allow any mode in any general register. The special registers
11148 only allow SImode. Don't allow any mode in the PR.
11150 We cannot hold DCmode values in the XD registers because alter_reg
11151 handles subregs of them incorrectly. We could work around this by
11152 spacing the XD registers like the DR registers, but this would require
11153 additional memory in every compilation to hold larger register vectors.
11154 We could hold SFmode / SCmode values in XD registers, but that
11155 would require a tertiary reload when reloading from / to memory,
11156 and a secondary reload to reload from / to general regs; that
11157 seems to be a losing proposition.
11159 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11160 it won't be ferried through GP registers first. */
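/* Example (added): on SH4 a DFmode value may only live in a register pair
   starting at an even FP register (e.g. dr0 = fr0/fr1), which is what the
   ((regno - FIRST_FP_REG) & 1) == 0 test below enforces; TImode in FP
   registers must likewise start at a multiple of four. */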
11162 bool
11163 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11165 if (SPECIAL_REGISTER_P (regno))
11166 return mode == SImode;
11168 if (regno == FPUL_REG)
11169 return (mode == SImode || mode == SFmode);
11171 if (FP_REGISTER_P (regno) && mode == SFmode)
11172 return true;
11174 if (mode == V2SFmode)
11176 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11177 || GENERAL_REGISTER_P (regno)))
11178 return true;
11179 else
11180 return false;
11183 if (mode == V4SFmode)
11185 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11186 || GENERAL_REGISTER_P (regno))
11187 return true;
11188 else
11189 return false;
11192 if (mode == V16SFmode)
11194 if (TARGET_SHMEDIA)
11196 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11197 return true;
11198 else
11199 return false;
11201 else
11202 return regno == FIRST_XD_REG;
11205 if (FP_REGISTER_P (regno))
11207 if (mode == SFmode
11208 || mode == SImode
11209 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11210 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11211 || mode == DCmode
11212 || (TARGET_SHMEDIA
11213 && (mode == DFmode || mode == DImode
11214 || mode == V2SFmode || mode == TImode)))
11215 && ((regno - FIRST_FP_REG) & 1) == 0)
11216 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11217 && ((regno - FIRST_FP_REG) & 3) == 0))
11218 return true;
11219 else
11220 return false;
11223 if (XD_REGISTER_P (regno))
11224 return mode == DFmode;
11226 if (TARGET_REGISTER_P (regno))
11227 return (mode == DImode || mode == SImode || mode == PDImode);
11229 if (regno == PR_REG)
11230 return mode == SImode;
11232 if (regno == FPSCR_REG)
11233 return mode == PSImode;
11235 /* FIXME. This works around PR target/37633 for -O0. */
11236 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11238 unsigned int n = GET_MODE_SIZE (mode) / 8;
11240 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11241 && regno <= FIRST_GENERAL_REG + 14)
11242 return false;
11245 return true;
11248 /* Return true if a mode change from FROM to TO is invalid for registers
11249 in class RCLASS. */
11250 bool
11251 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11252 enum reg_class rclass)
11254 /* We want to enable the use of SUBREGs as a means to
11255 VEC_SELECT a single element of a vector. */
11256 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11257 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11259 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11261 if (TARGET_LITTLE_ENDIAN)
11263 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11264 return reg_classes_intersect_p (DF_REGS, rclass);
11266 else
11268 if (GET_MODE_SIZE (from) < 8)
11269 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11272 return 0;
11275 /* Return true if registers in machine mode MODE will likely be
11276 allocated to registers in small register classes. */
11278 bool
11279 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11281 return (! TARGET_SHMEDIA);
11284 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11285 that label is used. */
11287 void
11288 sh_mark_label (rtx address, int nuses)
11290 if (GOTOFF_P (address))
11292 /* Extract the label or symbol. */
11293 address = XEXP (address, 0);
11294 if (GET_CODE (address) == PLUS)
11295 address = XEXP (address, 0);
11296 address = XVECEXP (address, 0, 0);
11298 if (GET_CODE (address) == LABEL_REF
11299 && LABEL_P (XEXP (address, 0)))
11300 LABEL_NUSES (XEXP (address, 0)) += nuses;
11303 /* Compute extra cost of moving data between one register class
11304 and another. */
11306 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11307 uses this information. Hence, the general register <-> floating point
11308 register information here is not used for SFmode. */
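/* Worked example (added): for a DFmode move between general and FP register
   classes the formula below gives (8 + 7) / 8 = 1 times 12 without
   TARGET_FMOVD, 8 with it, or 4 on SHmedia. */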
11310 int
11311 sh_register_move_cost (enum machine_mode mode,
11312 enum reg_class srcclass, enum reg_class dstclass)
11314 if (dstclass == T_REGS || dstclass == PR_REGS)
11315 return 10;
11317 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11318 return 4;
11320 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11321 && REGCLASS_HAS_FP_REG (srcclass)
11322 && REGCLASS_HAS_FP_REG (dstclass))
11323 return 4;
11325 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11326 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11328 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11329 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11330 return 9;
11332 if ((REGCLASS_HAS_FP_REG (dstclass)
11333 && REGCLASS_HAS_GENERAL_REG (srcclass))
11334 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11335 && REGCLASS_HAS_FP_REG (srcclass)))
11336 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11337 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11339 if ((dstclass == FPUL_REGS
11340 && REGCLASS_HAS_GENERAL_REG (srcclass))
11341 || (srcclass == FPUL_REGS
11342 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11343 return 5;
11345 if ((dstclass == FPUL_REGS
11346 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11347 || (srcclass == FPUL_REGS
11348 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11349 return 7;
11351 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11352 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11353 return 20;
11355 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11356 if (TARGET_SHMEDIA
11357 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11359 if (sh_gettrcost >= 0)
11360 return sh_gettrcost;
11361 else if (!TARGET_PT_FIXED)
11362 return 100;
11365 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11366 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11367 return 4;
11369 if (TARGET_SHMEDIA
11370 || (TARGET_FMOVD
11371 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11372 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11373 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11375 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11378 static rtx emit_load_ptr (rtx, rtx);
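/* Load the ptr_mode value at address ADDR into REG, sign-extending it to
   Pmode when Pmode is wider than ptr_mode, and return the emitted move insn. */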
11380 static rtx
11381 emit_load_ptr (rtx reg, rtx addr)
11383 rtx mem = gen_const_mem (ptr_mode, addr);
11385 if (Pmode != ptr_mode)
11386 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11387 return emit_move_insn (reg, mem);
11388 }
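/* Output code for a thunk for FUNCTION: adjust the incoming `this' pointer
   by the constant DELTA and, if VCALL_OFFSET is nonzero, additionally by the
   value loaded VCALL_OFFSET bytes past the vtable pointer stored at the
   adjusted `this', then tail-call FUNCTION. */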
11390 static void
11391 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11392 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11393 tree function)
11395 CUMULATIVE_ARGS cum;
11396 int structure_value_byref = 0;
11397 rtx this_rtx, this_value, sibcall, insns, funexp;
11398 tree funtype = TREE_TYPE (function);
11399 int simple_add = CONST_OK_FOR_ADD (delta);
11400 int did_load = 0;
11401 rtx scratch0, scratch1, scratch2;
11402 unsigned i;
11404 reload_completed = 1;
11405 epilogue_completed = 1;
11406 current_function_uses_only_leaf_regs = 1;
11408 emit_note (NOTE_INSN_PROLOGUE_END);
11410 /* Find the "this" pointer. We have such a wide range of ABIs for the
11411 SH that it's best to do this completely machine independently.
11412 "this" is passed as first argument, unless a structure return pointer
11413 comes first, in which case "this" comes second. */
11414 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11415 #ifndef PCC_STATIC_STRUCT_RETURN
11416 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11417 structure_value_byref = 1;
11418 #endif /* not PCC_STATIC_STRUCT_RETURN */
11419 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11421 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11423 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11425 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11427 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11428 static chain pointer (even if you can't have nested virtual functions
11429 right now, someone might implement them sometime), and the rest of the
11430 registers are used for argument passing, are callee-saved, or reserved. */
11431 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11432 -ffixed-reg has been used. */
11433 if (! call_used_regs[0] || fixed_regs[0])
11434 error ("r0 needs to be available as a call-clobbered register");
11435 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11436 if (! TARGET_SH5)
11438 if (call_used_regs[1] && ! fixed_regs[1])
11439 scratch1 = gen_rtx_REG (ptr_mode, 1);
11440 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11441 to the location where struct return values are to be stored. */
11442 if (call_used_regs[3] && ! fixed_regs[3])
11443 scratch2 = gen_rtx_REG (Pmode, 3);
11445 else if (TARGET_SHMEDIA)
11447 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11448 if (i != REGNO (scratch0) &&
11449 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11451 scratch1 = gen_rtx_REG (ptr_mode, i);
11452 break;
11454 if (scratch1 == scratch0)
11455 error ("Need a second call-clobbered general purpose register");
11456 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11457 if (call_used_regs[i] && ! fixed_regs[i])
11459 scratch2 = gen_rtx_REG (Pmode, i);
11460 break;
11462 if (scratch2 == scratch0)
11463 error ("Need a call-clobbered target register");
11466 this_value = plus_constant (this_rtx, delta);
11467 if (vcall_offset
11468 && (simple_add || scratch0 != scratch1)
11469 && strict_memory_address_p (ptr_mode, this_value))
11471 emit_load_ptr (scratch0, this_value);
11472 did_load = 1;
11475 if (!delta)
11476 ; /* Do nothing. */
11477 else if (simple_add)
11478 emit_move_insn (this_rtx, this_value);
11479 else
11481 emit_move_insn (scratch1, GEN_INT (delta));
11482 emit_insn (gen_add2_insn (this_rtx, scratch1));
11485 if (vcall_offset)
11487 rtx offset_addr;
11489 if (!did_load)
11490 emit_load_ptr (scratch0, this_rtx);
11492 offset_addr = plus_constant (scratch0, vcall_offset);
11493 if (strict_memory_address_p (ptr_mode, offset_addr))
11494 ; /* Do nothing. */
11495 else if (! TARGET_SH5 && scratch0 != scratch1)
11497 /* scratch0 != scratch1, and we have indexed loads. Get better
11498 schedule by loading the offset into r1 and using an indexed
11499 load - then the load of r1 can issue before the load from
11500 (this_rtx + delta) finishes. */
11501 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11502 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11504 else if (CONST_OK_FOR_ADD (vcall_offset))
11506 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11507 offset_addr = scratch0;
11509 else if (scratch0 != scratch1)
11511 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11512 emit_insn (gen_add2_insn (scratch0, scratch1));
11513 offset_addr = scratch0;
11515 else
11516 gcc_unreachable (); /* FIXME */
11517 emit_load_ptr (scratch0, offset_addr);
11519 if (Pmode != ptr_mode)
11520 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11521 emit_insn (gen_add2_insn (this_rtx, scratch0));
11524 /* Generate a tail call to the target function. */
11525 if (! TREE_USED (function))
11527 assemble_external (function);
11528 TREE_USED (function) = 1;
11530 funexp = XEXP (DECL_RTL (function), 0);
11531 /* If the function is overridden, so is the thunk, hence we don't
11532 need GOT addressing even if this is a public symbol. */
11533 #if 0
11534 if (TARGET_SH1 && ! flag_weak)
11535 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11536 else
11537 #endif
11538 if (TARGET_SH2 && flag_pic)
11540 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11541 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11543 else
11545 if (TARGET_SHMEDIA && flag_pic)
11547 funexp = gen_sym2PIC (funexp);
11548 PUT_MODE (funexp, Pmode);
11550 emit_move_insn (scratch2, funexp);
11551 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11552 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11554 sibcall = emit_call_insn (sibcall);
11555 SIBLING_CALL_P (sibcall) = 1;
11556 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11557 emit_barrier ();
11559 /* Run just enough of rest_of_compilation to do scheduling and get
11560 the insns emitted. Note that use_thunk calls
11561 assemble_start_function and assemble_end_function. */
11563 insn_locators_alloc ();
11564 insns = get_insns ();
11566 if (optimize > 0)
11568 if (! cfun->cfg)
11569 init_flow (cfun);
11570 split_all_insns_noflow ();
11573 sh_reorg ();
11575 if (optimize > 0 && flag_delayed_branch)
11576 dbr_schedule (insns);
11578 shorten_branches (insns);
11579 final_start_function (insns, file, 1);
11580 final (insns, file, 1);
11581 final_end_function ();
11583 reload_completed = 0;
11584 epilogue_completed = 0;
11585 }
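/* Return an rtx for the address of the function named NAME, of kind KIND;
   when PIC addressing is needed the address is loaded into a register
   (TARGET if nonzero, otherwise a fresh pseudo).  If TARGET is nonzero the
   result is moved into TARGET and TARGET is returned. */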
11587 rtx
11588 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11590 rtx sym;
11592 /* If this is not an ordinary function, the name usually comes from a
11593 string literal or an sprintf buffer. Make sure we use the same
11594 string consistently, so that cse will be able to unify address loads. */
11595 if (kind != FUNCTION_ORDINARY)
11596 name = IDENTIFIER_POINTER (get_identifier (name));
11597 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11598 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11599 if (flag_pic)
11600 switch (kind)
11602 case FUNCTION_ORDINARY:
11603 break;
11604 case SFUNC_GOT:
11606 rtx reg = target ? target : gen_reg_rtx (Pmode);
11608 emit_insn (gen_symGOT2reg (reg, sym));
11609 sym = reg;
11610 break;
11612 case SFUNC_STATIC:
11614 /* ??? To allow cse to work, we use GOTOFF relocations.
11615 We could add combiner patterns to transform this into
11616 straight pc-relative calls with sym2PIC / bsrf when
11617 label load and function call are still 1:1 and in the
11618 same basic block during combine. */
11619 rtx reg = target ? target : gen_reg_rtx (Pmode);
11621 emit_insn (gen_symGOTOFF2reg (reg, sym));
11622 sym = reg;
11623 break;
11626 if (target && sym != target)
11628 emit_move_insn (target, sym);
11629 return target;
11631 return sym;
11634 /* Find the number of a general purpose register in S. */
11635 static int
11636 scavenge_reg (HARD_REG_SET *s)
11638 int r;
11639 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11640 if (TEST_HARD_REG_BIT (*s, r))
11641 return r;
11642 return -1;
11643 }
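/* Return an rtx that refers to the value the PR register (the return
   address) had on entry to the current function. */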
11645 rtx
11646 sh_get_pr_initial_val (void)
11648 rtx val;
11650 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11651 PR register on SHcompact, because it might be clobbered by the prologue.
11652 We check first if that is known to be the case. */
11653 if (TARGET_SHCOMPACT
11654 && ((crtl->args.info.call_cookie
11655 & ~ CALL_COOKIE_RET_TRAMP (1))
11656 || crtl->saves_all_registers))
11657 return gen_frame_mem (SImode, return_address_pointer_rtx);
11659 /* If we haven't finished rtl generation, there might be a nonlocal label
11660 that we haven't seen yet.
11661 ??? get_hard_reg_initial_val fails if it is called after register
11662 allocation has started, unless it has been called before for the
11663 same register. And even then, we end in trouble if we didn't use
11664 the register in the same basic block before. So call
11665 get_hard_reg_initial_val now and wrap it in an unspec if we might
11666 need to replace it. */
11667 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11668 combine can put the pseudo returned by get_hard_reg_initial_val into
11669 instructions that need a general purpose register, which will fail to
11670 be recognized when the pseudo becomes allocated to PR. */
11671 val
11672 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11673 if (TARGET_SH1)
11674 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11675 return val;
11676 }
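/* Expand an scc insn whose source compares the T register with a constant:
   operands[0] is the destination, operands[1] the comparison code,
   operands[2] the T register and operands[3] the constant.  Return nonzero
   if code was emitted, zero if the caller must use the default expansion. */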
11678 int
11679 sh_expand_t_scc (rtx operands[])
11681 enum rtx_code code = GET_CODE (operands[1]);
11682 rtx target = operands[0];
11683 rtx op0 = operands[2];
11684 rtx op1 = operands[3];
11685 rtx result = target;
11686 HOST_WIDE_INT val;
11688 if (!REG_P (op0) || REGNO (op0) != T_REG
11689 || !CONST_INT_P (op1))
11690 return 0;
11691 if (!REG_P (result))
11692 result = gen_reg_rtx (SImode);
11693 val = INTVAL (op1);
11694 if ((code == EQ && val == 1) || (code == NE && val == 0))
11695 emit_insn (gen_movt (result));
11696 else if (TARGET_SH2A && ((code == EQ && val == 0)
11697 || (code == NE && val == 1)))
11698 emit_insn (gen_xorsi3_movrt (result));
11699 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11701 emit_clobber (result);
11702 emit_insn (gen_subc (result, result, result));
11703 emit_insn (gen_addsi3 (result, result, const1_rtx));
11705 else if (code == EQ || code == NE)
11706 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11707 else
11708 return 0;
11709 if (result != target)
11710 emit_move_insn (target, result);
11711 return 1;
11714 /* INSN is an sfunc; return the rtx that describes the address used. */
11715 static rtx
11716 extract_sfunc_addr (rtx insn)
11718 rtx pattern, part = NULL_RTX;
11719 int len, i;
11721 pattern = PATTERN (insn);
11722 len = XVECLEN (pattern, 0);
11723 for (i = 0; i < len; i++)
11725 part = XVECEXP (pattern, 0, i);
11726 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11727 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11728 return XEXP (part, 0);
11730 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11731 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11734 /* Verify that the register in use_sfunc_addr still agrees with the address
11735 used in the sfunc. This prevents fill_slots_from_thread from changing
11736 use_sfunc_addr.
11737 INSN is the use_sfunc_addr instruction, and REG is the register it
11738 guards. */
11739 int
11740 check_use_sfunc_addr (rtx insn, rtx reg)
11742 /* Search for the sfunc. It should really come right after INSN. */
11743 while ((insn = NEXT_INSN (insn)))
11745 if (LABEL_P (insn) || JUMP_P (insn))
11746 break;
11747 if (! INSN_P (insn))
11748 continue;
11750 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11751 insn = XVECEXP (PATTERN (insn), 0, 0);
11752 if (GET_CODE (PATTERN (insn)) != PARALLEL
11753 || get_attr_type (insn) != TYPE_SFUNC)
11754 continue;
11755 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11757 gcc_unreachable ();
11760 /* This function returns a constant rtx that represents 2**15 / pi in
11761 SFmode.  It's used to scale SFmode angles, in radians, to a
11762 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11763 maps to 0x10000. */
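/* For example, an angle of pi/2 radians scales to (pi/2) * (2**15/pi)
   = 0x4000, i.e., one quarter of a full circle. */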
11765 static GTY(()) rtx sh_fsca_sf2int_rtx;
11767 rtx
11768 sh_fsca_sf2int (void)
11770 if (! sh_fsca_sf2int_rtx)
11772 REAL_VALUE_TYPE rv;
11774 real_from_string (&rv, "10430.378350470453");
11775 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11778 return sh_fsca_sf2int_rtx;
11781 /* This function returns a constant rtx that represents 2**15 / pi in
11782 DFmode.  It's used to scale DFmode angles, in radians, to a
11783 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11784 maps to 0x10000. */
11786 static GTY(()) rtx sh_fsca_df2int_rtx;
11788 rtx
11789 sh_fsca_df2int (void)
11791 if (! sh_fsca_df2int_rtx)
11793 REAL_VALUE_TYPE rv;
11795 real_from_string (&rv, "10430.378350470453");
11796 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11799 return sh_fsca_df2int_rtx;
11802 /* This function returns a constant rtx that represents pi / 2**15 in
11803 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11804 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11805 2*pi. */
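/* For example, 0x4000 (a quarter circle) maps back to
   0x4000 * (pi/2**15) = pi/2 radians. */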
11807 static GTY(()) rtx sh_fsca_int2sf_rtx;
11809 rtx
11810 sh_fsca_int2sf (void)
11812 if (! sh_fsca_int2sf_rtx)
11814 REAL_VALUE_TYPE rv;
11816 real_from_string (&rv, "9.587379924285257e-5");
11817 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11820 return sh_fsca_int2sf_rtx;
11823 /* Initialize the CUMULATIVE_ARGS structure. */
11825 void
11826 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11827 tree fntype,
11828 rtx libname ATTRIBUTE_UNUSED,
11829 tree fndecl,
11830 signed int n_named_args,
11831 enum machine_mode mode)
11833 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11834 pcum->free_single_fp_reg = 0;
11835 pcum->stack_regs = 0;
11836 pcum->byref_regs = 0;
11837 pcum->byref = 0;
11838 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11840 /* XXX - Should we check TARGET_HITACHI here ??? */
11841 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11843 if (fntype)
11845 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11846 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11847 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11848 pcum->arg_count [(int) SH_ARG_INT]
11849 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11851 pcum->call_cookie
11852 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11853 && pcum->arg_count [(int) SH_ARG_INT] == 0
11854 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11855 ? int_size_in_bytes (TREE_TYPE (fntype))
11856 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11857 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11858 == FIRST_RET_REG));
11860 else
11862 pcum->arg_count [(int) SH_ARG_INT] = 0;
11863 pcum->prototype_p = FALSE;
11864 if (mode != VOIDmode)
11866 pcum->call_cookie =
11867 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11868 && GET_MODE_SIZE (mode) > 4
11869 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11871 /* If the default ABI is the Renesas ABI then all library
11872 calls must assume that the library will be using the
11873 Renesas ABI. So if the function would return its result
11874 in memory then we must force the address of this memory
11875 block onto the stack. Ideally we would like to call
11876 targetm.calls.return_in_memory() here but we do not have
11877 the TYPE or the FNDECL available so we synthesize the
11878 contents of that function as best we can. */
11879 pcum->force_mem =
11880 (TARGET_DEFAULT & MASK_HITACHI)
11881 && (mode == BLKmode
11882 || (GET_MODE_SIZE (mode) > 4
11883 && !(mode == DFmode
11884 && TARGET_FPU_DOUBLE)));
11886 else
11888 pcum->call_cookie = 0;
11889 pcum->force_mem = FALSE;
11894 /* Replace any occurrence of FROM(n) in X with TO(n).  The function does
11895 not descend into CONST_DOUBLEs when doing the replacement.
11897 Note that copying is not done so X must not be shared unless all copies
11898 are to be modified.
11900 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11901 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11902 replacements[n*2+1] - and that we take mode changes into account.
11904 If a replacement is ambiguous, return NULL_RTX.
11906 If MODIFY is zero, don't modify any rtl in place,
11907 just return zero or nonzero for failure / success. */
11909 rtx
11910 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11912 int i, j;
11913 const char *fmt;
11915 /* The following prevents a loop from occurring when we change a MEM
11916 inside a CONST_DOUBLE onto the same CONST_DOUBLE. */
11917 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11918 return x;
11920 for (i = n_replacements - 1; i >= 0 ; i--)
11921 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11922 return replacements[i*2+1];
11924 /* Allow this function to make replacements in EXPR_LISTs. */
11925 if (x == 0)
11926 return 0;
11928 if (GET_CODE (x) == SUBREG)
11930 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11931 n_replacements, modify);
11933 if (CONST_INT_P (new_rtx))
11935 x = simplify_subreg (GET_MODE (x), new_rtx,
11936 GET_MODE (SUBREG_REG (x)),
11937 SUBREG_BYTE (x));
11938 if (! x)
11939 abort ();
11941 else if (modify)
11942 SUBREG_REG (x) = new_rtx;
11944 return x;
11946 else if (REG_P (x))
11948 unsigned regno = REGNO (x);
11949 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11950 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11951 rtx result = NULL_RTX;
11953 for (i = n_replacements - 1; i >= 0; i--)
11955 rtx from = replacements[i*2];
11956 rtx to = replacements[i*2+1];
11957 unsigned from_regno, from_nregs, to_regno, new_regno;
11959 if (!REG_P (from))
11960 continue;
11961 from_regno = REGNO (from);
11962 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11963 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11964 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11966 if (regno < from_regno
11967 || regno + nregs > from_regno + from_nregs
11968 || !REG_P (to)
11969 || result)
11970 return NULL_RTX;
11971 to_regno = REGNO (to);
11972 if (to_regno < FIRST_PSEUDO_REGISTER)
11974 new_regno = regno + to_regno - from_regno;
11975 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11976 != nregs)
11977 return NULL_RTX;
11978 result = gen_rtx_REG (GET_MODE (x), new_regno);
11980 else if (GET_MODE (x) <= GET_MODE (to))
11981 result = gen_lowpart_common (GET_MODE (x), to);
11982 else
11983 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11986 return result ? result : x;
11988 else if (GET_CODE (x) == ZERO_EXTEND)
11990 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11991 n_replacements, modify);
11993 if (CONST_INT_P (new_rtx))
11995 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11996 new_rtx, GET_MODE (XEXP (x, 0)));
11997 if (! x)
11998 abort ();
12000 else if (modify)
12001 XEXP (x, 0) = new_rtx;
12003 return x;
12006 fmt = GET_RTX_FORMAT (GET_CODE (x));
12007 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12009 rtx new_rtx;
12011 if (fmt[i] == 'e')
12013 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12014 n_replacements, modify);
12015 if (!new_rtx)
12016 return NULL_RTX;
12017 if (modify)
12018 XEXP (x, i) = new_rtx;
12020 else if (fmt[i] == 'E')
12021 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12023 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12024 n_replacements, modify);
12025 if (!new_rtx)
12026 return NULL_RTX;
12027 if (modify)
12028 XVECEXP (x, i, j) = new_rtx;
12032 return x;
12033 }
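/* Generate a TRUNCATE of X to MODE, looking through a SIGN_EXTEND or
   ZERO_EXTEND: if the extended value already has MODE it is returned
   unchanged, if it is wider than MODE it is truncated directly, and if it
   is narrower it is re-extended to MODE instead (only via SIGN_EXTEND when
   NEED_SIGN_EXT is nonzero). */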
12035 rtx
12036 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12038 enum rtx_code code = TRUNCATE;
12040 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12042 rtx inner = XEXP (x, 0);
12043 enum machine_mode inner_mode = GET_MODE (inner);
12045 if (inner_mode == mode)
12046 return inner;
12047 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12048 x = inner;
12049 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12050 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12052 code = GET_CODE (x);
12053 x = inner;
12056 return gen_rtx_fmt_e (code, mode, x);
12057 }
12059 /* Called via for_each_rtx after reload, to clean up truncates of
12060 registers that span multiple actual hard registers. */
12061 int
12062 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12064 rtx x = *p, reg;
12066 if (GET_CODE (x) != TRUNCATE)
12067 return 0;
12068 reg = XEXP (x, 0);
12069 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12071 enum machine_mode reg_mode = GET_MODE (reg);
12072 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12073 subreg_lowpart_offset (DImode, reg_mode));
12074 *(int*) n_changes += 1;
12075 return -1;
12077 return 0;
12080 /* Load and store depend on the highpart of the address. However,
12081 set_attr_alternative does not give well-defined results before reload,
12082 so we must look at the rtl ourselves to see if any of the feeding
12083 registers is used in a memref. */
12085 /* Called by sh_contains_memref_p via for_each_rtx. */
12086 static int
12087 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12089 return (MEM_P (*loc));
12092 /* Return nonzero iff INSN contains a MEM. */
12093 int
12094 sh_contains_memref_p (rtx insn)
12096 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12099 /* Return nonzero iff INSN loads a banked register. */
12100 int
12101 sh_loads_bankedreg_p (rtx insn)
12103 if (GET_CODE (PATTERN (insn)) == SET)
12105 rtx op = SET_DEST (PATTERN (insn));
12106 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12107 return 1;
12110 return 0;
12113 /* FNADDR is the MEM expression from a call expander. Return an address
12114 to use in an SHmedia insn pattern. */
12115 rtx
12116 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12118 int is_sym;
12120 fnaddr = XEXP (fnaddr, 0);
12121 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12122 if (flag_pic && is_sym)
12124 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12126 rtx reg = gen_reg_rtx (Pmode);
12128 /* We must not use GOTPLT for sibcalls, because PIC_REG
12129 must be restored before the PLT code gets to run. */
12130 if (is_sibcall)
12131 emit_insn (gen_symGOT2reg (reg, fnaddr));
12132 else
12133 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12134 fnaddr = reg;
12136 else
12138 fnaddr = gen_sym2PIC (fnaddr);
12139 PUT_MODE (fnaddr, Pmode);
12142 /* If ptabs might trap, make this visible to the rest of the compiler.
12143 We generally assume that symbols pertain to valid locations, but
12144 it is possible to generate invalid symbols with asm or linker tricks.
12145 In a list of functions where each returns its successor, an invalid
12146 symbol might denote an empty list. */
12147 if (!TARGET_PT_FIXED
12148 && (!is_sym || TARGET_INVALID_SYMBOLS)
12149 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12151 rtx tr = gen_reg_rtx (PDImode);
12153 emit_insn (gen_ptabs (tr, fnaddr));
12154 fnaddr = tr;
12156 else if (! target_reg_operand (fnaddr, Pmode))
12157 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12158 return fnaddr;
12159 }
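/* Implement TARGET_SECONDARY_RELOAD.  Return the class of register needed
   as an intermediate when copying X, a value of mode MODE, into (IN_P
   nonzero) or out of a register of class RCLASS_I, or NO_REGS when no
   intermediate register is required; in some cases SRI->icode is set to a
   special reload pattern instead. */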
12161 reg_class_t
12162 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12163 enum machine_mode mode, secondary_reload_info *sri)
12165 enum reg_class rclass = (enum reg_class) rclass_i;
12167 if (in_p)
12169 if (REGCLASS_HAS_FP_REG (rclass)
12170 && ! TARGET_SHMEDIA
12171 && immediate_operand ((x), mode)
12172 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12173 && mode == SFmode && fldi_ok ()))
12174 switch (mode)
12176 case SFmode:
12177 sri->icode = CODE_FOR_reload_insf__frn;
12178 return NO_REGS;
12179 case DFmode:
12180 sri->icode = CODE_FOR_reload_indf__frn;
12181 return NO_REGS;
12182 case SImode:
12183 /* ??? If we knew that we are in the appropriate mode -
12184 single precision - we could use a reload pattern directly. */
12185 return FPUL_REGS;
12186 default:
12187 abort ();
12189 if (rclass == FPUL_REGS
12190 && ((REG_P (x)
12191 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12192 || REGNO (x) == T_REG))
12193 || GET_CODE (x) == PLUS))
12194 return GENERAL_REGS;
12195 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12197 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12198 return GENERAL_REGS;
12199 else if (mode == SFmode)
12200 return FP_REGS;
12201 sri->icode = CODE_FOR_reload_insi__i_fpul;
12202 return NO_REGS;
12204 if (rclass == FPSCR_REGS
12205 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12206 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12207 return GENERAL_REGS;
12208 if (REGCLASS_HAS_FP_REG (rclass)
12209 && TARGET_SHMEDIA
12210 && immediate_operand (x, mode)
12211 && x != CONST0_RTX (GET_MODE (x))
12212 && GET_MODE (x) != V4SFmode)
12213 return GENERAL_REGS;
12214 if ((mode == QImode || mode == HImode)
12215 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12217 sri->icode = ((mode == QImode)
12218 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12219 return NO_REGS;
12221 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12222 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12223 return TARGET_REGS;
12224 } /* end of input-only processing. */
12226 if (((REGCLASS_HAS_FP_REG (rclass)
12227 && (REG_P (x)
12228 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12229 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12230 && TARGET_FMOVD))))
12231 || (REGCLASS_HAS_GENERAL_REG (rclass)
12232 && REG_P (x)
12233 && FP_REGISTER_P (REGNO (x))))
12234 && ! TARGET_SHMEDIA
12235 && (mode == SFmode || mode == SImode))
12236 return FPUL_REGS;
12237 if ((rclass == FPUL_REGS
12238 || (REGCLASS_HAS_FP_REG (rclass)
12239 && ! TARGET_SHMEDIA && mode == SImode))
12240 && (MEM_P (x)
12241 || (REG_P (x)
12242 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12243 || REGNO (x) == T_REG
12244 || system_reg_operand (x, VOIDmode)))))
12246 if (rclass == FPUL_REGS)
12247 return GENERAL_REGS;
12248 return FPUL_REGS;
12250 if ((rclass == TARGET_REGS
12251 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12252 && !satisfies_constraint_Csy (x)
12253 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12254 return GENERAL_REGS;
12255 if ((rclass == MAC_REGS || rclass == PR_REGS)
12256 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12257 && rclass != REGNO_REG_CLASS (REGNO (x)))
12258 return GENERAL_REGS;
12259 if (rclass != GENERAL_REGS && REG_P (x)
12260 && TARGET_REGISTER_P (REGNO (x)))
12261 return GENERAL_REGS;
12262 return NO_REGS;
12265 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12267 #include "gt-sh.h"