1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "integrate.h"
41 #include "dwarf2.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "real.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
48 #include "df.h"
49 #include "cfglayout.h"
50 #include "intl.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "ggc.h"
54 #include "gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
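/* Word index of the most / least significant word within a multiword
   value; this depends on the target endianness.  */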
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Provides the class number of the smallest class containing
110 reg number. */
112 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
114 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
150 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
151 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
152 GENERAL_REGS, GENERAL_REGS,
155 char sh_register_names[FIRST_PSEUDO_REGISTER] \
156 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
158 char sh_additional_register_names[ADDREGNAMES_SIZE] \
159 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
160 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
162 int assembler_dialect;
164 static bool shmedia_space_reserved_for_target_registers;
166 static bool sh_handle_option (size_t, const char *, int);
167 static void split_branches (rtx);
168 static int branch_dest (rtx);
169 static void force_into (rtx, rtx);
170 static void print_slot (rtx);
171 static rtx add_constant (rtx, enum machine_mode, rtx);
172 static void dump_table (rtx, rtx);
173 static int hi_const (rtx);
174 static int broken_move (rtx);
175 static int mova_p (rtx);
176 static rtx find_barrier (int, rtx, rtx);
177 static int noncall_uses_reg (rtx, rtx, rtx *);
178 static rtx gen_block_redirect (rtx, int, int);
179 static void sh_reorg (void);
180 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
181 static rtx frame_insn (rtx);
182 static rtx push (int);
183 static void pop (int);
184 static void push_regs (HARD_REG_SET *, int);
185 static int calc_live_regs (HARD_REG_SET *);
186 static HOST_WIDE_INT rounded_frame_size (int);
187 static rtx mark_constant_pool_use (rtx);
188 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_resbank_handler_attribute (tree *, tree,
190 tree, int, bool *);
191 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
192 tree, int, bool *);
193 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
196 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
197 static void sh_insert_attributes (tree, tree *);
198 static const char *sh_check_pch_target_flags (int);
199 static int sh_adjust_cost (rtx, rtx, rtx, int);
200 static int sh_issue_rate (void);
201 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
202 static short find_set_regmode_weight (rtx, enum machine_mode);
203 static short find_insn_regmode_weight (rtx, enum machine_mode);
204 static void find_regmode_weight (basic_block, enum machine_mode);
205 static int find_r0_life_regions (basic_block);
206 static void sh_md_init_global (FILE *, int, int);
207 static void sh_md_finish_global (FILE *, int);
208 static int rank_for_reorder (const void *, const void *);
209 static void swap_reorder (rtx *, int);
210 static void ready_reorder (rtx *, int);
211 static short high_pressure (enum machine_mode);
212 static int sh_reorder (FILE *, int, rtx *, int *, int);
213 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
214 static void sh_md_init (FILE *, int, int);
215 static int sh_variable_issue (FILE *, int, rtx, int);
217 static bool sh_function_ok_for_sibcall (tree, tree);
219 static bool sh_cannot_modify_jumps_p (void);
220 static enum reg_class sh_target_reg_class (void);
221 static bool sh_optimize_target_register_callee_saved (bool);
222 static bool sh_ms_bitfield_layout_p (const_tree);
224 static void sh_init_builtins (void);
225 static void sh_media_init_builtins (void);
226 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
227 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
228 static void sh_file_start (void);
229 static int flow_dependent_p (rtx, rtx);
230 static void flow_dependent_p_1 (rtx, const_rtx, void *);
231 static int shiftcosts (rtx);
232 static int andcosts (rtx);
233 static int addsubcosts (rtx);
234 static int multcosts (rtx);
235 static bool unspec_caller_rtx_p (rtx);
236 static bool sh_cannot_copy_insn_p (rtx);
237 static bool sh_rtx_costs (rtx, int, int, int *, bool);
238 static int sh_address_cost (rtx, bool);
239 static int sh_pr_n_sets (void);
240 static rtx sh_allocate_initial_value (rtx);
241 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
242 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
243 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
244 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
245 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
246 static int scavenge_reg (HARD_REG_SET *s);
247 struct save_schedule_s;
248 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
249 struct save_schedule_s *, int);
251 static rtx sh_struct_value_rtx (tree, int);
252 static bool sh_return_in_memory (const_tree, const_tree);
253 static rtx sh_builtin_saveregs (void);
254 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
255 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
256 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
257 static tree sh_build_builtin_va_list (void);
258 static void sh_va_start (tree, rtx);
259 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static enum machine_mode sh_promote_function_mode (const_tree type,
261 enum machine_mode,
262 int *punsignedp,
263 const_tree funtype,
264 int for_return);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 const_tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 const_tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static bool sh_scalar_mode_supported_p (enum machine_mode);
272 static int sh_dwarf_calling_convention (const_tree);
273 static void sh_encode_section_info (tree, rtx, int);
274 static int sh2a_function_vector_p (tree);
276 static const struct attribute_spec sh_attribute_table[] =
278 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
279 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
280 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
281 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
282 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
283 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
284 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
285 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
286 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
287 #ifdef SYMBIAN
288 /* Symbian support adds three new attributes:
289 dllexport - for exporting a function/variable that will live in a dll
290 dllimport - for importing a function/variable from a dll
292 Microsoft allows multiple declspecs in one __declspec, separating
293 them with spaces. We do NOT support this. Instead, use __declspec
294 multiple times. */
295 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
296 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
297 #endif
298 { NULL, 0, 0, false, false, false, NULL }
301 /* Initialize the GCC target structure. */
302 #undef TARGET_ATTRIBUTE_TABLE
303 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
305 /* The next two are used for debug info when compiling with -gdwarf. */
306 #undef TARGET_ASM_UNALIGNED_HI_OP
307 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
308 #undef TARGET_ASM_UNALIGNED_SI_OP
309 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
311 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
312 #undef TARGET_ASM_UNALIGNED_DI_OP
313 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
314 #undef TARGET_ASM_ALIGNED_DI_OP
315 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
317 #undef TARGET_ASM_FUNCTION_EPILOGUE
318 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
320 #undef TARGET_ASM_OUTPUT_MI_THUNK
321 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
323 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
324 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
326 #undef TARGET_ASM_FILE_START
327 #define TARGET_ASM_FILE_START sh_file_start
328 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
329 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
331 #undef TARGET_DEFAULT_TARGET_FLAGS
332 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
333 #undef TARGET_HANDLE_OPTION
334 #define TARGET_HANDLE_OPTION sh_handle_option
336 #undef TARGET_INSERT_ATTRIBUTES
337 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
339 #undef TARGET_SCHED_ADJUST_COST
340 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
342 #undef TARGET_SCHED_ISSUE_RATE
343 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
345 /* The next 5 hooks have been implemented for reenabling sched1. With the
346 help of these macros we are limiting the movement of insns in sched1 to
347 reduce the register pressure. The overall idea is to keep count of SImode
348 and SFmode regs required by already scheduled insns. When these counts
349    cross some threshold values, give priority to insns that free registers.
350    The insn that frees registers is most likely to be the insn with the lowest
351    LUID (original insn order); but such an insn might be sitting in the stalled
352    queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
353    up to a maximum of 8 cycles so that such insns may move from Q -> R.
355    The descriptions of the hooks are as below:
357 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
358 scheduler; it is called inside the sched_init function just after
359 find_insn_reg_weights function call. It is used to calculate the SImode
360 and SFmode weights of insns of basic blocks; much similar to what
361 find_insn_reg_weights does.
362 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
364 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
365 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
366 (Q)->(R).
368 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
369 high; reorder the ready queue so that the insn with lowest LUID will be
370 issued next.
372 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
373 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
375 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
376 can be returned from TARGET_SCHED_REORDER2.
378 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
380 #undef TARGET_SCHED_DFA_NEW_CYCLE
381 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
383 #undef TARGET_SCHED_INIT_GLOBAL
384 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
386 #undef TARGET_SCHED_FINISH_GLOBAL
387 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
389 #undef TARGET_SCHED_VARIABLE_ISSUE
390 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER sh_reorder
395 #undef TARGET_SCHED_REORDER2
396 #define TARGET_SCHED_REORDER2 sh_reorder2
398 #undef TARGET_SCHED_INIT
399 #define TARGET_SCHED_INIT sh_md_init
401 #undef TARGET_LEGITIMIZE_ADDRESS
402 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
404 #undef TARGET_CANNOT_MODIFY_JUMPS_P
405 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
406 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
407 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
408 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
409 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
410 sh_optimize_target_register_callee_saved
412 #undef TARGET_MS_BITFIELD_LAYOUT_P
413 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
415 #undef TARGET_INIT_BUILTINS
416 #define TARGET_INIT_BUILTINS sh_init_builtins
417 #undef TARGET_EXPAND_BUILTIN
418 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
420 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
421 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
423 #undef TARGET_CANNOT_COPY_INSN_P
424 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
425 #undef TARGET_RTX_COSTS
426 #define TARGET_RTX_COSTS sh_rtx_costs
427 #undef TARGET_ADDRESS_COST
428 #define TARGET_ADDRESS_COST sh_address_cost
429 #undef TARGET_ALLOCATE_INITIAL_VALUE
430 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
432 #undef TARGET_MACHINE_DEPENDENT_REORG
433 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
435 #undef TARGET_DWARF_REGISTER_SPAN
436 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
438 #ifdef HAVE_AS_TLS
439 #undef TARGET_HAVE_TLS
440 #define TARGET_HAVE_TLS true
441 #endif
443 #undef TARGET_PROMOTE_PROTOTYPES
444 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
445 #undef TARGET_PROMOTE_FUNCTION_MODE
446 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
448 #undef TARGET_STRUCT_VALUE_RTX
449 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
450 #undef TARGET_RETURN_IN_MEMORY
451 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
453 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
454 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
455 #undef TARGET_SETUP_INCOMING_VARARGS
456 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
457 #undef TARGET_STRICT_ARGUMENT_NAMING
458 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
459 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
460 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
461 #undef TARGET_MUST_PASS_IN_STACK
462 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
463 #undef TARGET_PASS_BY_REFERENCE
464 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
465 #undef TARGET_CALLEE_COPIES
466 #define TARGET_CALLEE_COPIES sh_callee_copies
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
470 #undef TARGET_BUILD_BUILTIN_VA_LIST
471 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
472 #undef TARGET_EXPAND_BUILTIN_VA_START
473 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
474 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
475 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
477 #undef TARGET_SCALAR_MODE_SUPPORTED_P
478 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
482 #undef TARGET_CHECK_PCH_TARGET_FLAGS
483 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
485 #undef TARGET_DWARF_CALLING_CONVENTION
486 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
488 /* Return regmode weight for insn. */
489 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
491 /* Return current register pressure for regmode. */
492 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
494 #undef TARGET_ENCODE_SECTION_INFO
495 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
497 #ifdef SYMBIAN
499 #undef TARGET_ENCODE_SECTION_INFO
500 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
501 #undef TARGET_STRIP_NAME_ENCODING
502 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
503 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
504 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
506 #endif /* SYMBIAN */
508 #undef TARGET_SECONDARY_RELOAD
509 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
511 #undef TARGET_LEGITIMATE_ADDRESS_P
512 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
514 /* Machine-specific symbol_ref flags. */
515 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
517 struct gcc_target targetm = TARGET_INITIALIZER;
519 /* Implement TARGET_HANDLE_OPTION. */
521 static bool
522 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
523 int value ATTRIBUTE_UNUSED)
525 switch (code)
527 case OPT_m1:
528 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
529 return true;
531 case OPT_m2:
532 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
533 return true;
535 case OPT_m2a:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
537 return true;
539 case OPT_m2a_nofpu:
540 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
541 return true;
543 case OPT_m2a_single:
544 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
545 return true;
547 case OPT_m2a_single_only:
548 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
549 return true;
551 case OPT_m2e:
552 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
553 return true;
555 case OPT_m3:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
557 return true;
559 case OPT_m3e:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
561 return true;
563 case OPT_m4:
564 case OPT_m4_100:
565 case OPT_m4_200:
566 case OPT_m4_300:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
568 return true;
570 case OPT_m4_nofpu:
571 case OPT_m4_100_nofpu:
572 case OPT_m4_200_nofpu:
573 case OPT_m4_300_nofpu:
574 case OPT_m4_340:
575 case OPT_m4_400:
576 case OPT_m4_500:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
578 return true;
580 case OPT_m4_single:
581 case OPT_m4_100_single:
582 case OPT_m4_200_single:
583 case OPT_m4_300_single:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
585 return true;
587 case OPT_m4_single_only:
588 case OPT_m4_100_single_only:
589 case OPT_m4_200_single_only:
590 case OPT_m4_300_single_only:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
592 return true;
594 case OPT_m4a:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
596 return true;
598 case OPT_m4a_nofpu:
599 case OPT_m4al:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
601 return true;
603 case OPT_m4a_single:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
605 return true;
607 case OPT_m4a_single_only:
608 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
609 return true;
611 case OPT_m5_32media:
612 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
613 return true;
615 case OPT_m5_32media_nofpu:
616 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
617 return true;
619 case OPT_m5_64media:
620 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
621 return true;
623 case OPT_m5_64media_nofpu:
624 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
625 return true;
627 case OPT_m5_compact:
628 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
629 return true;
631 case OPT_m5_compact_nofpu:
632 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
633 return true;
635 default:
636 return true;
640 /* Set default optimization options. */
641 void
642 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
644 if (level)
646 flag_omit_frame_pointer = 2;
647 if (!size)
648 sh_div_str = "inv:minlat";
650 if (size)
652 target_flags |= MASK_SMALLCODE;
653 sh_div_str = SH_DIV_STR_FOR_SIZE ;
655 else
656 TARGET_CBRANCHDI4 = 1;
657 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
658 haven't been parsed yet, hence we'd read only the default.
659 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
660 it's OK to always set flag_branch_target_load_optimize. */
661 if (level > 1)
663 flag_branch_target_load_optimize = 1;
664 if (!size)
665 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
667 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
668 here, so leave it to OVERRIDE_OPTIONS to set
669 flag_finite_math_only. We set it to 2 here so we know if the user
670 explicitly requested this to be on or off. */
671 flag_finite_math_only = 2;
672 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
673 the user explicitly requested this to be on or off. */
674 if (flag_schedule_insns > 0)
675 flag_schedule_insns = 2;
677 set_param_value ("simultaneous-prefetches", 2);
680 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
681 options, and do some machine dependent initialization. */
682 void
683 sh_override_options (void)
685 int regno;
687 SUBTARGET_OVERRIDE_OPTIONS;
688 if (flag_finite_math_only == 2)
689 flag_finite_math_only
690 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
691 if (TARGET_SH2E && !flag_finite_math_only)
692 target_flags |= MASK_IEEE;
693 sh_cpu = PROCESSOR_SH1;
694 assembler_dialect = 0;
695 if (TARGET_SH2)
696 sh_cpu = PROCESSOR_SH2;
697 if (TARGET_SH2E)
698 sh_cpu = PROCESSOR_SH2E;
699 if (TARGET_SH2A)
700 sh_cpu = PROCESSOR_SH2A;
701 if (TARGET_SH3)
702 sh_cpu = PROCESSOR_SH3;
703 if (TARGET_SH3E)
704 sh_cpu = PROCESSOR_SH3E;
705 if (TARGET_SH4)
707 assembler_dialect = 1;
708 sh_cpu = PROCESSOR_SH4;
710 if (TARGET_SH4A_ARCH)
712 assembler_dialect = 1;
713 sh_cpu = PROCESSOR_SH4A;
715 if (TARGET_SH5)
717 sh_cpu = PROCESSOR_SH5;
718 target_flags |= MASK_ALIGN_DOUBLE;
719 if (TARGET_SHMEDIA_FPU)
720 target_flags |= MASK_FMOVD;
721 if (TARGET_SHMEDIA)
723 /* There are no delay slots on SHmedia. */
724 flag_delayed_branch = 0;
725 /* Relaxation isn't yet supported for SHmedia */
726 target_flags &= ~MASK_RELAX;
727       /* After reload, if-conversion does little good but can cause
728 ICEs:
729 - find_if_block doesn't do anything for SH because we don't
730 have conditional execution patterns. (We use conditional
731 move patterns, which are handled differently, and only
732 before reload).
733 - find_cond_trap doesn't do anything for the SH because we
734 don't have conditional traps.
735 - find_if_case_1 uses redirect_edge_and_branch_force in
736 the only path that does an optimization, and this causes
737 an ICE when branch targets are in registers.
738 - find_if_case_2 doesn't do anything for the SHmedia after
739 reload except when it can redirect a tablejump - and
740 that's rather rare. */
741 flag_if_conversion2 = 0;
742 if (! strcmp (sh_div_str, "call"))
743 sh_div_strategy = SH_DIV_CALL;
744 else if (! strcmp (sh_div_str, "call2"))
745 sh_div_strategy = SH_DIV_CALL2;
746 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
747 sh_div_strategy = SH_DIV_FP;
748 else if (! strcmp (sh_div_str, "inv"))
749 sh_div_strategy = SH_DIV_INV;
750 else if (! strcmp (sh_div_str, "inv:minlat"))
751 sh_div_strategy = SH_DIV_INV_MINLAT;
752 else if (! strcmp (sh_div_str, "inv20u"))
753 sh_div_strategy = SH_DIV_INV20U;
754 else if (! strcmp (sh_div_str, "inv20l"))
755 sh_div_strategy = SH_DIV_INV20L;
756 else if (! strcmp (sh_div_str, "inv:call2"))
757 sh_div_strategy = SH_DIV_INV_CALL2;
758 else if (! strcmp (sh_div_str, "inv:call"))
759 sh_div_strategy = SH_DIV_INV_CALL;
760 else if (! strcmp (sh_div_str, "inv:fp"))
762 if (TARGET_FPU_ANY)
763 sh_div_strategy = SH_DIV_INV_FP;
764 else
765 sh_div_strategy = SH_DIV_INV;
767 TARGET_CBRANCHDI4 = 0;
768 /* Assembler CFI isn't yet fully supported for SHmedia. */
769 flag_dwarf2_cfi_asm = 0;
772 else
774       /* Only the sh64-elf assembler supports .quad properly.  */
775 targetm.asm_out.aligned_op.di = NULL;
776 targetm.asm_out.unaligned_op.di = NULL;
778 if (TARGET_SH1)
780 if (! strcmp (sh_div_str, "call-div1"))
781 sh_div_strategy = SH_DIV_CALL_DIV1;
782 else if (! strcmp (sh_div_str, "call-fp")
783 && (TARGET_FPU_DOUBLE
784 || (TARGET_HARD_SH4 && TARGET_SH2E)
785 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
786 sh_div_strategy = SH_DIV_CALL_FP;
787 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
788 sh_div_strategy = SH_DIV_CALL_TABLE;
789 else
790 /* Pick one that makes most sense for the target in general.
791 It is not much good to use different functions depending
792 on -Os, since then we'll end up with two different functions
793 when some of the code is compiled for size, and some for
794 speed. */
796 /* SH4 tends to emphasize speed. */
797 if (TARGET_HARD_SH4)
798 sh_div_strategy = SH_DIV_CALL_TABLE;
799 /* These have their own way of doing things. */
800 else if (TARGET_SH2A)
801 sh_div_strategy = SH_DIV_INTRINSIC;
802 /* ??? Should we use the integer SHmedia function instead? */
803 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
804 sh_div_strategy = SH_DIV_CALL_FP;
805 /* SH1 .. SH3 cores often go into small-footprint systems, so
806 default to the smallest implementation available. */
807 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
808 sh_div_strategy = SH_DIV_CALL_TABLE;
809 else
810 sh_div_strategy = SH_DIV_CALL_DIV1;
812 if (!TARGET_SH1)
813 TARGET_PRETEND_CMOVE = 0;
814 if (sh_divsi3_libfunc[0])
815 ; /* User supplied - leave it alone. */
816 else if (TARGET_DIVIDE_CALL_FP)
817 sh_divsi3_libfunc = "__sdivsi3_i4";
818 else if (TARGET_DIVIDE_CALL_TABLE)
819 sh_divsi3_libfunc = "__sdivsi3_i4i";
820 else if (TARGET_SH5)
821 sh_divsi3_libfunc = "__sdivsi3_1";
822 else
823 sh_divsi3_libfunc = "__sdivsi3";
824 if (sh_branch_cost == -1)
825 sh_branch_cost
826 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
828 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
829 if (! VALID_REGISTER_P (regno))
830 sh_register_names[regno][0] = '\0';
832 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
833 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
834 sh_additional_register_names[regno][0] = '\0';
836 if (flag_omit_frame_pointer == 2)
838 /* The debugging information is sufficient,
839 but gdb doesn't implement this yet */
840 if (0)
841 flag_omit_frame_pointer
842 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
843 else
844 flag_omit_frame_pointer = 0;
847 if ((flag_pic && ! TARGET_PREFERGOT)
848 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
849 flag_no_function_cse = 1;
851 if (SMALL_REGISTER_CLASSES)
853       /* Never run scheduling before reload, since that can
854          break global alloc, and it generates slower code anyway due
855          to the pressure on R0.  */
856       /* Enable sched1 for SH4 only if the user explicitly requests it.
857          When sched1 is enabled, the ready queue will be reordered by
858          the target hooks if pressure is high.  We cannot do this for
859          PIC, SH3 and lower, as they give spill failures for R0.  */
860 if (!TARGET_HARD_SH4 || flag_pic)
861 flag_schedule_insns = 0;
862 /* ??? Current exception handling places basic block boundaries
863          after call_insns.  This causes high pressure on R0 and gives
864 spill failures for R0 in reload. See PR 22553 and the thread
865 on gcc-patches
866 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
867 else if (flag_exceptions)
869 if (flag_schedule_insns == 1)
870 warning (0, "ignoring -fschedule-insns because of exception handling bug");
871 flag_schedule_insns = 0;
873 else if (flag_schedule_insns == 2)
874 flag_schedule_insns = 0;
877 /* Unwinding with -freorder-blocks-and-partition does not work on this
878      architecture, because it requires far jumps to labels crossing between
879      hot/cold sections, which are rejected on this architecture.  */
880 if (flag_reorder_blocks_and_partition)
882 if (flag_exceptions)
884 inform (input_location,
885 "-freorder-blocks-and-partition does not work with "
886 "exceptions on this architecture");
887 flag_reorder_blocks_and_partition = 0;
888 flag_reorder_blocks = 1;
890 else if (flag_unwind_tables)
892 inform (input_location,
893 "-freorder-blocks-and-partition does not support unwind "
894 "info on this architecture");
895 flag_reorder_blocks_and_partition = 0;
896 flag_reorder_blocks = 1;
900 if (align_loops == 0)
901 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
902 if (align_jumps == 0)
903 align_jumps = 1 << CACHE_LOG;
904 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
905 align_jumps = TARGET_SHMEDIA ? 4 : 2;
907 /* Allocation boundary (in *bytes*) for the code of a function.
908 SH1: 32 bit alignment is faster, because instructions are always
909 fetched as a pair from a longword boundary.
910 SH2 .. SH5 : align to cache line start. */
911 if (align_functions == 0)
912 align_functions
913 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
914 /* The linker relaxation code breaks when a function contains
915 alignments that are larger than that at the start of a
916 compilation unit. */
917 if (TARGET_RELAX)
919 int min_align
920 = align_loops > align_jumps ? align_loops : align_jumps;
922       /* Also take possible .long constants / mova tables into account.  */
923 if (min_align < 4)
924 min_align = 4;
925 if (align_functions < min_align)
926 align_functions = min_align;
929 if (sh_fixed_range_str)
930 sh_fix_range (sh_fixed_range_str);
933 /* Print the operand address in x to the stream. */
935 void
936 print_operand_address (FILE *stream, rtx x)
938 switch (GET_CODE (x))
940 case REG:
941 case SUBREG:
942 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
943 break;
945 case PLUS:
947 rtx base = XEXP (x, 0);
948 rtx index = XEXP (x, 1);
950 switch (GET_CODE (index))
952 case CONST_INT:
953 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
954 reg_names[true_regnum (base)]);
955 break;
957 case REG:
958 case SUBREG:
960 int base_num = true_regnum (base);
961 int index_num = true_regnum (index);
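	    /* One of the two registers is r0 (regno 0), so the other,
	       non-r0 register is the one with the larger register number.  */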
963 fprintf (stream, "@(r0,%s)",
964 reg_names[MAX (base_num, index_num)]);
965 break;
968 default:
969 gcc_unreachable ();
972 break;
974 case PRE_DEC:
975 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
976 break;
978 case POST_INC:
979 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
980 break;
982 default:
983 x = mark_constant_pool_use (x);
984 output_addr_const (stream, x);
985 break;
989 /* Print operand x (an rtx) in assembler syntax to file stream
990 according to modifier code.
992 '.' print a .s if insn needs delay slot
993 ',' print LOCAL_LABEL_PREFIX
994    '@'  print trap, rte or rts depending upon the function's interrupt attributes
995 '#' output a nop if there is nothing to put in the delay slot
996 ''' print likelihood suffix (/u for unlikely).
997 '>' print branch target if -fverbose-asm
998 'O' print a constant without the #
999 'R' print the LSW of a dp value - changes if in little endian
1000 'S' print the MSW of a dp value - changes if in little endian
1001 'T' print the next word of a dp value - same as 'R' in big endian mode.
1002 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1003 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1004 'N' print 'r63' if the operand is (const_int 0).
1005 'd' print a V2SF reg as dN instead of fpN.
1006 'm' print a pair `base,offset' or `base,index', for LD and ST.
1007 'U' Likewise for {LD,ST}{HI,LO}.
1008 'V' print the position of a single bit set.
1009 'W' print the position of a single bit cleared.
1010 't' print a memory address which is a register.
1011 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1012 'o' output an operator. */
1014 void
1015 print_operand (FILE *stream, rtx x, int code)
1017 int regno;
1018 enum machine_mode mode;
1020 switch (code)
1022 tree trapa_attr;
1024 case '.':
1025 if (final_sequence
1026 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1027 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1028 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1029 break;
1030 case ',':
1031 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1032 break;
1033 case '@':
1034 trapa_attr = lookup_attribute ("trap_exit",
1035 DECL_ATTRIBUTES (current_function_decl));
1036 if (trapa_attr)
1037 fprintf (stream, "trapa #%ld",
1038 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1039 else if (sh_cfun_interrupt_handler_p ())
1041 if (sh_cfun_resbank_handler_p ())
1042 fprintf (stream, "resbank\n");
1043 fprintf (stream, "rte");
1045 else
1046 fprintf (stream, "rts");
1047 break;
1048 case '#':
1049 /* Output a nop if there's nothing in the delay slot. */
1050 if (dbr_sequence_length () == 0)
1051 fprintf (stream, "\n\tnop");
1052 break;
1053 case '\'':
1055 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1057 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1058 fputs ("/u", stream);
1059 break;
1061 case '>':
1062 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1064 fputs ("\t! target: ", stream);
1065 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1067 break;
1068 case 'O':
1069 x = mark_constant_pool_use (x);
1070 output_addr_const (stream, x);
1071 break;
1072 /* N.B.: %R / %S / %T adjust memory addresses by four.
1073 For SHMEDIA, that means they can be used to access the first and
1074 second 32 bit part of a 64 bit (or larger) value that
1075 might be held in floating point registers or memory.
1076 While they can be used to access 64 bit parts of a larger value
1077 held in general purpose registers, that won't work with memory -
1078 neither for fp registers, since the frxx names are used. */
1079 case 'R':
1080 if (REG_P (x) || GET_CODE (x) == SUBREG)
1082 regno = true_regnum (x);
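	  /* For the LSW: floating point register pairs always hold the most
	     significant half first, so the LSW is the second (odd) register;
	     general registers use the endianness-dependent LSW word index.  */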
1083 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1084 fputs (reg_names[regno], (stream));
1086 else if (MEM_P (x))
1088 x = adjust_address (x, SImode, 4 * LSW);
1089 print_operand_address (stream, XEXP (x, 0));
1091 else
1093 rtx sub = NULL_RTX;
1095 mode = GET_MODE (x);
1096 if (mode == VOIDmode)
1097 mode = DImode;
1098 if (GET_MODE_SIZE (mode) >= 8)
1099 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1100 if (sub)
1101 print_operand (stream, sub, 0);
1102 else
1103 output_operand_lossage ("invalid operand to %%R");
1105 break;
1106 case 'S':
1107 if (REG_P (x) || GET_CODE (x) == SUBREG)
1109 regno = true_regnum (x);
1110 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1111 fputs (reg_names[regno], (stream));
1113 else if (MEM_P (x))
1115 x = adjust_address (x, SImode, 4 * MSW);
1116 print_operand_address (stream, XEXP (x, 0));
1118 else
1120 rtx sub = NULL_RTX;
1122 mode = GET_MODE (x);
1123 if (mode == VOIDmode)
1124 mode = DImode;
1125 if (GET_MODE_SIZE (mode) >= 8)
1126 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1127 if (sub)
1128 print_operand (stream, sub, 0);
1129 else
1130 output_operand_lossage ("invalid operand to %%S");
1132 break;
1133 case 'T':
1134 /* Next word of a double. */
1135 switch (GET_CODE (x))
1137 case REG:
1138 fputs (reg_names[REGNO (x) + 1], (stream));
1139 break;
1140 case MEM:
1141 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1142 && GET_CODE (XEXP (x, 0)) != POST_INC)
1143 x = adjust_address (x, SImode, 4);
1144 print_operand_address (stream, XEXP (x, 0));
1145 break;
1146 default:
1147 break;
1149 break;
1151 case 't':
1152 gcc_assert (MEM_P (x));
1153 x = XEXP (x, 0);
1154 switch (GET_CODE (x))
1156 case REG:
1157 case SUBREG:
1158 print_operand (stream, x, 0);
1159 break;
1160 default:
1161 break;
1163 break;
1165 case 'o':
1166 switch (GET_CODE (x))
1168 case PLUS: fputs ("add", stream); break;
1169 case MINUS: fputs ("sub", stream); break;
1170 case MULT: fputs ("mul", stream); break;
1171 case DIV: fputs ("div", stream); break;
1172 case EQ: fputs ("eq", stream); break;
1173 case NE: fputs ("ne", stream); break;
1174 case GT: case LT: fputs ("gt", stream); break;
1175 case GE: case LE: fputs ("ge", stream); break;
1176 case GTU: case LTU: fputs ("gtu", stream); break;
1177 case GEU: case LEU: fputs ("geu", stream); break;
1178 default:
1179 break;
1181 break;
1182 case 'M':
1183 if (TARGET_SHMEDIA)
1185 if (MEM_P (x)
1186 && GET_CODE (XEXP (x, 0)) == PLUS
1187 && (REG_P (XEXP (XEXP (x, 0), 1))
1188 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1189 fputc ('x', stream);
1191 else
1193 if (MEM_P (x))
1195 switch (GET_MODE (x))
1197 case QImode: fputs (".b", stream); break;
1198 case HImode: fputs (".w", stream); break;
1199 case SImode: fputs (".l", stream); break;
1200 case SFmode: fputs (".s", stream); break;
1201 case DFmode: fputs (".d", stream); break;
1202 default: gcc_unreachable ();
1206 break;
1208 case 'm':
1209 gcc_assert (MEM_P (x));
1210 x = XEXP (x, 0);
1211 /* Fall through. */
1212 case 'U':
1213 switch (GET_CODE (x))
1215 case REG:
1216 case SUBREG:
1217 print_operand (stream, x, 0);
1218 fputs (", 0", stream);
1219 break;
1221 case PLUS:
1222 print_operand (stream, XEXP (x, 0), 0);
1223 fputs (", ", stream);
1224 print_operand (stream, XEXP (x, 1), 0);
1225 break;
1227 default:
1228 gcc_unreachable ();
1230 break;
1232 case 'V':
1234 int num = exact_log2 (INTVAL (x));
1235 gcc_assert (num >= 0);
1236 fprintf (stream, "#%d", num);
1238 break;
1240 case 'W':
1242 int num = exact_log2 (~INTVAL (x));
1243 gcc_assert (num >= 0);
1244 fprintf (stream, "#%d", num);
1246 break;
1248 case 'd':
1249 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1251 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1252 break;
1254 case 'N':
1255 if (x == CONST0_RTX (GET_MODE (x)))
1257 fprintf ((stream), "r63");
1258 break;
1260 goto default_output;
1261 case 'u':
1262 if (CONST_INT_P (x))
1264 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1265 break;
1267 /* Fall through. */
1269 default_output:
1270 default:
1271 regno = 0;
1272 mode = GET_MODE (x);
1274 switch (GET_CODE (x))
1276 case TRUNCATE:
1278 rtx inner = XEXP (x, 0);
1279 int offset = 0;
1280 enum machine_mode inner_mode;
1282 /* We might see SUBREGs with vector mode registers inside. */
1283 if (GET_CODE (inner) == SUBREG
1284 && (GET_MODE_SIZE (GET_MODE (inner))
1285 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1286 && subreg_lowpart_p (inner))
1287 inner = SUBREG_REG (inner);
1288 if (CONST_INT_P (inner))
1290 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1291 goto default_output;
1293 inner_mode = GET_MODE (inner);
1294 if (GET_CODE (inner) == SUBREG
1295 && (GET_MODE_SIZE (GET_MODE (inner))
1296 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1297 && REG_P (SUBREG_REG (inner)))
1299 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1300 GET_MODE (SUBREG_REG (inner)),
1301 SUBREG_BYTE (inner),
1302 GET_MODE (inner));
1303 inner = SUBREG_REG (inner);
1305 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1306 abort ();
1307 /* Floating point register pairs are always big endian;
1308 general purpose registers are 64 bit wide. */
1309 regno = REGNO (inner);
1310 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1311 - HARD_REGNO_NREGS (regno, mode))
1312 + offset;
1313 x = inner;
1314 goto reg;
1316 case SIGN_EXTEND:
1317 x = XEXP (x, 0);
1318 goto reg;
1319 /* FIXME: We need this on SHmedia32 because reload generates
1320 some sign-extended HI or QI loads into DImode registers
1321 but, because Pmode is SImode, the address ends up with a
1322 subreg:SI of the DImode register. Maybe reload should be
1323 fixed so as to apply alter_subreg to such loads? */
1324 case IF_THEN_ELSE:
1325 gcc_assert (trapping_target_operand (x, VOIDmode));
1326 x = XEXP (XEXP (x, 2), 0);
1327 goto default_output;
1328 case SUBREG:
1329 gcc_assert (SUBREG_BYTE (x) == 0
1330 && REG_P (SUBREG_REG (x)));
1332 x = SUBREG_REG (x);
1333 /* Fall through. */
1335 reg:
1336 case REG:
1337 regno += REGNO (x);
1338 if (FP_REGISTER_P (regno)
1339 && mode == V16SFmode)
1340 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1341 else if (FP_REGISTER_P (REGNO (x))
1342 && mode == V4SFmode)
1343 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1344 else if (REG_P (x)
1345 && mode == V2SFmode)
1346 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1347 else if (FP_REGISTER_P (REGNO (x))
1348 && GET_MODE_SIZE (mode) > 4)
1349 fprintf ((stream), "d%s", reg_names[regno] + 1);
1350 else
1351 fputs (reg_names[regno], (stream));
1352 break;
1354 case MEM:
1355 output_address (XEXP (x, 0));
1356 break;
1358 default:
1359 if (TARGET_SH1)
1360 fputc ('#', stream);
1361 output_addr_const (stream, x);
1362 break;
1364 break;
1369 /* Encode symbol attributes of a SYMBOL_REF into its
1370 SYMBOL_REF_FLAGS. */
1371 static void
1372 sh_encode_section_info (tree decl, rtx rtl, int first)
1374 default_encode_section_info (decl, rtl, first);
1376 if (TREE_CODE (decl) == FUNCTION_DECL
1377 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1378 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1381 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1382 static void
1383 force_into (rtx value, rtx target)
1385 value = force_operand (value, target);
1386 if (! rtx_equal_p (value, target))
1387 emit_insn (gen_move_insn (target, value));
1390 /* Emit code to perform a block move. Choose the best method.
1392 OPERANDS[0] is the destination.
1393 OPERANDS[1] is the source.
1394 OPERANDS[2] is the size.
1395 OPERANDS[3] is the alignment safe to use. */
1397 int
1398 expand_block_move (rtx *operands)
1400 int align = INTVAL (operands[3]);
1401 int constp = (CONST_INT_P (operands[2]));
1402 int bytes = (constp ? INTVAL (operands[2]) : 0);
1404 if (! constp)
1405 return 0;
1407 /* If we could use mov.l to move words and dest is word-aligned, we
1408 can use movua.l for loads and still generate a relatively short
1409 and efficient sequence. */
1410 if (TARGET_SH4A_ARCH && align < 4
1411 && MEM_ALIGN (operands[0]) >= 32
1412 && can_move_by_pieces (bytes, 32))
1414 rtx dest = copy_rtx (operands[0]);
1415 rtx src = copy_rtx (operands[1]);
1416 /* We could use different pseudos for each copied word, but
1417 since movua can only load into r0, it's kind of
1418 pointless. */
1419 rtx temp = gen_reg_rtx (SImode);
1420 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1421 int copied = 0;
1423 while (copied + 4 <= bytes)
1425 rtx to = adjust_address (dest, SImode, copied);
1426 rtx from = adjust_automodify_address (src, BLKmode,
1427 src_addr, copied);
1429 set_mem_size (from, GEN_INT (4));
1430 emit_insn (gen_movua (temp, from));
1431 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1432 emit_move_insn (to, temp);
1433 copied += 4;
1436 if (copied < bytes)
1437 move_by_pieces (adjust_address (dest, BLKmode, copied),
1438 adjust_automodify_address (src, BLKmode,
1439 src_addr, copied),
1440 bytes - copied, align, 0);
1442 return 1;
1445 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1446 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1447 if (align < 4 || (bytes % 4 != 0))
1448 return 0;
1450 if (TARGET_HARD_SH4)
1452 if (bytes < 12)
1453 return 0;
1454 else if (bytes == 12)
1456 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1457 rtx r4 = gen_rtx_REG (SImode, 4);
1458 rtx r5 = gen_rtx_REG (SImode, 5);
1460 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1461 force_into (XEXP (operands[0], 0), r4);
1462 force_into (XEXP (operands[1], 0), r5);
1463 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1464 return 1;
1466 else if (! TARGET_SMALLCODE)
1468 const char *entry_name;
1469 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1470 int dwords;
1471 rtx r4 = gen_rtx_REG (SImode, 4);
1472 rtx r5 = gen_rtx_REG (SImode, 5);
1473 rtx r6 = gen_rtx_REG (SImode, 6);
1475 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1476 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1477 force_into (XEXP (operands[0], 0), r4);
1478 force_into (XEXP (operands[1], 0), r5);
1480 dwords = bytes >> 3;
1481 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1482 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1483 return 1;
1485 else
1486 return 0;
1488 if (bytes < 64)
1490 char entry[30];
1491 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1492 rtx r4 = gen_rtx_REG (SImode, 4);
1493 rtx r5 = gen_rtx_REG (SImode, 5);
1495 sprintf (entry, "__movmemSI%d", bytes);
1496 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1497 force_into (XEXP (operands[0], 0), r4);
1498 force_into (XEXP (operands[1], 0), r5);
1499 emit_insn (gen_block_move_real (func_addr_rtx));
1500 return 1;
1503 /* This is the same number of bytes as a memcpy call, but to a different
1504 less common function name, so this will occasionally use more space. */
1505 if (! TARGET_SMALLCODE)
1507 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1508 int final_switch, while_loop;
1509 rtx r4 = gen_rtx_REG (SImode, 4);
1510 rtx r5 = gen_rtx_REG (SImode, 5);
1511 rtx r6 = gen_rtx_REG (SImode, 6);
1513 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1514 force_into (XEXP (operands[0], 0), r4);
1515 force_into (XEXP (operands[1], 0), r5);
1517 /* r6 controls the size of the move. 16 is decremented from it
1518 for each 64 bytes moved. Then the negative bit left over is used
1519 as an index into a list of move instructions. e.g., a 72 byte move
1520 would be set up with size(r6) = 14, for one iteration through the
1521 big while loop, and a switch of -2 for the last part. */
1523 final_switch = 16 - ((bytes / 4) % 16);
1524 while_loop = ((bytes / 4) / 16 - 1) * 16;
1525 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1526 emit_insn (gen_block_lump_real (func_addr_rtx));
1527 return 1;
1530 return 0;
1533 /* Prepare operands for a move define_expand; specifically, one of the
1534 operands must be in a register. */
1536 int
1537 prepare_move_operands (rtx operands[], enum machine_mode mode)
1539 if ((mode == SImode || mode == DImode)
1540 && flag_pic
1541 && ! ((mode == Pmode || mode == ptr_mode)
1542 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1544 rtx temp;
1545 if (SYMBOLIC_CONST_P (operands[1]))
1547 if (MEM_P (operands[0]))
1548 operands[1] = force_reg (Pmode, operands[1]);
1549 else if (TARGET_SHMEDIA
1550 && GET_CODE (operands[1]) == LABEL_REF
1551 && target_reg_operand (operands[0], mode))
1552 /* It's ok. */;
1553 else
1555 temp = (!can_create_pseudo_p ()
1556 ? operands[0]
1557 : gen_reg_rtx (Pmode));
1558 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1561 else if (GET_CODE (operands[1]) == CONST
1562 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1563 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1565 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1566 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1567 mode, temp);
1568 operands[1] = expand_binop (mode, add_optab, temp,
1569 XEXP (XEXP (operands[1], 0), 1),
1570 (!can_create_pseudo_p ()
1571 ? temp
1572 : gen_reg_rtx (Pmode)),
1573 0, OPTAB_LIB_WIDEN);
1577 if (! reload_in_progress && ! reload_completed)
1579 /* Copy the source to a register if both operands aren't registers. */
1580 if (! register_operand (operands[0], mode)
1581 && ! sh_register_operand (operands[1], mode))
1582 operands[1] = copy_to_mode_reg (mode, operands[1]);
1584 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1586 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1587 except that we can't use that function because it is static. */
1588 rtx new_rtx = change_address (operands[0], mode, 0);
1589 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1590 operands[0] = new_rtx;
1593 /* This case can happen while generating code to move the result
1594 of a library call to the target. Reject `st r0,@(rX,rY)' because
1595 reload will fail to find a spill register for rX, since r0 is already
1596 being used for the source. */
1597 else if (TARGET_SH1
1598 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1599 && MEM_P (operands[0])
1600 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1601 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1602 operands[1] = copy_to_mode_reg (mode, operands[1]);
1605 if (mode == Pmode || mode == ptr_mode)
1607 rtx op0, op1, opc;
1608 enum tls_model tls_kind;
1610 op0 = operands[0];
1611 op1 = operands[1];
1612 if (GET_CODE (op1) == CONST
1613 && GET_CODE (XEXP (op1, 0)) == PLUS
1614 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1615 != TLS_MODEL_NONE))
1617 opc = XEXP (XEXP (op1, 0), 1);
1618 op1 = XEXP (XEXP (op1, 0), 0);
1620 else
1621 opc = NULL_RTX;
1623 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1625 rtx tga_op1, tga_ret, tmp, tmp2;
1627 switch (tls_kind)
1629 case TLS_MODEL_GLOBAL_DYNAMIC:
1630 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1631 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1632 op1 = tga_ret;
1633 break;
1635 case TLS_MODEL_LOCAL_DYNAMIC:
1636 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1637 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1639 tmp = gen_reg_rtx (Pmode);
1640 emit_move_insn (tmp, tga_ret);
1642 if (register_operand (op0, Pmode))
1643 tmp2 = op0;
1644 else
1645 tmp2 = gen_reg_rtx (Pmode);
1647 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1648 op1 = tmp2;
1649 break;
1651 case TLS_MODEL_INITIAL_EXEC:
1652 if (! flag_pic)
1654 /* Don't schedule insns for getting GOT address when
1655 the first scheduling is enabled, to avoid spill
1656 failures for R0. */
1657 if (flag_schedule_insns)
1658 emit_insn (gen_blockage ());
1659 emit_insn (gen_GOTaddr2picreg ());
1660 emit_use (gen_rtx_REG (SImode, PIC_REG));
1661 if (flag_schedule_insns)
1662 emit_insn (gen_blockage ());
1664 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1665 tmp = gen_sym2GOTTPOFF (op1);
1666 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1667 op1 = tga_op1;
1668 break;
1670 case TLS_MODEL_LOCAL_EXEC:
1671 tmp2 = gen_reg_rtx (Pmode);
1672 emit_insn (gen_load_gbr (tmp2));
1673 tmp = gen_reg_rtx (Pmode);
1674 emit_insn (gen_symTPOFF2reg (tmp, op1));
1676 if (register_operand (op0, Pmode))
1677 op1 = op0;
1678 else
1679 op1 = gen_reg_rtx (Pmode);
1681 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1682 break;
1684 default:
1685 gcc_unreachable ();
1687 if (opc)
1688 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1689 operands[1] = op1;
1693 return 0;
1696 enum rtx_code
1697 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1698 enum rtx_code comparison)
1700 rtx op1;
1701 rtx scratch = NULL_RTX;
1703 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1704 comparison = GET_CODE (operands[0]);
1705 else
1706 scratch = operands[4];
1707 if (CONST_INT_P (operands[1])
1708 && !CONST_INT_P (operands[2]))
1710 rtx tmp = operands[1];
1712 operands[1] = operands[2];
1713 operands[2] = tmp;
1714 comparison = swap_condition (comparison);
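      /* For borderline constants, adjust the constant and the comparison
	 code to an equivalent comparison that is cheaper to materialize,
	 e.g. x > -1 becomes x >= 0.  */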
1716 if (CONST_INT_P (operands[2]))
1718 HOST_WIDE_INT val = INTVAL (operands[2]);
1719 if ((val == -1 || val == -0x81)
1720 && (comparison == GT || comparison == LE))
1722 comparison = (comparison == GT) ? GE : LT;
1723 operands[2] = gen_int_mode (val + 1, mode);
1725 else if ((val == 1 || val == 0x80)
1726 && (comparison == GE || comparison == LT))
1728 comparison = (comparison == GE) ? GT : LE;
1729 operands[2] = gen_int_mode (val - 1, mode);
1731 else if (val == 1 && (comparison == GEU || comparison == LTU))
1733 comparison = (comparison == GEU) ? NE : EQ;
1734 operands[2] = CONST0_RTX (mode);
1736 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1738 comparison = (comparison == GEU) ? GTU : LEU;
1739 operands[2] = gen_int_mode (val - 1, mode);
1741 else if (val == 0 && (comparison == GTU || comparison == LEU))
1742 comparison = (comparison == GTU) ? NE : EQ;
1743 else if (mode == SImode
1744 && ((val == 0x7fffffff
1745 && (comparison == GTU || comparison == LEU))
1746 || ((unsigned HOST_WIDE_INT) val
1747 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1748 && (comparison == GEU || comparison == LTU))))
1750 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1751 operands[2] = CONST0_RTX (mode);
1754 op1 = operands[1];
1755 if (can_create_pseudo_p ())
1756 operands[1] = force_reg (mode, op1);
1757 /* When we are handling DImode comparisons, we want to keep constants so
1758 that we can optimize the component comparisons; however, memory loads
1759 are better issued as a whole so that they can be scheduled well.
1760 SImode equality comparisons allow I08 constants, but only when they
1761 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1762 into a register, that register might as well be r0, and we allow the
1763 constant. If it is already in a register, this is likely to be
1764 allocated to a different hard register, thus we load the constant into
1765 a register unless it is zero. */
1766 if (!REG_P (operands[2])
1767 && (!CONST_INT_P (operands[2])
1768 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1769 && ((comparison != EQ && comparison != NE)
1770 || (REG_P (op1) && REGNO (op1) != R0_REG)
1771 || !satisfies_constraint_I08 (operands[2])))))
1773 if (scratch && GET_MODE (scratch) == mode)
1775 emit_move_insn (scratch, operands[2]);
1776 operands[2] = scratch;
1778 else if (can_create_pseudo_p ())
1779 operands[2] = force_reg (mode, operands[2]);
1781 return comparison;
1784 void
1785 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1787 rtx (*branch_expander) (rtx) = gen_branch_true;
1788 rtx jump;
1790 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1791 switch (comparison)
1793 case NE: case LT: case LE: case LTU: case LEU:
1794 comparison = reverse_condition (comparison);
1795 branch_expander = gen_branch_false;
1796 default: ;
1798 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1799 gen_rtx_fmt_ee (comparison, SImode,
1800 operands[1], operands[2])));
1801 jump = emit_jump_insn (branch_expander (operands[3]));
1802 if (probability >= 0)
1803 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1807 /* ??? How should we distribute probabilities when more than one branch
1808 is generated? So far we only have some ad-hoc observations:
1809 - If the operands are random, they are likely to differ in both parts.
1810 - If comparing items in a hash chain, the operands are random or equal;
1811 operation should be EQ or NE.
1812 - If items are searched in an ordered tree from the root, we can expect
1813 the highpart to be unequal about half of the time; operation should be
1814 an inequality comparison, operands non-constant, and overall probability
1815 about 50%. Likewise for quicksort.
1816 - Range checks will often be made against constants. Even if we assume for
1817 simplicity an even distribution of the non-constant operand over a
1818 sub-range here, the same probability could be generated with differently
1819 wide sub-ranges - as long as the ratio of the part of the subrange that
1820 is before the threshold to the part that comes after the threshold stays
1821 the same. Thus, we can't really tell anything here;
1822 assuming random distribution is at least simple.  */
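/* Editor's illustration, not part of the original source: with
   REG_BR_PROB_BASE == 10000 and an overall probability prob == 5000 (50%),
   a non-constant DImode inequality comparison is split below as
     msw_taken_prob = prob / 2                              = 2500
     msw_skip_prob  = REG_BR_PROB_BASE * rev_prob
                      / (REG_BR_PROB_BASE + rev_prob)       = 3333
     lsw_taken_prob = prob                                  = 5000
   i.e. each branch is given roughly the share of the probability mass that
   is still undecided when it is reached.  */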
1825 bool
1826 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1828 enum rtx_code msw_taken, msw_skip, lsw_taken;
1829 rtx skip_label = NULL_RTX;
1830 rtx op1h, op1l, op2h, op2l;
1831 int num_branches;
1832 int prob, rev_prob;
1833 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1834 rtx scratch = operands[4];
1836 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1837 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1838 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1839 op1l = gen_lowpart (SImode, operands[1]);
1840 op2l = gen_lowpart (SImode, operands[2]);
1841 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1842 prob = split_branch_probability;
1843 rev_prob = REG_BR_PROB_BASE - prob;
1844 switch (comparison)
1846 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1847 That costs 1 cycle more when the first branch can be predicted taken,
1848 but saves us mispredicts because only one branch needs prediction.
1849 It also enables generating the cmpeqdi_t-1 pattern. */
1850 case EQ:
1851 if (TARGET_CMPEQDI_T)
1853 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1854 emit_jump_insn (gen_branch_true (operands[3]));
1855 return true;
1857 msw_skip = NE;
1858 lsw_taken = EQ;
1859 if (prob >= 0)
1861 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1863 msw_skip_prob = rev_prob;
1864 if (REG_BR_PROB_BASE <= 65535)
1865 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1866 else
1868 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1869 lsw_taken_prob
1870 = (prob
1871 ? (REG_BR_PROB_BASE
1872 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1873 / ((HOST_WIDEST_INT) prob << 32)))
1874 : 0);
1877 break;
1878 case NE:
1879 if (TARGET_CMPEQDI_T)
1881 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1882 emit_jump_insn (gen_branch_false (operands[3]));
1883 return true;
1885 msw_taken = NE;
1886 msw_taken_prob = prob;
1887 lsw_taken = NE;
1888 lsw_taken_prob = 0;
1889 break;
1890 case GTU: case GT:
1891 msw_taken = comparison;
1892 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1893 break;
1894 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1895 msw_skip = swap_condition (msw_taken);
1896 lsw_taken = GTU;
1897 break;
1898 case GEU: case GE:
1899 if (op2l == CONST0_RTX (SImode))
1900 msw_taken = comparison;
1901 else
1903 msw_taken = comparison == GE ? GT : GTU;
1904 msw_skip = swap_condition (msw_taken);
1905 lsw_taken = GEU;
1907 break;
1908 case LTU: case LT:
1909 msw_taken = comparison;
1910 if (op2l == CONST0_RTX (SImode))
1911 break;
1912 msw_skip = swap_condition (msw_taken);
1913 lsw_taken = LTU;
1914 break;
1915 case LEU: case LE:
1916 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1917 msw_taken = comparison;
1918 else
1920 lsw_taken = LEU;
1921 if (comparison == LE)
1922 msw_taken = LT;
1923 else if (op2h != CONST0_RTX (SImode))
1924 msw_taken = LTU;
1925 else
1926 break;
1927 msw_skip = swap_condition (msw_taken);
1929 break;
1930 default: return false;
1932 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1933 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1934 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1935 if (comparison != EQ && comparison != NE && num_branches > 1)
1937 if (!CONSTANT_P (operands[2])
1938 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1939 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1941 msw_taken_prob = prob / 2U;
1942 msw_skip_prob
1943 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1944 lsw_taken_prob = prob;
1946 else
1948 msw_taken_prob = prob;
1949 msw_skip_prob = REG_BR_PROB_BASE;
1950 /* ??? If we have a constant op2h, should we use that when
1951 calculating lsw_taken_prob? */
1952 lsw_taken_prob = prob;
1955 operands[1] = op1h;
1956 operands[2] = op2h;
1957 operands[4] = NULL_RTX;
1958 if (reload_completed
1959 && ! arith_reg_or_0_operand (op2h, SImode)
1960 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1961 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1962 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1964 emit_move_insn (scratch, operands[2]);
1965 operands[2] = scratch;
1967 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1968 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1969 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1971 rtx taken_label = operands[3];
1973 /* Operands were possibly modified, but msw_skip doesn't expect this.
1974 Always use the original ones. */
1975 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1977 operands[1] = op1h;
1978 operands[2] = op2h;
1981 operands[3] = skip_label = gen_label_rtx ();
1982 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1983 operands[3] = taken_label;
1985 operands[1] = op1l;
1986 operands[2] = op2l;
1987 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1989 if (reload_completed
1990 && ! arith_reg_or_0_operand (op2l, SImode)
1991 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
1993 emit_move_insn (scratch, operands[2]);
1994 operands[2] = scratch;
1996 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1998 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1999 emit_label (skip_label);
2000 return true;
2003 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2005 static void
2006 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2008 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2010 insn = gen_rtx_PARALLEL (VOIDmode,
2011 gen_rtvec (2, insn,
2012 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2013 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2015 else
2016 emit_insn (insn);
2019 /* Prepare the operands for an scc instruction; make sure that the
2020 compare has been done and the result is in T_REG. */
2021 void
2022 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2024 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2025 enum rtx_code oldcode = code;
2026 enum machine_mode mode;
2028 /* First need a compare insn. */
2029 switch (code)
2031 case NE:
2032 /* It isn't possible to handle this case. */
2033 gcc_unreachable ();
2034 case LT:
2035 code = GT;
2036 break;
2037 case LE:
2038 code = GE;
2039 break;
2040 case LTU:
2041 code = GTU;
2042 break;
2043 case LEU:
2044 code = GEU;
2045 break;
2046 default:
2047 break;
2049 if (code != oldcode)
2051 rtx tmp = op0;
2052 op0 = op1;
2053 op1 = tmp;
2056 mode = GET_MODE (op0);
2057 if (mode == VOIDmode)
2058 mode = GET_MODE (op1);
2060 op0 = force_reg (mode, op0);
2061 if ((code != EQ && code != NE
2062 && (op1 != const0_rtx
2063 || code == GTU || code == GEU || code == LTU || code == LEU))
2064 || (mode == DImode && op1 != const0_rtx)
2065 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2066 op1 = force_reg (mode, op1);
2068 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2069 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2070 mode);
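/* Added descriptive comment, not in the original source: for SHmedia, emit
   a store-flag sequence for comparison CODE on OP0/OP1 using cstore4_media,
   and return an rtx that compares the resulting register against zero
   (NE or EQ) for use as the final condition.  Returns NULL_RTX for UNEQ,
   UNGE, UNGT, UNLE, UNLT and LTGT, which have no cheap sequence.  */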
2074 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2075 rtx op0, rtx op1)
2077 rtx target = gen_reg_rtx (SImode);
2078 rtx tmp;
2080 gcc_assert (TARGET_SHMEDIA);
2081 switch (code)
2083 case EQ:
2084 case GT:
2085 case LT:
2086 case UNORDERED:
2087 case GTU:
2088 case LTU:
2089 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2090 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2091 code = NE;
2092 break;
2094 case NE:
2095 case GE:
2096 case LE:
2097 case ORDERED:
2098 case GEU:
2099 case LEU:
2100 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2101 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2102 code = EQ;
2103 break;
2105 case UNEQ:
2106 case UNGE:
2107 case UNGT:
2108 case UNLE:
2109 case UNLT:
2110 case LTGT:
2111 return NULL_RTX;
2113 default:
2114 gcc_unreachable ();
2117 if (mode == DImode)
2119 rtx t2 = gen_reg_rtx (DImode);
2120 emit_insn (gen_extendsidi2 (t2, target));
2121 target = t2;
2124 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2127 /* Called from the md file, set up the operands of a compare instruction. */
2129 void
2130 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2132 enum rtx_code code = GET_CODE (operands[0]);
2133 enum rtx_code branch_code;
2134 rtx op0 = operands[1];
2135 rtx op1 = operands[2];
2136 rtx insn, tem;
2137 bool need_ccmpeq = false;
2139 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2141 op0 = force_reg (mode, op0);
2142 op1 = force_reg (mode, op1);
2144 else
2146 if (code != EQ || mode == DImode)
2148 /* Force args into regs, since we can't use constants here. */
2149 op0 = force_reg (mode, op0);
2150 if (op1 != const0_rtx || code == GTU || code == GEU)
2151 op1 = force_reg (mode, op1);
2155 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2157 if (code == LT
2158 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2159 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2161 tem = op0, op0 = op1, op1 = tem;
2162 code = swap_condition (code);
2165 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2166 if (code == GE)
2168 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2169 need_ccmpeq = true;
2170 code = GT;
2173 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2174 to EQ/GT respectively. */
2175 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2178 switch (code)
2180 case EQ:
2181 case GT:
2182 case GE:
2183 case GTU:
2184 case GEU:
2185 branch_code = code;
2186 break;
2187 case NE:
2188 case LT:
2189 case LE:
2190 case LTU:
2191 case LEU:
2192 branch_code = reverse_condition (code);
2193 break;
2194 default:
2195 gcc_unreachable ();
2198 insn = gen_rtx_SET (VOIDmode,
2199 gen_rtx_REG (SImode, T_REG),
2200 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2202 sh_emit_set_t_insn (insn, mode);
2203 if (need_ccmpeq)
2204 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2206 if (branch_code == code)
2207 emit_jump_insn (gen_branch_true (operands[3]));
2208 else
2209 emit_jump_insn (gen_branch_false (operands[3]));
2212 void
2213 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2215 enum rtx_code code = GET_CODE (operands[1]);
2216 rtx op0 = operands[2];
2217 rtx op1 = operands[3];
2218 rtx lab = NULL_RTX;
2219 bool invert = false;
2220 rtx tem;
2222 op0 = force_reg (mode, op0);
2223 if ((code != EQ && code != NE
2224 && (op1 != const0_rtx
2225 || code == GTU || code == GEU || code == LTU || code == LEU))
2226 || (mode == DImode && op1 != const0_rtx)
2227 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2228 op1 = force_reg (mode, op1);
2230 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2232 if (code == LT || code == LE)
2234 code = swap_condition (code);
2235 tem = op0, op0 = op1, op1 = tem;
2237 if (code == GE)
2239 if (TARGET_IEEE)
2241 lab = gen_label_rtx ();
2242 sh_emit_scc_to_t (EQ, op0, op1);
2243 emit_jump_insn (gen_branch_true (lab));
2244 code = GT;
2246 else
2248 code = LT;
2249 invert = true;
2254 if (code == NE)
2256 code = EQ;
2257 invert = true;
2260 sh_emit_scc_to_t (code, op0, op1);
2261 if (lab)
2262 emit_label (lab);
2263 if (invert)
2264 emit_insn (gen_movnegt (operands[0]));
2265 else
2266 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2269 /* Functions to output assembly code. */
2271 /* Return a sequence of instructions to perform DI or DF move.
2273 Since the SH cannot move a DI or DF in one instruction, we have
2274 to take care when we see overlapping source and dest registers. */
2276 const char *
2277 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2278 enum machine_mode mode)
2280 rtx dst = operands[0];
2281 rtx src = operands[1];
2283 if (MEM_P (dst)
2284 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2285 return "mov.l %T1,%0\n\tmov.l %1,%0";
2287 if (register_operand (dst, mode)
2288 && register_operand (src, mode))
2290 if (REGNO (src) == MACH_REG)
2291 return "sts mach,%S0\n\tsts macl,%R0";
2293 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2294 when mov.d r1,r0 do r1->r0 then r2->r1. */
2296 if (REGNO (src) + 1 == REGNO (dst))
2297 return "mov %T1,%T0\n\tmov %1,%0";
2298 else
2299 return "mov %1,%0\n\tmov %T1,%T0";
2301 else if (CONST_INT_P (src))
2303 if (INTVAL (src) < 0)
2304 output_asm_insn ("mov #-1,%S0", operands);
2305 else
2306 output_asm_insn ("mov #0,%S0", operands);
2308 return "mov %1,%R0";
2310 else if (MEM_P (src))
2312 int ptrreg = -1;
2313 int dreg = REGNO (dst);
2314 rtx inside = XEXP (src, 0);
2316 switch (GET_CODE (inside))
2318 case REG:
2319 ptrreg = REGNO (inside);
2320 break;
2322 case SUBREG:
2323 ptrreg = subreg_regno (inside);
2324 break;
2326 case PLUS:
2327 ptrreg = REGNO (XEXP (inside, 0));
2328 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2329 an offsettable address. Unfortunately, offsettable addresses use
2330 QImode to check the offset, and a QImode offsettable address
2331 requires r0 for the other operand, which is not currently
2332 supported, so we can't use the 'o' constraint.
2333 Thus we must check for and handle r0+REG addresses here.
2334 We punt for now, since this is likely very rare. */
2335 gcc_assert (!REG_P (XEXP (inside, 1)));
2336 break;
2338 case LABEL_REF:
2339 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2340 case POST_INC:
2341 return "mov.l %1,%0\n\tmov.l %1,%T0";
2342 default:
2343 gcc_unreachable ();
2346 /* Work out the safe way to copy. Copy into the second half first. */
2347 if (dreg == ptrreg)
2348 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2351 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2354 /* Print an instruction which would have gone into a delay slot after
2355 another instruction, but couldn't because the other instruction expanded
2356 into a sequence where putting the slot insn at the end wouldn't work. */
2358 static void
2359 print_slot (rtx insn)
2361 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2363 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
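/* Added descriptive comment, not in the original source: output assembly
   for a conditional branch whose destination is out of range of a plain
   bt/bf.  The destination (or its offset) is emitted as a constant after
   the jump, loaded into a register and reached with braf or jmp; OP is the
   original branch target.  */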
2366 const char *
2367 output_far_jump (rtx insn, rtx op)
2369 struct { rtx lab, reg, op; } this_jmp;
2370 rtx braf_base_lab = NULL_RTX;
2371 const char *jump;
2372 int far;
2373 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2374 rtx prev;
2376 this_jmp.lab = gen_label_rtx ();
2378 if (TARGET_SH2
2379 && offset >= -32764
2380 && offset - get_attr_length (insn) <= 32766)
2382 far = 0;
2383 jump = "mov.w %O0,%1; braf %1";
2385 else
2387 far = 1;
2388 if (flag_pic)
2390 if (TARGET_SH2)
2391 jump = "mov.l %O0,%1; braf %1";
2392 else
2393 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2395 else
2396 jump = "mov.l %O0,%1; jmp @%1";
2398 /* If we have a scratch register available, use it. */
2399 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2400 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2402 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2403 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2404 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2405 output_asm_insn (jump, &this_jmp.lab);
2406 if (dbr_sequence_length ())
2407 print_slot (final_sequence);
2408 else
2409 output_asm_insn ("nop", 0);
2411 else
2413 /* Output the delay slot insn first if any. */
2414 if (dbr_sequence_length ())
2415 print_slot (final_sequence);
2417 this_jmp.reg = gen_rtx_REG (SImode, 13);
2418 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2419 Fortunately, MACL is fixed and call-clobbered, and we never
2420 need its value across jumps, so save r13 in it instead of in
2421 the stack. */
2422 if (TARGET_SH5)
2423 output_asm_insn ("lds r13, macl", 0);
2424 else
2425 output_asm_insn ("mov.l r13,@-r15", 0);
2426 output_asm_insn (jump, &this_jmp.lab);
2427 if (TARGET_SH5)
2428 output_asm_insn ("sts macl, r13", 0);
2429 else
2430 output_asm_insn ("mov.l @r15+,r13", 0);
2432 if (far && flag_pic && TARGET_SH2)
2434 braf_base_lab = gen_label_rtx ();
2435 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2436 CODE_LABEL_NUMBER (braf_base_lab));
2438 if (far)
2439 output_asm_insn (".align 2", 0);
2440 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2441 this_jmp.op = op;
2442 if (far && flag_pic)
2444 if (TARGET_SH2)
2445 this_jmp.lab = braf_base_lab;
2446 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2448 else
2449 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2450 return "";
2453 /* Local label counter, used for constants in the pool and inside
2454 pattern branches. */
2456 static int lf = 100;
2458 /* Output code for ordinary branches. */
2460 const char *
2461 output_branch (int logic, rtx insn, rtx *operands)
2463 switch (get_attr_length (insn))
2465 case 6:
2466 /* This can happen if filling the delay slot has caused a forward
2467 branch to exceed its range (we could reverse it, but only
2468 when we know we won't overextend other branches; this should
2469 best be handled by relaxation).
2470 It can also happen when other condbranches hoist delay slot insn
2471 from their destination, thus leading to code size increase.
2472 But the branch will still be in the range -4092..+4098 bytes. */
2474 if (! TARGET_RELAX)
2476 int label = lf++;
2477 /* The call to print_slot will clobber the operands. */
2478 rtx op0 = operands[0];
2480 /* If the instruction in the delay slot is annulled (true), then
2481 there is no delay slot where we can put it now. The only safe
2482 place for it is after the label. final will do that by default. */
2484 if (final_sequence
2485 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2486 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2488 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2489 ASSEMBLER_DIALECT ? "/" : ".", label);
2490 print_slot (final_sequence);
2492 else
2493 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2495 output_asm_insn ("bra\t%l0", &op0);
2496 fprintf (asm_out_file, "\tnop\n");
2497 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2499 return "";
2501 /* When relaxing, handle this like a short branch. The linker
2502 will fix it up if it still doesn't fit after relaxation. */
2503 case 2:
2504 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2506 /* These are for SH2e, in which we have to account for the
2507 extra nop because of the hardware bug in annulled branches. */
2508 case 8:
2509 if (! TARGET_RELAX)
2511 int label = lf++;
2513 gcc_assert (!final_sequence
2514 || !(INSN_ANNULLED_BRANCH_P
2515 (XVECEXP (final_sequence, 0, 0))));
2516 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2517 logic ? "f" : "t",
2518 ASSEMBLER_DIALECT ? "/" : ".", label);
2519 fprintf (asm_out_file, "\tnop\n");
2520 output_asm_insn ("bra\t%l0", operands);
2521 fprintf (asm_out_file, "\tnop\n");
2522 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2524 return "";
2526 /* When relaxing, fall through. */
2527 case 4:
2529 char buffer[10];
2531 sprintf (buffer, "b%s%ss\t%%l0",
2532 logic ? "t" : "f",
2533 ASSEMBLER_DIALECT ? "/" : ".");
2534 output_asm_insn (buffer, &operands[0]);
2535 return "nop";
2538 default:
2539 /* There should be no longer branches now - that would
2540 indicate that something has destroyed the branches set
2541 up in machine_dependent_reorg. */
2542 gcc_unreachable ();
2546 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2547 fill in operands 9 as a label to the successor insn.
2548 We try to use jump threading where possible.
2549 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2550 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2551 follow jmp and bt, if the address is in range. */
2552 const char *
2553 output_branchy_insn (enum rtx_code code, const char *templ,
2554 rtx insn, rtx *operands)
2556 rtx next_insn = NEXT_INSN (insn);
2558 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2560 rtx src = SET_SRC (PATTERN (next_insn));
2561 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2563 /* Following branch not taken */
2564 operands[9] = gen_label_rtx ();
2565 emit_label_after (operands[9], next_insn);
2566 INSN_ADDRESSES_NEW (operands[9],
2567 INSN_ADDRESSES (INSN_UID (next_insn))
2568 + get_attr_length (next_insn));
2569 return templ;
2571 else
2573 int offset = (branch_dest (next_insn)
2574 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2575 if (offset >= -252 && offset <= 258)
2577 if (GET_CODE (src) == IF_THEN_ELSE)
2578 /* branch_true */
2579 src = XEXP (src, 1);
2580 operands[9] = src;
2581 return templ;
2585 operands[9] = gen_label_rtx ();
2586 emit_label_after (operands[9], insn);
2587 INSN_ADDRESSES_NEW (operands[9],
2588 INSN_ADDRESSES (INSN_UID (insn))
2589 + get_attr_length (insn));
2590 return templ;
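/* Added descriptive comment, not in the original source: output the
   fcmp/eq that follows an fcmp/gt in an IEEE-conformant >= comparison;
   the bt to %l9 skips the second compare when the fcmp/gt has already
   set the T bit.  */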
2593 const char *
2594 output_ieee_ccmpeq (rtx insn, rtx *operands)
2596 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2597 insn, operands);
2600 /* Output the start of the assembler file. */
2602 static void
2603 sh_file_start (void)
2605 default_file_start ();
2607 #ifdef SYMBIAN
2608 /* Declare the .directive section before it is used. */
2609 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2610 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2611 #endif
2613 if (TARGET_ELF)
2614 /* We need to show the text section with the proper
2615 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2616 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2617 will complain. We can teach GAS specifically about the
2618 default attributes for our choice of text section, but
2619 then we would have to change GAS again if/when we change
2620 the text section name. */
2621 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2622 else
2623 /* Switch to the data section so that the coffsem symbol
2624 isn't in the text section. */
2625 switch_to_section (data_section);
2627 if (TARGET_LITTLE_ENDIAN)
2628 fputs ("\t.little\n", asm_out_file);
2630 if (!TARGET_ELF)
2632 if (TARGET_SHCOMPACT)
2633 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2634 else if (TARGET_SHMEDIA)
2635 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2636 TARGET_SHMEDIA64 ? 64 : 32);
2640 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2642 static bool
2643 unspec_caller_rtx_p (rtx pat)
2645 rtx base, offset;
2646 int i;
2648 split_const (pat, &base, &offset);
2649 if (GET_CODE (base) == UNSPEC)
2651 if (XINT (base, 1) == UNSPEC_CALLER)
2652 return true;
2653 for (i = 0; i < XVECLEN (base, 0); i++)
2654 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2655 return true;
2657 return false;
2660 /* Indicate that INSN cannot be duplicated. This is true for insn
2661 that generates a unique label. */
2663 static bool
2664 sh_cannot_copy_insn_p (rtx insn)
2666 rtx pat;
2668 if (!reload_completed || !flag_pic)
2669 return false;
2671 if (!NONJUMP_INSN_P (insn))
2672 return false;
2673 if (asm_noperands (insn) >= 0)
2674 return false;
2676 pat = PATTERN (insn);
2677 if (GET_CODE (pat) != SET)
2678 return false;
2679 pat = SET_SRC (pat);
2681 if (unspec_caller_rtx_p (pat))
2682 return true;
2684 return false;
2687 /* Actual number of instructions used to make a shift by N. */
2688 static const char ashiftrt_insns[] =
2689 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2691 /* Left shift and logical right shift are the same. */
2692 static const char shift_insns[] =
2693 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2695 /* Individual shift amounts needed to get the above length sequences.
2696 One bit right shifts clobber the T bit, so when possible, put one bit
2697 shifts in the middle of the sequence, so the ends are eligible for
2698 branch delay slots. */
2699 static const short shift_amounts[32][5] = {
2700 {0}, {1}, {2}, {2, 1},
2701 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2702 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2703 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2704 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2705 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2706 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2707 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
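/* Editor's illustration, not in the original source: a shift by 13 uses
   shift_amounts[13] = {8, 2, 1, 2}, i.e. shll8, shll2, shll, shll2, which
   matches shift_insns[13] == 4, with the single-bit shift kept away from
   the ends as described above.  A negative entry is a right shift, e.g.
   shift_amounts[14] = {8, -2, 8} reaches 14 as 8 - 2 + 8 in
   shift_insns[14] == 3 instructions.  */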
2709 /* Likewise, but for shift amounts < 16, up to three highmost bits
2710 might be clobbered. This is typically used when combined with some
2711 kind of sign or zero extension. */
2713 static const char ext_shift_insns[] =
2714 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2716 static const short ext_shift_amounts[32][4] = {
2717 {0}, {1}, {2}, {2, 1},
2718 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2719 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2720 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2721 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2722 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2723 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2724 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
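/* Editor's illustration, not in the original source: when the three
   highmost bits need not be preserved, a shift by 6 can use
   ext_shift_amounts[6] = {8, -2}, i.e. shll8 then shlr2, taking
   ext_shift_insns[6] == 2 instructions instead of the shift_insns[6] == 3
   required when every bit matters.  */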
2726 /* Assuming we have a value that has been sign-extended by at least one bit,
2727 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2728 to shift it by N without data loss, and quicker than by other means? */
2729 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
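/* Editor's note, not in the original source: ((n) | 8) == 15 holds exactly
   for n == 7 and n == 15 within the 0..31 range used here, both of whose
   ext_shift_amounts sequences end in a one-bit right shift that can be
   made arithmetic.  */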
2731 /* This is used in length attributes in sh.md to help compute the length
2732 of arbitrary constant shift instructions. */
2735 shift_insns_rtx (rtx insn)
2737 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2738 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2739 enum rtx_code shift_code = GET_CODE (set_src);
2741 switch (shift_code)
2743 case ASHIFTRT:
2744 return ashiftrt_insns[shift_count];
2745 case LSHIFTRT:
2746 case ASHIFT:
2747 return shift_insns[shift_count];
2748 default:
2749 gcc_unreachable ();
2753 /* Return the cost of a shift. */
2755 static inline int
2756 shiftcosts (rtx x)
2758 int value;
2760 if (TARGET_SHMEDIA)
2761 return 1;
2763 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2765 if (GET_MODE (x) == DImode
2766 && CONST_INT_P (XEXP (x, 1))
2767 && INTVAL (XEXP (x, 1)) == 1)
2768 return 2;
2770 /* Everything else is invalid, because there is no pattern for it. */
2771 return MAX_COST;
2773 /* If shift by a non constant, then this will be expensive. */
2774 if (!CONST_INT_P (XEXP (x, 1)))
2775 return SH_DYNAMIC_SHIFT_COST;
2777 /* Otherwise, return the true cost in instructions. Cope with out of range
2778 shift counts more or less arbitrarily. */
2779 value = INTVAL (XEXP (x, 1)) & 31;
2781 if (GET_CODE (x) == ASHIFTRT)
2783 int cost = ashiftrt_insns[value];
2784 /* If SH3, then we put the constant in a reg and use shad. */
2785 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2786 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2787 return cost;
2789 else
2790 return shift_insns[value];
2793 /* Return the cost of an AND operation. */
2795 static inline int
2796 andcosts (rtx x)
2798 int i;
2800 /* Anding with a register is a single cycle and instruction. */
2801 if (!CONST_INT_P (XEXP (x, 1)))
2802 return 1;
2804 i = INTVAL (XEXP (x, 1));
2806 if (TARGET_SHMEDIA)
2808 if (satisfies_constraint_I10 (XEXP (x, 1))
2809 || satisfies_constraint_J16 (XEXP (x, 1)))
2810 return 1;
2811 else
2812 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2815 /* These constants are single cycle extu.[bw] instructions. */
2816 if (i == 0xff || i == 0xffff)
2817 return 1;
2818 /* Constants that can be used in an and immediate instruction in a single
2819 cycle, but this requires r0, so make it a little more expensive. */
2820 if (CONST_OK_FOR_K08 (i))
2821 return 2;
2822 /* Constants that can be loaded with a mov immediate and an and.
2823 This case is probably unnecessary. */
2824 if (CONST_OK_FOR_I08 (i))
2825 return 2;
2826 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2827 This case is probably unnecessary. */
2828 return 3;
2831 /* Return the cost of an addition or a subtraction. */
2833 static inline int
2834 addsubcosts (rtx x)
2836 /* Adding a register is a single cycle insn. */
2837 if (REG_P (XEXP (x, 1))
2838 || GET_CODE (XEXP (x, 1)) == SUBREG)
2839 return 1;
2841 /* Likewise for small constants. */
2842 if (CONST_INT_P (XEXP (x, 1))
2843 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2844 return 1;
2846 if (TARGET_SHMEDIA)
2847 switch (GET_CODE (XEXP (x, 1)))
2849 case CONST:
2850 case LABEL_REF:
2851 case SYMBOL_REF:
2852 return TARGET_SHMEDIA64 ? 5 : 3;
2854 case CONST_INT:
2855 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2856 return 2;
2857 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2858 return 3;
2859 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2860 return 4;
2862 /* Fall through. */
2863 default:
2864 return 5;
2867 /* Any other constant requires a 2 cycle pc-relative load plus an
2868 addition. */
2869 return 3;
2872 /* Return the cost of a multiply. */
2873 static inline int
2874 multcosts (rtx x ATTRIBUTE_UNUSED)
2876 if (sh_multcost >= 0)
2877 return sh_multcost;
2878 if (TARGET_SHMEDIA)
2879 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2880 accept constants. Ideally, we would use a cost of one or two and
2881 add the cost of the operand, but disregard the latter when inside loops
2882 and loop invariant code motion is still to follow.
2883 Using a multiply first and splitting it later if it's a loss
2884 doesn't work because of different sign / zero extension semantics
2885 of multiplies vs. shifts. */
2886 return TARGET_SMALLCODE ? 2 : 3;
2888 if (TARGET_SH2)
2890 /* We have a mul insn, so we can never take more than the mul and the
2891 read of the mac reg, but count more because of the latency and extra
2892 reg usage. */
2893 if (TARGET_SMALLCODE)
2894 return 2;
2895 return 3;
2898 /* If we're aiming at small code, then just count the number of
2899 insns in a multiply call sequence. */
2900 if (TARGET_SMALLCODE)
2901 return 5;
2903 /* Otherwise count all the insns in the routine we'd be calling too. */
2904 return 20;
2907 /* Compute a (partial) cost for rtx X. Return true if the complete
2908 cost has been computed, and false if subexpressions should be
2909 scanned. In either case, *TOTAL contains the cost result. */
2911 static bool
2912 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2913 bool speed ATTRIBUTE_UNUSED)
2915 switch (code)
2917 case CONST_INT:
2918 if (TARGET_SHMEDIA)
2920 if (INTVAL (x) == 0)
2921 *total = 0;
2922 else if (outer_code == AND && and_operand ((x), DImode))
2923 *total = 0;
2924 else if ((outer_code == IOR || outer_code == XOR
2925 || outer_code == PLUS)
2926 && CONST_OK_FOR_I10 (INTVAL (x)))
2927 *total = 0;
2928 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2929 *total = COSTS_N_INSNS (outer_code != SET);
2930 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2931 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2932 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2933 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2934 else
2935 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2936 return true;
2938 if (CONST_OK_FOR_I08 (INTVAL (x)))
2939 *total = 0;
2940 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2941 && CONST_OK_FOR_K08 (INTVAL (x)))
2942 *total = 1;
2943 /* prepare_cmp_insn will force costly constants into registers before
2944 the cbranch[sd]i4 patterns can see them, so preserve potentially
2945 interesting ones not covered by I08 above. */
2946 else if (outer_code == COMPARE
2947 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2948 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2949 || INTVAL (x) == 0x7fffffff
2950 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2951 *total = 1;
2952 else
2953 *total = 8;
2954 return true;
2956 case CONST:
2957 case LABEL_REF:
2958 case SYMBOL_REF:
2959 if (TARGET_SHMEDIA64)
2960 *total = COSTS_N_INSNS (4);
2961 else if (TARGET_SHMEDIA32)
2962 *total = COSTS_N_INSNS (2);
2963 else
2964 *total = 5;
2965 return true;
2967 case CONST_DOUBLE:
2968 if (TARGET_SHMEDIA)
2969 *total = COSTS_N_INSNS (4);
2970 /* prepare_cmp_insn will force costly constants into registers before
2971 the cbranchdi4 pattern can see them, so preserve potentially
2972 interesting ones. */
2973 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2974 *total = 1;
2975 else
2976 *total = 10;
2977 return true;
2978 case CONST_VECTOR:
2979 if (x == CONST0_RTX (GET_MODE (x)))
2980 *total = 0;
2981 else if (sh_1el_vec (x, VOIDmode))
2982 *total = outer_code != SET;
2983 if (sh_rep_vec (x, VOIDmode))
2984 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2985 + (outer_code != SET));
2986 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2987 return true;
2989 case PLUS:
2990 case MINUS:
2991 *total = COSTS_N_INSNS (addsubcosts (x));
2992 return true;
2994 case AND:
2995 *total = COSTS_N_INSNS (andcosts (x));
2996 return true;
2998 case MULT:
2999 *total = COSTS_N_INSNS (multcosts (x));
3000 return true;
3002 case ASHIFT:
3003 case ASHIFTRT:
3004 case LSHIFTRT:
3005 *total = COSTS_N_INSNS (shiftcosts (x));
3006 return true;
3008 case DIV:
3009 case UDIV:
3010 case MOD:
3011 case UMOD:
3012 *total = COSTS_N_INSNS (20);
3013 return true;
3015 case PARALLEL:
3016 if (sh_1el_vec (x, VOIDmode))
3017 *total = outer_code != SET;
3018 if (sh_rep_vec (x, VOIDmode))
3019 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3020 + (outer_code != SET));
3021 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3022 return true;
3024 case FLOAT:
3025 case FIX:
3026 *total = 100;
3027 return true;
3029 default:
3030 return false;
3034 /* Compute the cost of an address. For the SH, all valid addresses are
3035 the same cost. Use a slightly higher cost for reg + reg addressing,
3036 since it increases pressure on r0. */
3038 static int
3039 sh_address_cost (rtx X,
3040 bool speed ATTRIBUTE_UNUSED)
3042 return (GET_CODE (X) == PLUS
3043 && ! CONSTANT_P (XEXP (X, 1))
3044 && ! TARGET_SHMEDIA ? 1 : 0);
3047 /* Code to expand a shift. */
3049 void
3050 gen_ashift (int type, int n, rtx reg)
3052 /* Negative values here come from the shift_amounts array. */
3053 if (n < 0)
3055 if (type == ASHIFT)
3056 type = LSHIFTRT;
3057 else
3058 type = ASHIFT;
3059 n = -n;
3062 switch (type)
3064 case ASHIFTRT:
3065 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3066 break;
3067 case LSHIFTRT:
3068 if (n == 1)
3069 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3070 else
3071 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3072 break;
3073 case ASHIFT:
3074 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3075 break;
3079 /* Same for HImode */
3081 void
3082 gen_ashift_hi (int type, int n, rtx reg)
3084 /* Negative values here come from the shift_amounts array. */
3085 if (n < 0)
3087 if (type == ASHIFT)
3088 type = LSHIFTRT;
3089 else
3090 type = ASHIFT;
3091 n = -n;
3094 switch (type)
3096 case ASHIFTRT:
3097 case LSHIFTRT:
3098 /* We don't have HImode right shift operations because using the
3099 ordinary 32 bit shift instructions for that doesn't generate proper
3100 zero/sign extension.
3101 gen_ashift_hi is only called in contexts where we know that the
3102 sign extension works out correctly. */
3104 int offset = 0;
3105 if (GET_CODE (reg) == SUBREG)
3107 offset = SUBREG_BYTE (reg);
3108 reg = SUBREG_REG (reg);
3110 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3111 break;
3113 case ASHIFT:
3114 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3115 break;
3119 /* Output RTL to split a constant shift into its component SH constant
3120 shift instructions. */
3122 void
3123 gen_shifty_op (int code, rtx *operands)
3125 int value = INTVAL (operands[2]);
3126 int max, i;
3128 /* Truncate the shift count in case it is out of bounds. */
3129 value = value & 31;
3131 if (value == 31)
3133 if (code == LSHIFTRT)
3135 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3136 emit_insn (gen_movt (operands[0]));
3137 return;
3139 else if (code == ASHIFT)
3141 /* There is a two instruction sequence for 31 bit left shifts,
3142 but it requires r0. */
3143 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3145 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3146 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3147 return;
3151 else if (value == 0)
3153 /* This can happen even when optimizing, if there were subregs before
3154 reload. Don't output a nop here, as this is never optimized away;
3155 use a no-op move instead. */
3156 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3157 return;
3160 max = shift_insns[value];
3161 for (i = 0; i < max; i++)
3162 gen_ashift (code, shift_amounts[value][i], operands[0]);
3165 /* Same as above, but optimized for values where the topmost bits don't
3166 matter. */
3168 void
3169 gen_shifty_hi_op (int code, rtx *operands)
3171 int value = INTVAL (operands[2]);
3172 int max, i;
3173 void (*gen_fun) (int, int, rtx);
3175 /* This operation is used by and_shl for SImode values with a few
3176 high bits known to be cleared. */
3177 value &= 31;
3178 if (value == 0)
3180 emit_insn (gen_nop ());
3181 return;
3184 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3185 if (code == ASHIFT)
3187 max = ext_shift_insns[value];
3188 for (i = 0; i < max; i++)
3189 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3191 else
3192 /* When shifting right, emit the shifts in reverse order, so that
3193 solitary negative values come first. */
3194 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3195 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3198 /* Output RTL for an arithmetic right shift. */
3200 /* ??? Rewrite to use super-optimizer sequences. */
3203 expand_ashiftrt (rtx *operands)
3205 rtx wrk;
3206 char func[18];
3207 int value;
3209 if (TARGET_SH3)
3211 if (!CONST_INT_P (operands[2]))
3213 rtx count = copy_to_mode_reg (SImode, operands[2]);
3214 emit_insn (gen_negsi2 (count, count));
3215 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3216 return 1;
3218 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3219 > 1 + SH_DYNAMIC_SHIFT_COST)
3221 rtx count
3222 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3223 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3224 return 1;
3227 if (!CONST_INT_P (operands[2]))
3228 return 0;
3230 value = INTVAL (operands[2]) & 31;
3232 if (value == 31)
3234 /* If we are called from abs expansion, arrange things so that we
3235 can use a single MT instruction that doesn't clobber the source,
3236 if LICM can hoist out the load of the constant zero. */
3237 if (currently_expanding_to_rtl)
3239 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3240 operands[1]));
3241 emit_insn (gen_mov_neg_si_t (operands[0]));
3242 return 1;
3244 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3245 return 1;
3247 else if (value >= 16 && value <= 19)
3249 wrk = gen_reg_rtx (SImode);
3250 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3251 value -= 16;
3252 while (value--)
3253 gen_ashift (ASHIFTRT, 1, wrk);
3254 emit_move_insn (operands[0], wrk);
3255 return 1;
3257 /* Expand a short sequence inline, longer call a magic routine. */
3258 else if (value <= 5)
3260 wrk = gen_reg_rtx (SImode);
3261 emit_move_insn (wrk, operands[1]);
3262 while (value--)
3263 gen_ashift (ASHIFTRT, 1, wrk);
3264 emit_move_insn (operands[0], wrk);
3265 return 1;
3268 wrk = gen_reg_rtx (Pmode);
3270 /* Load the value into an arg reg and call a helper. */
3271 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3272 sprintf (func, "__ashiftrt_r4_%d", value);
3273 function_symbol (wrk, func, SFUNC_STATIC);
3274 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3275 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3276 return 1;
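/* Added descriptive comment, not in the original source: return nonzero if
   shifting by the constant COUNT is cheaper done dynamically (count loaded
   into a register, 1 + SH_DYNAMIC_SHIFT_COST insns) than with the
   shift_insns[COUNT] constant-shift sequence.  */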
3280 sh_dynamicalize_shift_p (rtx count)
3282 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3285 /* Try to find a good way to implement the combiner pattern
3286 [(set (match_operand:SI 0 "register_operand" "r")
3287 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3288 (match_operand:SI 2 "const_int_operand" "n"))
3289 (match_operand:SI 3 "const_int_operand" "n"))) .
3290 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3291 return 0 for simple right / left or left/right shift combination.
3292 return 1 for a combination of shifts with zero_extend.
3293 return 2 for a combination of shifts with an AND that needs r0.
3294 return 3 for a combination of shifts with an AND that needs an extra
3295 scratch register, when the three highmost bits of the AND mask are clear.
3296 return 4 for a combination of shifts with an AND that needs an extra
3297 scratch register, when any of the three highmost bits of the AND mask
3298 is set.
3299 If ATTRP is set, store an initial right shift width in ATTRP[0],
3300 and the instruction length in ATTRP[1] . These values are not valid
3301 when returning 0.
3302 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3303 shift_amounts for the last shift value that is to be used before the
3304 sign extend. */
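/* Editor's illustration, not in the original source: for LEFT_RTX == 2 and
   MASK_RTX == 0x3fc, i.e. (x << 2) & 0x3fc, this returns 1: the cheapest
   sequence found is a zero extension followed by shifts (extu.b then shll2,
   cost 2), with ATTRP[0] = 0, ATTRP[1] = 2 and ATTRP[2] = -1 because no
   shift is needed before the zero extend.  */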
3306 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3308 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3309 int left = INTVAL (left_rtx), right;
3310 int best = 0;
3311 int cost, best_cost = 10000;
3312 int best_right = 0, best_len = 0;
3313 int i;
3314 int can_ext;
3316 if (left < 0 || left > 31)
3317 return 0;
3318 if (CONST_INT_P (mask_rtx))
3319 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3320 else
3321 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3322 /* Can this be expressed as a right shift / left shift pair? */
3323 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3324 right = exact_log2 (lsb);
3325 mask2 = ~(mask + lsb - 1);
3326 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3327 /* mask has no zero bits other than its trailing zeroes <==> ! mask2 */
3328 if (! mask2)
3329 best_cost = shift_insns[right] + shift_insns[right + left];
3330 /* mask has no trailing zeroes <==> ! right */
3331 else if (! right && mask2 == ~(lsb2 - 1))
3333 int late_right = exact_log2 (lsb2);
3334 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3336 /* Try to use zero extend. */
3337 if (mask2 == ~(lsb2 - 1))
3339 int width, first;
3341 for (width = 8; width <= 16; width += 8)
3343 /* Can we zero-extend right away? */
3344 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3346 cost
3347 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3348 if (cost < best_cost)
3350 best = 1;
3351 best_cost = cost;
3352 best_right = right;
3353 best_len = cost;
3354 if (attrp)
3355 attrp[2] = -1;
3357 continue;
3359 /* ??? Could try to put zero extend into initial right shift,
3360 or even shift a bit left before the right shift. */
3361 /* Determine value of first part of left shift, to get to the
3362 zero extend cut-off point. */
3363 first = width - exact_log2 (lsb2) + right;
3364 if (first >= 0 && right + left - first >= 0)
3366 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3367 + ext_shift_insns[right + left - first];
3368 if (cost < best_cost)
3370 best = 1;
3371 best_cost = cost;
3372 best_right = right;
3373 best_len = cost;
3374 if (attrp)
3375 attrp[2] = first;
3380 /* Try to use r0 AND pattern */
3381 for (i = 0; i <= 2; i++)
3383 if (i > right)
3384 break;
3385 if (! CONST_OK_FOR_K08 (mask >> i))
3386 continue;
3387 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3388 if (cost < best_cost)
3390 best = 2;
3391 best_cost = cost;
3392 best_right = i;
3393 best_len = cost - 1;
3396 /* Try to use a scratch register to hold the AND operand. */
3397 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3398 for (i = 0; i <= 2; i++)
3400 if (i > right)
3401 break;
3402 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3403 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3404 if (cost < best_cost)
3406 best = 4 - can_ext;
3407 best_cost = cost;
3408 best_right = i;
3409 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3413 if (attrp)
3415 attrp[0] = best_right;
3416 attrp[1] = best_len;
3418 return best;
3421 /* This is used in length attributes of the unnamed instructions
3422 corresponding to shl_and_kind return values of 1 and 2. */
3424 shl_and_length (rtx insn)
3426 rtx set_src, left_rtx, mask_rtx;
3427 int attributes[3];
3429 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3430 left_rtx = XEXP (XEXP (set_src, 0), 1);
3431 mask_rtx = XEXP (set_src, 1);
3432 shl_and_kind (left_rtx, mask_rtx, attributes);
3433 return attributes[1];
3436 /* This is used in length attribute of the and_shl_scratch instruction. */
3439 shl_and_scr_length (rtx insn)
3441 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3442 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3443 rtx op = XEXP (set_src, 0);
3444 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3445 op = XEXP (XEXP (op, 0), 0);
3446 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3449 /* Generate rtl for instructions for which shl_and_kind advised a particular
3450 method of generating them, i.e. returned a nonzero kind. */
3453 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3455 int attributes[3];
3456 unsigned HOST_WIDE_INT mask;
3457 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3458 int right, total_shift;
3459 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3461 right = attributes[0];
3462 total_shift = INTVAL (left_rtx) + right;
3463 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3464 switch (kind)
3466 default:
3467 return -1;
3468 case 1:
3470 int first = attributes[2];
3471 rtx operands[3];
3473 if (first < 0)
3475 emit_insn ((mask << right) <= 0xff
3476 ? gen_zero_extendqisi2 (dest,
3477 gen_lowpart (QImode, source))
3478 : gen_zero_extendhisi2 (dest,
3479 gen_lowpart (HImode, source)));
3480 source = dest;
3482 if (source != dest)
3483 emit_insn (gen_movsi (dest, source));
3484 operands[0] = dest;
3485 if (right)
3487 operands[2] = GEN_INT (right);
3488 gen_shifty_hi_op (LSHIFTRT, operands);
3490 if (first > 0)
3492 operands[2] = GEN_INT (first);
3493 gen_shifty_hi_op (ASHIFT, operands);
3494 total_shift -= first;
3495 mask <<= first;
3497 if (first >= 0)
3498 emit_insn (mask <= 0xff
3499 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3500 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3501 if (total_shift > 0)
3503 operands[2] = GEN_INT (total_shift);
3504 gen_shifty_hi_op (ASHIFT, operands);
3506 break;
3508 case 4:
3509 shift_gen_fun = gen_shifty_op;
3510 case 3:
3511 /* If the topmost bit that matters is set, set the topmost bits
3512 that don't matter. This way, we might be able to get a shorter
3513 signed constant. */
3514 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3515 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3516 case 2:
3517 /* Don't expand fine-grained when combining, because that will
3518 make the pattern fail. */
3519 if (currently_expanding_to_rtl
3520 || reload_in_progress || reload_completed)
3522 rtx operands[3];
3524 /* Cases 3 and 4 should be handled by this split
3525 only while combining */
3526 gcc_assert (kind <= 2);
3527 if (right)
3529 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3530 source = dest;
3532 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3533 if (total_shift)
3535 operands[0] = dest;
3536 operands[1] = dest;
3537 operands[2] = GEN_INT (total_shift);
3538 shift_gen_fun (ASHIFT, operands);
3540 break;
3542 else
3544 int neg = 0;
3545 if (kind != 4 && total_shift < 16)
3547 neg = -ext_shift_amounts[total_shift][1];
3548 if (neg > 0)
3549 neg -= ext_shift_amounts[total_shift][2];
3550 else
3551 neg = 0;
3553 emit_insn (gen_and_shl_scratch (dest, source,
3554 GEN_INT (right),
3555 GEN_INT (mask),
3556 GEN_INT (total_shift + neg),
3557 GEN_INT (neg)));
3558 emit_insn (gen_movsi (dest, dest));
3559 break;
3562 return 0;
3565 /* Try to find a good way to implement the combiner pattern
3566 [(set (match_operand:SI 0 "register_operand" "=r")
3567 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3568 (match_operand:SI 2 "const_int_operand" "n")
3569 (match_operand:SI 3 "const_int_operand" "n")
3570 (const_int 0)))
3571 (clobber (reg:SI T_REG))]
3572 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3573 return 0 for simple left / right shift combination.
3574 return 1 for left shift / 8 bit sign extend / left shift.
3575 return 2 for left shift / 16 bit sign extend / left shift.
3576 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3577 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3578 return 5 for left shift / 16 bit sign extend / right shift
3579 return 6 for < 8 bit sign extend / left shift.
3580 return 7 for < 8 bit sign extend / left shift / single right shift.
3581 If COSTP is nonzero, assign the calculated cost to *COSTP. */
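/* Editor's illustration, not in the original source: for LEFT_RTX == 2 and
   SIZE_RTX == 10, i.e. sign-extracting a 10 bit field from a value shifted
   left by 2, this returns kind 1: an 8 bit sign extend (exts.b) followed by
   the remaining left shift (shll2), two insns, beats the default
   combination of a 24 bit left shift and a 22 bit arithmetic right shift.  */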
3584 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3586 int left, size, insize, ext;
3587 int cost = 0, best_cost;
3588 int kind;
3590 left = INTVAL (left_rtx);
3591 size = INTVAL (size_rtx);
3592 insize = size - left;
3593 gcc_assert (insize > 0);
3594 /* Default to left / right shift. */
3595 kind = 0;
3596 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3597 if (size <= 16)
3599 /* 16 bit shift / sign extend / 16 bit shift */
3600 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3601 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3602 below, by alternative 3 or something even better. */
3603 if (cost < best_cost)
3605 kind = 5;
3606 best_cost = cost;
3609 /* Try a plain sign extend between two shifts. */
3610 for (ext = 16; ext >= insize; ext -= 8)
3612 if (ext <= size)
3614 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3615 if (cost < best_cost)
3617 kind = ext / (unsigned) 8;
3618 best_cost = cost;
3621 /* Check if we can do a sloppy shift with a final signed shift
3622 restoring the sign. */
3623 if (EXT_SHIFT_SIGNED (size - ext))
3624 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3625 /* If not, maybe it's still cheaper to do the second shift sloppy,
3626 and do a final sign extend? */
3627 else if (size <= 16)
3628 cost = ext_shift_insns[ext - insize] + 1
3629 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3630 else
3631 continue;
3632 if (cost < best_cost)
3634 kind = ext / (unsigned) 8 + 2;
3635 best_cost = cost;
3638 /* Check if we can sign extend in r0 */
3639 if (insize < 8)
3641 cost = 3 + shift_insns[left];
3642 if (cost < best_cost)
3644 kind = 6;
3645 best_cost = cost;
3647 /* Try the same with a final signed shift. */
3648 if (left < 31)
3650 cost = 3 + ext_shift_insns[left + 1] + 1;
3651 if (cost < best_cost)
3653 kind = 7;
3654 best_cost = cost;
3658 if (TARGET_SH3)
3660 /* Try to use a dynamic shift. */
3661 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3662 if (cost < best_cost)
3664 kind = 0;
3665 best_cost = cost;
3668 if (costp)
3669 *costp = cost;
3670 return kind;
3673 /* Function to be used in the length attribute of the instructions
3674 implementing this pattern. */
3677 shl_sext_length (rtx insn)
3679 rtx set_src, left_rtx, size_rtx;
3680 int cost;
3682 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3683 left_rtx = XEXP (XEXP (set_src, 0), 1);
3684 size_rtx = XEXP (set_src, 1);
3685 shl_sext_kind (left_rtx, size_rtx, &cost);
3686 return cost;
3689 /* Generate rtl for this pattern */
3692 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3694 int kind;
3695 int left, size, insize, cost;
3696 rtx operands[3];
3698 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3699 left = INTVAL (left_rtx);
3700 size = INTVAL (size_rtx);
3701 insize = size - left;
3702 switch (kind)
3704 case 1:
3705 case 2:
3706 case 3:
3707 case 4:
3709 int ext = kind & 1 ? 8 : 16;
3710 int shift2 = size - ext;
3712 /* Don't expand fine-grained when combining, because that will
3713 make the pattern fail. */
3714 if (! currently_expanding_to_rtl
3715 && ! reload_in_progress && ! reload_completed)
3717 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3718 emit_insn (gen_movsi (dest, source));
3719 break;
3721 if (dest != source)
3722 emit_insn (gen_movsi (dest, source));
3723 operands[0] = dest;
3724 if (ext - insize)
3726 operands[2] = GEN_INT (ext - insize);
3727 gen_shifty_hi_op (ASHIFT, operands);
3729 emit_insn (kind & 1
3730 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3731 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3732 if (kind <= 2)
3734 if (shift2)
3736 operands[2] = GEN_INT (shift2);
3737 gen_shifty_op (ASHIFT, operands);
3740 else
3742 if (shift2 > 0)
3744 if (EXT_SHIFT_SIGNED (shift2))
3746 operands[2] = GEN_INT (shift2 + 1);
3747 gen_shifty_op (ASHIFT, operands);
3748 operands[2] = const1_rtx;
3749 gen_shifty_op (ASHIFTRT, operands);
3750 break;
3752 operands[2] = GEN_INT (shift2);
3753 gen_shifty_hi_op (ASHIFT, operands);
3755 else if (shift2)
3757 operands[2] = GEN_INT (-shift2);
3758 gen_shifty_hi_op (LSHIFTRT, operands);
3760 emit_insn (size <= 8
3761 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3762 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3764 break;
3766 case 5:
3768 int i = 16 - size;
3769 if (! currently_expanding_to_rtl
3770 && ! reload_in_progress && ! reload_completed)
3771 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3772 else
3774 operands[0] = dest;
3775 operands[2] = GEN_INT (16 - insize);
3776 gen_shifty_hi_op (ASHIFT, operands);
3777 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3779 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3780 while (--i >= 0)
3781 gen_ashift (ASHIFTRT, 1, dest);
3782 break;
3784 case 6:
3785 case 7:
3786 /* Don't expand fine-grained when combining, because that will
3787 make the pattern fail. */
3788 if (! currently_expanding_to_rtl
3789 && ! reload_in_progress && ! reload_completed)
3791 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3792 emit_insn (gen_movsi (dest, source));
3793 break;
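/* Note (added explanation): the three insns emitted below sign-extend the
   low INSIZE bits without using a shift.  The AND keeps the low bits, the
   XOR with 1 << (insize - 1) flips the sign bit, and adding
   -(1 << (insize - 1)) restores it while propagating the sign into the
   upper bits.  */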
3795 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3796 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3797 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3798 operands[0] = dest;
3799 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3800 gen_shifty_op (ASHIFT, operands);
3801 if (kind == 7)
3802 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3803 break;
3804 default:
3805 return -1;
3807 return 0;
3810 /* Prefix a symbol_ref name with "datalabel". */
3813 gen_datalabel_ref (rtx sym)
3815 const char *str;
3817 if (GET_CODE (sym) == LABEL_REF)
3818 return gen_rtx_CONST (GET_MODE (sym),
3819 gen_rtx_UNSPEC (GET_MODE (sym),
3820 gen_rtvec (1, sym),
3821 UNSPEC_DATALABEL));
3823 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3825 str = XSTR (sym, 0);
3826 /* Share all SYMBOL_REF strings with the same value - that is important
3827 for cse. */
3828 str = IDENTIFIER_POINTER (get_identifier (str));
3829 XSTR (sym, 0) = str;
3831 return sym;
3835 static alloc_pool label_ref_list_pool;
3837 typedef struct label_ref_list_d
3839 rtx label;
3840 struct label_ref_list_d *next;
3841 } *label_ref_list_t;
3843 /* The SH cannot load a large constant into a register; constants have to
3844 come from a pc relative load. The reference of a pc relative load
3845 instruction must be less than 1k in front of the instruction. This
3846 means that we often have to dump a constant inside a function, and
3847 generate code to branch around it.
3849 It is important to minimize this, since the branches will slow things
3850 down and make things bigger.
3852 Worst case code looks like:
3854 mov.l L1,rn
3855 bra L2
3856 nop
3857 align
3858 L1: .long value
3859 L2:
3860 ..
3862 mov.l L3,rn
3863 bra L4
3864 nop
3865 align
3866 L3: .long value
3867 L4:
3868 ..
3870 We fix this by performing a scan before scheduling, which notices which
3871 instructions need to have their operands fetched from the constant table
3872 and builds the table.
3874 The algorithm is:
3876 scan, find an instruction which needs a pcrel move. Look forward, find the
3877 last barrier which is within MAX_COUNT bytes of the requirement.
3878 If there isn't one, make one. Process all the instructions between
3879 the find and the barrier.
3881 In the above example, we can tell that L3 is within 1k of L1, so
3882 the first move can be shrunk from the 3 insn+constant sequence into
3883 just 1 insn, and the constant moved to L3 to make:
3885 mov.l L1,rn
3886 ..
3887 mov.l L3,rn
3888 bra L4
3889 nop
3890 align
3891 L3:.long value
3892 L4:.long value
3894 Then the second move becomes the target for the shortening process. */
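/* A minimal sketch of the scan described above -- illustration only, not
   part of this file.  needs_pcrel_load_p () and find_or_make_barrier ()
   are hypothetical helpers; the real implementation is sh_reorg ()
   together with find_barrier (), add_constant () and dump_table () below.  */
#if 0
static void
constant_pool_scan_sketch (void)
{
  rtx insn, barrier;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (needs_pcrel_load_p (insn))
      {
	/* Look forward for the last barrier within range of INSN;
	   create one near the end of the range if there is none.  */
	barrier = find_or_make_barrier (insn, MAX_COUNT);

	/* Rewrite every pcrel move between INSN and BARRIER to load from
	   the table (cf. add_constant), then emit the table after BARRIER
	   (cf. dump_table) and continue scanning from there.  */
	for (; insn != barrier; insn = NEXT_INSN (insn))
	  if (needs_pcrel_load_p (insn))
	    ; /* ...add the constant to the pool.  */
      }
}
#endif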
3896 typedef struct
3898 rtx value; /* Value in table. */
3899 rtx label; /* Label of value. */
3900 label_ref_list_t wend; /* End of window. */
3901 enum machine_mode mode; /* Mode of value. */
3903 /* True if this constant is accessed as part of a post-increment
3904 sequence. Note that HImode constants are never accessed in this way. */
3905 bool part_of_sequence_p;
3906 } pool_node;
3908 /* The maximum number of constants that can fit into one pool, since
3909 constants in the range 0..510 are at least 2 bytes long, and in the
3910 range from there to 1018 at least 4 bytes. */
3912 #define MAX_POOL_SIZE 372
3913 static pool_node pool_vector[MAX_POOL_SIZE];
3914 static int pool_size;
3915 static rtx pool_window_label;
3916 static int pool_window_last;
3918 static int max_labelno_before_reorg;
3920 /* ??? If we need a constant in HImode which is the truncated value of a
3921 constant we need in SImode, we could combine the two entries thus saving
3922 two bytes. Is this common enough to be worth the effort of implementing
3923 it? */
3925 /* ??? This stuff should be done at the same time that we shorten branches.
3926 As it is now, we must assume that all branches are the maximum size, and
3927 this causes us to almost always output constant pools sooner than
3928 necessary. */
3930 /* Add a constant to the pool and return its label. */
3932 static rtx
3933 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3935 int i;
3936 rtx lab, new_rtx;
3937 label_ref_list_t ref, newref;
3939 /* First see if we've already got it. */
3940 for (i = 0; i < pool_size; i++)
3942 if (x->code == pool_vector[i].value->code
3943 && mode == pool_vector[i].mode)
3945 if (x->code == CODE_LABEL)
3947 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3948 continue;
3950 if (rtx_equal_p (x, pool_vector[i].value))
3952 lab = new_rtx = 0;
3953 if (! last_value
3954 || ! i
3955 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3957 new_rtx = gen_label_rtx ();
3958 LABEL_REFS (new_rtx) = pool_vector[i].label;
3959 pool_vector[i].label = lab = new_rtx;
3961 if (lab && pool_window_label)
3963 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3964 newref->label = pool_window_label;
3965 ref = pool_vector[pool_window_last].wend;
3966 newref->next = ref;
3967 pool_vector[pool_window_last].wend = newref;
3969 if (new_rtx)
3970 pool_window_label = new_rtx;
3971 pool_window_last = i;
3972 return lab;
3977 /* Need a new one. */
3978 pool_vector[pool_size].value = x;
3979 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3981 lab = 0;
3982 pool_vector[pool_size - 1].part_of_sequence_p = true;
3984 else
3985 lab = gen_label_rtx ();
3986 pool_vector[pool_size].mode = mode;
3987 pool_vector[pool_size].label = lab;
3988 pool_vector[pool_size].wend = NULL;
3989 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3990 if (lab && pool_window_label)
3992 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3993 newref->label = pool_window_label;
3994 ref = pool_vector[pool_window_last].wend;
3995 newref->next = ref;
3996 pool_vector[pool_window_last].wend = newref;
3998 if (lab)
3999 pool_window_label = lab;
4000 pool_window_last = pool_size;
4001 pool_size++;
4002 return lab;
4005 /* Output the literal table. START, if nonzero, is the first instruction
4006 this table is needed for, and also indicates that there is at least one
4007 casesi_worker_2 instruction; we have to emit the operand3 labels from
4008 these insns at a 4-byte aligned position. BARRIER is the barrier
4009 after which we are to place the table. */
4011 static void
4012 dump_table (rtx start, rtx barrier)
4014 rtx scan = barrier;
4015 int i;
4016 int need_align = 1;
4017 rtx lab;
4018 label_ref_list_t ref;
4019 int have_df = 0;
4021 /* Do two passes, first time dump out the HI sized constants. */
4023 for (i = 0; i < pool_size; i++)
4025 pool_node *p = &pool_vector[i];
4027 if (p->mode == HImode)
4029 if (need_align)
4031 scan = emit_insn_after (gen_align_2 (), scan);
4032 need_align = 0;
4034 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4035 scan = emit_label_after (lab, scan);
4036 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4037 scan);
4038 for (ref = p->wend; ref; ref = ref->next)
4040 lab = ref->label;
4041 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4044 else if (p->mode == DFmode)
4045 have_df = 1;
4048 need_align = 1;
4050 if (start)
4052 scan = emit_insn_after (gen_align_4 (), scan);
4053 need_align = 0;
4054 for (; start != barrier; start = NEXT_INSN (start))
4055 if (NONJUMP_INSN_P (start)
4056 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4058 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4059 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4061 scan = emit_label_after (lab, scan);
4064 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4066 rtx align_insn = NULL_RTX;
4068 scan = emit_label_after (gen_label_rtx (), scan);
4069 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4070 need_align = 0;
4072 for (i = 0; i < pool_size; i++)
4074 pool_node *p = &pool_vector[i];
4076 switch (p->mode)
4078 case HImode:
4079 break;
4080 case SImode:
4081 case SFmode:
4082 if (align_insn && !p->part_of_sequence_p)
4084 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4085 emit_label_before (lab, align_insn);
4086 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4087 align_insn);
4088 for (ref = p->wend; ref; ref = ref->next)
4090 lab = ref->label;
4091 emit_insn_before (gen_consttable_window_end (lab),
4092 align_insn);
4094 delete_insn (align_insn);
4095 align_insn = NULL_RTX;
4096 continue;
4098 else
4100 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4101 scan = emit_label_after (lab, scan);
4102 scan = emit_insn_after (gen_consttable_4 (p->value,
4103 const0_rtx), scan);
4104 need_align = ! need_align;
4106 break;
4107 case DFmode:
4108 if (need_align)
4110 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4111 align_insn = scan;
4112 need_align = 0;
4114 case DImode:
4115 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4116 scan = emit_label_after (lab, scan);
4117 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4118 scan);
4119 break;
4120 default:
4121 gcc_unreachable ();
4124 if (p->mode != HImode)
4126 for (ref = p->wend; ref; ref = ref->next)
4128 lab = ref->label;
4129 scan = emit_insn_after (gen_consttable_window_end (lab),
4130 scan);
4135 pool_size = 0;
4138 for (i = 0; i < pool_size; i++)
4140 pool_node *p = &pool_vector[i];
4142 switch (p->mode)
4144 case HImode:
4145 break;
4146 case SImode:
4147 case SFmode:
4148 if (need_align)
4150 need_align = 0;
4151 scan = emit_label_after (gen_label_rtx (), scan);
4152 scan = emit_insn_after (gen_align_4 (), scan);
4154 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4155 scan = emit_label_after (lab, scan);
4156 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4157 scan);
4158 break;
4159 case DFmode:
4160 case DImode:
4161 if (need_align)
4163 need_align = 0;
4164 scan = emit_label_after (gen_label_rtx (), scan);
4165 scan = emit_insn_after (gen_align_4 (), scan);
4167 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4168 scan = emit_label_after (lab, scan);
4169 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4170 scan);
4171 break;
4172 default:
4173 gcc_unreachable ();
4176 if (p->mode != HImode)
4178 for (ref = p->wend; ref; ref = ref->next)
4180 lab = ref->label;
4181 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4186 scan = emit_insn_after (gen_consttable_end (), scan);
4187 scan = emit_barrier_after (scan);
4188 pool_size = 0;
4189 pool_window_label = NULL_RTX;
4190 pool_window_last = 0;
4193 /* Return nonzero if constant would be an ok source for a
4194 mov.w instead of a mov.l. */
4196 static int
4197 hi_const (rtx src)
4199 return (CONST_INT_P (src)
4200 && INTVAL (src) >= -32768
4201 && INTVAL (src) <= 32767);
4204 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4206 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4208 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4209 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4210 need to fix it if the input value is CONST_OK_FOR_I08. */
4212 static int
4213 broken_move (rtx insn)
4215 if (NONJUMP_INSN_P (insn))
4217 rtx pat = PATTERN (insn);
4218 if (GET_CODE (pat) == PARALLEL)
4219 pat = XVECEXP (pat, 0, 0);
4220 if (GET_CODE (pat) == SET
4221 /* We can load any 8-bit value if we don't care what the high
4222 order bits end up as. */
4223 && GET_MODE (SET_DEST (pat)) != QImode
4224 && (CONSTANT_P (SET_SRC (pat))
4225 /* Match mova_const. */
4226 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4227 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4228 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4229 && ! (TARGET_SH2E
4230 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4231 && (fp_zero_operand (SET_SRC (pat))
4232 || fp_one_operand (SET_SRC (pat)))
4233 /* In general we don't know the current setting of fpscr, so disable fldi.
4234 There is an exception if this was a register-register move
4235 before reload - and hence it was ascertained that we have
4236 single precision setting - and in a post-reload optimization
4237 we changed this to do a constant load. In that case
4238 we don't have an r0 clobber, hence we must use fldi. */
4239 && (TARGET_FMOVD
4240 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4241 == SCRATCH))
4242 && REG_P (SET_DEST (pat))
4243 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4244 && ! (TARGET_SH2A
4245 && GET_MODE (SET_DEST (pat)) == SImode
4246 && (satisfies_constraint_I20 (SET_SRC (pat))
4247 || satisfies_constraint_I28 (SET_SRC (pat))))
4248 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4249 return 1;
4252 return 0;
4255 static int
4256 mova_p (rtx insn)
4258 return (NONJUMP_INSN_P (insn)
4259 && GET_CODE (PATTERN (insn)) == SET
4260 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4261 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4262 /* Don't match mova_const. */
4263 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4266 /* Fix up a mova from a switch that went out of range. */
4267 static void
4268 fixup_mova (rtx mova)
4270 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4271 if (! flag_pic)
4273 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4274 INSN_CODE (mova) = -1;
4276 else
4278 rtx worker = mova;
4279 rtx lab = gen_label_rtx ();
4280 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4284 worker = NEXT_INSN (worker);
4285 gcc_assert (worker
4286 && !LABEL_P (worker)
4287 && !JUMP_P (worker));
4288 } while (NOTE_P (worker)
4289 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4290 wpat = PATTERN (worker);
4291 wpat0 = XVECEXP (wpat, 0, 0);
4292 wpat1 = XVECEXP (wpat, 0, 1);
4293 wsrc = SET_SRC (wpat0);
4294 PATTERN (worker) = (gen_casesi_worker_2
4295 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4296 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4297 XEXP (wpat1, 0)));
4298 INSN_CODE (worker) = -1;
4299 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4300 base = gen_rtx_LABEL_REF (Pmode, lab);
4301 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4302 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4303 INSN_CODE (mova) = -1;
4307 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4308 *num_mova, and check if the new mova is not nested within the first one.
4309 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4310 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4311 static int
4312 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4314 int n_addr = 0; /* Initialization to shut up spurious warning. */
4315 int f_target, n_target = 0; /* Likewise. */
4317 if (optimize)
4319 /* If NEW_MOVA has no address yet, it will be handled later. */
4320 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4321 return -1;
4323 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4324 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4325 if (n_addr > n_target || n_addr + 1022 < n_target)
4327 /* Change the mova into a load.
4328 broken_move will then return true for it. */
4329 fixup_mova (new_mova);
4330 return 1;
4333 if (!(*num_mova)++)
4335 *first_mova = new_mova;
4336 return 2;
4338 if (!optimize
4339 || ((f_target
4340 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4341 >= n_target))
4342 return -1;
4344 (*num_mova)--;
4345 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4346 > n_target - n_addr)
4348 fixup_mova (*first_mova);
4349 return 0;
4351 else
4353 fixup_mova (new_mova);
4354 return 1;
4358 /* Find the last barrier from insn FROM which is close enough to hold the
4359 constant pool. If we can't find one, then create one near the end of
4360 the range. */
4362 static rtx
4363 find_barrier (int num_mova, rtx mova, rtx from)
4365 int count_si = 0;
4366 int count_hi = 0;
4367 int found_hi = 0;
4368 int found_si = 0;
4369 int found_di = 0;
4370 int hi_align = 2;
4371 int si_align = 2;
4372 int leading_mova = num_mova;
4373 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4374 int si_limit;
4375 int hi_limit;
4376 rtx orig = from;
4378 /* For HImode: range is 510, add 4 because pc counts from address of
4379 second instruction after this one, subtract 2 for the jump instruction
4380 that we may need to emit before the table, subtract 2 for the instruction
4381 that fills the jump delay slot (in very rare cases, reorg will take an
4382 instruction from after the constant pool or will leave the delay slot
4383 empty). This gives 510.
4384 For SImode: range is 1020, add 4 because pc counts from address of
4385 second instruction after this one, subtract 2 in case pc is 2 byte
4386 aligned, subtract 2 for the jump instruction that we may need to emit
4387 before the table, subtract 2 for the instruction that fills the jump
4388 delay slot. This gives 1018. */
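/* Illustrating the arithmetic above:
   hi_limit = 510 + 4 - 2 - 2 = 510
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018  */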
4390 /* The branch will always be shortened now that the reference address for
4391 forward branches is the successor address, thus we need no longer make
4392 adjustments to the [sh]i_limit for -O0. */
4394 si_limit = 1018;
4395 hi_limit = 510;
4397 while (from && count_si < si_limit && count_hi < hi_limit)
4399 int inc = get_attr_length (from);
4400 int new_align = 1;
4402 /* If this is a label that existed at the time of the compute_alignments
4403 call, determine the alignment. N.B. When find_barrier recurses for
4404 an out-of-reach mova, we might see labels at the start of previously
4405 inserted constant tables. */
4406 if (LABEL_P (from)
4407 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4409 if (optimize)
4410 new_align = 1 << label_to_alignment (from);
4411 else if (BARRIER_P (prev_nonnote_insn (from)))
4412 new_align = 1 << barrier_align (from);
4413 else
4414 new_align = 1;
4415 inc = 0;
4417 /* In case we are scanning a constant table because of recursion, check
4418 for explicit alignments. If the table is long, we might be forced
4419 to emit the new table in front of it; the length of the alignment
4420 might be the last straw. */
4421 else if (NONJUMP_INSN_P (from)
4422 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4423 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4424 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4425 /* When we find the end of a constant table, paste the new constant
4426 at the end. That is better than putting it in front because
4427 this way, we don't need extra alignment for adding a 4-byte-aligned
4428 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4429 else if (NONJUMP_INSN_P (from)
4430 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4431 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4432 return from;
4434 if (BARRIER_P (from))
4436 rtx next;
4438 found_barrier = from;
4440 /* If we are at the end of the function, or in front of an alignment
4441 instruction, we need not insert an extra alignment. We prefer
4442 this kind of barrier. */
4443 if (barrier_align (from) > 2)
4444 good_barrier = from;
4446 /* If we are at the end of a hot/cold block, dump the constants
4447 here. */
4448 next = NEXT_INSN (from);
4449 if (next
4450 && NOTE_P (next)
4451 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4452 break;
4455 if (broken_move (from))
4457 rtx pat, src, dst;
4458 enum machine_mode mode;
4460 pat = PATTERN (from);
4461 if (GET_CODE (pat) == PARALLEL)
4462 pat = XVECEXP (pat, 0, 0);
4463 src = SET_SRC (pat);
4464 dst = SET_DEST (pat);
4465 mode = GET_MODE (dst);
4467 /* We must explicitly check the mode, because sometimes the
4468 front end will generate code to load unsigned constants into
4469 HImode targets without properly sign extending them. */
4470 if (mode == HImode
4471 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4473 found_hi += 2;
4474 /* We put the short constants before the long constants, so
4475 we must count the length of short constants in the range
4476 for the long constants. */
4477 /* ??? This isn't optimal, but is easy to do. */
4478 si_limit -= 2;
4480 else
4482 /* We dump DF/DI constants before SF/SI ones, because
4483 the limit is the same, but the alignment requirements
4484 are higher. We may waste up to 4 additional bytes
4485 for alignment, and the DF/DI constant may have
4486 another SF/SI constant placed before it. */
4487 if (TARGET_SHCOMPACT
4488 && ! found_di
4489 && (mode == DFmode || mode == DImode))
4491 found_di = 1;
4492 si_limit -= 8;
4494 while (si_align > 2 && found_si + si_align - 2 > count_si)
4495 si_align >>= 1;
4496 if (found_si > count_si)
4497 count_si = found_si;
4498 found_si += GET_MODE_SIZE (mode);
4499 if (num_mova)
4500 si_limit -= GET_MODE_SIZE (mode);
4504 if (mova_p (from))
4506 switch (untangle_mova (&num_mova, &mova, from))
4508 case 0: return find_barrier (0, 0, mova);
4509 case 2:
4511 leading_mova = 0;
4512 barrier_before_mova
4513 = good_barrier ? good_barrier : found_barrier;
4515 default: break;
4517 if (found_si > count_si)
4518 count_si = found_si;
4520 else if (JUMP_TABLE_DATA_P (from))
4522 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4523 || (num_mova
4524 && (prev_nonnote_insn (from)
4525 == XEXP (MOVA_LABELREF (mova), 0))))
4526 num_mova--;
4527 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4529 /* We have just passed the barrier in front of the
4530 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4531 the ADDR_DIFF_VEC is accessed as data, just like our pool
4532 constants, this is a good opportunity to accommodate what
4533 we have gathered so far.
4534 If we waited any longer, we could end up at a barrier in
4535 front of code, which gives worse cache usage for separated
4536 instruction / data caches. */
4537 good_barrier = found_barrier;
4538 break;
4540 else
4542 rtx body = PATTERN (from);
4543 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4546 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4547 else if (JUMP_P (from)
4548 && ! TARGET_SH2
4549 && ! TARGET_SMALLCODE)
4550 new_align = 4;
4552 if (found_si)
4554 count_si += inc;
4555 if (new_align > si_align)
4557 si_limit -= (count_si - 1) & (new_align - si_align);
4558 si_align = new_align;
4560 count_si = (count_si + new_align - 1) & -new_align;
4562 if (found_hi)
4564 count_hi += inc;
4565 if (new_align > hi_align)
4567 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4568 hi_align = new_align;
4570 count_hi = (count_hi + new_align - 1) & -new_align;
4572 from = NEXT_INSN (from);
4575 if (num_mova)
4577 if (leading_mova)
4579 /* Try as we might, the leading mova is out of range. Change
4580 it into a load (which will become a pcload) and retry. */
4581 fixup_mova (mova);
4582 return find_barrier (0, 0, mova);
4584 else
4586 /* Insert the constant pool table before the mova instruction,
4587 to prevent the mova label reference from going out of range. */
4588 from = mova;
4589 good_barrier = found_barrier = barrier_before_mova;
4593 if (found_barrier)
4595 if (good_barrier && next_real_insn (found_barrier))
4596 found_barrier = good_barrier;
4598 else
4600 /* We didn't find a barrier in time to dump our stuff,
4601 so we'll make one. */
4602 rtx label = gen_label_rtx ();
4604 /* If we exceeded the range, then we must back up over the last
4605 instruction we looked at. Otherwise, we just need to undo the
4606 NEXT_INSN at the end of the loop. */
4607 if (PREV_INSN (from) != orig
4608 && (count_hi > hi_limit || count_si > si_limit))
4609 from = PREV_INSN (PREV_INSN (from));
4610 else
4611 from = PREV_INSN (from);
4613 /* Walk back to be just before any jump or label.
4614 Putting it before a label reduces the number of times the branch
4615 around the constant pool table will be hit. Putting it before
4616 a jump makes it more likely that the bra delay slot will be
4617 filled. */
4618 while (NOTE_P (from) || JUMP_P (from)
4619 || LABEL_P (from))
4620 from = PREV_INSN (from);
4622 from = emit_jump_insn_after (gen_jump (label), from);
4623 JUMP_LABEL (from) = label;
4624 LABEL_NUSES (label) = 1;
4625 found_barrier = emit_barrier_after (from);
4626 emit_label_after (label, found_barrier);
4629 return found_barrier;
4632 /* If the instruction INSN is implemented by a special function, and we can
4633 positively find the register that is used to call the sfunc, and this
4634 register is not used anywhere else in this instruction - except as the
4635 destination of a set, return this register; else, return 0. */
4637 sfunc_uses_reg (rtx insn)
4639 int i;
4640 rtx pattern, part, reg_part, reg;
4642 if (!NONJUMP_INSN_P (insn))
4643 return 0;
4644 pattern = PATTERN (insn);
4645 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4646 return 0;
4648 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4650 part = XVECEXP (pattern, 0, i);
4651 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4652 reg_part = part;
4654 if (! reg_part)
4655 return 0;
4656 reg = XEXP (reg_part, 0);
4657 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4659 part = XVECEXP (pattern, 0, i);
4660 if (part == reg_part || GET_CODE (part) == CLOBBER)
4661 continue;
4662 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4663 && REG_P (SET_DEST (part)))
4664 ? SET_SRC (part) : part)))
4665 return 0;
4667 return reg;
4670 /* See if the only way in which INSN uses REG is by calling it, or by
4671 setting it while calling it. Set *SET to a SET rtx if the register
4672 is set by INSN. */
4674 static int
4675 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4677 rtx pattern, reg2;
4679 *set = NULL_RTX;
4681 reg2 = sfunc_uses_reg (insn);
4682 if (reg2 && REGNO (reg2) == REGNO (reg))
4684 pattern = single_set (insn);
4685 if (pattern
4686 && REG_P (SET_DEST (pattern))
4687 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4688 *set = pattern;
4689 return 0;
4691 if (!CALL_P (insn))
4693 /* We don't use rtx_equal_p because we don't care if the mode is
4694 different. */
4695 pattern = single_set (insn);
4696 if (pattern
4697 && REG_P (SET_DEST (pattern))
4698 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4700 rtx par, part;
4701 int i;
4703 *set = pattern;
4704 par = PATTERN (insn);
4705 if (GET_CODE (par) == PARALLEL)
4706 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4708 part = XVECEXP (par, 0, i);
4709 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4710 return 1;
4712 return reg_mentioned_p (reg, SET_SRC (pattern));
4715 return 1;
4718 pattern = PATTERN (insn);
4720 if (GET_CODE (pattern) == PARALLEL)
4722 int i;
4724 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4725 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4726 return 1;
4727 pattern = XVECEXP (pattern, 0, 0);
4730 if (GET_CODE (pattern) == SET)
4732 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4734 /* We don't use rtx_equal_p, because we don't care if the
4735 mode is different. */
4736 if (!REG_P (SET_DEST (pattern))
4737 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4738 return 1;
4740 *set = pattern;
4743 pattern = SET_SRC (pattern);
4746 if (GET_CODE (pattern) != CALL
4747 || !MEM_P (XEXP (pattern, 0))
4748 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4749 return 1;
4751 return 0;
4754 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4755 general registers. Bits 0..15 mean that the respective registers
4756 are used as inputs in the instruction. Bits 16..31 mean that the
4757 registers 0..15, respectively, are used as outputs, or are clobbered.
4758 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4760 regs_used (rtx x, int is_dest)
4762 enum rtx_code code;
4763 const char *fmt;
4764 int i, used = 0;
4766 if (! x)
4767 return used;
4768 code = GET_CODE (x);
4769 switch (code)
4771 case REG:
4772 if (REGNO (x) < 16)
4773 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4774 << (REGNO (x) + is_dest));
4775 return 0;
4776 case SUBREG:
4778 rtx y = SUBREG_REG (x);
4780 if (!REG_P (y))
4781 break;
4782 if (REGNO (y) < 16)
4783 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4784 << (REGNO (y) +
4785 subreg_regno_offset (REGNO (y),
4786 GET_MODE (y),
4787 SUBREG_BYTE (x),
4788 GET_MODE (x)) + is_dest));
4789 return 0;
4791 case SET:
4792 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4793 case RETURN:
4794 /* If there was a return value, it must have been indicated with USE. */
4795 return 0x00ffff00;
4796 case CLOBBER:
4797 is_dest = 1;
4798 break;
4799 case MEM:
4800 is_dest = 0;
4801 break;
4802 case CALL:
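/* A call reads the GPR argument registers r4..r7 (bits 4..7) and
   clobbers the call-clobbered registers r0..r7 (bits 16..23).  */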
4803 used |= 0x00ff00f0;
4804 break;
4805 default:
4806 break;
4809 fmt = GET_RTX_FORMAT (code);
4811 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4813 if (fmt[i] == 'E')
4815 register int j;
4816 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4817 used |= regs_used (XVECEXP (x, i, j), is_dest);
4819 else if (fmt[i] == 'e')
4820 used |= regs_used (XEXP (x, i), is_dest);
4822 return used;
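/* Illustration only (not used anywhere in this file): how a caller could
   interpret the mask returned by regs_used ().  The function name below is
   hypothetical.  */
#if 0
static int
r4_written_or_clobbered_p (rtx insn)
{
  int used = regs_used (PATTERN (insn), 0);

  /* Bit 4 would mean r4 is read; bit 16 + 4 means r4 is written or
     clobbered.  */
  return (used >> (16 + 4)) & 1;
}
#endif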
4825 /* Create an instruction that prevents redirection of a conditional branch
4826 to the destination of the JUMP with address ADDR.
4827 If the branch needs to be implemented as an indirect jump, try to find
4828 a scratch register for it.
4829 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4830 If any preceding insn that doesn't fit into a delay slot is good enough,
4831 pass 1. Pass 2 if a definite blocking insn is needed.
4832 -1 is used internally to avoid deep recursion.
4833 If a blocking instruction is made or recognized, return it. */
4835 static rtx
4836 gen_block_redirect (rtx jump, int addr, int need_block)
4838 int dead = 0;
4839 rtx prev = prev_nonnote_insn (jump);
4840 rtx dest;
4842 /* First, check if we already have an instruction that satisfies our need. */
4843 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4845 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4846 return prev;
4847 if (GET_CODE (PATTERN (prev)) == USE
4848 || GET_CODE (PATTERN (prev)) == CLOBBER
4849 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4850 prev = jump;
4851 else if ((need_block &= ~1) < 0)
4852 return prev;
4853 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4854 need_block = 0;
4856 if (GET_CODE (PATTERN (jump)) == RETURN)
4858 if (! need_block)
4859 return prev;
4860 /* Reorg even does nasty things with return insns that cause branches
4861 to go out of range - see find_end_label and callers. */
4862 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4864 /* We can't use JUMP_LABEL here because it might be undefined
4865 when not optimizing. */
4866 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4867 /* If the branch is out of range, try to find a scratch register for it. */
4868 if (optimize
4869 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4870 > 4092 + 4098))
4872 rtx scan;
4873 /* Don't look for the stack pointer as a scratch register,
4874 it would cause trouble if an interrupt occurred. */
4875 unsigned attempt = 0x7fff, used;
4876 int jump_left = flag_expensive_optimizations + 1;
4878 /* It is likely that the most recent eligible instruction is wanted for
4879 the delay slot. Therefore, find out which registers it uses, and
4880 try to avoid using them. */
4882 for (scan = jump; (scan = PREV_INSN (scan)); )
4884 enum rtx_code code;
4886 if (INSN_DELETED_P (scan))
4887 continue;
4888 code = GET_CODE (scan);
4889 if (code == CODE_LABEL || code == JUMP_INSN)
4890 break;
4891 if (code == INSN
4892 && GET_CODE (PATTERN (scan)) != USE
4893 && GET_CODE (PATTERN (scan)) != CLOBBER
4894 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4896 attempt &= ~regs_used (PATTERN (scan), 0);
4897 break;
4900 for (used = dead = 0, scan = JUMP_LABEL (jump);
4901 (scan = NEXT_INSN (scan)); )
4903 enum rtx_code code;
4905 if (INSN_DELETED_P (scan))
4906 continue;
4907 code = GET_CODE (scan);
4908 if (INSN_P (scan))
4910 used |= regs_used (PATTERN (scan), 0);
4911 if (code == CALL_INSN)
4912 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4913 dead |= (used >> 16) & ~used;
4914 if (dead & attempt)
4916 dead &= attempt;
4917 break;
4919 if (code == JUMP_INSN)
4921 if (jump_left-- && simplejump_p (scan))
4922 scan = JUMP_LABEL (scan);
4923 else
4924 break;
4928 /* Mask out the stack pointer again, in case it was
4929 the only 'free' register we have found. */
4930 dead &= 0x7fff;
4932 /* If the immediate destination is still in range, check for possible
4933 threading with a jump beyond the delay slot insn.
4934 Don't check if we are called recursively; the jump has been or will be
4935 checked in a different invocation. */
4937 else if (optimize && need_block >= 0)
4939 rtx next = next_active_insn (next_active_insn (dest));
4940 if (next && JUMP_P (next)
4941 && GET_CODE (PATTERN (next)) == SET
4942 && recog_memoized (next) == CODE_FOR_jump_compact)
4944 dest = JUMP_LABEL (next);
4945 if (dest
4946 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4947 > 4092 + 4098))
4948 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4952 if (dead)
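/* dead & -dead isolates the least significant set bit, so this picks
   the lowest-numbered register that was found to be dead.  */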
4954 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4956 /* It would be nice if we could convert the jump into an indirect
4957 jump / far branch right now, and thus exposing all constituent
4958 instructions to further optimization. However, reorg uses
4959 simplejump_p to determine if there is an unconditional jump where
4960 it should try to schedule instructions from the target of the
4961 branch; simplejump_p fails for indirect jumps even if they have
4962 a JUMP_LABEL. */
4963 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4964 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4965 , jump);
4966 /* ??? We would like this to have the scope of the jump, but that
4967 scope will change when a delay slot insn of an inner scope is added.
4968 Hence, after delay slot scheduling, we'll have to expect
4969 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4970 the jump. */
4972 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4973 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4974 return insn;
4976 else if (need_block)
4977 /* We can't use JUMP_LABEL here because it might be undefined
4978 when not optimizing. */
4979 return emit_insn_before (gen_block_branch_redirect
4980 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4981 , jump);
4982 return prev;
4985 #define CONDJUMP_MIN -252
4986 #define CONDJUMP_MAX 262
4987 struct far_branch
4989 /* A label (to be placed) in front of the jump
4990 that jumps to our ultimate destination. */
4991 rtx near_label;
4992 /* Where we are going to insert it if we cannot move the jump any farther,
4993 or the jump itself if we have picked up an existing jump. */
4994 rtx insert_place;
4995 /* The ultimate destination. */
4996 rtx far_label;
4997 struct far_branch *prev;
4998 /* If the branch has already been created, its address;
4999 else the address of its first prospective user. */
5000 int address;
5003 static void gen_far_branch (struct far_branch *);
5004 enum mdep_reorg_phase_e mdep_reorg_phase;
5005 static void
5006 gen_far_branch (struct far_branch *bp)
5008 rtx insn = bp->insert_place;
5009 rtx jump;
5010 rtx label = gen_label_rtx ();
5011 int ok;
5013 emit_label_after (label, insn);
5014 if (bp->far_label)
5016 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5017 LABEL_NUSES (bp->far_label)++;
5019 else
5020 jump = emit_jump_insn_after (gen_return (), insn);
5021 /* Emit a barrier so that reorg knows that any following instructions
5022 are not reachable via a fall-through path.
5023 But don't do this when not optimizing, since we wouldn't suppress the
5024 alignment for the barrier then, and could end up with out-of-range
5025 pc-relative loads. */
5026 if (optimize)
5027 emit_barrier_after (jump);
5028 emit_label_after (bp->near_label, insn);
5029 JUMP_LABEL (jump) = bp->far_label;
5030 ok = invert_jump (insn, label, 1);
5031 gcc_assert (ok);
5033 /* If we are branching around a jump (rather than a return), prevent
5034 reorg from using an insn from the jump target as the delay slot insn -
5035 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5036 and it could cause branches to go out of range. */
5037 if (bp->far_label)
5038 (emit_insn_after
5039 (gen_stuff_delay_slot
5040 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5041 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5042 insn));
5043 /* Prevent reorg from undoing our splits. */
5044 gen_block_redirect (jump, bp->address += 2, 2);
5047 /* Fix up ADDR_DIFF_VECs. */
5048 void
5049 fixup_addr_diff_vecs (rtx first)
5051 rtx insn;
5053 for (insn = first; insn; insn = NEXT_INSN (insn))
5055 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5057 if (!JUMP_P (insn)
5058 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5059 continue;
5060 pat = PATTERN (insn);
5061 vec_lab = XEXP (XEXP (pat, 0), 0);
5063 /* Search the matching casesi_jump_2. */
5064 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5066 if (!JUMP_P (prev))
5067 continue;
5068 prevpat = PATTERN (prev);
5069 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5070 continue;
5071 x = XVECEXP (prevpat, 0, 1);
5072 if (GET_CODE (x) != USE)
5073 continue;
5074 x = XEXP (x, 0);
5075 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5076 break;
5078 /* FIXME: This is a bug in the optimizer, but it seems harmless
5079 to just avoid panicking. */
5080 if (!prev)
5081 continue;
5083 /* Emit the reference label of the braf where it belongs, right after
5084 the casesi_jump_2 (i.e. braf). */
5085 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5086 emit_label_after (braf_label, prev);
5088 /* Fix up the ADDR_DIF_VEC to be relative
5089 to the reference address of the braf. */
5090 XEXP (XEXP (pat, 0), 0) = braf_label;
5094 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5095 a barrier. Return the base 2 logarithm of the desired alignment. */
5097 barrier_align (rtx barrier_or_label)
5099 rtx next = next_real_insn (barrier_or_label), pat, prev;
5100 int slot, credit, jump_to_next = 0;
5102 if (! next)
5103 return 0;
5105 pat = PATTERN (next);
5107 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5108 return 2;
5110 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5111 /* This is a barrier in front of a constant table. */
5112 return 0;
5114 prev = prev_real_insn (barrier_or_label);
5115 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5117 pat = PATTERN (prev);
5118 /* If this is a very small table, we want to keep the alignment after
5119 the table to the minimum for proper code alignment. */
5120 return ((TARGET_SMALLCODE
5121 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5122 <= (unsigned) 1 << (CACHE_LOG - 2)))
5123 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5126 if (TARGET_SMALLCODE)
5127 return 0;
5129 if (! TARGET_SH2 || ! optimize)
5130 return align_jumps_log;
5132 /* When fixing up pcloads, a constant table might be inserted just before
5133 the basic block that ends with the barrier. Thus, we can't trust the
5134 instruction lengths before that. */
5135 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5137 /* Check if there is an immediately preceding branch to the insn beyond
5138 the barrier. We must weight the cost of discarding useful information
5139 from the current cache line when executing this branch and there is
5140 an alignment, against that of fetching unneeded insn in front of the
5141 branch target when there is no alignment. */
5143 /* There are two delay_slot cases to consider. One is the simple case
5144 where the preceding branch is to the insn beyond the barrier (simple
5145 delay slot filling), and the other is where the preceding branch has
5146 a delay slot that is a duplicate of the insn after the barrier
5147 (fill_eager_delay_slots) and the branch is to the insn after the insn
5148 after the barrier. */
5150 /* PREV is presumed to be the JUMP_INSN for the barrier under
5151 investigation. Skip to the insn before it. */
5152 prev = prev_real_insn (prev);
5154 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5155 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5156 prev = prev_real_insn (prev))
5158 jump_to_next = 0;
5159 if (GET_CODE (PATTERN (prev)) == USE
5160 || GET_CODE (PATTERN (prev)) == CLOBBER)
5161 continue;
5162 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5164 prev = XVECEXP (PATTERN (prev), 0, 1);
5165 if (INSN_UID (prev) == INSN_UID (next))
5167 /* Delay slot was filled with insn at jump target. */
5168 jump_to_next = 1;
5169 continue;
5173 if (slot &&
5174 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5175 slot = 0;
5176 credit -= get_attr_length (prev);
5178 if (prev
5179 && JUMP_P (prev)
5180 && JUMP_LABEL (prev))
5182 rtx x;
5183 if (jump_to_next
5184 || next_real_insn (JUMP_LABEL (prev)) == next
5185 /* If relax_delay_slots() decides NEXT was redundant
5186 with some previous instruction, it will have
5187 redirected PREV's jump to the following insn. */
5188 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5189 /* There is no upper bound on redundant instructions
5190 that might have been skipped, but we must not put an
5191 alignment where none had been before. */
5192 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5193 (INSN_P (x)
5194 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5195 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5196 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5198 rtx pat = PATTERN (prev);
5199 if (GET_CODE (pat) == PARALLEL)
5200 pat = XVECEXP (pat, 0, 0);
5201 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5202 return 0;
5207 return align_jumps_log;
5210 /* If we are inside a phony loop, almost any kind of label can turn up as the
5211 first one in the loop. Aligning a braf label causes incorrect switch
5212 destination addresses; we can detect braf labels because they are
5213 followed by a BARRIER.
5214 Applying loop alignment to small constant or switch tables is a waste
5215 of space, so we suppress this too. */
5217 sh_loop_align (rtx label)
5219 rtx next = label;
5222 next = next_nonnote_insn (next);
5223 while (next && LABEL_P (next));
5225 if (! next
5226 || ! INSN_P (next)
5227 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5228 || recog_memoized (next) == CODE_FOR_consttable_2)
5229 return 0;
5231 return align_loops_log;
5234 /* Do a final pass over the function, just before delayed branch
5235 scheduling. */
5237 static void
5238 sh_reorg (void)
5240 rtx first, insn, mova = NULL_RTX;
5241 int num_mova;
5242 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5243 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5245 first = get_insns ();
5246 max_labelno_before_reorg = max_label_num ();
5248 /* We must split call insns before introducing `mova's. If we're
5249 optimizing, they'll have already been split. Otherwise, make
5250 sure we don't split them too late. */
5251 if (! optimize)
5252 split_all_insns_noflow ();
5254 if (TARGET_SHMEDIA)
5255 return;
5257 /* If relaxing, generate pseudo-ops to associate function calls with
5258 the symbols they call. It does no harm to not generate these
5259 pseudo-ops. However, when we can generate them, it enables the
5260 linker to potentially relax the jsr to a bsr, and eliminate the
5261 register load and, possibly, the constant pool entry. */
5263 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5264 if (TARGET_RELAX)
5266 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5267 own purposes. This works because none of the remaining passes
5268 need to look at them.
5270 ??? But it may break in the future. We should use a machine
5271 dependent REG_NOTE, or some other approach entirely. */
5272 for (insn = first; insn; insn = NEXT_INSN (insn))
5274 if (INSN_P (insn))
5276 rtx note;
5278 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5279 NULL_RTX)) != 0)
5280 remove_note (insn, note);
5284 for (insn = first; insn; insn = NEXT_INSN (insn))
5286 rtx pattern, reg, link, set, scan, dies, label;
5287 int rescan = 0, foundinsn = 0;
5289 if (CALL_P (insn))
5291 pattern = PATTERN (insn);
5293 if (GET_CODE (pattern) == PARALLEL)
5294 pattern = XVECEXP (pattern, 0, 0);
5295 if (GET_CODE (pattern) == SET)
5296 pattern = SET_SRC (pattern);
5298 if (GET_CODE (pattern) != CALL
5299 || !MEM_P (XEXP (pattern, 0)))
5300 continue;
5302 reg = XEXP (XEXP (pattern, 0), 0);
5304 else
5306 reg = sfunc_uses_reg (insn);
5307 if (! reg)
5308 continue;
5311 if (!REG_P (reg))
5312 continue;
5314 /* Try scanning backward to find where the register is set. */
5315 link = NULL;
5316 for (scan = PREV_INSN (insn);
5317 scan && !LABEL_P (scan);
5318 scan = PREV_INSN (scan))
5320 if (! INSN_P (scan))
5321 continue;
5323 if (! reg_mentioned_p (reg, scan))
5324 continue;
5326 if (noncall_uses_reg (reg, scan, &set))
5327 break;
5329 if (set)
5331 link = scan;
5332 break;
5336 if (! link)
5337 continue;
5339 /* The register is set at LINK. */
5341 /* We can only optimize the function call if the register is
5342 being set to a symbol. In theory, we could sometimes
5343 optimize calls to a constant location, but the assembler
5344 and linker do not support that at present. */
5345 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5346 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5347 continue;
5349 /* Scan forward from LINK to the place where REG dies, and
5350 make sure that the only insns which use REG are
5351 themselves function calls. */
5353 /* ??? This doesn't work for call targets that were allocated
5354 by reload, since there may not be a REG_DEAD note for the
5355 register. */
5357 dies = NULL_RTX;
5358 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5360 rtx scanset;
5362 /* Don't try to trace forward past a CODE_LABEL if we haven't
5363 seen INSN yet. Ordinarily, we will only find the setting insn
5364 if it is in the same basic block. However,
5365 cross-jumping can insert code labels in between the load and
5366 the call, and can result in situations where a single call
5367 insn may have two targets depending on where we came from. */
5369 if (LABEL_P (scan) && ! foundinsn)
5370 break;
5372 if (! INSN_P (scan))
5373 continue;
5375 /* Don't try to trace forward past a JUMP. To optimize
5376 safely, we would have to check that all the
5377 instructions at the jump destination did not use REG. */
5379 if (JUMP_P (scan))
5380 break;
5382 if (! reg_mentioned_p (reg, scan))
5383 continue;
5385 if (noncall_uses_reg (reg, scan, &scanset))
5386 break;
5388 if (scan == insn)
5389 foundinsn = 1;
5391 if (scan != insn
5392 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5394 /* There is a function call to this register other
5395 than the one we are checking. If we optimize
5396 this call, we need to rescan again below. */
5397 rescan = 1;
5400 /* ??? We shouldn't have to worry about SCANSET here.
5401 We should just be able to check for a REG_DEAD note
5402 on a function call. However, the REG_DEAD notes are
5403 apparently not dependable around libcalls; c-torture
5404 execute/920501-2 is a test case. If SCANSET is set,
5405 then this insn sets the register, so it must have
5406 died earlier. Unfortunately, this will only handle
5407 the cases in which the register is, in fact, set in a
5408 later insn. */
5410 /* ??? We shouldn't have to use FOUNDINSN here.
5411 This dates back to when we used LOG_LINKS to find
5412 the most recent insn which sets the register. */
5414 if (foundinsn
5415 && (scanset
5416 || find_reg_note (scan, REG_DEAD, reg)))
5418 dies = scan;
5419 break;
5423 if (! dies)
5425 /* Either there was a branch, or some insn used REG
5426 other than as a function call address. */
5427 continue;
5430 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5431 on the insn which sets the register, and on each call insn
5432 which uses the register. In final_prescan_insn we look for
5433 the REG_LABEL_OPERAND notes, and output the appropriate label
5434 or pseudo-op. */
5436 label = gen_label_rtx ();
5437 add_reg_note (link, REG_LABEL_OPERAND, label);
5438 add_reg_note (insn, REG_LABEL_OPERAND, label);
5439 if (rescan)
5441 scan = link;
5444 rtx reg2;
5446 scan = NEXT_INSN (scan);
5447 if (scan != insn
5448 && ((CALL_P (scan)
5449 && reg_mentioned_p (reg, scan))
5450 || ((reg2 = sfunc_uses_reg (scan))
5451 && REGNO (reg2) == REGNO (reg))))
5452 add_reg_note (scan, REG_LABEL_OPERAND, label);
5454 while (scan != dies);
5459 if (TARGET_SH2)
5460 fixup_addr_diff_vecs (first);
5462 if (optimize)
5464 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5465 shorten_branches (first);
5468 /* Scan the function looking for move instructions which have to be
5469 changed to pc-relative loads and insert the literal tables. */
5470 label_ref_list_pool = create_alloc_pool ("label references list",
5471 sizeof (struct label_ref_list_d),
5472 30);
5473 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5474 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5476 if (mova_p (insn))
5478 /* ??? basic block reordering can move a switch table dispatch
5479 below the switch table. Check if that has happened.
5480 We only have the addresses available when optimizing; but then,
5481 this check shouldn't be needed when not optimizing. */
5482 if (!untangle_mova (&num_mova, &mova, insn))
5484 insn = mova;
5485 num_mova = 0;
5488 else if (JUMP_P (insn)
5489 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5490 && num_mova
5491 /* ??? loop invariant motion can also move a mova out of a
5492 loop. Since loop does this code motion anyway, maybe we
5493 should wrap UNSPEC_MOVA into a CONST, so that reload can
5494 move it back. */
5495 && ((num_mova > 1
5496 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5497 || (prev_nonnote_insn (insn)
5498 == XEXP (MOVA_LABELREF (mova), 0))))
5500 rtx scan;
5501 int total;
5503 num_mova--;
5505 /* Some code might have been inserted between the mova and
5506 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5507 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5508 total += get_attr_length (scan);
5510 /* range of mova is 1020, add 4 because pc counts from address of
5511 second instruction after this one, subtract 2 in case pc is 2
5512 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5513 cancels out with alignment effects of the mova itself. */
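/* Illustrating the arithmetic above: 1020 + 4 - 2 = 1022 bytes.  */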
5514 if (total > 1022)
5516 /* Change the mova into a load, and restart scanning
5517 there. broken_move will then return true for mova. */
5518 fixup_mova (mova);
5519 insn = mova;
5522 if (broken_move (insn)
5523 || (NONJUMP_INSN_P (insn)
5524 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5526 rtx scan;
5527 /* Scan ahead looking for a barrier to stick the constant table
5528 behind. */
5529 rtx barrier = find_barrier (num_mova, mova, insn);
5530 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5531 int need_aligned_label = 0;
5533 if (num_mova && ! mova_p (mova))
5535 /* find_barrier had to change the first mova into a
5536 pcload; thus, we have to start with this new pcload. */
5537 insn = mova;
5538 num_mova = 0;
5540 /* Now find all the moves between the points and modify them. */
5541 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5543 if (LABEL_P (scan))
5544 last_float = 0;
5545 if (NONJUMP_INSN_P (scan)
5546 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5547 need_aligned_label = 1;
5548 if (broken_move (scan))
5550 rtx *patp = &PATTERN (scan), pat = *patp;
5551 rtx src, dst;
5552 rtx lab;
5553 rtx newsrc;
5554 enum machine_mode mode;
5556 if (GET_CODE (pat) == PARALLEL)
5557 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5558 src = SET_SRC (pat);
5559 dst = SET_DEST (pat);
5560 mode = GET_MODE (dst);
5562 if (mode == SImode && hi_const (src)
5563 && REGNO (dst) != FPUL_REG)
5565 int offset = 0;
5567 mode = HImode;
5568 while (GET_CODE (dst) == SUBREG)
5570 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5571 GET_MODE (SUBREG_REG (dst)),
5572 SUBREG_BYTE (dst),
5573 GET_MODE (dst));
5574 dst = SUBREG_REG (dst);
5576 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5578 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5580 /* This must be an insn that clobbers r0. */
5581 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5582 XVECLEN (PATTERN (scan), 0)
5583 - 1);
5584 rtx clobber = *clobberp;
5586 gcc_assert (GET_CODE (clobber) == CLOBBER
5587 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5589 if (last_float
5590 && reg_set_between_p (r0_rtx, last_float_move, scan))
5591 last_float = 0;
5592 if (last_float
5593 && TARGET_SHCOMPACT
5594 && GET_MODE_SIZE (mode) != 4
5595 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5596 last_float = 0;
5597 lab = add_constant (src, mode, last_float);
5598 if (lab)
5599 emit_insn_before (gen_mova (lab), scan);
5600 else
5602 /* There will be a REG_UNUSED note for r0 on
5603 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5604 otherwise reorg:mark_target_live_regs will not
5605 consider r0 to be used, and we could end up with a delay
5606 slot insn in front of SCAN that clobbers r0. */
5607 rtx note
5608 = find_regno_note (last_float_move, REG_UNUSED, 0);
5610 /* If we are not optimizing, then there may not be
5611 a note. */
5612 if (note)
5613 PUT_REG_NOTE_KIND (note, REG_INC);
5615 *last_float_addr = r0_inc_rtx;
5617 last_float_move = scan;
5618 last_float = src;
5619 newsrc = gen_const_mem (mode,
5620 (((TARGET_SH4 && ! TARGET_FMOVD)
5621 || REGNO (dst) == FPUL_REG)
5622 ? r0_inc_rtx
5623 : r0_rtx));
5624 last_float_addr = &XEXP (newsrc, 0);
5626 /* Remove the clobber of r0. */
5627 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5628 gen_rtx_SCRATCH (Pmode));
5630 /* This is a mova needing a label. Create it. */
5631 else if (GET_CODE (src) == UNSPEC
5632 && XINT (src, 1) == UNSPEC_MOVA
5633 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5635 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5636 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5637 newsrc = gen_rtx_UNSPEC (SImode,
5638 gen_rtvec (1, newsrc),
5639 UNSPEC_MOVA);
5641 else
5643 lab = add_constant (src, mode, 0);
5644 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5645 newsrc = gen_const_mem (mode, newsrc);
5647 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5648 INSN_CODE (scan) = -1;
5651 dump_table (need_aligned_label ? insn : 0, barrier);
5652 insn = barrier;
5655 free_alloc_pool (label_ref_list_pool);
5656 for (insn = first; insn; insn = NEXT_INSN (insn))
5657 PUT_MODE (insn, VOIDmode);
5659 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5660 INSN_ADDRESSES_FREE ();
5661 split_branches (first);
5663 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5664 also has an effect on the register that holds the address of the sfunc.
5665 Insert an extra dummy insn in front of each sfunc that pretends to
5666 use this register. */
5667 if (flag_delayed_branch)
5669 for (insn = first; insn; insn = NEXT_INSN (insn))
5671 rtx reg = sfunc_uses_reg (insn);
5673 if (! reg)
5674 continue;
5675 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5678 #if 0
5679 /* fpscr is not actually a user variable, but we pretend it is for the
5680 sake of the previous optimization passes, since we want it handled like
5681 one. However, we don't have any debugging information for it, so turn
5682 it into a non-user variable now. */
5683 if (TARGET_SH4)
5684 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5685 #endif
5686 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5690 get_dest_uid (rtx label, int max_uid)
5692 rtx dest = next_real_insn (label);
5693 int dest_uid;
5694 if (! dest)
5695 /* This can happen for an undefined label. */
5696 return 0;
5697 dest_uid = INSN_UID (dest);
5698 /* If this is a newly created branch redirection blocking instruction,
5699 we cannot index the branch_uid or insn_addresses arrays with its
5700 uid. But then, we won't need to, because the actual destination is
5701 the following branch. */
5702 while (dest_uid >= max_uid)
5704 dest = NEXT_INSN (dest);
5705 dest_uid = INSN_UID (dest);
5707 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5708 return 0;
5709 return dest_uid;
5712 /* Split condbranches that are out of range. Also add clobbers for
5713 scratch registers that are needed in far jumps.
5714 We do this before delay slot scheduling, so that it can take our
5715 newly created instructions into account. It also allows us to
5716 find branches with common targets more easily. */
5718 static void
5719 split_branches (rtx first)
5721 rtx insn;
5722 struct far_branch **uid_branch, *far_branch_list = 0;
5723 int max_uid = get_max_uid ();
5724 int ok;
5726 /* Find out which branches are out of range. */
5727 shorten_branches (first);
5729 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5730 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5732 for (insn = first; insn; insn = NEXT_INSN (insn))
5733 if (! INSN_P (insn))
5734 continue;
5735 else if (INSN_DELETED_P (insn))
5737 /* Shorten_branches would split this instruction again,
5738 so transform it into a note. */
5739 SET_INSN_DELETED (insn);
5741 else if (JUMP_P (insn)
5742 /* Don't mess with ADDR_DIFF_VEC */
5743 && (GET_CODE (PATTERN (insn)) == SET
5744 || GET_CODE (PATTERN (insn)) == RETURN))
5746 enum attr_type type = get_attr_type (insn);
5747 if (type == TYPE_CBRANCH)
5749 rtx next, beyond;
5751 if (get_attr_length (insn) > 4)
5753 rtx src = SET_SRC (PATTERN (insn));
5754 rtx olabel = XEXP (XEXP (src, 1), 0);
5755 int addr = INSN_ADDRESSES (INSN_UID (insn));
5756 rtx label = 0;
5757 int dest_uid = get_dest_uid (olabel, max_uid);
5758 struct far_branch *bp = uid_branch[dest_uid];
5760 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5761 the label if the LABEL_NUSES count drops to zero. There is
5762 always a jump_optimize pass that sets these values, but it
5763 proceeds to delete unreferenced code, and then if not
5764 optimizing, to un-delete the deleted instructions, thus
5765 leaving labels with too-low use counts. */
5766 if (! optimize)
5768 JUMP_LABEL (insn) = olabel;
5769 LABEL_NUSES (olabel)++;
5771 if (! bp)
5773 bp = (struct far_branch *) alloca (sizeof *bp);
5774 uid_branch[dest_uid] = bp;
5775 bp->prev = far_branch_list;
5776 far_branch_list = bp;
5777 bp->far_label
5778 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5779 LABEL_NUSES (bp->far_label)++;
5781 else
5783 label = bp->near_label;
5784 if (! label && bp->address - addr >= CONDJUMP_MIN)
5786 rtx block = bp->insert_place;
5788 if (GET_CODE (PATTERN (block)) == RETURN)
5789 block = PREV_INSN (block);
5790 else
5791 block = gen_block_redirect (block,
5792 bp->address, 2);
5793 label = emit_label_after (gen_label_rtx (),
5794 PREV_INSN (block));
5795 bp->near_label = label;
5797 else if (label && ! NEXT_INSN (label))
5799 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5800 bp->insert_place = insn;
5801 else
5802 gen_far_branch (bp);
5805 if (! label
5806 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5808 bp->near_label = label = gen_label_rtx ();
5809 bp->insert_place = insn;
5810 bp->address = addr;
5812 ok = redirect_jump (insn, label, 0);
5813 gcc_assert (ok);
5815 else
5817 /* get_attr_length (insn) == 2 */
5818 /* Check if we have a pattern where reorg wants to redirect
5819 the branch to a label from an unconditional branch that
5820 is too far away. */
5821 /* We can't use JUMP_LABEL here because it might be undefined
5822 when not optimizing. */
5823 /* A syntax error might cause beyond to be NULL_RTX. */
5824 beyond
5825 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5826 0));
5828 if (beyond
5829 && (JUMP_P (beyond)
5830 || ((beyond = next_active_insn (beyond))
5831 && JUMP_P (beyond)))
5832 && GET_CODE (PATTERN (beyond)) == SET
5833 && recog_memoized (beyond) == CODE_FOR_jump_compact
5834 && ((INSN_ADDRESSES
5835 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5836 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5837 > 252 + 258 + 2))
5838 gen_block_redirect (beyond,
5839 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5842 next = next_active_insn (insn);
5844 if (next
5845 && (JUMP_P (next)
5846 || ((next = next_active_insn (next))
5847 && JUMP_P (next)))
5848 && GET_CODE (PATTERN (next)) == SET
5849 && recog_memoized (next) == CODE_FOR_jump_compact
5850 && ((INSN_ADDRESSES
5851 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5852 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5853 > 252 + 258 + 2))
5854 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5856 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5858 int addr = INSN_ADDRESSES (INSN_UID (insn));
5859 rtx far_label = 0;
5860 int dest_uid = 0;
5861 struct far_branch *bp;
5863 if (type == TYPE_JUMP)
5865 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5866 dest_uid = get_dest_uid (far_label, max_uid);
5867 if (! dest_uid)
5869 /* Parse errors can lead to labels outside
5870 the insn stream. */
5871 if (! NEXT_INSN (far_label))
5872 continue;
5874 if (! optimize)
5876 JUMP_LABEL (insn) = far_label;
5877 LABEL_NUSES (far_label)++;
5879 redirect_jump (insn, NULL_RTX, 1);
5880 far_label = 0;
5883 bp = uid_branch[dest_uid];
5884 if (! bp)
5886 bp = (struct far_branch *) alloca (sizeof *bp);
5887 uid_branch[dest_uid] = bp;
5888 bp->prev = far_branch_list;
5889 far_branch_list = bp;
5890 bp->near_label = 0;
5891 bp->far_label = far_label;
5892 if (far_label)
5893 LABEL_NUSES (far_label)++;
5895 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5896 if (addr - bp->address <= CONDJUMP_MAX)
5897 emit_label_after (bp->near_label, PREV_INSN (insn));
5898 else
5900 gen_far_branch (bp);
5901 bp->near_label = 0;
5903 else
5904 bp->near_label = 0;
5905 bp->address = addr;
5906 bp->insert_place = insn;
5907 if (! far_label)
5908 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5909 else
5910 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5913 /* Generate all pending far branches,
5914 and free our references to the far labels. */
5915 while (far_branch_list)
5917 if (far_branch_list->near_label
5918 && ! NEXT_INSN (far_branch_list->near_label))
5919 gen_far_branch (far_branch_list);
5920 if (optimize
5921 && far_branch_list->far_label
5922 && ! --LABEL_NUSES (far_branch_list->far_label))
5923 delete_insn (far_branch_list->far_label);
5924 far_branch_list = far_branch_list->prev;
5927 /* Instruction length information is no longer valid due to the new
5928 instructions that have been generated. */
5929 init_insn_lengths ();
5932 /* Dump out instruction addresses, which is useful for debugging the
5933 constant pool table stuff.
5935 If relaxing, output the label and pseudo-ops used to link together
5936 calls and the instructions which set the registers. */
5938 /* ??? The addresses printed by this routine for insns are nonsense for
5939 insns which are inside of a sequence where none of the inner insns have
5940 variable length. This is because the second pass of shorten_branches
5941 does not bother to update them. */
5943 void
5944 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5945 int noperands ATTRIBUTE_UNUSED)
5947 if (TARGET_DUMPISIZE)
5948 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5950 if (TARGET_RELAX)
5952 rtx note;
5954 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5955 if (note)
5957 rtx pattern;
5959 pattern = PATTERN (insn);
5960 if (GET_CODE (pattern) == PARALLEL)
5961 pattern = XVECEXP (pattern, 0, 0);
5962 switch (GET_CODE (pattern))
5964 case SET:
5965 if (GET_CODE (SET_SRC (pattern)) != CALL
5966 && get_attr_type (insn) != TYPE_SFUNC)
5968 targetm.asm_out.internal_label
5969 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5970 break;
5972 /* else FALLTHROUGH */
5973 case CALL:
5974 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5975 CODE_LABEL_NUMBER (XEXP (note, 0)));
5976 break;
5978 default:
5979 gcc_unreachable ();
5985 /* Dump out any constants accumulated in the final pass. These will
5986 only be labels. */
5988 const char *
5989 output_jump_label_table (void)
5991 int i;
5993 if (pool_size)
5995 fprintf (asm_out_file, "\t.align 2\n");
5996 for (i = 0; i < pool_size; i++)
5998 pool_node *p = &pool_vector[i];
6000 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6001 CODE_LABEL_NUMBER (p->label));
6002 output_asm_insn (".long %O0", &p->value);
6004 pool_size = 0;
6007 return "";
6010 /* A full frame looks like:
6012 arg-5
6013 arg-4
6014 [ if current_function_anonymous_args
6015 arg-3
6016 arg-2
6017 arg-1
6018 arg-0 ]
6019 saved-fp
6020 saved-r10
6021 saved-r11
6022 saved-r12
6023 saved-pr
6024 local-n
6026 local-1
6027 local-0 <- fp points here. */
6029 /* Number of bytes pushed for anonymous args, used to pass information
6030 between expand_prologue and expand_epilogue. */
6032 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6033 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6034 for an epilogue and a negative value means that it's for a sibcall
6035 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6036 all the registers that are about to be restored, and hence dead. */
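/* If FRAME_P is true, the adjustment insns are emitted via frame_insn and
   are therefore marked RTX_FRAME_RELATED_P for DWARF CFI generation;
   otherwise plain emit_insn is used. */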
6038 static void
6039 output_stack_adjust (int size, rtx reg, int epilogue_p,
6040 HARD_REG_SET *live_regs_mask, bool frame_p)
6042 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6043 if (size)
6045 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6047 /* This test is bogus, as output_stack_adjust is used to re-align the
6048 stack. */
6049 #if 0
6050 gcc_assert (!(size % align));
6051 #endif
6053 if (CONST_OK_FOR_ADD (size))
6054 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6055 /* Try to do it with two partial adjustments; however, we must make
6056 sure that the stack is properly aligned at all times, in case
6057 an interrupt occurs between the two partial adjustments. */
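/* For example (illustrative values): with SIZE == 200 and a 4-byte
   STACK_BOUNDARY, the adjustment is split into 100 (200 / 2 & -4) and
   100, each of which satisfies CONST_OK_FOR_ADD. */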
6058 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6059 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6061 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6062 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6064 else
6066 rtx const_reg;
6067 rtx insn;
6068 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6069 int i;
6071 /* If TEMP is invalid, we could temporarily save a general
6072 register to MACL. However, there is currently no need
6073 to handle this case, so just die when we see it. */
6074 if (epilogue_p < 0
6075 || current_function_interrupt
6076 || ! call_really_used_regs[temp] || fixed_regs[temp])
6077 temp = -1;
6078 if (temp < 0 && ! current_function_interrupt
6079 && (TARGET_SHMEDIA || epilogue_p >= 0))
6081 HARD_REG_SET temps;
6082 COPY_HARD_REG_SET (temps, call_used_reg_set);
6083 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6084 if (epilogue_p > 0)
6086 int nreg = 0;
6087 if (crtl->return_rtx)
6089 enum machine_mode mode;
6090 mode = GET_MODE (crtl->return_rtx);
6091 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6092 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6094 for (i = 0; i < nreg; i++)
6095 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6096 if (crtl->calls_eh_return)
6098 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6099 for (i = 0; i <= 3; i++)
6100 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6103 if (TARGET_SHMEDIA && epilogue_p < 0)
6104 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6105 CLEAR_HARD_REG_BIT (temps, i);
6106 if (epilogue_p <= 0)
6108 for (i = FIRST_PARM_REG;
6109 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6110 CLEAR_HARD_REG_BIT (temps, i);
6111 if (cfun->static_chain_decl != NULL)
6112 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6114 temp = scavenge_reg (&temps);
6116 if (temp < 0 && live_regs_mask)
6118 HARD_REG_SET temps;
6120 COPY_HARD_REG_SET (temps, *live_regs_mask);
6121 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6122 temp = scavenge_reg (&temps);
6124 if (temp < 0)
6126 rtx adj_reg, tmp_reg, mem;
6128 /* If we reached here, the most likely case is the (sibcall)
6129 epilogue for non-SHmedia. Put a special push/pop sequence
6130 for such a case as the last resort. This looks lengthy, but it
6131 would not be a problem because the case seems to be very
6132 rare. */
6134 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6137 /* ??? There is still the slight possibility that r4 or
6138 r5 have been reserved as fixed registers or assigned
6139 as global registers, and they change during an
6140 interrupt. There are possible ways to handle this:
6142 - If we are adjusting the frame pointer (r14), we can do
6143 with a single temp register and an ordinary push / pop
6144 on the stack.
6145 - Grab any call-used or call-saved registers (i.e. not
6146 fixed or globals) for the temps we need. We might
6147 also grab r14 if we are adjusting the stack pointer.
6148 If we can't find enough available registers, issue
6149 a diagnostic and die - the user must have reserved
6150 way too many registers.
6151 But since all this is rather unlikely to happen and
6152 would require extra testing, we just die if r4 / r5
6153 are not available. */
6154 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6155 && !global_regs[4] && !global_regs[5]);
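/* In outline: save r4 at the word REG currently points to, compute
   REG + SIZE into r4, push r5 and the saved r4 value just below that
   address, move the adjusted value into REG, then pop both saved values
   back, leaving REG adjusted by SIZE with r4 and r5 preserved. */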
6157 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6158 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6159 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6160 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6161 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6162 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6163 emit_move_insn (mem, tmp_reg);
6164 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6165 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6166 emit_move_insn (mem, tmp_reg);
6167 emit_move_insn (reg, adj_reg);
6168 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6169 emit_move_insn (adj_reg, mem);
6170 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6171 emit_move_insn (tmp_reg, mem);
6172 /* Tell flow the insns that pop r4/r5 aren't dead. */
6173 emit_use (tmp_reg);
6174 emit_use (adj_reg);
6175 return;
6177 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6179 /* If SIZE is negative, subtract the positive value.
6180 This sometimes allows a constant pool entry to be shared
6181 between prologue and epilogue code. */
6182 if (size < 0)
6184 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6185 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6187 else
6189 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6190 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
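/* The adjustment went through a scratch register, so attach a note that
   describes it to the DWARF CFI machinery as a simple REG = REG + SIZE
   update. */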
6192 if (! epilogue_p)
6193 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6194 gen_rtx_SET (VOIDmode, reg,
6195 gen_rtx_PLUS (SImode, reg,
6196 GEN_INT (size))));
6201 static rtx
6202 frame_insn (rtx x)
6204 x = emit_insn (x);
6205 RTX_FRAME_RELATED_P (x) = 1;
6206 return x;
6209 /* Output RTL to push register RN onto the stack. */
6211 static rtx
6212 push (int rn)
6214 rtx x;
6215 if (rn == FPUL_REG)
6216 x = gen_push_fpul ();
6217 else if (rn == FPSCR_REG)
6218 x = gen_push_fpscr ();
6219 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6220 && FP_OR_XD_REGISTER_P (rn))
6222 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6223 return NULL_RTX;
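/* (The odd-numbered FP register skipped above is covered by the DFmode
   push of the preceding even-numbered register.) */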
6224 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6226 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6227 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6228 else
6229 x = gen_push (gen_rtx_REG (SImode, rn));
6231 x = frame_insn (x);
6232 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6233 return x;
6236 /* Output RTL to pop register RN from the stack. */
6238 static void
6239 pop (int rn)
6241 rtx x;
6242 if (rn == FPUL_REG)
6243 x = gen_pop_fpul ();
6244 else if (rn == FPSCR_REG)
6245 x = gen_pop_fpscr ();
6246 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6247 && FP_OR_XD_REGISTER_P (rn))
6249 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6250 return;
6251 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6253 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6254 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6255 else
6256 x = gen_pop (gen_rtx_REG (SImode, rn));
6258 x = emit_insn (x);
6259 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6262 /* Generate code to push the regs specified in the mask. */
6264 static void
6265 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6267 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6268 int skip_fpscr = 0;
6270 /* Push PR last; this gives better latencies after the prologue, and
6271 candidates for the return delay slot when there are no general
6272 registers pushed. */
6273 for (; i < FIRST_PSEUDO_REGISTER; i++)
6275 /* If this is an interrupt handler, and the SZ bit varies,
6276 and we have to push any floating point register, we need
6277 to switch to the correct precision first. */
6278 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6279 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6281 HARD_REG_SET unsaved;
6283 push (FPSCR_REG);
6284 COMPL_HARD_REG_SET (unsaved, *mask);
6285 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6286 skip_fpscr = 1;
6288 if (i != PR_REG
6289 && (i != FPSCR_REG || ! skip_fpscr)
6290 && TEST_HARD_REG_BIT (*mask, i))
6292 /* If the ISR has the RESBANK attribute assigned, don't push any of
6293 the following registers: R0-R14, MACH, MACL and GBR. */
6294 if (! (sh_cfun_resbank_handler_p ()
6295 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6296 || i == MACH_REG
6297 || i == MACL_REG
6298 || i == GBR_REG)))
6299 push (i);
6303 /* Push banked registers last to improve delay slot opportunities. */
6304 if (interrupt_handler)
6305 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6306 if (TEST_HARD_REG_BIT (*mask, i))
6307 push (i);
6309 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
6310 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6311 push (PR_REG);
6314 /* Calculate how much extra space is needed to save all callee-saved
6315 target registers.
6316 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6318 static int
6319 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6321 int reg;
6322 int stack_space = 0;
6323 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6325 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6326 if ((! call_really_used_regs[reg] || interrupt_handler)
6327 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6328 /* Leave space to save this target register on the stack,
6329 in case target register allocation wants to use it. */
6330 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6331 return stack_space;
6334 /* Decide whether we should reserve space for callee-save target registers,
6335 in case target register allocation wants to use them. REGS_SAVED is
6336 the space, in bytes, that is already required for register saves.
6337 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6339 static int
6340 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6341 HARD_REG_SET *live_regs_mask)
6343 if (optimize_size)
6344 return 0;
6345 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6348 /* Decide how much space to reserve for callee-save target registers
6349 in case target register allocation wants to use them.
6350 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6352 static int
6353 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6355 if (shmedia_space_reserved_for_target_registers)
6356 return shmedia_target_regs_stack_space (live_regs_mask);
6357 else
6358 return 0;
6361 /* Work out the registers which need to be saved, both as a mask and a
6362 count of saved words. Return the count.
6364 If doing a pragma interrupt function, then push all regs used by the
6365 function, and if we call another function (we can tell by looking at PR),
6366 make sure that all the regs it clobbers are safe too. */
6368 static int
6369 calc_live_regs (HARD_REG_SET *live_regs_mask)
6371 unsigned int reg;
6372 int count;
6373 tree attrs;
6374 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6375 bool nosave_low_regs;
6376 int pr_live, has_call;
6378 attrs = DECL_ATTRIBUTES (current_function_decl);
6379 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6380 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6381 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6382 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6384 CLEAR_HARD_REG_SET (*live_regs_mask);
6385 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6386 && df_regs_ever_live_p (FPSCR_REG))
6387 target_flags &= ~MASK_FPU_SINGLE;
6388 /* If we can save a lot of saves by switching to double mode, do that. */
6389 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6390 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6391 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6392 && (! call_really_used_regs[reg]
6393 || interrupt_handler)
6394 && ++count > 2)
6396 target_flags &= ~MASK_FPU_SINGLE;
6397 break;
6399 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6400 knows how to use it. That means the pseudo originally allocated for
6401 the initial value can become the PR_MEDIA_REG hard register, as seen for
6402 execute/20010122-1.c:test9. */
6403 if (TARGET_SHMEDIA)
6404 /* ??? This function is called from initial_elimination_offset, hence we
6405 can't use the result of sh_media_register_for_return here. */
6406 pr_live = sh_pr_n_sets ();
6407 else
6409 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6410 pr_live = (pr_initial
6411 ? (!REG_P (pr_initial)
6412 || REGNO (pr_initial) != (PR_REG))
6413 : df_regs_ever_live_p (PR_REG));
6414 /* For Shcompact, if not optimizing, we end up with a memory reference
6415 using the return address pointer for __builtin_return_address even
6416 though there is no actual need to put the PR register on the stack. */
6417 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6419 /* Force PR to be live if the prologue has to call the SHmedia
6420 argument decoder or register saver. */
6421 if (TARGET_SHCOMPACT
6422 && ((crtl->args.info.call_cookie
6423 & ~ CALL_COOKIE_RET_TRAMP (1))
6424 || crtl->saves_all_registers))
6425 pr_live = 1;
6426 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6427 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6429 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6430 ? pr_live
6431 : interrupt_handler
6432 ? (/* Need to save all the regs ever live. */
6433 (df_regs_ever_live_p (reg)
6434 || (call_really_used_regs[reg]
6435 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6436 || reg == PIC_OFFSET_TABLE_REGNUM)
6437 && has_call)
6438 || (TARGET_SHMEDIA && has_call
6439 && REGISTER_NATURAL_MODE (reg) == SImode
6440 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6441 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6442 && reg != RETURN_ADDRESS_POINTER_REGNUM
6443 && reg != T_REG && reg != GBR_REG
6444 /* Push fpscr only on targets which have an FPU. */
6445 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6446 : (/* Only push those regs which are used and need to be saved. */
6447 (TARGET_SHCOMPACT
6448 && flag_pic
6449 && crtl->args.info.call_cookie
6450 && reg == PIC_OFFSET_TABLE_REGNUM)
6451 || (df_regs_ever_live_p (reg)
6452 && ((!call_really_used_regs[reg]
6453 && !(reg != PIC_OFFSET_TABLE_REGNUM
6454 && fixed_regs[reg] && call_used_regs[reg]))
6455 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6456 || (crtl->calls_eh_return
6457 && (reg == EH_RETURN_DATA_REGNO (0)
6458 || reg == EH_RETURN_DATA_REGNO (1)
6459 || reg == EH_RETURN_DATA_REGNO (2)
6460 || reg == EH_RETURN_DATA_REGNO (3)))
6461 || ((reg == MACL_REG || reg == MACH_REG)
6462 && df_regs_ever_live_p (reg)
6463 && sh_cfun_attr_renesas_p ())
6466 SET_HARD_REG_BIT (*live_regs_mask, reg);
6467 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6469 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6470 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6472 if (FP_REGISTER_P (reg))
6474 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6476 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6477 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6480 else if (XD_REGISTER_P (reg))
6482 /* Must switch to double mode to access these registers. */
6483 target_flags &= ~MASK_FPU_SINGLE;
6487 if (nosave_low_regs && reg == R8_REG)
6488 break;
6490 /* If we have a target register optimization pass after prologue / epilogue
6491 threading, we need to assume all target registers will be live even if
6492 they aren't now. */
6493 if (flag_branch_target_load_optimize2
6494 && TARGET_SAVE_ALL_TARGET_REGS
6495 && shmedia_space_reserved_for_target_registers)
6496 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6497 if ((! call_really_used_regs[reg] || interrupt_handler)
6498 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6500 SET_HARD_REG_BIT (*live_regs_mask, reg);
6501 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6503 /* If this is an interrupt handler, we don't have any call-clobbered
6504 registers we can conveniently use for target register save/restore.
6505 Make sure we save at least one general purpose register when we need
6506 to save target registers. */
6507 if (interrupt_handler
6508 && hard_reg_set_intersect_p (*live_regs_mask,
6509 reg_class_contents[TARGET_REGS])
6510 && ! hard_reg_set_intersect_p (*live_regs_mask,
6511 reg_class_contents[GENERAL_REGS]))
6513 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6514 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6517 return count;
6520 /* Code to generate prologue and epilogue sequences. */
6522 /* PUSHED is the number of bytes that are being pushed on the
6523 stack for register saves. Return the frame size, padded
6524 appropriately so that the stack stays properly aligned. */
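/* For example (illustrative numbers): with get_frame_size () == 20,
   PUSHED == 12 and a 4-byte STACK_BOUNDARY, this returns
   ((20 + 12 + 3) & -4) - 12 == 20. */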
6525 static HOST_WIDE_INT
6526 rounded_frame_size (int pushed)
6528 HOST_WIDE_INT size = get_frame_size ();
6529 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6531 return ((size + pushed + align - 1) & -align) - pushed;
6534 /* Choose a call-clobbered target-branch register that remains
6535 unchanged along the whole function. We set it up as the return
6536 value in the prologue. */
6538 sh_media_register_for_return (void)
6540 int regno;
6541 int tr0_used;
6543 if (! current_function_is_leaf)
6544 return -1;
6545 if (lookup_attribute ("interrupt_handler",
6546 DECL_ATTRIBUTES (current_function_decl)))
6547 return -1;
6548 if (sh_cfun_interrupt_handler_p ())
6549 return -1;
6551 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6553 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6554 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6555 return regno;
6557 return -1;
6560 /* The maximum registers we need to save are:
6561 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6562 - 32 floating point registers (for each pair, we save none,
6563 one single precision value, or a double precision value).
6564 - 8 target registers
6565 - add 1 entry for a delimiter. */
6566 #define MAX_SAVED_REGS (62+32+8)
6568 typedef struct save_entry_s
6570 unsigned char reg;
6571 unsigned char mode;
6572 short offset;
6573 } save_entry;
6575 #define MAX_TEMPS 4
6577 /* There will be a delimiter entry with VOIDmode both at the start and the
6578 end of a filled in schedule. The end delimiter has the offset of the
6579 save with the smallest (i.e. most negative) offset. */
6580 typedef struct save_schedule_s
6582 save_entry entries[MAX_SAVED_REGS + 2];
6583 int temps[MAX_TEMPS+1];
6584 } save_schedule;
6586 /* Fill in SCHEDULE according to LIVE_REGS_MASK.
6587 Returns the last entry written to (not counting
6588 the delimiter). OFFSET_BASE is a number to be added to all offset
6589 entries. */
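/* The SCHEDULE->temps array is filled with up to MAX_TEMPS candidate
   scratch registers and is terminated by -1. */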
6591 static save_entry *
6592 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6593 int offset_base)
6595 int align, i;
6596 save_entry *entry = schedule->entries;
6597 int tmpx = 0;
6598 int offset;
6600 if (! current_function_interrupt)
6601 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6602 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6603 && ! FUNCTION_ARG_REGNO_P (i)
6604 && i != FIRST_RET_REG
6605 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6606 && ! (crtl->calls_eh_return
6607 && (i == EH_RETURN_STACKADJ_REGNO
6608 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6609 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6610 schedule->temps[tmpx++] = i;
6611 entry->reg = -1;
6612 entry->mode = VOIDmode;
6613 entry->offset = offset_base;
6614 entry++;
6615 /* We loop twice: first, we save 8-byte aligned registers at the
6616 higher addresses, which are known to be aligned. Then, we
6617 proceed to saving 32-bit registers that don't need 8-byte
6618 alignment.
6619 If this is an interrupt function, all registers that need saving
6620 need to be saved in full. Moreover, we need to postpone saving
6621 target registers until we have saved some general purpose registers
6622 that we can then use as scratch registers. */
6623 offset = offset_base;
6624 for (align = 1; align >= 0; align--)
6626 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6627 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6629 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6630 int reg = i;
6632 if (current_function_interrupt)
6634 if (TARGET_REGISTER_P (i))
6635 continue;
6636 if (GENERAL_REGISTER_P (i))
6637 mode = DImode;
6639 if (mode == SFmode && (i % 2) == 1
6640 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6641 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6643 mode = DFmode;
6644 i--;
6645 reg--;
6648 /* If we're doing the aligned pass and this is not aligned,
6649 or we're doing the unaligned pass and this is aligned,
6650 skip it. */
6651 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6652 != align)
6653 continue;
6655 if (current_function_interrupt
6656 && GENERAL_REGISTER_P (i)
6657 && tmpx < MAX_TEMPS)
6658 schedule->temps[tmpx++] = i;
6660 offset -= GET_MODE_SIZE (mode);
6661 entry->reg = i;
6662 entry->mode = mode;
6663 entry->offset = offset;
6664 entry++;
6666 if (align && current_function_interrupt)
6667 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6668 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6670 offset -= GET_MODE_SIZE (DImode);
6671 entry->reg = i;
6672 entry->mode = DImode;
6673 entry->offset = offset;
6674 entry++;
6677 entry->reg = -1;
6678 entry->mode = VOIDmode;
6679 entry->offset = offset;
6680 schedule->temps[tmpx] = -1;
6681 return entry - 1;
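/* Expand code for the function prologue. */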
6684 void
6685 sh_expand_prologue (void)
6687 HARD_REG_SET live_regs_mask;
6688 int d, i;
6689 int d_rounding = 0;
6690 int save_flags = target_flags;
6691 int pretend_args;
6692 tree sp_switch_attr
6693 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6695 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6697 /* We have pretend args if we had an object sent partially in registers
6698 and partially on the stack, e.g. a large structure. */
6699 pretend_args = crtl->args.pretend_args_size;
6700 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6701 && (NPARM_REGS(SImode)
6702 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6703 pretend_args = 0;
6704 /* The dwarf2 module doesn't expect frame-related insns here. */
6705 output_stack_adjust (-pretend_args
6706 - crtl->args.info.stack_regs * 8,
6707 stack_pointer_rtx, 0, NULL, false);
6709 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6710 /* We're going to use the PIC register to load the address of the
6711 incoming-argument decoder and/or of the return trampoline from
6712 the GOT, so make sure the PIC register is preserved and
6713 initialized. */
6714 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6716 if (TARGET_SHCOMPACT
6717 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6719 int reg;
6721 /* First, make all registers with incoming arguments that will
6722 be pushed onto the stack live, so that register renaming
6723 doesn't overwrite them. */
6724 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6725 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6726 >= NPARM_REGS (SImode) - reg)
6727 for (; reg < NPARM_REGS (SImode); reg++)
6728 emit_insn (gen_shcompact_preserve_incoming_args
6729 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6730 else if (CALL_COOKIE_INT_REG_GET
6731 (crtl->args.info.call_cookie, reg) == 1)
6732 emit_insn (gen_shcompact_preserve_incoming_args
6733 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6735 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6736 stack_pointer_rtx);
6737 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6738 GEN_INT (crtl->args.info.call_cookie));
6739 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6740 gen_rtx_REG (SImode, R0_REG));
6742 else if (TARGET_SHMEDIA)
6744 int tr = sh_media_register_for_return ();
6746 if (tr >= 0)
6747 emit_move_insn (gen_rtx_REG (DImode, tr),
6748 gen_rtx_REG (DImode, PR_MEDIA_REG));
6751 /* Emit the code for SETUP_VARARGS. */
6752 if (cfun->stdarg)
6754 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6756 /* Push arg regs as if they'd been provided by the caller on the stack. */
6757 for (i = 0; i < NPARM_REGS(SImode); i++)
6759 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6760 rtx insn;
6762 if (i >= (NPARM_REGS(SImode)
6763 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6765 break;
6766 insn = push (rn);
6771 /* If we're supposed to switch stacks at function entry, do so now. */
6772 if (sp_switch_attr)
6774 rtx lab, newsrc;
6775 /* The argument specifies a variable holding the address of the
6776 stack the interrupt function should switch to/from at entry/exit. */
6777 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6778 const char *s
6779 = ggc_strdup (TREE_STRING_POINTER (arg));
6780 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6782 lab = add_constant (sp_switch, SImode, 0);
6783 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6784 newsrc = gen_const_mem (SImode, newsrc);
6786 emit_insn (gen_sp_switch_1 (newsrc));
6789 d = calc_live_regs (&live_regs_mask);
6790 /* ??? Maybe we could save some switching if we can move a mode switch
6791 that already happens to be at the function start into the prologue. */
6792 if (target_flags != save_flags && ! current_function_interrupt)
6793 emit_insn (gen_toggle_sz ());
6795 if (TARGET_SH5)
6797 int offset_base, offset;
6798 rtx r0 = NULL_RTX;
6799 int offset_in_r0 = -1;
6800 int sp_in_r0 = 0;
6801 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6802 int total_size, save_size;
6803 save_schedule schedule;
6804 save_entry *entry;
6805 int *tmp_pnt;
6807 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6808 && ! current_function_interrupt)
6809 r0 = gen_rtx_REG (Pmode, R0_REG);
6811 /* D is the actual number of bytes that we need for saving registers;
6812 however, in initial_elimination_offset we have committed to using
6813 an additional TREGS_SPACE amount of bytes - in order to keep both
6814 addresses to arguments supplied by the caller and local variables
6815 valid, we must keep this gap. Place it between the incoming
6816 arguments and the actually saved registers in a bid to optimize
6817 locality of reference. */
6818 total_size = d + tregs_space;
6819 total_size += rounded_frame_size (total_size);
6820 save_size = total_size - rounded_frame_size (d);
6821 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6822 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6823 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6825 /* If adjusting the stack in a single step costs nothing extra, do so.
6826 I.e. either if a single addi is enough, or we need a movi anyway,
6827 and we don't exceed the maximum offset range (the test for the
6828 latter is conservative for simplicity). */
6829 if (TARGET_SHMEDIA
6830 && (CONST_OK_FOR_I10 (-total_size)
6831 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6832 && total_size <= 2044)))
6833 d_rounding = total_size - save_size;
6835 offset_base = d + d_rounding;
6837 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6838 0, NULL, true);
6840 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6841 tmp_pnt = schedule.temps;
6842 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6844 enum machine_mode mode = (enum machine_mode) entry->mode;
6845 unsigned int reg = entry->reg;
6846 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6847 rtx orig_reg_rtx;
6849 offset = entry->offset;
6851 reg_rtx = gen_rtx_REG (mode, reg);
6853 mem_rtx = gen_frame_mem (mode,
6854 gen_rtx_PLUS (Pmode,
6855 stack_pointer_rtx,
6856 GEN_INT (offset)));
6858 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6860 gcc_assert (r0);
6861 mem_rtx = NULL_RTX;
6864 if (HAVE_PRE_DECREMENT
6865 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6866 || mem_rtx == NULL_RTX
6867 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6869 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6871 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6872 pre_dec = NULL_RTX;
6873 else
6875 mem_rtx = NULL_RTX;
6876 offset += GET_MODE_SIZE (mode);
6880 if (mem_rtx != NULL_RTX)
6881 goto addr_ok;
6883 if (offset_in_r0 == -1)
6885 emit_move_insn (r0, GEN_INT (offset));
6886 offset_in_r0 = offset;
6888 else if (offset != offset_in_r0)
6890 emit_move_insn (r0,
6891 gen_rtx_PLUS
6892 (Pmode, r0,
6893 GEN_INT (offset - offset_in_r0)));
6894 offset_in_r0 += offset - offset_in_r0;
6897 if (pre_dec != NULL_RTX)
6899 if (! sp_in_r0)
6901 emit_move_insn (r0,
6902 gen_rtx_PLUS
6903 (Pmode, r0, stack_pointer_rtx));
6904 sp_in_r0 = 1;
6907 offset -= GET_MODE_SIZE (mode);
6908 offset_in_r0 -= GET_MODE_SIZE (mode);
6910 mem_rtx = pre_dec;
6912 else if (sp_in_r0)
6913 mem_rtx = gen_frame_mem (mode, r0);
6914 else
6915 mem_rtx = gen_frame_mem (mode,
6916 gen_rtx_PLUS (Pmode,
6917 stack_pointer_rtx,
6918 r0));
6920 /* We must not use an r0-based address for target-branch
6921 registers or for special registers without pre-dec
6922 memory addresses, since we store their values in r0
6923 first. */
6924 gcc_assert (!TARGET_REGISTER_P (reg)
6925 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6926 || mem_rtx == pre_dec));
6928 addr_ok:
6929 orig_reg_rtx = reg_rtx;
6930 if (TARGET_REGISTER_P (reg)
6931 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6932 && mem_rtx != pre_dec))
6934 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6936 emit_move_insn (tmp_reg, reg_rtx);
6938 if (REGNO (tmp_reg) == R0_REG)
6940 offset_in_r0 = -1;
6941 sp_in_r0 = 0;
6942 gcc_assert (!refers_to_regno_p
6943 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6946 if (*++tmp_pnt <= 0)
6947 tmp_pnt = schedule.temps;
6949 reg_rtx = tmp_reg;
6952 rtx insn;
6954 /* Mark as interesting for dwarf cfi generator */
6955 insn = emit_move_insn (mem_rtx, reg_rtx);
6956 RTX_FRAME_RELATED_P (insn) = 1;
6957 /* If we use an intermediate register for the save, we can't
6958 describe this exactly in cfi as a copy of the to-be-saved
6959 register into the temporary register followed by a store of the
6960 temporary register to the stack, because the temporary register can
6961 have a different natural size than the to-be-saved register.
6962 Thus, we gloss over the intermediate copy and pretend we do
6963 a direct save from the to-be-saved register. */
6964 if (REGNO (reg_rtx) != reg)
6966 rtx set;
6968 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6969 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6972 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6974 rtx reg_rtx = gen_rtx_REG (mode, reg);
6975 rtx set;
6976 rtx mem_rtx = gen_frame_mem (mode,
6977 gen_rtx_PLUS (Pmode,
6978 stack_pointer_rtx,
6979 GEN_INT (offset)));
6981 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6982 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6987 gcc_assert (entry->offset == d_rounding);
6989 else
6990 push_regs (&live_regs_mask, current_function_interrupt);
6992 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6993 emit_insn (gen_GOTaddr2picreg ());
6995 if (SHMEDIA_REGS_STACK_ADJUST ())
6997 /* This must NOT go through the PLT, otherwise mach and macl
6998 may be clobbered. */
6999 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7000 (TARGET_FPU_ANY
7001 ? "__GCC_push_shmedia_regs"
7002 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7003 emit_insn (gen_shmedia_save_restore_regs_compact
7004 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7007 if (target_flags != save_flags && ! current_function_interrupt)
7008 emit_insn (gen_toggle_sz ());
7010 target_flags = save_flags;
7012 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7013 stack_pointer_rtx, 0, NULL, true);
7015 if (frame_pointer_needed)
7016 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7018 if (TARGET_SHCOMPACT
7019 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7021 /* This must NOT go through the PLT, otherwise mach and macl
7022 may be clobbered. */
7023 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7024 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7025 emit_insn (gen_shcompact_incoming_args ());
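/* Expand code for the function epilogue. SIBCALL_P is true when expanding
   the epilogue for a sibling call. */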
7029 void
7030 sh_expand_epilogue (bool sibcall_p)
7032 HARD_REG_SET live_regs_mask;
7033 int d, i;
7034 int d_rounding = 0;
7036 int save_flags = target_flags;
7037 int frame_size, save_size;
7038 int fpscr_deferred = 0;
7039 int e = sibcall_p ? -1 : 1;
7041 d = calc_live_regs (&live_regs_mask);
7043 save_size = d;
7044 frame_size = rounded_frame_size (d);
7046 if (TARGET_SH5)
7048 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7049 int total_size;
7050 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7051 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7052 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7054 total_size = d + tregs_space;
7055 total_size += rounded_frame_size (total_size);
7056 save_size = total_size - frame_size;
7058 /* If adjusting the stack in a single step costs nothing extra, do so.
7059 I.e. either if a single addi is enough, or we need a movi anyway,
7060 and we don't exceed the maximum offset range (the test for the
7061 latter is conservative for simplicity). */
7062 if (TARGET_SHMEDIA
7063 && ! frame_pointer_needed
7064 && (CONST_OK_FOR_I10 (total_size)
7065 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7066 && total_size <= 2044)))
7067 d_rounding = frame_size;
7069 frame_size -= d_rounding;
7072 if (frame_pointer_needed)
7074 /* We must avoid scheduling the epilogue with previous basic blocks.
7075 See PR/18032 and PR/40313. */
7076 emit_insn (gen_blockage ());
7077 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7078 &live_regs_mask, false);
7080 /* We must avoid moving the stack pointer adjustment past code
7081 which reads from the local frame, else an interrupt could
7082 occur after the SP adjustment and clobber data in the local
7083 frame. */
7084 emit_insn (gen_blockage ());
7085 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7087 else if (frame_size)
7089 /* We must avoid moving the stack pointer adjustment past code
7090 which reads from the local frame, else an interrupt could
7091 occur after the SP adjustment and clobber data in the local
7092 frame. */
7093 emit_insn (gen_blockage ());
7094 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7095 &live_regs_mask, false);
7098 if (SHMEDIA_REGS_STACK_ADJUST ())
7100 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7101 (TARGET_FPU_ANY
7102 ? "__GCC_pop_shmedia_regs"
7103 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7104 /* This must NOT go through the PLT, otherwise mach and macl
7105 may be clobbered. */
7106 emit_insn (gen_shmedia_save_restore_regs_compact
7107 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7110 /* Pop all the registers. */
7112 if (target_flags != save_flags && ! current_function_interrupt)
7113 emit_insn (gen_toggle_sz ());
7114 if (TARGET_SH5)
7116 int offset_base, offset;
7117 int offset_in_r0 = -1;
7118 int sp_in_r0 = 0;
7119 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7120 save_schedule schedule;
7121 save_entry *entry;
7122 int *tmp_pnt;
7124 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7125 offset_base = -entry[1].offset + d_rounding;
7126 tmp_pnt = schedule.temps;
7127 for (; entry->mode != VOIDmode; entry--)
7129 enum machine_mode mode = (enum machine_mode) entry->mode;
7130 int reg = entry->reg;
7131 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7133 offset = offset_base + entry->offset;
7134 reg_rtx = gen_rtx_REG (mode, reg);
7136 mem_rtx = gen_frame_mem (mode,
7137 gen_rtx_PLUS (Pmode,
7138 stack_pointer_rtx,
7139 GEN_INT (offset)));
7141 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7142 mem_rtx = NULL_RTX;
7144 if (HAVE_POST_INCREMENT
7145 && (offset == offset_in_r0
7146 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7147 && mem_rtx == NULL_RTX)
7148 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7150 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7152 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7153 post_inc = NULL_RTX;
7154 else
7155 mem_rtx = NULL_RTX;
7158 if (mem_rtx != NULL_RTX)
7159 goto addr_ok;
7161 if (offset_in_r0 == -1)
7163 emit_move_insn (r0, GEN_INT (offset));
7164 offset_in_r0 = offset;
7166 else if (offset != offset_in_r0)
7168 emit_move_insn (r0,
7169 gen_rtx_PLUS
7170 (Pmode, r0,
7171 GEN_INT (offset - offset_in_r0)));
7172 offset_in_r0 += offset - offset_in_r0;
7175 if (post_inc != NULL_RTX)
7177 if (! sp_in_r0)
7179 emit_move_insn (r0,
7180 gen_rtx_PLUS
7181 (Pmode, r0, stack_pointer_rtx));
7182 sp_in_r0 = 1;
7185 mem_rtx = post_inc;
7187 offset_in_r0 += GET_MODE_SIZE (mode);
7189 else if (sp_in_r0)
7190 mem_rtx = gen_frame_mem (mode, r0);
7191 else
7192 mem_rtx = gen_frame_mem (mode,
7193 gen_rtx_PLUS (Pmode,
7194 stack_pointer_rtx,
7195 r0));
7197 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7198 || mem_rtx == post_inc);
7200 addr_ok:
7201 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7202 && mem_rtx != post_inc)
7204 insn = emit_move_insn (r0, mem_rtx);
7205 mem_rtx = r0;
7207 else if (TARGET_REGISTER_P (reg))
7209 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7211 /* Give the scheduler a bit of freedom by using up to
7212 MAX_TEMPS registers in a round-robin fashion. */
7213 insn = emit_move_insn (tmp_reg, mem_rtx);
7214 mem_rtx = tmp_reg;
7215 if (*++tmp_pnt < 0)
7216 tmp_pnt = schedule.temps;
7219 insn = emit_move_insn (reg_rtx, mem_rtx);
7222 gcc_assert (entry->offset + offset_base == d + d_rounding);
7224 else /* ! TARGET_SH5 */
7226 int last_reg;
7228 save_size = 0;
7229 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7230 register. */
7231 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7232 && !sh_cfun_resbank_handler_p ())
7234 if (!frame_pointer_needed)
7235 emit_insn (gen_blockage ());
7236 pop (PR_REG);
7239 /* Banked registers are popped first to avoid being scheduled in the
7240 delay slot. RTE switches banks before its delay slot instruction. */
7241 if (current_function_interrupt)
7243 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7244 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7245 pop (LAST_BANKED_REG - i);
7247 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7249 else
7250 last_reg = FIRST_PSEUDO_REGISTER;
7252 for (i = 0; i < last_reg; i++)
7254 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7256 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7257 && hard_reg_set_intersect_p (live_regs_mask,
7258 reg_class_contents[DF_REGS]))
7259 fpscr_deferred = 1;
7260 /* For an ISR with the RESBANK attribute assigned, don't pop the
7261 following registers: R0-R14, MACH, MACL and GBR. */
7262 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7263 && ! (sh_cfun_resbank_handler_p ()
7264 && ((j >= FIRST_GENERAL_REG
7265 && j < LAST_GENERAL_REG)
7266 || j == MACH_REG
7267 || j == MACL_REG
7268 || j == GBR_REG)))
7269 pop (j);
7271 if (j == FIRST_FP_REG && fpscr_deferred)
7272 pop (FPSCR_REG);
7275 if (target_flags != save_flags && ! current_function_interrupt)
7276 emit_insn (gen_toggle_sz ());
7277 target_flags = save_flags;
7279 output_stack_adjust (crtl->args.pretend_args_size
7280 + save_size + d_rounding
7281 + crtl->args.info.stack_regs * 8,
7282 stack_pointer_rtx, e, NULL, false);
7284 if (crtl->calls_eh_return)
7285 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7286 EH_RETURN_STACKADJ_RTX));
7288 /* Switch back to the normal stack if necessary. */
7289 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7290 emit_insn (gen_sp_switch_2 ());
7292 /* Tell flow the insn that pops PR isn't dead. */
7293 /* PR_REG will never be live in SHmedia mode, and we don't need to
7294 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7295 by the return pattern. */
7296 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7297 emit_use (gen_rtx_REG (SImode, PR_REG));
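/* Cached result for sh_need_epilogue: 0 means not yet computed, 1 means an
   epilogue is needed, -1 means it is not. Reset by
   sh_output_function_epilogue. */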
7300 static int sh_need_epilogue_known = 0;
7303 sh_need_epilogue (void)
7305 if (! sh_need_epilogue_known)
7307 rtx epilogue;
7309 start_sequence ();
7310 sh_expand_epilogue (0);
7311 epilogue = get_insns ();
7312 end_sequence ();
7313 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7315 return sh_need_epilogue_known > 0;
7318 /* Emit code to change the current function's return address to RA.
7319 TEMP is available as a scratch register, if needed. */
7321 void
7322 sh_set_return_address (rtx ra, rtx tmp)
7324 HARD_REG_SET live_regs_mask;
7325 int d;
7326 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7327 int pr_offset;
7329 d = calc_live_regs (&live_regs_mask);
7331 /* If pr_reg isn't live, we can set it (or the register given in
7332 sh_media_register_for_return) directly. */
7333 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7335 rtx rr;
7337 if (TARGET_SHMEDIA)
7339 int rr_regno = sh_media_register_for_return ();
7341 if (rr_regno < 0)
7342 rr_regno = pr_reg;
7344 rr = gen_rtx_REG (DImode, rr_regno);
7346 else
7347 rr = gen_rtx_REG (SImode, pr_reg);
7349 emit_insn (GEN_MOV (rr, ra));
7350 /* Tell flow the register for return isn't dead. */
7351 emit_use (rr);
7352 return;
7355 if (TARGET_SH5)
7357 int offset;
7358 save_schedule schedule;
7359 save_entry *entry;
7361 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7362 offset = entry[1].offset;
7363 for (; entry->mode != VOIDmode; entry--)
7364 if (entry->reg == pr_reg)
7365 goto found;
7367 /* We can't find the PR register. */
7368 gcc_unreachable ();
7370 found:
7371 offset = entry->offset - offset;
7372 pr_offset = (rounded_frame_size (d) + offset
7373 + SHMEDIA_REGS_STACK_ADJUST ());
7375 else
7376 pr_offset = rounded_frame_size (d);
7378 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7379 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7381 tmp = gen_frame_mem (Pmode, tmp);
7382 emit_insn (GEN_MOV (tmp, ra));
7383 /* Tell flow this store isn't dead. */
7384 emit_use (tmp);
7387 /* Clear variables at function end. */
7389 static void
7390 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7391 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7393 sh_need_epilogue_known = 0;
7396 static rtx
7397 sh_builtin_saveregs (void)
7399 /* First unnamed integer register. */
7400 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7401 /* Number of integer registers we need to save. */
7402 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7403 /* First unnamed SFmode float reg */
7404 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7405 /* Number of SFmode float regs to save. */
7406 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7407 rtx regbuf, fpregs;
7408 int bufsize, regno;
7409 alias_set_type alias_set;
7411 if (TARGET_SH5)
7413 if (n_intregs)
7415 int pushregs = n_intregs;
7417 while (pushregs < NPARM_REGS (SImode) - 1
7418 && (CALL_COOKIE_INT_REG_GET
7419 (crtl->args.info.call_cookie,
7420 NPARM_REGS (SImode) - pushregs)
7421 == 1))
7423 crtl->args.info.call_cookie
7424 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7425 - pushregs, 1);
7426 pushregs++;
7429 if (pushregs == NPARM_REGS (SImode))
7430 crtl->args.info.call_cookie
7431 |= (CALL_COOKIE_INT_REG (0, 1)
7432 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7433 else
7434 crtl->args.info.call_cookie
7435 |= CALL_COOKIE_STACKSEQ (pushregs);
7437 crtl->args.pretend_args_size += 8 * n_intregs;
7439 if (TARGET_SHCOMPACT)
7440 return const0_rtx;
7443 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7445 error ("__builtin_saveregs not supported by this subtarget");
7446 return const0_rtx;
7449 if (TARGET_SHMEDIA)
7450 n_floatregs = 0;
7452 /* Allocate block of memory for the regs. */
7453 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7454 Or can assign_stack_local accept a 0 SIZE argument? */
7455 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7457 if (TARGET_SHMEDIA)
7458 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7459 else if (n_floatregs & 1)
7461 rtx addr;
7463 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7464 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7465 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7466 regbuf = change_address (regbuf, BLKmode, addr);
7468 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7470 rtx addr, mask;
7472 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7473 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7474 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7475 emit_insn (gen_andsi3 (addr, addr, mask));
7476 regbuf = change_address (regbuf, BLKmode, addr);
7478 else
7479 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7480 alias_set = get_varargs_alias_set ();
7481 set_mem_alias_set (regbuf, alias_set);
7483 /* Save int args.
7484 This is optimized to only save the regs that are necessary. Explicitly
7485 named args need not be saved. */
7486 if (n_intregs > 0)
7487 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7488 adjust_address (regbuf, BLKmode,
7489 n_floatregs * UNITS_PER_WORD),
7490 n_intregs);
7492 if (TARGET_SHMEDIA)
7493 /* Return the address of the regbuf. */
7494 return XEXP (regbuf, 0);
7496 /* Save float args.
7497 This is optimized to only save the regs that are necessary. Explicitly
7498 named args need not be saved.
7499 We explicitly build a pointer to the buffer because it halves the insn
7500 count when not optimizing (otherwise the pointer is built for each reg
7501 saved).
7502 We emit the moves in reverse order so that we can use predecrement. */
7504 fpregs = copy_to_mode_reg (Pmode,
7505 plus_constant (XEXP (regbuf, 0),
7506 n_floatregs * UNITS_PER_WORD));
7507 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7509 rtx mem;
7510 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7512 emit_insn (gen_addsi3 (fpregs, fpregs,
7513 GEN_INT (-2 * UNITS_PER_WORD)));
7514 mem = change_address (regbuf, DFmode, fpregs);
7515 emit_move_insn (mem,
7516 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7518 regno = first_floatreg;
7519 if (regno & 1)
7521 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7522 mem = change_address (regbuf, SFmode, fpregs);
7523 emit_move_insn (mem,
7524 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7525 - (TARGET_LITTLE_ENDIAN != 0)));
7528 else
7529 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7531 rtx mem;
7533 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7534 mem = change_address (regbuf, SFmode, fpregs);
7535 emit_move_insn (mem,
7536 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7539 /* Return the address of the regbuf. */
7540 return XEXP (regbuf, 0);
7543 /* Define the `__builtin_va_list' type for the ABI. */
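/* The SH2E/SH4 va_list built below is a record with five pointer fields:
   __va_next_o and __va_next_o_limit walk the block of saved integer
   argument registers, __va_next_fp and __va_next_fp_limit walk the block
   of saved FP argument registers, and __va_next_stack walks arguments
   passed on the stack. */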
7545 static tree
7546 sh_build_builtin_va_list (void)
7548 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7549 tree record;
7551 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7552 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7553 return ptr_type_node;
7555 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7557 f_next_o = build_decl (BUILTINS_LOCATION,
7558 FIELD_DECL, get_identifier ("__va_next_o"),
7559 ptr_type_node);
7560 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7561 FIELD_DECL,
7562 get_identifier ("__va_next_o_limit"),
7563 ptr_type_node);
7564 f_next_fp = build_decl (BUILTINS_LOCATION,
7565 FIELD_DECL, get_identifier ("__va_next_fp"),
7566 ptr_type_node);
7567 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7568 FIELD_DECL,
7569 get_identifier ("__va_next_fp_limit"),
7570 ptr_type_node);
7571 f_next_stack = build_decl (BUILTINS_LOCATION,
7572 FIELD_DECL, get_identifier ("__va_next_stack"),
7573 ptr_type_node);
7575 DECL_FIELD_CONTEXT (f_next_o) = record;
7576 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7577 DECL_FIELD_CONTEXT (f_next_fp) = record;
7578 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7579 DECL_FIELD_CONTEXT (f_next_stack) = record;
7581 TYPE_FIELDS (record) = f_next_o;
7582 TREE_CHAIN (f_next_o) = f_next_o_limit;
7583 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7584 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7585 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7587 layout_type (record);
7589 return record;
7592 /* Implement `va_start' for varargs and stdarg. */
7594 static void
7595 sh_va_start (tree valist, rtx nextarg)
7597 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7598 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7599 tree t, u;
7600 int nfp, nint;
7602 if (TARGET_SH5)
7604 expand_builtin_saveregs ();
7605 std_expand_builtin_va_start (valist, nextarg);
7606 return;
7609 if ((! TARGET_SH2E && ! TARGET_SH4)
7610 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7612 std_expand_builtin_va_start (valist, nextarg);
7613 return;
7616 f_next_o = TYPE_FIELDS (va_list_type_node);
7617 f_next_o_limit = TREE_CHAIN (f_next_o);
7618 f_next_fp = TREE_CHAIN (f_next_o_limit);
7619 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7620 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7622 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7623 NULL_TREE);
7624 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7625 valist, f_next_o_limit, NULL_TREE);
7626 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7627 NULL_TREE);
7628 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7629 valist, f_next_fp_limit, NULL_TREE);
7630 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7631 valist, f_next_stack, NULL_TREE);
7633 /* Call __builtin_saveregs. */
7634 u = make_tree (sizetype, expand_builtin_saveregs ());
7635 u = fold_convert (ptr_type_node, u);
7636 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7637 TREE_SIDE_EFFECTS (t) = 1;
7638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7640 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7641 if (nfp < 8)
7642 nfp = 8 - nfp;
7643 else
7644 nfp = 0;
7645 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7646 size_int (UNITS_PER_WORD * nfp));
7647 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7648 TREE_SIDE_EFFECTS (t) = 1;
7649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7651 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7652 TREE_SIDE_EFFECTS (t) = 1;
7653 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7655 nint = crtl->args.info.arg_count[SH_ARG_INT];
7656 if (nint < 4)
7657 nint = 4 - nint;
7658 else
7659 nint = 0;
7660 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7661 size_int (UNITS_PER_WORD * nint));
7662 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7663 TREE_SIDE_EFFECTS (t) = 1;
7664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7666 u = make_tree (ptr_type_node, nextarg);
7667 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7668 TREE_SIDE_EFFECTS (t) = 1;
7669 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
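/* Sketch of the resulting layout (illustrative only, derived from the
   assignments above; REGBUF stands for the pointer returned by
   expand_builtin_saveregs ()):

     next_fp       = REGBUF
     next_fp_limit = REGBUF + UNITS_PER_WORD * (8 - named FP arg regs, or 0)
     next_o        = next_fp_limit
     next_o_limit  = next_o + UNITS_PER_WORD * (4 - named int arg regs, or 0)
     next_stack    = NEXTARG, the first anonymous argument on the stack.  */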
7672 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7673 member, return it. */
7674 static tree
7675 find_sole_member (tree type)
7677 tree field, member = NULL_TREE;
7679 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7681 if (TREE_CODE (field) != FIELD_DECL)
7682 continue;
7683 if (!DECL_SIZE (field))
7684 return NULL_TREE;
7685 if (integer_zerop (DECL_SIZE (field)))
7686 continue;
7687 if (member)
7688 return NULL_TREE;
7689 member = field;
7691 return member;
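/* Example (illustrative): for
     struct s { float f; };
   find_sole_member returns the FIELD_DECL for F, so a struct s argument is
   treated like a plain float by the REAL_TYPE test below, whereas a struct
   with two nonzero-sized fields yields NULL_TREE and is left alone.  */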
7693 /* Implement `va_arg'. */
7695 static tree
7696 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7697 gimple_seq *post_p ATTRIBUTE_UNUSED)
7699 HOST_WIDE_INT size, rsize;
7700 tree tmp, pptr_type_node;
7701 tree addr, lab_over = NULL, result = NULL;
7702 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7703 tree eff_type;
7705 if (pass_by_ref)
7706 type = build_pointer_type (type);
7708 size = int_size_in_bytes (type);
7709 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7710 pptr_type_node = build_pointer_type (ptr_type_node);
7712 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7713 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7715 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7716 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7717 int pass_as_float;
7718 tree lab_false;
7719 tree member;
7721 f_next_o = TYPE_FIELDS (va_list_type_node);
7722 f_next_o_limit = TREE_CHAIN (f_next_o);
7723 f_next_fp = TREE_CHAIN (f_next_o_limit);
7724 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7725 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7727 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7728 NULL_TREE);
7729 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7730 valist, f_next_o_limit, NULL_TREE);
7731 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7732 valist, f_next_fp, NULL_TREE);
7733 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7734 valist, f_next_fp_limit, NULL_TREE);
7735 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7736 valist, f_next_stack, NULL_TREE);
7738 /* Structures with a single member with a distinct mode are passed
7739 like their member. This is relevant if the latter has a REAL_TYPE
7740 or COMPLEX_TYPE type. */
7741 eff_type = type;
7742 while (TREE_CODE (eff_type) == RECORD_TYPE
7743 && (member = find_sole_member (eff_type))
7744 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7745 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7746 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7748 tree field_type = TREE_TYPE (member);
7750 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7751 eff_type = field_type;
7752 else
7754 gcc_assert ((TYPE_ALIGN (eff_type)
7755 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7756 || (TYPE_ALIGN (eff_type)
7757 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7758 break;
7762 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7764 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7765 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7766 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7767 && size <= 16));
7769 else
7771 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7774 addr = create_tmp_var (pptr_type_node, NULL);
7775 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7776 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7778 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7780 if (pass_as_float)
7782 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7783 tree cmp;
7784 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7786 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7787 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7789 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7790 tmp = next_fp_limit;
7791 if (size > 4 && !is_double)
7792 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7793 unshare_expr (tmp), size_int (4 - size));
7794 tmp = build2 (GE_EXPR, boolean_type_node,
7795 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7796 cmp = build3 (COND_EXPR, void_type_node, tmp,
7797 build1 (GOTO_EXPR, void_type_node,
7798 unshare_expr (lab_false)), NULL_TREE);
7799 if (!is_double)
7800 gimplify_and_add (cmp, pre_p);
7802 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7803 || (is_double || size == 16))
7805 tmp = fold_convert (sizetype, next_fp_tmp);
7806 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7807 size_int (UNITS_PER_WORD));
7808 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7809 unshare_expr (next_fp_tmp), tmp);
7810 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7812 if (is_double)
7813 gimplify_and_add (cmp, pre_p);
7815 #ifdef FUNCTION_ARG_SCmode_WART
7816 if (TYPE_MODE (eff_type) == SCmode
7817 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7819 tree subtype = TREE_TYPE (eff_type);
7820 tree real, imag;
7822 imag
7823 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7824 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7826 real
7827 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7828 real = get_initialized_tmp_var (real, pre_p, NULL);
7830 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7831 if (type != eff_type)
7832 result = build1 (VIEW_CONVERT_EXPR, type, result);
7833 result = get_initialized_tmp_var (result, pre_p, NULL);
7835 #endif /* FUNCTION_ARG_SCmode_WART */
7837 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7838 gimplify_and_add (tmp, pre_p);
7840 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7841 gimplify_and_add (tmp, pre_p);
7843 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7844 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7845 gimplify_assign (unshare_expr (next_fp_tmp),
7846 unshare_expr (valist), pre_p);
7848 gimplify_assign (unshare_expr (valist),
7849 unshare_expr (next_fp_tmp), post_p);
7850 valist = next_fp_tmp;
7852 else
7854 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7855 unshare_expr (next_o), size_int (rsize));
7856 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7857 unshare_expr (next_o_limit));
7858 tmp = build3 (COND_EXPR, void_type_node, tmp,
7859 build1 (GOTO_EXPR, void_type_node,
7860 unshare_expr (lab_false)),
7861 NULL_TREE);
7862 gimplify_and_add (tmp, pre_p);
7864 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7865 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7867 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7868 gimplify_and_add (tmp, pre_p);
7870 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7871 gimplify_and_add (tmp, pre_p);
7873 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7874 gimplify_assign (unshare_expr (next_o),
7875 unshare_expr (next_o_limit), pre_p);
7877 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7878 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7881 if (!result)
7883 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7884 gimplify_and_add (tmp, pre_p);
7888 /* ??? In va-sh.h, there had been code to make values larger than
7889 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7891 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7892 if (result)
7894 gimplify_assign (result, tmp, pre_p);
7895 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7896 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7897 gimplify_and_add (tmp, pre_p);
7899 else
7900 result = tmp;
7902 if (pass_by_ref)
7903 result = build_va_arg_indirect_ref (result);
7905 return result;
7908 /* 64-bit floating point memory transfers are paired single precision loads
7909 or stores.  So the DWARF information needs fixing in little endian mode
7910 (unless PR=SZ=1 in FPSCR). */
7912 sh_dwarf_register_span (rtx reg)
7914 unsigned regno = REGNO (reg);
7916 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7917 return NULL_RTX;
7919 return
7920 gen_rtx_PARALLEL (VOIDmode,
7921 gen_rtvec (2,
7922 gen_rtx_REG (SFmode,
7923 DBX_REGISTER_NUMBER (regno+1)),
7924 gen_rtx_REG (SFmode,
7925 DBX_REGISTER_NUMBER (regno))));
7928 static enum machine_mode
7929 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7930 int *punsignedp, const_tree funtype,
7931 int for_return ATTRIBUTE_UNUSED)
7933 if (sh_promote_prototypes (funtype))
7934 return promote_mode (type, mode, punsignedp);
7935 else
7936 return mode;
7939 bool
7940 sh_promote_prototypes (const_tree type)
7942 if (TARGET_HITACHI)
7943 return 0;
7944 if (! type)
7945 return 1;
7946 return ! sh_attr_renesas_p (type);
7949 /* Whether an argument must be passed by reference. On SHcompact, we
7950 pretend arguments wider than 32 bits that would have been passed in
7951 registers are passed by reference, so that an SHmedia trampoline
7952 loads them into the full 64-bit registers. */
7954 static int
7955 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7956 const_tree type, bool named)
7958 unsigned HOST_WIDE_INT size;
7960 if (type)
7961 size = int_size_in_bytes (type);
7962 else
7963 size = GET_MODE_SIZE (mode);
7965 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7966 && (!named
7967 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7968 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7969 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7970 && size > 4
7971 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7972 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7973 return size;
7974 else
7975 return 0;
7978 static bool
7979 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7980 const_tree type, bool named)
7982 if (targetm.calls.must_pass_in_stack (mode, type))
7983 return true;
7985 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7986 wants to know about pass-by-reference semantics for incoming
7987 arguments. */
7988 if (! cum)
7989 return false;
7991 if (TARGET_SHCOMPACT)
7993 cum->byref = shcompact_byref (cum, mode, type, named);
7994 return cum->byref != 0;
7997 return false;
8000 static bool
8001 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8002 const_tree type, bool named ATTRIBUTE_UNUSED)
8004 /* ??? How can it possibly be correct to return true only on the
8005 caller side of the equation? Is there someplace else in the
8006 sh backend that's magically producing the copies? */
8007 return (cum->outgoing
8008 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8009 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8012 static int
8013 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8014 tree type, bool named ATTRIBUTE_UNUSED)
8016 int words = 0;
8018 if (!TARGET_SH5
8019 && PASS_IN_REG_P (*cum, mode, type)
8020 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8021 && (ROUND_REG (*cum, mode)
8022 + (mode != BLKmode
8023 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8024 : ROUND_ADVANCE (int_size_in_bytes (type)))
8025 > NPARM_REGS (mode)))
8026 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8028 else if (!TARGET_SHCOMPACT
8029 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8030 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8032 return words * UNITS_PER_WORD;
8036 /* Define where to put the arguments to a function.
8037 Value is zero to push the argument on the stack,
8038 or a hard register in which to store the argument.
8040 MODE is the argument's machine mode.
8041 TYPE is the data type of the argument (as a tree).
8042 This is null for libcalls where that information may
8043 not be available.
8044 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8045 the preceding args and about the function being called.
8046 NAMED is nonzero if this argument is a named parameter
8047 (otherwise it is an extra parameter matching an ellipsis).
8049 On SH the first args are normally in registers
8050 and the rest are pushed. Any arg that starts within the first
8051 NPARM_REGS words is at least partially passed in a register unless
8052 its data type forbids. */
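/* Illustrative example (assuming the default little endian SH4 ABI, where
   FIRST_PARM_REG is r4 and FIRST_FP_PARM_REG is fr4; the names F, A, B and
   C are made up):

     int f (int a, float b, int c, ...);

   A is passed in r4 and C in r5, since integer and FP argument registers
   are counted separately; B goes into the FP argument registers starting
   at fr4, landing in fr5 on little endian because of the register-number
   XOR below; anything matching the ellipsis is unnamed and, depending on
   the checks below, goes to the stack or the varargs save area.  */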
8056 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8057 tree type, int named)
8059 if (! TARGET_SH5 && mode == VOIDmode)
8060 return GEN_INT (ca->renesas_abi ? 1 : 0);
8062 if (! TARGET_SH5
8063 && PASS_IN_REG_P (*ca, mode, type)
8064 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8066 int regno;
8068 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8069 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8071 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8072 gen_rtx_REG (SFmode,
8073 BASE_ARG_REG (mode)
8074 + (ROUND_REG (*ca, mode) ^ 1)),
8075 const0_rtx);
8076 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8077 gen_rtx_REG (SFmode,
8078 BASE_ARG_REG (mode)
8079 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8080 GEN_INT (4));
8081 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8084 /* If the alignment of a DF value causes an SF register to be
8085 skipped, we will use that skipped register for the next SF
8086 value. */
8087 if ((TARGET_HITACHI || ca->renesas_abi)
8088 && ca->free_single_fp_reg
8089 && mode == SFmode)
8090 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8092 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8093 ^ (mode == SFmode && TARGET_SH4
8094 && TARGET_LITTLE_ENDIAN != 0
8095 && ! TARGET_HITACHI && ! ca->renesas_abi);
8096 return gen_rtx_REG (mode, regno);
8100 if (TARGET_SH5)
8102 if (mode == VOIDmode && TARGET_SHCOMPACT)
8103 return GEN_INT (ca->call_cookie);
8105 /* The following test assumes unnamed arguments are promoted to
8106 DFmode. */
8107 if (mode == SFmode && ca->free_single_fp_reg)
8108 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8110 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8111 && (named || ! ca->prototype_p)
8112 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8114 if (! ca->prototype_p && TARGET_SHMEDIA)
8115 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8117 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8118 FIRST_FP_PARM_REG
8119 + ca->arg_count[(int) SH_ARG_FLOAT]);
8122 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8123 && (! TARGET_SHCOMPACT
8124 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8125 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8126 type, named))))
8128 return gen_rtx_REG (mode, (FIRST_PARM_REG
8129 + ca->arg_count[(int) SH_ARG_INT]));
8132 return 0;
8135 return 0;
8138 /* Update the data in CUM to advance over an argument
8139 of mode MODE and data type TYPE.
8140 (TYPE is null for libcalls where that information may not be
8141 available.) */
8143 void
8144 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8145 tree type, int named)
8147 if (ca->force_mem)
8148 ca->force_mem = 0;
8149 else if (TARGET_SH5)
8151 tree type2 = (ca->byref && type
8152 ? TREE_TYPE (type)
8153 : type);
8154 enum machine_mode mode2 = (ca->byref && type
8155 ? TYPE_MODE (type2)
8156 : mode);
8157 int dwords = ((ca->byref
8158 ? ca->byref
8159 : mode2 == BLKmode
8160 ? int_size_in_bytes (type2)
8161 : GET_MODE_SIZE (mode2)) + 7) / 8;
8162 int numregs = MIN (dwords, NPARM_REGS (SImode)
8163 - ca->arg_count[(int) SH_ARG_INT]);
8165 if (numregs)
8167 ca->arg_count[(int) SH_ARG_INT] += numregs;
8168 if (TARGET_SHCOMPACT
8169 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8171 ca->call_cookie
8172 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8173 - numregs, 1);
8174 /* N.B. We want this also for outgoing. */
8175 ca->stack_regs += numregs;
8177 else if (ca->byref)
8179 if (! ca->outgoing)
8180 ca->stack_regs += numregs;
8181 ca->byref_regs += numregs;
8182 ca->byref = 0;
8184 ca->call_cookie
8185 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8186 - numregs, 2);
8187 while (--numregs);
8188 ca->call_cookie
8189 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8190 - 1, 1);
8192 else if (dwords > numregs)
8194 int pushregs = numregs;
8196 if (TARGET_SHCOMPACT)
8197 ca->stack_regs += numregs;
8198 while (pushregs < NPARM_REGS (SImode) - 1
8199 && (CALL_COOKIE_INT_REG_GET
8200 (ca->call_cookie,
8201 NPARM_REGS (SImode) - pushregs)
8202 == 1))
8204 ca->call_cookie
8205 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8206 - pushregs, 1);
8207 pushregs++;
8209 if (numregs == NPARM_REGS (SImode))
8210 ca->call_cookie
8211 |= CALL_COOKIE_INT_REG (0, 1)
8212 | CALL_COOKIE_STACKSEQ (numregs - 1);
8213 else
8214 ca->call_cookie
8215 |= CALL_COOKIE_STACKSEQ (numregs);
8218 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8219 && (named || ! ca->prototype_p))
8221 if (mode2 == SFmode && ca->free_single_fp_reg)
8222 ca->free_single_fp_reg = 0;
8223 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8224 < NPARM_REGS (SFmode))
8226 int numfpregs
8227 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8228 NPARM_REGS (SFmode)
8229 - ca->arg_count[(int) SH_ARG_FLOAT]);
8231 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8233 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8235 if (ca->outgoing && numregs > 0)
8238 ca->call_cookie
8239 |= (CALL_COOKIE_INT_REG
8240 (ca->arg_count[(int) SH_ARG_INT]
8241 - numregs + ((numfpregs - 2) / 2),
8242 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8243 - numfpregs) / 2));
8245 while (numfpregs -= 2);
8247 else if (mode2 == SFmode && (named)
8248 && (ca->arg_count[(int) SH_ARG_FLOAT]
8249 < NPARM_REGS (SFmode)))
8250 ca->free_single_fp_reg
8251 = FIRST_FP_PARM_REG - numfpregs
8252 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8255 return;
8258 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8260 /* Note that we've used the skipped register. */
8261 if (mode == SFmode && ca->free_single_fp_reg)
8263 ca->free_single_fp_reg = 0;
8264 return;
8266 /* When we have a DF after an SF, there's an SF register that gets
8267 skipped in order to align the DF value. We note this skipped
8268 register, because the next SF value will use it, and not the
8269 SF that follows the DF. */
8270 if (mode == DFmode
8271 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8273 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8274 + BASE_ARG_REG (mode));
8278 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8279 || PASS_IN_REG_P (*ca, mode, type))
8280 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8281 = (ROUND_REG (*ca, mode)
8282 + (mode == BLKmode
8283 ? ROUND_ADVANCE (int_size_in_bytes (type))
8284 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8287 /* The Renesas calling convention doesn't quite fit into this scheme since
8288 the address is passed like an invisible argument, but one that is always
8289 passed in memory. */
8290 static rtx
8291 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8293 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8294 return 0;
8295 return gen_rtx_REG (Pmode, 2);
8298 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8300 static bool
8301 sh_return_in_memory (const_tree type, const_tree fndecl)
8303 if (TARGET_SH5)
8305 if (TYPE_MODE (type) == BLKmode)
8306 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8307 else
8308 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8310 else
8312 return (TYPE_MODE (type) == BLKmode
8313 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8314 && TREE_CODE (type) == RECORD_TYPE));
8318 /* We actually emit the code in sh_expand_prologue. We used to use
8319 a static variable to flag that we need to emit this code, but that
8320 doesn't work when inlining, when functions are deferred and then emitted
8321 later. Fortunately, we already have two flags that are part of struct
8322 function that tell if a function uses varargs or stdarg. */
8323 static void
8324 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8325 enum machine_mode mode,
8326 tree type,
8327 int *pretend_arg_size,
8328 int second_time ATTRIBUTE_UNUSED)
8330 gcc_assert (cfun->stdarg);
8331 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8333 int named_parm_regs, anon_parm_regs;
8335 named_parm_regs = (ROUND_REG (*ca, mode)
8336 + (mode == BLKmode
8337 ? ROUND_ADVANCE (int_size_in_bytes (type))
8338 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8339 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8340 if (anon_parm_regs > 0)
8341 *pretend_arg_size = anon_parm_regs * 4;
8345 static bool
8346 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8348 return TARGET_SH5;
8351 static bool
8352 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8354 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8358 /* Define the offset between two registers, one to be eliminated, and
8359 the other its replacement, at the start of a routine. */
8362 initial_elimination_offset (int from, int to)
8364 int regs_saved;
8365 int regs_saved_rounding = 0;
8366 int total_saved_regs_space;
8367 int total_auto_space;
8368 int save_flags = target_flags;
8369 int copy_flags;
8370 HARD_REG_SET live_regs_mask;
8372 shmedia_space_reserved_for_target_registers = false;
8373 regs_saved = calc_live_regs (&live_regs_mask);
8374 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8376 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8378 shmedia_space_reserved_for_target_registers = true;
8379 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8382 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8383 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8384 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8386 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8387 copy_flags = target_flags;
8388 target_flags = save_flags;
8390 total_saved_regs_space = regs_saved + regs_saved_rounding;
8392 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8393 return total_saved_regs_space + total_auto_space
8394 + crtl->args.info.byref_regs * 8;
8396 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8397 return total_saved_regs_space + total_auto_space
8398 + crtl->args.info.byref_regs * 8;
8400 /* Initial gap between fp and sp is 0. */
8401 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8402 return 0;
8404 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8405 return rounded_frame_size (0);
8407 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8408 return rounded_frame_size (0);
8410 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8411 && (to == HARD_FRAME_POINTER_REGNUM
8412 || to == STACK_POINTER_REGNUM));
8413 if (TARGET_SH5)
8415 int n = total_saved_regs_space;
8416 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8417 save_schedule schedule;
8418 save_entry *entry;
8420 n += total_auto_space;
8422 /* If it wasn't saved, there's not much we can do. */
8423 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8424 return n;
8426 target_flags = copy_flags;
8428 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8429 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8430 if (entry->reg == pr_reg)
8432 target_flags = save_flags;
8433 return entry->offset;
8435 gcc_unreachable ();
8437 else
8438 return total_auto_space;
8441 /* Parse the -mfixed-range= option string. */
8442 void
8443 sh_fix_range (const char *const_str)
8445 int i, first, last;
8446 char *str, *dash, *comma;
8448 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8449 REG2 are either register names or register numbers. The effect
8450 of this option is to mark the registers in the range from REG1 to
8451 REG2 as ``fixed'' so they won't be used by the compiler. */
8453 i = strlen (const_str);
8454 str = (char *) alloca (i + 1);
8455 memcpy (str, const_str, i + 1);
8457 while (1)
8459 dash = strchr (str, '-');
8460 if (!dash)
8462 warning (0, "value of -mfixed-range must have form REG1-REG2");
8463 return;
8465 *dash = '\0';
8466 comma = strchr (dash + 1, ',');
8467 if (comma)
8468 *comma = '\0';
8470 first = decode_reg_name (str);
8471 if (first < 0)
8473 warning (0, "unknown register name: %s", str);
8474 return;
8477 last = decode_reg_name (dash + 1);
8478 if (last < 0)
8480 warning (0, "unknown register name: %s", dash + 1);
8481 return;
8484 *dash = '-';
8486 if (first > last)
8488 warning (0, "%s-%s is an empty range", str, dash + 1);
8489 return;
8492 for (i = first; i <= last; ++i)
8493 fixed_regs[i] = call_used_regs[i] = 1;
8495 if (!comma)
8496 break;
8498 *comma = ',';
8499 str = comma + 1;
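/* Usage example (illustrative): compiling with

     -mfixed-range=r8-r11

   marks r8 through r11 as fixed and call-used so the compiler will not
   allocate them; several REG1-REG2 ranges may be given, separated by
   commas, e.g. -mfixed-range=r8-r9,r10-r11.  */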
8503 /* Insert any deferred function attributes from earlier pragmas. */
8504 static void
8505 sh_insert_attributes (tree node, tree *attributes)
8507 tree attrs;
8509 if (TREE_CODE (node) != FUNCTION_DECL)
8510 return;
8512 /* We are only interested in fields. */
8513 if (!DECL_P (node))
8514 return;
8516 /* Append the attributes to the deferred attributes. */
8517 *sh_deferred_function_attributes_tail = *attributes;
8518 attrs = sh_deferred_function_attributes;
8519 if (!attrs)
8520 return;
8522 /* Some attributes imply or require the interrupt attribute. */
8523 if (!lookup_attribute ("interrupt_handler", attrs)
8524 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8526 /* If we have a trapa_handler, but no interrupt_handler attribute,
8527 insert an interrupt_handler attribute. */
8528 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8529 /* We can't use sh_pr_interrupt here because that's not in the
8530 java frontend. */
8531 attrs
8532 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8533 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8534 if the interrupt attribute is missing, we ignore the attribute
8535 and warn. */
8536 else if (lookup_attribute ("sp_switch", attrs)
8537 || lookup_attribute ("trap_exit", attrs)
8538 || lookup_attribute ("nosave_low_regs", attrs)
8539 || lookup_attribute ("resbank", attrs))
8541 tree *tail;
8543 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8545 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8546 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8547 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8548 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8549 warning (OPT_Wattributes,
8550 "%qE attribute only applies to interrupt functions",
8551 TREE_PURPOSE (attrs));
8552 else
8554 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8555 NULL_TREE);
8556 tail = &TREE_CHAIN (*tail);
8559 attrs = *attributes;
8563 /* Install the processed list. */
8564 *attributes = attrs;
8566 /* Clear deferred attributes. */
8567 sh_deferred_function_attributes = NULL_TREE;
8568 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8570 return;
8573 /* Supported attributes:
8575 interrupt_handler -- specifies this function is an interrupt handler.
8577 trapa_handler - like above, but don't save all registers.
8579 sp_switch -- specifies an alternate stack for an interrupt handler
8580 to run on.
8582 trap_exit -- use a trapa to exit an interrupt function instead of
8583 an rte instruction.
8585 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8586 This is useful on the SH3 and upwards,
8587 which have a separate set of low regs for User and Supervisor modes.
8588 This should only be used for the lowest level of interrupts. Higher levels
8589 of interrupts must save the registers in case they themselves are
8590 interrupted.
8592 renesas -- use Renesas calling/layout conventions (functions and
8593 structures).
8595 resbank -- In case of an ISR, use a register bank to save registers
8596 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
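/* Usage example (illustrative only; the attribute spellings follow the
   list above, the function and variable names are made up, and resbank is
   only honoured on SH2A):

     void low_isr (void)
       __attribute__ ((interrupt_handler, nosave_low_regs));

     void alt_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

     void bank_isr (void)
       __attribute__ ((interrupt_handler, resbank));  */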
8599 /* Handle a 'resbank' attribute. */
8600 static tree
8601 sh_handle_resbank_handler_attribute (tree * node, tree name,
8602 tree args ATTRIBUTE_UNUSED,
8603 int flags ATTRIBUTE_UNUSED,
8604 bool * no_add_attrs)
8606 if (!TARGET_SH2A)
8608 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8609 name);
8610 *no_add_attrs = true;
8612 if (TREE_CODE (*node) != FUNCTION_DECL)
8614 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8615 name);
8616 *no_add_attrs = true;
8619 return NULL_TREE;
8622 /* Handle an "interrupt_handler" attribute; arguments as in
8623 struct attribute_spec.handler. */
8624 static tree
8625 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8626 tree args ATTRIBUTE_UNUSED,
8627 int flags ATTRIBUTE_UNUSED,
8628 bool *no_add_attrs)
8630 if (TREE_CODE (*node) != FUNCTION_DECL)
8632 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8633 name);
8634 *no_add_attrs = true;
8636 else if (TARGET_SHCOMPACT)
8638 error ("attribute interrupt_handler is not compatible with -m5-compact");
8639 *no_add_attrs = true;
8642 return NULL_TREE;
8645 /* Handle a 'function_vector' attribute; arguments as in
8646 struct attribute_spec.handler. */
8647 static tree
8648 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8649 tree args ATTRIBUTE_UNUSED,
8650 int flags ATTRIBUTE_UNUSED,
8651 bool * no_add_attrs)
8653 if (!TARGET_SH2A)
8655 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8656 name);
8657 *no_add_attrs = true;
8659 else if (TREE_CODE (*node) != FUNCTION_DECL)
8661 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8662 name);
8663 *no_add_attrs = true;
8665 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8667 /* The argument must be a constant integer. */
8668 warning (OPT_Wattributes,
8669 "%qE attribute argument not an integer constant",
8670 name);
8671 *no_add_attrs = true;
8673 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8675 /* The argument value must be between 0 and 255. */
8676 warning (OPT_Wattributes,
8677 "%qE attribute argument should be between 0 and 255",
8678 name);
8679 *no_add_attrs = true;
8681 return NULL_TREE;
8684 /* Returns 1 if the rtx X refers to a function that has been assigned
8685 the attribute 'function_vector'. */
8687 sh2a_is_function_vector_call (rtx x)
8689 if (GET_CODE (x) == SYMBOL_REF
8690 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8692 tree tr = SYMBOL_REF_DECL (x);
8694 if (sh2a_function_vector_p (tr))
8695 return 1;
8698 return 0;
8701 /* Returns the function vector number, if the attribute
8702 'function_vector' is assigned, otherwise returns zero. */
8704 sh2a_get_function_vector_number (rtx x)
8706 int num;
8707 tree list, t;
8709 if ((GET_CODE (x) == SYMBOL_REF)
8710 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8712 t = SYMBOL_REF_DECL (x);
8714 if (TREE_CODE (t) != FUNCTION_DECL)
8715 return 0;
8717 list = SH_ATTRIBUTES (t);
8718 while (list)
8720 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8722 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8723 return num;
8726 list = TREE_CHAIN (list);
8729 return 0;
8731 else
8732 return 0;
8735 /* Handle an "sp_switch" attribute; arguments as in
8736 struct attribute_spec.handler. */
8737 static tree
8738 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8741 if (TREE_CODE (*node) != FUNCTION_DECL)
8743 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8744 name);
8745 *no_add_attrs = true;
8747 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8749 /* The argument must be a constant string. */
8750 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8751 name);
8752 *no_add_attrs = true;
8755 return NULL_TREE;
8758 /* Handle an "trap_exit" attribute; arguments as in
8759 struct attribute_spec.handler. */
8760 static tree
8761 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8762 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8764 if (TREE_CODE (*node) != FUNCTION_DECL)
8766 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8767 name);
8768 *no_add_attrs = true;
8770 /* The argument specifies a trap number to be used in a trapa instruction
8771 at function exit (instead of an rte instruction). */
8772 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8774 /* The argument must be a constant integer. */
8775 warning (OPT_Wattributes, "%qE attribute argument not an "
8776 "integer constant", name);
8777 *no_add_attrs = true;
8780 return NULL_TREE;
8783 static tree
8784 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8785 tree name ATTRIBUTE_UNUSED,
8786 tree args ATTRIBUTE_UNUSED,
8787 int flags ATTRIBUTE_UNUSED,
8788 bool *no_add_attrs ATTRIBUTE_UNUSED)
8790 return NULL_TREE;
8793 /* True if __attribute__((renesas)) or -mrenesas. */
8795 sh_attr_renesas_p (const_tree td)
8797 if (TARGET_HITACHI)
8798 return 1;
8799 if (td == 0)
8800 return 0;
8801 if (DECL_P (td))
8802 td = TREE_TYPE (td);
8803 if (td == error_mark_node)
8804 return 0;
8805 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8806 != NULL_TREE);
8809 /* True if __attribute__((renesas)) or -mrenesas, for the current
8810 function. */
8812 sh_cfun_attr_renesas_p (void)
8814 return sh_attr_renesas_p (current_function_decl);
8818 sh_cfun_interrupt_handler_p (void)
8820 return (lookup_attribute ("interrupt_handler",
8821 DECL_ATTRIBUTES (current_function_decl))
8822 != NULL_TREE);
8825 /* Returns 1 if FUNC has been assigned the attribute
8826 "function_vector". */
8828 sh2a_function_vector_p (tree func)
8830 tree list;
8831 if (TREE_CODE (func) != FUNCTION_DECL)
8832 return 0;
8834 list = SH_ATTRIBUTES (func);
8835 while (list)
8837 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8838 return 1;
8840 list = TREE_CHAIN (list);
8842 return 0;
8845 /* Returns TRUE if the current function has the "resbank" attribute. */
8848 sh_cfun_resbank_handler_p (void)
8850 return ((lookup_attribute ("resbank",
8851 DECL_ATTRIBUTES (current_function_decl))
8852 != NULL_TREE)
8853 && (lookup_attribute ("interrupt_handler",
8854 DECL_ATTRIBUTES (current_function_decl))
8855 != NULL_TREE) && TARGET_SH2A);
8858 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8860 static const char *
8861 sh_check_pch_target_flags (int old_flags)
8863 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8864 | MASK_SH_E | MASK_HARD_SH4
8865 | MASK_FPU_SINGLE | MASK_SH4))
8866 return _("created and used with different architectures / ABIs");
8867 if ((old_flags ^ target_flags) & MASK_HITACHI)
8868 return _("created and used with different ABIs");
8869 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8870 return _("created and used with different endianness");
8871 return NULL;
8874 /* Predicates used by the templates. */
8876 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8877 Used only in general_movsrc_operand. */
8880 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8882 switch (REGNO (op))
8884 case PR_REG:
8885 case MACL_REG:
8886 case MACH_REG:
8887 return 1;
8889 return 0;
8892 /* Nonzero if OP is a floating point value with value 0.0. */
8895 fp_zero_operand (rtx op)
8897 REAL_VALUE_TYPE r;
8899 if (GET_MODE (op) != SFmode)
8900 return 0;
8902 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8903 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8906 /* Nonzero if OP is a floating point value with value 1.0. */
8909 fp_one_operand (rtx op)
8911 REAL_VALUE_TYPE r;
8913 if (GET_MODE (op) != SFmode)
8914 return 0;
8916 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8917 return REAL_VALUES_EQUAL (r, dconst1);
8920 /* In general mode switching is used. If we are
8921 compiling without -mfmovd, movsf_ie isn't taken into account for
8922 mode switching. We could check in machine_dependent_reorg for
8923 cases where we know we are in single precision mode, but there is no
8924 interface to find that out during reload, so we must avoid
8925 choosing an fldi alternative during reload and thus failing to
8926 allocate a scratch register for the constant loading. */
8928 fldi_ok (void)
8930 return 1;
8934 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8936 enum rtx_code code = GET_CODE (op);
8937 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8940 /* Return the TLS model for TLS symbols, TLS_MODEL_NONE otherwise. */
8941 enum tls_model
8942 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8944 if (GET_CODE (op) != SYMBOL_REF)
8945 return TLS_MODEL_NONE;
8946 return SYMBOL_REF_TLS_MODEL (op);
8949 /* Return the destination address of a branch. */
8951 static int
8952 branch_dest (rtx branch)
8954 rtx dest = SET_SRC (PATTERN (branch));
8955 int dest_uid;
8957 if (GET_CODE (dest) == IF_THEN_ELSE)
8958 dest = XEXP (dest, 1);
8959 dest = XEXP (dest, 0);
8960 dest_uid = INSN_UID (dest);
8961 return INSN_ADDRESSES (dest_uid);
8964 /* Return nonzero if REG is not used after INSN.
8965 We assume REG is a reload reg, and therefore does
8966 not live past labels. It may live past calls or jumps though. */
8968 reg_unused_after (rtx reg, rtx insn)
8970 enum rtx_code code;
8971 rtx set;
8973 /* If the reg is set by this instruction, then it is safe for our
8974 case. Disregard the case where this is a store to memory, since
8975 we are checking a register used in the store address. */
8976 set = single_set (insn);
8977 if (set && !MEM_P (SET_DEST (set))
8978 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8979 return 1;
8981 while ((insn = NEXT_INSN (insn)))
8983 rtx set;
8984 if (!INSN_P (insn))
8985 continue;
8987 code = GET_CODE (insn);
8989 #if 0
8990 /* If this is a label that existed before reload, then the register
8991 is dead here. However, if this is a label added by reorg, then
8992 the register may still be live here. We can't tell the difference,
8993 so we just ignore labels completely. */
8994 if (code == CODE_LABEL)
8995 return 1;
8996 /* else */
8997 #endif
8999 if (code == JUMP_INSN)
9000 return 0;
9002 /* If this is a sequence, we must handle them all at once.
9003 We could have for instance a call that sets the target register,
9004 and an insn in a delay slot that uses the register. In this case,
9005 we must return 0. */
9006 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9008 int i;
9009 int retval = 0;
9011 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9013 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9014 rtx set = single_set (this_insn);
9016 if (CALL_P (this_insn))
9017 code = CALL_INSN;
9018 else if (JUMP_P (this_insn))
9020 if (INSN_ANNULLED_BRANCH_P (this_insn))
9021 return 0;
9022 code = JUMP_INSN;
9025 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9026 return 0;
9027 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9029 if (!MEM_P (SET_DEST (set)))
9030 retval = 1;
9031 else
9032 return 0;
9034 if (set == 0
9035 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9036 return 0;
9038 if (retval == 1)
9039 return 1;
9040 else if (code == JUMP_INSN)
9041 return 0;
9044 set = single_set (insn);
9045 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9046 return 0;
9047 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9048 return !MEM_P (SET_DEST (set));
9049 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9050 return 0;
9052 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9053 return 1;
9055 return 1;
9058 #include "ggc.h"
9060 static GTY(()) rtx fpscr_rtx;
9062 get_fpscr_rtx (void)
9064 if (! fpscr_rtx)
9066 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9067 REG_USERVAR_P (fpscr_rtx) = 1;
9068 mark_user_reg (fpscr_rtx);
9070 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9071 mark_user_reg (fpscr_rtx);
9072 return fpscr_rtx;
9075 static GTY(()) tree fpscr_values;
9077 static void
9078 emit_fpu_switch (rtx scratch, int index)
9080 rtx dst, src;
9082 if (fpscr_values == NULL)
9084 tree t;
9086 t = build_index_type (integer_one_node);
9087 t = build_array_type (integer_type_node, t);
9088 t = build_decl (BUILTINS_LOCATION,
9089 VAR_DECL, get_identifier ("__fpscr_values"), t);
9090 DECL_ARTIFICIAL (t) = 1;
9091 DECL_IGNORED_P (t) = 1;
9092 DECL_EXTERNAL (t) = 1;
9093 TREE_STATIC (t) = 1;
9094 TREE_PUBLIC (t) = 1;
9095 TREE_USED (t) = 1;
9097 fpscr_values = t;
9100 src = DECL_RTL (fpscr_values);
9101 if (!can_create_pseudo_p ())
9103 emit_move_insn (scratch, XEXP (src, 0));
9104 if (index != 0)
9105 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9106 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9108 else
9109 src = adjust_address (src, PSImode, index * 4);
9111 dst = get_fpscr_rtx ();
9112 emit_move_insn (dst, src);
9115 void
9116 emit_sf_insn (rtx pat)
9118 emit_insn (pat);
9121 void
9122 emit_df_insn (rtx pat)
9124 emit_insn (pat);
9127 void
9128 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9130 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9133 void
9134 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9136 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9137 get_fpscr_rtx ()));
9140 void
9141 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9143 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9146 void
9147 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9149 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9150 get_fpscr_rtx ()));
9153 static rtx get_free_reg (HARD_REG_SET);
9155 /* This function returns a register to use to load the address to load
9156 the fpscr from. Currently it always returns r1 or r7, but when we are
9157 able to use pseudo registers after combine, or have a better mechanism
9158 for choosing a register, it should be done here. */
9159 /* REGS_LIVE is the liveness information for the point for which we
9160 need this allocation. In some bare-bones exit blocks, r1 is live at the
9161 start. We can even have all of r0..r3 being live:
9162 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9163 The INSN before which the new insns are placed will clobber the register
9164 we return. If a basic block consists only of setting the return value
9165 register to a pseudo and using that register, the return value is not
9166 live before or after this block, yet we'll insert our insns right in
9167 the middle. */
9169 static rtx
9170 get_free_reg (HARD_REG_SET regs_live)
9172 if (! TEST_HARD_REG_BIT (regs_live, 1))
9173 return gen_rtx_REG (Pmode, 1);
9175 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9176 there shouldn't be anything but a jump before the function end. */
9177 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9178 return gen_rtx_REG (Pmode, 7);
9181 /* This function will set the fpscr from memory.
9182 MODE is the mode we are setting it to. */
9183 void
9184 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9186 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9187 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9188 rtx addr_reg;
9190 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9191 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9194 /* Is the given character a logical line separator for the assembler? */
9195 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9196 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9197 #endif
9200 sh_insn_length_adjustment (rtx insn)
9202 /* Instructions with unfilled delay slots take up an extra two bytes for
9203 the nop in the delay slot. */
9204 if (((NONJUMP_INSN_P (insn)
9205 && GET_CODE (PATTERN (insn)) != USE
9206 && GET_CODE (PATTERN (insn)) != CLOBBER)
9207 || CALL_P (insn)
9208 || (JUMP_P (insn)
9209 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9210 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
9211 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9212 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9213 return 2;
9215 /* SH2e has a bug that prevents the use of annulled branches, so if
9216 the delay slot is not filled, we'll have to put a NOP in it. */
9217 if (sh_cpu_attr == CPU_SH2E
9218 && JUMP_P (insn)
9219 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9220 && GET_CODE (PATTERN (insn)) != ADDR_VEC
9221 && get_attr_type (insn) == TYPE_CBRANCH
9222 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9223 return 2;
9225 /* sh-dsp parallel processing insns take four bytes instead of two. */
9227 if (NONJUMP_INSN_P (insn))
9229 int sum = 0;
9230 rtx body = PATTERN (insn);
9231 const char *templ;
9232 char c;
9233 int maybe_label = 1;
9235 if (GET_CODE (body) == ASM_INPUT)
9236 templ = XSTR (body, 0);
9237 else if (asm_noperands (body) >= 0)
9238 templ
9239 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9240 else
9241 return 0;
9244 int ppi_adjust = 0;
9247 c = *templ++;
9248 while (c == ' ' || c == '\t');
9249 /* all sh-dsp parallel-processing insns start with p.
9250 The only non-ppi sh insn starting with p is pref.
9251 The only ppi starting with pr is prnd. */
9252 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9253 ppi_adjust = 2;
9254 /* The repeat pseudo-insn expands to three insns, a total of
9255 six bytes in size. */
9256 else if ((c == 'r' || c == 'R')
9257 && ! strncasecmp ("epeat", templ, 5))
9258 ppi_adjust = 4;
9259 while (c && c != '\n'
9260 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9262 /* If this is a label, it is obviously not a ppi insn. */
9263 if (c == ':' && maybe_label)
9265 ppi_adjust = 0;
9266 break;
9268 else if (c == '\'' || c == '"')
9269 maybe_label = 0;
9270 c = *templ++;
9272 sum += ppi_adjust;
9273 maybe_label = c != ':';
9275 while (c);
9276 return sum;
9278 return 0;
9281 /* Return TRUE for a valid displacement for the REG+disp addressing
9282 with MODE. */
9284 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9285 into the FRx registers. We implement this by setting the maximum offset
9286 to zero when the value is SFmode. This also restricts loading of SFmode
9287 values into the integer registers, but that can't be helped. */
9289 /* The SH allows a displacement in a QI or HI addressing mode, but only
9290 when the other operand is R0. GCC doesn't handle this very well, so we forgo
9291 all of that.
9293 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9294 DI can be any number 0..60. */
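/* Examples (illustrative, plain SH without the SH2A or SHmedia extensions
   handled below): for SImode, @(60,Rn) is accepted (60 < 64 and a multiple
   of 4) while @(62,Rn) and @(64,Rn) are rejected; for DImode the largest
   displacement accepted by the checks below is 56.  */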
9296 bool
9297 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9299 if (CONST_INT_P (op))
9301 if (TARGET_SHMEDIA)
9303 int size;
9305 /* Check if this is the address of an unaligned load / store. */
9306 if (mode == VOIDmode)
9307 return CONST_OK_FOR_I06 (INTVAL (op));
9309 size = GET_MODE_SIZE (mode);
9310 return (!(INTVAL (op) & (size - 1))
9311 && INTVAL (op) >= -512 * size
9312 && INTVAL (op) < 512 * size);
9315 if (TARGET_SH2A)
9317 if (GET_MODE_SIZE (mode) == 1
9318 && (unsigned) INTVAL (op) < 4096)
9319 return true;
9322 if ((GET_MODE_SIZE (mode) == 4
9323 && (unsigned) INTVAL (op) < 64
9324 && !(INTVAL (op) & 3)
9325 && !(TARGET_SH2E && mode == SFmode))
9326 || (GET_MODE_SIZE (mode) == 4
9327 && (unsigned) INTVAL (op) < 16383
9328 && !(INTVAL (op) & 3) && TARGET_SH2A))
9329 return true;
9331 if ((GET_MODE_SIZE (mode) == 8
9332 && (unsigned) INTVAL (op) < 60
9333 && !(INTVAL (op) & 3)
9334 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9335 || ((GET_MODE_SIZE (mode)==8)
9336 && (unsigned) INTVAL (op) < 8192
9337 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9338 && (TARGET_SH2A && mode == DFmode)))
9339 return true;
9342 return false;
9345 /* Recognize an RTL expression that is a valid memory address for
9346 an instruction.
9347 The MODE argument is the machine mode for the MEM expression
9348 that wants to use this address.
9349 Allow REG
9350 REG+disp
9351 REG+r0
9352 REG++
9353 --REG */
9355 static bool
9356 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9358 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9359 return true;
9360 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9361 && ! TARGET_SHMEDIA
9362 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9363 return true;
9364 else if (GET_CODE (x) == PLUS
9365 && (mode != PSImode || reload_completed))
9367 rtx xop0 = XEXP (x, 0);
9368 rtx xop1 = XEXP (x, 1);
9370 if (GET_MODE_SIZE (mode) <= 8
9371 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9372 && sh_legitimate_index_p (mode, xop1))
9373 return true;
9375 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9376 || ((xop0 == stack_pointer_rtx
9377 || xop0 == hard_frame_pointer_rtx)
9378 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9379 || ((xop1 == stack_pointer_rtx
9380 || xop1 == hard_frame_pointer_rtx)
9381 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9382 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9383 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9384 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9385 && TARGET_FMOVD && mode == DFmode)))
9387 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9388 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9389 return true;
9390 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9391 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9392 return true;
9396 return false;
9399 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9400 isn't protected by a PIC unspec. */
9402 nonpic_symbol_mentioned_p (rtx x)
9404 register const char *fmt;
9405 register int i;
9407 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9408 || GET_CODE (x) == PC)
9409 return 1;
9411 /* We don't want to look into the possible MEM location of a
9412 CONST_DOUBLE, since we're not going to use it, in general. */
9413 if (GET_CODE (x) == CONST_DOUBLE)
9414 return 0;
9416 if (GET_CODE (x) == UNSPEC
9417 && (XINT (x, 1) == UNSPEC_PIC
9418 || XINT (x, 1) == UNSPEC_GOT
9419 || XINT (x, 1) == UNSPEC_GOTOFF
9420 || XINT (x, 1) == UNSPEC_GOTPLT
9421 || XINT (x, 1) == UNSPEC_GOTTPOFF
9422 || XINT (x, 1) == UNSPEC_DTPOFF
9423 || XINT (x, 1) == UNSPEC_PLT
9424 || XINT (x, 1) == UNSPEC_SYMOFF
9425 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9426 return 0;
9428 fmt = GET_RTX_FORMAT (GET_CODE (x));
9429 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9431 if (fmt[i] == 'E')
9433 register int j;
9435 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9436 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9437 return 1;
9439 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9440 return 1;
9443 return 0;
9446 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9447 @GOTOFF in `reg'. */
9449 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9450 rtx reg)
9452 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9453 return orig;
9455 if (GET_CODE (orig) == LABEL_REF
9456 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9458 if (reg == 0)
9459 reg = gen_reg_rtx (Pmode);
9461 emit_insn (gen_symGOTOFF2reg (reg, orig));
9462 return reg;
9464 else if (GET_CODE (orig) == SYMBOL_REF)
9466 if (reg == 0)
9467 reg = gen_reg_rtx (Pmode);
9469 emit_insn (gen_symGOT2reg (reg, orig));
9470 return reg;
9472 return orig;
9475 /* Try machine-dependent ways of modifying an illegitimate address
9476 to be legitimate. If we find one, return the new, valid address.
9477 Otherwise, return X.
9479 For the SH, if X is almost suitable for indexing, but the offset is
9480 out of range, convert it into a normal form so that CSE has a chance
9481 of reducing the number of address registers used. */
9483 static rtx
9484 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9486 if (flag_pic)
9487 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9489 if (GET_CODE (x) == PLUS
9490 && (GET_MODE_SIZE (mode) == 4
9491 || GET_MODE_SIZE (mode) == 8)
9492 && CONST_INT_P (XEXP (x, 1))
9493 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9494 && ! TARGET_SHMEDIA
9495 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9496 && ! (TARGET_SH2E && mode == SFmode))
9498 rtx index_rtx = XEXP (x, 1);
9499 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9500 rtx sum;
9502 /* On rare occasions, we might get an unaligned pointer
9503 that is indexed in a way to give an aligned address.
9504 Therefore, keep the lower two bits in offset_base. */
9505 /* Instead of offset_base 128..131 use 124..127, so that
9506 simple add suffices. */
9507 if (offset > 127)
9508 offset_base = ((offset + 4) & ~60) - 4;
9509 else
9510 offset_base = offset & ~60;
9512 /* Sometimes the normal form does not suit DImode. We
9513 could avoid that by using smaller ranges, but that
9514 would give less optimized code when SImode is
9515 prevalent. */
9516 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9518 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9519 GEN_INT (offset_base), NULL_RTX, 0,
9520 OPTAB_LIB_WIDEN);
9522 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9526 return x;
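/* Worked example (illustrative): for an SImode access to (plus r1 100),
   100 is not greater than 127, so offset_base = 100 & ~60 = 64 and the
   address is rewritten as (plus (plus r1 64) 36).  The remaining offset 36
   is 4-byte aligned and below 64, so it fits the REG+disp form accepted by
   sh_legitimate_index_p, while the inner sum (r1 + 64) is a candidate for
   CSE with neighbouring accesses.  */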
9529 /* Mark the use of a constant in the literal table. If the constant
9530 has multiple labels, make it unique. */
9531 static rtx
9532 mark_constant_pool_use (rtx x)
9534 rtx insn, lab, pattern;
9536 if (x == NULL)
9537 return x;
9539 switch (GET_CODE (x))
9541 case LABEL_REF:
9542 x = XEXP (x, 0);
9543 case CODE_LABEL:
9544 break;
9545 default:
9546 return x;
9549 /* Get the first label in the list of labels for the same constant
9550 and delete the other labels in the list. */
9551 lab = x;
9552 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9554 if (!LABEL_P (insn)
9555 || LABEL_REFS (insn) != NEXT_INSN (insn))
9556 break;
9557 lab = insn;
9560 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9561 INSN_DELETED_P (insn) = 1;
9563 /* Mark constants in a window. */
9564 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9566 if (!NONJUMP_INSN_P (insn))
9567 continue;
9569 pattern = PATTERN (insn);
9570 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9571 continue;
9573 switch (XINT (pattern, 1))
9575 case UNSPECV_CONST2:
9576 case UNSPECV_CONST4:
9577 case UNSPECV_CONST8:
9578 XVECEXP (pattern, 0, 1) = const1_rtx;
9579 break;
9580 case UNSPECV_WINDOW_END:
9581 if (XVECEXP (pattern, 0, 0) == x)
9582 return lab;
9583 break;
9584 case UNSPECV_CONST_END:
9585 return lab;
9586 default:
9587 break;
9591 return lab;
9594 /* Return true if it's possible to redirect BRANCH1 to the destination
9595 of an unconditional jump BRANCH2. We only want to do this if the
9596 resulting branch will have a short displacement. */
9598 sh_can_redirect_branch (rtx branch1, rtx branch2)
9600 if (flag_expensive_optimizations && simplejump_p (branch2))
9602 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9603 rtx insn;
9604 int distance;
9606 for (distance = 0, insn = NEXT_INSN (branch1);
9607 insn && distance < 256;
9608 insn = PREV_INSN (insn))
9610 if (insn == dest)
9611 return 1;
9612 else
9613 distance += get_attr_length (insn);
9615 for (distance = 0, insn = NEXT_INSN (branch1);
9616 insn && distance < 256;
9617 insn = NEXT_INSN (insn))
9619 if (insn == dest)
9620 return 1;
9621 else
9622 distance += get_attr_length (insn);
9625 return 0;
9628 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9630 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9631 unsigned int new_reg)
9633 /* Interrupt functions can only use registers that have already been
9634 saved by the prologue, even if they would normally be
9635 call-clobbered. */
9637 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9638 return 0;
9640 return 1;
9643 /* Function to update the integer COST
9644 based on the relationship between INSN that is dependent on
9645 DEP_INSN through the dependence LINK. The default is to make no
9646 adjustment to COST. This can be used for example to specify to
9647 the scheduler that an output- or anti-dependence does not incur
9648 the same cost as a data-dependence. The return value should be
9649 the new value for COST. */
9650 static int
9651 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9653 rtx reg, use_pat;
9655 if (TARGET_SHMEDIA)
9657 /* On SHmedia, if the dependence is an anti-dependence or
9658 output-dependence, there is no cost. */
9659 if (REG_NOTE_KIND (link) != 0)
9661 /* However, dependencies between target register loads and
9662 uses of the register in a subsequent block that are separated
9663 by a conditional branch are not modelled - we have to make do with
9664 the anti-dependency between the target register load and the
9665 conditional branch that ends the current block. */
9666 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9667 && GET_CODE (PATTERN (dep_insn)) == SET
9668 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9669 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9670 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9672 int orig_cost = cost;
9673 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9674 rtx target = ((! note
9675 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9676 ? insn : JUMP_LABEL (insn));
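/* The REG_BR_PROB note holds the taken probability scaled by
   REG_BR_PROB_BASE; when it is below one half (or the note is absent) the
   likely path is the fall-through starting at INSN, otherwise it starts at
   the jump target.  */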
9677 /* On the likely path, the branch costs 1, on the unlikely path,
9678 it costs 3. */
9679 cost--;
9681 target = next_active_insn (target);
9682 while (target && ! flow_dependent_p (target, dep_insn)
9683 && --cost > 0);
9684 /* If two branches are executed in immediate succession, with the
9685 first branch properly predicted, this causes a stall at the
9686 second branch, hence we won't need the target for the
9687 second branch for two cycles after the launch of the first
9688 branch. */
9689 if (cost > orig_cost - 2)
9690 cost = orig_cost - 2;
9692 else
9693 cost = 0;
9696 else if (get_attr_is_mac_media (insn)
9697 && get_attr_is_mac_media (dep_insn))
9698 cost = 1;
9700 else if (! reload_completed
9701 && GET_CODE (PATTERN (insn)) == SET
9702 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9703 && GET_CODE (PATTERN (dep_insn)) == SET
9704 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9705 && cost < 4)
9706 cost = 4;
9707 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9708 that is needed at the target. */
9709 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9710 && ! flow_dependent_p (insn, dep_insn))
9711 cost--;
9713 else if (REG_NOTE_KIND (link) == 0)
9715 enum attr_type type;
9716 rtx dep_set;
9718 if (recog_memoized (insn) < 0
9719 || recog_memoized (dep_insn) < 0)
9720 return cost;
9722 dep_set = single_set (dep_insn);
9724 /* The latency that we specify in the scheduling description refers
9725 to the actual output, not to an auto-increment register; for that,
9726 the latency is one. */
9727 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9729 rtx set = single_set (insn);
9731 if (set
9732 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9733 && (!MEM_P (SET_DEST (set))
9734 || !reg_mentioned_p (SET_DEST (dep_set),
9735 XEXP (SET_DEST (set), 0))))
9736 cost = 1;
9738 /* The only input for a call that is timing-critical is the
9739 function's address. */
9740 if (CALL_P (insn))
9742 rtx call = PATTERN (insn);
9744 if (GET_CODE (call) == PARALLEL)
9745 call = XVECEXP (call, 0 ,0);
9746 if (GET_CODE (call) == SET)
9747 call = SET_SRC (call);
9748 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9749 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9750 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9751 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9752 cost -= TARGET_SH4_300 ? 3 : 6;
9754 /* Likewise, the most timing critical input for an sfunc call
9755 is the function address. However, sfuncs typically start
9756 using their arguments pretty quickly.
9757 Assume a four cycle delay for SH4 before they are needed.
9758 Cached ST40-300 calls are quicker, so assume only a one
9759 cycle delay there.
9760 ??? Maybe we should encode the delays till input registers
9761 are needed by sfuncs into the sfunc call insn. */
9762 /* All sfunc calls are parallels with at least four components.
9763 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9764 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9765 && XVECLEN (PATTERN (insn), 0) >= 4
9766 && (reg = sfunc_uses_reg (insn)))
9768 if (! reg_set_p (reg, dep_insn))
9769 cost -= TARGET_SH4_300 ? 1 : 4;
9771 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9773 enum attr_type dep_type = get_attr_type (dep_insn);
9775 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9776 cost--;
9777 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9778 && (type = get_attr_type (insn)) != TYPE_CALL
9779 && type != TYPE_SFUNC)
9780 cost--;
9781 /* When the preceding instruction loads the shift amount of
9782 the following SHAD/SHLD, the latency of the load is increased
9783 by 1 cycle. */
9784 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9785 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9786 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9787 XEXP (SET_SRC (single_set (insn)),
9788 1)))
9789 cost++;
9790 /* When an LS group instruction with a latency of less than
9791 3 cycles is followed by a double-precision floating-point
9792 instruction, FIPR, or FTRV, the latency of the first
9793 instruction is increased to 3 cycles. */
9794 else if (cost < 3
9795 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9796 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9797 cost = 3;
9798 /* The lsw register of a double-precision computation is ready one
9799 cycle earlier. */
9800 else if (reload_completed
9801 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9802 && (use_pat = single_set (insn))
9803 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9804 SET_SRC (use_pat)))
9805 cost -= 1;
9807 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9808 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9809 cost -= 1;
9811 else if (TARGET_SH4_300)
9813 /* Stores need their input register two cycles later. */
9814 if (dep_set && cost >= 1
9815 && ((type = get_attr_type (insn)) == TYPE_STORE
9816 || type == TYPE_PSTORE
9817 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9819 rtx set = single_set (insn);
9821 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9822 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9824 cost -= 2;
9825 /* But don't reduce the cost below 1 if the address depends
9826 on a side effect of dep_insn. */
9827 if (cost < 1
9828 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9829 cost = 1;
9834 /* An anti-dependence penalty of two applies if the first insn is a double
9835 precision fadd / fsub / fmul. */
9836 else if (!TARGET_SH4_300
9837 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9838 && recog_memoized (dep_insn) >= 0
9839 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9840 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9841 /* A lot of alleged anti-flow dependences are fake,
9842 so check this one is real. */
9843 && flow_dependent_p (dep_insn, insn))
9844 cost = 2;
9846 return cost;
9849 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9850 if DEP_INSN is anti-flow dependent on INSN. */
9851 static int
9852 flow_dependent_p (rtx insn, rtx dep_insn)
9854 rtx tmp = PATTERN (insn);
9856 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9857 return tmp == NULL_RTX;
9860 /* A helper function for flow_dependent_p called through note_stores. */
9861 static void
9862 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9864 rtx * pinsn = (rtx *) data;
9866 if (*pinsn && reg_referenced_p (x, *pinsn))
9867 *pinsn = NULL_RTX;
9870 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9871 'special function' patterns (type sfunc) that clobber pr, but that
9872 do not look like function calls to leaf_function_p. Hence we must
9873 do this extra check. */
9874 static int
9875 sh_pr_n_sets (void)
9877 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9880 /* Return where to allocate pseudo for a given hard register initial
9881 value. */
9882 static rtx
9883 sh_allocate_initial_value (rtx hard_reg)
9885 rtx x;
9887 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9889 if (current_function_is_leaf
9890 && ! sh_pr_n_sets ()
9891 && ! (TARGET_SHCOMPACT
9892 && ((crtl->args.info.call_cookie
9893 & ~ CALL_COOKIE_RET_TRAMP (1))
9894 || crtl->saves_all_registers)))
9895 x = hard_reg;
9896 else
9897 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9899 else
9900 x = NULL_RTX;
9902 return x;
9905 /* This function returns "2" to indicate dual issue for the SH4
9906 processor. To be used by the DFA pipeline description. */
9907 static int
9908 sh_issue_rate (void)
9910 if (TARGET_SUPERSCALAR)
9911 return 2;
9912 else
9913 return 1;
9916 /* Functions for ready queue reordering for sched1. */
9918 /* Get weight for mode for a set x. */
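/* Roughly: a CLOBBER of a MODE register counts as one register born here,
   as does a SET whose REG destination does not appear in its source; a SET
   that reuses its destination register in the source merely updates an
   existing value and counts as zero.  */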
9919 static short
9920 find_set_regmode_weight (rtx x, enum machine_mode mode)
9922 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9923 return 1;
9924 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9926 if (REG_P (SET_DEST (x)))
9928 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9929 return 1;
9930 else
9931 return 0;
9933 return 1;
9935 return 0;
9938 /* Get regmode weight for insn. */
9939 static short
9940 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9942 short reg_weight = 0;
9943 rtx x;
9945 /* Increment weight for each register born here. */
9946 x = PATTERN (insn);
9947 reg_weight += find_set_regmode_weight (x, mode);
9948 if (GET_CODE (x) == PARALLEL)
9950 int j;
9951 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9953 x = XVECEXP (PATTERN (insn), 0, j);
9954 reg_weight += find_set_regmode_weight (x, mode);
9957 /* Decrement weight for each register that dies here. */
9958 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9960 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9962 rtx note = XEXP (x, 0);
9963 if (REG_P (note) && GET_MODE (note) == mode)
9964 reg_weight--;
9967 return reg_weight;
9970 /* Calculate regmode weights for all insns of a basic block. */
9971 static void
9972 find_regmode_weight (basic_block b, enum machine_mode mode)
9974 rtx insn, next_tail, head, tail;
9976 get_ebb_head_tail (b, b, &head, &tail);
9977 next_tail = NEXT_INSN (tail);
9979 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9981 /* Handle register life information. */
9982 if (!INSN_P (insn))
9983 continue;
9985 if (mode == SFmode)
9986 INSN_REGMODE_WEIGHT (insn, mode) =
9987 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9988 else if (mode == SImode)
9989 INSN_REGMODE_WEIGHT (insn, mode) =
9990 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9994 /* Comparison function for ready queue sorting. */
9995 static int
9996 rank_for_reorder (const void *x, const void *y)
9998 rtx tmp = *(const rtx *) y;
9999 rtx tmp2 = *(const rtx *) x;
10001 /* The insn in a schedule group should be issued first. */
10002 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10003 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10005 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10006 minimizes instruction movement, thus minimizing sched's effect on
10007 register pressure. */
10008 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10011 /* Resort the array A in which only element at index N may be out of order. */
10012 static void
10013 swap_reorder (rtx *a, int n)
10015 rtx insn = a[n - 1];
10016 int i = n - 2;
10018 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10020 a[i + 1] = a[i];
10021 i -= 1;
10023 a[i + 1] = insn;
10026 #define SCHED_REORDER(READY, N_READY) \
10027 do \
10029 if ((N_READY) == 2) \
10030 swap_reorder (READY, N_READY); \
10031 else if ((N_READY) > 2) \
10032 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10034 while (0)
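/* With exactly two insns in the ready list a single insertion step
   (swap_reorder) is enough; longer lists fall back to qsort using
   rank_for_reorder as the comparison function.  */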
10036 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10037 macro. */
10038 static void
10039 ready_reorder (rtx *ready, int nready)
10041 SCHED_REORDER (ready, nready);
10044 /* Count life regions of r0 for a block. */
10045 static int
10046 find_r0_life_regions (basic_block b)
10048 rtx end, insn;
10049 rtx pset;
10050 rtx r0_reg;
10051 int live;
10052 int set;
10053 int death = 0;
10055 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10057 set = 1;
10058 live = 1;
10060 else
10062 set = 0;
10063 live = 0;
10066 insn = BB_HEAD (b);
10067 end = BB_END (b);
10068 r0_reg = gen_rtx_REG (SImode, R0_REG);
10069 while (1)
10071 if (INSN_P (insn))
10073 if (find_regno_note (insn, REG_DEAD, R0_REG))
10075 death++;
10076 live = 0;
10078 if (!live
10079 && (pset = single_set (insn))
10080 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10081 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10083 set++;
10084 live = 1;
10087 if (insn == end)
10088 break;
10089 insn = NEXT_INSN (insn);
10091 return set - death;
10094 /* Calculate regmode weights for all insns of all basic blocks. */
10095 static void
10096 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10097 int verbose ATTRIBUTE_UNUSED,
10098 int old_max_uid)
10100 basic_block b;
10102 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10103 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10104 r0_life_regions = 0;
10106 FOR_EACH_BB_REVERSE (b)
10108 find_regmode_weight (b, SImode);
10109 find_regmode_weight (b, SFmode);
10110 if (!reload_completed)
10111 r0_life_regions += find_r0_life_regions (b);
10114 CURR_REGMODE_PRESSURE (SImode) = 0;
10115 CURR_REGMODE_PRESSURE (SFmode) = 0;
10119 /* Cleanup. */
10120 static void
10121 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10122 int verbose ATTRIBUTE_UNUSED)
10124 if (regmode_weight[0])
10126 free (regmode_weight[0]);
10127 regmode_weight[0] = NULL;
10129 if (regmode_weight[1])
10131 free (regmode_weight[1]);
10132 regmode_weight[1] = NULL;
10136 /* The set of supported scalar modes differs from the default in TImode
10137 for 32-bit SHMEDIA. */
10138 static bool
10139 sh_scalar_mode_supported_p (enum machine_mode mode)
10141 if (TARGET_SHMEDIA32 && mode == TImode)
10142 return false;
10144 return default_scalar_mode_supported_p (mode);
10147 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10148 keep count of register pressures on SImode and SFmode. */
10149 static int
10150 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10151 int sched_verbose ATTRIBUTE_UNUSED,
10152 rtx insn,
10153 int can_issue_more)
10155 if (GET_CODE (PATTERN (insn)) != USE
10156 && GET_CODE (PATTERN (insn)) != CLOBBER)
10157 cached_can_issue_more = can_issue_more - 1;
10158 else
10159 cached_can_issue_more = can_issue_more;
10161 if (reload_completed)
10162 return cached_can_issue_more;
10164 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10165 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10167 return cached_can_issue_more;
10170 static void
10171 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10172 int verbose ATTRIBUTE_UNUSED,
10173 int veclen ATTRIBUTE_UNUSED)
10175 CURR_REGMODE_PRESSURE (SImode) = 0;
10176 CURR_REGMODE_PRESSURE (SFmode) = 0;
10179 /* Some magic numbers. */
10180 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10181 functions that already have high pressure on r0. */
10182 #define R0_MAX_LIFE_REGIONS 2
10183 /* Register Pressure thresholds for SImode and SFmode registers. */
10184 #define SIMODE_MAX_WEIGHT 5
10185 #define SFMODE_MAX_WEIGHT 10
10187 /* Return true if the pressure is high for MODE. */
10188 static short
10189 high_pressure (enum machine_mode mode)
10191 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10192 functions that already have high pressure on r0. */
10193 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10194 return 1;
10196 if (mode == SFmode)
10197 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10198 else
10199 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10202 /* Reorder ready queue if register pressure is high. */
10203 static int
10204 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10205 int sched_verbose ATTRIBUTE_UNUSED,
10206 rtx *ready,
10207 int *n_readyp,
10208 int clock_var ATTRIBUTE_UNUSED)
10210 if (reload_completed)
10211 return sh_issue_rate ();
10213 if (high_pressure (SFmode) || high_pressure (SImode))
10215 ready_reorder (ready, *n_readyp);
10218 return sh_issue_rate ();
10221 /* Skip cycles if the current register pressure is high. */
10222 static int
10223 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10224 int sched_verbose ATTRIBUTE_UNUSED,
10225 rtx *ready ATTRIBUTE_UNUSED,
10226 int *n_readyp ATTRIBUTE_UNUSED,
10227 int clock_var ATTRIBUTE_UNUSED)
10229 if (reload_completed)
10230 return cached_can_issue_more;
10232 if (high_pressure(SFmode) || high_pressure (SImode))
10233 skip_cycles = 1;
10235 return cached_can_issue_more;
10238 /* Skip cycles without sorting the ready queue. This will move insns from
10239 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10240 queue by sh_reorder. */
10242 /* Generally, skipping this many cycles is sufficient for all insns to move
10243 from Q -> R. */
10244 #define MAX_SKIPS 8
10246 static int
10247 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10248 int sched_verbose ATTRIBUTE_UNUSED,
10249 rtx insn ATTRIBUTE_UNUSED,
10250 int last_clock_var,
10251 int clock_var,
10252 int *sort_p)
10254 if (reload_completed)
10255 return 0;
10257 if (skip_cycles)
10259 if ((clock_var - last_clock_var) < MAX_SKIPS)
10261 *sort_p = 0;
10262 return 1;
10264 /* If this is the last cycle we are skipping, allow reordering of R. */
10265 if ((clock_var - last_clock_var) == MAX_SKIPS)
10267 *sort_p = 1;
10268 return 1;
10272 skip_cycles = 0;
10274 return 0;
10277 /* SHmedia requires registers for branches, so we can't generate new
10278 branches past reload. */
10279 static bool
10280 sh_cannot_modify_jumps_p (void)
10282 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10285 static enum reg_class
10286 sh_target_reg_class (void)
10288 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10291 static bool
10292 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10294 HARD_REG_SET dummy;
10295 #if 0
10296 rtx insn;
10297 #endif
10299 if (! shmedia_space_reserved_for_target_registers)
10300 return 0;
10301 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10302 return 0;
10303 if (calc_live_regs (&dummy) >= 6 * 8)
10304 return 1;
10305 return 0;
10308 static bool
10309 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10311 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10315 /* On the SH1..SH4, the trampoline looks like
10316 2 0002 D202 mov.l l2,r2
10317 1 0000 D301 mov.l l1,r3
10318 3 0004 422B jmp @r2
10319 4 0006 0009 nop
10320 5 0008 00000000 l1: .long area
10321 6 000c 00000000 l2: .long function
10323 SH5 (compact) uses r1 instead of r3 for the static chain. */
10326 /* Emit RTL insns to initialize the variable parts of a trampoline.
10327 FNADDR is an RTX for the address of the function's pure code.
10328 CXT is an RTX for the static chain value for the function. */
10330 void
10331 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
10333 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
10335 if (TARGET_SHMEDIA64)
10337 rtx tramp_templ;
10338 int fixed_len;
10340 rtx movi1 = GEN_INT (0xcc000010);
10341 rtx shori1 = GEN_INT (0xc8000010);
10342 rtx src, dst;
10344 /* The following trampoline works within a +- 128 KB range for cxt:
10345 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10346 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10347 gettr tr1,r1; blink tr0,r63 */
10348 /* Address rounding makes it hard to compute the exact bounds of the
10349 offset for this trampoline, but we have a rather generous offset
10350 range, so frame_offset should do fine as an upper bound. */
10351 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10353 /* ??? could optimize this trampoline initialization
10354 by writing DImode words with two insns each. */
10355 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10356 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10357 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10358 insn = gen_rtx_AND (DImode, insn, mask);
10359 /* Or in ptb/u .,tr1 pattern */
10360 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10361 insn = force_operand (insn, NULL_RTX);
10362 insn = gen_lowpart (SImode, insn);
10363 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10364 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10365 insn = gen_rtx_AND (DImode, insn, mask);
10366 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10367 insn = gen_lowpart (SImode, insn);
10368 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10369 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10370 insn = gen_rtx_AND (DImode, insn, mask);
10371 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10372 insn = gen_lowpart (SImode, insn);
10373 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10374 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10375 insn = gen_rtx_AND (DImode, insn, mask);
10376 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10377 insn = gen_lowpart (SImode, insn);
10378 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10379 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10380 insn = gen_rtx_AND (DImode, insn, mask);
10381 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10382 insn = gen_lowpart (SImode, insn);
10383 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10384 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10385 GEN_INT (0x6bf10600));
10386 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10387 GEN_INT (0x4415fc10));
10388 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10389 GEN_INT (0x4401fff0));
10390 emit_insn (gen_ic_invalidate_line (tramp));
10391 return;
10393 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10394 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10396 tramp_templ = gen_datalabel_ref (tramp_templ);
10397 dst = tramp_mem;
10398 src = gen_const_mem (BLKmode, tramp_templ);
10399 set_mem_align (dst, 256);
10400 set_mem_align (src, 64);
10401 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10403 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10404 emit_move_insn (adjust_address (tramp_mem, Pmode,
10405 fixed_len + GET_MODE_SIZE (Pmode)),
10406 cxt);
10407 emit_insn (gen_ic_invalidate_line (tramp));
10408 return;
10410 else if (TARGET_SHMEDIA)
10412 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10413 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10414 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10415 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10416 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10417 rotated 10 right, and the higher 16 bits of every 32 selected. */
10418 rtx movishori
10419 = force_reg (V2HImode, (simplify_gen_subreg
10420 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10421 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10422 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10424 tramp = force_reg (Pmode, tramp);
10425 fnaddr = force_reg (SImode, fnaddr);
10426 cxt = force_reg (SImode, cxt);
10427 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10428 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10429 movishori));
10430 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10431 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10432 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10433 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10434 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10435 gen_rtx_SUBREG (V2HImode, cxt, 0),
10436 movishori));
10437 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10438 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10439 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10440 if (TARGET_LITTLE_ENDIAN)
10442 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10443 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10445 else
10447 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10448 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10450 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10451 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10452 emit_insn (gen_ic_invalidate_line (tramp));
10453 return;
10455 else if (TARGET_SHCOMPACT)
10457 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10458 return;
10460 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10461 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10462 SImode));
10463 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10464 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10465 SImode));
10466 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10467 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
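/* The two SImode words written at offsets 0 and 4 hold the template's
   instructions (the two mov.l's, then jmp / nop), byte-ordered for the
   target endianness; offset 8 is slot l1 (the static chain, picked up
   into r3) and offset 12 is slot l2 (the function address, loaded into
   r2 and jumped through).  */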
10468 if (TARGET_HARVARD)
10470 if (!TARGET_INLINE_IC_INVALIDATE
10471 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10472 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10473 FUNCTION_ORDINARY),
10474 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10475 else
10476 emit_insn (gen_ic_invalidate_line (tramp));
10480 /* FIXME: This is overly conservative. A SHcompact function that
10481 receives arguments ``by reference'' will have them stored in its
10482 own stack frame, so it must not pass pointers or references to
10483 these arguments to other functions by means of sibling calls. */
10484 /* If PIC, we cannot make sibling calls to global functions
10485 because the PLT requires r12 to be live. */
10486 static bool
10487 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10489 return (1
10490 && (! TARGET_SHCOMPACT
10491 || crtl->args.info.stack_regs == 0)
10492 && ! sh_cfun_interrupt_handler_p ()
10493 && (! flag_pic
10494 || (decl && ! TREE_PUBLIC (decl))
10495 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10498 /* Machine specific built-in functions. */
10500 struct builtin_description
10502 const enum insn_code icode;
10503 const char *const name;
10504 int signature;
10507 /* Describe the number and signedness of arguments; arg[0] == result
10508 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10509 /* 9: 64-bit pointer, 10: 32-bit pointer */
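/* For example, SH_BLTIN_LDUA_L below is { 2, 10 }: a signed result and a
   single 32-bit pointer argument; SH_BLTIN_STUA_Q64 is { 0, 9, 1 }: no
   result, a 64-bit pointer and an unsigned value.  */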
10510 static const char signature_args[][4] =
10512 #define SH_BLTIN_V2SI2 0
10513 { 4, 4 },
10514 #define SH_BLTIN_V4HI2 1
10515 { 4, 4 },
10516 #define SH_BLTIN_V2SI3 2
10517 { 4, 4, 4 },
10518 #define SH_BLTIN_V4HI3 3
10519 { 4, 4, 4 },
10520 #define SH_BLTIN_V8QI3 4
10521 { 4, 4, 4 },
10522 #define SH_BLTIN_MAC_HISI 5
10523 { 1, 4, 4, 1 },
10524 #define SH_BLTIN_SH_HI 6
10525 { 4, 4, 1 },
10526 #define SH_BLTIN_SH_SI 7
10527 { 4, 4, 1 },
10528 #define SH_BLTIN_V4HI2V2SI 8
10529 { 4, 4, 4 },
10530 #define SH_BLTIN_V4HI2V8QI 9
10531 { 4, 4, 4 },
10532 #define SH_BLTIN_SISF 10
10533 { 4, 2 },
10534 #define SH_BLTIN_LDUA_L 11
10535 { 2, 10 },
10536 #define SH_BLTIN_LDUA_Q 12
10537 { 1, 10 },
10538 #define SH_BLTIN_STUA_L 13
10539 { 0, 10, 2 },
10540 #define SH_BLTIN_STUA_Q 14
10541 { 0, 10, 1 },
10542 #define SH_BLTIN_LDUA_L64 15
10543 { 2, 9 },
10544 #define SH_BLTIN_LDUA_Q64 16
10545 { 1, 9 },
10546 #define SH_BLTIN_STUA_L64 17
10547 { 0, 9, 2 },
10548 #define SH_BLTIN_STUA_Q64 18
10549 { 0, 9, 1 },
10550 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10551 #define SH_BLTIN_2 19
10552 #define SH_BLTIN_SU 19
10553 { 1, 2 },
10554 #define SH_BLTIN_3 20
10555 #define SH_BLTIN_SUS 20
10556 { 2, 2, 1 },
10557 #define SH_BLTIN_PSSV 21
10558 { 0, 8, 2, 2 },
10559 #define SH_BLTIN_XXUU 22
10560 #define SH_BLTIN_UUUU 22
10561 { 1, 1, 1, 1 },
10562 #define SH_BLTIN_PV 23
10563 { 0, 8 },
10565 /* mcmv: operands considered unsigned. */
10566 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10567 /* mperm: control value considered unsigned int. */
10568 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10569 /* mshards_q: returns signed short. */
10570 /* nsb: takes long long arg, returns unsigned char. */
10571 static const struct builtin_description bdesc[] =
10573 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
10574 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
10575 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
10576 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
10577 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
10578 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
10579 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
10580 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
10581 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
10582 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
10583 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
10584 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
10585 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
10586 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
10587 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
10588 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
10589 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
10590 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
10591 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
10592 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
10593 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
10594 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
10595 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
10596 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
10597 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
10598 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
10599 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10600 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
10601 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10602 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10603 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10604 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10605 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10606 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10607 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10608 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10609 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10610 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10611 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10612 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10613 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10614 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10615 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10616 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10617 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10618 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10619 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10620 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10621 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10622 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10623 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10624 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10625 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10626 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10627 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10628 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10629 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10630 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10631 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10632 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10633 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10634 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10635 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10636 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10637 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10638 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10639 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10640 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10641 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10642 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10643 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10644 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10645 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10646 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10647 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10648 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10649 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10650 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10651 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10652 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10653 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10654 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10655 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10656 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10659 static void
10660 sh_media_init_builtins (void)
10662 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10663 const struct builtin_description *d;
10665 memset (shared, 0, sizeof shared);
10666 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10668 tree type, arg_type = 0;
10669 int signature = d->signature;
10670 int i;
10672 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10673 type = shared[signature];
10674 else
10676 int has_result = signature_args[signature][0] != 0;
10678 if ((signature_args[signature][1] & 8)
10679 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10680 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10681 continue;
10682 if (! TARGET_FPU_ANY
10683 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10684 continue;
10685 type = void_list_node;
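/* Walk the signature from the last argument towards the result, consing
   each argument type onto TYPE; the resulting TREE_LIST is in argument
   order and ends with void_list_node, and the type left in ARG_TYPE when
   the loop exits is the return type.  */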
10686 for (i = 3; ; i--)
10688 int arg = signature_args[signature][i];
10689 int opno = i - 1 + has_result;
10691 if (arg & 8)
10692 arg_type = ptr_type_node;
10693 else if (arg)
10694 arg_type = (*lang_hooks.types.type_for_mode)
10695 (insn_data[d->icode].operand[opno].mode,
10696 (arg & 1));
10697 else if (i)
10698 continue;
10699 else
10700 arg_type = void_type_node;
10701 if (i == 0)
10702 break;
10703 type = tree_cons (NULL_TREE, arg_type, type);
10705 type = build_function_type (arg_type, type);
10706 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10707 shared[signature] = type;
10709 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10710 NULL, NULL_TREE);
10714 /* Implements target hook vector_mode_supported_p. */
10715 bool
10716 sh_vector_mode_supported_p (enum machine_mode mode)
10718 if (TARGET_FPU_ANY
10719 && ((mode == V2SFmode)
10720 || (mode == V4SFmode)
10721 || (mode == V16SFmode)))
10722 return true;
10724 else if (TARGET_SHMEDIA
10725 && ((mode == V8QImode)
10726 || (mode == V2HImode)
10727 || (mode == V4HImode)
10728 || (mode == V2SImode)))
10729 return true;
10731 return false;
10734 /* Implements target hook dwarf_calling_convention. Return an enum
10735 of dwarf_calling_convention. */
10737 sh_dwarf_calling_convention (const_tree func)
10739 if (sh_attr_renesas_p (func))
10740 return DW_CC_GNU_renesas_sh;
10742 return DW_CC_normal;
10745 static void
10746 sh_init_builtins (void)
10748 if (TARGET_SHMEDIA)
10749 sh_media_init_builtins ();
10752 /* Expand an expression EXP that calls a built-in function,
10753 with result going to TARGET if that's convenient
10754 (and in mode MODE if that's convenient).
10755 SUBTARGET may be used as the target for computing one of EXP's operands.
10756 IGNORE is nonzero if the value is to be ignored. */
10758 static rtx
10759 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10760 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10762 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10763 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10764 const struct builtin_description *d = &bdesc[fcode];
10765 enum insn_code icode = d->icode;
10766 int signature = d->signature;
10767 enum machine_mode tmode = VOIDmode;
10768 int nop = 0, i;
10769 rtx op[4];
10770 rtx pat = 0;
10772 if (signature_args[signature][0])
10774 if (ignore)
10775 return 0;
10777 tmode = insn_data[icode].operand[0].mode;
10778 if (! target
10779 || GET_MODE (target) != tmode
10780 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10781 target = gen_reg_rtx (tmode);
10782 op[nop++] = target;
10784 else
10785 target = 0;
10787 for (i = 1; i <= 3; i++, nop++)
10789 tree arg;
10790 enum machine_mode opmode, argmode;
10791 tree optype;
10793 if (! signature_args[signature][i])
10794 break;
10795 arg = CALL_EXPR_ARG (exp, i - 1);
10796 if (arg == error_mark_node)
10797 return const0_rtx;
10798 if (signature_args[signature][i] & 8)
10800 opmode = ptr_mode;
10801 optype = ptr_type_node;
10803 else
10805 opmode = insn_data[icode].operand[nop].mode;
10806 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10808 argmode = TYPE_MODE (TREE_TYPE (arg));
10809 if (argmode != opmode)
10810 arg = build1 (NOP_EXPR, optype, arg);
10811 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10812 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10813 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10816 switch (nop)
10818 case 1:
10819 pat = (*insn_data[d->icode].genfun) (op[0]);
10820 break;
10821 case 2:
10822 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10823 break;
10824 case 3:
10825 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10826 break;
10827 case 4:
10828 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10829 break;
10830 default:
10831 gcc_unreachable ();
10833 if (! pat)
10834 return 0;
10835 emit_insn (pat);
10836 return target;
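/* Expand a V2SFmode unary operation as two SFmode operations, one per
   vector lane; the trailing const0_rtx / const1_rtx operands select which
   lane each emitted insn reads and writes.  */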
10839 void
10840 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10842 rtx sel0 = const0_rtx;
10843 rtx sel1 = const1_rtx;
10844 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10845 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10847 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10848 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10851 void
10852 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10854 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10856 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10857 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10860 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10861 We can allow any mode in any general register. The special registers
10862 only allow SImode. Don't allow any mode in the PR.
10864 We cannot hold DCmode values in the XD registers because alter_reg
10865 handles subregs of them incorrectly. We could work around this by
10866 spacing the XD registers like the DR registers, but this would require
10867 additional memory in every compilation to hold larger register vectors.
10868 We could hold SFmode / SCmode values in XD registers, but that
10869 would require a tertiary reload when reloading from / to memory,
10870 and a secondary reload to reload from / to general regs; that
10871 seems to be a losing proposition.
10873 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10874 it won't be ferried through GP registers first. */
10876 bool
10877 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10879 if (SPECIAL_REGISTER_P (regno))
10880 return mode == SImode;
10882 if (regno == FPUL_REG)
10883 return (mode == SImode || mode == SFmode);
10885 if (FP_REGISTER_P (regno) && mode == SFmode)
10886 return true;
10888 if (mode == V2SFmode)
10890 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10891 || GENERAL_REGISTER_P (regno)))
10892 return true;
10893 else
10894 return false;
10897 if (mode == V4SFmode)
10899 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10900 || GENERAL_REGISTER_P (regno))
10901 return true;
10902 else
10903 return false;
10906 if (mode == V16SFmode)
10908 if (TARGET_SHMEDIA)
10910 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10911 return true;
10912 else
10913 return false;
10915 else
10916 return regno == FIRST_XD_REG;
10919 if (FP_REGISTER_P (regno))
10921 if (mode == SFmode
10922 || mode == SImode
10923 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10924 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10925 || mode == DCmode
10926 || (TARGET_SHMEDIA
10927 && (mode == DFmode || mode == DImode
10928 || mode == V2SFmode || mode == TImode)))
10929 && ((regno - FIRST_FP_REG) & 1) == 0)
10930 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10931 && ((regno - FIRST_FP_REG) & 3) == 0))
10932 return true;
10933 else
10934 return false;
10937 if (XD_REGISTER_P (regno))
10938 return mode == DFmode;
10940 if (TARGET_REGISTER_P (regno))
10941 return (mode == DImode || mode == SImode || mode == PDImode);
10943 if (regno == PR_REG)
10944 return mode == SImode;
10946 if (regno == FPSCR_REG)
10947 return mode == PSImode;
10949 /* FIXME. This works around PR target/37633 for -O0. */
10950 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10952 unsigned int n = GET_MODE_SIZE (mode) / 8;
10954 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10955 && regno <= FIRST_GENERAL_REG + 14)
10956 return false;
10959 return true;
10962 /* Return true if a mode change from FROM to TO is invalid for the
10963 registers in class RCLASS. */
10964 bool
10965 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10966 enum reg_class rclass)
10968 /* We want to enable the use of SUBREGs as a means to
10969 VEC_SELECT a single element of a vector. */
10970 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10971 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10973 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10975 if (TARGET_LITTLE_ENDIAN)
10977 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10978 return reg_classes_intersect_p (DF_REGS, rclass);
10980 else
10982 if (GET_MODE_SIZE (from) < 8)
10983 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10986 return 0;
10990 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10991 that label is used. */
10993 void
10994 sh_mark_label (rtx address, int nuses)
10996 if (GOTOFF_P (address))
10998 /* Extract the label or symbol. */
10999 address = XEXP (address, 0);
11000 if (GET_CODE (address) == PLUS)
11001 address = XEXP (address, 0);
11002 address = XVECEXP (address, 0, 0);
11004 if (GET_CODE (address) == LABEL_REF
11005 && LABEL_P (XEXP (address, 0)))
11006 LABEL_NUSES (XEXP (address, 0)) += nuses;
11009 /* Compute extra cost of moving data between one register class
11010 and another. */
11012 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11013 uses this information. Hence, the general register <-> floating point
11014 register information here is not used for SFmode. */
11017 sh_register_move_cost (enum machine_mode mode,
11018 enum reg_class srcclass, enum reg_class dstclass)
11020 if (dstclass == T_REGS || dstclass == PR_REGS)
11021 return 10;
11023 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11024 return 4;
11026 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11027 && REGCLASS_HAS_FP_REG (srcclass)
11028 && REGCLASS_HAS_FP_REG (dstclass))
11029 return 4;
11031 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11032 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11034 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11035 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11036 return 9;
11038 if ((REGCLASS_HAS_FP_REG (dstclass)
11039 && REGCLASS_HAS_GENERAL_REG (srcclass))
11040 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11041 && REGCLASS_HAS_FP_REG (srcclass)))
11042 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11043 * ((GET_MODE_SIZE (mode) + 7) / 8U));
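/* The general <-> FP costs above are charged per 64-bit chunk moved
   (GET_MODE_SIZE rounded up), so e.g. a DFmode move between general and
   floating-point registers costs 12 on SH4 without -mfmovd, 8 with it,
   and 4 on SHmedia.  */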
11045 if ((dstclass == FPUL_REGS
11046 && REGCLASS_HAS_GENERAL_REG (srcclass))
11047 || (srcclass == FPUL_REGS
11048 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11049 return 5;
11051 if ((dstclass == FPUL_REGS
11052 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11053 || (srcclass == FPUL_REGS
11054 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11055 return 7;
11057 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11058 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11059 return 20;
11061 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11062 if (TARGET_SHMEDIA
11063 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11065 if (sh_gettrcost >= 0)
11066 return sh_gettrcost;
11067 else if (!TARGET_PT_FIXED)
11068 return 100;
11071 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11072 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11073 return 4;
11075 if (TARGET_SHMEDIA
11076 || (TARGET_FMOVD
11077 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11078 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11079 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11081 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11084 static rtx emit_load_ptr (rtx, rtx);
11086 static rtx
11087 emit_load_ptr (rtx reg, rtx addr)
11089 rtx mem = gen_const_mem (ptr_mode, addr);
11091 if (Pmode != ptr_mode)
11092 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11093 return emit_move_insn (reg, mem);
11096 static void
11097 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11098 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11099 tree function)
11101 CUMULATIVE_ARGS cum;
11102 int structure_value_byref = 0;
11103 rtx this_rtx, this_value, sibcall, insns, funexp;
11104 tree funtype = TREE_TYPE (function);
11105 int simple_add = CONST_OK_FOR_ADD (delta);
11106 int did_load = 0;
11107 rtx scratch0, scratch1, scratch2;
11108 unsigned i;
11110 reload_completed = 1;
11111 epilogue_completed = 1;
11112 current_function_uses_only_leaf_regs = 1;
11114 emit_note (NOTE_INSN_PROLOGUE_END);
11116 /* Find the "this" pointer. We have such a wide range of ABIs for the
11117 SH that it's best to do this completely machine independently.
11118 "this" is passed as first argument, unless a structure return pointer
11119 comes first, in which case "this" comes second. */
11120 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11121 #ifndef PCC_STATIC_STRUCT_RETURN
11122 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11123 structure_value_byref = 1;
11124 #endif /* not PCC_STATIC_STRUCT_RETURN */
11125 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11127 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11129 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11131 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11133 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11134 static chain pointer (even if you can't have nested virtual functions
11135 right now, someone might implement them sometime), and the rest of the
11136 registers are used for argument passing, are callee-saved, or reserved. */
11137 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11138 -ffixed-reg has been used. */
11139 if (! call_used_regs[0] || fixed_regs[0])
11140 error ("r0 needs to be available as a call-clobbered register");
11141 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11142 if (! TARGET_SH5)
11144 if (call_used_regs[1] && ! fixed_regs[1])
11145 scratch1 = gen_rtx_REG (ptr_mode, 1);
11146 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11147 to the location where struct values are to be returned. */
11148 if (call_used_regs[3] && ! fixed_regs[3])
11149 scratch2 = gen_rtx_REG (Pmode, 3);
11151 else if (TARGET_SHMEDIA)
11153 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11154 if (i != REGNO (scratch0) &&
11155 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11157 scratch1 = gen_rtx_REG (ptr_mode, i);
11158 break;
11160 if (scratch1 == scratch0)
11161 error ("Need a second call-clobbered general purpose register");
11162 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11163 if (call_used_regs[i] && ! fixed_regs[i])
11165 scratch2 = gen_rtx_REG (Pmode, i);
11166 break;
11168 if (scratch2 == scratch0)
11169 error ("Need a call-clobbered target register");
11172 this_value = plus_constant (this_rtx, delta);
11173 if (vcall_offset
11174 && (simple_add || scratch0 != scratch1)
11175 && strict_memory_address_p (ptr_mode, this_value))
11177 emit_load_ptr (scratch0, this_value);
11178 did_load = 1;
11181 if (!delta)
11182 ; /* Do nothing. */
11183 else if (simple_add)
11184 emit_move_insn (this_rtx, this_value);
11185 else
11187 emit_move_insn (scratch1, GEN_INT (delta));
11188 emit_insn (gen_add2_insn (this_rtx, scratch1));
11191 if (vcall_offset)
11193 rtx offset_addr;
11195 if (!did_load)
11196 emit_load_ptr (scratch0, this_rtx);
11198 offset_addr = plus_constant (scratch0, vcall_offset);
11199 if (strict_memory_address_p (ptr_mode, offset_addr))
11200 ; /* Do nothing. */
11201 else if (! TARGET_SH5 && scratch0 != scratch1)
11203 /* scratch0 != scratch1, and we have indexed loads. Get better
11204 schedule by loading the offset into r1 and using an indexed
11205 load - then the load of r1 can issue before the load from
11206 (this_rtx + delta) finishes. */
11207 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11208 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11210 else if (CONST_OK_FOR_ADD (vcall_offset))
11212 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11213 offset_addr = scratch0;
11215 else if (scratch0 != scratch1)
11217 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11218 emit_insn (gen_add2_insn (scratch0, scratch1));
11219 offset_addr = scratch0;
11221 else
11222 gcc_unreachable (); /* FIXME */
11223 emit_load_ptr (scratch0, offset_addr);
11225 if (Pmode != ptr_mode)
11226 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11227 emit_insn (gen_add2_insn (this_rtx, scratch0));
11230 /* Generate a tail call to the target function. */
11231 if (! TREE_USED (function))
11233 assemble_external (function);
11234 TREE_USED (function) = 1;
11236 funexp = XEXP (DECL_RTL (function), 0);
11237 /* If the function is overridden, so is the thunk, hence we don't
11238 need GOT addressing even if this is a public symbol. */
11239 #if 0
11240 if (TARGET_SH1 && ! flag_weak)
11241 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11242 else
11243 #endif
11244 if (TARGET_SH2 && flag_pic)
11246 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11247 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11249 else
11251 if (TARGET_SHMEDIA && flag_pic)
11253 funexp = gen_sym2PIC (funexp);
11254 PUT_MODE (funexp, Pmode);
11256 emit_move_insn (scratch2, funexp);
11257 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11258 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11260 sibcall = emit_call_insn (sibcall);
11261 SIBLING_CALL_P (sibcall) = 1;
11262 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11263 emit_barrier ();
11265 /* Run just enough of rest_of_compilation to do scheduling and get
11266 the insns emitted. Note that use_thunk calls
11267 assemble_start_function and assemble_end_function. */
11269 insn_locators_alloc ();
11270 insns = get_insns ();
11272 if (optimize > 0)
11274 if (! cfun->cfg)
11275 init_flow (cfun);
11276 split_all_insns_noflow ();
11279 sh_reorg ();
11281 if (optimize > 0 && flag_delayed_branch)
11282 dbr_schedule (insns);
11284 shorten_branches (insns);
11285 final_start_function (insns, file, 1);
11286 final (insns, file, 1);
11287 final_end_function ();
11289 reload_completed = 0;
11290 epilogue_completed = 0;
11294 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11296 rtx sym;
11298 /* If this is not an ordinary function, the name usually comes from a
11299 string literal or an sprintf buffer. Make sure we use the same
11300 string consistently, so that cse will be able to unify address loads. */
11301 if (kind != FUNCTION_ORDINARY)
11302 name = IDENTIFIER_POINTER (get_identifier (name));
11303 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11304 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11305 if (flag_pic)
11306 switch (kind)
11308 case FUNCTION_ORDINARY:
11309 break;
11310 case SFUNC_GOT:
11312 rtx reg = target ? target : gen_reg_rtx (Pmode);
11314 emit_insn (gen_symGOT2reg (reg, sym));
11315 sym = reg;
11316 break;
11318 case SFUNC_STATIC:
11320 /* ??? To allow cse to work, we use GOTOFF relocations.
11321 We could add combiner patterns to transform this into
11322 straight pc-relative calls with sym2PIC / bsrf when
11323 label load and function call are still 1:1 and in the
11324 same basic block during combine. */
11325 rtx reg = target ? target : gen_reg_rtx (Pmode);
11327 emit_insn (gen_symGOTOFF2reg (reg, sym));
11328 sym = reg;
11329 break;
11332 if (target && sym != target)
11334 emit_move_insn (target, sym);
11335 return target;
11337 return sym;
11340 /* Find the number of a general purpose register in S. */
11341 static int
11342 scavenge_reg (HARD_REG_SET *s)
11344 int r;
11345 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11346 if (TEST_HARD_REG_BIT (*s, r))
11347 return r;
11348 return -1;
11352 sh_get_pr_initial_val (void)
11354 rtx val;
11356 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11357 PR register on SHcompact, because it might be clobbered by the prologue.
11358 We check first if that is known to be the case. */
11359 if (TARGET_SHCOMPACT
11360 && ((crtl->args.info.call_cookie
11361 & ~ CALL_COOKIE_RET_TRAMP (1))
11362 || crtl->saves_all_registers))
11363 return gen_frame_mem (SImode, return_address_pointer_rtx);
11365 /* If we haven't finished rtl generation, there might be a nonlocal label
11366 that we haven't seen yet.
11367 ??? get_hard_reg_initial_val fails if it is called after register
11368 allocation has started, unless it has been called before for the
11369 same register. And even then, we end up in trouble if we didn't use
11370 the register in the same basic block before. So call
11371 get_hard_reg_initial_val now and wrap it in an unspec if we might
11372 need to replace it. */
11373 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11374 combine can put the pseudo returned by get_hard_reg_initial_val into
11375 instructions that need a general purpose register, which will fail to
11376 be recognized when the pseudo becomes allocated to PR. */
11377 val
11378 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11379 if (TARGET_SH1)
11380 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11381 return val;
11385 sh_expand_t_scc (rtx operands[])
11387 enum rtx_code code = GET_CODE (operands[1]);
11388 rtx target = operands[0];
11389 rtx op0 = operands[2];
11390 rtx op1 = operands[3];
11391 rtx result = target;
11392 HOST_WIDE_INT val;
11394 if (!REG_P (op0) || REGNO (op0) != T_REG
11395 || !CONST_INT_P (op1))
11396 return 0;
11397 if (!REG_P (result))
11398 result = gen_reg_rtx (SImode);
11399 val = INTVAL (op1);
11400 if ((code == EQ && val == 1) || (code == NE && val == 0))
11401 emit_insn (gen_movt (result));
11402 else if (TARGET_SH2A && ((code == EQ && val == 0)
11403 || (code == NE && val == 1)))
11404 emit_insn (gen_xorsi3_movrt (result));
11405 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11407 emit_clobber (result);
11408 emit_insn (gen_subc (result, result, result));
11409 emit_insn (gen_addsi3 (result, result, const1_rtx));
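/* Editorial note (not part of the original source): SH's subc computes
   Rn = Rn - Rm - T, so gen_subc (result, result, result) leaves
   result = -T; adding 1 then yields 1 - T, the logical negation of the
   T bit, which is what the EQ-with-0 / NE-with-1 cases need.  The
   emit_clobber above tells the optimizers that the previous contents of
   RESULT are irrelevant.  */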
11411 else if (code == EQ || code == NE)
11412 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11413 else
11414 return 0;
11415 if (result != target)
11416 emit_move_insn (target, result);
11417 return 1;
11420 /* INSN is an sfunc; return the rtx that describes the address used. */
11421 static rtx
11422 extract_sfunc_addr (rtx insn)
11424 rtx pattern, part = NULL_RTX;
11425 int len, i;
11427 pattern = PATTERN (insn);
11428 len = XVECLEN (pattern, 0);
11429 for (i = 0; i < len; i++)
11431 part = XVECEXP (pattern, 0, i);
11432 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11433 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11434 return XEXP (part, 0);
11436 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11437 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11440 /* Verify that the register in use_sfunc_addr still agrees with the address
11441 used in the sfunc. This prevents fill_slots_from_thread from changing
11442 use_sfunc_addr.
11443 INSN is the use_sfunc_addr instruction, and REG is the register it
11444 guards. */
11446 check_use_sfunc_addr (rtx insn, rtx reg)
11448 /* Search for the sfunc. It should really come right after INSN. */
11449 while ((insn = NEXT_INSN (insn)))
11451 if (LABEL_P (insn) || JUMP_P (insn))
11452 break;
11453 if (! INSN_P (insn))
11454 continue;
11456 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11457 insn = XVECEXP (PATTERN (insn), 0, 0);
11458 if (GET_CODE (PATTERN (insn)) != PARALLEL
11459 || get_attr_type (insn) != TYPE_SFUNC)
11460 continue;
11461 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11463 gcc_unreachable ();
11466 /* This function returns a constant rtx that represents 2**15 / pi in
11467 SFmode. It's used to scale SFmode angles, in radians, to a
11468 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11469 maps to 0x10000). */
11471 static GTY(()) rtx sh_fsca_sf2int_rtx;
11474 sh_fsca_sf2int (void)
11476 if (! sh_fsca_sf2int_rtx)
11478 REAL_VALUE_TYPE rv;
11480 real_from_string (&rv, "10430.378350470453");
11481 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11484 return sh_fsca_sf2int_rtx;
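/* Editorial check (not part of the original source): 2**15 / pi =
   32768 / 3.14159265... ~= 10430.378350470453, matching the string above.
   For example, an angle of pi/2 radians (a quarter circle) scales to
   pi/2 * 2**15 / pi = 16384 = 0x4000, one quarter of 0x10000.  */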
11487 /* This function returns a constant rtx that represents 2**15 / pi in
11488 DFmode. It's used to scale DFmode angles, in radians, to a
11489 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11490 maps to 0x10000). */
11492 static GTY(()) rtx sh_fsca_df2int_rtx;
11495 sh_fsca_df2int (void)
11497 if (! sh_fsca_df2int_rtx)
11499 REAL_VALUE_TYPE rv;
11501 real_from_string (&rv, "10430.378350470453");
11502 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11505 return sh_fsca_df2int_rtx;
11508 /* This function returns a constant rtx that represents pi / 2**15 in
11509 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11510 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11511 2*pi). */
11513 static GTY(()) rtx sh_fsca_int2sf_rtx;
11516 sh_fsca_int2sf (void)
11518 if (! sh_fsca_int2sf_rtx)
11520 REAL_VALUE_TYPE rv;
11522 real_from_string (&rv, "9.587379924285257e-5");
11523 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11526 return sh_fsca_int2sf_rtx;
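/* Editorial check (not part of the original source): pi / 2**15 =
   3.14159265... / 32768 ~= 9.587379924285257e-5, the reciprocal of the
   sf2int/df2int constant above, so the round trip is an identity up to
   rounding: 0x10000 * 9.58738e-5 ~= 6.28319 = 2*pi.  */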
11529 /* Initialize the CUMULATIVE_ARGS structure. */
11531 void
11532 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11533 tree fntype,
11534 rtx libname ATTRIBUTE_UNUSED,
11535 tree fndecl,
11536 signed int n_named_args,
11537 enum machine_mode mode)
11539 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11540 pcum->free_single_fp_reg = 0;
11541 pcum->stack_regs = 0;
11542 pcum->byref_regs = 0;
11543 pcum->byref = 0;
11544 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11546 /* XXX - Should we check TARGET_HITACHI here ??? */
11547 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11549 if (fntype)
11551 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11552 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11553 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11554 pcum->arg_count [(int) SH_ARG_INT]
11555 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11557 pcum->call_cookie
11558 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11559 && pcum->arg_count [(int) SH_ARG_INT] == 0
11560 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11561 ? int_size_in_bytes (TREE_TYPE (fntype))
11562 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11563 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11564 == FIRST_RET_REG));
11566 else
11568 pcum->arg_count [(int) SH_ARG_INT] = 0;
11569 pcum->prototype_p = FALSE;
11570 if (mode != VOIDmode)
11572 pcum->call_cookie =
11573 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11574 && GET_MODE_SIZE (mode) > 4
11575 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11577 /* If the default ABI is the Renesas ABI then all library
11578 calls must assume that the library will be using the
11579 Renesas ABI. So if the function would return its result
11580 in memory then we must force the address of this memory
11581 block onto the stack. Ideally we would like to call
11582 targetm.calls.return_in_memory() here but we do not have
11583 the TYPE or the FNDECL available so we synthesize the
11584 contents of that function as best we can. */
11585 pcum->force_mem =
11586 (TARGET_DEFAULT & MASK_HITACHI)
11587 && (mode == BLKmode
11588 || (GET_MODE_SIZE (mode) > 4
11589 && !(mode == DFmode
11590 && TARGET_FPU_DOUBLE)));
11592 else
11594 pcum->call_cookie = 0;
11595 pcum->force_mem = FALSE;
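/* Editorial example (not part of the original source): for an unprototyped
   libcall under a Renesas/Hitachi default ABI (TARGET_DEFAULT & MASK_HITACHI),
   a DImode result is 8 bytes wide, so the code above sets force_mem and the
   address of the return slot is forced onto the stack; a DFmode result with
   TARGET_FPU_DOUBLE is exempted and is returned in registers.  */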
11600 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11601 not descend into CONST_DOUBLE when doing the replacement.
11603 Note that copying is not done so X must not be shared unless all copies
11604 are to be modified.
11606 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11607 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11608 replacements[n*2+1] - and that we take mode changes into account.
11610 If a replacement is ambiguous, return NULL_RTX.
11612 If MODIFY is zero, don't modify any rtl in place,
11613 just return zero or nonzero for failure / success. */
11616 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11618 int i, j;
11619 const char *fmt;
11621 /* The following prevents an infinite loop when we replace a MEM inside a
11622 CONST_DOUBLE with the same CONST_DOUBLE. */
11623 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11624 return x;
11626 for (i = n_replacements - 1; i >= 0 ; i--)
11627 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11628 return replacements[i*2+1];
11630 /* Allow this function to make replacements in EXPR_LISTs. */
11631 if (x == 0)
11632 return 0;
11634 if (GET_CODE (x) == SUBREG)
11636 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11637 n_replacements, modify);
11639 if (CONST_INT_P (new_rtx))
11641 x = simplify_subreg (GET_MODE (x), new_rtx,
11642 GET_MODE (SUBREG_REG (x)),
11643 SUBREG_BYTE (x));
11644 if (! x)
11645 abort ();
11647 else if (modify)
11648 SUBREG_REG (x) = new_rtx;
11650 return x;
11652 else if (REG_P (x))
11654 unsigned regno = REGNO (x);
11655 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11656 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11657 rtx result = NULL_RTX;
11659 for (i = n_replacements - 1; i >= 0; i--)
11661 rtx from = replacements[i*2];
11662 rtx to = replacements[i*2+1];
11663 unsigned from_regno, from_nregs, to_regno, new_regno;
11665 if (!REG_P (from))
11666 continue;
11667 from_regno = REGNO (from);
11668 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11669 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11670 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11672 if (regno < from_regno
11673 || regno + nregs > from_regno + from_nregs
11674 || !REG_P (to)
11675 || result)
11676 return NULL_RTX;
11677 to_regno = REGNO (to);
11678 if (to_regno < FIRST_PSEUDO_REGISTER)
11680 new_regno = regno + to_regno - from_regno;
11681 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11682 != nregs)
11683 return NULL_RTX;
11684 result = gen_rtx_REG (GET_MODE (x), new_regno);
11686 else if (GET_MODE (x) <= GET_MODE (to))
11687 result = gen_lowpart_common (GET_MODE (x), to);
11688 else
11689 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11692 return result ? result : x;
11694 else if (GET_CODE (x) == ZERO_EXTEND)
11696 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11697 n_replacements, modify);
11699 if (CONST_INT_P (new_rtx))
11701 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11702 new_rtx, GET_MODE (XEXP (x, 0)));
11703 if (! x)
11704 abort ();
11706 else if (modify)
11707 XEXP (x, 0) = new_rtx;
11709 return x;
11712 fmt = GET_RTX_FORMAT (GET_CODE (x));
11713 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11715 rtx new_rtx;
11717 if (fmt[i] == 'e')
11719 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11720 n_replacements, modify);
11721 if (!new_rtx)
11722 return NULL_RTX;
11723 if (modify)
11724 XEXP (x, i) = new_rtx;
11726 else if (fmt[i] == 'E')
11727 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11729 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11730 n_replacements, modify);
11731 if (!new_rtx)
11732 return NULL_RTX;
11733 if (modify)
11734 XVECEXP (x, i, j) = new_rtx;
11738 return x;
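/* Editorial illustration (not part of the original source): the REPLACEMENTS
   array interleaves the pairs FROM(0), TO(0), FROM(1), TO(1), ...  Given some
   rtl PAT, a simultaneous dry run replacing two hard registers (the register
   numbers are placeholders for illustration only) could look like:

     rtx repl[4];
     repl[0] = gen_rtx_REG (SImode, 4);   // FROM(0)
     repl[1] = gen_rtx_REG (SImode, 6);   // TO(0)
     repl[2] = gen_rtx_REG (SImode, 5);   // FROM(1)
     repl[3] = gen_rtx_REG (SImode, 7);   // TO(1)
     if (replace_n_hard_rtx (PAT, repl, 2, 0) == NULL_RTX)
       return;   // some replacement was ambiguous

   With MODIFY == 0 nothing is rewritten in place; a non-NULL return only
   signals that the replacement would succeed.  */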
11742 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11744 enum rtx_code code = TRUNCATE;
11746 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11748 rtx inner = XEXP (x, 0);
11749 enum machine_mode inner_mode = GET_MODE (inner);
11751 if (inner_mode == mode)
11752 return inner;
11753 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11754 x = inner;
11755 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11756 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11758 code = GET_CODE (x);
11759 x = inner;
11762 return gen_rtx_fmt_e (code, mode, x);
11765 /* Called via for_each_rtx after reload, to clean up truncates of
11766 registers that span multiple actual hard registers. */
11768 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11770 rtx x = *p, reg;
11772 if (GET_CODE (x) != TRUNCATE)
11773 return 0;
11774 reg = XEXP (x, 0);
11775 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11777 enum machine_mode reg_mode = GET_MODE (reg);
11778 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11779 subreg_lowpart_offset (DImode, reg_mode));
11780 *(int*) n_changes += 1;
11781 return -1;
11783 return 0;
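/* Editorial note (not part of the original source): as a for_each_rtx
   callback, this returns 0 to keep walking and -1 (above) to tell
   for_each_rtx not to traverse the sub-expressions of the rtx it just
   rewrote; *(int *) n_changes counts the cleaned-up truncates so the
   caller knows whether anything changed.  */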
11786 /* Load and store depend on the highpart of the address. However,
11787 set_attr_alternative does not give well-defined results before reload,
11788 so we must look at the rtl ourselves to see if any of the feeding
11789 registers is used in a memref. */
11791 /* Called by sh_contains_memref_p via for_each_rtx. */
11792 static int
11793 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11795 return (MEM_P (*loc));
11798 /* Return nonzero iff INSN contains a MEM. */
11800 sh_contains_memref_p (rtx insn)
11802 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11805 /* Return nonzero iff INSN loads a banked register. */
11807 sh_loads_bankedreg_p (rtx insn)
11809 if (GET_CODE (PATTERN (insn)) == SET)
11811 rtx op = SET_DEST (PATTERN(insn));
11812 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11813 return 1;
11816 return 0;
11819 /* FNADDR is the MEM expression from a call expander. Return an address
11820 to use in an SHmedia insn pattern. */
11822 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11824 int is_sym;
11826 fnaddr = XEXP (fnaddr, 0);
11827 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11828 if (flag_pic && is_sym)
11830 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11832 rtx reg = gen_reg_rtx (Pmode);
11834 /* We must not use GOTPLT for sibcalls, because PIC_REG
11835 must be restored before the PLT code gets to run. */
11836 if (is_sibcall)
11837 emit_insn (gen_symGOT2reg (reg, fnaddr));
11838 else
11839 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11840 fnaddr = reg;
11842 else
11844 fnaddr = gen_sym2PIC (fnaddr);
11845 PUT_MODE (fnaddr, Pmode);
11848 /* If ptabs might trap, make this visible to the rest of the compiler.
11849 We generally assume that symbols pertain to valid locations, but
11850 it is possible to generate invalid symbols with asm or linker tricks.
11851 In a list of functions where each returns its successor, an invalid
11852 symbol might denote an empty list. */
11853 if (!TARGET_PT_FIXED
11854 && (!is_sym || TARGET_INVALID_SYMBOLS)
11855 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11857 rtx tr = gen_reg_rtx (PDImode);
11859 emit_insn (gen_ptabs (tr, fnaddr));
11860 fnaddr = tr;
11862 else if (! target_reg_operand (fnaddr, Pmode))
11863 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11864 return fnaddr;
11867 enum reg_class
11868 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11869 enum machine_mode mode, secondary_reload_info *sri)
11871 if (in_p)
11873 if (REGCLASS_HAS_FP_REG (rclass)
11874 && ! TARGET_SHMEDIA
11875 && immediate_operand ((x), mode)
11876 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11877 && mode == SFmode && fldi_ok ()))
11878 switch (mode)
11880 case SFmode:
11881 sri->icode = CODE_FOR_reload_insf__frn;
11882 return NO_REGS;
11883 case DFmode:
11884 sri->icode = CODE_FOR_reload_indf__frn;
11885 return NO_REGS;
11886 case SImode:
11887 /* ??? If we knew that we are in the appropriate mode -
11888 single precision - we could use a reload pattern directly. */
11889 return FPUL_REGS;
11890 default:
11891 abort ();
11893 if (rclass == FPUL_REGS
11894 && ((REG_P (x)
11895 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11896 || REGNO (x) == T_REG))
11897 || GET_CODE (x) == PLUS))
11898 return GENERAL_REGS;
11899 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11901 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11902 return GENERAL_REGS;
11903 else if (mode == SFmode)
11904 return FP_REGS;
11905 sri->icode = CODE_FOR_reload_insi__i_fpul;
11906 return NO_REGS;
11908 if (rclass == FPSCR_REGS
11909 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11910 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11911 return GENERAL_REGS;
11912 if (REGCLASS_HAS_FP_REG (rclass)
11913 && TARGET_SHMEDIA
11914 && immediate_operand (x, mode)
11915 && x != CONST0_RTX (GET_MODE (x))
11916 && GET_MODE (x) != V4SFmode)
11917 return GENERAL_REGS;
11918 if ((mode == QImode || mode == HImode)
11919 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11921 sri->icode = ((mode == QImode)
11922 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11923 return NO_REGS;
11925 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11926 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11927 return TARGET_REGS;
11928 } /* end of input-only processing. */
11930 if (((REGCLASS_HAS_FP_REG (rclass)
11931 && (REG_P (x)
11932 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11933 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11934 && TARGET_FMOVD))))
11935 || (REGCLASS_HAS_GENERAL_REG (rclass)
11936 && REG_P (x)
11937 && FP_REGISTER_P (REGNO (x))))
11938 && ! TARGET_SHMEDIA
11939 && (mode == SFmode || mode == SImode))
11940 return FPUL_REGS;
11941 if ((rclass == FPUL_REGS
11942 || (REGCLASS_HAS_FP_REG (rclass)
11943 && ! TARGET_SHMEDIA && mode == SImode))
11944 && (MEM_P (x)
11945 || (REG_P (x)
11946 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11947 || REGNO (x) == T_REG
11948 || system_reg_operand (x, VOIDmode)))))
11950 if (rclass == FPUL_REGS)
11951 return GENERAL_REGS;
11952 return FPUL_REGS;
11954 if ((rclass == TARGET_REGS
11955 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11956 && !satisfies_constraint_Csy (x)
11957 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
11958 return GENERAL_REGS;
11959 if ((rclass == MAC_REGS || rclass == PR_REGS)
11960 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11961 && rclass != REGNO_REG_CLASS (REGNO (x)))
11962 return GENERAL_REGS;
11963 if (rclass != GENERAL_REGS && REG_P (x)
11964 && TARGET_REGISTER_P (REGNO (x)))
11965 return GENERAL_REGS;
11966 return NO_REGS;
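/* Editorial example (not part of the original source): a typical case
   handled above is moving a value between FPUL_REGS and memory, or a pseudo
   that did not receive a hard register - sh_secondary_reload returns
   GENERAL_REGS for it, so the value is staged through a general-purpose
   register on its way to or from FPUL.  */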
11969 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11971 #include "gt-sh.h"