* config/sh/sh.c (push_regs): Emit movml for interrupt handler
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
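/* Illustrative note (editorial, not part of the original source): these
   wrappers let the 32 bit and SHmedia64 paths share code.  A hypothetical
   caller would do something like

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-16)));

   which expands to gen_addsi3 on 32 bit targets and gen_adddi3 on SHmedia64,
   so helpers such as output_stack_adjust below need not test the pointer
   width at every call site.  */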
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
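/* Reading aid (editorial): for a bare type node the macro above yields
   TYPE_ATTRIBUTES (decl); for a decl it yields DECL_ATTRIBUTES (decl) when
   that list is non-empty, and otherwise falls back to the attributes of the
   decl's type.  Roughly:

     tree attrs = TYPE_P (decl) ? TYPE_ATTRIBUTES (decl)
                  : DECL_ATTRIBUTES (decl) ? DECL_ATTRIBUTES (decl)
                  : TYPE_ATTRIBUTES (TREE_TYPE (decl));
*/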
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
 90 /* Which CPU we are scheduling for.  */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
 114 /* Provides the class number of the smallest class containing
 115 each reg number.  */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
206 static void sh_insert_attributes (tree, tree *);
207 static const char *sh_check_pch_target_flags (int);
208 static int sh_adjust_cost (rtx, rtx, rtx, int);
209 static int sh_issue_rate (void);
210 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
211 static short find_set_regmode_weight (rtx, enum machine_mode);
212 static short find_insn_regmode_weight (rtx, enum machine_mode);
213 static void find_regmode_weight (basic_block, enum machine_mode);
214 static int find_r0_life_regions (basic_block);
215 static void sh_md_init_global (FILE *, int, int);
216 static void sh_md_finish_global (FILE *, int);
217 static int rank_for_reorder (const void *, const void *);
218 static void swap_reorder (rtx *, int);
219 static void ready_reorder (rtx *, int);
220 static short high_pressure (enum machine_mode);
221 static int sh_reorder (FILE *, int, rtx *, int *, int);
222 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
223 static void sh_md_init (FILE *, int, int);
224 static int sh_variable_issue (FILE *, int, rtx, int);
226 static bool sh_function_ok_for_sibcall (tree, tree);
228 static bool sh_cannot_modify_jumps_p (void);
229 static reg_class_t sh_target_reg_class (void);
230 static bool sh_optimize_target_register_callee_saved (bool);
231 static bool sh_ms_bitfield_layout_p (const_tree);
233 static void sh_init_builtins (void);
234 static tree sh_builtin_decl (unsigned, bool);
235 static void sh_media_init_builtins (void);
236 static tree sh_media_builtin_decl (unsigned, bool);
237 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
238 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
239 static void sh_file_start (void);
240 static int flow_dependent_p (rtx, rtx);
241 static void flow_dependent_p_1 (rtx, const_rtx, void *);
242 static int shiftcosts (rtx);
243 static int andcosts (rtx);
244 static int addsubcosts (rtx);
245 static int multcosts (rtx);
246 static bool unspec_caller_rtx_p (rtx);
247 static bool sh_cannot_copy_insn_p (rtx);
248 static bool sh_rtx_costs (rtx, int, int, int *, bool);
249 static int sh_address_cost (rtx, bool);
250 static int sh_pr_n_sets (void);
251 static rtx sh_allocate_initial_value (rtx);
252 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
253 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
254 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
255 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
256 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
257 static int scavenge_reg (HARD_REG_SET *s);
258 struct save_schedule_s;
259 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
260 struct save_schedule_s *, int);
262 static rtx sh_struct_value_rtx (tree, int);
263 static rtx sh_function_value (const_tree, const_tree, bool);
264 static rtx sh_libcall_value (enum machine_mode, const_rtx);
265 static bool sh_return_in_memory (const_tree, const_tree);
266 static rtx sh_builtin_saveregs (void);
267 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
268 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
269 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
270 static tree sh_build_builtin_va_list (void);
271 static void sh_va_start (tree, rtx);
272 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
273 static bool sh_promote_prototypes (const_tree);
274 static enum machine_mode sh_promote_function_mode (const_tree type,
275 enum machine_mode,
276 int *punsignedp,
277 const_tree funtype,
278 int for_return);
279 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
280 const_tree, bool);
281 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
282 const_tree, bool);
283 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
284 tree, bool);
285 static bool sh_scalar_mode_supported_p (enum machine_mode);
286 static int sh_dwarf_calling_convention (const_tree);
287 static void sh_encode_section_info (tree, rtx, int);
288 static int sh2a_function_vector_p (tree);
289 static void sh_trampoline_init (rtx, tree, rtx);
290 static rtx sh_trampoline_adjust_address (rtx);
292 static const struct attribute_spec sh_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
295 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
297 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
298 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
299 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
300 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
301 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
302 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
303 #ifdef SYMBIAN
 304 /* Symbian support adds two new attributes:
305 dllexport - for exporting a function/variable that will live in a dll
306 dllimport - for importing a function/variable from a dll
308 Microsoft allows multiple declspecs in one __declspec, separating
309 them with spaces. We do NOT support this. Instead, use __declspec
310 multiple times. */
311 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
312 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
313 #endif
314 { NULL, 0, 0, false, false, false, NULL }
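/* Illustrative example (editorial; the function names, the variable name and
   the trap number are hypothetical): source-level uses that the handlers in
   the table above accept.

     void tick_isr (void) __attribute__ ((interrupt_handler));
     void fast_isr (void) __attribute__ ((interrupt_handler,
                                           sp_switch ("alt_stack")));
     void sys_isr  (void) __attribute__ ((interrupt_handler, trap_exit (42)));

   interrupt_handler makes expand_prologue set current_function_interrupt, so
   the prologue/epilogue save and restore every register the handler touches
   (including call-clobbered ones), and sh_print_operand's '@' code emits rte,
   or "trapa #42" when trap_exit is given, instead of rts.  */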
317 /* Initialize the GCC target structure. */
318 #undef TARGET_ATTRIBUTE_TABLE
319 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
321 /* The next two are used for debug info when compiling with -gdwarf. */
322 #undef TARGET_ASM_UNALIGNED_HI_OP
323 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
324 #undef TARGET_ASM_UNALIGNED_SI_OP
325 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
327 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
328 #undef TARGET_ASM_UNALIGNED_DI_OP
329 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
330 #undef TARGET_ASM_ALIGNED_DI_OP
331 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
333 #undef TARGET_PRINT_OPERAND
334 #define TARGET_PRINT_OPERAND sh_print_operand
335 #undef TARGET_PRINT_OPERAND_ADDRESS
336 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
337 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
338 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
340 #undef TARGET_ASM_FUNCTION_EPILOGUE
341 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
343 #undef TARGET_ASM_OUTPUT_MI_THUNK
344 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
346 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
347 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
349 #undef TARGET_ASM_FILE_START
350 #define TARGET_ASM_FILE_START sh_file_start
351 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
352 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
354 #undef TARGET_DEFAULT_TARGET_FLAGS
355 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
356 #undef TARGET_HANDLE_OPTION
357 #define TARGET_HANDLE_OPTION sh_handle_option
359 #undef TARGET_INSERT_ATTRIBUTES
360 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
362 #undef TARGET_SCHED_ADJUST_COST
363 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
365 #undef TARGET_SCHED_ISSUE_RATE
366 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
 368 /* The next 5 hooks have been implemented to re-enable sched1.  With the
 369 help of these hooks we limit the movement of insns in sched1 to
 370 reduce the register pressure.  The overall idea is to keep count of the
 371 SImode and SFmode regs required by already scheduled insns.  When these
 372 counts cross some threshold values, we give priority to insns that free
 373 registers.  The insn that frees registers is most likely to be the insn
 374 with the lowest LUID (original insn order); but such an insn might be in
 375 the stalled queue (Q) instead of the ready queue (R).  To solve this, we
 376 skip up to a maximum of 8 cycles so that such insns may move from Q -> R.
 378 The hooks are described below:
 380 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
 381 scheduler; it is called inside the sched_init function just after the
 382 find_insn_reg_weights function call.  It is used to calculate the SImode
 383 and SFmode weights of the insns of each basic block, much like what
 384 find_insn_reg_weights does.
 385 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
 387 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
 388 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
 389 (Q)->(R).
 391 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 392 high, reorder the ready queue so that the insn with the lowest LUID will
 393 be issued next.
 395 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
 396 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
 398 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
 399 can be returned from TARGET_SCHED_REORDER2.
 401 TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
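/* A minimal sketch of the interplay described above (editorial; the real
   threshold test lives in high_pressure and the queue handling in
   sh_reorder / sh_reorder2 further down in this file):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);   -- puts the lowest-LUID insn first

   Once the weight of the already scheduled insns crosses the threshold for
   either mode, the ready queue is sorted so that the insn most likely to
   free a register (the one with the lowest LUID) is issued next, and up to
   8 cycles may be skipped so that such an insn can migrate from the stalled
   queue Q to the ready queue R.  */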
403 #undef TARGET_SCHED_DFA_NEW_CYCLE
404 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
406 #undef TARGET_SCHED_INIT_GLOBAL
407 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
409 #undef TARGET_SCHED_FINISH_GLOBAL
410 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
412 #undef TARGET_SCHED_VARIABLE_ISSUE
413 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
415 #undef TARGET_SCHED_REORDER
416 #define TARGET_SCHED_REORDER sh_reorder
418 #undef TARGET_SCHED_REORDER2
419 #define TARGET_SCHED_REORDER2 sh_reorder2
421 #undef TARGET_SCHED_INIT
422 #define TARGET_SCHED_INIT sh_md_init
424 #undef TARGET_LEGITIMIZE_ADDRESS
425 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
427 #undef TARGET_CANNOT_MODIFY_JUMPS_P
428 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
429 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
430 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
431 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
432 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
433 sh_optimize_target_register_callee_saved
435 #undef TARGET_MS_BITFIELD_LAYOUT_P
436 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
438 #undef TARGET_INIT_BUILTINS
439 #define TARGET_INIT_BUILTINS sh_init_builtins
440 #undef TARGET_BUILTIN_DECL
441 #define TARGET_BUILTIN_DECL sh_builtin_decl
442 #undef TARGET_EXPAND_BUILTIN
443 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
445 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
446 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
448 #undef TARGET_CANNOT_COPY_INSN_P
449 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
450 #undef TARGET_RTX_COSTS
451 #define TARGET_RTX_COSTS sh_rtx_costs
452 #undef TARGET_ADDRESS_COST
453 #define TARGET_ADDRESS_COST sh_address_cost
454 #undef TARGET_ALLOCATE_INITIAL_VALUE
455 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
457 #undef TARGET_MACHINE_DEPENDENT_REORG
458 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
460 #undef TARGET_DWARF_REGISTER_SPAN
461 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
463 #ifdef HAVE_AS_TLS
464 #undef TARGET_HAVE_TLS
465 #define TARGET_HAVE_TLS true
466 #endif
468 #undef TARGET_PROMOTE_PROTOTYPES
469 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
470 #undef TARGET_PROMOTE_FUNCTION_MODE
471 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
473 #undef TARGET_FUNCTION_VALUE
474 #define TARGET_FUNCTION_VALUE sh_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE sh_libcall_value
477 #undef TARGET_STRUCT_VALUE_RTX
478 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
479 #undef TARGET_RETURN_IN_MEMORY
480 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
482 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
483 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
484 #undef TARGET_SETUP_INCOMING_VARARGS
485 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
486 #undef TARGET_STRICT_ARGUMENT_NAMING
487 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
488 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
489 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
490 #undef TARGET_MUST_PASS_IN_STACK
491 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
492 #undef TARGET_PASS_BY_REFERENCE
493 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
494 #undef TARGET_CALLEE_COPIES
495 #define TARGET_CALLEE_COPIES sh_callee_copies
496 #undef TARGET_ARG_PARTIAL_BYTES
497 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
499 #undef TARGET_BUILD_BUILTIN_VA_LIST
500 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
501 #undef TARGET_EXPAND_BUILTIN_VA_START
502 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
503 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
504 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
506 #undef TARGET_SCALAR_MODE_SUPPORTED_P
507 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
508 #undef TARGET_VECTOR_MODE_SUPPORTED_P
509 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
511 #undef TARGET_CHECK_PCH_TARGET_FLAGS
512 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
514 #undef TARGET_DWARF_CALLING_CONVENTION
515 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
517 #undef TARGET_FRAME_POINTER_REQUIRED
518 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
520 /* Return regmode weight for insn. */
521 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
523 /* Return current register pressure for regmode. */
524 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
526 #undef TARGET_ENCODE_SECTION_INFO
527 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
529 #ifdef SYMBIAN
531 #undef TARGET_ENCODE_SECTION_INFO
532 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
533 #undef TARGET_STRIP_NAME_ENCODING
534 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
535 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
536 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
538 #endif /* SYMBIAN */
540 #undef TARGET_SECONDARY_RELOAD
541 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
543 #undef TARGET_LEGITIMATE_ADDRESS_P
544 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
546 #undef TARGET_TRAMPOLINE_INIT
547 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
548 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
549 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
551 /* Machine-specific symbol_ref flags. */
552 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
554 struct gcc_target targetm = TARGET_INITIALIZER;
556 /* Implement TARGET_HANDLE_OPTION. */
558 static bool
559 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
560 int value ATTRIBUTE_UNUSED)
562 switch (code)
564 case OPT_m1:
565 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
566 return true;
568 case OPT_m2:
569 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
570 return true;
572 case OPT_m2a:
573 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
574 return true;
576 case OPT_m2a_nofpu:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
578 return true;
580 case OPT_m2a_single:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
582 return true;
584 case OPT_m2a_single_only:
585 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
586 return true;
588 case OPT_m2e:
589 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
590 return true;
592 case OPT_m3:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
594 return true;
596 case OPT_m3e:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
598 return true;
600 case OPT_m4:
601 case OPT_m4_100:
602 case OPT_m4_200:
603 case OPT_m4_300:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
605 return true;
607 case OPT_m4_nofpu:
608 case OPT_m4_100_nofpu:
609 case OPT_m4_200_nofpu:
610 case OPT_m4_300_nofpu:
611 case OPT_m4_340:
612 case OPT_m4_400:
613 case OPT_m4_500:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
615 return true;
617 case OPT_m4_single:
618 case OPT_m4_100_single:
619 case OPT_m4_200_single:
620 case OPT_m4_300_single:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
622 return true;
624 case OPT_m4_single_only:
625 case OPT_m4_100_single_only:
626 case OPT_m4_200_single_only:
627 case OPT_m4_300_single_only:
628 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
629 return true;
631 case OPT_m4a:
632 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
633 return true;
635 case OPT_m4a_nofpu:
636 case OPT_m4al:
637 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
638 return true;
640 case OPT_m4a_single:
641 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
642 return true;
644 case OPT_m4a_single_only:
645 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
646 return true;
648 case OPT_m5_32media:
649 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
650 return true;
652 case OPT_m5_32media_nofpu:
653 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
654 return true;
656 case OPT_m5_64media:
657 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
658 return true;
660 case OPT_m5_64media_nofpu:
661 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
662 return true;
664 case OPT_m5_compact:
665 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
666 return true;
668 case OPT_m5_compact_nofpu:
669 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
670 return true;
672 default:
673 return true;
677 /* Set default optimization options. */
678 void
679 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
681 if (level)
683 if (!size)
684 sh_div_str = "inv:minlat";
686 if (size)
688 target_flags |= MASK_SMALLCODE;
689 sh_div_str = SH_DIV_STR_FOR_SIZE ;
691 else
692 TARGET_CBRANCHDI4 = 1;
693 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
694 haven't been parsed yet, hence we'd read only the default.
695 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
696 it's OK to always set flag_branch_target_load_optimize. */
697 if (level > 1)
699 flag_branch_target_load_optimize = 1;
700 if (!size)
701 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
703 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
704 here, so leave it to OVERRIDE_OPTIONS to set
705 flag_finite_math_only. We set it to 2 here so we know if the user
706 explicitly requested this to be on or off. */
707 flag_finite_math_only = 2;
708 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
709 the user explicitly requested this to be on or off. */
710 if (flag_schedule_insns > 0)
711 flag_schedule_insns = 2;
713 set_param_value ("simultaneous-prefetches", 2);
716 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
717 options, and do some machine dependent initialization. */
718 void
719 sh_override_options (void)
721 int regno;
723 SUBTARGET_OVERRIDE_OPTIONS;
724 if (flag_finite_math_only == 2)
725 flag_finite_math_only
726 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
727 if (TARGET_SH2E && !flag_finite_math_only)
728 target_flags |= MASK_IEEE;
729 sh_cpu = PROCESSOR_SH1;
730 assembler_dialect = 0;
731 if (TARGET_SH2)
732 sh_cpu = PROCESSOR_SH2;
733 if (TARGET_SH2E)
734 sh_cpu = PROCESSOR_SH2E;
735 if (TARGET_SH2A)
736 sh_cpu = PROCESSOR_SH2A;
737 if (TARGET_SH3)
738 sh_cpu = PROCESSOR_SH3;
739 if (TARGET_SH3E)
740 sh_cpu = PROCESSOR_SH3E;
741 if (TARGET_SH4)
743 assembler_dialect = 1;
744 sh_cpu = PROCESSOR_SH4;
746 if (TARGET_SH4A_ARCH)
748 assembler_dialect = 1;
749 sh_cpu = PROCESSOR_SH4A;
751 if (TARGET_SH5)
753 sh_cpu = PROCESSOR_SH5;
754 target_flags |= MASK_ALIGN_DOUBLE;
755 if (TARGET_SHMEDIA_FPU)
756 target_flags |= MASK_FMOVD;
757 if (TARGET_SHMEDIA)
759 /* There are no delay slots on SHmedia. */
760 flag_delayed_branch = 0;
 761 /* Relaxation isn't yet supported for SHmedia.  */
 762 target_flags &= ~MASK_RELAX;
 763 /* After reload, if-conversion does little good but can cause
 764 ICEs:
765 - find_if_block doesn't do anything for SH because we don't
766 have conditional execution patterns. (We use conditional
767 move patterns, which are handled differently, and only
768 before reload).
769 - find_cond_trap doesn't do anything for the SH because we
770 don't have conditional traps.
771 - find_if_case_1 uses redirect_edge_and_branch_force in
772 the only path that does an optimization, and this causes
773 an ICE when branch targets are in registers.
774 - find_if_case_2 doesn't do anything for the SHmedia after
775 reload except when it can redirect a tablejump - and
776 that's rather rare. */
777 flag_if_conversion2 = 0;
778 if (! strcmp (sh_div_str, "call"))
779 sh_div_strategy = SH_DIV_CALL;
780 else if (! strcmp (sh_div_str, "call2"))
781 sh_div_strategy = SH_DIV_CALL2;
782 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
783 sh_div_strategy = SH_DIV_FP;
784 else if (! strcmp (sh_div_str, "inv"))
785 sh_div_strategy = SH_DIV_INV;
786 else if (! strcmp (sh_div_str, "inv:minlat"))
787 sh_div_strategy = SH_DIV_INV_MINLAT;
788 else if (! strcmp (sh_div_str, "inv20u"))
789 sh_div_strategy = SH_DIV_INV20U;
790 else if (! strcmp (sh_div_str, "inv20l"))
791 sh_div_strategy = SH_DIV_INV20L;
792 else if (! strcmp (sh_div_str, "inv:call2"))
793 sh_div_strategy = SH_DIV_INV_CALL2;
794 else if (! strcmp (sh_div_str, "inv:call"))
795 sh_div_strategy = SH_DIV_INV_CALL;
796 else if (! strcmp (sh_div_str, "inv:fp"))
798 if (TARGET_FPU_ANY)
799 sh_div_strategy = SH_DIV_INV_FP;
800 else
801 sh_div_strategy = SH_DIV_INV;
803 TARGET_CBRANCHDI4 = 0;
804 /* Assembler CFI isn't yet fully supported for SHmedia. */
805 flag_dwarf2_cfi_asm = 0;
808 else
810 /* Only the sh64-elf assembler fully supports .quad properly. */
811 targetm.asm_out.aligned_op.di = NULL;
812 targetm.asm_out.unaligned_op.di = NULL;
814 if (TARGET_SH1)
816 if (! strcmp (sh_div_str, "call-div1"))
817 sh_div_strategy = SH_DIV_CALL_DIV1;
818 else if (! strcmp (sh_div_str, "call-fp")
819 && (TARGET_FPU_DOUBLE
820 || (TARGET_HARD_SH4 && TARGET_SH2E)
821 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
822 sh_div_strategy = SH_DIV_CALL_FP;
823 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
824 sh_div_strategy = SH_DIV_CALL_TABLE;
825 else
 826 /* Pick the one that makes the most sense for the target in general.
 827 It does little good to use different functions depending
 828 on -Os, since then we'll end up with two different functions
 829 when some of the code is compiled for size, and some for
 830 speed.  */
832 /* SH4 tends to emphasize speed. */
833 if (TARGET_HARD_SH4)
834 sh_div_strategy = SH_DIV_CALL_TABLE;
835 /* These have their own way of doing things. */
836 else if (TARGET_SH2A)
837 sh_div_strategy = SH_DIV_INTRINSIC;
838 /* ??? Should we use the integer SHmedia function instead? */
839 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
840 sh_div_strategy = SH_DIV_CALL_FP;
841 /* SH1 .. SH3 cores often go into small-footprint systems, so
842 default to the smallest implementation available. */
843 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
844 sh_div_strategy = SH_DIV_CALL_TABLE;
845 else
846 sh_div_strategy = SH_DIV_CALL_DIV1;
848 if (!TARGET_SH1)
849 TARGET_PRETEND_CMOVE = 0;
850 if (sh_divsi3_libfunc[0])
851 ; /* User supplied - leave it alone. */
852 else if (TARGET_DIVIDE_CALL_FP)
853 sh_divsi3_libfunc = "__sdivsi3_i4";
854 else if (TARGET_DIVIDE_CALL_TABLE)
855 sh_divsi3_libfunc = "__sdivsi3_i4i";
856 else if (TARGET_SH5)
857 sh_divsi3_libfunc = "__sdivsi3_1";
858 else
859 sh_divsi3_libfunc = "__sdivsi3";
860 if (sh_branch_cost == -1)
861 sh_branch_cost
862 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
864 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
865 if (! VALID_REGISTER_P (regno))
866 sh_register_names[regno][0] = '\0';
868 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
869 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
870 sh_additional_register_names[regno][0] = '\0';
872 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
874 if ((flag_pic && ! TARGET_PREFERGOT)
875 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
876 flag_no_function_cse = 1;
 878 if (targetm.small_register_classes_for_mode_p (VOIDmode))
 880 /* Never run scheduling before reload, since that can
 881 break global alloc, and generates slower code anyway due
 882 to the pressure on R0.  */
 883 /* Enable sched1 for SH4 if the user explicitly requests it.
 884 When sched1 is enabled, the ready queue will be reordered by
 885 the target hooks if pressure is high.  We cannot do this for
 886 PIC, SH3 and lower as they give spill failures for R0.  */
887 if (!TARGET_HARD_SH4 || flag_pic)
888 flag_schedule_insns = 0;
 889 /* ??? Current exception handling places basic block boundaries
 890 after call_insns.  This causes high pressure on R0 and gives
891 spill failures for R0 in reload. See PR 22553 and the thread
892 on gcc-patches
893 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
894 else if (flag_exceptions)
896 if (flag_schedule_insns == 1)
897 warning (0, "ignoring -fschedule-insns because of exception handling bug");
898 flag_schedule_insns = 0;
900 else if (flag_schedule_insns == 2)
901 flag_schedule_insns = 0;
904 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
905 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
907 /* Unwind info is not correct around the CFG unless either a frame
908 pointer is present or M_A_O_A is set. Fixing this requires rewriting
909 unwind info generation to be aware of the CFG and propagating states
910 around edges. */
911 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
912 || flag_exceptions || flag_non_call_exceptions)
913 && flag_omit_frame_pointer
914 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
916 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
917 warning (0, "unwind tables currently require either a frame pointer "
918 "or -maccumulate-outgoing-args for correctness");
919 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
 922 /* Unwinding with -freorder-blocks-and-partition does not work on this
 923 architecture, because it requires far jumps to labels across the
 924 hot/cold section boundary, which are rejected on this architecture.  */
925 if (flag_reorder_blocks_and_partition)
927 if (flag_exceptions)
929 inform (input_location,
930 "-freorder-blocks-and-partition does not work with "
931 "exceptions on this architecture");
932 flag_reorder_blocks_and_partition = 0;
933 flag_reorder_blocks = 1;
935 else if (flag_unwind_tables)
937 inform (input_location,
938 "-freorder-blocks-and-partition does not support unwind "
939 "info on this architecture");
940 flag_reorder_blocks_and_partition = 0;
941 flag_reorder_blocks = 1;
945 if (align_loops == 0)
946 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
947 if (align_jumps == 0)
948 align_jumps = 1 << CACHE_LOG;
949 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
950 align_jumps = TARGET_SHMEDIA ? 4 : 2;
952 /* Allocation boundary (in *bytes*) for the code of a function.
953 SH1: 32 bit alignment is faster, because instructions are always
954 fetched as a pair from a longword boundary.
955 SH2 .. SH5 : align to cache line start. */
956 if (align_functions == 0)
957 align_functions
958 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
959 /* The linker relaxation code breaks when a function contains
960 alignments that are larger than that at the start of a
961 compilation unit. */
962 if (TARGET_RELAX)
964 int min_align
965 = align_loops > align_jumps ? align_loops : align_jumps;
 967 /* Also take possible .long constants / mova tables into account.  */
968 if (min_align < 4)
969 min_align = 4;
970 if (align_functions < min_align)
971 align_functions = min_align;
974 if (sh_fixed_range_str)
975 sh_fix_range (sh_fixed_range_str);
977 /* This target defaults to strict volatile bitfields. */
978 if (flag_strict_volatile_bitfields < 0)
979 flag_strict_volatile_bitfields = 1;
982 /* Print the operand address in x to the stream. */
984 static void
985 sh_print_operand_address (FILE *stream, rtx x)
987 switch (GET_CODE (x))
989 case REG:
990 case SUBREG:
991 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
992 break;
994 case PLUS:
996 rtx base = XEXP (x, 0);
997 rtx index = XEXP (x, 1);
999 switch (GET_CODE (index))
1001 case CONST_INT:
1002 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1003 reg_names[true_regnum (base)]);
1004 break;
1006 case REG:
1007 case SUBREG:
1009 int base_num = true_regnum (base);
1010 int index_num = true_regnum (index);
1012 fprintf (stream, "@(r0,%s)",
1013 reg_names[MAX (base_num, index_num)]);
1014 break;
1017 default:
1018 gcc_unreachable ();
1021 break;
1023 case PRE_DEC:
1024 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1025 break;
1027 case POST_INC:
1028 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1029 break;
1031 default:
1032 x = mark_constant_pool_use (x);
1033 output_addr_const (stream, x);
1034 break;
1038 /* Print operand x (an rtx) in assembler syntax to file stream
1039 according to modifier code.
1041 '.' print a .s if insn needs delay slot
1042 ',' print LOCAL_LABEL_PREFIX
 1043 '@' print trapa, rte or rts, depending on the trap_exit / interrupt handler attributes
1044 '#' output a nop if there is nothing to put in the delay slot
1045 ''' print likelihood suffix (/u for unlikely).
1046 '>' print branch target if -fverbose-asm
1047 'O' print a constant without the #
1048 'R' print the LSW of a dp value - changes if in little endian
1049 'S' print the MSW of a dp value - changes if in little endian
1050 'T' print the next word of a dp value - same as 'R' in big endian mode.
1051 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1052 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1053 'N' print 'r63' if the operand is (const_int 0).
1054 'd' print a V2SF reg as dN instead of fpN.
1055 'm' print a pair `base,offset' or `base,index', for LD and ST.
1056 'U' Likewise for {LD,ST}{HI,LO}.
1057 'V' print the position of a single bit set.
1058 'W' print the position of a single bit cleared.
1059 't' print a memory address which is a register.
1060 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1061 'o' output an operator. */
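/* Worked example (editorial): for a DImode value held in the register pair
   r4/r5 on a little endian target, LSW is 0 and MSW is 1, so in an insn
   template "%R1" prints "r4" (the least significant word) and "%S1" prints
   "r5".  If operand 1 is a MEM instead, the same codes print its address
   offset by 0 and 4 bytes respectively; see also the N.B. comment on
   %R / %S / %T inside the function below.  */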
1063 static void
1064 sh_print_operand (FILE *stream, rtx x, int code)
1066 int regno;
1067 enum machine_mode mode;
1069 switch (code)
1071 tree trapa_attr;
1073 case '.':
1074 if (final_sequence
1075 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1076 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1077 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1078 break;
1079 case ',':
1080 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1081 break;
1082 case '@':
1083 trapa_attr = lookup_attribute ("trap_exit",
1084 DECL_ATTRIBUTES (current_function_decl));
1085 if (trapa_attr)
1086 fprintf (stream, "trapa #%ld",
1087 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1088 else if (sh_cfun_interrupt_handler_p ())
1090 if (sh_cfun_resbank_handler_p ())
1091 fprintf (stream, "resbank\n");
1092 fprintf (stream, "rte");
1094 else
1095 fprintf (stream, "rts");
1096 break;
1097 case '#':
1098 /* Output a nop if there's nothing in the delay slot. */
1099 if (dbr_sequence_length () == 0)
1100 fprintf (stream, "\n\tnop");
1101 break;
1102 case '\'':
1104 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1106 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1107 fputs ("/u", stream);
1108 break;
1110 case '>':
1111 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1113 fputs ("\t! target: ", stream);
1114 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1116 break;
1117 case 'O':
1118 x = mark_constant_pool_use (x);
1119 output_addr_const (stream, x);
1120 break;
1121 /* N.B.: %R / %S / %T adjust memory addresses by four.
1122 For SHMEDIA, that means they can be used to access the first and
1123 second 32 bit part of a 64 bit (or larger) value that
1124 might be held in floating point registers or memory.
 1125 While they can also be used to access the 64 bit parts of a larger value
 1126 held in general purpose registers, that won't work with memory -
 1127 nor with fp registers, since the frxx names are used.  */
1128 case 'R':
1129 if (REG_P (x) || GET_CODE (x) == SUBREG)
1131 regno = true_regnum (x);
1132 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1133 fputs (reg_names[regno], (stream));
1135 else if (MEM_P (x))
1137 x = adjust_address (x, SImode, 4 * LSW);
1138 sh_print_operand_address (stream, XEXP (x, 0));
1140 else
1142 rtx sub = NULL_RTX;
1144 mode = GET_MODE (x);
1145 if (mode == VOIDmode)
1146 mode = DImode;
1147 if (GET_MODE_SIZE (mode) >= 8)
1148 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1149 if (sub)
1150 sh_print_operand (stream, sub, 0);
1151 else
1152 output_operand_lossage ("invalid operand to %%R");
1154 break;
1155 case 'S':
1156 if (REG_P (x) || GET_CODE (x) == SUBREG)
1158 regno = true_regnum (x);
1159 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1160 fputs (reg_names[regno], (stream));
1162 else if (MEM_P (x))
1164 x = adjust_address (x, SImode, 4 * MSW);
1165 sh_print_operand_address (stream, XEXP (x, 0));
1167 else
1169 rtx sub = NULL_RTX;
1171 mode = GET_MODE (x);
1172 if (mode == VOIDmode)
1173 mode = DImode;
1174 if (GET_MODE_SIZE (mode) >= 8)
1175 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1176 if (sub)
1177 sh_print_operand (stream, sub, 0);
1178 else
1179 output_operand_lossage ("invalid operand to %%S");
1181 break;
1182 case 'T':
1183 /* Next word of a double. */
1184 switch (GET_CODE (x))
1186 case REG:
1187 fputs (reg_names[REGNO (x) + 1], (stream));
1188 break;
1189 case MEM:
1190 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1191 && GET_CODE (XEXP (x, 0)) != POST_INC)
1192 x = adjust_address (x, SImode, 4);
1193 sh_print_operand_address (stream, XEXP (x, 0));
1194 break;
1195 default:
1196 break;
1198 break;
1200 case 't':
1201 gcc_assert (MEM_P (x));
1202 x = XEXP (x, 0);
1203 switch (GET_CODE (x))
1205 case REG:
1206 case SUBREG:
1207 sh_print_operand (stream, x, 0);
1208 break;
1209 default:
1210 break;
1212 break;
1214 case 'o':
1215 switch (GET_CODE (x))
1217 case PLUS: fputs ("add", stream); break;
1218 case MINUS: fputs ("sub", stream); break;
1219 case MULT: fputs ("mul", stream); break;
1220 case DIV: fputs ("div", stream); break;
1221 case EQ: fputs ("eq", stream); break;
1222 case NE: fputs ("ne", stream); break;
1223 case GT: case LT: fputs ("gt", stream); break;
1224 case GE: case LE: fputs ("ge", stream); break;
1225 case GTU: case LTU: fputs ("gtu", stream); break;
1226 case GEU: case LEU: fputs ("geu", stream); break;
1227 default:
1228 break;
1230 break;
1231 case 'M':
1232 if (TARGET_SHMEDIA)
1234 if (MEM_P (x)
1235 && GET_CODE (XEXP (x, 0)) == PLUS
1236 && (REG_P (XEXP (XEXP (x, 0), 1))
1237 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1238 fputc ('x', stream);
1240 else
1242 if (MEM_P (x))
1244 switch (GET_MODE (x))
1246 case QImode: fputs (".b", stream); break;
1247 case HImode: fputs (".w", stream); break;
1248 case SImode: fputs (".l", stream); break;
1249 case SFmode: fputs (".s", stream); break;
1250 case DFmode: fputs (".d", stream); break;
1251 default: gcc_unreachable ();
1255 break;
1257 case 'm':
1258 gcc_assert (MEM_P (x));
1259 x = XEXP (x, 0);
1260 /* Fall through. */
1261 case 'U':
1262 switch (GET_CODE (x))
1264 case REG:
1265 case SUBREG:
1266 sh_print_operand (stream, x, 0);
1267 fputs (", 0", stream);
1268 break;
1270 case PLUS:
1271 sh_print_operand (stream, XEXP (x, 0), 0);
1272 fputs (", ", stream);
1273 sh_print_operand (stream, XEXP (x, 1), 0);
1274 break;
1276 default:
1277 gcc_unreachable ();
1279 break;
1281 case 'V':
1283 int num = exact_log2 (INTVAL (x));
1284 gcc_assert (num >= 0);
1285 fprintf (stream, "#%d", num);
1287 break;
1289 case 'W':
1291 int num = exact_log2 (~INTVAL (x));
1292 gcc_assert (num >= 0);
1293 fprintf (stream, "#%d", num);
1295 break;
1297 case 'd':
1298 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1300 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1301 break;
1303 case 'N':
1304 if (x == CONST0_RTX (GET_MODE (x)))
1306 fprintf ((stream), "r63");
1307 break;
1309 goto default_output;
1310 case 'u':
1311 if (CONST_INT_P (x))
1313 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1314 break;
1316 /* Fall through. */
1318 default_output:
1319 default:
1320 regno = 0;
1321 mode = GET_MODE (x);
1323 switch (GET_CODE (x))
1325 case TRUNCATE:
1327 rtx inner = XEXP (x, 0);
1328 int offset = 0;
1329 enum machine_mode inner_mode;
1331 /* We might see SUBREGs with vector mode registers inside. */
1332 if (GET_CODE (inner) == SUBREG
1333 && (GET_MODE_SIZE (GET_MODE (inner))
1334 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1335 && subreg_lowpart_p (inner))
1336 inner = SUBREG_REG (inner);
1337 if (CONST_INT_P (inner))
1339 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1340 goto default_output;
1342 inner_mode = GET_MODE (inner);
1343 if (GET_CODE (inner) == SUBREG
1344 && (GET_MODE_SIZE (GET_MODE (inner))
1345 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1346 && REG_P (SUBREG_REG (inner)))
1348 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1349 GET_MODE (SUBREG_REG (inner)),
1350 SUBREG_BYTE (inner),
1351 GET_MODE (inner));
1352 inner = SUBREG_REG (inner);
1354 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1355 abort ();
1356 /* Floating point register pairs are always big endian;
1357 general purpose registers are 64 bit wide. */
1358 regno = REGNO (inner);
1359 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1360 - HARD_REGNO_NREGS (regno, mode))
1361 + offset;
1362 x = inner;
1363 goto reg;
1365 case SIGN_EXTEND:
1366 x = XEXP (x, 0);
1367 goto reg;
1368 /* FIXME: We need this on SHmedia32 because reload generates
1369 some sign-extended HI or QI loads into DImode registers
1370 but, because Pmode is SImode, the address ends up with a
1371 subreg:SI of the DImode register. Maybe reload should be
1372 fixed so as to apply alter_subreg to such loads? */
1373 case IF_THEN_ELSE:
1374 gcc_assert (trapping_target_operand (x, VOIDmode));
1375 x = XEXP (XEXP (x, 2), 0);
1376 goto default_output;
1377 case SUBREG:
1378 gcc_assert (SUBREG_BYTE (x) == 0
1379 && REG_P (SUBREG_REG (x)));
1381 x = SUBREG_REG (x);
1382 /* Fall through. */
1384 reg:
1385 case REG:
1386 regno += REGNO (x);
1387 if (FP_REGISTER_P (regno)
1388 && mode == V16SFmode)
1389 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1390 else if (FP_REGISTER_P (REGNO (x))
1391 && mode == V4SFmode)
1392 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1393 else if (REG_P (x)
1394 && mode == V2SFmode)
1395 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1396 else if (FP_REGISTER_P (REGNO (x))
1397 && GET_MODE_SIZE (mode) > 4)
1398 fprintf ((stream), "d%s", reg_names[regno] + 1);
1399 else
1400 fputs (reg_names[regno], (stream));
1401 break;
1403 case MEM:
1404 output_address (XEXP (x, 0));
1405 break;
1407 default:
1408 if (TARGET_SH1)
1409 fputc ('#', stream);
1410 output_addr_const (stream, x);
1411 break;
1413 break;
1417 static bool
1418 sh_print_operand_punct_valid_p (unsigned char code)
1420 return (code == '.' || code == '#' || code == '@' || code == ','
1421 || code == '$' || code == '\'' || code == '>');
1425 /* Encode symbol attributes of a SYMBOL_REF into its
1426 SYMBOL_REF_FLAGS. */
1427 static void
1428 sh_encode_section_info (tree decl, rtx rtl, int first)
1430 default_encode_section_info (decl, rtl, first);
1432 if (TREE_CODE (decl) == FUNCTION_DECL
1433 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1434 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1437 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1438 static void
1439 force_into (rtx value, rtx target)
1441 value = force_operand (value, target);
1442 if (! rtx_equal_p (value, target))
1443 emit_insn (gen_move_insn (target, value));
1446 /* Emit code to perform a block move. Choose the best method.
1448 OPERANDS[0] is the destination.
1449 OPERANDS[1] is the source.
1450 OPERANDS[2] is the size.
1451 OPERANDS[3] is the alignment safe to use. */
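/* Caller-side sketch (editorial; the movmemsi expander in sh.md is the real
   caller, this only illustrates the contract):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   A nonzero return means the copy has been fully expanded here (inline moves
   or a __movmem* library call); zero tells the expander to fall back to the
   generic by-pieces / memcpy path.  */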
1454 expand_block_move (rtx *operands)
1456 int align = INTVAL (operands[3]);
1457 int constp = (CONST_INT_P (operands[2]));
1458 int bytes = (constp ? INTVAL (operands[2]) : 0);
1460 if (! constp)
1461 return 0;
1463 /* If we could use mov.l to move words and dest is word-aligned, we
1464 can use movua.l for loads and still generate a relatively short
1465 and efficient sequence. */
1466 if (TARGET_SH4A_ARCH && align < 4
1467 && MEM_ALIGN (operands[0]) >= 32
1468 && can_move_by_pieces (bytes, 32))
1470 rtx dest = copy_rtx (operands[0]);
1471 rtx src = copy_rtx (operands[1]);
1472 /* We could use different pseudos for each copied word, but
1473 since movua can only load into r0, it's kind of
1474 pointless. */
1475 rtx temp = gen_reg_rtx (SImode);
1476 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1477 int copied = 0;
1479 while (copied + 4 <= bytes)
1481 rtx to = adjust_address (dest, SImode, copied);
1482 rtx from = adjust_automodify_address (src, BLKmode,
1483 src_addr, copied);
1485 set_mem_size (from, GEN_INT (4));
1486 emit_insn (gen_movua (temp, from));
1487 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1488 emit_move_insn (to, temp);
1489 copied += 4;
1492 if (copied < bytes)
1493 move_by_pieces (adjust_address (dest, BLKmode, copied),
1494 adjust_automodify_address (src, BLKmode,
1495 src_addr, copied),
1496 bytes - copied, align, 0);
1498 return 1;
1501 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1502 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1503 if (align < 4 || (bytes % 4 != 0))
1504 return 0;
1506 if (TARGET_HARD_SH4)
1508 if (bytes < 12)
1509 return 0;
1510 else if (bytes == 12)
1512 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1513 rtx r4 = gen_rtx_REG (SImode, 4);
1514 rtx r5 = gen_rtx_REG (SImode, 5);
1516 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1517 force_into (XEXP (operands[0], 0), r4);
1518 force_into (XEXP (operands[1], 0), r5);
1519 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1520 return 1;
1522 else if (! TARGET_SMALLCODE)
1524 const char *entry_name;
1525 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1526 int dwords;
1527 rtx r4 = gen_rtx_REG (SImode, 4);
1528 rtx r5 = gen_rtx_REG (SImode, 5);
1529 rtx r6 = gen_rtx_REG (SImode, 6);
1531 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1532 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1533 force_into (XEXP (operands[0], 0), r4);
1534 force_into (XEXP (operands[1], 0), r5);
1536 dwords = bytes >> 3;
1537 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1538 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1539 return 1;
1541 else
1542 return 0;
1544 if (bytes < 64)
1546 char entry[30];
1547 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1548 rtx r4 = gen_rtx_REG (SImode, 4);
1549 rtx r5 = gen_rtx_REG (SImode, 5);
1551 sprintf (entry, "__movmemSI%d", bytes);
1552 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1553 force_into (XEXP (operands[0], 0), r4);
1554 force_into (XEXP (operands[1], 0), r5);
1555 emit_insn (gen_block_move_real (func_addr_rtx));
1556 return 1;
 1559 /* This is the same number of bytes as a memcpy call, but to a different,
 1560 less common function name, so this will occasionally use more space.  */
1561 if (! TARGET_SMALLCODE)
1563 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1564 int final_switch, while_loop;
1565 rtx r4 = gen_rtx_REG (SImode, 4);
1566 rtx r5 = gen_rtx_REG (SImode, 5);
1567 rtx r6 = gen_rtx_REG (SImode, 6);
1569 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1570 force_into (XEXP (operands[0], 0), r4);
1571 force_into (XEXP (operands[1], 0), r5);
 1573 /* r6 controls the size of the move.  16 is subtracted from it
 1574 for each 64 bytes moved.  The negative remainder is then used
 1575 as an index into a list of move instructions.  E.g., a 72 byte move
 1576 would be set up with size(r6) = 14, for one iteration through the
 1577 big while loop, and a switch of -2 for the last part.  */
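/* Restating the arithmetic above for the 72 byte example (editorial):
   bytes / 4 = 18 longwords, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = ((18 / 16) - 1) * 16 = 0, hence r6 is loaded with 14.
   One pass of the 64 byte loop leaves 14 - 16 = -2, which selects the
   tail code that moves the remaining two longwords (8 bytes).  */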
1579 final_switch = 16 - ((bytes / 4) % 16);
1580 while_loop = ((bytes / 4) / 16 - 1) * 16;
1581 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1582 emit_insn (gen_block_lump_real (func_addr_rtx));
1583 return 1;
1586 return 0;
1589 /* Prepare operands for a move define_expand; specifically, one of the
1590 operands must be in a register. */
1593 prepare_move_operands (rtx operands[], enum machine_mode mode)
1595 if ((mode == SImode || mode == DImode)
1596 && flag_pic
1597 && ! ((mode == Pmode || mode == ptr_mode)
1598 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1600 rtx temp;
1601 if (SYMBOLIC_CONST_P (operands[1]))
1603 if (MEM_P (operands[0]))
1604 operands[1] = force_reg (Pmode, operands[1]);
1605 else if (TARGET_SHMEDIA
1606 && GET_CODE (operands[1]) == LABEL_REF
1607 && target_reg_operand (operands[0], mode))
1608 /* It's ok. */;
1609 else
1611 temp = (!can_create_pseudo_p ()
1612 ? operands[0]
1613 : gen_reg_rtx (Pmode));
1614 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1617 else if (GET_CODE (operands[1]) == CONST
1618 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1619 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1621 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1622 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1623 mode, temp);
1624 operands[1] = expand_binop (mode, add_optab, temp,
1625 XEXP (XEXP (operands[1], 0), 1),
1626 (!can_create_pseudo_p ()
1627 ? temp
1628 : gen_reg_rtx (Pmode)),
1629 0, OPTAB_LIB_WIDEN);
1633 if (! reload_in_progress && ! reload_completed)
 1635 /* Copy the source to a register if neither operand is a register.  */
1636 if (! register_operand (operands[0], mode)
1637 && ! sh_register_operand (operands[1], mode))
1638 operands[1] = copy_to_mode_reg (mode, operands[1]);
1640 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
 1642 /* This is like change_address_1 (operands[0], mode, 0, 1),
1643 except that we can't use that function because it is static. */
1644 rtx new_rtx = change_address (operands[0], mode, 0);
1645 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1646 operands[0] = new_rtx;
1649 /* This case can happen while generating code to move the result
1650 of a library call to the target. Reject `st r0,@(rX,rY)' because
1651 reload will fail to find a spill register for rX, since r0 is already
1652 being used for the source. */
1653 else if (TARGET_SH1
1654 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1655 && MEM_P (operands[0])
1656 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1657 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1658 operands[1] = copy_to_mode_reg (mode, operands[1]);
1661 if (mode == Pmode || mode == ptr_mode)
1663 rtx op0, op1, opc;
1664 enum tls_model tls_kind;
1666 op0 = operands[0];
1667 op1 = operands[1];
1668 if (GET_CODE (op1) == CONST
1669 && GET_CODE (XEXP (op1, 0)) == PLUS
1670 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1671 != TLS_MODEL_NONE))
1673 opc = XEXP (XEXP (op1, 0), 1);
1674 op1 = XEXP (XEXP (op1, 0), 0);
1676 else
1677 opc = NULL_RTX;
1679 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1681 rtx tga_op1, tga_ret, tmp, tmp2;
1683 switch (tls_kind)
1685 case TLS_MODEL_GLOBAL_DYNAMIC:
1686 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1687 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1688 op1 = tga_ret;
1689 break;
1691 case TLS_MODEL_LOCAL_DYNAMIC:
1692 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1693 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1695 tmp = gen_reg_rtx (Pmode);
1696 emit_move_insn (tmp, tga_ret);
1698 if (register_operand (op0, Pmode))
1699 tmp2 = op0;
1700 else
1701 tmp2 = gen_reg_rtx (Pmode);
1703 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1704 op1 = tmp2;
1705 break;
1707 case TLS_MODEL_INITIAL_EXEC:
1708 if (! flag_pic)
 1710 /* Don't schedule insns for getting the GOT address when
 1711 the first scheduling pass is enabled, to avoid spill
 1712 failures for R0.  */
1713 if (flag_schedule_insns)
1714 emit_insn (gen_blockage ());
1715 emit_insn (gen_GOTaddr2picreg ());
1716 emit_use (gen_rtx_REG (SImode, PIC_REG));
1717 if (flag_schedule_insns)
1718 emit_insn (gen_blockage ());
1720 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1721 tmp = gen_sym2GOTTPOFF (op1);
1722 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1723 op1 = tga_op1;
1724 break;
1726 case TLS_MODEL_LOCAL_EXEC:
1727 tmp2 = gen_reg_rtx (Pmode);
1728 emit_insn (gen_load_gbr (tmp2));
1729 tmp = gen_reg_rtx (Pmode);
1730 emit_insn (gen_symTPOFF2reg (tmp, op1));
1732 if (register_operand (op0, Pmode))
1733 op1 = op0;
1734 else
1735 op1 = gen_reg_rtx (Pmode);
1737 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1738 break;
1740 default:
1741 gcc_unreachable ();
1743 if (opc)
1744 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1745 operands[1] = op1;
1749 return 0;
1752 enum rtx_code
1753 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1754 enum rtx_code comparison)
1756 rtx op1;
1757 rtx scratch = NULL_RTX;
1759 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1760 comparison = GET_CODE (operands[0]);
1761 else
1762 scratch = operands[4];
1763 if (CONST_INT_P (operands[1])
1764 && !CONST_INT_P (operands[2]))
1766 rtx tmp = operands[1];
1768 operands[1] = operands[2];
1769 operands[2] = tmp;
1770 comparison = swap_condition (comparison);
1772 if (CONST_INT_P (operands[2]))
1774 HOST_WIDE_INT val = INTVAL (operands[2]);
1775 if ((val == -1 || val == -0x81)
1776 && (comparison == GT || comparison == LE))
1778 comparison = (comparison == GT) ? GE : LT;
1779 operands[2] = gen_int_mode (val + 1, mode);
1781 else if ((val == 1 || val == 0x80)
1782 && (comparison == GE || comparison == LT))
1784 comparison = (comparison == GE) ? GT : LE;
1785 operands[2] = gen_int_mode (val - 1, mode);
1787 else if (val == 1 && (comparison == GEU || comparison == LTU))
1789 comparison = (comparison == GEU) ? NE : EQ;
1790 operands[2] = CONST0_RTX (mode);
1792 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1794 comparison = (comparison == GEU) ? GTU : LEU;
1795 operands[2] = gen_int_mode (val - 1, mode);
1797 else if (val == 0 && (comparison == GTU || comparison == LEU))
1798 comparison = (comparison == GTU) ? NE : EQ;
1799 else if (mode == SImode
1800 && ((val == 0x7fffffff
1801 && (comparison == GTU || comparison == LEU))
1802 || ((unsigned HOST_WIDE_INT) val
1803 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1804 && (comparison == GEU || comparison == LTU))))
1806 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1807 operands[2] = CONST0_RTX (mode);
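/* E.g. "x > -1" is rewritten above to "x >= 0" and "x <u 1" to "x == 0",
   so the constant either disappears entirely or becomes cheaper to encode.  */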
1810 op1 = operands[1];
1811 if (can_create_pseudo_p ())
1812 operands[1] = force_reg (mode, op1);
1813 /* When we are handling DImode comparisons, we want to keep constants so
1814 that we can optimize the component comparisons; however, memory loads
1815 are better issued as a whole so that they can be scheduled well.
1816 SImode equality comparisons allow I08 constants, but only when they
1817 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1818 into a register, that register might as well be r0, and we allow the
1819 constant. If it is already in a register, this is likely to be
1820 allocated to a different hard register, thus we load the constant into
1821 a register unless it is zero. */
1822 if (!REG_P (operands[2])
1823 && (!CONST_INT_P (operands[2])
1824 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1825 && ((comparison != EQ && comparison != NE)
1826 || (REG_P (op1) && REGNO (op1) != R0_REG)
1827 || !satisfies_constraint_I08 (operands[2])))))
1829 if (scratch && GET_MODE (scratch) == mode)
1831 emit_move_insn (scratch, operands[2]);
1832 operands[2] = scratch;
1834 else if (can_create_pseudo_p ())
1835 operands[2] = force_reg (mode, operands[2]);
1837 return comparison;
1840 void
1841 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1843 rtx (*branch_expander) (rtx) = gen_branch_true;
1844 rtx jump;
1846 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1847 switch (comparison)
1849 case NE: case LT: case LE: case LTU: case LEU:
1850 comparison = reverse_condition (comparison);
1851 branch_expander = gen_branch_false;
1852 default: ;
1854 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1855 gen_rtx_fmt_ee (comparison, SImode,
1856 operands[1], operands[2])));
1857 jump = emit_jump_insn (branch_expander (operands[3]));
1858 if (probability >= 0)
1859 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1863 /* ??? How should we distribute probabilities when more than one branch
1864 is generated?  So far we only have some ad-hoc observations:
1865 - If the operands are random, they are likely to differ in both parts.
1866 - If comparing items in a hash chain, the operands are random or equal;
1867 operation should be EQ or NE.
1868 - If items are searched in an ordered tree from the root, we can expect
1869 the highpart to be unequal about half of the time; operation should be
1870 an inequality comparison, operands non-constant, and overall probability
1871 about 50%. Likewise for quicksort.
1872 - Range checks will often be made against constants.  Even if we assume for
1873 simplicity an even distribution of the non-constant operand over a
1874 sub-range here, the same probability could be generated with differently
1875 wide sub-ranges - as long as the ratio of the part of the subrange that
1876 is before the threshold to the part that comes after the threshold stays
1877 the same. Thus, we can't really tell anything here;
1878 assuming random distribution is at least simple.
1881 bool
1882 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1884 enum rtx_code msw_taken, msw_skip, lsw_taken;
1885 rtx skip_label = NULL_RTX;
1886 rtx op1h, op1l, op2h, op2l;
1887 int num_branches;
1888 int prob, rev_prob;
1889 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1890 rtx scratch = operands[4];
1892 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1893 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1894 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1895 op1l = gen_lowpart (SImode, operands[1]);
1896 op2l = gen_lowpart (SImode, operands[2]);
1897 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1898 prob = split_branch_probability;
1899 rev_prob = REG_BR_PROB_BASE - prob;
1900 switch (comparison)
1902 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1903 That costs 1 cycle more when the first branch can be predicted taken,
1904 but saves us mispredicts because only one branch needs prediction.
1905 It also enables generating the cmpeqdi_t-1 pattern. */
1906 case EQ:
1907 if (TARGET_CMPEQDI_T)
1909 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1910 emit_jump_insn (gen_branch_true (operands[3]));
1911 return true;
1913 msw_skip = NE;
1914 lsw_taken = EQ;
1915 if (prob >= 0)
1917 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1919 msw_skip_prob = rev_prob;
1920 if (REG_BR_PROB_BASE <= 65535)
1921 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1922 else
1924 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1925 lsw_taken_prob
1926 = (prob
1927 ? (REG_BR_PROB_BASE
1928 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1929 / ((HOST_WIDEST_INT) prob << 32)))
1930 : 0);
1933 break;
1934 case NE:
1935 if (TARGET_CMPEQDI_T)
1937 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1938 emit_jump_insn (gen_branch_false (operands[3]));
1939 return true;
1941 msw_taken = NE;
1942 msw_taken_prob = prob;
1943 lsw_taken = NE;
1944 lsw_taken_prob = 0;
1945 break;
1946 case GTU: case GT:
1947 msw_taken = comparison;
1948 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1949 break;
1950 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1951 msw_skip = swap_condition (msw_taken);
1952 lsw_taken = GTU;
1953 break;
1954 case GEU: case GE:
1955 if (op2l == CONST0_RTX (SImode))
1956 msw_taken = comparison;
1957 else
1959 msw_taken = comparison == GE ? GT : GTU;
1960 msw_skip = swap_condition (msw_taken);
1961 lsw_taken = GEU;
1963 break;
1964 case LTU: case LT:
1965 msw_taken = comparison;
1966 if (op2l == CONST0_RTX (SImode))
1967 break;
1968 msw_skip = swap_condition (msw_taken);
1969 lsw_taken = LTU;
1970 break;
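/* E.g. a signed DImode "a < b" is split into: branch to the target if
   high(a) < high(b); skip the low-word test if high(a) > high(b);
   otherwise branch to the target if low(a) <u low(b).  */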
1971 case LEU: case LE:
1972 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1973 msw_taken = comparison;
1974 else
1976 lsw_taken = LEU;
1977 if (comparison == LE)
1978 msw_taken = LT;
1979 else if (op2h != CONST0_RTX (SImode))
1980 msw_taken = LTU;
1981 else
1982 break;
1983 msw_skip = swap_condition (msw_taken);
1985 break;
1986 default: return false;
1988 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1989 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1990 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1991 if (comparison != EQ && comparison != NE && num_branches > 1)
1993 if (!CONSTANT_P (operands[2])
1994 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1995 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1997 msw_taken_prob = prob / 2U;
1998 msw_skip_prob
1999 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2000 lsw_taken_prob = prob;
2002 else
2004 msw_taken_prob = prob;
2005 msw_skip_prob = REG_BR_PROB_BASE;
2006 /* ??? If we have a constant op2h, should we use that when
2007 calculating lsw_taken_prob? */
2008 lsw_taken_prob = prob;
2011 operands[1] = op1h;
2012 operands[2] = op2h;
2013 operands[4] = NULL_RTX;
2014 if (reload_completed
2015 && ! arith_reg_or_0_operand (op2h, SImode)
2016 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2017 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2018 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2020 emit_move_insn (scratch, operands[2]);
2021 operands[2] = scratch;
2023 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2024 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2025 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2027 rtx taken_label = operands[3];
2029 /* Operands were possibly modified, but msw_skip doesn't expect this.
2030 Always use the original ones. */
2031 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2033 operands[1] = op1h;
2034 operands[2] = op2h;
2037 operands[3] = skip_label = gen_label_rtx ();
2038 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2039 operands[3] = taken_label;
2041 operands[1] = op1l;
2042 operands[2] = op2l;
2043 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2045 if (reload_completed
2046 && ! arith_reg_or_0_operand (op2l, SImode)
2047 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2049 emit_move_insn (scratch, operands[2]);
2050 operands[2] = scratch;
2052 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2054 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2055 emit_label (skip_label);
2056 return true;
2059 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4.  */
2061 static void
2062 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2064 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2066 insn = gen_rtx_PARALLEL (VOIDmode,
2067 gen_rtvec (2, insn,
2068 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2069 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2071 else
2072 emit_insn (insn);
2075 /* Prepare the operands for an scc instruction; make sure that the
2076 compare has been done and the result is in T_REG. */
2077 void
2078 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2080 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2081 enum rtx_code oldcode = code;
2082 enum machine_mode mode;
2084 /* First need a compare insn. */
2085 switch (code)
2087 case NE:
2088 /* It isn't possible to handle this case. */
2089 gcc_unreachable ();
2090 case LT:
2091 code = GT;
2092 break;
2093 case LE:
2094 code = GE;
2095 break;
2096 case LTU:
2097 code = GTU;
2098 break;
2099 case LEU:
2100 code = GEU;
2101 break;
2102 default:
2103 break;
2105 if (code != oldcode)
2107 rtx tmp = op0;
2108 op0 = op1;
2109 op1 = tmp;
2112 mode = GET_MODE (op0);
2113 if (mode == VOIDmode)
2114 mode = GET_MODE (op1);
2116 op0 = force_reg (mode, op0);
2117 if ((code != EQ && code != NE
2118 && (op1 != const0_rtx
2119 || code == GTU || code == GEU || code == LTU || code == LEU))
2120 || (mode == DImode && op1 != const0_rtx)
2121 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2122 op1 = force_reg (mode, op1);
2124 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2125 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2126 mode);
2130 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2131 rtx op0, rtx op1)
2133 rtx target = gen_reg_rtx (SImode);
2134 rtx tmp;
2136 gcc_assert (TARGET_SHMEDIA);
2137 switch (code)
2139 case EQ:
2140 case GT:
2141 case LT:
2142 case UNORDERED:
2143 case GTU:
2144 case LTU:
2145 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2146 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2147 code = NE;
2148 break;
2150 case NE:
2151 case GE:
2152 case LE:
2153 case ORDERED:
2154 case GEU:
2155 case LEU:
2156 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2157 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2158 code = EQ;
2159 break;
2161 case UNEQ:
2162 case UNGE:
2163 case UNGT:
2164 case UNLE:
2165 case UNLT:
2166 case LTGT:
2167 return NULL_RTX;
2169 default:
2170 gcc_unreachable ();
2173 if (mode == DImode)
2175 rtx t2 = gen_reg_rtx (DImode);
2176 emit_insn (gen_extendsidi2 (t2, target));
2177 target = t2;
2180 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2183 /* Called from the md file, set up the operands of a compare instruction. */
2185 void
2186 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2188 enum rtx_code code = GET_CODE (operands[0]);
2189 enum rtx_code branch_code;
2190 rtx op0 = operands[1];
2191 rtx op1 = operands[2];
2192 rtx insn, tem;
2193 bool need_ccmpeq = false;
2195 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2197 op0 = force_reg (mode, op0);
2198 op1 = force_reg (mode, op1);
2200 else
2202 if (code != EQ || mode == DImode)
2204 /* Force args into regs, since we can't use constants here. */
2205 op0 = force_reg (mode, op0);
2206 if (op1 != const0_rtx || code == GTU || code == GEU)
2207 op1 = force_reg (mode, op1);
2211 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2213 if (code == LT
2214 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2215 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2217 tem = op0, op0 = op1, op1 = tem;
2218 code = swap_condition (code);
2221 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2222 if (code == GE)
2224 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2225 need_ccmpeq = true;
2226 code = GT;
2229 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2230 to EQ/GT respectively. */
2231 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
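/* E.g. with TARGET_IEEE and TARGET_SH2E, a float "a <= b" has been swapped
   above to "b >= a" and is then emitted as T = (b > a) plus an extra
   equality compare (need_ccmpeq), so the branch is also taken when b == a.  */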
2234 switch (code)
2236 case EQ:
2237 case GT:
2238 case GE:
2239 case GTU:
2240 case GEU:
2241 branch_code = code;
2242 break;
2243 case NE:
2244 case LT:
2245 case LE:
2246 case LTU:
2247 case LEU:
2248 branch_code = reverse_condition (code);
2249 break;
2250 default:
2251 gcc_unreachable ();
2254 insn = gen_rtx_SET (VOIDmode,
2255 gen_rtx_REG (SImode, T_REG),
2256 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2258 sh_emit_set_t_insn (insn, mode);
2259 if (need_ccmpeq)
2260 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2262 if (branch_code == code)
2263 emit_jump_insn (gen_branch_true (operands[3]));
2264 else
2265 emit_jump_insn (gen_branch_false (operands[3]));
2268 void
2269 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2271 enum rtx_code code = GET_CODE (operands[1]);
2272 rtx op0 = operands[2];
2273 rtx op1 = operands[3];
2274 rtx lab = NULL_RTX;
2275 bool invert = false;
2276 rtx tem;
2278 op0 = force_reg (mode, op0);
2279 if ((code != EQ && code != NE
2280 && (op1 != const0_rtx
2281 || code == GTU || code == GEU || code == LTU || code == LEU))
2282 || (mode == DImode && op1 != const0_rtx)
2283 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2284 op1 = force_reg (mode, op1);
2286 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2288 if (code == LT || code == LE)
2290 code = swap_condition (code);
2291 tem = op0, op0 = op1, op1 = tem;
2293 if (code == GE)
2295 if (TARGET_IEEE)
2297 lab = gen_label_rtx ();
2298 sh_emit_scc_to_t (EQ, op0, op1);
2299 emit_jump_insn (gen_branch_true (lab));
2300 code = GT;
2302 else
2304 code = LT;
2305 invert = true;
2310 if (code == NE)
2312 code = EQ;
2313 invert = true;
2316 sh_emit_scc_to_t (code, op0, op1);
2317 if (lab)
2318 emit_label (lab);
2319 if (invert)
2320 emit_insn (gen_movnegt (operands[0]));
2321 else
2322 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2325 /* Functions to output assembly code. */
2327 /* Return a sequence of instructions to perform DI or DF move.
2329 Since the SH cannot move a DI or DF in one instruction, we have
2330 to take care when we see overlapping source and dest registers. */
2332 const char *
2333 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2334 enum machine_mode mode)
2336 rtx dst = operands[0];
2337 rtx src = operands[1];
2339 if (MEM_P (dst)
2340 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2341 return "mov.l %T1,%0\n\tmov.l %1,%0";
2343 if (register_operand (dst, mode)
2344 && register_operand (src, mode))
2346 if (REGNO (src) == MACH_REG)
2347 return "sts mach,%S0\n\tsts macl,%R0";
2349 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2350 when mov.d r1,r0 do r1->r0 then r2->r1. */
2352 if (REGNO (src) + 1 == REGNO (dst))
2353 return "mov %T1,%T0\n\tmov %1,%0";
2354 else
2355 return "mov %1,%0\n\tmov %T1,%T0";
2357 else if (CONST_INT_P (src))
2359 if (INTVAL (src) < 0)
2360 output_asm_insn ("mov #-1,%S0", operands);
2361 else
2362 output_asm_insn ("mov #0,%S0", operands);
2364 return "mov %1,%R0";
2366 else if (MEM_P (src))
2368 int ptrreg = -1;
2369 int dreg = REGNO (dst);
2370 rtx inside = XEXP (src, 0);
2372 switch (GET_CODE (inside))
2374 case REG:
2375 ptrreg = REGNO (inside);
2376 break;
2378 case SUBREG:
2379 ptrreg = subreg_regno (inside);
2380 break;
2382 case PLUS:
2383 ptrreg = REGNO (XEXP (inside, 0));
2384 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2385 an offsettable address. Unfortunately, offsettable addresses use
2386 QImode to check the offset, and a QImode offsettable address
2387 requires r0 for the other operand, which is not currently
2388 supported, so we can't use the 'o' constraint.
2389 Thus we must check for and handle r0+REG addresses here.
2390 We punt for now, since this is likely very rare. */
2391 gcc_assert (!REG_P (XEXP (inside, 1)));
2392 break;
2394 case LABEL_REF:
2395 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2396 case POST_INC:
2397 return "mov.l %1,%0\n\tmov.l %1,%T0";
2398 default:
2399 gcc_unreachable ();
2402 /* Work out the safe way to copy. Copy into the second half first. */
2403 if (dreg == ptrreg)
2404 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2407 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2410 /* Print an instruction which would have gone into a delay slot after
2411 another instruction, but couldn't because the other instruction expanded
2412 into a sequence where putting the slot insn at the end wouldn't work. */
2414 static void
2415 print_slot (rtx insn)
2417 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2419 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2422 const char *
2423 output_far_jump (rtx insn, rtx op)
2425 struct { rtx lab, reg, op; } this_jmp;
2426 rtx braf_base_lab = NULL_RTX;
2427 const char *jump;
2428 int far;
2429 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2430 rtx prev;
2432 this_jmp.lab = gen_label_rtx ();
2434 if (TARGET_SH2
2435 && offset >= -32764
2436 && offset - get_attr_length (insn) <= 32766)
2438 far = 0;
2439 jump = "mov.w %O0,%1; braf %1";
2441 else
2443 far = 1;
2444 if (flag_pic)
2446 if (TARGET_SH2)
2447 jump = "mov.l %O0,%1; braf %1";
2448 else
2449 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2451 else
2452 jump = "mov.l %O0,%1; jmp @%1";
2454 /* If we have a scratch register available, use it. */
2455 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2456 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2458 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2459 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2460 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2461 output_asm_insn (jump, &this_jmp.lab);
2462 if (dbr_sequence_length ())
2463 print_slot (final_sequence);
2464 else
2465 output_asm_insn ("nop", 0);
2467 else
2469 /* Output the delay slot insn first if any. */
2470 if (dbr_sequence_length ())
2471 print_slot (final_sequence);
2473 this_jmp.reg = gen_rtx_REG (SImode, 13);
2474 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2475 Fortunately, MACL is fixed and call-clobbered, and we never
2476 need its value across jumps, so save r13 in it instead of in
2477 the stack. */
2478 if (TARGET_SH5)
2479 output_asm_insn ("lds r13, macl", 0);
2480 else
2481 output_asm_insn ("mov.l r13,@-r15", 0);
2482 output_asm_insn (jump, &this_jmp.lab);
2483 if (TARGET_SH5)
2484 output_asm_insn ("sts macl, r13", 0);
2485 else
2486 output_asm_insn ("mov.l @r15+,r13", 0);
2488 if (far && flag_pic && TARGET_SH2)
2490 braf_base_lab = gen_label_rtx ();
2491 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2492 CODE_LABEL_NUMBER (braf_base_lab));
2494 if (far)
2495 output_asm_insn (".align 2", 0);
2496 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2497 this_jmp.op = op;
2498 if (far && flag_pic)
2500 if (TARGET_SH2)
2501 this_jmp.lab = braf_base_lab;
2502 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2504 else
2505 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2506 return "";
2509 /* Local label counter, used for constants in the pool and inside
2510 pattern branches. */
2512 static int lf = 100;
2514 /* Output code for ordinary branches. */
2516 const char *
2517 output_branch (int logic, rtx insn, rtx *operands)
2519 switch (get_attr_length (insn))
2521 case 6:
2522 /* This can happen if filling the delay slot has caused a forward
2523 branch to exceed its range (we could reverse it, but only
2524 when we know we won't overextend other branches; this should
2525 best be handled by relaxation).
2526 It can also happen when other condbranches hoist delay slot insns
2527 from their destinations, thus leading to code size increase.
2528 But the branch will still be in the range -4092..+4098 bytes. */
2530 if (! TARGET_RELAX)
2532 int label = lf++;
2533 /* The call to print_slot will clobber the operands. */
2534 rtx op0 = operands[0];
2536 /* If the instruction in the delay slot is annulled (true), then
2537 there is no delay slot where we can put it now. The only safe
2538 place for it is after the label. final will do that by default. */
2540 if (final_sequence
2541 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2542 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2544 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2545 ASSEMBLER_DIALECT ? "/" : ".", label);
2546 print_slot (final_sequence);
2548 else
2549 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2551 output_asm_insn ("bra\t%l0", &op0);
2552 fprintf (asm_out_file, "\tnop\n");
2553 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2555 return "";
2557 /* When relaxing, handle this like a short branch. The linker
2558 will fix it up if it still doesn't fit after relaxation. */
2559 case 2:
2560 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2562 /* These are for SH2e, in which we have to account for the
2563 extra nop because of the hardware bug in annulled branches. */
2564 case 8:
2565 if (! TARGET_RELAX)
2567 int label = lf++;
2569 gcc_assert (!final_sequence
2570 || !(INSN_ANNULLED_BRANCH_P
2571 (XVECEXP (final_sequence, 0, 0))));
2572 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2573 logic ? "f" : "t",
2574 ASSEMBLER_DIALECT ? "/" : ".", label);
2575 fprintf (asm_out_file, "\tnop\n");
2576 output_asm_insn ("bra\t%l0", operands);
2577 fprintf (asm_out_file, "\tnop\n");
2578 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2580 return "";
2582 /* When relaxing, fall through. */
2583 case 4:
2585 char buffer[10];
2587 sprintf (buffer, "b%s%ss\t%%l0",
2588 logic ? "t" : "f",
2589 ASSEMBLER_DIALECT ? "/" : ".");
2590 output_asm_insn (buffer, &operands[0]);
2591 return "nop";
2594 default:
2595 /* There should be no longer branches now - that would
2596 indicate that something has destroyed the branches set
2597 up in machine_dependent_reorg. */
2598 gcc_unreachable ();
2602 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2603 fill in operand 9 as a label to the successor insn.
2604 We try to use jump threading where possible.
2605 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2606 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2607 follow jmp and bt, if the address is in range. */
2608 const char *
2609 output_branchy_insn (enum rtx_code code, const char *templ,
2610 rtx insn, rtx *operands)
2612 rtx next_insn = NEXT_INSN (insn);
2614 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2616 rtx src = SET_SRC (PATTERN (next_insn));
2617 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2619 /* Following branch not taken */
2620 operands[9] = gen_label_rtx ();
2621 emit_label_after (operands[9], next_insn);
2622 INSN_ADDRESSES_NEW (operands[9],
2623 INSN_ADDRESSES (INSN_UID (next_insn))
2624 + get_attr_length (next_insn));
2625 return templ;
2627 else
2629 int offset = (branch_dest (next_insn)
2630 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2631 if (offset >= -252 && offset <= 258)
2633 if (GET_CODE (src) == IF_THEN_ELSE)
2634 /* branch_true */
2635 src = XEXP (src, 1);
2636 operands[9] = src;
2637 return templ;
2641 operands[9] = gen_label_rtx ();
2642 emit_label_after (operands[9], insn);
2643 INSN_ADDRESSES_NEW (operands[9],
2644 INSN_ADDRESSES (INSN_UID (insn))
2645 + get_attr_length (insn));
2646 return templ;
2649 const char *
2650 output_ieee_ccmpeq (rtx insn, rtx *operands)
2652 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2653 insn, operands);
2656 /* Output the start of the assembler file. */
2658 static void
2659 sh_file_start (void)
2661 default_file_start ();
2663 #ifdef SYMBIAN
2664 /* Declare the .directive section before it is used. */
2665 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2666 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2667 #endif
2669 if (TARGET_ELF)
2670 /* We need to show the text section with the proper
2671 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2672 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2673 will complain. We can teach GAS specifically about the
2674 default attributes for our choice of text section, but
2675 then we would have to change GAS again if/when we change
2676 the text section name. */
2677 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2678 else
2679 /* Switch to the data section so that the coffsem symbol
2680 isn't in the text section. */
2681 switch_to_section (data_section);
2683 if (TARGET_LITTLE_ENDIAN)
2684 fputs ("\t.little\n", asm_out_file);
2686 if (!TARGET_ELF)
2688 if (TARGET_SHCOMPACT)
2689 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2690 else if (TARGET_SHMEDIA)
2691 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2692 TARGET_SHMEDIA64 ? 64 : 32);
2696 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2698 static bool
2699 unspec_caller_rtx_p (rtx pat)
2701 rtx base, offset;
2702 int i;
2704 split_const (pat, &base, &offset);
2705 if (GET_CODE (base) == UNSPEC)
2707 if (XINT (base, 1) == UNSPEC_CALLER)
2708 return true;
2709 for (i = 0; i < XVECLEN (base, 0); i++)
2710 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2711 return true;
2713 return false;
2716 /* Indicate that INSN cannot be duplicated.  This is true for an insn
2717 that generates a unique label. */
2719 static bool
2720 sh_cannot_copy_insn_p (rtx insn)
2722 rtx pat;
2724 if (!reload_completed || !flag_pic)
2725 return false;
2727 if (!NONJUMP_INSN_P (insn))
2728 return false;
2729 if (asm_noperands (insn) >= 0)
2730 return false;
2732 pat = PATTERN (insn);
2733 if (GET_CODE (pat) != SET)
2734 return false;
2735 pat = SET_SRC (pat);
2737 if (unspec_caller_rtx_p (pat))
2738 return true;
2740 return false;
2743 /* Actual number of instructions used to make a shift by N. */
2744 static const char ashiftrt_insns[] =
2745 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2747 /* Left shift and logical right shift are the same. */
2748 static const char shift_insns[] =
2749 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2751 /* Individual shift amounts needed to get the above length sequences.
2752 One bit right shifts clobber the T bit, so when possible, put one bit
2753 shifts in the middle of the sequence, so the ends are eligible for
2754 branch delay slots. */
2755 static const short shift_amounts[32][5] = {
2756 {0}, {1}, {2}, {2, 1},
2757 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2758 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2759 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2760 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2761 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2762 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2763 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
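/* For example, a constant shift by 13 uses shift_amounts[13] = {8, 2, 1, 2}:
   four partial shifts totalling 13, matching shift_insns[13] == 4, with the
   single one-bit shift (which clobbers T) kept in the middle so the ends stay
   eligible for delay slots.  */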
2765 /* Likewise, but for shift amounts < 16, up to three highmost bits
2766 might be clobbered. This is typically used when combined with some
2767 kind of sign or zero extension. */
2769 static const char ext_shift_insns[] =
2770 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2772 static const short ext_shift_amounts[32][4] = {
2773 {0}, {1}, {2}, {2, 1},
2774 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2775 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2776 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2777 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2778 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2779 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2780 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2782 /* Assuming we have a value that has been sign-extended by at least one bit,
2783 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2784 to shift it by N without data loss, and quicker than by other means? */
2785 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
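/* (n | 8) == 15 holds exactly for n == 7 and n == 15: per ext_shift_amounts,
   those are done as one 8- or 16-bit left shift followed by a one-bit right
   shift, and making that last shift arithmetic preserves the sign of an
   already sign-extended value.  */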
2787 /* This is used in length attributes in sh.md to help compute the length
2788 of arbitrary constant shift instructions. */
2791 shift_insns_rtx (rtx insn)
2793 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2794 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2795 enum rtx_code shift_code = GET_CODE (set_src);
2797 switch (shift_code)
2799 case ASHIFTRT:
2800 return ashiftrt_insns[shift_count];
2801 case LSHIFTRT:
2802 case ASHIFT:
2803 return shift_insns[shift_count];
2804 default:
2805 gcc_unreachable ();
2809 /* Return the cost of a shift. */
2811 static inline int
2812 shiftcosts (rtx x)
2814 int value;
2816 if (TARGET_SHMEDIA)
2817 return 1;
2819 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2821 if (GET_MODE (x) == DImode
2822 && CONST_INT_P (XEXP (x, 1))
2823 && INTVAL (XEXP (x, 1)) == 1)
2824 return 2;
2826 /* Everything else is invalid, because there is no pattern for it. */
2827 return MAX_COST;
2829 /* If shift by a non constant, then this will be expensive. */
2830 if (!CONST_INT_P (XEXP (x, 1)))
2831 return SH_DYNAMIC_SHIFT_COST;
2833 /* Otherwise, return the true cost in instructions. Cope with out of range
2834 shift counts more or less arbitrarily. */
2835 value = INTVAL (XEXP (x, 1)) & 31;
2837 if (GET_CODE (x) == ASHIFTRT)
2839 int cost = ashiftrt_insns[value];
2840 /* If SH3, then we put the constant in a reg and use shad. */
2841 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2842 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2843 return cost;
2845 else
2846 return shift_insns[value];
2849 /* Return the cost of an AND operation. */
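/* For instance, (x & 0xff) is a single extu.b and is costed at 1, while
   (x & 0x3f) needs the r0-only "and #imm,r0" form and is costed at 2.  */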
2851 static inline int
2852 andcosts (rtx x)
2854 int i;
2856 /* ANDing with a register is a single-cycle `and' instruction.  */
2857 if (!CONST_INT_P (XEXP (x, 1)))
2858 return 1;
2860 i = INTVAL (XEXP (x, 1));
2862 if (TARGET_SHMEDIA)
2864 if (satisfies_constraint_I10 (XEXP (x, 1))
2865 || satisfies_constraint_J16 (XEXP (x, 1)))
2866 return 1;
2867 else
2868 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2871 /* These constants are single cycle extu.[bw] instructions. */
2872 if (i == 0xff || i == 0xffff)
2873 return 1;
2874 /* Constants that can be used in an and immediate instruction in a single
2875 cycle, but this requires r0, so make it a little more expensive. */
2876 if (CONST_OK_FOR_K08 (i))
2877 return 2;
2878 /* Constants that can be loaded with a mov immediate and an and.
2879 This case is probably unnecessary. */
2880 if (CONST_OK_FOR_I08 (i))
2881 return 2;
2882 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2883 This case is probably unnecessary. */
2884 return 3;
2887 /* Return the cost of an addition or a subtraction. */
2889 static inline int
2890 addsubcosts (rtx x)
2892 /* Adding a register is a single cycle insn. */
2893 if (REG_P (XEXP (x, 1))
2894 || GET_CODE (XEXP (x, 1)) == SUBREG)
2895 return 1;
2897 /* Likewise for small constants. */
2898 if (CONST_INT_P (XEXP (x, 1))
2899 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2900 return 1;
2902 if (TARGET_SHMEDIA)
2903 switch (GET_CODE (XEXP (x, 1)))
2905 case CONST:
2906 case LABEL_REF:
2907 case SYMBOL_REF:
2908 return TARGET_SHMEDIA64 ? 5 : 3;
2910 case CONST_INT:
2911 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2912 return 2;
2913 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2914 return 3;
2915 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2916 return 4;
2918 /* Fall through. */
2919 default:
2920 return 5;
2923 /* Any other constant requires a 2 cycle pc-relative load plus an
2924 addition. */
2925 return 3;
2928 /* Return the cost of a multiply. */
2929 static inline int
2930 multcosts (rtx x ATTRIBUTE_UNUSED)
2932 if (sh_multcost >= 0)
2933 return sh_multcost;
2934 if (TARGET_SHMEDIA)
2935 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2936 accept constants. Ideally, we would use a cost of one or two and
2937 add the cost of the operand, but disregard the latter when inside loops
2938 and loop invariant code motion is still to follow.
2939 Using a multiply first and splitting it later if it's a loss
2940 doesn't work because of different sign / zero extension semantics
2941 of multiplies vs. shifts. */
2942 return TARGET_SMALLCODE ? 2 : 3;
2944 if (TARGET_SH2)
2946 /* We have a mul insn, so we can never take more than the mul and the
2947 read of the mac reg, but count more because of the latency and extra
2948 reg usage. */
2949 if (TARGET_SMALLCODE)
2950 return 2;
2951 return 3;
2954 /* If we're aiming at small code, then just count the number of
2955 insns in a multiply call sequence. */
2956 if (TARGET_SMALLCODE)
2957 return 5;
2959 /* Otherwise count all the insns in the routine we'd be calling too. */
2960 return 20;
2963 /* Compute a (partial) cost for rtx X. Return true if the complete
2964 cost has been computed, and false if subexpressions should be
2965 scanned. In either case, *TOTAL contains the cost result. */
2967 static bool
2968 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2969 bool speed ATTRIBUTE_UNUSED)
2971 switch (code)
2973 case CONST_INT:
2974 if (TARGET_SHMEDIA)
2976 if (INTVAL (x) == 0)
2977 *total = 0;
2978 else if (outer_code == AND && and_operand ((x), DImode))
2979 *total = 0;
2980 else if ((outer_code == IOR || outer_code == XOR
2981 || outer_code == PLUS)
2982 && CONST_OK_FOR_I10 (INTVAL (x)))
2983 *total = 0;
2984 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2985 *total = COSTS_N_INSNS (outer_code != SET);
2986 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2987 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2988 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2989 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2990 else
2991 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2992 return true;
2994 if (CONST_OK_FOR_I08 (INTVAL (x)))
2995 *total = 0;
2996 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2997 && CONST_OK_FOR_K08 (INTVAL (x)))
2998 *total = 1;
2999 /* prepare_cmp_insn will force costly constants into registers before
3000 the cbranch[sd]i4 patterns can see them, so preserve potentially
3001 interesting ones not covered by I08 above. */
3002 else if (outer_code == COMPARE
3003 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3004 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3005 || INTVAL (x) == 0x7fffffff
3006 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3007 *total = 1;
3008 else
3009 *total = 8;
3010 return true;
3012 case CONST:
3013 case LABEL_REF:
3014 case SYMBOL_REF:
3015 if (TARGET_SHMEDIA64)
3016 *total = COSTS_N_INSNS (4);
3017 else if (TARGET_SHMEDIA32)
3018 *total = COSTS_N_INSNS (2);
3019 else
3020 *total = 5;
3021 return true;
3023 case CONST_DOUBLE:
3024 if (TARGET_SHMEDIA)
3025 *total = COSTS_N_INSNS (4);
3026 /* prepare_cmp_insn will force costly constants into registers before
3027 the cbranchdi4 pattern can see them, so preserve potentially
3028 interesting ones. */
3029 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3030 *total = 1;
3031 else
3032 *total = 10;
3033 return true;
3034 case CONST_VECTOR:
3035 if (x == CONST0_RTX (GET_MODE (x)))
3036 *total = 0;
3037 else if (sh_1el_vec (x, VOIDmode))
3038 *total = outer_code != SET;
3039 if (sh_rep_vec (x, VOIDmode))
3040 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3041 + (outer_code != SET));
3042 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3043 return true;
3045 case PLUS:
3046 case MINUS:
3047 *total = COSTS_N_INSNS (addsubcosts (x));
3048 return true;
3050 case AND:
3051 *total = COSTS_N_INSNS (andcosts (x));
3052 return true;
3054 case MULT:
3055 *total = COSTS_N_INSNS (multcosts (x));
3056 return true;
3058 case ASHIFT:
3059 case ASHIFTRT:
3060 case LSHIFTRT:
3061 *total = COSTS_N_INSNS (shiftcosts (x));
3062 return true;
3064 case DIV:
3065 case UDIV:
3066 case MOD:
3067 case UMOD:
3068 *total = COSTS_N_INSNS (20);
3069 return true;
3071 case PARALLEL:
3072 if (sh_1el_vec (x, VOIDmode))
3073 *total = outer_code != SET;
3074 if (sh_rep_vec (x, VOIDmode))
3075 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3076 + (outer_code != SET));
3077 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3078 return true;
3080 case FLOAT:
3081 case FIX:
3082 *total = 100;
3083 return true;
3085 default:
3086 return false;
3090 /* Compute the cost of an address. For the SH, all valid addresses are
3091 the same cost. Use a slightly higher cost for reg + reg addressing,
3092 since it increases pressure on r0. */
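/* On the non-SHmedia targets the only reg + reg address form is @(r0,Rn),
   which is why such addresses add to the pressure on r0.  */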
3094 static int
3095 sh_address_cost (rtx X,
3096 bool speed ATTRIBUTE_UNUSED)
3098 return (GET_CODE (X) == PLUS
3099 && ! CONSTANT_P (XEXP (X, 1))
3100 && ! TARGET_SHMEDIA ? 1 : 0);
3103 /* Code to expand a shift. */
3105 void
3106 gen_ashift (int type, int n, rtx reg)
3108 /* Negative values here come from the shift_amounts array. */
3109 if (n < 0)
3111 if (type == ASHIFT)
3112 type = LSHIFTRT;
3113 else
3114 type = ASHIFT;
3115 n = -n;
3118 switch (type)
3120 case ASHIFTRT:
3121 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3122 break;
3123 case LSHIFTRT:
3124 if (n == 1)
3125 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3126 else
3127 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3128 break;
3129 case ASHIFT:
3130 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3131 break;
3135 /* Same for HImode */
3137 void
3138 gen_ashift_hi (int type, int n, rtx reg)
3140 /* Negative values here come from the shift_amounts array. */
3141 if (n < 0)
3143 if (type == ASHIFT)
3144 type = LSHIFTRT;
3145 else
3146 type = ASHIFT;
3147 n = -n;
3150 switch (type)
3152 case ASHIFTRT:
3153 case LSHIFTRT:
3154 /* We don't have HImode right shift operations because using the
3155 ordinary 32 bit shift instructions for that doesn't generate proper
3156 zero/sign extension.
3157 gen_ashift_hi is only called in contexts where we know that the
3158 sign extension works out correctly. */
3160 int offset = 0;
3161 if (GET_CODE (reg) == SUBREG)
3163 offset = SUBREG_BYTE (reg);
3164 reg = SUBREG_REG (reg);
3166 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3167 break;
3169 case ASHIFT:
3170 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3171 break;
3175 /* Output RTL to split a constant shift into its component SH constant
3176 shift instructions. */
3178 void
3179 gen_shifty_op (int code, rtx *operands)
3181 int value = INTVAL (operands[2]);
3182 int max, i;
3184 /* Truncate the shift count in case it is out of bounds. */
3185 value = value & 31;
3187 if (value == 31)
3189 if (code == LSHIFTRT)
3191 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3192 emit_insn (gen_movt (operands[0]));
3193 return;
3195 else if (code == ASHIFT)
3197 /* There is a two instruction sequence for 31 bit left shifts,
3198 but it requires r0. */
3199 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3201 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3202 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3203 return;
3207 else if (value == 0)
3209 /* This can happen even when optimizing, if there were subregs before
3210 reload. Don't output a nop here, as this is never optimized away;
3211 use a no-op move instead. */
3212 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3213 return;
3216 max = shift_insns[value];
3217 for (i = 0; i < max; i++)
3218 gen_ashift (code, shift_amounts[value][i], operands[0]);
3221 /* Same as above, but optimized for values where the topmost bits don't
3222 matter. */
3224 void
3225 gen_shifty_hi_op (int code, rtx *operands)
3227 int value = INTVAL (operands[2]);
3228 int max, i;
3229 void (*gen_fun) (int, int, rtx);
3231 /* This operation is used by and_shl for SImode values with a few
3232 high bits known to be cleared. */
3233 value &= 31;
3234 if (value == 0)
3236 emit_insn (gen_nop ());
3237 return;
3240 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3241 if (code == ASHIFT)
3243 max = ext_shift_insns[value];
3244 for (i = 0; i < max; i++)
3245 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3247 else
3248 /* When shifting right, emit the shifts in reverse order, so that
3249 solitary negative values come first. */
3250 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3251 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3254 /* Output RTL for an arithmetic right shift. */
3256 /* ??? Rewrite to use super-optimizer sequences. */
3259 expand_ashiftrt (rtx *operands)
3261 rtx wrk;
3262 char func[18];
3263 int value;
3265 if (TARGET_SH3)
3267 if (!CONST_INT_P (operands[2]))
3269 rtx count = copy_to_mode_reg (SImode, operands[2]);
3270 emit_insn (gen_negsi2 (count, count));
3271 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3272 return 1;
3274 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3275 > 1 + SH_DYNAMIC_SHIFT_COST)
3277 rtx count
3278 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3279 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3280 return 1;
3283 if (!CONST_INT_P (operands[2]))
3284 return 0;
3286 value = INTVAL (operands[2]) & 31;
3288 if (value == 31)
3290 /* If we are called from abs expansion, arrange things so that we
3291 can use a single MT instruction that doesn't clobber the source,
3292 if LICM can hoist out the load of the constant zero. */
3293 if (currently_expanding_to_rtl)
3295 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3296 operands[1]));
3297 emit_insn (gen_mov_neg_si_t (operands[0]));
3298 return 1;
3300 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3301 return 1;
3303 else if (value >= 16 && value <= 19)
3305 wrk = gen_reg_rtx (SImode);
3306 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3307 value -= 16;
3308 while (value--)
3309 gen_ashift (ASHIFTRT, 1, wrk);
3310 emit_move_insn (operands[0], wrk);
3311 return 1;
3313 /* Expand a short sequence inline; for longer ones, call a magic routine.  */
3314 else if (value <= 5)
3316 wrk = gen_reg_rtx (SImode);
3317 emit_move_insn (wrk, operands[1]);
3318 while (value--)
3319 gen_ashift (ASHIFTRT, 1, wrk);
3320 emit_move_insn (operands[0], wrk);
3321 return 1;
3324 wrk = gen_reg_rtx (Pmode);
3326 /* Load the value into an arg reg and call a helper. */
3327 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3328 sprintf (func, "__ashiftrt_r4_%d", value);
3329 function_symbol (wrk, func, SFUNC_STATIC);
3330 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3331 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3332 return 1;
3336 sh_dynamicalize_shift_p (rtx count)
3338 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3341 /* Try to find a good way to implement the combiner pattern
3342 [(set (match_operand:SI 0 "register_operand" "r")
3343 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3344 (match_operand:SI 2 "const_int_operand" "n"))
3345 (match_operand:SI 3 "const_int_operand" "n"))) .
3346 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3347 return 0 for simple right / left or left/right shift combination.
3348 return 1 for a combination of shifts with zero_extend.
3349 return 2 for a combination of shifts with an AND that needs r0.
3350 return 3 for a combination of shifts with an AND that needs an extra
3351 scratch register, when the three highmost bits of the AND mask are clear.
3352 return 4 for a combination of shifts with an AND that needs an extra
3353 scratch register, when any of the three highmost bits of the AND mask
3354 is set.
3355 If ATTRP is set, store an initial right shift width in ATTRP[0],
3356 and the instruction length in ATTRP[1] . These values are not valid
3357 when returning 0.
3358 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3359 shift_amounts for the last shift value that is to be used before the
3360 sign extend. */
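/* As a worked example, for (x << 2) & 0x3fc the mask seen after the shift
   adjustment is 0xff, so a byte zero-extend followed by a single two-bit left
   shift (two instructions) wins and the function returns 1.  */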
3362 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3364 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3365 int left = INTVAL (left_rtx), right;
3366 int best = 0;
3367 int cost, best_cost = 10000;
3368 int best_right = 0, best_len = 0;
3369 int i;
3370 int can_ext;
3372 if (left < 0 || left > 31)
3373 return 0;
3374 if (CONST_INT_P (mask_rtx))
3375 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3376 else
3377 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3378 /* Can this be expressed as a right shift / left shift pair? */
3379 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3380 right = exact_log2 (lsb);
3381 mask2 = ~(mask + lsb - 1);
3382 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
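/* m ^ (m - 1) is a run of ones ending at the lowest set bit of m, so the lsb
   and lsb2 computations above isolate the lowest set bits of mask and mask2.  */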
3383 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3384 if (! mask2)
3385 best_cost = shift_insns[right] + shift_insns[right + left];
3386 /* mask has no trailing zeroes <==> ! right */
3387 else if (! right && mask2 == ~(lsb2 - 1))
3389 int late_right = exact_log2 (lsb2);
3390 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3392 /* Try to use zero extend. */
3393 if (mask2 == ~(lsb2 - 1))
3395 int width, first;
3397 for (width = 8; width <= 16; width += 8)
3399 /* Can we zero-extend right away? */
3400 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3402 cost
3403 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3404 if (cost < best_cost)
3406 best = 1;
3407 best_cost = cost;
3408 best_right = right;
3409 best_len = cost;
3410 if (attrp)
3411 attrp[2] = -1;
3413 continue;
3415 /* ??? Could try to put zero extend into initial right shift,
3416 or even shift a bit left before the right shift. */
3417 /* Determine value of first part of left shift, to get to the
3418 zero extend cut-off point. */
3419 first = width - exact_log2 (lsb2) + right;
3420 if (first >= 0 && right + left - first >= 0)
3422 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3423 + ext_shift_insns[right + left - first];
3424 if (cost < best_cost)
3426 best = 1;
3427 best_cost = cost;
3428 best_right = right;
3429 best_len = cost;
3430 if (attrp)
3431 attrp[2] = first;
3436 /* Try to use r0 AND pattern */
3437 for (i = 0; i <= 2; i++)
3439 if (i > right)
3440 break;
3441 if (! CONST_OK_FOR_K08 (mask >> i))
3442 continue;
3443 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3444 if (cost < best_cost)
3446 best = 2;
3447 best_cost = cost;
3448 best_right = i;
3449 best_len = cost - 1;
3452 /* Try to use a scratch register to hold the AND operand. */
3453 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3454 for (i = 0; i <= 2; i++)
3456 if (i > right)
3457 break;
3458 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3459 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3460 if (cost < best_cost)
3462 best = 4 - can_ext;
3463 best_cost = cost;
3464 best_right = i;
3465 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3469 if (attrp)
3471 attrp[0] = best_right;
3472 attrp[1] = best_len;
3474 return best;
3477 /* This is used in length attributes of the unnamed instructions
3478 corresponding to shl_and_kind return values of 1 and 2. */
3480 shl_and_length (rtx insn)
3482 rtx set_src, left_rtx, mask_rtx;
3483 int attributes[3];
3485 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3486 left_rtx = XEXP (XEXP (set_src, 0), 1);
3487 mask_rtx = XEXP (set_src, 1);
3488 shl_and_kind (left_rtx, mask_rtx, attributes);
3489 return attributes[1];
3492 /* This is used in the length attribute of the and_shl_scratch instruction.  */
3495 shl_and_scr_length (rtx insn)
3497 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3498 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3499 rtx op = XEXP (set_src, 0);
3500 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3501 op = XEXP (XEXP (op, 0), 0);
3502 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3505 /* Generate rtl for instructions for which shl_and_kind advised a particular
3506 method of generating them, i.e. returned nonzero.  */
3509 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3511 int attributes[3];
3512 unsigned HOST_WIDE_INT mask;
3513 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3514 int right, total_shift;
3515 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3517 right = attributes[0];
3518 total_shift = INTVAL (left_rtx) + right;
3519 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3520 switch (kind)
3522 default:
3523 return -1;
3524 case 1:
3526 int first = attributes[2];
3527 rtx operands[3];
3529 if (first < 0)
3531 emit_insn ((mask << right) <= 0xff
3532 ? gen_zero_extendqisi2 (dest,
3533 gen_lowpart (QImode, source))
3534 : gen_zero_extendhisi2 (dest,
3535 gen_lowpart (HImode, source)));
3536 source = dest;
3538 if (source != dest)
3539 emit_insn (gen_movsi (dest, source));
3540 operands[0] = dest;
3541 if (right)
3543 operands[2] = GEN_INT (right);
3544 gen_shifty_hi_op (LSHIFTRT, operands);
3546 if (first > 0)
3548 operands[2] = GEN_INT (first);
3549 gen_shifty_hi_op (ASHIFT, operands);
3550 total_shift -= first;
3551 mask <<= first;
3553 if (first >= 0)
3554 emit_insn (mask <= 0xff
3555 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3556 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3557 if (total_shift > 0)
3559 operands[2] = GEN_INT (total_shift);
3560 gen_shifty_hi_op (ASHIFT, operands);
3562 break;
3564 case 4:
3565 shift_gen_fun = gen_shifty_op;
3566 case 3:
3567 /* If the topmost bit that matters is set, set the topmost bits
3568 that don't matter. This way, we might be able to get a shorter
3569 signed constant. */
3570 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3571 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3572 case 2:
3573 /* Don't expand fine-grained when combining, because that will
3574 make the pattern fail. */
3575 if (currently_expanding_to_rtl
3576 || reload_in_progress || reload_completed)
3578 rtx operands[3];
3580 /* Cases 3 and 4 should be handled by this split
3581 only while combining */
3582 gcc_assert (kind <= 2);
3583 if (right)
3585 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3586 source = dest;
3588 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3589 if (total_shift)
3591 operands[0] = dest;
3592 operands[1] = dest;
3593 operands[2] = GEN_INT (total_shift);
3594 shift_gen_fun (ASHIFT, operands);
3596 break;
3598 else
3600 int neg = 0;
3601 if (kind != 4 && total_shift < 16)
3603 neg = -ext_shift_amounts[total_shift][1];
3604 if (neg > 0)
3605 neg -= ext_shift_amounts[total_shift][2];
3606 else
3607 neg = 0;
3609 emit_insn (gen_and_shl_scratch (dest, source,
3610 GEN_INT (right),
3611 GEN_INT (mask),
3612 GEN_INT (total_shift + neg),
3613 GEN_INT (neg)));
3614 emit_insn (gen_movsi (dest, dest));
3615 break;
3618 return 0;
3621 /* Try to find a good way to implement the combiner pattern
3622 [(set (match_operand:SI 0 "register_operand" "=r")
3623 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3624 (match_operand:SI 2 "const_int_operand" "n")
3625 (match_operand:SI 3 "const_int_operand" "n")
3626 (const_int 0)))
3627 (clobber (reg:SI T_REG))]
3628 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3629 return 0 for simple left / right shift combination.
3630 return 1 for left shift / 8 bit sign extend / left shift.
3631 return 2 for left shift / 16 bit sign extend / left shift.
3632 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3633 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3634 return 5 for left shift / 16 bit sign extend / right shift
3635 return 6 for < 8 bit sign extend / left shift.
3636 return 7 for < 8 bit sign extend / left shift / single right shift.
3637 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3640 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3642 int left, size, insize, ext;
3643 int cost = 0, best_cost;
3644 int kind;
3646 left = INTVAL (left_rtx);
3647 size = INTVAL (size_rtx);
3648 insize = size - left;
3649 gcc_assert (insize > 0);
3650 /* Default to left / right shift. */
3651 kind = 0;
3652 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3653 if (size <= 16)
3655 /* 16 bit shift / sign extend / 16 bit shift */
3656 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3657 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3658 below, by alternative 3 or something even better. */
3659 if (cost < best_cost)
3661 kind = 5;
3662 best_cost = cost;
3665 /* Try a plain sign extend between two shifts. */
3666 for (ext = 16; ext >= insize; ext -= 8)
3668 if (ext <= size)
3670 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3671 if (cost < best_cost)
3673 kind = ext / (unsigned) 8;
3674 best_cost = cost;
3677 /* Check if we can do a sloppy shift with a final signed shift
3678 restoring the sign. */
3679 if (EXT_SHIFT_SIGNED (size - ext))
3680 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3681 /* If not, maybe it's still cheaper to do the second shift sloppy,
3682 and do a final sign extend? */
3683 else if (size <= 16)
3684 cost = ext_shift_insns[ext - insize] + 1
3685 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3686 else
3687 continue;
3688 if (cost < best_cost)
3690 kind = ext / (unsigned) 8 + 2;
3691 best_cost = cost;
3694 /* Check if we can sign extend in r0 */
3695 if (insize < 8)
3697 cost = 3 + shift_insns[left];
3698 if (cost < best_cost)
3700 kind = 6;
3701 best_cost = cost;
3703 /* Try the same with a final signed shift. */
3704 if (left < 31)
3706 cost = 3 + ext_shift_insns[left + 1] + 1;
3707 if (cost < best_cost)
3709 kind = 7;
3710 best_cost = cost;
3714 if (TARGET_SH3)
3716 /* Try to use a dynamic shift. */
3717 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3718 if (cost < best_cost)
3720 kind = 0;
3721 best_cost = cost;
3724 if (costp)
3725 *costp = cost;
3726 return kind;
3729 /* Function to be used in the length attribute of the instructions
3730 implementing this pattern. */
3733 shl_sext_length (rtx insn)
3735 rtx set_src, left_rtx, size_rtx;
3736 int cost;
3738 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3739 left_rtx = XEXP (XEXP (set_src, 0), 1);
3740 size_rtx = XEXP (set_src, 1);
3741 shl_sext_kind (left_rtx, size_rtx, &cost);
3742 return cost;
3745 /* Generate rtl for this pattern */
3748 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3750 int kind;
3751 int left, size, insize, cost;
3752 rtx operands[3];
3754 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3755 left = INTVAL (left_rtx);
3756 size = INTVAL (size_rtx);
3757 insize = size - left;
3758 switch (kind)
3760 case 1:
3761 case 2:
3762 case 3:
3763 case 4:
3765 int ext = kind & 1 ? 8 : 16;
3766 int shift2 = size - ext;
3768 /* Don't expand fine-grained when combining, because that will
3769 make the pattern fail. */
3770 if (! currently_expanding_to_rtl
3771 && ! reload_in_progress && ! reload_completed)
3773 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3774 emit_insn (gen_movsi (dest, source));
3775 break;
3777 if (dest != source)
3778 emit_insn (gen_movsi (dest, source));
3779 operands[0] = dest;
3780 if (ext - insize)
3782 operands[2] = GEN_INT (ext - insize);
3783 gen_shifty_hi_op (ASHIFT, operands);
3785 emit_insn (kind & 1
3786 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3787 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3788 if (kind <= 2)
3790 if (shift2)
3792 operands[2] = GEN_INT (shift2);
3793 gen_shifty_op (ASHIFT, operands);
3796 else
3798 if (shift2 > 0)
3800 if (EXT_SHIFT_SIGNED (shift2))
3802 operands[2] = GEN_INT (shift2 + 1);
3803 gen_shifty_op (ASHIFT, operands);
3804 operands[2] = const1_rtx;
3805 gen_shifty_op (ASHIFTRT, operands);
3806 break;
3808 operands[2] = GEN_INT (shift2);
3809 gen_shifty_hi_op (ASHIFT, operands);
3811 else if (shift2)
3813 operands[2] = GEN_INT (-shift2);
3814 gen_shifty_hi_op (LSHIFTRT, operands);
3816 emit_insn (size <= 8
3817 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3818 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3820 break;
3822 case 5:
3824 int i = 16 - size;
3825 if (! currently_expanding_to_rtl
3826 && ! reload_in_progress && ! reload_completed)
3827 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3828 else
3830 operands[0] = dest;
3831 operands[2] = GEN_INT (16 - insize);
3832 gen_shifty_hi_op (ASHIFT, operands);
3833 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3835 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3836 while (--i >= 0)
3837 gen_ashift (ASHIFTRT, 1, dest);
3838 break;
3840 case 6:
3841 case 7:
3842 /* Don't expand fine-grained when combining, because that will
3843 make the pattern fail. */
3844 if (! currently_expanding_to_rtl
3845 && ! reload_in_progress && ! reload_completed)
3847 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3848 emit_insn (gen_movsi (dest, source));
3849 break;
3851 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3852 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3853 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3854 operands[0] = dest;
3855 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3856 gen_shifty_op (ASHIFT, operands);
3857 if (kind == 7)
3858 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3859 break;
3860 default:
3861 return -1;
3863 return 0;
3866 /* Prefix a symbol_ref name with "datalabel". */
3869 gen_datalabel_ref (rtx sym)
3871 const char *str;
3873 if (GET_CODE (sym) == LABEL_REF)
3874 return gen_rtx_CONST (GET_MODE (sym),
3875 gen_rtx_UNSPEC (GET_MODE (sym),
3876 gen_rtvec (1, sym),
3877 UNSPEC_DATALABEL));
3879 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3881 str = XSTR (sym, 0);
3882 /* Share all SYMBOL_REF strings with the same value - that is important
3883 for cse. */
3884 str = IDENTIFIER_POINTER (get_identifier (str));
3885 XSTR (sym, 0) = str;
3887 return sym;
3891 static alloc_pool label_ref_list_pool;
3893 typedef struct label_ref_list_d
3895 rtx label;
3896 struct label_ref_list_d *next;
3897 } *label_ref_list_t;
3899 /* The SH cannot load a large constant into a register; constants have to
3900 come from a pc relative load. The reference of a pc relative load
3901 instruction must be less than 1k in front of the instruction. This
3902 means that we often have to dump a constant inside a function, and
3903 generate code to branch around it.
3905 It is important to minimize this, since the branches will slow things
3906 down and make things bigger.
3908 Worst case code looks like:
3910 mov.l L1,rn
3911 bra L2
3913 align
3914 L1: .long value
3918 mov.l L3,rn
3919 bra L4
3921 align
3922 L3: .long value
3926 We fix this by performing a scan before scheduling, which notices which
3927 instructions need to have their operands fetched from the constant table
3928 and builds the table.
3930 The algorithm is:
3932 scan, find an instruction which needs a pcrel move. Look forward, find the
3933 last barrier which is within MAX_COUNT bytes of the requirement.
3934 If there isn't one, make one. Process all the instructions between
3935 the find and the barrier.
3937 In the above example, we can tell that L3 is within 1k of L1, so
3938 the first move can be shrunk from the 3 insn+constant sequence into
3939 just 1 insn, and the constant moved to L3 to make:
3941 mov.l L1,rn
3943 mov.l L3,rn
3944 bra L4
3946 align
3947 L3:.long value
3948 L4:.long value
3950 Then the second move becomes the target for the shortening process. */
3952 typedef struct
3954 rtx value; /* Value in table. */
3955 rtx label; /* Label of value. */
3956 label_ref_list_t wend; /* End of window. */
3957 enum machine_mode mode; /* Mode of value. */
3959 /* True if this constant is accessed as part of a post-increment
3960 sequence. Note that HImode constants are never accessed in this way. */
3961 bool part_of_sequence_p;
3962 } pool_node;
3964 /* The maximum number of constants that can fit into one pool, since
3965 constants in the range 0..510 are at least 2 bytes long, and in the
3966 range from there to 1018 at least 4 bytes. */
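/* As a rough sanity check on that bound (an estimate only): 510 / 2 = 255
   two-byte entries plus (1018 - 510) / 4 = 127 four-byte entries would give
   382 slots; the value chosen below is somewhat smaller, which presumably
   leaves slack for alignment padding and mixed-size pools.  */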
3968 #define MAX_POOL_SIZE 372
3969 static pool_node pool_vector[MAX_POOL_SIZE];
3970 static int pool_size;
3971 static rtx pool_window_label;
3972 static int pool_window_last;
3974 static int max_labelno_before_reorg;
3976 /* ??? If we need a constant in HImode which is the truncated value of a
3977 constant we need in SImode, we could combine the two entries thus saving
3978 two bytes. Is this common enough to be worth the effort of implementing
3979 it? */
3981 /* ??? This stuff should be done at the same time that we shorten branches.
3982 As it is now, we must assume that all branches are the maximum size, and
3983 this causes us to almost always output constant pools sooner than
3984 necessary. */
3986 /* Add a constant to the pool and return its label. */
3988 static rtx
3989 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3991 int i;
3992 rtx lab, new_rtx;
3993 label_ref_list_t ref, newref;
3995 /* First see if we've already got it. */
3996 for (i = 0; i < pool_size; i++)
3998 if (x->code == pool_vector[i].value->code
3999 && mode == pool_vector[i].mode)
4001 if (x->code == CODE_LABEL)
4003 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4004 continue;
4006 if (rtx_equal_p (x, pool_vector[i].value))
4008 lab = new_rtx = 0;
4009 if (! last_value
4010 || ! i
4011 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4013 new_rtx = gen_label_rtx ();
4014 LABEL_REFS (new_rtx) = pool_vector[i].label;
4015 pool_vector[i].label = lab = new_rtx;
4017 if (lab && pool_window_label)
4019 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4020 newref->label = pool_window_label;
4021 ref = pool_vector[pool_window_last].wend;
4022 newref->next = ref;
4023 pool_vector[pool_window_last].wend = newref;
4025 if (new_rtx)
4026 pool_window_label = new_rtx;
4027 pool_window_last = i;
4028 return lab;
4033 /* Need a new one. */
4034 pool_vector[pool_size].value = x;
4035 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4037 lab = 0;
4038 pool_vector[pool_size - 1].part_of_sequence_p = true;
4040 else
4041 lab = gen_label_rtx ();
4042 pool_vector[pool_size].mode = mode;
4043 pool_vector[pool_size].label = lab;
4044 pool_vector[pool_size].wend = NULL;
4045 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4046 if (lab && pool_window_label)
4048 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4049 newref->label = pool_window_label;
4050 ref = pool_vector[pool_window_last].wend;
4051 newref->next = ref;
4052 pool_vector[pool_window_last].wend = newref;
4054 if (lab)
4055 pool_window_label = lab;
4056 pool_window_last = pool_size;
4057 pool_size++;
4058 return lab;
4061 /* Output the literal table. START, if nonzero, is the first instruction
4062 this table is needed for, and also indicates that there is at least one
4063 casesi_worker_2 instruction; we have to emit the operand3 labels from
4064 these insns at a 4-byte aligned position. BARRIER is the barrier
4065 after which we are to place the table. */
4067 static void
4068 dump_table (rtx start, rtx barrier)
4070 rtx scan = barrier;
4071 int i;
4072 int need_align = 1;
4073 rtx lab;
4074 label_ref_list_t ref;
4075 int have_df = 0;
4077 /* Do two passes, first time dump out the HI sized constants. */
4079 for (i = 0; i < pool_size; i++)
4081 pool_node *p = &pool_vector[i];
4083 if (p->mode == HImode)
4085 if (need_align)
4087 scan = emit_insn_after (gen_align_2 (), scan);
4088 need_align = 0;
4090 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4091 scan = emit_label_after (lab, scan);
4092 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4093 scan);
4094 for (ref = p->wend; ref; ref = ref->next)
4096 lab = ref->label;
4097 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4100 else if (p->mode == DFmode)
4101 have_df = 1;
4104 need_align = 1;
4106 if (start)
4108 scan = emit_insn_after (gen_align_4 (), scan);
4109 need_align = 0;
4110 for (; start != barrier; start = NEXT_INSN (start))
4111 if (NONJUMP_INSN_P (start)
4112 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4114 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4115 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4117 scan = emit_label_after (lab, scan);
4120 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4122 rtx align_insn = NULL_RTX;
4124 scan = emit_label_after (gen_label_rtx (), scan);
4125 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4126 need_align = 0;
4128 for (i = 0; i < pool_size; i++)
4130 pool_node *p = &pool_vector[i];
4132 switch (p->mode)
4134 case HImode:
4135 break;
4136 case SImode:
4137 case SFmode:
4138 if (align_insn && !p->part_of_sequence_p)
4140 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4141 emit_label_before (lab, align_insn);
4142 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4143 align_insn);
4144 for (ref = p->wend; ref; ref = ref->next)
4146 lab = ref->label;
4147 emit_insn_before (gen_consttable_window_end (lab),
4148 align_insn);
4150 delete_insn (align_insn);
4151 align_insn = NULL_RTX;
4152 continue;
4154 else
4156 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4157 scan = emit_label_after (lab, scan);
4158 scan = emit_insn_after (gen_consttable_4 (p->value,
4159 const0_rtx), scan);
4160 need_align = ! need_align;
4162 break;
4163 case DFmode:
4164 if (need_align)
4166 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4167 align_insn = scan;
4168 need_align = 0;
4170 case DImode:
4171 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4172 scan = emit_label_after (lab, scan);
4173 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4174 scan);
4175 break;
4176 default:
4177 gcc_unreachable ();
4180 if (p->mode != HImode)
4182 for (ref = p->wend; ref; ref = ref->next)
4184 lab = ref->label;
4185 scan = emit_insn_after (gen_consttable_window_end (lab),
4186 scan);
4191 pool_size = 0;
4194 for (i = 0; i < pool_size; i++)
4196 pool_node *p = &pool_vector[i];
4198 switch (p->mode)
4200 case HImode:
4201 break;
4202 case SImode:
4203 case SFmode:
4204 if (need_align)
4206 need_align = 0;
4207 scan = emit_label_after (gen_label_rtx (), scan);
4208 scan = emit_insn_after (gen_align_4 (), scan);
4210 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4211 scan = emit_label_after (lab, scan);
4212 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4213 scan);
4214 break;
4215 case DFmode:
4216 case DImode:
4217 if (need_align)
4219 need_align = 0;
4220 scan = emit_label_after (gen_label_rtx (), scan);
4221 scan = emit_insn_after (gen_align_4 (), scan);
4223 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4224 scan = emit_label_after (lab, scan);
4225 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4226 scan);
4227 break;
4228 default:
4229 gcc_unreachable ();
4232 if (p->mode != HImode)
4234 for (ref = p->wend; ref; ref = ref->next)
4236 lab = ref->label;
4237 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4242 scan = emit_insn_after (gen_consttable_end (), scan);
4243 scan = emit_barrier_after (scan);
4244 pool_size = 0;
4245 pool_window_label = NULL_RTX;
4246 pool_window_last = 0;
4249 /* Return nonzero if constant would be an ok source for a
4250 mov.w instead of a mov.l. */
4252 static int
4253 hi_const (rtx src)
4255 return (CONST_INT_P (src)
4256 && INTVAL (src) >= -32768
4257 && INTVAL (src) <= 32767);
4260 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4262 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4264 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4265 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4266 need to fix it if the input value is CONST_OK_FOR_I08. */
4268 static int
4269 broken_move (rtx insn)
4271 if (NONJUMP_INSN_P (insn))
4273 rtx pat = PATTERN (insn);
4274 if (GET_CODE (pat) == PARALLEL)
4275 pat = XVECEXP (pat, 0, 0);
4276 if (GET_CODE (pat) == SET
4277 /* We can load any 8-bit value if we don't care what the high
4278 order bits end up as. */
4279 && GET_MODE (SET_DEST (pat)) != QImode
4280 && (CONSTANT_P (SET_SRC (pat))
4281 /* Match mova_const. */
4282 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4283 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4284 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4285 && ! (TARGET_SH2E
4286 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4287 && (fp_zero_operand (SET_SRC (pat))
4288 || fp_one_operand (SET_SRC (pat)))
4289 /* In general we don't know the current setting of fpscr, so disable fldi.
4290 There is an exception if this was a register-register move
4291 before reload - and hence it was ascertained that we have
4292 single precision setting - and in a post-reload optimization
4293 we changed this to do a constant load. In that case
4294 we don't have an r0 clobber, hence we must use fldi. */
4295 && (TARGET_FMOVD
4296 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4297 == SCRATCH))
4298 && REG_P (SET_DEST (pat))
4299 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4300 && ! (TARGET_SH2A
4301 && GET_MODE (SET_DEST (pat)) == SImode
4302 && (satisfies_constraint_I20 (SET_SRC (pat))
4303 || satisfies_constraint_I28 (SET_SRC (pat))))
4304 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4305 return 1;
4308 return 0;
4311 static int
4312 mova_p (rtx insn)
4314 return (NONJUMP_INSN_P (insn)
4315 && GET_CODE (PATTERN (insn)) == SET
4316 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4317 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4318 /* Don't match mova_const. */
4319 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4322 /* Fix up a mova from a switch that went out of range. */
4323 static void
4324 fixup_mova (rtx mova)
4326 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4327 if (! flag_pic)
4329 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4330 INSN_CODE (mova) = -1;
4332 else
4334 rtx worker = mova;
4335 rtx lab = gen_label_rtx ();
4336 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4340 worker = NEXT_INSN (worker);
4341 gcc_assert (worker
4342 && !LABEL_P (worker)
4343 && !JUMP_P (worker));
4344 } while (NOTE_P (worker)
4345 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4346 wpat = PATTERN (worker);
4347 wpat0 = XVECEXP (wpat, 0, 0);
4348 wpat1 = XVECEXP (wpat, 0, 1);
4349 wsrc = SET_SRC (wpat0);
4350 PATTERN (worker) = (gen_casesi_worker_2
4351 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4352 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4353 XEXP (wpat1, 0)));
4354 INSN_CODE (worker) = -1;
4355 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4356 base = gen_rtx_LABEL_REF (Pmode, lab);
4357 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4358 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4359 INSN_CODE (mova) = -1;
4363 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4364 *num_mova, and check if the new mova is not nested within the first one.
4365 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4366 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4367 static int
4368 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4370 int n_addr = 0; /* Initialization to shut up spurious warning. */
4371 int f_target, n_target = 0; /* Likewise. */
4373 if (optimize)
4375 /* If NEW_MOVA has no address yet, it will be handled later. */
4376 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4377 return -1;
4379 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4380 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4381 if (n_addr > n_target || n_addr + 1022 < n_target)
4383 /* Change the mova into a load.
4384 broken_move will then return true for it. */
4385 fixup_mova (new_mova);
4386 return 1;
4389 if (!(*num_mova)++)
4391 *first_mova = new_mova;
4392 return 2;
4394 if (!optimize
4395 || ((f_target
4396 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4397 >= n_target))
4398 return -1;
4400 (*num_mova)--;
4401 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4402 > n_target - n_addr)
4404 fixup_mova (*first_mova);
4405 return 0;
4407 else
4409 fixup_mova (new_mova);
4410 return 1;
4414 /* Find the last barrier from insn FROM which is close enough to hold the
4415 constant pool. If we can't find one, then create one near the end of
4416 the range. */
4418 static rtx
4419 find_barrier (int num_mova, rtx mova, rtx from)
4421 int count_si = 0;
4422 int count_hi = 0;
4423 int found_hi = 0;
4424 int found_si = 0;
4425 int found_di = 0;
4426 int hi_align = 2;
4427 int si_align = 2;
4428 int leading_mova = num_mova;
4429 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4430 int si_limit;
4431 int hi_limit;
4432 rtx orig = from;
4433 rtx last_got = NULL_RTX;
4434 rtx last_symoff = NULL_RTX;
4436 /* For HImode: range is 510, add 4 because pc counts from address of
4437 second instruction after this one, subtract 2 for the jump instruction
4438 that we may need to emit before the table, subtract 2 for the instruction
4439 that fills the jump delay slot (in very rare cases, reorg will take an
4440 instruction from after the constant pool or will leave the delay slot
4441 empty). This gives 510.
4442 For SImode: range is 1020, add 4 because pc counts from address of
4443 second instruction after this one, subtract 2 in case pc is 2 byte
4444 aligned, subtract 2 for the jump instruction that we may need to emit
4445 before the table, subtract 2 for the instruction that fills the jump
4446 delay slot. This gives 1018. */
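/* Restating the arithmetic above in one place:
     hi_limit = 510  + 4 - 2 - 2     = 510
     si_limit = 1020 + 4 - 2 - 2 - 2 = 1018  */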
4448 /* The branch will always be shortened now that the reference address for
4449 forward branches is the successor address, thus we need no longer make
4450 adjustments to the [sh]i_limit for -O0. */
4452 si_limit = 1018;
4453 hi_limit = 510;
4455 while (from && count_si < si_limit && count_hi < hi_limit)
4457 int inc = get_attr_length (from);
4458 int new_align = 1;
4460 /* If this is a label that existed at the time of the compute_alignments
4461 call, determine the alignment. N.B. When find_barrier recurses for
4462 an out-of-reach mova, we might see labels at the start of previously
4463 inserted constant tables. */
4464 if (LABEL_P (from)
4465 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4467 if (optimize)
4468 new_align = 1 << label_to_alignment (from);
4469 else if (BARRIER_P (prev_nonnote_insn (from)))
4470 new_align = 1 << barrier_align (from);
4471 else
4472 new_align = 1;
4473 inc = 0;
4475 /* In case we are scanning a constant table because of recursion, check
4476 for explicit alignments. If the table is long, we might be forced
4477 to emit the new table in front of it; the length of the alignment
4478 might be the last straw. */
4479 else if (NONJUMP_INSN_P (from)
4480 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4481 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4482 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4483 /* When we find the end of a constant table, paste the new constant
4484 at the end. That is better than putting it in front because
4485 this way, we don't need extra alignment for adding a 4-byte-aligned
4486 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4487 else if (NONJUMP_INSN_P (from)
4488 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4489 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4490 return from;
4492 if (BARRIER_P (from))
4494 rtx next;
4496 found_barrier = from;
4498 /* If we are at the end of the function, or in front of an alignment
4499 instruction, we need not insert an extra alignment. We prefer
4500 this kind of barrier. */
4501 if (barrier_align (from) > 2)
4502 good_barrier = from;
4504 /* If we are at the end of a hot/cold block, dump the constants
4505 here. */
4506 next = NEXT_INSN (from);
4507 if (next
4508 && NOTE_P (next)
4509 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4510 break;
4513 if (broken_move (from))
4515 rtx pat, src, dst;
4516 enum machine_mode mode;
4518 pat = PATTERN (from);
4519 if (GET_CODE (pat) == PARALLEL)
4520 pat = XVECEXP (pat, 0, 0);
4521 src = SET_SRC (pat);
4522 dst = SET_DEST (pat);
4523 mode = GET_MODE (dst);
4525 /* GOT pc-relative setting comes in a pair of
4526 mova .L8,r0
4527 mov.l .L8,r12
4528 instructions. (plus add r0,r12).
4529 Remember if we see one without the other. */
4530 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4531 last_got = last_got ? NULL_RTX : from;
4532 else if (PIC_ADDR_P (src))
4533 last_got = last_got ? NULL_RTX : from;
4535 /* We must explicitly check the mode, because sometimes the
4536 front end will generate code to load unsigned constants into
4537 HImode targets without properly sign extending them. */
4538 if (mode == HImode
4539 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4541 found_hi += 2;
4542 /* We put the short constants before the long constants, so
4543 we must count the length of short constants in the range
4544 for the long constants. */
4545 /* ??? This isn't optimal, but is easy to do. */
4546 si_limit -= 2;
4548 else
4550 /* We dump DF/DI constants before SF/SI ones, because
4551 the limit is the same, but the alignment requirements
4552 are higher. We may waste up to 4 additional bytes
4553 for alignment, and the DF/DI constant may have
4554 another SF/SI constant placed before it. */
4555 if (TARGET_SHCOMPACT
4556 && ! found_di
4557 && (mode == DFmode || mode == DImode))
4559 found_di = 1;
4560 si_limit -= 8;
4562 while (si_align > 2 && found_si + si_align - 2 > count_si)
4563 si_align >>= 1;
4564 if (found_si > count_si)
4565 count_si = found_si;
4566 found_si += GET_MODE_SIZE (mode);
4567 if (num_mova)
4568 si_limit -= GET_MODE_SIZE (mode);
4572 if (mova_p (from))
4574 switch (untangle_mova (&num_mova, &mova, from))
4576 case 1:
4577 if (flag_pic)
4579 rtx src = SET_SRC (PATTERN (from));
4580 if (GET_CODE (src) == CONST
4581 && GET_CODE (XEXP (src, 0)) == UNSPEC
4582 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4583 last_symoff = from;
4585 break;
4586 case 0: return find_barrier (0, 0, mova);
4587 case 2:
4589 leading_mova = 0;
4590 barrier_before_mova
4591 = good_barrier ? good_barrier : found_barrier;
4593 default: break;
4595 if (found_si > count_si)
4596 count_si = found_si;
4598 else if (JUMP_TABLE_DATA_P (from))
4600 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4601 || (num_mova
4602 && (prev_nonnote_insn (from)
4603 == XEXP (MOVA_LABELREF (mova), 0))))
4604 num_mova--;
4605 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4607 /* We have just passed the barrier in front of the
4608 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4609 the ADDR_DIFF_VEC is accessed as data, just like our pool
4610 constants, this is a good opportunity to accommodate what
4611 we have gathered so far.
4612 If we waited any longer, we could end up at a barrier in
4613 front of code, which gives worse cache usage for separated
4614 instruction / data caches. */
4615 good_barrier = found_barrier;
4616 break;
4618 else
4620 rtx body = PATTERN (from);
4621 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4624 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4625 else if (JUMP_P (from)
4626 && ! TARGET_SH2
4627 && ! TARGET_SMALLCODE)
4628 new_align = 4;
4630 /* There is a possibility that a bf is transformed into a bf/s by the
4631 delay slot scheduler. */
4632 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4633 && get_attr_type (from) == TYPE_CBRANCH
4634 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4635 inc += 2;
4637 if (found_si)
4639 count_si += inc;
4640 if (new_align > si_align)
4642 si_limit -= (count_si - 1) & (new_align - si_align);
4643 si_align = new_align;
4645 count_si = (count_si + new_align - 1) & -new_align;
4647 if (found_hi)
4649 count_hi += inc;
4650 if (new_align > hi_align)
4652 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4653 hi_align = new_align;
4655 count_hi = (count_hi + new_align - 1) & -new_align;
4657 from = NEXT_INSN (from);
4660 if (num_mova)
4662 if (leading_mova)
4664 /* Try as we might, the leading mova is out of range. Change
4665 it into a load (which will become a pcload) and retry. */
4666 fixup_mova (mova);
4667 return find_barrier (0, 0, mova);
4669 else
4671 /* Insert the constant pool table before the mova instruction,
4672 to prevent the mova label reference from going out of range. */
4673 from = mova;
4674 good_barrier = found_barrier = barrier_before_mova;
4678 if (found_barrier)
4680 if (good_barrier && next_real_insn (found_barrier))
4681 found_barrier = good_barrier;
4683 else
4685 /* We didn't find a barrier in time to dump our stuff,
4686 so we'll make one. */
4687 rtx label = gen_label_rtx ();
4689 /* Don't emit a constant table in the middle of insns for
4690 casesi_worker_2. This is a bit of overkill, but it is enough
4691 because casesi_worker_2 does not appear very frequently. */
4692 if (last_symoff)
4693 from = last_symoff;
4695 /* If we exceeded the range, then we must back up over the last
4696 instruction we looked at. Otherwise, we just need to undo the
4697 NEXT_INSN at the end of the loop. */
4698 if (PREV_INSN (from) != orig
4699 && (count_hi > hi_limit || count_si > si_limit))
4700 from = PREV_INSN (PREV_INSN (from));
4701 else
4702 from = PREV_INSN (from);
4704 /* Don't emit a constant table in the middle of global pointer setting,
4705 since that would move the addressing base GOT into another table.
4706 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4707 in the pool anyway, so just move up the whole constant pool. */
4708 if (last_got)
4709 from = PREV_INSN (last_got);
4711 /* Don't insert the constant pool table at the position which
4712 may be the landing pad. */
4713 if (flag_exceptions
4714 && CALL_P (from)
4715 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4716 from = PREV_INSN (from);
4718 /* Walk back to be just before any jump or label.
4719 Putting it before a label reduces the number of times the branch
4720 around the constant pool table will be hit. Putting it before
4721 a jump makes it more likely that the bra delay slot will be
4722 filled. */
4723 while (NOTE_P (from) || JUMP_P (from)
4724 || LABEL_P (from))
4725 from = PREV_INSN (from);
4727 from = emit_jump_insn_after (gen_jump (label), from);
4728 JUMP_LABEL (from) = label;
4729 LABEL_NUSES (label) = 1;
4730 found_barrier = emit_barrier_after (from);
4731 emit_label_after (label, found_barrier);
4734 return found_barrier;
4737 /* If the instruction INSN is implemented by a special function, and we can
4738 positively find the register that is used to call the sfunc, and this
4739 register is not used anywhere else in this instruction - except as the
4740 destination of a set, return this register; else, return 0. */
4742 sfunc_uses_reg (rtx insn)
4744 int i;
4745 rtx pattern, part, reg_part, reg;
4747 if (!NONJUMP_INSN_P (insn))
4748 return 0;
4749 pattern = PATTERN (insn);
4750 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4751 return 0;
4753 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4755 part = XVECEXP (pattern, 0, i);
4756 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4757 reg_part = part;
4759 if (! reg_part)
4760 return 0;
4761 reg = XEXP (reg_part, 0);
4762 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4764 part = XVECEXP (pattern, 0, i);
4765 if (part == reg_part || GET_CODE (part) == CLOBBER)
4766 continue;
4767 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4768 && REG_P (SET_DEST (part)))
4769 ? SET_SRC (part) : part)))
4770 return 0;
4772 return reg;
4775 /* See if the only way in which INSN uses REG is by calling it, or by
4776 setting it while calling it. Set *SET to a SET rtx if the register
4777 is set by INSN. */
4779 static int
4780 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4782 rtx pattern, reg2;
4784 *set = NULL_RTX;
4786 reg2 = sfunc_uses_reg (insn);
4787 if (reg2 && REGNO (reg2) == REGNO (reg))
4789 pattern = single_set (insn);
4790 if (pattern
4791 && REG_P (SET_DEST (pattern))
4792 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4793 *set = pattern;
4794 return 0;
4796 if (!CALL_P (insn))
4798 /* We don't use rtx_equal_p because we don't care if the mode is
4799 different. */
4800 pattern = single_set (insn);
4801 if (pattern
4802 && REG_P (SET_DEST (pattern))
4803 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4805 rtx par, part;
4806 int i;
4808 *set = pattern;
4809 par = PATTERN (insn);
4810 if (GET_CODE (par) == PARALLEL)
4811 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4813 part = XVECEXP (par, 0, i);
4814 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4815 return 1;
4817 return reg_mentioned_p (reg, SET_SRC (pattern));
4820 return 1;
4823 pattern = PATTERN (insn);
4825 if (GET_CODE (pattern) == PARALLEL)
4827 int i;
4829 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4830 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4831 return 1;
4832 pattern = XVECEXP (pattern, 0, 0);
4835 if (GET_CODE (pattern) == SET)
4837 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4839 /* We don't use rtx_equal_p, because we don't care if the
4840 mode is different. */
4841 if (!REG_P (SET_DEST (pattern))
4842 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4843 return 1;
4845 *set = pattern;
4848 pattern = SET_SRC (pattern);
4851 if (GET_CODE (pattern) != CALL
4852 || !MEM_P (XEXP (pattern, 0))
4853 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4854 return 1;
4856 return 0;
4859 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4860 general registers. Bits 0..15 mean that the respective registers
4861 are used as inputs in the instruction. Bits 16..31 mean that the
4862 registers 0..15, respectively, are used as outputs, or are clobbered.
4863 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
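/* For instance (a sketch with illustrative register numbers): for the
   pattern (set (reg:SI 3) (plus:SI (reg:SI 1) (reg:SI 2))) this returns
   (1 << (3 + 16)) | (1 << 1) | (1 << 2) == 0x00080006, i.e. r3 is written
   while r1 and r2 are read.  SImode occupies a single general register
   here, so each REG contributes a one-bit mask.  */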
4865 regs_used (rtx x, int is_dest)
4867 enum rtx_code code;
4868 const char *fmt;
4869 int i, used = 0;
4871 if (! x)
4872 return used;
4873 code = GET_CODE (x);
4874 switch (code)
4876 case REG:
4877 if (REGNO (x) < 16)
4878 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4879 << (REGNO (x) + is_dest));
4880 return 0;
4881 case SUBREG:
4883 rtx y = SUBREG_REG (x);
4885 if (!REG_P (y))
4886 break;
4887 if (REGNO (y) < 16)
4888 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4889 << (REGNO (y) +
4890 subreg_regno_offset (REGNO (y),
4891 GET_MODE (y),
4892 SUBREG_BYTE (x),
4893 GET_MODE (x)) + is_dest));
4894 return 0;
4896 case SET:
4897 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4898 case RETURN:
4899 /* If there was a return value, it must have been indicated with USE. */
4900 return 0x00ffff00;
4901 case CLOBBER:
4902 is_dest = 1;
4903 break;
4904 case MEM:
4905 is_dest = 0;
4906 break;
4907 case CALL:
4908 used |= 0x00ff00f0;
4909 break;
4910 default:
4911 break;
4914 fmt = GET_RTX_FORMAT (code);
4916 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4918 if (fmt[i] == 'E')
4920 register int j;
4921 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4922 used |= regs_used (XVECEXP (x, i, j), is_dest);
4924 else if (fmt[i] == 'e')
4925 used |= regs_used (XEXP (x, i), is_dest);
4927 return used;
4930 /* Create an instruction that prevents redirection of a conditional branch
4931 to the destination of the JUMP with address ADDR.
4932 If the branch needs to be implemented as an indirect jump, try to find
4933 a scratch register for it.
4934 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4935 If any preceding insn that doesn't fit into a delay slot is good enough,
4936 pass 1. Pass 2 if a definite blocking insn is needed.
4937 -1 is used internally to avoid deep recursion.
4938 If a blocking instruction is made or recognized, return it. */
4940 static rtx
4941 gen_block_redirect (rtx jump, int addr, int need_block)
4943 int dead = 0;
4944 rtx prev = prev_nonnote_insn (jump);
4945 rtx dest;
4947 /* First, check if we already have an instruction that satisfies our need. */
4948 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4950 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4951 return prev;
4952 if (GET_CODE (PATTERN (prev)) == USE
4953 || GET_CODE (PATTERN (prev)) == CLOBBER
4954 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4955 prev = jump;
4956 else if ((need_block &= ~1) < 0)
4957 return prev;
4958 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4959 need_block = 0;
4961 if (GET_CODE (PATTERN (jump)) == RETURN)
4963 if (! need_block)
4964 return prev;
4965 /* Reorg even does nasty things with return insns that cause branches
4966 to go out of range - see find_end_label and callers. */
4967 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4969 /* We can't use JUMP_LABEL here because it might be undefined
4970 when not optimizing. */
4971 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4972 /* If the branch is out of range, try to find a scratch register for it. */
4973 if (optimize
4974 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4975 > 4092 + 4098))
4977 rtx scan;
4978 /* Don't look for the stack pointer as a scratch register,
4979 it would cause trouble if an interrupt occurred. */
4980 unsigned attempt = 0x7fff, used;
4981 int jump_left = flag_expensive_optimizations + 1;
4983 /* It is likely that the most recent eligible instruction is wanted for
4984 the delay slot. Therefore, find out which registers it uses, and
4985 try to avoid using them. */
4987 for (scan = jump; (scan = PREV_INSN (scan)); )
4989 enum rtx_code code;
4991 if (INSN_DELETED_P (scan))
4992 continue;
4993 code = GET_CODE (scan);
4994 if (code == CODE_LABEL || code == JUMP_INSN)
4995 break;
4996 if (code == INSN
4997 && GET_CODE (PATTERN (scan)) != USE
4998 && GET_CODE (PATTERN (scan)) != CLOBBER
4999 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5001 attempt &= ~regs_used (PATTERN (scan), 0);
5002 break;
5005 for (used = dead = 0, scan = JUMP_LABEL (jump);
5006 (scan = NEXT_INSN (scan)); )
5008 enum rtx_code code;
5010 if (INSN_DELETED_P (scan))
5011 continue;
5012 code = GET_CODE (scan);
5013 if (INSN_P (scan))
5015 used |= regs_used (PATTERN (scan), 0);
5016 if (code == CALL_INSN)
5017 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5018 dead |= (used >> 16) & ~used;
5019 if (dead & attempt)
5021 dead &= attempt;
5022 break;
5024 if (code == JUMP_INSN)
5026 if (jump_left-- && simplejump_p (scan))
5027 scan = JUMP_LABEL (scan);
5028 else
5029 break;
5033 /* Mask out the stack pointer again, in case it was
5034 the only 'free' register we have found. */
5035 dead &= 0x7fff;
5037 /* If the immediate destination is still in range, check for possible
5038 threading with a jump beyond the delay slot insn.
5039 Don't check if we are called recursively; the jump has been or will be
5040 checked in a different invocation then. */
5042 else if (optimize && need_block >= 0)
5044 rtx next = next_active_insn (next_active_insn (dest));
5045 if (next && JUMP_P (next)
5046 && GET_CODE (PATTERN (next)) == SET
5047 && recog_memoized (next) == CODE_FOR_jump_compact)
5049 dest = JUMP_LABEL (next);
5050 if (dest
5051 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5052 > 4092 + 4098))
5053 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5057 if (dead)
5059 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5061 /* It would be nice if we could convert the jump into an indirect
5062 jump / far branch right now, and thus exposing all constituent
5063 instructions to further optimization. However, reorg uses
5064 simplejump_p to determine if there is an unconditional jump where
5065 it should try to schedule instructions from the target of the
5066 branch; simplejump_p fails for indirect jumps even if they have
5067 a JUMP_LABEL. */
5068 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5069 (reg, GEN_INT (unspec_bbr_uid++)),
5070 jump);
5071 /* ??? We would like this to have the scope of the jump, but that
5072 scope will change when a delay slot insn of an inner scope is added.
5073 Hence, after delay slot scheduling, we'll have to expect
5074 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5075 the jump. */
5077 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5078 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5079 return insn;
5081 else if (need_block)
5082 /* We can't use JUMP_LABEL here because it might be undefined
5083 when not optimizing. */
5084 return emit_insn_before (gen_block_branch_redirect
5085 (GEN_INT (unspec_bbr_uid++)),
5086 jump);
5087 return prev;
5090 #define CONDJUMP_MIN -252
5091 #define CONDJUMP_MAX 262
5092 struct far_branch
5094 /* A label (to be placed) in front of the jump
5095 that jumps to our ultimate destination. */
5096 rtx near_label;
5097 /* Where we are going to insert it if we cannot move the jump any farther,
5098 or the jump itself if we have picked up an existing jump. */
5099 rtx insert_place;
5100 /* The ultimate destination. */
5101 rtx far_label;
5102 struct far_branch *prev;
5103 /* If the branch has already been created, its address;
5104 else the address of its first prospective user. */
5105 int address;
5108 static void gen_far_branch (struct far_branch *);
5109 enum mdep_reorg_phase_e mdep_reorg_phase;
5110 static void
5111 gen_far_branch (struct far_branch *bp)
5113 rtx insn = bp->insert_place;
5114 rtx jump;
5115 rtx label = gen_label_rtx ();
5116 int ok;
5118 emit_label_after (label, insn);
5119 if (bp->far_label)
5121 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5122 LABEL_NUSES (bp->far_label)++;
5124 else
5125 jump = emit_jump_insn_after (gen_return (), insn);
5126 /* Emit a barrier so that reorg knows that any following instructions
5127 are not reachable via a fall-through path.
5128 But don't do this when not optimizing, since we wouldn't suppress the
5129 alignment for the barrier then, and could end up with out-of-range
5130 pc-relative loads. */
5131 if (optimize)
5132 emit_barrier_after (jump);
5133 emit_label_after (bp->near_label, insn);
5134 JUMP_LABEL (jump) = bp->far_label;
5135 ok = invert_jump (insn, label, 1);
5136 gcc_assert (ok);
5138 /* If we are branching around a jump (rather than a return), prevent
5139 reorg from using an insn from the jump target as the delay slot insn -
5140 when reorg did this, it pessimized code (we would rather hide the delay slot)
5141 and it could cause branches to go out of range. */
5142 if (bp->far_label)
5143 (emit_insn_after
5144 (gen_stuff_delay_slot
5145 (GEN_INT (unspec_bbr_uid++),
5146 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5147 insn));
5148 /* Prevent reorg from undoing our splits. */
5149 gen_block_redirect (jump, bp->address += 2, 2);
5152 /* Fix up ADDR_DIFF_VECs. */
5153 void
5154 fixup_addr_diff_vecs (rtx first)
5156 rtx insn;
5158 for (insn = first; insn; insn = NEXT_INSN (insn))
5160 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5162 if (!JUMP_P (insn)
5163 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5164 continue;
5165 pat = PATTERN (insn);
5166 vec_lab = XEXP (XEXP (pat, 0), 0);
5168 /* Search the matching casesi_jump_2. */
5169 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5171 if (!JUMP_P (prev))
5172 continue;
5173 prevpat = PATTERN (prev);
5174 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5175 continue;
5176 x = XVECEXP (prevpat, 0, 1);
5177 if (GET_CODE (x) != USE)
5178 continue;
5179 x = XEXP (x, 0);
5180 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5181 break;
5183 /* FIXME: This is a bug in the optimizer, but it seems harmless
5184 to just avoid panicking. */
5185 if (!prev)
5186 continue;
5188 /* Emit the reference label of the braf where it belongs, right after
5189 the casesi_jump_2 (i.e. braf). */
5190 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5191 emit_label_after (braf_label, prev);
5193 /* Fix up the ADDR_DIF_VEC to be relative
5194 to the reference address of the braf. */
5195 XEXP (XEXP (pat, 0), 0) = braf_label;
5199 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5200 a barrier. Return the base 2 logarithm of the desired alignment. */
5202 barrier_align (rtx barrier_or_label)
5204 rtx next = next_real_insn (barrier_or_label), pat, prev;
5205 int slot, credit, jump_to_next = 0;
5207 if (! next)
5208 return 0;
5210 pat = PATTERN (next);
5212 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5213 return 2;
5215 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5216 /* This is a barrier in front of a constant table. */
5217 return 0;
5219 prev = prev_real_insn (barrier_or_label);
5220 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5222 pat = PATTERN (prev);
5223 /* If this is a very small table, we want to keep the alignment after
5224 the table to the minimum for proper code alignment. */
5225 return ((TARGET_SMALLCODE
5226 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5227 <= (unsigned) 1 << (CACHE_LOG - 2)))
5228 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5231 if (TARGET_SMALLCODE)
5232 return 0;
5234 if (! TARGET_SH2 || ! optimize)
5235 return align_jumps_log;
5237 /* When fixing up pcloads, a constant table might be inserted just before
5238 the basic block that ends with the barrier. Thus, we can't trust the
5239 instruction lengths before that. */
5240 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5242 /* Check if there is an immediately preceding branch to the insn beyond
5243 the barrier. We must weight the cost of discarding useful information
5244 from the current cache line when executing this branch and there is
5245 an alignment, against that of fetching unneeded insn in front of the
5246 branch target when there is no alignment. */
5248 /* There are two delay_slot cases to consider. One is the simple case
5249 where the preceding branch is to the insn beyond the barrier (simple
5250 delay slot filling), and the other is where the preceding branch has
5251 a delay slot that is a duplicate of the insn after the barrier
5252 (fill_eager_delay_slots) and the branch is to the insn after the insn
5253 after the barrier. */
5255 /* PREV is presumed to be the JUMP_INSN for the barrier under
5256 investigation. Skip to the insn before it. */
5257 prev = prev_real_insn (prev);
5259 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5260 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5261 prev = prev_real_insn (prev))
5263 jump_to_next = 0;
5264 if (GET_CODE (PATTERN (prev)) == USE
5265 || GET_CODE (PATTERN (prev)) == CLOBBER)
5266 continue;
5267 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5269 prev = XVECEXP (PATTERN (prev), 0, 1);
5270 if (INSN_UID (prev) == INSN_UID (next))
5272 /* Delay slot was filled with insn at jump target. */
5273 jump_to_next = 1;
5274 continue;
5278 if (slot &&
5279 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5280 slot = 0;
5281 credit -= get_attr_length (prev);
5283 if (prev
5284 && JUMP_P (prev)
5285 && JUMP_LABEL (prev))
5287 rtx x;
5288 if (jump_to_next
5289 || next_real_insn (JUMP_LABEL (prev)) == next
5290 /* If relax_delay_slots() decides NEXT was redundant
5291 with some previous instruction, it will have
5292 redirected PREV's jump to the following insn. */
5293 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5294 /* There is no upper bound on redundant instructions
5295 that might have been skipped, but we must not put an
5296 alignment where none had been before. */
5297 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5298 (INSN_P (x)
5299 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5300 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5301 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5303 rtx pat = PATTERN (prev);
5304 if (GET_CODE (pat) == PARALLEL)
5305 pat = XVECEXP (pat, 0, 0);
5306 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5307 return 0;
5312 return align_jumps_log;
5315 /* If we are inside a phony loop, almost any kind of label can turn up as the
5316 first one in the loop. Aligning a braf label causes incorrect switch
5317 destination addresses; we can detect braf labels because they are
5318 followed by a BARRIER.
5319 Applying loop alignment to small constant or switch tables is a waste
5320 of space, so we suppress this too. */
5322 sh_loop_align (rtx label)
5324 rtx next = label;
5327 next = next_nonnote_insn (next);
5328 while (next && LABEL_P (next));
5330 if (! next
5331 || ! INSN_P (next)
5332 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5333 || recog_memoized (next) == CODE_FOR_consttable_2)
5334 return 0;
5336 return align_loops_log;
5339 /* Do a final pass over the function, just before delayed branch
5340 scheduling. */
5342 static void
5343 sh_reorg (void)
5345 rtx first, insn, mova = NULL_RTX;
5346 int num_mova;
5347 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5348 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5350 first = get_insns ();
5351 max_labelno_before_reorg = max_label_num ();
5353 /* We must split call insns before introducing `mova's. If we're
5354 optimizing, they'll have already been split. Otherwise, make
5355 sure we don't split them too late. */
5356 if (! optimize)
5357 split_all_insns_noflow ();
5359 if (TARGET_SHMEDIA)
5360 return;
5362 /* If relaxing, generate pseudo-ops to associate function calls with
5363 the symbols they call. It does no harm to not generate these
5364 pseudo-ops. However, when we can generate them, it enables the
5365 linker to potentially relax the jsr to a bsr, and eliminate the
5366 register load and, possibly, the constant pool entry. */
5368 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5369 if (TARGET_RELAX)
5371 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5372 own purposes. This works because none of the remaining passes
5373 need to look at them.
5375 ??? But it may break in the future. We should use a machine
5376 dependent REG_NOTE, or some other approach entirely. */
5377 for (insn = first; insn; insn = NEXT_INSN (insn))
5379 if (INSN_P (insn))
5381 rtx note;
5383 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5384 NULL_RTX)) != 0)
5385 remove_note (insn, note);
5389 for (insn = first; insn; insn = NEXT_INSN (insn))
5391 rtx pattern, reg, link, set, scan, dies, label;
5392 int rescan = 0, foundinsn = 0;
5394 if (CALL_P (insn))
5396 pattern = PATTERN (insn);
5398 if (GET_CODE (pattern) == PARALLEL)
5399 pattern = XVECEXP (pattern, 0, 0);
5400 if (GET_CODE (pattern) == SET)
5401 pattern = SET_SRC (pattern);
5403 if (GET_CODE (pattern) != CALL
5404 || !MEM_P (XEXP (pattern, 0)))
5405 continue;
5407 reg = XEXP (XEXP (pattern, 0), 0);
5409 else
5411 reg = sfunc_uses_reg (insn);
5412 if (! reg)
5413 continue;
5416 if (!REG_P (reg))
5417 continue;
5419 /* Try scanning backward to find where the register is set. */
5420 link = NULL;
5421 for (scan = PREV_INSN (insn);
5422 scan && !LABEL_P (scan);
5423 scan = PREV_INSN (scan))
5425 if (! INSN_P (scan))
5426 continue;
5428 if (! reg_mentioned_p (reg, scan))
5429 continue;
5431 if (noncall_uses_reg (reg, scan, &set))
5432 break;
5434 if (set)
5436 link = scan;
5437 break;
5441 if (! link)
5442 continue;
5444 /* The register is set at LINK. */
5446 /* We can only optimize the function call if the register is
5447 being set to a symbol. In theory, we could sometimes
5448 optimize calls to a constant location, but the assembler
5449 and linker do not support that at present. */
5450 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5451 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5452 continue;
5454 /* Scan forward from LINK to the place where REG dies, and
5455 make sure that the only insns which use REG are
5456 themselves function calls. */
5458 /* ??? This doesn't work for call targets that were allocated
5459 by reload, since there may not be a REG_DEAD note for the
5460 register. */
5462 dies = NULL_RTX;
5463 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5465 rtx scanset;
5467 /* Don't try to trace forward past a CODE_LABEL if we haven't
5468 seen INSN yet. Ordinarily, we will only find the setting insn
5469 if it is in the same basic block. However,
5470 cross-jumping can insert code labels in between the load and
5471 the call, and can result in situations where a single call
5472 insn may have two targets depending on where we came from. */
5474 if (LABEL_P (scan) && ! foundinsn)
5475 break;
5477 if (! INSN_P (scan))
5478 continue;
5480 /* Don't try to trace forward past a JUMP. To optimize
5481 safely, we would have to check that all the
5482 instructions at the jump destination did not use REG. */
5484 if (JUMP_P (scan))
5485 break;
5487 if (! reg_mentioned_p (reg, scan))
5488 continue;
5490 if (noncall_uses_reg (reg, scan, &scanset))
5491 break;
5493 if (scan == insn)
5494 foundinsn = 1;
5496 if (scan != insn
5497 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5499 /* There is a function call to this register other
5500 than the one we are checking. If we optimize
5501 this call, we need to rescan again below. */
5502 rescan = 1;
5505 /* ??? We shouldn't have to worry about SCANSET here.
5506 We should just be able to check for a REG_DEAD note
5507 on a function call. However, the REG_DEAD notes are
5508 apparently not dependable around libcalls; c-torture
5509 execute/920501-2 is a test case. If SCANSET is set,
5510 then this insn sets the register, so it must have
5511 died earlier. Unfortunately, this will only handle
5512 the cases in which the register is, in fact, set in a
5513 later insn. */
5515 /* ??? We shouldn't have to use FOUNDINSN here.
5516 This dates back to when we used LOG_LINKS to find
5517 the most recent insn which sets the register. */
5519 if (foundinsn
5520 && (scanset
5521 || find_reg_note (scan, REG_DEAD, reg)))
5523 dies = scan;
5524 break;
5528 if (! dies)
5530 /* Either there was a branch, or some insn used REG
5531 other than as a function call address. */
5532 continue;
5535 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5536 on the insn which sets the register, and on each call insn
5537 which uses the register. In final_prescan_insn we look for
5538 the REG_LABEL_OPERAND notes, and output the appropriate label
5539 or pseudo-op. */
5541 label = gen_label_rtx ();
5542 add_reg_note (link, REG_LABEL_OPERAND, label);
5543 add_reg_note (insn, REG_LABEL_OPERAND, label);
5544 if (rescan)
5546 scan = link;
5549 rtx reg2;
5551 scan = NEXT_INSN (scan);
5552 if (scan != insn
5553 && ((CALL_P (scan)
5554 && reg_mentioned_p (reg, scan))
5555 || ((reg2 = sfunc_uses_reg (scan))
5556 && REGNO (reg2) == REGNO (reg))))
5557 add_reg_note (scan, REG_LABEL_OPERAND, label);
5559 while (scan != dies);
5564 if (TARGET_SH2)
5565 fixup_addr_diff_vecs (first);
5567 if (optimize)
5569 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5570 shorten_branches (first);
5573 /* Scan the function looking for move instructions which have to be
5574 changed to pc-relative loads and insert the literal tables. */
5575 label_ref_list_pool = create_alloc_pool ("label references list",
5576 sizeof (struct label_ref_list_d),
5577 30);
5578 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5579 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5581 if (mova_p (insn))
5583 /* ??? basic block reordering can move a switch table dispatch
5584 below the switch table. Check if that has happened.
5585 We only have the addresses available when optimizing; but then,
5586 this check shouldn't be needed when not optimizing. */
5587 if (!untangle_mova (&num_mova, &mova, insn))
5589 insn = mova;
5590 num_mova = 0;
5593 else if (JUMP_P (insn)
5594 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5595 && num_mova
5596 /* ??? loop invariant motion can also move a mova out of a
5597 loop. Since loop does this code motion anyway, maybe we
5598 should wrap UNSPEC_MOVA into a CONST, so that reload can
5599 move it back. */
5600 && ((num_mova > 1
5601 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5602 || (prev_nonnote_insn (insn)
5603 == XEXP (MOVA_LABELREF (mova), 0))))
5605 rtx scan;
5606 int total;
5608 num_mova--;
5610 /* Some code might have been inserted between the mova and
5611 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5612 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5613 total += get_attr_length (scan);
5615 /* range of mova is 1020, add 4 because pc counts from address of
5616 second instruction after this one, subtract 2 in case pc is 2
5617 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5618 cancels out with alignment effects of the mova itself. */
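/* That is, 1020 + 4 - 2 = 1022 is the largest TOTAL that is still in
   range, hence the test below.  */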
5619 if (total > 1022)
5621 /* Change the mova into a load, and restart scanning
5622 there. broken_move will then return true for mova. */
5623 fixup_mova (mova);
5624 insn = mova;
5627 if (broken_move (insn)
5628 || (NONJUMP_INSN_P (insn)
5629 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5631 rtx scan;
5632 /* Scan ahead looking for a barrier to stick the constant table
5633 behind. */
5634 rtx barrier = find_barrier (num_mova, mova, insn);
5635 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5636 int need_aligned_label = 0;
5638 if (num_mova && ! mova_p (mova))
5640 /* find_barrier had to change the first mova into a
5641 pcload; thus, we have to start with this new pcload. */
5642 insn = mova;
5643 num_mova = 0;
5645 /* Now find all the moves between the points and modify them. */
5646 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5648 if (LABEL_P (scan))
5649 last_float = 0;
5650 if (NONJUMP_INSN_P (scan)
5651 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5652 need_aligned_label = 1;
5653 if (broken_move (scan))
5655 rtx *patp = &PATTERN (scan), pat = *patp;
5656 rtx src, dst;
5657 rtx lab;
5658 rtx newsrc;
5659 enum machine_mode mode;
5661 if (GET_CODE (pat) == PARALLEL)
5662 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5663 src = SET_SRC (pat);
5664 dst = SET_DEST (pat);
5665 mode = GET_MODE (dst);
5667 if (mode == SImode && hi_const (src)
5668 && REGNO (dst) != FPUL_REG)
5670 int offset = 0;
5672 mode = HImode;
5673 while (GET_CODE (dst) == SUBREG)
5675 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5676 GET_MODE (SUBREG_REG (dst)),
5677 SUBREG_BYTE (dst),
5678 GET_MODE (dst));
5679 dst = SUBREG_REG (dst);
5681 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5683 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5685 /* This must be an insn that clobbers r0. */
5686 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5687 XVECLEN (PATTERN (scan), 0)
5688 - 1);
5689 rtx clobber = *clobberp;
5691 gcc_assert (GET_CODE (clobber) == CLOBBER
5692 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5694 if (last_float
5695 && reg_set_between_p (r0_rtx, last_float_move, scan))
5696 last_float = 0;
5697 if (last_float
5698 && TARGET_SHCOMPACT
5699 && GET_MODE_SIZE (mode) != 4
5700 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5701 last_float = 0;
5702 lab = add_constant (src, mode, last_float);
5703 if (lab)
5704 emit_insn_before (gen_mova (lab), scan);
5705 else
5707 /* There will be a REG_UNUSED note for r0 on
5708 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5709 lest reorg:mark_target_live_regs not consider
5710 r0 to be used, and we end up with a delay slot
5711 insn in front of SCAN that clobbers r0. */
5712 rtx note
5713 = find_regno_note (last_float_move, REG_UNUSED, 0);
5715 /* If we are not optimizing, then there may not be
5716 a note. */
5717 if (note)
5718 PUT_REG_NOTE_KIND (note, REG_INC);
5720 *last_float_addr = r0_inc_rtx;
5722 last_float_move = scan;
5723 last_float = src;
5724 newsrc = gen_const_mem (mode,
5725 (((TARGET_SH4 && ! TARGET_FMOVD)
5726 || REGNO (dst) == FPUL_REG)
5727 ? r0_inc_rtx
5728 : r0_rtx));
5729 last_float_addr = &XEXP (newsrc, 0);
5731 /* Remove the clobber of r0. */
5732 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5733 gen_rtx_SCRATCH (Pmode));
5735 /* This is a mova needing a label. Create it. */
5736 else if (GET_CODE (src) == UNSPEC
5737 && XINT (src, 1) == UNSPEC_MOVA
5738 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5740 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5741 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5742 newsrc = gen_rtx_UNSPEC (SImode,
5743 gen_rtvec (1, newsrc),
5744 UNSPEC_MOVA);
5746 else
5748 lab = add_constant (src, mode, 0);
5749 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5750 newsrc = gen_const_mem (mode, newsrc);
5752 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5753 INSN_CODE (scan) = -1;
5756 dump_table (need_aligned_label ? insn : 0, barrier);
5757 insn = barrier;
5760 free_alloc_pool (label_ref_list_pool);
5761 for (insn = first; insn; insn = NEXT_INSN (insn))
5762 PUT_MODE (insn, VOIDmode);
5764 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5765 INSN_ADDRESSES_FREE ();
5766 split_branches (first);
5768 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5769 also has an effect on the register that holds the address of the sfunc.
5770 Insert an extra dummy insn in front of each sfunc that pretends to
5771 use this register. */
5772 if (flag_delayed_branch)
5774 for (insn = first; insn; insn = NEXT_INSN (insn))
5776 rtx reg = sfunc_uses_reg (insn);
5778 if (! reg)
5779 continue;
5780 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5783 #if 0
5784 /* fpscr is not actually a user variable, but we pretend it is for the
5785 sake of the previous optimization passes, since we want it handled like
5786 one. However, we don't have any debugging information for it, so turn
5787 it into a non-user variable now. */
5788 if (TARGET_SH4)
5789 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5790 #endif
5791 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5795 get_dest_uid (rtx label, int max_uid)
5797 rtx dest = next_real_insn (label);
5798 int dest_uid;
5799 if (! dest)
5800 /* This can happen for an undefined label. */
5801 return 0;
5802 dest_uid = INSN_UID (dest);
5803 /* If this is a newly created branch redirection blocking instruction,
5804 we cannot index the branch_uid or insn_addresses arrays with its
5805 uid. But then, we won't need to, because the actual destination is
5806 the following branch. */
5807 while (dest_uid >= max_uid)
5809 dest = NEXT_INSN (dest);
5810 dest_uid = INSN_UID (dest);
5812 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5813 return 0;
5814 return dest_uid;
5817 /* Split condbranches that are out of range. Also add clobbers for
5818 scratch registers that are needed in far jumps.
5819 We do this before delay slot scheduling, so that it can take our
5820 newly created instructions into account. It also allows us to
5821 find branches with common targets more easily. */
5823 static void
5824 split_branches (rtx first)
5826 rtx insn;
5827 struct far_branch **uid_branch, *far_branch_list = 0;
5828 int max_uid = get_max_uid ();
5829 int ok;
5831 /* Find out which branches are out of range. */
5832 shorten_branches (first);
5834 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5835 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5837 for (insn = first; insn; insn = NEXT_INSN (insn))
5838 if (! INSN_P (insn))
5839 continue;
5840 else if (INSN_DELETED_P (insn))
5842 /* Shorten_branches would split this instruction again,
5843 so transform it into a note. */
5844 SET_INSN_DELETED (insn);
5846 else if (JUMP_P (insn)
5847 /* Don't mess with ADDR_DIFF_VEC */
5848 && (GET_CODE (PATTERN (insn)) == SET
5849 || GET_CODE (PATTERN (insn)) == RETURN))
5851 enum attr_type type = get_attr_type (insn);
5852 if (type == TYPE_CBRANCH)
5854 rtx next, beyond;
5856 if (get_attr_length (insn) > 4)
5858 rtx src = SET_SRC (PATTERN (insn));
5859 rtx olabel = XEXP (XEXP (src, 1), 0);
5860 int addr = INSN_ADDRESSES (INSN_UID (insn));
5861 rtx label = 0;
5862 int dest_uid = get_dest_uid (olabel, max_uid);
5863 struct far_branch *bp = uid_branch[dest_uid];
5865 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5866 the label if the LABEL_NUSES count drops to zero. There is
5867 always a jump_optimize pass that sets these values, but it
5868 proceeds to delete unreferenced code, and then if not
5869 optimizing, to un-delete the deleted instructions, thus
5870 leaving labels with too low uses counts. */
5871 if (! optimize)
5873 JUMP_LABEL (insn) = olabel;
5874 LABEL_NUSES (olabel)++;
5876 if (! bp)
5878 bp = (struct far_branch *) alloca (sizeof *bp);
5879 uid_branch[dest_uid] = bp;
5880 bp->prev = far_branch_list;
5881 far_branch_list = bp;
5882 bp->far_label
5883 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5884 LABEL_NUSES (bp->far_label)++;
5886 else
5888 label = bp->near_label;
5889 if (! label && bp->address - addr >= CONDJUMP_MIN)
5891 rtx block = bp->insert_place;
5893 if (GET_CODE (PATTERN (block)) == RETURN)
5894 block = PREV_INSN (block);
5895 else
5896 block = gen_block_redirect (block,
5897 bp->address, 2);
5898 label = emit_label_after (gen_label_rtx (),
5899 PREV_INSN (block));
5900 bp->near_label = label;
5902 else if (label && ! NEXT_INSN (label))
5904 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5905 bp->insert_place = insn;
5906 else
5907 gen_far_branch (bp);
5910 if (! label
5911 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5913 bp->near_label = label = gen_label_rtx ();
5914 bp->insert_place = insn;
5915 bp->address = addr;
5917 ok = redirect_jump (insn, label, 0);
5918 gcc_assert (ok);
5920 else
5922 /* get_attr_length (insn) == 2 */
5923 /* Check if we have a pattern where reorg wants to redirect
5924 the branch to a label from an unconditional branch that
5925 is too far away. */
5926 /* We can't use JUMP_LABEL here because it might be undefined
5927 when not optimizing. */
5928 /* A syntax error might cause beyond to be NULL_RTX. */
5929 beyond
5930 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5931 0));
5933 if (beyond
5934 && (JUMP_P (beyond)
5935 || ((beyond = next_active_insn (beyond))
5936 && JUMP_P (beyond)))
5937 && GET_CODE (PATTERN (beyond)) == SET
5938 && recog_memoized (beyond) == CODE_FOR_jump_compact
5939 && ((INSN_ADDRESSES
5940 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5941 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5942 > 252 + 258 + 2))
5943 gen_block_redirect (beyond,
5944 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5947 next = next_active_insn (insn);
5949 if (next
5950 && (JUMP_P (next)
5951 || ((next = next_active_insn (next))
5952 && JUMP_P (next)))
5953 && GET_CODE (PATTERN (next)) == SET
5954 && recog_memoized (next) == CODE_FOR_jump_compact
5955 && ((INSN_ADDRESSES
5956 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5957 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5958 > 252 + 258 + 2))
5959 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5961 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5963 int addr = INSN_ADDRESSES (INSN_UID (insn));
5964 rtx far_label = 0;
5965 int dest_uid = 0;
5966 struct far_branch *bp;
5968 if (type == TYPE_JUMP)
5970 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5971 dest_uid = get_dest_uid (far_label, max_uid);
5972 if (! dest_uid)
5974 /* Parse errors can lead to labels outside
5975 the insn stream. */
5976 if (! NEXT_INSN (far_label))
5977 continue;
5979 if (! optimize)
5981 JUMP_LABEL (insn) = far_label;
5982 LABEL_NUSES (far_label)++;
5984 redirect_jump (insn, NULL_RTX, 1);
5985 far_label = 0;
5988 bp = uid_branch[dest_uid];
5989 if (! bp)
5991 bp = (struct far_branch *) alloca (sizeof *bp);
5992 uid_branch[dest_uid] = bp;
5993 bp->prev = far_branch_list;
5994 far_branch_list = bp;
5995 bp->near_label = 0;
5996 bp->far_label = far_label;
5997 if (far_label)
5998 LABEL_NUSES (far_label)++;
6000 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6001 if (addr - bp->address <= CONDJUMP_MAX)
6002 emit_label_after (bp->near_label, PREV_INSN (insn));
6003 else
6005 gen_far_branch (bp);
6006 bp->near_label = 0;
6008 else
6009 bp->near_label = 0;
6010 bp->address = addr;
6011 bp->insert_place = insn;
6012 if (! far_label)
6013 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6014 else
6015 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6018 /* Generate all pending far branches,
6019 and free our references to the far labels. */
6020 while (far_branch_list)
6022 if (far_branch_list->near_label
6023 && ! NEXT_INSN (far_branch_list->near_label))
6024 gen_far_branch (far_branch_list);
6025 if (optimize
6026 && far_branch_list->far_label
6027 && ! --LABEL_NUSES (far_branch_list->far_label))
6028 delete_insn (far_branch_list->far_label);
6029 far_branch_list = far_branch_list->prev;
6032 /* Instruction length information is no longer valid due to the new
6033 instructions that have been generated. */
6034 init_insn_lengths ();
6037 /* Dump out instruction addresses, which is useful for debugging the
6038 constant pool table stuff.
6040 If relaxing, output the label and pseudo-ops used to link together
6041 calls and the instruction which set the registers. */
6043 /* ??? The addresses printed by this routine for insns are nonsense for
6044 insns which are inside of a sequence where none of the inner insns have
6045 variable length. This is because the second pass of shorten_branches
6046 does not bother to update them. */
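/* For instance (hypothetical label number; the local label prefix is
   target-dependent), a relaxable call site is annotated with something
   like
       .uses   L42
   so that the relaxation pass can pair the call with the insn that
   loaded its target.  */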
6048 void
6049 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6050 int noperands ATTRIBUTE_UNUSED)
6052 if (TARGET_DUMPISIZE)
6053 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6055 if (TARGET_RELAX)
6057 rtx note;
6059 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6060 if (note)
6062 rtx pattern;
6064 pattern = PATTERN (insn);
6065 if (GET_CODE (pattern) == PARALLEL)
6066 pattern = XVECEXP (pattern, 0, 0);
6067 switch (GET_CODE (pattern))
6069 case SET:
6070 if (GET_CODE (SET_SRC (pattern)) != CALL
6071 && get_attr_type (insn) != TYPE_SFUNC)
6073 targetm.asm_out.internal_label
6074 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6075 break;
6077 /* else FALLTHROUGH */
6078 case CALL:
6079 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6080 CODE_LABEL_NUMBER (XEXP (note, 0)));
6081 break;
6083 default:
6084 gcc_unreachable ();
6090 /* Dump out any constants accumulated in the final pass. These will
6091 only be labels. */
6093 const char *
6094 output_jump_label_table (void)
6096 int i;
6098 if (pool_size)
6100 fprintf (asm_out_file, "\t.align 2\n");
6101 for (i = 0; i < pool_size; i++)
6103 pool_node *p = &pool_vector[i];
6105 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6106 CODE_LABEL_NUMBER (p->label));
6107 output_asm_insn (".long %O0", &p->value);
6109 pool_size = 0;
6112 return "";
6115 /* A full frame looks like:
6117 arg-5
6118 arg-4
6119 [ if current_function_anonymous_args
6120 arg-3
6121 arg-2
6122 arg-1
6123 arg-0 ]
6124 saved-fp
6125 saved-r10
6126 saved-r11
6127 saved-r12
6128 saved-pr
6129 local-n
6131 local-1
6132 local-0 <- fp points here. */
6134 /* Number of bytes pushed for anonymous args, used to pass information
6135 between expand_prologue and expand_epilogue. */
6137 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6138 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6139 for an epilogue and a negative value means that it's for a sibcall
6140 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6141 all the registers that are about to be restored, and hence dead. */
6143 static void
6144 output_stack_adjust (int size, rtx reg, int epilogue_p,
6145 HARD_REG_SET *live_regs_mask, bool frame_p)
6147 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6148 if (size)
6150 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6152 /* This test is bogus, as output_stack_adjust is used to re-align the
6153 stack. */
6154 #if 0
6155 gcc_assert (!(size % align));
6156 #endif
6158 if (CONST_OK_FOR_ADD (size))
6159 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6160 /* Try to do it with two partial adjustments; however, we must make
6161 sure that the stack is properly aligned at all times, in case
6162 an interrupt occurs between the two partial adjustments. */
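/* For instance (hypothetical values, with an 8-bit add immediate and
   8-byte alignment): size == 200 is split into 200/2 & -8 == 96 and
   200 - 96 == 104; each partial add fits the immediate range and keeps
   the stack 8-byte aligned.  */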
6163 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6164 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6166 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6167 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6169 else
6171 rtx const_reg;
6172 rtx insn;
6173 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6174 int i;
6176 /* If TEMP is invalid, we could temporarily save a general
6177 register to MACL. However, there is currently no need
6178 to handle this case, so just die when we see it. */
6179 if (epilogue_p < 0
6180 || current_function_interrupt
6181 || ! call_really_used_regs[temp] || fixed_regs[temp])
6182 temp = -1;
6183 if (temp < 0 && ! current_function_interrupt
6184 && (TARGET_SHMEDIA || epilogue_p >= 0))
6186 HARD_REG_SET temps;
6187 COPY_HARD_REG_SET (temps, call_used_reg_set);
6188 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6189 if (epilogue_p > 0)
6191 int nreg = 0;
6192 if (crtl->return_rtx)
6194 enum machine_mode mode;
6195 mode = GET_MODE (crtl->return_rtx);
6196 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6197 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6199 for (i = 0; i < nreg; i++)
6200 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6201 if (crtl->calls_eh_return)
6203 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6204 for (i = 0; i <= 3; i++)
6205 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6208 if (TARGET_SHMEDIA && epilogue_p < 0)
6209 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6210 CLEAR_HARD_REG_BIT (temps, i);
6211 if (epilogue_p <= 0)
6213 for (i = FIRST_PARM_REG;
6214 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6215 CLEAR_HARD_REG_BIT (temps, i);
6216 if (cfun->static_chain_decl != NULL)
6217 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6219 temp = scavenge_reg (&temps);
6221 if (temp < 0 && live_regs_mask)
6223 HARD_REG_SET temps;
6225 COPY_HARD_REG_SET (temps, *live_regs_mask);
6226 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6227 temp = scavenge_reg (&temps);
6229 if (temp < 0)
6231 rtx adj_reg, tmp_reg, mem;
6233 /* If we reached here, the most likely case is the (sibcall)
6234 epilogue for non-SHmedia. Put a special push/pop sequence
6235 for such a case as the last resort. This looks lengthy but
6236 would not be a problem because it seems to be very
6237 rare. */
6239 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6242 /* ??? There is still the slight possibility that r4 or
6243 r5 have been reserved as fixed registers or assigned
6244 as global registers, and they change during an
6245 interrupt. There are possible ways to handle this:
6247 - If we are adjusting the frame pointer (r14), we can do
6248 with a single temp register and an ordinary push / pop
6249 on the stack.
6250 - Grab any call-used or call-saved registers (i.e. not
6251 fixed or globals) for the temps we need. We might
6252 also grab r14 if we are adjusting the stack pointer.
6253 If we can't find enough available registers, issue
6254 a diagnostic and die - the user must have reserved
6255 way too many registers.
6256 But since all this is rather unlikely to happen and
6257 would require extra testing, we just die if r4 / r5
6258 are not available. */
6259 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6260 && !global_regs[4] && !global_regs[5]);
6262 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6263 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6264 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6265 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6266 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6267 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6268 emit_move_insn (mem, tmp_reg);
6269 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6270 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6271 emit_move_insn (mem, tmp_reg);
6272 emit_move_insn (reg, adj_reg);
6273 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6274 emit_move_insn (adj_reg, mem);
6275 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6276 emit_move_insn (tmp_reg, mem);
6277 /* Tell flow the insns that pop r4/r5 aren't dead. */
6278 emit_use (tmp_reg);
6279 emit_use (adj_reg);
6280 return;
6282 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6284 /* If SIZE is negative, subtract the positive value.
6285 This sometimes allows a constant pool entry to be shared
6286 between prologue and epilogue code. */
6287 if (size < 0)
6289 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6290 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6292 else
6294 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6295 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6297 if (! epilogue_p)
6298 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6299 gen_rtx_SET (VOIDmode, reg,
6300 gen_rtx_PLUS (SImode, reg,
6301 GEN_INT (size))));
6306 static rtx
6307 frame_insn (rtx x)
6309 x = emit_insn (x);
6310 RTX_FRAME_RELATED_P (x) = 1;
6311 return x;
6314 /* Output RTL to push register RN onto the stack. */
6316 static rtx
6317 push (int rn)
6319 rtx x;
6320 if (rn == FPUL_REG)
6321 x = gen_push_fpul ();
6322 else if (rn == FPSCR_REG)
6323 x = gen_push_fpscr ();
6324 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6325 && FP_OR_XD_REGISTER_P (rn))
6327 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6328 return NULL_RTX;
6329 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6331 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6332 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6333 else
6334 x = gen_push (gen_rtx_REG (SImode, rn));
6336 x = frame_insn (x);
6337 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6338 return x;
6341 /* Output RTL to pop register RN from the stack. */
6343 static void
6344 pop (int rn)
6346 rtx x;
6347 if (rn == FPUL_REG)
6348 x = gen_pop_fpul ();
6349 else if (rn == FPSCR_REG)
6350 x = gen_pop_fpscr ();
6351 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6352 && FP_OR_XD_REGISTER_P (rn))
6354 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6355 return;
6356 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6358 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6359 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6360 else
6361 x = gen_pop (gen_rtx_REG (SImode, rn));
6363 x = emit_insn (x);
6364 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6367 /* Generate code to push the regs specified in the mask. */
6369 static void
6370 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6372 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6373 int skip_fpscr = 0;
6375 /* Push PR last; this gives better latencies after the prologue, and
6376 provides candidates for the return delay slot when there are no
6377 general registers pushed. */
6378 for (; i < FIRST_PSEUDO_REGISTER; i++)
6380 /* If this is an interrupt handler, and the SZ bit varies,
6381 and we have to push any floating point register, we need
6382 to switch to the correct precision first. */
6383 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6384 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6386 HARD_REG_SET unsaved;
6388 push (FPSCR_REG);
6389 COMPL_HARD_REG_SET (unsaved, *mask);
6390 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6391 skip_fpscr = 1;
6393 if (i != PR_REG
6394 && (i != FPSCR_REG || ! skip_fpscr)
6395 && TEST_HARD_REG_BIT (*mask, i))
6397 /* If the ISR has RESBANK attribute assigned, don't push any of
6398 the following registers - R0-R14, MACH, MACL and GBR. */
6399 if (! (sh_cfun_resbank_handler_p ()
6400 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6401 || i == MACH_REG
6402 || i == MACL_REG
6403 || i == GBR_REG)))
6404 push (i);
6408 /* Push banked registers last to improve delay slot opportunities. */
6409 if (interrupt_handler)
6411 bool use_movml = false;
6413 if (TARGET_SH2A)
6415 unsigned int count = 0;
6417 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6418 if (TEST_HARD_REG_BIT (*mask, i))
6419 count++;
6420 else
6421 break;
6423 /* Use movml when all banked registers are pushed. */
6424 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6425 use_movml = true;
6428 if (use_movml)
6430 rtx x, mem, reg, set;
6431 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6433 /* We must avoid scheduling the multiple-store insn together
6434 with other insns. */
6435 emit_insn (gen_blockage ());
6436 x = gen_movml_push_banked (sp_reg);
6437 x = frame_insn (x);
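/* The movml pattern saves all the banked registers with a single insn,
   so describe the individual saves (REG_CFA_OFFSET) and the 32-byte
   stack drop (REG_CFA_ADJUST_CFA) explicitly for the unwind info.  */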
6438 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6440 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6441 reg = gen_rtx_REG (SImode, i);
6442 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6445 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6446 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6447 emit_insn (gen_blockage ());
6449 else
6450 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6451 if (TEST_HARD_REG_BIT (*mask, i))
6452 push (i);
6455 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6456 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6457 push (PR_REG);
6460 /* Calculate how much extra space is needed to save all callee-saved
6461 target registers.
6462 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6464 static int
6465 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6467 int reg;
6468 int stack_space = 0;
6469 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6471 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6472 if ((! call_really_used_regs[reg] || interrupt_handler)
6473 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6474 /* Leave space to save this target register on the stack,
6475 in case target register allocation wants to use it. */
6476 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6477 return stack_space;
6480 /* Decide whether we should reserve space for callee-save target registers,
6481 in case target register allocation wants to use them. REGS_SAVED is
6482 the space, in bytes, that is already required for register saves.
6483 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6485 static int
6486 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6487 HARD_REG_SET *live_regs_mask)
6489 if (optimize_size)
6490 return 0;
6491 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6494 /* Decide how much space to reserve for callee-save target registers
6495 in case target register allocation wants to use them.
6496 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6498 static int
6499 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6501 if (shmedia_space_reserved_for_target_registers)
6502 return shmedia_target_regs_stack_space (live_regs_mask);
6503 else
6504 return 0;
6507 /* Work out the registers which need to be saved, both as a mask and a
6508 count of saved words. Return the count.
6510 If doing a pragma interrupt function, then push all regs used by the
6511 function, and if we call another function (we can tell by looking at PR),
6512 make sure that all the regs it clobbers are safe too. */
6514 static int
6515 calc_live_regs (HARD_REG_SET *live_regs_mask)
6517 unsigned int reg;
6518 int count;
6519 tree attrs;
6520 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6521 bool nosave_low_regs;
6522 int pr_live, has_call;
6524 attrs = DECL_ATTRIBUTES (current_function_decl);
6525 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6526 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6527 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6528 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6530 CLEAR_HARD_REG_SET (*live_regs_mask);
6531 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6532 && df_regs_ever_live_p (FPSCR_REG))
6533 target_flags &= ~MASK_FPU_SINGLE;
6534 /* If we can save a lot of saves by switching to double mode, do that. */
6535 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6536 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6537 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6538 && (! call_really_used_regs[reg]
6539 || interrupt_handler)
6540 && ++count > 2)
6542 target_flags &= ~MASK_FPU_SINGLE;
6543 break;
6545 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6546 knows how to use it. That means the pseudo originally allocated for
6547 the initial value can become the PR_MEDIA_REG hard register, as seen for
6548 execute/20010122-1.c:test9. */
6549 if (TARGET_SHMEDIA)
6550 /* ??? this function is called from initial_elimination_offset, hence we
6551 can't use the result of sh_media_register_for_return here. */
6552 pr_live = sh_pr_n_sets ();
6553 else
6555 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6556 pr_live = (pr_initial
6557 ? (!REG_P (pr_initial)
6558 || REGNO (pr_initial) != (PR_REG))
6559 : df_regs_ever_live_p (PR_REG));
6560 /* For SHcompact, if not optimizing, we end up with a memory reference
6561 using the return address pointer for __builtin_return_address even
6562 though there is no actual need to put the PR register on the stack. */
6563 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6565 /* Force PR to be live if the prologue has to call the SHmedia
6566 argument decoder or register saver. */
6567 if (TARGET_SHCOMPACT
6568 && ((crtl->args.info.call_cookie
6569 & ~ CALL_COOKIE_RET_TRAMP (1))
6570 || crtl->saves_all_registers))
6571 pr_live = 1;
6572 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6573 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6575 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6576 ? pr_live
6577 : interrupt_handler
6578 ? (/* Need to save all the regs ever live. */
6579 (df_regs_ever_live_p (reg)
6580 || (call_really_used_regs[reg]
6581 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6582 || reg == PIC_OFFSET_TABLE_REGNUM)
6583 && has_call)
6584 || (TARGET_SHMEDIA && has_call
6585 && REGISTER_NATURAL_MODE (reg) == SImode
6586 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6587 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6588 && reg != RETURN_ADDRESS_POINTER_REGNUM
6589 && reg != T_REG && reg != GBR_REG
6590 /* Push fpscr only on targets which have an FPU. */
6591 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6592 : (/* Only push those regs which are used and need to be saved. */
6593 (TARGET_SHCOMPACT
6594 && flag_pic
6595 && crtl->args.info.call_cookie
6596 && reg == PIC_OFFSET_TABLE_REGNUM)
6597 || (df_regs_ever_live_p (reg)
6598 && ((!call_really_used_regs[reg]
6599 && !(reg != PIC_OFFSET_TABLE_REGNUM
6600 && fixed_regs[reg] && call_used_regs[reg]))
6601 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6602 || (crtl->calls_eh_return
6603 && (reg == EH_RETURN_DATA_REGNO (0)
6604 || reg == EH_RETURN_DATA_REGNO (1)
6605 || reg == EH_RETURN_DATA_REGNO (2)
6606 || reg == EH_RETURN_DATA_REGNO (3)))
6607 || ((reg == MACL_REG || reg == MACH_REG)
6608 && df_regs_ever_live_p (reg)
6609 && sh_cfun_attr_renesas_p ())
6612 SET_HARD_REG_BIT (*live_regs_mask, reg);
6613 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6615 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6616 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6618 if (FP_REGISTER_P (reg))
6620 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6622 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6623 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6626 else if (XD_REGISTER_P (reg))
6628 /* Must switch to double mode to access these registers. */
6629 target_flags &= ~MASK_FPU_SINGLE;
6633 if (nosave_low_regs && reg == R8_REG)
6634 break;
6636 /* If we have a target register optimization pass after prologue / epilogue
6637 threading, we need to assume all target registers will be live even if
6638 they aren't now. */
6639 if (flag_branch_target_load_optimize2
6640 && TARGET_SAVE_ALL_TARGET_REGS
6641 && shmedia_space_reserved_for_target_registers)
6642 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6643 if ((! call_really_used_regs[reg] || interrupt_handler)
6644 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6646 SET_HARD_REG_BIT (*live_regs_mask, reg);
6647 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6649 /* If this is an interrupt handler, we don't have any call-clobbered
6650 registers we can conveniently use for target register save/restore.
6651 Make sure we save at least one general purpose register when we need
6652 to save target registers. */
6653 if (interrupt_handler
6654 && hard_reg_set_intersect_p (*live_regs_mask,
6655 reg_class_contents[TARGET_REGS])
6656 && ! hard_reg_set_intersect_p (*live_regs_mask,
6657 reg_class_contents[GENERAL_REGS]))
6659 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6660 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6663 return count;
6666 /* Code to generate prologue and epilogue sequences */
6668 /* PUSHED is the number of bytes that are being pushed on the
6669 stack for register saves. Return the frame size, padded
6670 appropriately so that the stack stays properly aligned. */
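/* For example (hypothetical values, ignoring outgoing args): with
   get_frame_size () == 20, PUSHED == 12 and an 8-byte STACK_BOUNDARY,
   the result is ((20 + 12 + 7) & -8) - 12 == 20, so the frame plus the
   pushed registers together remain a multiple of 8.  */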
6671 static HOST_WIDE_INT
6672 rounded_frame_size (int pushed)
6674 HOST_WIDE_INT size = get_frame_size ();
6675 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6677 if (ACCUMULATE_OUTGOING_ARGS)
6678 size += crtl->outgoing_args_size;
6680 return ((size + pushed + align - 1) & -align) - pushed;
6683 /* Choose a call-clobbered target-branch register that remains
6684 unchanged along the whole function. We set it up as the return
6685 value in the prologue. */
6687 sh_media_register_for_return (void)
6689 int regno;
6690 int tr0_used;
6692 if (! current_function_is_leaf)
6693 return -1;
6694 if (lookup_attribute ("interrupt_handler",
6695 DECL_ATTRIBUTES (current_function_decl)))
6696 return -1;
6697 if (sh_cfun_interrupt_handler_p ())
6698 return -1;
6700 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6702 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6703 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6704 return regno;
6706 return -1;
6709 /* The maximum registers we need to save are:
6710 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6711 - 32 floating point registers (for each pair, we save none,
6712 one single precision value, or a double precision value).
6713 - 8 target registers
6714 - add 1 entry for a delimiter. */
6715 #define MAX_SAVED_REGS (62+32+8)
6717 typedef struct save_entry_s
6719 unsigned char reg;
6720 unsigned char mode;
6721 short offset;
6722 } save_entry;
6724 #define MAX_TEMPS 4
6726 /* There will be a delimiter entry with VOIDmode both at the start and the
6727 end of a filled in schedule. The end delimiter has the offset of the
6728 save with the smallest (i.e. most negative) offset. */
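/* For instance (hypothetical), with OFFSET_BASE 0, one 8-byte save
   followed by one 4-byte save yields entries at offsets -8 and -12,
   bracketed by a start delimiter at offset 0 and an end delimiter at
   offset -12.  */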
6729 typedef struct save_schedule_s
6731 save_entry entries[MAX_SAVED_REGS + 2];
6732 int temps[MAX_TEMPS+1];
6733 } save_schedule;
6735 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6736 use reverse order. Returns the last entry written to (not counting
6737 the delimiter). OFFSET_BASE is a number to be added to all offset
6738 entries. */
6740 static save_entry *
6741 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6742 int offset_base)
6744 int align, i;
6745 save_entry *entry = schedule->entries;
6746 int tmpx = 0;
6747 int offset;
6749 if (! current_function_interrupt)
6750 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6751 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6752 && ! FUNCTION_ARG_REGNO_P (i)
6753 && i != FIRST_RET_REG
6754 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6755 && ! (crtl->calls_eh_return
6756 && (i == EH_RETURN_STACKADJ_REGNO
6757 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6758 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6759 schedule->temps[tmpx++] = i;
6760 entry->reg = -1;
6761 entry->mode = VOIDmode;
6762 entry->offset = offset_base;
6763 entry++;
6764 /* We loop twice: first, we save 8-byte aligned registers in the
6765 higher addresses, that are known to be aligned. Then, we
6766 proceed to saving 32-bit registers that don't need 8-byte
6767 alignment.
6768 If this is an interrupt function, all registers that need saving
6769 need to be saved in full. Moreover, we need to postpone saving
6770 target registers till we have saved some general purpose registers
6771 we can then use as scratch registers. */
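/* With an 8-byte STACK_BOUNDARY this means the 8-byte (DImode/DFmode)
   saves are handled in the first, aligned pass and the remaining
   4-byte saves in the second.  */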
6772 offset = offset_base;
6773 for (align = 1; align >= 0; align--)
6775 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6776 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6778 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6779 int reg = i;
6781 if (current_function_interrupt)
6783 if (TARGET_REGISTER_P (i))
6784 continue;
6785 if (GENERAL_REGISTER_P (i))
6786 mode = DImode;
6788 if (mode == SFmode && (i % 2) == 1
6789 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6790 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6792 mode = DFmode;
6793 i--;
6794 reg--;
6797 /* If we're doing the aligned pass and this is not aligned,
6798 or we're doing the unaligned pass and this is aligned,
6799 skip it. */
6800 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6801 != align)
6802 continue;
6804 if (current_function_interrupt
6805 && GENERAL_REGISTER_P (i)
6806 && tmpx < MAX_TEMPS)
6807 schedule->temps[tmpx++] = i;
6809 offset -= GET_MODE_SIZE (mode);
6810 entry->reg = i;
6811 entry->mode = mode;
6812 entry->offset = offset;
6813 entry++;
6815 if (align && current_function_interrupt)
6816 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6817 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6819 offset -= GET_MODE_SIZE (DImode);
6820 entry->reg = i;
6821 entry->mode = DImode;
6822 entry->offset = offset;
6823 entry++;
6826 entry->reg = -1;
6827 entry->mode = VOIDmode;
6828 entry->offset = offset;
6829 schedule->temps[tmpx] = -1;
6830 return entry - 1;
6833 void
6834 sh_expand_prologue (void)
6836 HARD_REG_SET live_regs_mask;
6837 int d, i;
6838 int d_rounding = 0;
6839 int save_flags = target_flags;
6840 int pretend_args;
6841 tree sp_switch_attr
6842 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6844 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6846 /* We have pretend args if we had an object sent partially in registers
6847 and partially on the stack, e.g. a large structure. */
6848 pretend_args = crtl->args.pretend_args_size;
6849 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6850 && (NPARM_REGS(SImode)
6851 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6852 pretend_args = 0;
6853 /* Dwarf2 module doesn't expect frame related insns here. */
6854 output_stack_adjust (-pretend_args
6855 - crtl->args.info.stack_regs * 8,
6856 stack_pointer_rtx, 0, NULL, false);
6858 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6859 /* We're going to use the PIC register to load the address of the
6860 incoming-argument decoder and/or of the return trampoline from
6861 the GOT, so make sure the PIC register is preserved and
6862 initialized. */
6863 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6865 if (TARGET_SHCOMPACT
6866 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6868 int reg;
6870 /* First, make all registers with incoming arguments that will
6871 be pushed onto the stack live, so that register renaming
6872 doesn't overwrite them. */
6873 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6874 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6875 >= NPARM_REGS (SImode) - reg)
6876 for (; reg < NPARM_REGS (SImode); reg++)
6877 emit_insn (gen_shcompact_preserve_incoming_args
6878 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6879 else if (CALL_COOKIE_INT_REG_GET
6880 (crtl->args.info.call_cookie, reg) == 1)
6881 emit_insn (gen_shcompact_preserve_incoming_args
6882 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6884 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6885 stack_pointer_rtx);
6886 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6887 GEN_INT (crtl->args.info.call_cookie));
6888 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6889 gen_rtx_REG (SImode, R0_REG));
6891 else if (TARGET_SHMEDIA)
6893 int tr = sh_media_register_for_return ();
6895 if (tr >= 0)
6896 emit_move_insn (gen_rtx_REG (DImode, tr),
6897 gen_rtx_REG (DImode, PR_MEDIA_REG));
6900 /* Emit the code for SETUP_VARARGS. */
6901 if (cfun->stdarg)
6903 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6905 /* Push arg regs as if they'd been provided by the caller on the stack. */
6906 for (i = 0; i < NPARM_REGS(SImode); i++)
6908 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6910 if (i >= (NPARM_REGS(SImode)
6911 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6913 break;
6914 push (rn);
6919 /* If we're supposed to switch stacks at function entry, do so now. */
6920 if (sp_switch_attr)
6922 rtx lab, newsrc;
6923 /* The argument specifies a variable holding the address of the
6924 stack the interrupt function should switch to/from at entry/exit. */
6925 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6926 const char *s
6927 = ggc_strdup (TREE_STRING_POINTER (arg));
6928 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6930 lab = add_constant (sp_switch, SImode, 0);
6931 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6932 newsrc = gen_const_mem (SImode, newsrc);
6934 emit_insn (gen_sp_switch_1 (newsrc));
6937 d = calc_live_regs (&live_regs_mask);
6938 /* ??? Maybe we could save some switching if we can move a mode switch
6939 that already happens to be at the function start into the prologue. */
6940 if (target_flags != save_flags && ! current_function_interrupt)
6941 emit_insn (gen_toggle_sz ());
6943 if (TARGET_SH5)
6945 int offset_base, offset;
6946 rtx r0 = NULL_RTX;
6947 int offset_in_r0 = -1;
6948 int sp_in_r0 = 0;
6949 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6950 int total_size, save_size;
6951 save_schedule schedule;
6952 save_entry *entry;
6953 int *tmp_pnt;
6955 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6956 && ! current_function_interrupt)
6957 r0 = gen_rtx_REG (Pmode, R0_REG);
6959 /* D is the actual number of bytes that we need for saving registers;
6960 however, in initial_elimination_offset we have committed to using
6961 an additional TREGS_SPACE amount of bytes - in order to keep both
6962 addresses to arguments supplied by the caller and local variables
6963 valid, we must keep this gap. Place it between the incoming
6964 arguments and the actually saved registers in a bid to optimize
6965 locality of reference. */
6966 total_size = d + tregs_space;
6967 total_size += rounded_frame_size (total_size);
6968 save_size = total_size - rounded_frame_size (d);
6969 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6970 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6971 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6973 /* If adjusting the stack in a single step costs nothing extra, do so.
6974 I.e. either if a single addi is enough, or we need a movi anyway,
6975 and we don't exceed the maximum offset range (the test for the
6976 latter is conservative for simplicity). */
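/* When that holds, D_ROUNDING below becomes TOTAL_SIZE - SAVE_SIZE, so
   the first output_stack_adjust drops the stack by the whole TOTAL_SIZE
   at once.  */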
6977 if (TARGET_SHMEDIA
6978 && (CONST_OK_FOR_I10 (-total_size)
6979 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6980 && total_size <= 2044)))
6981 d_rounding = total_size - save_size;
6983 offset_base = d + d_rounding;
6985 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6986 0, NULL, true);
6988 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6989 tmp_pnt = schedule.temps;
6990 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6992 enum machine_mode mode = (enum machine_mode) entry->mode;
6993 unsigned int reg = entry->reg;
6994 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6995 rtx orig_reg_rtx;
6997 offset = entry->offset;
6999 reg_rtx = gen_rtx_REG (mode, reg);
7001 mem_rtx = gen_frame_mem (mode,
7002 gen_rtx_PLUS (Pmode,
7003 stack_pointer_rtx,
7004 GEN_INT (offset)));
7006 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7008 gcc_assert (r0);
7009 mem_rtx = NULL_RTX;
7012 if (HAVE_PRE_DECREMENT
7013 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7014 || mem_rtx == NULL_RTX
7015 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7017 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7019 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7020 pre_dec = NULL_RTX;
7021 else
7023 mem_rtx = NULL_RTX;
7024 offset += GET_MODE_SIZE (mode);
7028 if (mem_rtx != NULL_RTX)
7029 goto addr_ok;
7031 if (offset_in_r0 == -1)
7033 emit_move_insn (r0, GEN_INT (offset));
7034 offset_in_r0 = offset;
7036 else if (offset != offset_in_r0)
7038 emit_move_insn (r0,
7039 gen_rtx_PLUS
7040 (Pmode, r0,
7041 GEN_INT (offset - offset_in_r0)));
7042 offset_in_r0 += offset - offset_in_r0;
7045 if (pre_dec != NULL_RTX)
7047 if (! sp_in_r0)
7049 emit_move_insn (r0,
7050 gen_rtx_PLUS
7051 (Pmode, r0, stack_pointer_rtx));
7052 sp_in_r0 = 1;
7055 offset -= GET_MODE_SIZE (mode);
7056 offset_in_r0 -= GET_MODE_SIZE (mode);
7058 mem_rtx = pre_dec;
7060 else if (sp_in_r0)
7061 mem_rtx = gen_frame_mem (mode, r0);
7062 else
7063 mem_rtx = gen_frame_mem (mode,
7064 gen_rtx_PLUS (Pmode,
7065 stack_pointer_rtx,
7066 r0));
7068 /* We must not use an r0-based address for target-branch
7069 registers or for special registers without pre-dec
7070 memory addresses, since we store their values in r0
7071 first. */
7072 gcc_assert (!TARGET_REGISTER_P (reg)
7073 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7074 || mem_rtx == pre_dec));
7076 addr_ok:
7077 orig_reg_rtx = reg_rtx;
7078 if (TARGET_REGISTER_P (reg)
7079 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7080 && mem_rtx != pre_dec))
7082 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7084 emit_move_insn (tmp_reg, reg_rtx);
7086 if (REGNO (tmp_reg) == R0_REG)
7088 offset_in_r0 = -1;
7089 sp_in_r0 = 0;
7090 gcc_assert (!refers_to_regno_p
7091 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7094 if (*++tmp_pnt <= 0)
7095 tmp_pnt = schedule.temps;
7097 reg_rtx = tmp_reg;
7100 rtx insn;
7102 /* Mark as interesting for dwarf cfi generator */
7103 insn = emit_move_insn (mem_rtx, reg_rtx);
7104 RTX_FRAME_RELATED_P (insn) = 1;
7105 /* If we use an intermediate register for the save, we can't
7106 describe this exactly in cfi as a copy of the to-be-saved
7107 register into the temporary register and then the temporary
7108 register on the stack, because the temporary register can
7109 have a different natural size than the to-be-saved register.
7110 Thus, we gloss over the intermediate copy and pretend we do
7111 a direct save from the to-be-saved register. */
7112 if (REGNO (reg_rtx) != reg)
7114 rtx set;
7116 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7117 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7120 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7122 rtx reg_rtx = gen_rtx_REG (mode, reg);
7123 rtx set;
7124 rtx mem_rtx = gen_frame_mem (mode,
7125 gen_rtx_PLUS (Pmode,
7126 stack_pointer_rtx,
7127 GEN_INT (offset)));
7129 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7130 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7135 gcc_assert (entry->offset == d_rounding);
7137 else
7138 push_regs (&live_regs_mask, current_function_interrupt);
7140 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7141 emit_insn (gen_GOTaddr2picreg ());
7143 if (SHMEDIA_REGS_STACK_ADJUST ())
7145 /* This must NOT go through the PLT, otherwise mach and macl
7146 may be clobbered. */
7147 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7148 (TARGET_FPU_ANY
7149 ? "__GCC_push_shmedia_regs"
7150 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7151 emit_insn (gen_shmedia_save_restore_regs_compact
7152 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7155 if (target_flags != save_flags && ! current_function_interrupt)
7156 emit_insn (gen_toggle_sz ());
7158 target_flags = save_flags;
7160 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7161 stack_pointer_rtx, 0, NULL, true);
7163 if (frame_pointer_needed)
7164 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7166 if (TARGET_SHCOMPACT
7167 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7169 /* This must NOT go through the PLT, otherwise mach and macl
7170 may be clobbered. */
7171 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7172 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7173 emit_insn (gen_shcompact_incoming_args ());
7177 void
7178 sh_expand_epilogue (bool sibcall_p)
7180 HARD_REG_SET live_regs_mask;
7181 int d, i;
7182 int d_rounding = 0;
7184 int save_flags = target_flags;
7185 int frame_size, save_size;
7186 int fpscr_deferred = 0;
7187 int e = sibcall_p ? -1 : 1;
7189 d = calc_live_regs (&live_regs_mask);
7191 save_size = d;
7192 frame_size = rounded_frame_size (d);
7194 if (TARGET_SH5)
7196 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7197 int total_size;
7198 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7199 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7200 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7202 total_size = d + tregs_space;
7203 total_size += rounded_frame_size (total_size);
7204 save_size = total_size - frame_size;
7206 /* If adjusting the stack in a single step costs nothing extra, do so.
7207 I.e. either if a single addi is enough, or we need a movi anyway,
7208 and we don't exceed the maximum offset range (the test for the
7209 latter is conservative for simplicity). */
7210 if (TARGET_SHMEDIA
7211 && ! frame_pointer_needed
7212 && (CONST_OK_FOR_I10 (total_size)
7213 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7214 && total_size <= 2044)))
7215 d_rounding = frame_size;
7217 frame_size -= d_rounding;
7220 if (frame_pointer_needed)
7222 /* We must avoid scheduling the epilogue with previous basic blocks.
7223 See PR/18032 and PR/40313. */
7224 emit_insn (gen_blockage ());
7225 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7226 &live_regs_mask, false);
7228 /* We must avoid moving the stack pointer adjustment past code
7229 which reads from the local frame, else an interrupt could
7230 occur after the SP adjustment and clobber data in the local
7231 frame. */
7232 emit_insn (gen_blockage ());
7233 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7235 else if (frame_size)
7237 /* We must avoid moving the stack pointer adjustment past code
7238 which reads from the local frame, else an interrupt could
7239 occur after the SP adjustment and clobber data in the local
7240 frame. */
7241 emit_insn (gen_blockage ());
7242 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7243 &live_regs_mask, false);
7246 if (SHMEDIA_REGS_STACK_ADJUST ())
7248 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7249 (TARGET_FPU_ANY
7250 ? "__GCC_pop_shmedia_regs"
7251 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7252 /* This must NOT go through the PLT, otherwise mach and macl
7253 may be clobbered. */
7254 emit_insn (gen_shmedia_save_restore_regs_compact
7255 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7258 /* Pop all the registers. */
7260 if (target_flags != save_flags && ! current_function_interrupt)
7261 emit_insn (gen_toggle_sz ());
7262 if (TARGET_SH5)
7264 int offset_base, offset;
7265 int offset_in_r0 = -1;
7266 int sp_in_r0 = 0;
7267 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7268 save_schedule schedule;
7269 save_entry *entry;
7270 int *tmp_pnt;
7272 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7273 offset_base = -entry[1].offset + d_rounding;
7274 tmp_pnt = schedule.temps;
7275 for (; entry->mode != VOIDmode; entry--)
7277 enum machine_mode mode = (enum machine_mode) entry->mode;
7278 int reg = entry->reg;
7279 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7281 offset = offset_base + entry->offset;
7282 reg_rtx = gen_rtx_REG (mode, reg);
7284 mem_rtx = gen_frame_mem (mode,
7285 gen_rtx_PLUS (Pmode,
7286 stack_pointer_rtx,
7287 GEN_INT (offset)));
7289 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7290 mem_rtx = NULL_RTX;
7292 if (HAVE_POST_INCREMENT
7293 && (offset == offset_in_r0
7294 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7295 && mem_rtx == NULL_RTX)
7296 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7298 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7300 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7301 post_inc = NULL_RTX;
7302 else
7303 mem_rtx = NULL_RTX;
7306 if (mem_rtx != NULL_RTX)
7307 goto addr_ok;
7309 if (offset_in_r0 == -1)
7311 emit_move_insn (r0, GEN_INT (offset));
7312 offset_in_r0 = offset;
7314 else if (offset != offset_in_r0)
7316 emit_move_insn (r0,
7317 gen_rtx_PLUS
7318 (Pmode, r0,
7319 GEN_INT (offset - offset_in_r0)));
7320 offset_in_r0 += offset - offset_in_r0;
7323 if (post_inc != NULL_RTX)
7325 if (! sp_in_r0)
7327 emit_move_insn (r0,
7328 gen_rtx_PLUS
7329 (Pmode, r0, stack_pointer_rtx));
7330 sp_in_r0 = 1;
7333 mem_rtx = post_inc;
7335 offset_in_r0 += GET_MODE_SIZE (mode);
7337 else if (sp_in_r0)
7338 mem_rtx = gen_frame_mem (mode, r0);
7339 else
7340 mem_rtx = gen_frame_mem (mode,
7341 gen_rtx_PLUS (Pmode,
7342 stack_pointer_rtx,
7343 r0));
7345 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7346 || mem_rtx == post_inc);
7348 addr_ok:
7349 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7350 && mem_rtx != post_inc)
7352 emit_move_insn (r0, mem_rtx);
7353 mem_rtx = r0;
7355 else if (TARGET_REGISTER_P (reg))
7357 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7359 /* Give the scheduler a bit of freedom by using up to
7360 MAX_TEMPS registers in a round-robin fashion. */
7361 emit_move_insn (tmp_reg, mem_rtx);
7362 mem_rtx = tmp_reg;
7363 if (*++tmp_pnt < 0)
7364 tmp_pnt = schedule.temps;
7367 emit_move_insn (reg_rtx, mem_rtx);
7370 gcc_assert (entry->offset + offset_base == d + d_rounding);
7372 else /* ! TARGET_SH5 */
7374 int last_reg;
7376 save_size = 0;
7377 /* For an ISR with RESBANK attribute assigned, don't pop PR
7378 register. */
7379 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7380 && !sh_cfun_resbank_handler_p ())
7382 if (!frame_pointer_needed)
7383 emit_insn (gen_blockage ());
7384 pop (PR_REG);
7387 /* Banked registers are popped first to avoid being scheduled in the
7388 delay slot. RTE switches banks before the ds instruction. */
7389 if (current_function_interrupt)
7391 bool use_movml = false;
7393 if (TARGET_SH2A)
7395 unsigned int count = 0;
7397 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7398 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7399 count++;
7400 else
7401 break;
7403 /* Use movml when all banked registers are popped. */
7404 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7405 use_movml = true;
7408 if (use_movml)
7410 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7412 /* We must avoid scheduling the multiple-load insn together
7413 with other insns. */
7414 emit_insn (gen_blockage ());
7415 emit_insn (gen_movml_pop_banked (sp_reg));
7416 emit_insn (gen_blockage ());
7418 else
7419 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7420 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7421 pop (i);
7423 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7425 else
7426 last_reg = FIRST_PSEUDO_REGISTER;
7428 for (i = 0; i < last_reg; i++)
7430 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7432 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7433 && hard_reg_set_intersect_p (live_regs_mask,
7434 reg_class_contents[DF_REGS]))
7435 fpscr_deferred = 1;
7436 /* For an ISR with RESBANK attribute assigned, don't pop the
7437 following registers: R0-R14, MACH, MACL and GBR. */
7438 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7439 && ! (sh_cfun_resbank_handler_p ()
7440 && ((j >= FIRST_GENERAL_REG
7441 && j < LAST_GENERAL_REG)
7442 || j == MACH_REG
7443 || j == MACL_REG
7444 || j == GBR_REG)))
7445 pop (j);
7447 if (j == FIRST_FP_REG && fpscr_deferred)
7448 pop (FPSCR_REG);
7451 if (target_flags != save_flags && ! current_function_interrupt)
7452 emit_insn (gen_toggle_sz ());
7453 target_flags = save_flags;
7455 output_stack_adjust (crtl->args.pretend_args_size
7456 + save_size + d_rounding
7457 + crtl->args.info.stack_regs * 8,
7458 stack_pointer_rtx, e, NULL, false);
7460 if (crtl->calls_eh_return)
7461 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7462 EH_RETURN_STACKADJ_RTX));
7464 /* Switch back to the normal stack if necessary. */
7465 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7466 emit_insn (gen_sp_switch_2 ());
7468 /* Tell flow the insn that pops PR isn't dead. */
7469 /* PR_REG will never be live in SHmedia mode, and we don't need to
7470 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7471 by the return pattern. */
7472 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7473 emit_use (gen_rtx_REG (SImode, PR_REG));
7476 static int sh_need_epilogue_known = 0;
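/* Return nonzero if the current function needs an epilogue. This is
   determined by generating the epilogue into a scratch sequence and
   checking whether it came out empty; the answer is cached in
   sh_need_epilogue_known (1 = needed, -1 = not needed).  */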
7479 sh_need_epilogue (void)
7481 if (! sh_need_epilogue_known)
7483 rtx epilogue;
7485 start_sequence ();
7486 sh_expand_epilogue (0);
7487 epilogue = get_insns ();
7488 end_sequence ();
7489 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7491 return sh_need_epilogue_known > 0;
7494 /* Emit code to change the current function's return address to RA.
7495 TEMP is available as a scratch register, if needed. */
7497 void
7498 sh_set_return_address (rtx ra, rtx tmp)
7500 HARD_REG_SET live_regs_mask;
7501 int d;
7502 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7503 int pr_offset;
7505 d = calc_live_regs (&live_regs_mask);
7507 /* If pr_reg isn't live, we can set it (or the register given in
7508 sh_media_register_for_return) directly. */
7509 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7511 rtx rr;
7513 if (TARGET_SHMEDIA)
7515 int rr_regno = sh_media_register_for_return ();
7517 if (rr_regno < 0)
7518 rr_regno = pr_reg;
7520 rr = gen_rtx_REG (DImode, rr_regno);
7522 else
7523 rr = gen_rtx_REG (SImode, pr_reg);
7525 emit_insn (GEN_MOV (rr, ra));
7526 /* Tell flow the register for return isn't dead. */
7527 emit_use (rr);
7528 return;
7531 if (TARGET_SH5)
7533 int offset;
7534 save_schedule schedule;
7535 save_entry *entry;
7537 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7538 offset = entry[1].offset;
7539 for (; entry->mode != VOIDmode; entry--)
7540 if (entry->reg == pr_reg)
7541 goto found;
7543 /* We can't find pr register. */
7544 gcc_unreachable ();
7546 found:
7547 offset = entry->offset - offset;
7548 pr_offset = (rounded_frame_size (d) + offset
7549 + SHMEDIA_REGS_STACK_ADJUST ());
7551 else
7552 pr_offset = rounded_frame_size (d);
7554 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7556 if (frame_pointer_needed)
7557 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7558 else
7559 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7561 tmp = gen_frame_mem (Pmode, tmp);
7562 emit_insn (GEN_MOV (tmp, ra));
7563 /* Tell flow this store isn't dead. */
7564 emit_use (tmp);
7567 /* Clear variables at function end. */
7569 static void
7570 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7571 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7573 sh_need_epilogue_known = 0;
7576 static rtx
7577 sh_builtin_saveregs (void)
7579 /* First unnamed integer register. */
7580 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7581 /* Number of integer registers we need to save. */
7582 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7583 /* First unnamed SFmode float reg. */
7584 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7585 /* Number of SFmode float regs to save. */
7586 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7587 rtx regbuf, fpregs;
7588 int bufsize, regno;
7589 alias_set_type alias_set;
7591 if (TARGET_SH5)
7593 if (n_intregs)
7595 int pushregs = n_intregs;
7597 while (pushregs < NPARM_REGS (SImode) - 1
7598 && (CALL_COOKIE_INT_REG_GET
7599 (crtl->args.info.call_cookie,
7600 NPARM_REGS (SImode) - pushregs)
7601 == 1))
7603 crtl->args.info.call_cookie
7604 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7605 - pushregs, 1);
7606 pushregs++;
7609 if (pushregs == NPARM_REGS (SImode))
7610 crtl->args.info.call_cookie
7611 |= (CALL_COOKIE_INT_REG (0, 1)
7612 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7613 else
7614 crtl->args.info.call_cookie
7615 |= CALL_COOKIE_STACKSEQ (pushregs);
7617 crtl->args.pretend_args_size += 8 * n_intregs;
7619 if (TARGET_SHCOMPACT)
7620 return const0_rtx;
7623 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7625 error ("__builtin_saveregs not supported by this subtarget");
7626 return const0_rtx;
7629 if (TARGET_SHMEDIA)
7630 n_floatregs = 0;
7632 /* Allocate block of memory for the regs. */
7633 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7634 Or can assign_stack_local accept a 0 SIZE argument? */
7635 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7637 if (TARGET_SHMEDIA)
7638 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7639 else if (n_floatregs & 1)
7641 rtx addr;
7643 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7644 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7645 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7646 regbuf = change_address (regbuf, BLKmode, addr);
7648 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7650 rtx addr, mask;
7652 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7653 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7654 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7655 emit_insn (gen_andsi3 (addr, addr, mask));
7656 regbuf = change_address (regbuf, BLKmode, addr);
7658 else
7659 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7660 alias_set = get_varargs_alias_set ();
7661 set_mem_alias_set (regbuf, alias_set);
7663 /* Save int args.
7664 This is optimized to only save the regs that are necessary. Explicitly
7665 named args need not be saved. */
7666 if (n_intregs > 0)
7667 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7668 adjust_address (regbuf, BLKmode,
7669 n_floatregs * UNITS_PER_WORD),
7670 n_intregs);
7672 if (TARGET_SHMEDIA)
7673 /* Return the address of the regbuf. */
7674 return XEXP (regbuf, 0);
7676 /* Save float args.
7677 This is optimized to only save the regs that are necessary. Explicitly
7678 named args need not be saved.
7679 We explicitly build a pointer to the buffer because it halves the insn
7680 count when not optimizing (otherwise the pointer is built for each reg
7681 saved).
7682 We emit the moves in reverse order so that we can use predecrement. */
7684 fpregs = copy_to_mode_reg (Pmode,
7685 plus_constant (XEXP (regbuf, 0),
7686 n_floatregs * UNITS_PER_WORD));
7687 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7689 rtx mem;
7690 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7692 emit_insn (gen_addsi3 (fpregs, fpregs,
7693 GEN_INT (-2 * UNITS_PER_WORD)));
7694 mem = change_address (regbuf, DFmode, fpregs);
7695 emit_move_insn (mem,
7696 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7698 regno = first_floatreg;
7699 if (regno & 1)
7701 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7702 mem = change_address (regbuf, SFmode, fpregs);
7703 emit_move_insn (mem,
7704 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7705 - (TARGET_LITTLE_ENDIAN != 0)));
7708 else
7709 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7711 rtx mem;
7713 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7714 mem = change_address (regbuf, SFmode, fpregs);
7715 emit_move_insn (mem,
7716 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7719 /* Return the address of the regbuf. */
7720 return XEXP (regbuf, 0);
7723 /* Define the `__builtin_va_list' type for the ABI. */
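/* A rough C-level sketch of the record built below (field meanings are
   inferred from sh_va_start and sh_gimplify_va_arg_expr; this is an
   illustration only, not a declaration used anywhere):

     struct __va_list_tag
     {
       void *__va_next_o;          next integer argument register save slot
       void *__va_next_o_limit;    end of the integer register save area
       void *__va_next_fp;         next FP argument register save slot
       void *__va_next_fp_limit;   end of the FP register save area
       void *__va_next_stack;      next stack-passed argument
     };  */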
7725 static tree
7726 sh_build_builtin_va_list (void)
7728 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7729 tree record, type_decl;
7731 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7732 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7733 return ptr_type_node;
7735 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7736 type_decl = build_decl (BUILTINS_LOCATION,
7737 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7739 f_next_o = build_decl (BUILTINS_LOCATION,
7740 FIELD_DECL, get_identifier ("__va_next_o"),
7741 ptr_type_node);
7742 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7743 FIELD_DECL,
7744 get_identifier ("__va_next_o_limit"),
7745 ptr_type_node);
7746 f_next_fp = build_decl (BUILTINS_LOCATION,
7747 FIELD_DECL, get_identifier ("__va_next_fp"),
7748 ptr_type_node);
7749 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7750 FIELD_DECL,
7751 get_identifier ("__va_next_fp_limit"),
7752 ptr_type_node);
7753 f_next_stack = build_decl (BUILTINS_LOCATION,
7754 FIELD_DECL, get_identifier ("__va_next_stack"),
7755 ptr_type_node);
7757 DECL_FIELD_CONTEXT (f_next_o) = record;
7758 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7759 DECL_FIELD_CONTEXT (f_next_fp) = record;
7760 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7761 DECL_FIELD_CONTEXT (f_next_stack) = record;
7763 TREE_CHAIN (record) = type_decl;
7764 TYPE_NAME (record) = type_decl;
7765 TYPE_FIELDS (record) = f_next_o;
7766 DECL_CHAIN (f_next_o) = f_next_o_limit;
7767 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7768 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7769 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7771 layout_type (record);
7773 return record;
7776 /* Implement `va_start' for varargs and stdarg. */
7778 static void
7779 sh_va_start (tree valist, rtx nextarg)
7781 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7782 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7783 tree t, u;
7784 int nfp, nint;
7786 if (TARGET_SH5)
7788 expand_builtin_saveregs ();
7789 std_expand_builtin_va_start (valist, nextarg);
7790 return;
7793 if ((! TARGET_SH2E && ! TARGET_SH4)
7794 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7796 std_expand_builtin_va_start (valist, nextarg);
7797 return;
7800 f_next_o = TYPE_FIELDS (va_list_type_node);
7801 f_next_o_limit = DECL_CHAIN (f_next_o);
7802 f_next_fp = DECL_CHAIN (f_next_o_limit);
7803 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7804 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7806 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7807 NULL_TREE);
7808 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7809 valist, f_next_o_limit, NULL_TREE);
7810 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7811 NULL_TREE);
7812 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7813 valist, f_next_fp_limit, NULL_TREE);
7814 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7815 valist, f_next_stack, NULL_TREE);
7817 /* Call __builtin_saveregs. */
7818 u = make_tree (sizetype, expand_builtin_saveregs ());
7819 u = fold_convert (ptr_type_node, u);
7820 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7821 TREE_SIDE_EFFECTS (t) = 1;
7822 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7824 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7825 if (nfp < 8)
7826 nfp = 8 - nfp;
7827 else
7828 nfp = 0;
7829 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7830 size_int (UNITS_PER_WORD * nfp));
7831 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7832 TREE_SIDE_EFFECTS (t) = 1;
7833 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7835 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7836 TREE_SIDE_EFFECTS (t) = 1;
7837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7839 nint = crtl->args.info.arg_count[SH_ARG_INT];
7840 if (nint < 4)
7841 nint = 4 - nint;
7842 else
7843 nint = 0;
7844 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7845 size_int (UNITS_PER_WORD * nint));
7846 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7847 TREE_SIDE_EFFECTS (t) = 1;
7848 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7850 u = make_tree (ptr_type_node, nextarg);
7851 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7852 TREE_SIDE_EFFECTS (t) = 1;
7853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7856 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7857 member, return it. */
7858 static tree
7859 find_sole_member (tree type)
7861 tree field, member = NULL_TREE;
7863 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7865 if (TREE_CODE (field) != FIELD_DECL)
7866 continue;
7867 if (!DECL_SIZE (field))
7868 return NULL_TREE;
7869 if (integer_zerop (DECL_SIZE (field)))
7870 continue;
7871 if (member)
7872 return NULL_TREE;
7873 member = field;
7875 return member;
7877 /* Implement `va_arg'. */
7879 static tree
7880 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7881 gimple_seq *post_p ATTRIBUTE_UNUSED)
7883 HOST_WIDE_INT size, rsize;
7884 tree tmp, pptr_type_node;
7885 tree addr, lab_over = NULL, result = NULL;
7886 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7887 tree eff_type;
7889 if (pass_by_ref)
7890 type = build_pointer_type (type);
7892 size = int_size_in_bytes (type);
7893 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7894 pptr_type_node = build_pointer_type (ptr_type_node);
7896 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7897 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7899 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7900 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7901 int pass_as_float;
7902 tree lab_false;
7903 tree member;
7905 f_next_o = TYPE_FIELDS (va_list_type_node);
7906 f_next_o_limit = DECL_CHAIN (f_next_o);
7907 f_next_fp = DECL_CHAIN (f_next_o_limit);
7908 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7909 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7911 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7912 NULL_TREE);
7913 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7914 valist, f_next_o_limit, NULL_TREE);
7915 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7916 valist, f_next_fp, NULL_TREE);
7917 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7918 valist, f_next_fp_limit, NULL_TREE);
7919 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7920 valist, f_next_stack, NULL_TREE);
7922 /* Structures with a single member with a distinct mode are passed
7923 like their member. This is relevant if the latter has a REAL_TYPE
7924 or COMPLEX_TYPE type. */
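/* For instance, a hypothetical struct { double d; } would be treated here
   like a plain double (DFmode). */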
7925 eff_type = type;
7926 while (TREE_CODE (eff_type) == RECORD_TYPE
7927 && (member = find_sole_member (eff_type))
7928 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7929 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7930 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7932 tree field_type = TREE_TYPE (member);
7934 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7935 eff_type = field_type;
7936 else
7938 gcc_assert ((TYPE_ALIGN (eff_type)
7939 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7940 || (TYPE_ALIGN (eff_type)
7941 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7942 break;
7946 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7948 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7949 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7950 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7951 && size <= 16));
7953 else
7955 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7958 addr = create_tmp_var (pptr_type_node, NULL);
7959 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7960 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7962 valist = build_simple_mem_ref (addr);
7964 if (pass_as_float)
7966 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7967 tree cmp;
7968 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7970 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7971 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7973 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7974 tmp = next_fp_limit;
7975 if (size > 4 && !is_double)
7976 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7977 unshare_expr (tmp), size_int (4 - size));
7978 tmp = build2 (GE_EXPR, boolean_type_node,
7979 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7980 cmp = build3 (COND_EXPR, void_type_node, tmp,
7981 build1 (GOTO_EXPR, void_type_node,
7982 unshare_expr (lab_false)), NULL_TREE);
7983 if (!is_double)
7984 gimplify_and_add (cmp, pre_p);
7986 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7987 || (is_double || size == 16))
7989 tmp = fold_convert (sizetype, next_fp_tmp);
7990 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7991 size_int (UNITS_PER_WORD));
7992 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7993 unshare_expr (next_fp_tmp), tmp);
7994 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7996 if (is_double)
7997 gimplify_and_add (cmp, pre_p);
7999 #ifdef FUNCTION_ARG_SCmode_WART
8000 if (TYPE_MODE (eff_type) == SCmode
8001 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8003 tree subtype = TREE_TYPE (eff_type);
8004 tree real, imag;
8006 imag
8007 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8008 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8010 real
8011 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8012 real = get_initialized_tmp_var (real, pre_p, NULL);
8014 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8015 if (type != eff_type)
8016 result = build1 (VIEW_CONVERT_EXPR, type, result);
8017 result = get_initialized_tmp_var (result, pre_p, NULL);
8019 #endif /* FUNCTION_ARG_SCmode_WART */
8021 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8022 gimplify_and_add (tmp, pre_p);
8024 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8025 gimplify_and_add (tmp, pre_p);
8027 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8028 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8029 gimplify_assign (unshare_expr (next_fp_tmp),
8030 unshare_expr (valist), pre_p);
8032 gimplify_assign (unshare_expr (valist),
8033 unshare_expr (next_fp_tmp), post_p);
8034 valist = next_fp_tmp;
8036 else
8038 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8039 unshare_expr (next_o), size_int (rsize));
8040 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8041 unshare_expr (next_o_limit));
8042 tmp = build3 (COND_EXPR, void_type_node, tmp,
8043 build1 (GOTO_EXPR, void_type_node,
8044 unshare_expr (lab_false)),
8045 NULL_TREE);
8046 gimplify_and_add (tmp, pre_p);
8048 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8049 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8051 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8052 gimplify_and_add (tmp, pre_p);
8054 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8055 gimplify_and_add (tmp, pre_p);
8057 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8058 gimplify_assign (unshare_expr (next_o),
8059 unshare_expr (next_o_limit), pre_p);
8061 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8062 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8065 if (!result)
8067 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8068 gimplify_and_add (tmp, pre_p);
8072 /* ??? In va-sh.h, there had been code to make values larger than
8073 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8075 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8076 if (result)
8078 gimplify_assign (result, tmp, pre_p);
8079 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8080 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8081 gimplify_and_add (tmp, pre_p);
8083 else
8084 result = tmp;
8086 if (pass_by_ref)
8087 result = build_va_arg_indirect_ref (result);
8089 return result;
8092 /* 64 bit floating point memory transfers are paired single precision loads
8093 or stores, so DWARF information needs fixing in little endian mode (unless
8094 PR=SZ=1 in FPSCR). */
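/* For example, in little endian mode a DFmode value held in fr0/fr1 is
   described to DWARF as the SFmode pair (fr1, fr0) by the PARALLEL built
   below; the register names here are only an illustration. */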
8096 sh_dwarf_register_span (rtx reg)
8098 unsigned regno = REGNO (reg);
8100 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8101 return NULL_RTX;
8103 return
8104 gen_rtx_PARALLEL (VOIDmode,
8105 gen_rtvec (2,
8106 gen_rtx_REG (SFmode,
8107 DBX_REGISTER_NUMBER (regno+1)),
8108 gen_rtx_REG (SFmode,
8109 DBX_REGISTER_NUMBER (regno))));
8112 static enum machine_mode
8113 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8114 int *punsignedp, const_tree funtype,
8115 int for_return ATTRIBUTE_UNUSED)
8117 if (sh_promote_prototypes (funtype))
8118 return promote_mode (type, mode, punsignedp);
8119 else
8120 return mode;
8123 static bool
8124 sh_promote_prototypes (const_tree type)
8126 if (TARGET_HITACHI)
8127 return 0;
8128 if (! type)
8129 return 1;
8130 return ! sh_attr_renesas_p (type);
8133 /* Whether an argument must be passed by reference. On SHcompact, we
8134 pretend that arguments wider than 32 bits that would have been passed in
8135 registers are passed by reference, so that an SHmedia trampoline
8136 loads them into the full 64-bit registers. */
8138 static int
8139 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8140 const_tree type, bool named)
8142 unsigned HOST_WIDE_INT size;
8144 if (type)
8145 size = int_size_in_bytes (type);
8146 else
8147 size = GET_MODE_SIZE (mode);
8149 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8150 && (!named
8151 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8152 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8153 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8154 && size > 4
8155 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8156 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8157 return size;
8158 else
8159 return 0;
8162 static bool
8163 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8164 const_tree type, bool named)
8166 if (targetm.calls.must_pass_in_stack (mode, type))
8167 return true;
8169 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8170 wants to know about pass-by-reference semantics for incoming
8171 arguments. */
8172 if (! cum)
8173 return false;
8175 if (TARGET_SHCOMPACT)
8177 cum->byref = shcompact_byref (cum, mode, type, named);
8178 return cum->byref != 0;
8181 return false;
8184 static bool
8185 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8186 const_tree type, bool named ATTRIBUTE_UNUSED)
8188 /* ??? How can it possibly be correct to return true only on the
8189 caller side of the equation? Is there someplace else in the
8190 sh backend that's magically producing the copies? */
8191 return (cum->outgoing
8192 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8193 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8196 static int
8197 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8198 tree type, bool named ATTRIBUTE_UNUSED)
8200 int words = 0;
8202 if (!TARGET_SH5
8203 && PASS_IN_REG_P (*cum, mode, type)
8204 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8205 && (ROUND_REG (*cum, mode)
8206 + (mode != BLKmode
8207 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8208 : ROUND_ADVANCE (int_size_in_bytes (type)))
8209 > NPARM_REGS (mode)))
8210 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8212 else if (!TARGET_SHCOMPACT
8213 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8214 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8216 return words * UNITS_PER_WORD;
8220 /* Define where to put the arguments to a function.
8221 Value is zero to push the argument on the stack,
8222 or a hard register in which to store the argument.
8224 MODE is the argument's machine mode.
8225 TYPE is the data type of the argument (as a tree).
8226 This is null for libcalls where that information may
8227 not be available.
8228 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8229 the preceding args and about the function being called.
8230 NAMED is nonzero if this argument is a named parameter
8231 (otherwise it is an extra parameter matching an ellipsis).
8233 On SH the first args are normally in registers
8234 and the rest are pushed. Any arg that starts within the first
8235 NPARM_REGS words is at least partially passed in a register unless
8236 its data type forbids. */
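/* As an orientation only (the exact registers depend on the configured
   ABI): on a typical SH target the first SImode arguments would land in
   r4..r7 and, with an FPU, the first SFmode arguments in fr4..fr11;
   further arguments go on the stack. */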
8240 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8241 tree type, int named)
8243 if (! TARGET_SH5 && mode == VOIDmode)
8244 return GEN_INT (ca->renesas_abi ? 1 : 0);
8246 if (! TARGET_SH5
8247 && PASS_IN_REG_P (*ca, mode, type)
8248 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8250 int regno;
8252 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8253 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8255 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8256 gen_rtx_REG (SFmode,
8257 BASE_ARG_REG (mode)
8258 + (ROUND_REG (*ca, mode) ^ 1)),
8259 const0_rtx);
8260 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8261 gen_rtx_REG (SFmode,
8262 BASE_ARG_REG (mode)
8263 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8264 GEN_INT (4));
8265 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
8268 /* If the alignment of a DF value causes an SF register to be
8269 skipped, we will use that skipped register for the next SF
8270 value. */
8271 if ((TARGET_HITACHI || ca->renesas_abi)
8272 && ca->free_single_fp_reg
8273 && mode == SFmode)
8274 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8276 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8277 ^ (mode == SFmode && TARGET_SH4
8278 && TARGET_LITTLE_ENDIAN != 0
8279 && ! TARGET_HITACHI && ! ca->renesas_abi);
8280 return gen_rtx_REG (mode, regno);
8284 if (TARGET_SH5)
8286 if (mode == VOIDmode && TARGET_SHCOMPACT)
8287 return GEN_INT (ca->call_cookie);
8289 /* The following test assumes unnamed arguments are promoted to
8290 DFmode. */
8291 if (mode == SFmode && ca->free_single_fp_reg)
8292 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8294 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8295 && (named || ! ca->prototype_p)
8296 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8298 if (! ca->prototype_p && TARGET_SHMEDIA)
8299 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8301 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8302 FIRST_FP_PARM_REG
8303 + ca->arg_count[(int) SH_ARG_FLOAT]);
8306 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8307 && (! TARGET_SHCOMPACT
8308 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8309 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8310 type, named))))
8312 return gen_rtx_REG (mode, (FIRST_PARM_REG
8313 + ca->arg_count[(int) SH_ARG_INT]));
8316 return 0;
8319 return 0;
8322 /* Update the data in CUM to advance over an argument
8323 of mode MODE and data type TYPE.
8324 (TYPE is null for libcalls where that information may not be
8325 available.) */
8327 void
8328 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8329 tree type, int named)
8331 if (ca->force_mem)
8332 ca->force_mem = 0;
8333 else if (TARGET_SH5)
8335 tree type2 = (ca->byref && type
8336 ? TREE_TYPE (type)
8337 : type);
8338 enum machine_mode mode2 = (ca->byref && type
8339 ? TYPE_MODE (type2)
8340 : mode);
8341 int dwords = ((ca->byref
8342 ? ca->byref
8343 : mode2 == BLKmode
8344 ? int_size_in_bytes (type2)
8345 : GET_MODE_SIZE (mode2)) + 7) / 8;
8346 int numregs = MIN (dwords, NPARM_REGS (SImode)
8347 - ca->arg_count[(int) SH_ARG_INT]);
8349 if (numregs)
8351 ca->arg_count[(int) SH_ARG_INT] += numregs;
8352 if (TARGET_SHCOMPACT
8353 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8355 ca->call_cookie
8356 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8357 - numregs, 1);
8358 /* N.B. We want this also for outgoing. */
8359 ca->stack_regs += numregs;
8361 else if (ca->byref)
8363 if (! ca->outgoing)
8364 ca->stack_regs += numregs;
8365 ca->byref_regs += numregs;
8366 ca->byref = 0;
8368 ca->call_cookie
8369 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8370 - numregs, 2);
8371 while (--numregs);
8372 ca->call_cookie
8373 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8374 - 1, 1);
8376 else if (dwords > numregs)
8378 int pushregs = numregs;
8380 if (TARGET_SHCOMPACT)
8381 ca->stack_regs += numregs;
8382 while (pushregs < NPARM_REGS (SImode) - 1
8383 && (CALL_COOKIE_INT_REG_GET
8384 (ca->call_cookie,
8385 NPARM_REGS (SImode) - pushregs)
8386 == 1))
8388 ca->call_cookie
8389 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8390 - pushregs, 1);
8391 pushregs++;
8393 if (numregs == NPARM_REGS (SImode))
8394 ca->call_cookie
8395 |= CALL_COOKIE_INT_REG (0, 1)
8396 | CALL_COOKIE_STACKSEQ (numregs - 1);
8397 else
8398 ca->call_cookie
8399 |= CALL_COOKIE_STACKSEQ (numregs);
8402 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8403 && (named || ! ca->prototype_p))
8405 if (mode2 == SFmode && ca->free_single_fp_reg)
8406 ca->free_single_fp_reg = 0;
8407 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8408 < NPARM_REGS (SFmode))
8410 int numfpregs
8411 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8412 NPARM_REGS (SFmode)
8413 - ca->arg_count[(int) SH_ARG_FLOAT]);
8415 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8417 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8419 if (ca->outgoing && numregs > 0)
8422 ca->call_cookie
8423 |= (CALL_COOKIE_INT_REG
8424 (ca->arg_count[(int) SH_ARG_INT]
8425 - numregs + ((numfpregs - 2) / 2),
8426 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8427 - numfpregs) / 2));
8429 while (numfpregs -= 2);
8431 else if (mode2 == SFmode && (named)
8432 && (ca->arg_count[(int) SH_ARG_FLOAT]
8433 < NPARM_REGS (SFmode)))
8434 ca->free_single_fp_reg
8435 = FIRST_FP_PARM_REG - numfpregs
8436 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8439 return;
8442 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8444 /* Note that we've used the skipped register. */
8445 if (mode == SFmode && ca->free_single_fp_reg)
8447 ca->free_single_fp_reg = 0;
8448 return;
8450 /* When we have a DF after an SF, there's an SF register that gets
8451 skipped in order to align the DF value. We note this skipped
8452 register, because the next SF value will use it, and not the
8453 SF that follows the DF. */
8454 if (mode == DFmode
8455 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8457 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8458 + BASE_ARG_REG (mode));
8462 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8463 || PASS_IN_REG_P (*ca, mode, type))
8464 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8465 = (ROUND_REG (*ca, mode)
8466 + (mode == BLKmode
8467 ? ROUND_ADVANCE (int_size_in_bytes (type))
8468 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8471 /* The Renesas calling convention doesn't quite fit into this scheme since
8472 the address is passed like an invisible argument, but one that is always
8473 passed in memory. */
8474 static rtx
8475 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8477 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8478 return 0;
8479 return gen_rtx_REG (Pmode, 2);
8482 /* Worker function for TARGET_FUNCTION_VALUE.
8484 For the SH, this is like LIBCALL_VALUE, except that we must change the
8485 mode like PROMOTE_MODE does.
8486 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8487 tested here has to be kept in sync with the one in explow.c:promote_mode.
8490 static rtx
8491 sh_function_value (const_tree valtype,
8492 const_tree fn_decl_or_type,
8493 bool outgoing ATTRIBUTE_UNUSED)
8495 if (fn_decl_or_type
8496 && !DECL_P (fn_decl_or_type))
8497 fn_decl_or_type = NULL;
8499 return gen_rtx_REG (
8500 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8501 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8502 && (TREE_CODE (valtype) == INTEGER_TYPE
8503 || TREE_CODE (valtype) == ENUMERAL_TYPE
8504 || TREE_CODE (valtype) == BOOLEAN_TYPE
8505 || TREE_CODE (valtype) == REAL_TYPE
8506 || TREE_CODE (valtype) == OFFSET_TYPE))
8507 && sh_promote_prototypes (fn_decl_or_type)
8508 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8509 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8512 /* Worker function for TARGET_LIBCALL_VALUE. */
8514 static rtx
8515 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8517 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8520 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8522 bool
8523 sh_function_value_regno_p (const unsigned int regno)
8525 return ((regno) == FIRST_RET_REG
8526 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8527 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8530 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8532 static bool
8533 sh_return_in_memory (const_tree type, const_tree fndecl)
8535 if (TARGET_SH5)
8537 if (TYPE_MODE (type) == BLKmode)
8538 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8539 else
8540 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8542 else
8544 return (TYPE_MODE (type) == BLKmode
8545 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8546 && TREE_CODE (type) == RECORD_TYPE));
8550 /* We actually emit the code in sh_expand_prologue. We used to use
8551 a static variable to flag that we need to emit this code, but that
8552 doesn't work when inlining, when functions are deferred and then emitted
8553 later. Fortunately, we already have two flags that are part of struct
8554 function that tell if a function uses varargs or stdarg. */
8555 static void
8556 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8557 enum machine_mode mode,
8558 tree type,
8559 int *pretend_arg_size,
8560 int second_time ATTRIBUTE_UNUSED)
8562 gcc_assert (cfun->stdarg);
8563 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8565 int named_parm_regs, anon_parm_regs;
8567 named_parm_regs = (ROUND_REG (*ca, mode)
8568 + (mode == BLKmode
8569 ? ROUND_ADVANCE (int_size_in_bytes (type))
8570 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8571 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8572 if (anon_parm_regs > 0)
8573 *pretend_arg_size = anon_parm_regs * 4;
8577 static bool
8578 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8580 return TARGET_SH5;
8583 static bool
8584 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8586 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8590 /* Define the offset between two registers, one to be eliminated, and
8591 the other its replacement, at the start of a routine. */
8594 initial_elimination_offset (int from, int to)
8596 int regs_saved;
8597 int regs_saved_rounding = 0;
8598 int total_saved_regs_space;
8599 int total_auto_space;
8600 int save_flags = target_flags;
8601 int copy_flags;
8602 HARD_REG_SET live_regs_mask;
8604 shmedia_space_reserved_for_target_registers = false;
8605 regs_saved = calc_live_regs (&live_regs_mask);
8606 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8608 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8610 shmedia_space_reserved_for_target_registers = true;
8611 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8614 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8615 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8616 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8618 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8619 copy_flags = target_flags;
8620 target_flags = save_flags;
8622 total_saved_regs_space = regs_saved + regs_saved_rounding;
8624 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8625 return total_saved_regs_space + total_auto_space
8626 + crtl->args.info.byref_regs * 8;
8628 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8629 return total_saved_regs_space + total_auto_space
8630 + crtl->args.info.byref_regs * 8;
8632 /* Initial gap between fp and sp is 0. */
8633 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8634 return 0;
8636 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8637 return rounded_frame_size (0);
8639 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8640 return rounded_frame_size (0);
8642 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8643 && (to == HARD_FRAME_POINTER_REGNUM
8644 || to == STACK_POINTER_REGNUM));
8645 if (TARGET_SH5)
8647 int n = total_saved_regs_space;
8648 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8649 save_schedule schedule;
8650 save_entry *entry;
8652 n += total_auto_space;
8654 /* If it wasn't saved, there's not much we can do. */
8655 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8656 return n;
8658 target_flags = copy_flags;
8660 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8661 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8662 if (entry->reg == pr_reg)
8664 target_flags = save_flags;
8665 return entry->offset;
8667 gcc_unreachable ();
8669 else
8670 return total_auto_space;
8673 /* Parse the -mfixed-range= option string. */
8674 void
8675 sh_fix_range (const char *const_str)
8677 int i, first, last;
8678 char *str, *dash, *comma;
8680 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8681 REG2 are either register names or register numbers. The effect
8682 of this option is to mark the registers in the range from REG1 to
8683 REG2 as ``fixed'' so they won't be used by the compiler. */
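/* A plausible use (register names purely illustrative):
     -mfixed-range=r10-r13
   marks r10 through r13 as fixed and call-used via the loop below. */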
8685 i = strlen (const_str);
8686 str = (char *) alloca (i + 1);
8687 memcpy (str, const_str, i + 1);
8689 while (1)
8691 dash = strchr (str, '-');
8692 if (!dash)
8694 warning (0, "value of -mfixed-range must have form REG1-REG2");
8695 return;
8697 *dash = '\0';
8698 comma = strchr (dash + 1, ',');
8699 if (comma)
8700 *comma = '\0';
8702 first = decode_reg_name (str);
8703 if (first < 0)
8705 warning (0, "unknown register name: %s", str);
8706 return;
8709 last = decode_reg_name (dash + 1);
8710 if (last < 0)
8712 warning (0, "unknown register name: %s", dash + 1);
8713 return;
8716 *dash = '-';
8718 if (first > last)
8720 warning (0, "%s-%s is an empty range", str, dash + 1);
8721 return;
8724 for (i = first; i <= last; ++i)
8725 fixed_regs[i] = call_used_regs[i] = 1;
8727 if (!comma)
8728 break;
8730 *comma = ',';
8731 str = comma + 1;
8735 /* Insert any deferred function attributes from earlier pragmas. */
8736 static void
8737 sh_insert_attributes (tree node, tree *attributes)
8739 tree attrs;
8741 if (TREE_CODE (node) != FUNCTION_DECL)
8742 return;
8744 /* We are only interested in fields. */
8745 if (!DECL_P (node))
8746 return;
8748 /* Append the attributes to the deferred attributes. */
8749 *sh_deferred_function_attributes_tail = *attributes;
8750 attrs = sh_deferred_function_attributes;
8751 if (!attrs)
8752 return;
8754 /* Some attributes imply or require the interrupt attribute. */
8755 if (!lookup_attribute ("interrupt_handler", attrs)
8756 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8758 /* If we have a trapa_handler, but no interrupt_handler attribute,
8759 insert an interrupt_handler attribute. */
8760 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8761 /* We can't use sh_pr_interrupt here because that's not in the
8762 java frontend. */
8763 attrs
8764 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8765 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8766 if the interrupt attribute is missing, we ignore the attribute
8767 and warn. */
8768 else if (lookup_attribute ("sp_switch", attrs)
8769 || lookup_attribute ("trap_exit", attrs)
8770 || lookup_attribute ("nosave_low_regs", attrs)
8771 || lookup_attribute ("resbank", attrs))
8773 tree *tail;
8775 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8777 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8778 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8779 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8780 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8781 warning (OPT_Wattributes,
8782 "%qE attribute only applies to interrupt functions",
8783 TREE_PURPOSE (attrs));
8784 else
8786 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8787 NULL_TREE);
8788 tail = &TREE_CHAIN (*tail);
8791 attrs = *attributes;
8795 /* Install the processed list. */
8796 *attributes = attrs;
8798 /* Clear deferred attributes. */
8799 sh_deferred_function_attributes = NULL_TREE;
8800 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8802 return;
8805 /* Supported attributes:
8807 interrupt_handler -- specifies this function is an interrupt handler.
8809 trapa_handler -- like above, but don't save all registers.
8811 sp_switch -- specifies an alternate stack for an interrupt handler
8812 to run on.
8814 trap_exit -- use a trapa to exit an interrupt function instead of
8815 an rte instruction.
8817 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8818 This is useful on the SH3 and upwards,
8819 which have a separate set of low regs for User and Supervisor modes.
8820 This should only be used for the lowest level of interrupts. Higher levels
8821 of interrupts must save the registers in case they themselves are
8822 interrupted.
8824 renesas -- use Renesas calling/layout conventions (functions and
8825 structures).
8827 resbank -- In case of an ISR, use a register bank to save registers
8828 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
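/* A usage sketch combining several of the attributes above (the stack
   symbol and trap number are hypothetical):

     void my_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

   sp_switch takes a string constant and trap_exit an integer constant,
   as enforced by the handlers below. */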
8831 /* Handle a 'resbank' attribute. */
8832 static tree
8833 sh_handle_resbank_handler_attribute (tree * node, tree name,
8834 tree args ATTRIBUTE_UNUSED,
8835 int flags ATTRIBUTE_UNUSED,
8836 bool * no_add_attrs)
8838 if (!TARGET_SH2A)
8840 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8841 name);
8842 *no_add_attrs = true;
8844 if (TREE_CODE (*node) != FUNCTION_DECL)
8846 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8847 name);
8848 *no_add_attrs = true;
8851 return NULL_TREE;
8854 /* Handle an "interrupt_handler" attribute; arguments as in
8855 struct attribute_spec.handler. */
8856 static tree
8857 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8858 tree args ATTRIBUTE_UNUSED,
8859 int flags ATTRIBUTE_UNUSED,
8860 bool *no_add_attrs)
8862 if (TREE_CODE (*node) != FUNCTION_DECL)
8864 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8865 name);
8866 *no_add_attrs = true;
8868 else if (TARGET_SHCOMPACT)
8870 error ("attribute interrupt_handler is not compatible with -m5-compact");
8871 *no_add_attrs = true;
8874 return NULL_TREE;
8877 /* Handle a 'function_vector' attribute; arguments as in
8878 struct attribute_spec.handler. */
8879 static tree
8880 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8881 tree args ATTRIBUTE_UNUSED,
8882 int flags ATTRIBUTE_UNUSED,
8883 bool * no_add_attrs)
8885 if (!TARGET_SH2A)
8887 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8888 name);
8889 *no_add_attrs = true;
8891 else if (TREE_CODE (*node) != FUNCTION_DECL)
8893 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8894 name);
8895 *no_add_attrs = true;
8897 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8899 /* The argument must be a constant integer. */
8900 warning (OPT_Wattributes,
8901 "%qE attribute argument not an integer constant",
8902 name);
8903 *no_add_attrs = true;
8905 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8907 /* The argument value must be between 0 and 255. */
8908 warning (OPT_Wattributes,
8909 "%qE attribute argument should be between 0 to 255",
8910 name);
8911 *no_add_attrs = true;
8913 return NULL_TREE;
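/* A usage sketch for the attribute handled above (the vector number is
   hypothetical):

     void my_handler (void) __attribute__ ((function_vector (42)));

   The argument must be an integer constant between 0 and 255. */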
8916 /* Returns 1 if current function has been assigned the attribute
8917 'function_vector'. */
8919 sh2a_is_function_vector_call (rtx x)
8921 if (GET_CODE (x) == SYMBOL_REF
8922 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8924 tree tr = SYMBOL_REF_DECL (x);
8926 if (sh2a_function_vector_p (tr))
8927 return 1;
8930 return 0;
8933 /* Returns the function vector number, if the attribute
8934 'function_vector' is assigned, otherwise returns zero. */
8936 sh2a_get_function_vector_number (rtx x)
8938 int num;
8939 tree list, t;
8941 if ((GET_CODE (x) == SYMBOL_REF)
8942 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8944 t = SYMBOL_REF_DECL (x);
8946 if (TREE_CODE (t) != FUNCTION_DECL)
8947 return 0;
8949 list = SH_ATTRIBUTES (t);
8950 while (list)
8952 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8954 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8955 return num;
8958 list = TREE_CHAIN (list);
8961 return 0;
8963 else
8964 return 0;
8967 /* Handle an "sp_switch" attribute; arguments as in
8968 struct attribute_spec.handler. */
8969 static tree
8970 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8971 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8973 if (TREE_CODE (*node) != FUNCTION_DECL)
8975 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8976 name);
8977 *no_add_attrs = true;
8979 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8981 /* The argument must be a constant string. */
8982 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8983 name);
8984 *no_add_attrs = true;
8987 return NULL_TREE;
8990 /* Handle a "trap_exit" attribute; arguments as in
8991 struct attribute_spec.handler. */
8992 static tree
8993 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8994 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8996 if (TREE_CODE (*node) != FUNCTION_DECL)
8998 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8999 name);
9000 *no_add_attrs = true;
9002 /* The argument specifies a trap number to be used in a trapa instruction
9003 at function exit (instead of an rte instruction). */
9004 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9006 /* The argument must be a constant integer. */
9007 warning (OPT_Wattributes, "%qE attribute argument not an "
9008 "integer constant", name);
9009 *no_add_attrs = true;
9012 return NULL_TREE;
9015 static tree
9016 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9017 tree name ATTRIBUTE_UNUSED,
9018 tree args ATTRIBUTE_UNUSED,
9019 int flags ATTRIBUTE_UNUSED,
9020 bool *no_add_attrs ATTRIBUTE_UNUSED)
9022 return NULL_TREE;
9025 /* True if __attribute__((renesas)) or -mrenesas. */
9027 sh_attr_renesas_p (const_tree td)
9029 if (TARGET_HITACHI)
9030 return 1;
9031 if (td == 0)
9032 return 0;
9033 if (DECL_P (td))
9034 td = TREE_TYPE (td);
9035 if (td == error_mark_node)
9036 return 0;
9037 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9038 != NULL_TREE);
9041 /* True if __attribute__((renesas)) or -mrenesas, for the current
9042 function. */
9044 sh_cfun_attr_renesas_p (void)
9046 return sh_attr_renesas_p (current_function_decl);
9050 sh_cfun_interrupt_handler_p (void)
9052 return (lookup_attribute ("interrupt_handler",
9053 DECL_ATTRIBUTES (current_function_decl))
9054 != NULL_TREE);
9057 /* Returns 1 if FUNC has been assigned the attribute
9058 "function_vector". */
9060 sh2a_function_vector_p (tree func)
9062 tree list;
9063 if (TREE_CODE (func) != FUNCTION_DECL)
9064 return 0;
9066 list = SH_ATTRIBUTES (func);
9067 while (list)
9069 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9070 return 1;
9072 list = TREE_CHAIN (list);
9074 return 0;
9077 /* Returns TRUE if the current function has the "resbank" attribute. */
9080 sh_cfun_resbank_handler_p (void)
9082 return ((lookup_attribute ("resbank",
9083 DECL_ATTRIBUTES (current_function_decl))
9084 != NULL_TREE)
9085 && (lookup_attribute ("interrupt_handler",
9086 DECL_ATTRIBUTES (current_function_decl))
9087 != NULL_TREE) && TARGET_SH2A);
9090 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9092 static const char *
9093 sh_check_pch_target_flags (int old_flags)
9095 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9096 | MASK_SH_E | MASK_HARD_SH4
9097 | MASK_FPU_SINGLE | MASK_SH4))
9098 return _("created and used with different architectures / ABIs");
9099 if ((old_flags ^ target_flags) & MASK_HITACHI)
9100 return _("created and used with different ABIs");
9101 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9102 return _("created and used with different endianness");
9103 return NULL;
9106 /* Predicates used by the templates. */
9108 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9109 Used only in general_movsrc_operand. */
9112 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9114 switch (REGNO (op))
9116 case PR_REG:
9117 case MACL_REG:
9118 case MACH_REG:
9119 return 1;
9121 return 0;
9124 /* Nonzero if OP is a floating point value with value 0.0. */
9127 fp_zero_operand (rtx op)
9129 REAL_VALUE_TYPE r;
9131 if (GET_MODE (op) != SFmode)
9132 return 0;
9134 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9135 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9138 /* Nonzero if OP is a floating point value with value 1.0. */
9141 fp_one_operand (rtx op)
9143 REAL_VALUE_TYPE r;
9145 if (GET_MODE (op) != SFmode)
9146 return 0;
9148 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9149 return REAL_VALUES_EQUAL (r, dconst1);
9152 /* In general mode switching is used. If we are
9153 compiling without -mfmovd, movsf_ie isn't taken into account for
9154 mode switching. We could check in machine_dependent_reorg for
9155 cases where we know we are in single precision mode, but there is no
9156 interface to find that out during reload, so we must avoid
9157 choosing an fldi alternative during reload and thus failing to
9158 allocate a scratch register for the constant loading. */
9160 fldi_ok (void)
9162 return 1;
9166 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9168 enum rtx_code code = GET_CODE (op);
9169 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9172 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
9173 enum tls_model
9174 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9176 if (GET_CODE (op) != SYMBOL_REF)
9177 return TLS_MODEL_NONE;
9178 return SYMBOL_REF_TLS_MODEL (op);
9181 /* Return the destination address of a branch. */
9183 static int
9184 branch_dest (rtx branch)
9186 rtx dest = SET_SRC (PATTERN (branch));
9187 int dest_uid;
9189 if (GET_CODE (dest) == IF_THEN_ELSE)
9190 dest = XEXP (dest, 1);
9191 dest = XEXP (dest, 0);
9192 dest_uid = INSN_UID (dest);
9193 return INSN_ADDRESSES (dest_uid);
9196 /* Return nonzero if REG is not used after INSN.
9197 We assume REG is a reload reg, and therefore does
9198 not live past labels. It may live past calls or jumps though. */
9200 reg_unused_after (rtx reg, rtx insn)
9202 enum rtx_code code;
9203 rtx set;
9205 /* If the reg is set by this instruction, then it is safe for our
9206 case. Disregard the case where this is a store to memory, since
9207 we are checking a register used in the store address. */
9208 set = single_set (insn);
9209 if (set && !MEM_P (SET_DEST (set))
9210 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9211 return 1;
9213 while ((insn = NEXT_INSN (insn)))
9215 rtx set;
9216 if (!INSN_P (insn))
9217 continue;
9219 code = GET_CODE (insn);
9221 #if 0
9222 /* If this is a label that existed before reload, then the register
9223 is dead here. However, if this is a label added by reorg, then
9224 the register may still be live here. We can't tell the difference,
9225 so we just ignore labels completely. */
9226 if (code == CODE_LABEL)
9227 return 1;
9228 /* else */
9229 #endif
9231 if (code == JUMP_INSN)
9232 return 0;
9234 /* If this is a sequence, we must handle them all at once.
9235 We could have for instance a call that sets the target register,
9236 and an insn in a delay slot that uses the register. In this case,
9237 we must return 0. */
9238 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9240 int i;
9241 int retval = 0;
9243 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9245 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9246 rtx set = single_set (this_insn);
9248 if (CALL_P (this_insn))
9249 code = CALL_INSN;
9250 else if (JUMP_P (this_insn))
9252 if (INSN_ANNULLED_BRANCH_P (this_insn))
9253 return 0;
9254 code = JUMP_INSN;
9257 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9258 return 0;
9259 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9261 if (!MEM_P (SET_DEST (set)))
9262 retval = 1;
9263 else
9264 return 0;
9266 if (set == 0
9267 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9268 return 0;
9270 if (retval == 1)
9271 return 1;
9272 else if (code == JUMP_INSN)
9273 return 0;
9276 set = single_set (insn);
9277 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9278 return 0;
9279 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9280 return !MEM_P (SET_DEST (set));
9281 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9282 return 0;
9284 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9285 return 1;
9287 return 1;
9290 #include "ggc.h"
9292 static GTY(()) rtx fpscr_rtx;
9294 get_fpscr_rtx (void)
9296 if (! fpscr_rtx)
9298 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9299 REG_USERVAR_P (fpscr_rtx) = 1;
9300 mark_user_reg (fpscr_rtx);
9302 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9303 mark_user_reg (fpscr_rtx);
9304 return fpscr_rtx;
9307 static GTY(()) tree fpscr_values;
9309 static void
9310 emit_fpu_switch (rtx scratch, int index)
9312 rtx dst, src;
9314 if (fpscr_values == NULL)
9316 tree t;
9318 t = build_index_type (integer_one_node);
9319 t = build_array_type (integer_type_node, t);
9320 t = build_decl (BUILTINS_LOCATION,
9321 VAR_DECL, get_identifier ("__fpscr_values"), t);
9322 DECL_ARTIFICIAL (t) = 1;
9323 DECL_IGNORED_P (t) = 1;
9324 DECL_EXTERNAL (t) = 1;
9325 TREE_STATIC (t) = 1;
9326 TREE_PUBLIC (t) = 1;
9327 TREE_USED (t) = 1;
9329 fpscr_values = t;
9332 src = DECL_RTL (fpscr_values);
9333 if (!can_create_pseudo_p ())
9335 emit_move_insn (scratch, XEXP (src, 0));
9336 if (index != 0)
9337 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9338 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9340 else
9341 src = adjust_address (src, PSImode, index * 4);
9343 dst = get_fpscr_rtx ();
9344 emit_move_insn (dst, src);
9347 void
9348 emit_sf_insn (rtx pat)
9350 emit_insn (pat);
9353 void
9354 emit_df_insn (rtx pat)
9356 emit_insn (pat);
9359 void
9360 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9362 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9365 void
9366 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9368 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9369 get_fpscr_rtx ()));
9372 void
9373 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9375 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9378 void
9379 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9381 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9382 get_fpscr_rtx ()));
9385 static rtx get_free_reg (HARD_REG_SET);
9387 /* This function returns a register to use to load the address from which
9388 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
9389 able to use pseudo registers after combine, or have a better mechanism
9390 for choosing a register, it should be done here. */
9391 /* REGS_LIVE is the liveness information for the point for which we
9392 need this allocation. In some bare-bones exit blocks, r1 is live at the
9393 start. We can even have all of r0..r3 being live:
9394 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9395 The INSN before which the new insns are placed will clobber the register
9396 we return. If a basic block consists only of setting the return value
9397 register to a pseudo and using that register, the return value is not
9398 live before or after this block, yet we'll insert our insns right in
9399 the middle. */
9401 static rtx
9402 get_free_reg (HARD_REG_SET regs_live)
9404 if (! TEST_HARD_REG_BIT (regs_live, 1))
9405 return gen_rtx_REG (Pmode, 1);
9407 /* Hard reg 1 is live; since this is a small register classes target,
9408 there shouldn't be anything but a jump before the function end. */
9409 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9410 return gen_rtx_REG (Pmode, 7);
9413 /* This function will set the fpscr from memory.
9414 MODE is the mode we are setting it to. */
9415 void
9416 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9418 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9419 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9420 rtx addr_reg;
9422 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9423 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9426 /* Is the given character a logical line separator for the assembler? */
9427 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9428 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9429 #endif
9432 sh_insn_length_adjustment (rtx insn)
9434 /* Instructions with unfilled delay slots take up an extra two bytes for
9435 the nop in the delay slot. */
9436 if (((NONJUMP_INSN_P (insn)
9437 && GET_CODE (PATTERN (insn)) != USE
9438 && GET_CODE (PATTERN (insn)) != CLOBBER)
9439 || CALL_P (insn)
9440 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9441 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9442 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9443 return 2;
9445 /* SH2e has a bug that prevents the use of annulled branches, so if
9446 the delay slot is not filled, we'll have to put a NOP in it. */
9447 if (sh_cpu_attr == CPU_SH2E
9448 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9449 && get_attr_type (insn) == TYPE_CBRANCH
9450 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9451 return 2;
9453 /* sh-dsp parallel processing insns take four bytes instead of two. */
9455 if (NONJUMP_INSN_P (insn))
9457 int sum = 0;
9458 rtx body = PATTERN (insn);
9459 const char *templ;
9460 char c;
9461 int maybe_label = 1;
9463 if (GET_CODE (body) == ASM_INPUT)
9464 templ = XSTR (body, 0);
9465 else if (asm_noperands (body) >= 0)
9466 templ
9467 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9468 else
9469 return 0;
9472 int ppi_adjust = 0;
9475 c = *templ++;
9476 while (c == ' ' || c == '\t');
9477 /* all sh-dsp parallel-processing insns start with p.
9478 The only non-ppi sh insn starting with p is pref.
9479 The only ppi starting with pr is prnd. */
9480 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9481 ppi_adjust = 2;
9482 /* The repeat pseudo-insn expands to three insns, a total of
9483 six bytes in size. */
9484 else if ((c == 'r' || c == 'R')
9485 && ! strncasecmp ("epeat", templ, 5))
9486 ppi_adjust = 4;
9487 while (c && c != '\n'
9488 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9490 /* If this is a label, it is obviously not a ppi insn. */
9491 if (c == ':' && maybe_label)
9493 ppi_adjust = 0;
9494 break;
9496 else if (c == '\'' || c == '"')
9497 maybe_label = 0;
9498 c = *templ++;
9500 sum += ppi_adjust;
9501 maybe_label = c != ':';
9503 while (c);
9504 return sum;
9506 return 0;
9509 /* Return TRUE for a valid displacement for the REG+disp addressing
9510 with MODE. */
9512 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9513 into the FRx registers. We implement this by setting the maximum offset
9514 to zero when the value is SFmode. This also restricts loading of SFmode
9515 values into the integer registers, but that can't be helped. */
9517 /* The SH allows a displacement in a QI or HI mode address, but only when the
9518 other operand is R0. GCC doesn't handle this very well, so we forgo
9519 all of that.
9521 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9522 DI can be any number 0..60. */
9524 bool
9525 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9527 if (CONST_INT_P (op))
9529 if (TARGET_SHMEDIA)
9531 int size;
9533 /* Check if this is the address of an unaligned load / store. */
9534 if (mode == VOIDmode)
9535 return CONST_OK_FOR_I06 (INTVAL (op));
9537 size = GET_MODE_SIZE (mode);
9538 return (!(INTVAL (op) & (size - 1))
9539 && INTVAL (op) >= -512 * size
9540 && INTVAL (op) < 512 * size);
9543 if (TARGET_SH2A)
9545 if (GET_MODE_SIZE (mode) == 1
9546 && (unsigned) INTVAL (op) < 4096)
9547 return true;
9550 if ((GET_MODE_SIZE (mode) == 4
9551 && (unsigned) INTVAL (op) < 64
9552 && !(INTVAL (op) & 3)
9553 && !(TARGET_SH2E && mode == SFmode))
9554 || (GET_MODE_SIZE (mode) == 4
9555 && (unsigned) INTVAL (op) < 16383
9556 && !(INTVAL (op) & 3) && TARGET_SH2A))
9557 return true;
9559 if ((GET_MODE_SIZE (mode) == 8
9560 && (unsigned) INTVAL (op) < 60
9561 && !(INTVAL (op) & 3)
9562 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9563 || ((GET_MODE_SIZE (mode) == 8)
9564 && (unsigned) INTVAL (op) < 8192
9565 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9566 && (TARGET_SH2A && mode == DFmode)))
9567 return true;
9570 return false;
9573 /* Recognize an RTL expression that is a valid memory address for
9574 an instruction.
9575 The MODE argument is the machine mode for the MEM expression
9576 that wants to use this address.
9577 Allow REG
9578 REG+disp
9579 REG+r0
9580 REG++
9581 --REG */
9583 static bool
9584 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9586 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9587 return true;
9588 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9589 && ! TARGET_SHMEDIA
9590 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9591 return true;
9592 else if (GET_CODE (x) == PLUS
9593 && (mode != PSImode || reload_completed))
9595 rtx xop0 = XEXP (x, 0);
9596 rtx xop1 = XEXP (x, 1);
9598 if (GET_MODE_SIZE (mode) <= 8
9599 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9600 && sh_legitimate_index_p (mode, xop1))
9601 return true;
9603 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9604 || ((xop0 == stack_pointer_rtx
9605 || xop0 == hard_frame_pointer_rtx)
9606 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9607 || ((xop1 == stack_pointer_rtx
9608 || xop1 == hard_frame_pointer_rtx)
9609 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9610 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9611 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9612 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9613 && TARGET_FMOVD && mode == DFmode)))
9615 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9616 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9617 return true;
9618 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9619 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9620 return true;
9624 return false;
9627 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9628 isn't protected by a PIC unspec. */
9630 nonpic_symbol_mentioned_p (rtx x)
9632 register const char *fmt;
9633 register int i;
9635 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9636 || GET_CODE (x) == PC)
9637 return 1;
9639 /* We don't want to look into the possible MEM location of a
9640 CONST_DOUBLE, since we're not going to use it, in general. */
9641 if (GET_CODE (x) == CONST_DOUBLE)
9642 return 0;
9644 if (GET_CODE (x) == UNSPEC
9645 && (XINT (x, 1) == UNSPEC_PIC
9646 || XINT (x, 1) == UNSPEC_GOT
9647 || XINT (x, 1) == UNSPEC_GOTOFF
9648 || XINT (x, 1) == UNSPEC_GOTPLT
9649 || XINT (x, 1) == UNSPEC_GOTTPOFF
9650 || XINT (x, 1) == UNSPEC_DTPOFF
9651 || XINT (x, 1) == UNSPEC_TPOFF
9652 || XINT (x, 1) == UNSPEC_PLT
9653 || XINT (x, 1) == UNSPEC_SYMOFF
9654 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9655 return 0;
9657 fmt = GET_RTX_FORMAT (GET_CODE (x));
9658 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9660 if (fmt[i] == 'E')
9662 register int j;
9664 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9665 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9666 return 1;
9668 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9669 return 1;
9672 return 0;
9675 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9676 @GOTOFF in `reg'. */
9678 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9679 rtx reg)
9681 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9682 return orig;
9684 if (GET_CODE (orig) == LABEL_REF
9685 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9687 if (reg == 0)
9688 reg = gen_reg_rtx (Pmode);
9690 emit_insn (gen_symGOTOFF2reg (reg, orig));
9691 return reg;
9693 else if (GET_CODE (orig) == SYMBOL_REF)
9695 if (reg == 0)
9696 reg = gen_reg_rtx (Pmode);
9698 emit_insn (gen_symGOT2reg (reg, orig));
9699 return reg;
9701 return orig;
9704 /* Try machine-dependent ways of modifying an illegitimate address
9705 to be legitimate. If we find one, return the new, valid address.
9706 Otherwise, return X.
9708 For the SH, if X is almost suitable for indexing, but the offset is
9709 out of range, convert it into a normal form so that CSE has a chance
9710 of reducing the number of address registers used. */
9712 static rtx
9713 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9715 if (flag_pic)
9716 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9718 if (GET_CODE (x) == PLUS
9719 && (GET_MODE_SIZE (mode) == 4
9720 || GET_MODE_SIZE (mode) == 8)
9721 && CONST_INT_P (XEXP (x, 1))
9722 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9723 && ! TARGET_SHMEDIA
9724 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9725 && ! (TARGET_SH2E && mode == SFmode))
9727 rtx index_rtx = XEXP (x, 1);
9728 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9729 rtx sum;
9731 /* On rare occasions, we might get an unaligned pointer
9732 that is indexed in a way to give an aligned address.
9733 Therefore, keep the lower two bits in offset_base. */
9734 /* Instead of offset_base 128..131 use 124..127, so that
9735 simple add suffices. */
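/* For example, offset 128 gives offset_base ((128 + 4) & ~60) - 4 == 124,
   leaving a residual displacement of 4. */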
9736 if (offset > 127)
9737 offset_base = ((offset + 4) & ~60) - 4;
9738 else
9739 offset_base = offset & ~60;
9741 /* Sometimes the normal form does not suit DImode. We
9742 could avoid that by using smaller ranges, but that
9743 would give less optimized code when SImode is
9744 prevalent. */
9745 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9747 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9748 GEN_INT (offset_base), NULL_RTX, 0,
9749 OPTAB_LIB_WIDEN);
9751 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9755 return x;
9758 /* Attempt to replace *P, which is an address that needs reloading, with
9759 a valid memory address for an operand of mode MODE.
9760 Like for sh_legitimize_address, for the SH we try to get a normal form
9761 of the address. That will allow inheritance of the address reloads. */
9763 bool
9764 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9765 int itype)
9767 enum reload_type type = (enum reload_type) itype;
9769 if (GET_CODE (*p) == PLUS
9770 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9771 && CONST_INT_P (XEXP (*p, 1))
9772 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9773 && ! TARGET_SHMEDIA
9774 && ! (TARGET_SH4 && mode == DFmode)
9775 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9776 && (ALLOW_INDEXED_ADDRESS
9777 || XEXP (*p, 0) == stack_pointer_rtx
9778 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9780 rtx index_rtx = XEXP (*p, 1);
9781 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9782 rtx sum;
9784 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9786 push_reload (*p, NULL_RTX, p, NULL,
9787 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9788 goto win;
9790 if (TARGET_SH2E && mode == SFmode)
9792 *p = copy_rtx (*p);
9793 push_reload (*p, NULL_RTX, p, NULL,
9794 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9795 goto win;
9797 /* Instead of offset_base 128..131 use 124..127, so that
9798 simple add suffices. */
9799 if (offset > 127)
9800 offset_base = ((offset + 4) & ~60) - 4;
9801 else
9802 offset_base = offset & ~60;
9803 /* Sometimes the normal form does not suit DImode. We could avoid
9804 that by using smaller ranges, but that would give less optimized
9805 code when SImode is prevalent. */
9806 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9808 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9809 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9810 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9811 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9812 goto win;
9815 /* We must re-recognize what we created before. */
9816 else if (GET_CODE (*p) == PLUS
9817 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9818 && GET_CODE (XEXP (*p, 0)) == PLUS
9819 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9820 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9821 && CONST_INT_P (XEXP (*p, 1))
9822 && ! TARGET_SHMEDIA
9823 && ! (TARGET_SH2E && mode == SFmode))
9825 /* Because this address is so complex, we know it must have
9826 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9827 it is already unshared, and needs no further unsharing. */
9828 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9829 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9830 goto win;
9833 return false;
9835 win:
9836 return true;
9839 /* Mark the use of a constant in the literal table. If the constant
9840 has multiple labels, make it unique. */
9841 static rtx
9842 mark_constant_pool_use (rtx x)
9844 rtx insn, lab, pattern;
9846 if (x == NULL)
9847 return x;
9849 switch (GET_CODE (x))
9851 case LABEL_REF:
9852 x = XEXP (x, 0);
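/* Fall through. */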
9853 case CODE_LABEL:
9854 break;
9855 default:
9856 return x;
9859 /* Get the first label in the list of labels for the same constant
9860 and delete the other labels in the list. */
9861 lab = x;
9862 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9864 if (!LABEL_P (insn)
9865 || LABEL_REFS (insn) != NEXT_INSN (insn))
9866 break;
9867 lab = insn;
9870 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9871 INSN_DELETED_P (insn) = 1;
9873 /* Mark constants in a window. */
9874 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9876 if (!NONJUMP_INSN_P (insn))
9877 continue;
9879 pattern = PATTERN (insn);
9880 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9881 continue;
9883 switch (XINT (pattern, 1))
9885 case UNSPECV_CONST2:
9886 case UNSPECV_CONST4:
9887 case UNSPECV_CONST8:
9888 XVECEXP (pattern, 0, 1) = const1_rtx;
9889 break;
9890 case UNSPECV_WINDOW_END:
9891 if (XVECEXP (pattern, 0, 0) == x)
9892 return lab;
9893 break;
9894 case UNSPECV_CONST_END:
9895 return lab;
9896 default:
9897 break;
9901 return lab;
9904 /* Return true if it's possible to redirect BRANCH1 to the destination
9905 of an unconditional jump BRANCH2. We only want to do this if the
9906 resulting branch will have a short displacement. */
9908 sh_can_redirect_branch (rtx branch1, rtx branch2)
9910 if (flag_expensive_optimizations && simplejump_p (branch2))
9912 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9913 rtx insn;
9914 int distance;
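/* Look for DEST first in the insns before BRANCH1, then in those after it,
   giving up once the accumulated length exceeds 256 bytes. */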
9916 for (distance = 0, insn = NEXT_INSN (branch1);
9917 insn && distance < 256;
9918 insn = PREV_INSN (insn))
9920 if (insn == dest)
9921 return 1;
9922 else
9923 distance += get_attr_length (insn);
9925 for (distance = 0, insn = NEXT_INSN (branch1);
9926 insn && distance < 256;
9927 insn = NEXT_INSN (insn))
9929 if (insn == dest)
9930 return 1;
9931 else
9932 distance += get_attr_length (insn);
9935 return 0;
9938 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9940 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9941 unsigned int new_reg)
9943 /* Interrupt functions can only use registers that have already been
9944 saved by the prologue, even if they would normally be
9945 call-clobbered. */
9947 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9948 return 0;
9950 return 1;
9953 /* Function to update the integer COST
9954 based on the relationship between INSN that is dependent on
9955 DEP_INSN through the dependence LINK. The default is to make no
9956 adjustment to COST. This can be used for example to specify to
9957 the scheduler that an output- or anti-dependence does not incur
9958 the same cost as a data-dependence. The return value should be
9959 the new value for COST. */
9960 static int
9961 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9963 rtx reg, use_pat;
9965 if (TARGET_SHMEDIA)
9967 /* On SHmedia, if the dependence is an anti-dependence or
9968 output-dependence, there is no cost. */
9969 if (REG_NOTE_KIND (link) != 0)
9971 /* However, dependencies between target register loads and
9972 uses of the register in a subsequent block that are separated
9973 by a conditional branch are not modelled - we have to make do with
9974 the anti-dependency between the target register load and the
9975 conditional branch that ends the current block. */
9976 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9977 && GET_CODE (PATTERN (dep_insn)) == SET
9978 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9979 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9980 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9982 int orig_cost = cost;
9983 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9984 rtx target = ((! note
9985 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9986 ? insn : JUMP_LABEL (insn));
9987 /* On the likely path, the branch costs 1; on the unlikely path,
9988 it costs 3. */
9989 cost--;
9991 target = next_active_insn (target);
9992 while (target && ! flow_dependent_p (target, dep_insn)
9993 && --cost > 0);
9994 /* If two branches are executed in immediate succession, with the
9995 first branch properly predicted, this causes a stall at the
9996 second branch, hence we won't need the target for the
9997 second branch for two cycles after the launch of the first
9998 branch. */
9999 if (cost > orig_cost - 2)
10000 cost = orig_cost - 2;
10002 else
10003 cost = 0;
10006 else if (get_attr_is_mac_media (insn)
10007 && get_attr_is_mac_media (dep_insn))
10008 cost = 1;
10010 else if (! reload_completed
10011 && GET_CODE (PATTERN (insn)) == SET
10012 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10013 && GET_CODE (PATTERN (dep_insn)) == SET
10014 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10015 && cost < 4)
10016 cost = 4;
10017 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10018 that is needed at the target. */
10019 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10020 && ! flow_dependent_p (insn, dep_insn))
10021 cost--;
10023 else if (REG_NOTE_KIND (link) == 0)
10025 enum attr_type type;
10026 rtx dep_set;
10028 if (recog_memoized (insn) < 0
10029 || recog_memoized (dep_insn) < 0)
10030 return cost;
10032 dep_set = single_set (dep_insn);
10034 /* The latency that we specify in the scheduling description refers
10035 to the actual output, not to an auto-increment register; for that,
10036 the latency is one. */
10037 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10039 rtx set = single_set (insn);
10041 if (set
10042 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10043 && (!MEM_P (SET_DEST (set))
10044 || !reg_mentioned_p (SET_DEST (dep_set),
10045 XEXP (SET_DEST (set), 0))))
10046 cost = 1;
10048 /* The only input for a call that is timing-critical is the
10049 function's address. */
10050 if (CALL_P (insn))
10052 rtx call = PATTERN (insn);
10054 if (GET_CODE (call) == PARALLEL)
10055 call = XVECEXP (call, 0 ,0);
10056 if (GET_CODE (call) == SET)
10057 call = SET_SRC (call);
10058 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10059 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10060 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10061 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10062 cost -= TARGET_SH4_300 ? 3 : 6;
10064 /* Likewise, the most timing critical input for an sfunc call
10065 is the function address. However, sfuncs typically start
10066 using their arguments pretty quickly.
10067 Assume a four cycle delay for SH4 before they are needed.
10068 Cached ST40-300 calls are quicker, so assume only a one
10069 cycle delay there.
10070 ??? Maybe we should encode the delays till input registers
10071 are needed by sfuncs into the sfunc call insn. */
10072 /* All sfunc calls are parallels with at least four components.
10073 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10074 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10075 && XVECLEN (PATTERN (insn), 0) >= 4
10076 && (reg = sfunc_uses_reg (insn)))
10078 if (! reg_set_p (reg, dep_insn))
10079 cost -= TARGET_SH4_300 ? 1 : 4;
10081 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10083 enum attr_type dep_type = get_attr_type (dep_insn);
10085 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10086 cost--;
10087 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10088 && (type = get_attr_type (insn)) != TYPE_CALL
10089 && type != TYPE_SFUNC)
10090 cost--;
10091 /* When the preceding instruction loads the shift amount of
10092 the following SHAD/SHLD, the latency of the load is increased
10093 by 1 cycle. */
10094 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10095 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10096 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10097 XEXP (SET_SRC (single_set (insn)),
10098 1)))
10099 cost++;
10100 /* When an LS group instruction with a latency of less than
10101 3 cycles is followed by a double-precision floating-point
10102 instruction, FIPR, or FTRV, the latency of the first
10103 instruction is increased to 3 cycles. */
10104 else if (cost < 3
10105 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10106 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10107 cost = 3;
10108 /* The lsw register of a double-precision computation is ready one
10109 cycle earlier. */
10110 else if (reload_completed
10111 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10112 && (use_pat = single_set (insn))
10113 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10114 SET_SRC (use_pat)))
10115 cost -= 1;
10117 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10118 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10119 cost -= 1;
10121 else if (TARGET_SH4_300)
10123 /* Stores need their input register two cycles later. */
10124 if (dep_set && cost >= 1
10125 && ((type = get_attr_type (insn)) == TYPE_STORE
10126 || type == TYPE_PSTORE
10127 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10129 rtx set = single_set (insn);
10131 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10132 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10134 cost -= 2;
10135 /* But don't reduce the cost below 1 if the address depends
10136 on a side effect of dep_insn. */
10137 if (cost < 1
10138 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10139 cost = 1;
10144 /* An anti-dependence penalty of two applies if the first insn is a double
10145 precision fadd / fsub / fmul. */
10146 else if (!TARGET_SH4_300
10147 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10148 && recog_memoized (dep_insn) >= 0
10149 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10150 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10151 /* A lot of alleged anti-flow dependences are fake,
10152 so check this one is real. */
10153 && flow_dependent_p (dep_insn, insn))
10154 cost = 2;
10156 return cost;
10159 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10160 if DEP_INSN is anti-flow dependent on INSN. */
10161 static int
10162 flow_dependent_p (rtx insn, rtx dep_insn)
10164 rtx tmp = PATTERN (insn);
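/* flow_dependent_p_1 clears TMP if DEP_INSN sets something that INSN
   references, so a null TMP means there is a flow dependence. */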
10166 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10167 return tmp == NULL_RTX;
10170 /* A helper function for flow_dependent_p called through note_stores. */
10171 static void
10172 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10174 rtx * pinsn = (rtx *) data;
10176 if (*pinsn && reg_referenced_p (x, *pinsn))
10177 *pinsn = NULL_RTX;
10180 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10181 'special function' patterns (type sfunc) that clobber pr, but that
10182 do not look like function calls to leaf_function_p. Hence we must
10183 do this extra check. */
10184 static int
10185 sh_pr_n_sets (void)
10187 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10190 /* Return where to allocate pseudo for a given hard register initial
10191 value. */
10192 static rtx
10193 sh_allocate_initial_value (rtx hard_reg)
10195 rtx x;
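/* If the function is a leaf and never sets PR, the incoming return address
   is still in PR itself; otherwise it must be loaded from its save slot in
   the frame. */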
10197 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10199 if (current_function_is_leaf
10200 && ! sh_pr_n_sets ()
10201 && ! (TARGET_SHCOMPACT
10202 && ((crtl->args.info.call_cookie
10203 & ~ CALL_COOKIE_RET_TRAMP (1))
10204 || crtl->saves_all_registers)))
10205 x = hard_reg;
10206 else
10207 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10209 else
10210 x = NULL_RTX;
10212 return x;
10215 /* This function returns "2" to indicate dual issue for the SH4
10216 processor. To be used by the DFA pipeline description. */
10217 static int
10218 sh_issue_rate (void)
10220 if (TARGET_SUPERSCALAR)
10221 return 2;
10222 else
10223 return 1;
10226 /* Functions for ready queue reordering for sched1. */
10228 /* Get weight for mode for a set x. */
10229 static short
10230 find_set_regmode_weight (rtx x, enum machine_mode mode)
10232 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10233 return 1;
10234 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10236 if (REG_P (SET_DEST (x)))
10238 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10239 return 1;
10240 else
10241 return 0;
10243 return 1;
10245 return 0;
10248 /* Get regmode weight for insn. */
10249 static short
10250 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10252 short reg_weight = 0;
10253 rtx x;
10255 /* Increment weight for each register born here. */
10256 x = PATTERN (insn);
10257 reg_weight += find_set_regmode_weight (x, mode);
10258 if (GET_CODE (x) == PARALLEL)
10260 int j;
10261 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10263 x = XVECEXP (PATTERN (insn), 0, j);
10264 reg_weight += find_set_regmode_weight (x, mode);
10267 /* Decrement weight for each register that dies here. */
10268 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10270 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10272 rtx note = XEXP (x, 0);
10273 if (REG_P (note) && GET_MODE (note) == mode)
10274 reg_weight--;
10277 return reg_weight;
10280 /* Calculate regmode weights for all insns of a basic block. */
10281 static void
10282 find_regmode_weight (basic_block b, enum machine_mode mode)
10284 rtx insn, next_tail, head, tail;
10286 get_ebb_head_tail (b, b, &head, &tail);
10287 next_tail = NEXT_INSN (tail);
10289 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10291 /* Handle register life information. */
10292 if (!INSN_P (insn))
10293 continue;
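/* A DFmode (DImode) value occupies two SFmode (SImode) registers, so it
   counts twice towards the corresponding pressure. */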
10295 if (mode == SFmode)
10296 INSN_REGMODE_WEIGHT (insn, mode) =
10297 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10298 else if (mode == SImode)
10299 INSN_REGMODE_WEIGHT (insn, mode) =
10300 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10304 /* Comparison function for ready queue sorting. */
10305 static int
10306 rank_for_reorder (const void *x, const void *y)
10308 rtx tmp = *(const rtx *) y;
10309 rtx tmp2 = *(const rtx *) x;
10311 /* The insn in a schedule group should be issued first. */
10312 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10313 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10315 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10316 minimizes instruction movement, thus minimizing sched's effect on
10317 register pressure. */
10318 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10321 /* Resort the array A in which only the element at index N may be out of order. */
10322 static void
10323 swap_reorder (rtx *a, int n)
10325 rtx insn = a[n - 1];
10326 int i = n - 2;
10328 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10330 a[i + 1] = a[i];
10331 i -= 1;
10333 a[i + 1] = insn;
10336 #define SCHED_REORDER(READY, N_READY) \
10337 do \
10339 if ((N_READY) == 2) \
10340 swap_reorder (READY, N_READY); \
10341 else if ((N_READY) > 2) \
10342 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10344 while (0)
10346 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10347 macro. */
10348 static void
10349 ready_reorder (rtx *ready, int nready)
10351 SCHED_REORDER (ready, nready);
10354 /* Count life regions of r0 for a block. */
10355 static int
10356 find_r0_life_regions (basic_block b)
10358 rtx end, insn;
10359 rtx pset;
10360 rtx r0_reg;
10361 int live;
10362 int set;
10363 int death = 0;
10365 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10367 set = 1;
10368 live = 1;
10370 else
10372 set = 0;
10373 live = 0;
10376 insn = BB_HEAD (b);
10377 end = BB_END (b);
10378 r0_reg = gen_rtx_REG (SImode, R0_REG);
10379 while (1)
10381 if (INSN_P (insn))
10383 if (find_regno_note (insn, REG_DEAD, R0_REG))
10385 death++;
10386 live = 0;
10388 if (!live
10389 && (pset = single_set (insn))
10390 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10391 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10393 set++;
10394 live = 1;
10397 if (insn == end)
10398 break;
10399 insn = NEXT_INSN (insn);
10401 return set - death;
10404 /* Calculate regmode weights for all insns of all basic blocks. */
10405 static void
10406 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10407 int verbose ATTRIBUTE_UNUSED,
10408 int old_max_uid)
10410 basic_block b;
10412 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10413 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10414 r0_life_regions = 0;
10416 FOR_EACH_BB_REVERSE (b)
10418 find_regmode_weight (b, SImode);
10419 find_regmode_weight (b, SFmode);
10420 if (!reload_completed)
10421 r0_life_regions += find_r0_life_regions (b);
10424 CURR_REGMODE_PRESSURE (SImode) = 0;
10425 CURR_REGMODE_PRESSURE (SFmode) = 0;
10429 /* Cleanup. */
10430 static void
10431 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10432 int verbose ATTRIBUTE_UNUSED)
10434 if (regmode_weight[0])
10436 free (regmode_weight[0]);
10437 regmode_weight[0] = NULL;
10439 if (regmode_weight[1])
10441 free (regmode_weight[1]);
10442 regmode_weight[1] = NULL;
10446 /* The scalar modes supported differ from the default version in TImode
10447 for 32-bit SHMEDIA. */
10448 static bool
10449 sh_scalar_mode_supported_p (enum machine_mode mode)
10451 if (TARGET_SHMEDIA32 && mode == TImode)
10452 return false;
10454 return default_scalar_mode_supported_p (mode);
10457 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10458 keep count of register pressures on SImode and SFmode. */
10459 static int
10460 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10461 int sched_verbose ATTRIBUTE_UNUSED,
10462 rtx insn,
10463 int can_issue_more)
10465 if (GET_CODE (PATTERN (insn)) != USE
10466 && GET_CODE (PATTERN (insn)) != CLOBBER)
10467 cached_can_issue_more = can_issue_more - 1;
10468 else
10469 cached_can_issue_more = can_issue_more;
10471 if (reload_completed)
10472 return cached_can_issue_more;
10474 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10475 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10477 return cached_can_issue_more;
10480 static void
10481 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10482 int verbose ATTRIBUTE_UNUSED,
10483 int veclen ATTRIBUTE_UNUSED)
10485 CURR_REGMODE_PRESSURE (SImode) = 0;
10486 CURR_REGMODE_PRESSURE (SFmode) = 0;
10489 /* Some magic numbers. */
10490 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10491 functions that already have high pressure on r0. */
10492 #define R0_MAX_LIFE_REGIONS 2
10493 /* Register Pressure thresholds for SImode and SFmode registers. */
10494 #define SIMODE_MAX_WEIGHT 5
10495 #define SFMODE_MAX_WEIGHT 10
10497 /* Return true if the pressure is high for MODE. */
10498 static short
10499 high_pressure (enum machine_mode mode)
10501 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10502 functions that already have high pressure on r0. */
10503 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10504 return 1;
10506 if (mode == SFmode)
10507 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10508 else
10509 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10512 /* Reorder ready queue if register pressure is high. */
10513 static int
10514 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10515 int sched_verbose ATTRIBUTE_UNUSED,
10516 rtx *ready,
10517 int *n_readyp,
10518 int clock_var ATTRIBUTE_UNUSED)
10520 if (reload_completed)
10521 return sh_issue_rate ();
10523 if (high_pressure (SFmode) || high_pressure (SImode))
10525 ready_reorder (ready, *n_readyp);
10528 return sh_issue_rate ();
10531 /* Skip cycles if the current register pressure is high. */
10532 static int
10533 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10534 int sched_verbose ATTRIBUTE_UNUSED,
10535 rtx *ready ATTRIBUTE_UNUSED,
10536 int *n_readyp ATTRIBUTE_UNUSED,
10537 int clock_var ATTRIBUTE_UNUSED)
10539 if (reload_completed)
10540 return cached_can_issue_more;
10542 if (high_pressure(SFmode) || high_pressure (SImode))
10543 skip_cycles = 1;
10545 return cached_can_issue_more;
10548 /* Skip cycles without sorting the ready queue. This will move insns from
10549 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10550 queue by sh_reorder. */
10552 /* Generally, skipping this many cycles is sufficient for all insns to move
10553 from Q -> R. */
10554 #define MAX_SKIPS 8
10556 static int
10557 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10558 int sched_verbose ATTRIBUTE_UNUSED,
10559 rtx insn ATTRIBUTE_UNUSED,
10560 int last_clock_var,
10561 int clock_var,
10562 int *sort_p)
10564 if (reload_completed)
10565 return 0;
10567 if (skip_cycles)
10569 if ((clock_var - last_clock_var) < MAX_SKIPS)
10571 *sort_p = 0;
10572 return 1;
10574 /* If this is the last cycle we are skipping, allow reordering of R. */
10575 if ((clock_var - last_clock_var) == MAX_SKIPS)
10577 *sort_p = 1;
10578 return 1;
10582 skip_cycles = 0;
10584 return 0;
10587 /* SHmedia requires registers for branches, so we can't generate new
10588 branches past reload. */
10589 static bool
10590 sh_cannot_modify_jumps_p (void)
10592 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10595 static reg_class_t
10596 sh_target_reg_class (void)
10598 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10601 static bool
10602 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10604 HARD_REG_SET dummy;
10605 #if 0
10606 rtx insn;
10607 #endif
10609 if (! shmedia_space_reserved_for_target_registers)
10610 return 0;
10611 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10612 return 0;
10613 if (calc_live_regs (&dummy) >= 6 * 8)
10614 return 1;
10615 return 0;
10618 static bool
10619 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10621 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10625 On the SH1..SH4, the trampoline looks like
10626 2 0002 D202 mov.l l2,r2
10627 1 0000 D301 mov.l l1,r3
10628 3 0004 422B jmp @r2
10629 4 0006 0009 nop
10630 5 0008 00000000 l1: .long area
10631 6 000c 00000000 l2: .long function
10633 SH5 (compact) uses r1 instead of r3 for the static chain. */
10636 /* Emit RTL insns to initialize the variable parts of a trampoline.
10637 FNADDR is an RTX for the address of the function's pure code.
10638 CXT is an RTX for the static chain value for the function. */
10640 static void
10641 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10643 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10644 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10646 if (TARGET_SHMEDIA64)
10648 rtx tramp_templ;
10649 int fixed_len;
10651 rtx movi1 = GEN_INT (0xcc000010);
10652 rtx shori1 = GEN_INT (0xc8000010);
10653 rtx src, dst;
10655 /* The following trampoline works within a +- 128 KB range for cxt:
10656 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10657 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10658 gettr tr1,r1; blink tr0,r63 */
10659 /* Address rounding makes it hard to compute the exact bounds of the
10660 offset for this trampoline, but we have a rather generous offset
10661 range, so frame_offset should do fine as an upper bound. */
10662 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10664 /* ??? could optimize this trampoline initialization
10665 by writing DImode words with two insns each. */
10666 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10667 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10668 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10669 insn = gen_rtx_AND (DImode, insn, mask);
10670 /* Or in ptb/u .,tr1 pattern */
10671 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10672 insn = force_operand (insn, NULL_RTX);
10673 insn = gen_lowpart (SImode, insn);
10674 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10675 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10676 insn = gen_rtx_AND (DImode, insn, mask);
10677 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10678 insn = gen_lowpart (SImode, insn);
10679 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10680 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10681 insn = gen_rtx_AND (DImode, insn, mask);
10682 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10683 insn = gen_lowpart (SImode, insn);
10684 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10685 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10686 insn = gen_rtx_AND (DImode, insn, mask);
10687 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10688 insn = gen_lowpart (SImode, insn);
10689 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10690 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10691 insn = gen_rtx_AND (DImode, insn, mask);
10692 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10693 insn = gen_lowpart (SImode, insn);
10694 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
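/* The last three words are the fixed tail of the template shown above:
   ptabs/l r0,tr0; gettr tr1,r1; blink tr0,r63. */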
10695 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10696 GEN_INT (0x6bf10600));
10697 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10698 GEN_INT (0x4415fc10));
10699 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10700 GEN_INT (0x4401fff0));
10701 emit_insn (gen_ic_invalidate_line (tramp));
10702 return;
10704 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10705 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10707 tramp_templ = gen_datalabel_ref (tramp_templ);
10708 dst = tramp_mem;
10709 src = gen_const_mem (BLKmode, tramp_templ);
10710 set_mem_align (dst, 256);
10711 set_mem_align (src, 64);
10712 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10714 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10715 emit_move_insn (adjust_address (tramp_mem, Pmode,
10716 fixed_len + GET_MODE_SIZE (Pmode)),
10717 cxt);
10718 emit_insn (gen_ic_invalidate_line (tramp));
10719 return;
10721 else if (TARGET_SHMEDIA)
10723 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10724 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10725 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10726 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10727 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10728 rotated 10 right, and the higher 16 bits of every 32 selected. */
10729 rtx movishori
10730 = force_reg (V2HImode, (simplify_gen_subreg
10731 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10732 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10733 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10735 fnaddr = force_reg (SImode, fnaddr);
10736 cxt = force_reg (SImode, cxt);
10737 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10738 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10739 movishori));
10740 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10741 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10742 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10743 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10744 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10745 gen_rtx_SUBREG (V2HImode, cxt, 0),
10746 movishori));
10747 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10748 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10749 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10750 if (TARGET_LITTLE_ENDIAN)
10752 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10753 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10755 else
10757 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10758 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10760 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10761 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10762 emit_insn (gen_ic_invalidate_line (tramp));
10763 return;
10765 else if (TARGET_SHCOMPACT)
10767 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10768 return;
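/* SH1..SH4: store the four 16-bit opcodes of the template above, then fill
   the two .long slots with the static chain and the function address. */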
10770 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10771 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10772 SImode));
10773 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10774 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10775 SImode));
10776 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10777 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10778 if (TARGET_HARVARD)
10780 if (!TARGET_INLINE_IC_INVALIDATE
10781 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10782 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10783 FUNCTION_ORDINARY),
10784 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10785 else
10786 emit_insn (gen_ic_invalidate_line (tramp));
10790 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10792 static rtx
10793 sh_trampoline_adjust_address (rtx tramp)
10795 if (TARGET_SHMEDIA)
10796 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10797 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10798 return tramp;
10801 /* FIXME: This is overly conservative. A SHcompact function that
10802 receives arguments ``by reference'' will have them stored in its
10803 own stack frame, so it must not pass pointers or references to
10804 these arguments to other functions by means of sibling calls. */
10805 /* If PIC, we cannot make sibling calls to global functions
10806 because the PLT requires r12 to be live. */
10807 static bool
10808 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10810 return (1
10811 && (! TARGET_SHCOMPACT
10812 || crtl->args.info.stack_regs == 0)
10813 && ! sh_cfun_interrupt_handler_p ()
10814 && (! flag_pic
10815 || (decl && ! TREE_PUBLIC (decl))
10816 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10819 /* Machine specific built-in functions. */
10821 struct builtin_description
10823 const enum insn_code icode;
10824 const char *const name;
10825 int signature;
10826 tree fndecl;
10829 /* Describe the number and signedness of arguments; arg[0] == result
10830 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10831 /* 9: 64-bit pointer, 10: 32-bit pointer */
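/* For example, SH_BLTIN_SH_HI is { 4, 4, 1 }: the result and the first
   argument take the insn's operand mode with signedness ignored, while the
   shift / control argument is unsigned. */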
10832 static const char signature_args[][4] =
10834 #define SH_BLTIN_V2SI2 0
10835 { 4, 4 },
10836 #define SH_BLTIN_V4HI2 1
10837 { 4, 4 },
10838 #define SH_BLTIN_V2SI3 2
10839 { 4, 4, 4 },
10840 #define SH_BLTIN_V4HI3 3
10841 { 4, 4, 4 },
10842 #define SH_BLTIN_V8QI3 4
10843 { 4, 4, 4 },
10844 #define SH_BLTIN_MAC_HISI 5
10845 { 1, 4, 4, 1 },
10846 #define SH_BLTIN_SH_HI 6
10847 { 4, 4, 1 },
10848 #define SH_BLTIN_SH_SI 7
10849 { 4, 4, 1 },
10850 #define SH_BLTIN_V4HI2V2SI 8
10851 { 4, 4, 4 },
10852 #define SH_BLTIN_V4HI2V8QI 9
10853 { 4, 4, 4 },
10854 #define SH_BLTIN_SISF 10
10855 { 4, 2 },
10856 #define SH_BLTIN_LDUA_L 11
10857 { 2, 10 },
10858 #define SH_BLTIN_LDUA_Q 12
10859 { 1, 10 },
10860 #define SH_BLTIN_STUA_L 13
10861 { 0, 10, 2 },
10862 #define SH_BLTIN_STUA_Q 14
10863 { 0, 10, 1 },
10864 #define SH_BLTIN_LDUA_L64 15
10865 { 2, 9 },
10866 #define SH_BLTIN_LDUA_Q64 16
10867 { 1, 9 },
10868 #define SH_BLTIN_STUA_L64 17
10869 { 0, 9, 2 },
10870 #define SH_BLTIN_STUA_Q64 18
10871 { 0, 9, 1 },
10872 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10873 #define SH_BLTIN_2 19
10874 #define SH_BLTIN_SU 19
10875 { 1, 2 },
10876 #define SH_BLTIN_3 20
10877 #define SH_BLTIN_SUS 20
10878 { 2, 2, 1 },
10879 #define SH_BLTIN_PSSV 21
10880 { 0, 8, 2, 2 },
10881 #define SH_BLTIN_XXUU 22
10882 #define SH_BLTIN_UUUU 22
10883 { 1, 1, 1, 1 },
10884 #define SH_BLTIN_PV 23
10885 { 0, 8 },
10887 /* mcmv: operands considered unsigned. */
10888 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10889 /* mperm: control value considered unsigned int. */
10890 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10891 /* mshards_q: returns signed short. */
10892 /* nsb: takes long long arg, returns unsigned char. */
10893 static struct builtin_description bdesc[] =
10895 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10896 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10897 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10898 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10899 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10900 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10901 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10902 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10903 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10904 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10905 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10906 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10907 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10908 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10909 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10910 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10911 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10912 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10913 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10914 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10915 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10916 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10917 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10918 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10919 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10920 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10921 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10922 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10923 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10924 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10925 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10926 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10927 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10928 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10929 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10930 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10931 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10932 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10933 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10934 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10935 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10936 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10937 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10938 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10939 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10940 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10941 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10942 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10943 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10944 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10945 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10946 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10947 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10948 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10949 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10950 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10951 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10952 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10953 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10954 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10955 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10956 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10957 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10958 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10959 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10960 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10961 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10962 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10963 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10964 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10965 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10966 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10967 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10968 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10969 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10970 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10971 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10972 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10973 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10974 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10975 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10976 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10977 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10978 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10981 static void
10982 sh_media_init_builtins (void)
10984 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10985 struct builtin_description *d;
10987 memset (shared, 0, sizeof shared);
10988 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10990 tree type, arg_type = 0;
10991 int signature = d->signature;
10992 int i;
10994 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10995 type = shared[signature];
10996 else
10998 int has_result = signature_args[signature][0] != 0;
11000 if ((signature_args[signature][1] & 8)
11001 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11002 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11003 continue;
11004 if (! TARGET_FPU_ANY
11005 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11006 continue;
11007 type = void_list_node;
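/* Walk the signature from the last argument down to entry 0, consing the
   argument types onto TYPE; entry 0 supplies the result type. */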
11008 for (i = 3; ; i--)
11010 int arg = signature_args[signature][i];
11011 int opno = i - 1 + has_result;
11013 if (arg & 8)
11014 arg_type = ptr_type_node;
11015 else if (arg)
11016 arg_type = (*lang_hooks.types.type_for_mode)
11017 (insn_data[d->icode].operand[opno].mode,
11018 (arg & 1));
11019 else if (i)
11020 continue;
11021 else
11022 arg_type = void_type_node;
11023 if (i == 0)
11024 break;
11025 type = tree_cons (NULL_TREE, arg_type, type);
11027 type = build_function_type (arg_type, type);
11028 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11029 shared[signature] = type;
11031 d->fndecl =
11032 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11033 NULL, NULL_TREE);
11037 /* Returns the shmedia builtin decl for CODE. */
11039 static tree
11040 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11042 if (code >= ARRAY_SIZE (bdesc))
11043 return error_mark_node;
11045 return bdesc[code].fndecl;
11048 /* Implements target hook vector_mode_supported_p. */
11049 bool
11050 sh_vector_mode_supported_p (enum machine_mode mode)
11052 if (TARGET_FPU_ANY
11053 && ((mode == V2SFmode)
11054 || (mode == V4SFmode)
11055 || (mode == V16SFmode)))
11056 return true;
11058 else if (TARGET_SHMEDIA
11059 && ((mode == V8QImode)
11060 || (mode == V2HImode)
11061 || (mode == V4HImode)
11062 || (mode == V2SImode)))
11063 return true;
11065 return false;
11068 bool
11069 sh_frame_pointer_required (void)
11071 /* If needed, override this in other tm.h files to cope with various OS
11072 lossage requiring a frame pointer. */
11073 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11074 return true;
11076 if (crtl->profile)
11077 return true;
11079 return false;
11082 /* Implements target hook dwarf_calling_convention. Return an enum
11083 of dwarf_calling_convention. */
11085 sh_dwarf_calling_convention (const_tree func)
11087 if (sh_attr_renesas_p (func))
11088 return DW_CC_GNU_renesas_sh;
11090 return DW_CC_normal;
11093 static void
11094 sh_init_builtins (void)
11096 if (TARGET_SHMEDIA)
11097 sh_media_init_builtins ();
11100 /* Returns the sh builtin decl for CODE. */
11102 static tree
11103 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11105 if (TARGET_SHMEDIA)
11106 return sh_media_builtin_decl (code, initialize_p);
11108 return error_mark_node;
11111 /* Expand an expression EXP that calls a built-in function,
11112 with result going to TARGET if that's convenient
11113 (and in mode MODE if that's convenient).
11114 SUBTARGET may be used as the target for computing one of EXP's operands.
11115 IGNORE is nonzero if the value is to be ignored. */
11117 static rtx
11118 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11119 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11121 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11122 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11123 const struct builtin_description *d = &bdesc[fcode];
11124 enum insn_code icode = d->icode;
11125 int signature = d->signature;
11126 enum machine_mode tmode = VOIDmode;
11127 int nop = 0, i;
11128 rtx op[4];
11129 rtx pat = 0;
11131 if (signature_args[signature][0])
11133 if (ignore)
11134 return 0;
11136 tmode = insn_data[icode].operand[0].mode;
11137 if (! target
11138 || GET_MODE (target) != tmode
11139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11140 target = gen_reg_rtx (tmode);
11141 op[nop++] = target;
11143 else
11144 target = 0;
11146 for (i = 1; i <= 3; i++, nop++)
11148 tree arg;
11149 enum machine_mode opmode, argmode;
11150 tree optype;
11152 if (! signature_args[signature][i])
11153 break;
11154 arg = CALL_EXPR_ARG (exp, i - 1);
11155 if (arg == error_mark_node)
11156 return const0_rtx;
11157 if (signature_args[signature][i] & 8)
11159 opmode = ptr_mode;
11160 optype = ptr_type_node;
11162 else
11164 opmode = insn_data[icode].operand[nop].mode;
11165 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11167 argmode = TYPE_MODE (TREE_TYPE (arg));
11168 if (argmode != opmode)
11169 arg = build1 (NOP_EXPR, optype, arg);
11170 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11171 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11172 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11175 switch (nop)
11177 case 1:
11178 pat = (*insn_data[d->icode].genfun) (op[0]);
11179 break;
11180 case 2:
11181 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11182 break;
11183 case 3:
11184 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11185 break;
11186 case 4:
11187 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11188 break;
11189 default:
11190 gcc_unreachable ();
11192 if (! pat)
11193 return 0;
11194 emit_insn (pat);
11195 return target;
11198 void
11199 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11201 rtx sel0 = const0_rtx;
11202 rtx sel1 = const1_rtx;
11203 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11204 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11206 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11207 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11210 void
11211 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11213 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11215 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11216 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11219 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11220 We can allow any mode in any general register. The special registers
11221 only allow SImode. Don't allow any mode in the PR.
11223 We cannot hold DCmode values in the XD registers because alter_reg
11224 handles subregs of them incorrectly. We could work around this by
11225 spacing the XD registers like the DR registers, but this would require
11226 additional memory in every compilation to hold larger register vectors.
11227 We could hold SFmode / SCmode values in XD registers, but that
11228 would require a tertiary reload when reloading from / to memory,
11229 and a secondary reload to reload from / to general regs; that
11230 seems to be a losing proposition.
11232 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11233 it won't be ferried through GP registers first. */
11235 bool
11236 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11238 if (SPECIAL_REGISTER_P (regno))
11239 return mode == SImode;
11241 if (regno == FPUL_REG)
11242 return (mode == SImode || mode == SFmode);
11244 if (FP_REGISTER_P (regno) && mode == SFmode)
11245 return true;
11247 if (mode == V2SFmode)
11249 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11250 || GENERAL_REGISTER_P (regno)))
11251 return true;
11252 else
11253 return false;
11256 if (mode == V4SFmode)
11258 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11259 || GENERAL_REGISTER_P (regno))
11260 return true;
11261 else
11262 return false;
11265 if (mode == V16SFmode)
11267 if (TARGET_SHMEDIA)
11269 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11270 return true;
11271 else
11272 return false;
11274 else
11275 return regno == FIRST_XD_REG;
11278 if (FP_REGISTER_P (regno))
11280 if (mode == SFmode
11281 || mode == SImode
11282 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11283 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11284 || mode == DCmode
11285 || (TARGET_SHMEDIA
11286 && (mode == DFmode || mode == DImode
11287 || mode == V2SFmode || mode == TImode)))
11288 && ((regno - FIRST_FP_REG) & 1) == 0)
11289 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11290 && ((regno - FIRST_FP_REG) & 3) == 0))
11291 return true;
11292 else
11293 return false;
11296 if (XD_REGISTER_P (regno))
11297 return mode == DFmode;
11299 if (TARGET_REGISTER_P (regno))
11300 return (mode == DImode || mode == SImode || mode == PDImode);
11302 if (regno == PR_REG)
11303 return mode == SImode;
11305 if (regno == FPSCR_REG)
11306 return mode == PSImode;
11308 /* FIXME. This works around PR target/37633 for -O0. */
11309 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11311 unsigned int n = GET_MODE_SIZE (mode) / 8;
11313 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11314 && regno <= FIRST_GENERAL_REG + 14)
11315 return false;
11318 return true;
11321 /* Return true if a mode change from FROM to TO is invalid for registers
11322 in class RCLASS. */
11323 bool
11324 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11325 enum reg_class rclass)
11327 /* We want to enable the use of SUBREGs as a means to
11328 VEC_SELECT a single element of a vector. */
11329 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11330 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11332 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11334 if (TARGET_LITTLE_ENDIAN)
11336 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11337 return reg_classes_intersect_p (DF_REGS, rclass);
11339 else
11341 if (GET_MODE_SIZE (from) < 8)
11342 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11345 return 0;
11348 /* Return true if registers in machine mode MODE will likely be
11349 allocated to registers in small register classes. */
11351 bool
11352 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11354 return (! TARGET_SHMEDIA);
11357 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11358 that label is used. */
11360 void
11361 sh_mark_label (rtx address, int nuses)
11363 if (GOTOFF_P (address))
11365 /* Extract the label or symbol. */
11366 address = XEXP (address, 0);
11367 if (GET_CODE (address) == PLUS)
11368 address = XEXP (address, 0);
11369 address = XVECEXP (address, 0, 0);
11371 if (GET_CODE (address) == LABEL_REF
11372 && LABEL_P (XEXP (address, 0)))
11373 LABEL_NUSES (XEXP (address, 0)) += nuses;
11376 /* Compute extra cost of moving data between one register class
11377 and another. */
11379 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11380 uses this information. Hence, the general register <-> floating point
11381 register information here is not used for SFmode. */
11384 sh_register_move_cost (enum machine_mode mode,
11385 enum reg_class srcclass, enum reg_class dstclass)
11387 if (dstclass == T_REGS || dstclass == PR_REGS)
11388 return 10;
11390 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11391 return 4;
11393 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11394 && REGCLASS_HAS_FP_REG (srcclass)
11395 && REGCLASS_HAS_FP_REG (dstclass))
11396 return 4;
11398 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11399 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11401 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11402 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11403 return 9;
11405 if ((REGCLASS_HAS_FP_REG (dstclass)
11406 && REGCLASS_HAS_GENERAL_REG (srcclass))
11407 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11408 && REGCLASS_HAS_FP_REG (srcclass)))
11409 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11410 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11412 if ((dstclass == FPUL_REGS
11413 && REGCLASS_HAS_GENERAL_REG (srcclass))
11414 || (srcclass == FPUL_REGS
11415 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11416 return 5;
11418 if ((dstclass == FPUL_REGS
11419 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11420 || (srcclass == FPUL_REGS
11421 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11422 return 7;
11424 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11425 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11426 return 20;
11428 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11429 if (TARGET_SHMEDIA
11430 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11432 if (sh_gettrcost >= 0)
11433 return sh_gettrcost;
11434 else if (!TARGET_PT_FIXED)
11435 return 100;
11438 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11439 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11440 return 4;
11442 if (TARGET_SHMEDIA
11443 || (TARGET_FMOVD
11444 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11445 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11446 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11448 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
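/* A rough worked example of the cost formulas above, assuming the usual
   4-byte SImode and 8-byte DFmode sizes (illustrative only):

     SImode, general <-> general:      2 * ((4 + 3) / 4) =  2
     DFmode, general <-> general:      2 * ((8 + 3) / 4) =  4
     DFmode, FP <-> FP with -mfmovd:   2 * ((8 + 7) / 8) =  2
     DFmode, general <-> FP on SH4:    8 * ((8 + 7) / 8) =  8 with -mfmovd,
                                      12 * ((8 + 7) / 8) = 12 without.  */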
11451 static rtx emit_load_ptr (rtx, rtx);
11453 static rtx
11454 emit_load_ptr (rtx reg, rtx addr)
11456 rtx mem = gen_const_mem (ptr_mode, addr);
11458 if (Pmode != ptr_mode)
11459 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11460 return emit_move_insn (reg, mem);
11463 static void
11464 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11465 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11466 tree function)
11468 CUMULATIVE_ARGS cum;
11469 int structure_value_byref = 0;
11470 rtx this_rtx, this_value, sibcall, insns, funexp;
11471 tree funtype = TREE_TYPE (function);
11472 int simple_add = CONST_OK_FOR_ADD (delta);
11473 int did_load = 0;
11474 rtx scratch0, scratch1, scratch2;
11475 unsigned i;
11477 reload_completed = 1;
11478 epilogue_completed = 1;
11479 current_function_uses_only_leaf_regs = 1;
11481 emit_note (NOTE_INSN_PROLOGUE_END);
11483 /* Find the "this" pointer. We have such a wide range of ABIs for the
11484 SH that it's best to do this completely machine independently.
11485 "this" is passed as first argument, unless a structure return pointer
11486 comes first, in which case "this" comes second. */
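/* In rough C terms, the thunk emitted below behaves like this (a sketch
   that ignores the register-allocation details handled here):

     adjusted_this = this + delta;
     if (vcall_offset)
       adjusted_this += *(ptrdiff_t *) (*(char **) adjusted_this + vcall_offset);
     tail-call function (adjusted_this, ...);  */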
11487 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11488 #ifndef PCC_STATIC_STRUCT_RETURN
11489 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11490 structure_value_byref = 1;
11491 #endif /* not PCC_STATIC_STRUCT_RETURN */
11492 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11494 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11496 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11498 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11500 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11501 static chain pointer (even if you can't have nested virtual functions
11502 right now, someone might implement them sometime), and the rest of the
11503 registers are used for argument passing, are callee-saved, or reserved. */
11504 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11505 -ffixed-reg has been used. */
11506 if (! call_used_regs[0] || fixed_regs[0])
11507 error ("r0 needs to be available as a call-clobbered register");
11508 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11509 if (! TARGET_SH5)
11511 if (call_used_regs[1] && ! fixed_regs[1])
11512 scratch1 = gen_rtx_REG (ptr_mode, 1);
11513 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11514 to the location where struct values are returned. */
11515 if (call_used_regs[3] && ! fixed_regs[3])
11516 scratch2 = gen_rtx_REG (Pmode, 3);
11518 else if (TARGET_SHMEDIA)
11520 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11521 if (i != REGNO (scratch0) &&
11522 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11524 scratch1 = gen_rtx_REG (ptr_mode, i);
11525 break;
11527 if (scratch1 == scratch0)
11528 error ("need a second call-clobbered general purpose register");
11529 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11530 if (call_used_regs[i] && ! fixed_regs[i])
11532 scratch2 = gen_rtx_REG (Pmode, i);
11533 break;
11535 if (scratch2 == scratch0)
11536 error ("need a call-clobbered target register");
11539 this_value = plus_constant (this_rtx, delta);
11540 if (vcall_offset
11541 && (simple_add || scratch0 != scratch1)
11542 && strict_memory_address_p (ptr_mode, this_value))
11544 emit_load_ptr (scratch0, this_value);
11545 did_load = 1;
11548 if (!delta)
11549 ; /* Do nothing. */
11550 else if (simple_add)
11551 emit_move_insn (this_rtx, this_value);
11552 else
11554 emit_move_insn (scratch1, GEN_INT (delta));
11555 emit_insn (gen_add2_insn (this_rtx, scratch1));
11558 if (vcall_offset)
11560 rtx offset_addr;
11562 if (!did_load)
11563 emit_load_ptr (scratch0, this_rtx);
11565 offset_addr = plus_constant (scratch0, vcall_offset);
11566 if (strict_memory_address_p (ptr_mode, offset_addr))
11567 ; /* Do nothing. */
11568 else if (! TARGET_SH5 && scratch0 != scratch1)
11570 /* scratch0 != scratch1, and we have indexed loads. Get a better
11571 schedule by loading the offset into r1 and using an indexed
11572 load - then the load of r1 can issue before the load from
11573 (this_rtx + delta) finishes. */
11574 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11575 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11577 else if (CONST_OK_FOR_ADD (vcall_offset))
11579 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11580 offset_addr = scratch0;
11582 else if (scratch0 != scratch1)
11584 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11585 emit_insn (gen_add2_insn (scratch0, scratch1));
11586 offset_addr = scratch0;
11588 else
11589 gcc_unreachable (); /* FIXME */
11590 emit_load_ptr (scratch0, offset_addr);
11592 if (Pmode != ptr_mode)
11593 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11594 emit_insn (gen_add2_insn (this_rtx, scratch0));
11597 /* Generate a tail call to the target function. */
11598 if (! TREE_USED (function))
11600 assemble_external (function);
11601 TREE_USED (function) = 1;
11603 funexp = XEXP (DECL_RTL (function), 0);
11604 /* If the function is overridden, so is the thunk, hence we don't
11605 need GOT addressing even if this is a public symbol. */
11606 #if 0
11607 if (TARGET_SH1 && ! flag_weak)
11608 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11609 else
11610 #endif
11611 if (TARGET_SH2 && flag_pic)
11613 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11614 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11616 else
11618 if (TARGET_SHMEDIA && flag_pic)
11620 funexp = gen_sym2PIC (funexp);
11621 PUT_MODE (funexp, Pmode);
11623 emit_move_insn (scratch2, funexp);
11624 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11625 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11627 sibcall = emit_call_insn (sibcall);
11628 SIBLING_CALL_P (sibcall) = 1;
11629 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11630 emit_barrier ();
11632 /* Run just enough of rest_of_compilation to do scheduling and get
11633 the insns emitted. Note that use_thunk calls
11634 assemble_start_function and assemble_end_function. */
11636 insn_locators_alloc ();
11637 insns = get_insns ();
11639 if (optimize > 0)
11641 if (! cfun->cfg)
11642 init_flow (cfun);
11643 split_all_insns_noflow ();
11646 sh_reorg ();
11648 if (optimize > 0 && flag_delayed_branch)
11649 dbr_schedule (insns);
11651 shorten_branches (insns);
11652 final_start_function (insns, file, 1);
11653 final (insns, file, 1);
11654 final_end_function ();
11656 reload_completed = 0;
11657 epilogue_completed = 0;
11661 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11663 rtx sym;
11665 /* If this is not an ordinary function, the name usually comes from a
11666 string literal or an sprintf buffer. Make sure we use the same
11667 string consistently, so that cse will be able to unify address loads. */
11668 if (kind != FUNCTION_ORDINARY)
11669 name = IDENTIFIER_POINTER (get_identifier (name));
11670 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11671 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11672 if (flag_pic)
11673 switch (kind)
11675 case FUNCTION_ORDINARY:
11676 break;
11677 case SFUNC_GOT:
11679 rtx reg = target ? target : gen_reg_rtx (Pmode);
11681 emit_insn (gen_symGOT2reg (reg, sym));
11682 sym = reg;
11683 break;
11685 case SFUNC_STATIC:
11687 /* ??? To allow cse to work, we use GOTOFF relocations.
11688 We could add combiner patterns to transform this into
11689 straight pc-relative calls with sym2PIC / bsrf when
11690 label load and function call are still 1:1 and in the
11691 same basic block during combine. */
11692 rtx reg = target ? target : gen_reg_rtx (Pmode);
11694 emit_insn (gen_symGOTOFF2reg (reg, sym));
11695 sym = reg;
11696 break;
11699 if (target && sym != target)
11701 emit_move_insn (target, sym);
11702 return target;
11704 return sym;
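/* Typical use, for illustration: callers elsewhere in the SH backend ask
   for sfunc addresses along the lines of

     rtx addr = function_symbol (NULL_RTX, "__udivsi3_i4", SFUNC_STATIC);

   Passing every such name through get_identifier, as done above, makes
   repeated requests for the same sfunc yield SYMBOL_REFs with the same
   string, which is what lets cse unify the address loads. */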
11707 /* Find the number of a general purpose register in S, or -1 if none. */
11708 static int
11709 scavenge_reg (HARD_REG_SET *s)
11711 int r;
11712 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11713 if (TEST_HARD_REG_BIT (*s, r))
11714 return r;
11715 return -1;
11719 sh_get_pr_initial_val (void)
11721 rtx val;
11723 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11724 PR register on SHcompact, because it might be clobbered by the prologue.
11725 We check first if that is known to be the case. */
11726 if (TARGET_SHCOMPACT
11727 && ((crtl->args.info.call_cookie
11728 & ~ CALL_COOKIE_RET_TRAMP (1))
11729 || crtl->saves_all_registers))
11730 return gen_frame_mem (SImode, return_address_pointer_rtx);
11732 /* If we haven't finished rtl generation, there might be a nonlocal label
11733 that we haven't seen yet.
11734 ??? get_hard_reg_initial_val fails if it is called after register
11735 allocation has started, unless it has been called before for the
11736 same register. And even then, we end up in trouble if we didn't use
11737 the register in the same basic block before. So call
11738 get_hard_reg_initial_val now and wrap it in an unspec if we might
11739 need to replace it. */
11740 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11741 combine can put the pseudo returned by get_hard_reg_initial_val into
11742 instructions that need a general purpose register, which will fail to
11743 be recognized when the pseudo becomes allocated to PR. */
11745 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11746 if (TARGET_SH1)
11747 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11748 return val;
11752 sh_expand_t_scc (rtx operands[])
11754 enum rtx_code code = GET_CODE (operands[1]);
11755 rtx target = operands[0];
11756 rtx op0 = operands[2];
11757 rtx op1 = operands[3];
11758 rtx result = target;
11759 HOST_WIDE_INT val;
11761 if (!REG_P (op0) || REGNO (op0) != T_REG
11762 || !CONST_INT_P (op1))
11763 return 0;
11764 if (!REG_P (result))
11765 result = gen_reg_rtx (SImode);
11766 val = INTVAL (op1);
11767 if ((code == EQ && val == 1) || (code == NE && val == 0))
11768 emit_insn (gen_movt (result));
11769 else if (TARGET_SH2A && ((code == EQ && val == 0)
11770 || (code == NE && val == 1)))
11771 emit_insn (gen_xorsi3_movrt (result));
11772 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11774 emit_clobber (result);
11775 emit_insn (gen_subc (result, result, result));
11776 emit_insn (gen_addsi3 (result, result, const1_rtx));
11778 else if (code == EQ || code == NE)
11779 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11780 else
11781 return 0;
11782 if (result != target)
11783 emit_move_insn (target, result);
11784 return 1;
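/* For illustration, the kinds of sequences this emits when storing the
   T-bit comparison result in Rn (a sketch; the exact patterns live in
   sh.md):

     result = T   ->  movt  Rn                 (any SH)
     result = !T  ->  movrt Rn                 (SH2A)
     result = !T  ->  subc  Rn,Rn  ; Rn = -T   (fallback)
                      add   #1,Rn  ; Rn = 1 - T  */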
11787 /* INSN is an sfunc; return the rtx that describes the address used. */
11788 static rtx
11789 extract_sfunc_addr (rtx insn)
11791 rtx pattern, part = NULL_RTX;
11792 int len, i;
11794 pattern = PATTERN (insn);
11795 len = XVECLEN (pattern, 0);
11796 for (i = 0; i < len; i++)
11798 part = XVECEXP (pattern, 0, i);
11799 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11800 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11801 return XEXP (part, 0);
11803 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11804 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11807 /* Verify that the register in use_sfunc_addr still agrees with the address
11808 used in the sfunc. This prevents fill_slots_from_thread from changing
11809 use_sfunc_addr.
11810 INSN is the use_sfunc_addr instruction, and REG is the register it
11811 guards. */
11813 check_use_sfunc_addr (rtx insn, rtx reg)
11815 /* Search for the sfunc. It should really come right after INSN. */
11816 while ((insn = NEXT_INSN (insn)))
11818 if (LABEL_P (insn) || JUMP_P (insn))
11819 break;
11820 if (! INSN_P (insn))
11821 continue;
11823 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11824 insn = XVECEXP (PATTERN (insn), 0, 0);
11825 if (GET_CODE (PATTERN (insn)) != PARALLEL
11826 || get_attr_type (insn) != TYPE_SFUNC)
11827 continue;
11828 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11830 gcc_unreachable ();
11833 /* This function returns a constant rtx that represents 2**15 / pi in
11834 SFmode. It's used to scale SFmode angles, in radians, to a
11835 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11836 maps to 0x10000. */
11838 static GTY(()) rtx sh_fsca_sf2int_rtx;
11841 sh_fsca_sf2int (void)
11843 if (! sh_fsca_sf2int_rtx)
11845 REAL_VALUE_TYPE rv;
11847 real_from_string (&rv, "10430.378350470453");
11848 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11851 return sh_fsca_sf2int_rtx;
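/* A worked example of the scaling, for illustration: fsca takes its
   argument as a signed 16.16 fixed-point fraction of a full circle, so an
   angle of pi/2 radians becomes

     pi/2 * (2**15 / pi) = 2**14 = 0x4000,

   a quarter of the 0x10000 units that represent 2*pi. The constant
   10430.378... above is simply 2**15 / pi. */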
11854 /* This function returns a constant rtx that represents 2**15 / pi in
11855 DFmode. It's used to scale DFmode angles, in radians, to a
11856 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11857 maps to 0x10000. */
11859 static GTY(()) rtx sh_fsca_df2int_rtx;
11862 sh_fsca_df2int (void)
11864 if (! sh_fsca_df2int_rtx)
11866 REAL_VALUE_TYPE rv;
11868 real_from_string (&rv, "10430.378350470453");
11869 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11872 return sh_fsca_df2int_rtx;
11875 /* This function returns a constant rtx that represents pi / 2**15 in
11876 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11877 of a full circle back to an SFmode value, i.e., 0x10000 maps to
11878 2*pi. */
11880 static GTY(()) rtx sh_fsca_int2sf_rtx;
11883 sh_fsca_int2sf (void)
11885 if (! sh_fsca_int2sf_rtx)
11887 REAL_VALUE_TYPE rv;
11889 real_from_string (&rv, "9.587379924285257e-5");
11890 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11893 return sh_fsca_int2sf_rtx;
11896 /* Initialize the CUMULATIVE_ARGS structure. */
11898 void
11899 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11900 tree fntype,
11901 rtx libname ATTRIBUTE_UNUSED,
11902 tree fndecl,
11903 signed int n_named_args,
11904 enum machine_mode mode)
11906 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11907 pcum->free_single_fp_reg = 0;
11908 pcum->stack_regs = 0;
11909 pcum->byref_regs = 0;
11910 pcum->byref = 0;
11911 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11913 /* XXX - Should we check TARGET_HITACHI here ??? */
11914 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11916 if (fntype)
11918 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11919 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11920 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11921 pcum->arg_count [(int) SH_ARG_INT]
11922 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11924 pcum->call_cookie
11925 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11926 && pcum->arg_count [(int) SH_ARG_INT] == 0
11927 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11928 ? int_size_in_bytes (TREE_TYPE (fntype))
11929 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11930 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11931 == FIRST_RET_REG));
11933 else
11935 pcum->arg_count [(int) SH_ARG_INT] = 0;
11936 pcum->prototype_p = FALSE;
11937 if (mode != VOIDmode)
11939 pcum->call_cookie =
11940 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11941 && GET_MODE_SIZE (mode) > 4
11942 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11944 /* If the default ABI is the Renesas ABI then all library
11945 calls must assume that the library will be using the
11946 Renesas ABI. So if the function would return its result
11947 in memory then we must force the address of this memory
11948 block onto the stack. Ideally we would like to call
11949 targetm.calls.return_in_memory() here but we do not have
11950 the TYPE or the FNDECL available so we synthesize the
11951 contents of that function as best we can. */
11952 pcum->force_mem =
11953 (TARGET_DEFAULT & MASK_HITACHI)
11954 && (mode == BLKmode
11955 || (GET_MODE_SIZE (mode) > 4
11956 && !(mode == DFmode
11957 && TARGET_FPU_DOUBLE)));
11959 else
11961 pcum->call_cookie = 0;
11962 pcum->force_mem = FALSE;
11967 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11968 not descend into CONST_DOUBLEs when replacing.
11970 Note that copying is not done so X must not be shared unless all copies
11971 are to be modified.
11973 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11974 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11975 replacements[n*2+1] - and that we take mode changes into account.
11977 If a replacement is ambiguous, return NULL_RTX.
11979 If MODIFY is zero, don't modify any rtl in place,
11980 just return zero or nonzero for failure / success. */
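/* For illustration, the calling convention (a sketch, not code from this
   file): to replace FROM_A with TO_A and FROM_B with TO_B simultaneously,
   a caller would set up

     rtx repl[4] = { from_a, to_a, from_b, to_b };
     if (replace_n_hard_rtx (x, repl, 2, 0))    -- check feasibility first
       replace_n_hard_rtx (x, repl, 2, 1);      -- then modify X in place

   so that FROM(0) is repl[0], TO(0) is repl[1], and so on. */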
11983 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11985 int i, j;
11986 const char *fmt;
11988 /* The following prevents a loop when we change a MEM inside a
11989 CONST_DOUBLE into the same CONST_DOUBLE. */
11990 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11991 return x;
11993 for (i = n_replacements - 1; i >= 0 ; i--)
11994 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11995 return replacements[i*2+1];
11997 /* Allow this function to make replacements in EXPR_LISTs. */
11998 if (x == 0)
11999 return 0;
12001 if (GET_CODE (x) == SUBREG)
12003 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12004 n_replacements, modify);
12006 if (CONST_INT_P (new_rtx))
12008 x = simplify_subreg (GET_MODE (x), new_rtx,
12009 GET_MODE (SUBREG_REG (x)),
12010 SUBREG_BYTE (x));
12011 if (! x)
12012 abort ();
12014 else if (modify)
12015 SUBREG_REG (x) = new_rtx;
12017 return x;
12019 else if (REG_P (x))
12021 unsigned regno = REGNO (x);
12022 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12023 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12024 rtx result = NULL_RTX;
12026 for (i = n_replacements - 1; i >= 0; i--)
12028 rtx from = replacements[i*2];
12029 rtx to = replacements[i*2+1];
12030 unsigned from_regno, from_nregs, to_regno, new_regno;
12032 if (!REG_P (from))
12033 continue;
12034 from_regno = REGNO (from);
12035 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12036 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12037 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12039 if (regno < from_regno
12040 || regno + nregs > from_regno + from_nregs
12041 || !REG_P (to)
12042 || result)
12043 return NULL_RTX;
12044 to_regno = REGNO (to);
12045 if (to_regno < FIRST_PSEUDO_REGISTER)
12047 new_regno = regno + to_regno - from_regno;
12048 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12049 != nregs)
12050 return NULL_RTX;
12051 result = gen_rtx_REG (GET_MODE (x), new_regno);
12053 else if (GET_MODE (x) <= GET_MODE (to))
12054 result = gen_lowpart_common (GET_MODE (x), to);
12055 else
12056 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12059 return result ? result : x;
12061 else if (GET_CODE (x) == ZERO_EXTEND)
12063 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12064 n_replacements, modify);
12066 if (CONST_INT_P (new_rtx))
12068 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12069 new_rtx, GET_MODE (XEXP (x, 0)));
12070 if (! x)
12071 abort ();
12073 else if (modify)
12074 XEXP (x, 0) = new_rtx;
12076 return x;
12079 fmt = GET_RTX_FORMAT (GET_CODE (x));
12080 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12082 rtx new_rtx;
12084 if (fmt[i] == 'e')
12086 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12087 n_replacements, modify);
12088 if (!new_rtx)
12089 return NULL_RTX;
12090 if (modify)
12091 XEXP (x, i) = new_rtx;
12093 else if (fmt[i] == 'E')
12094 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12096 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12097 n_replacements, modify);
12098 if (!new_rtx)
12099 return NULL_RTX;
12100 if (modify)
12101 XVECEXP (x, i, j) = new_rtx;
12105 return x;
12109 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12111 enum rtx_code code = TRUNCATE;
12113 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12115 rtx inner = XEXP (x, 0);
12116 enum machine_mode inner_mode = GET_MODE (inner);
12118 if (inner_mode == mode)
12119 return inner;
12120 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12121 x = inner;
12122 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12123 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12125 code = GET_CODE (x);
12126 x = inner;
12129 return gen_rtx_fmt_e (code, mode, x);
12132 /* Called via for_each_rtx after reload, to clean up truncates of
12133 registers that span multiple actual hard registers. */
12135 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12137 rtx x = *p, reg;
12139 if (GET_CODE (x) != TRUNCATE)
12140 return 0;
12141 reg = XEXP (x, 0);
12142 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12144 enum machine_mode reg_mode = GET_MODE (reg);
12145 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12146 subreg_lowpart_offset (DImode, reg_mode));
12147 *(int*) n_changes += 1;
12148 return -1;
12150 return 0;
12153 /* Load and store depend on the highpart of the address. However,
12154 set_attr_alternative does not give well-defined results before reload,
12155 so we must look at the rtl ourselves to see if any of the feeding
12156 registers is used in a memref. */
12158 /* Called by sh_contains_memref_p via for_each_rtx. */
12159 static int
12160 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12162 return (MEM_P (*loc));
12165 /* Return nonzero iff INSN contains a MEM. */
12167 sh_contains_memref_p (rtx insn)
12169 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12172 /* Return nonzero iff INSN loads a banked register. */
12174 sh_loads_bankedreg_p (rtx insn)
12176 if (GET_CODE (PATTERN (insn)) == SET)
12178 rtx op = SET_DEST (PATTERN(insn));
12179 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12180 return 1;
12183 return 0;
12186 /* FNADDR is the MEM expression from a call expander. Return an address
12187 to use in an SHmedia insn pattern. */
12189 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12191 int is_sym;
12193 fnaddr = XEXP (fnaddr, 0);
12194 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12195 if (flag_pic && is_sym)
12197 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12199 rtx reg = gen_reg_rtx (Pmode);
12201 /* We must not use GOTPLT for sibcalls, because PIC_REG
12202 must be restored before the PLT code gets to run. */
12203 if (is_sibcall)
12204 emit_insn (gen_symGOT2reg (reg, fnaddr));
12205 else
12206 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12207 fnaddr = reg;
12209 else
12211 fnaddr = gen_sym2PIC (fnaddr);
12212 PUT_MODE (fnaddr, Pmode);
12215 /* If ptabs might trap, make this visible to the rest of the compiler.
12216 We generally assume that symbols pertain to valid locations, but
12217 it is possible to generate invalid symbols with asm or linker tricks.
12218 In a list of functions where each returns its successor, an invalid
12219 symbol might denote an empty list. */
12220 if (!TARGET_PT_FIXED
12221 && (!is_sym || TARGET_INVALID_SYMBOLS)
12222 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12224 rtx tr = gen_reg_rtx (PDImode);
12226 emit_insn (gen_ptabs (tr, fnaddr));
12227 fnaddr = tr;
12229 else if (! target_reg_operand (fnaddr, Pmode))
12230 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12231 return fnaddr;
12234 reg_class_t
12235 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12236 enum machine_mode mode, secondary_reload_info *sri)
12238 enum reg_class rclass = (enum reg_class) rclass_i;
12240 if (in_p)
12242 if (REGCLASS_HAS_FP_REG (rclass)
12243 && ! TARGET_SHMEDIA
12244 && immediate_operand ((x), mode)
12245 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12246 && mode == SFmode && fldi_ok ()))
12247 switch (mode)
12249 case SFmode:
12250 sri->icode = CODE_FOR_reload_insf__frn;
12251 return NO_REGS;
12252 case DFmode:
12253 sri->icode = CODE_FOR_reload_indf__frn;
12254 return NO_REGS;
12255 case SImode:
12256 /* ??? If we knew that we are in the appropriate mode -
12257 single precision - we could use a reload pattern directly. */
12258 return FPUL_REGS;
12259 default:
12260 abort ();
12262 if (rclass == FPUL_REGS
12263 && ((REG_P (x)
12264 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12265 || REGNO (x) == T_REG))
12266 || GET_CODE (x) == PLUS))
12267 return GENERAL_REGS;
12268 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12270 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12271 return GENERAL_REGS;
12272 else if (mode == SFmode)
12273 return FP_REGS;
12274 sri->icode = CODE_FOR_reload_insi__i_fpul;
12275 return NO_REGS;
12277 if (rclass == FPSCR_REGS
12278 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12279 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12280 return GENERAL_REGS;
12281 if (REGCLASS_HAS_FP_REG (rclass)
12282 && TARGET_SHMEDIA
12283 && immediate_operand (x, mode)
12284 && x != CONST0_RTX (GET_MODE (x))
12285 && GET_MODE (x) != V4SFmode)
12286 return GENERAL_REGS;
12287 if ((mode == QImode || mode == HImode)
12288 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12290 sri->icode = ((mode == QImode)
12291 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12292 return NO_REGS;
12294 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12295 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12296 return TARGET_REGS;
12297 } /* end of input-only processing. */
12299 if (((REGCLASS_HAS_FP_REG (rclass)
12300 && (REG_P (x)
12301 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12302 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12303 && TARGET_FMOVD))))
12304 || (REGCLASS_HAS_GENERAL_REG (rclass)
12305 && REG_P (x)
12306 && FP_REGISTER_P (REGNO (x))))
12307 && ! TARGET_SHMEDIA
12308 && (mode == SFmode || mode == SImode))
12309 return FPUL_REGS;
12310 if ((rclass == FPUL_REGS
12311 || (REGCLASS_HAS_FP_REG (rclass)
12312 && ! TARGET_SHMEDIA && mode == SImode))
12313 && (MEM_P (x)
12314 || (REG_P (x)
12315 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12316 || REGNO (x) == T_REG
12317 || system_reg_operand (x, VOIDmode)))))
12319 if (rclass == FPUL_REGS)
12320 return GENERAL_REGS;
12321 return FPUL_REGS;
12323 if ((rclass == TARGET_REGS
12324 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12325 && !satisfies_constraint_Csy (x)
12326 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12327 return GENERAL_REGS;
12328 if ((rclass == MAC_REGS || rclass == PR_REGS)
12329 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12330 && rclass != REGNO_REG_CLASS (REGNO (x)))
12331 return GENERAL_REGS;
12332 if (rclass != GENERAL_REGS && REG_P (x)
12333 && TARGET_REGISTER_P (REGNO (x)))
12334 return GENERAL_REGS;
12335 return NO_REGS;
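/* A concrete illustration of the classes chosen above (a sketch, not an
   exhaustive description): SH4 has no direct move between general and
   floating-point registers, so for an SImode copy FPUL_REGS is returned as
   the intermediate class and the move ends up roughly as

     lds   r1,fpul
     fsts  fpul,fr4

   Likewise, FP immediates other than 0.0 and 1.0 (which fldi0 / fldi1 can
   materialize directly) are routed through the reload_insf__frn /
   reload_indf__frn patterns selected at the top of this function. */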
12338 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12340 #include "gt-sh.h"