* config/sh/sh-protos.h (sh_function_arg): Delete.
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
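/* Illustrative note (added, not from the original source): on a
   little-endian target LSW is 0 and MSW is 1, so a DImode value held in
   the register pair r4/r5 keeps its least significant word in r4 and its
   most significant word in r5; on a big-endian target the two indices
   swap.  The %R and %S operand modifiers further down rely on exactly
   this numbering.  */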
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
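/* Illustrative sketch (hypothetical helper, not part of this file): the
   stack adjustment code later in this file uses the wrappers above
   roughly like this - emit a single add when the displacement fits the
   add-immediate range of the active ISA, otherwise materialize the
   constant in a scratch register first.

     static void
     adjust_by_constant_sketch (rtx reg, rtx scratch, HOST_WIDE_INT d)
     {
       if (CONST_OK_FOR_ADD (d))
         emit_insn (GEN_ADD3 (reg, reg, GEN_INT (d)));
       else
         {
           emit_insn (GEN_MOV (scratch, GEN_INT (d)));
           emit_insn (GEN_ADD3 (reg, reg, scratch));
         }
     }  */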
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
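/* Illustrative sketch (hypothetical helper): SH_ATTRIBUTES lets the
   attribute handlers below accept either a type or a decl.  For a type it
   yields TYPE_ATTRIBUTES; for a decl it prefers the decl's own attribute
   list and falls back to the attributes of the decl's type.

     static bool
     sh_has_attribute_sketch (tree node, const char *name)
     {
       return lookup_attribute (name, SH_ATTRIBUTES (node)) != NULL_TREE;
     }  */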
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
 90 /* Which CPU we are scheduling for.  */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
 115    a given register number.  */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
206 static void sh_insert_attributes (tree, tree *);
207 static const char *sh_check_pch_target_flags (int);
208 static int sh_adjust_cost (rtx, rtx, rtx, int);
209 static int sh_issue_rate (void);
210 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
211 static short find_set_regmode_weight (rtx, enum machine_mode);
212 static short find_insn_regmode_weight (rtx, enum machine_mode);
213 static void find_regmode_weight (basic_block, enum machine_mode);
214 static int find_r0_life_regions (basic_block);
215 static void sh_md_init_global (FILE *, int, int);
216 static void sh_md_finish_global (FILE *, int);
217 static int rank_for_reorder (const void *, const void *);
218 static void swap_reorder (rtx *, int);
219 static void ready_reorder (rtx *, int);
220 static short high_pressure (enum machine_mode);
221 static int sh_reorder (FILE *, int, rtx *, int *, int);
222 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
223 static void sh_md_init (FILE *, int, int);
224 static int sh_variable_issue (FILE *, int, rtx, int);
226 static bool sh_function_ok_for_sibcall (tree, tree);
228 static bool sh_cannot_modify_jumps_p (void);
229 static reg_class_t sh_target_reg_class (void);
230 static bool sh_optimize_target_register_callee_saved (bool);
231 static bool sh_ms_bitfield_layout_p (const_tree);
233 static void sh_init_builtins (void);
234 static tree sh_builtin_decl (unsigned, bool);
235 static void sh_media_init_builtins (void);
236 static tree sh_media_builtin_decl (unsigned, bool);
237 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
238 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
239 static void sh_file_start (void);
240 static int flow_dependent_p (rtx, rtx);
241 static void flow_dependent_p_1 (rtx, const_rtx, void *);
242 static int shiftcosts (rtx);
243 static int andcosts (rtx);
244 static int addsubcosts (rtx);
245 static int multcosts (rtx);
246 static bool unspec_caller_rtx_p (rtx);
247 static bool sh_cannot_copy_insn_p (rtx);
248 static bool sh_rtx_costs (rtx, int, int, int *, bool);
249 static int sh_address_cost (rtx, bool);
250 static int sh_pr_n_sets (void);
251 static rtx sh_allocate_initial_value (rtx);
252 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
253 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
254 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
255 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
256 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
257 static int scavenge_reg (HARD_REG_SET *s);
258 struct save_schedule_s;
259 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
260 struct save_schedule_s *, int);
262 static rtx sh_struct_value_rtx (tree, int);
263 static rtx sh_function_value (const_tree, const_tree, bool);
264 static rtx sh_libcall_value (enum machine_mode, const_rtx);
265 static bool sh_return_in_memory (const_tree, const_tree);
266 static rtx sh_builtin_saveregs (void);
267 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
268 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
269 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
270 static tree sh_build_builtin_va_list (void);
271 static void sh_va_start (tree, rtx);
272 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
273 static bool sh_promote_prototypes (const_tree);
274 static enum machine_mode sh_promote_function_mode (const_tree type,
275 enum machine_mode,
276 int *punsignedp,
277 const_tree funtype,
278 int for_return);
279 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
280 const_tree, bool);
281 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
282 const_tree, bool);
283 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
284 tree, bool);
285 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
286 const_tree, bool);
287 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
288 const_tree, bool);
289 static bool sh_scalar_mode_supported_p (enum machine_mode);
290 static int sh_dwarf_calling_convention (const_tree);
291 static void sh_encode_section_info (tree, rtx, int);
292 static int sh2a_function_vector_p (tree);
293 static void sh_trampoline_init (rtx, tree, rtx);
294 static rtx sh_trampoline_adjust_address (rtx);
296 static const struct attribute_spec sh_attribute_table[] =
298 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
299 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
300 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
301 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
302 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
303 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
304 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
305 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
306 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
307 #ifdef SYMBIAN
308 /* Symbian support adds three new attributes:
309 dllexport - for exporting a function/variable that will live in a dll
310 dllimport - for importing a function/variable from a dll
312 Microsoft allows multiple declspecs in one __declspec, separating
313 them with spaces. We do NOT support this. Instead, use __declspec
314 multiple times. */
315 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
316 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
317 #endif
318 { NULL, 0, 0, false, false, false, NULL }
321 /* Initialize the GCC target structure. */
322 #undef TARGET_ATTRIBUTE_TABLE
323 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
325 /* The next two are used for debug info when compiling with -gdwarf. */
326 #undef TARGET_ASM_UNALIGNED_HI_OP
327 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
328 #undef TARGET_ASM_UNALIGNED_SI_OP
329 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
331 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
332 #undef TARGET_ASM_UNALIGNED_DI_OP
333 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
334 #undef TARGET_ASM_ALIGNED_DI_OP
335 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
337 #undef TARGET_PRINT_OPERAND
338 #define TARGET_PRINT_OPERAND sh_print_operand
339 #undef TARGET_PRINT_OPERAND_ADDRESS
340 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
341 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
342 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
344 #undef TARGET_ASM_FUNCTION_EPILOGUE
345 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
347 #undef TARGET_ASM_OUTPUT_MI_THUNK
348 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
350 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
351 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
353 #undef TARGET_ASM_FILE_START
354 #define TARGET_ASM_FILE_START sh_file_start
355 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
356 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
358 #undef TARGET_DEFAULT_TARGET_FLAGS
359 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
360 #undef TARGET_HANDLE_OPTION
361 #define TARGET_HANDLE_OPTION sh_handle_option
363 #undef TARGET_INSERT_ATTRIBUTES
364 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
369 #undef TARGET_SCHED_ISSUE_RATE
370 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
372 /* The next 5 hooks have been implemented for reenabling sched1. With the
373 help of these macros we are limiting the movement of insns in sched1 to
374 reduce the register pressure. The overall idea is to keep count of SImode
375 and SFmode regs required by already scheduled insns. When these counts
 376    cross some threshold values, give priority to insns that free registers.
 377    The insn that frees registers is most likely to be the insn with the lowest
 378    LUID (original insn order), but such an insn might be sitting in the stalled
 379    queue (Q) instead of the ready queue (R).  To solve this, we skip up to
 380    a maximum of 8 cycles so that such insns may move from Q -> R.
 382    The descriptions of the hooks are as below:
384 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
385 scheduler; it is called inside the sched_init function just after
386 find_insn_reg_weights function call. It is used to calculate the SImode
 387    and SFmode weights of the insns in each basic block, much like what
388 find_insn_reg_weights does.
389 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
391 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
392 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
393 (Q)->(R).
395 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
396 high; reorder the ready queue so that the insn with lowest LUID will be
397 issued next.
399 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
400 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
402 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
403 can be returned from TARGET_SCHED_REORDER2.
405 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
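/* Illustrative sketch (simplified, added for clarity; the real hook
   bodies appear later in this file): the reordering described above
   boils down to

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   i.e. once the running SImode/SFmode weight counts cross their
   thresholds the ready queue is re-sorted (via rank_for_reorder) so the
   insn with the lowest LUID is issued next, and TARGET_SCHED_REORDER2
   additionally arms skip_cycles so that TARGET_SCHED_DFA_NEW_CYCLE may
   skip up to 8 cycles to let stalled insns move from Q to R.  */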
407 #undef TARGET_SCHED_DFA_NEW_CYCLE
408 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
410 #undef TARGET_SCHED_INIT_GLOBAL
411 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
413 #undef TARGET_SCHED_FINISH_GLOBAL
414 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
416 #undef TARGET_SCHED_VARIABLE_ISSUE
417 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
419 #undef TARGET_SCHED_REORDER
420 #define TARGET_SCHED_REORDER sh_reorder
422 #undef TARGET_SCHED_REORDER2
423 #define TARGET_SCHED_REORDER2 sh_reorder2
425 #undef TARGET_SCHED_INIT
426 #define TARGET_SCHED_INIT sh_md_init
428 #undef TARGET_LEGITIMIZE_ADDRESS
429 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
431 #undef TARGET_CANNOT_MODIFY_JUMPS_P
432 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
433 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
434 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
435 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
436 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
437 sh_optimize_target_register_callee_saved
439 #undef TARGET_MS_BITFIELD_LAYOUT_P
440 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
442 #undef TARGET_INIT_BUILTINS
443 #define TARGET_INIT_BUILTINS sh_init_builtins
444 #undef TARGET_BUILTIN_DECL
445 #define TARGET_BUILTIN_DECL sh_builtin_decl
446 #undef TARGET_EXPAND_BUILTIN
447 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
449 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
450 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
452 #undef TARGET_CANNOT_COPY_INSN_P
453 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
454 #undef TARGET_RTX_COSTS
455 #define TARGET_RTX_COSTS sh_rtx_costs
456 #undef TARGET_ADDRESS_COST
457 #define TARGET_ADDRESS_COST sh_address_cost
458 #undef TARGET_ALLOCATE_INITIAL_VALUE
459 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
461 #undef TARGET_MACHINE_DEPENDENT_REORG
462 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
464 #undef TARGET_DWARF_REGISTER_SPAN
465 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
467 #ifdef HAVE_AS_TLS
468 #undef TARGET_HAVE_TLS
469 #define TARGET_HAVE_TLS true
470 #endif
472 #undef TARGET_PROMOTE_PROTOTYPES
473 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
474 #undef TARGET_PROMOTE_FUNCTION_MODE
475 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
477 #undef TARGET_FUNCTION_VALUE
478 #define TARGET_FUNCTION_VALUE sh_function_value
479 #undef TARGET_LIBCALL_VALUE
480 #define TARGET_LIBCALL_VALUE sh_libcall_value
481 #undef TARGET_STRUCT_VALUE_RTX
482 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
483 #undef TARGET_RETURN_IN_MEMORY
484 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
486 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
487 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
488 #undef TARGET_SETUP_INCOMING_VARARGS
489 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
490 #undef TARGET_STRICT_ARGUMENT_NAMING
491 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
492 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
493 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
496 #undef TARGET_PASS_BY_REFERENCE
497 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
498 #undef TARGET_CALLEE_COPIES
499 #define TARGET_CALLEE_COPIES sh_callee_copies
500 #undef TARGET_ARG_PARTIAL_BYTES
501 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
502 #undef TARGET_FUNCTION_ARG
503 #define TARGET_FUNCTION_ARG sh_function_arg
504 #undef TARGET_FUNCTION_ARG_ADVANCE
505 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
507 #undef TARGET_BUILD_BUILTIN_VA_LIST
508 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
509 #undef TARGET_EXPAND_BUILTIN_VA_START
510 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
511 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
512 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
514 #undef TARGET_SCALAR_MODE_SUPPORTED_P
515 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
519 #undef TARGET_CHECK_PCH_TARGET_FLAGS
520 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
522 #undef TARGET_DWARF_CALLING_CONVENTION
523 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
525 #undef TARGET_FRAME_POINTER_REQUIRED
526 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
528 /* Return regmode weight for insn. */
529 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
531 /* Return current register pressure for regmode. */
532 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
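/* Illustrative example (added): the scheduling hooks track pressure per
   mode class with these macros, essentially

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and high_pressure () then compares the accumulated value against a
   mode-specific threshold.  */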
534 #undef TARGET_ENCODE_SECTION_INFO
535 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
537 #ifdef SYMBIAN
539 #undef TARGET_ENCODE_SECTION_INFO
540 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
541 #undef TARGET_STRIP_NAME_ENCODING
542 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
543 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
544 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
546 #endif /* SYMBIAN */
548 #undef TARGET_SECONDARY_RELOAD
549 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
551 #undef TARGET_LEGITIMATE_ADDRESS_P
552 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
559 /* Machine-specific symbol_ref flags. */
560 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
562 struct gcc_target targetm = TARGET_INITIALIZER;
564 /* Implement TARGET_HANDLE_OPTION. */
566 static bool
567 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
568 int value ATTRIBUTE_UNUSED)
570 switch (code)
572 case OPT_m1:
573 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
574 return true;
576 case OPT_m2:
577 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
578 return true;
580 case OPT_m2a:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
582 return true;
584 case OPT_m2a_nofpu:
585 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
586 return true;
588 case OPT_m2a_single:
589 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
590 return true;
592 case OPT_m2a_single_only:
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
594 return true;
596 case OPT_m2e:
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
598 return true;
600 case OPT_m3:
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
602 return true;
604 case OPT_m3e:
605 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
606 return true;
608 case OPT_m4:
609 case OPT_m4_100:
610 case OPT_m4_200:
611 case OPT_m4_300:
612 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
613 return true;
615 case OPT_m4_nofpu:
616 case OPT_m4_100_nofpu:
617 case OPT_m4_200_nofpu:
618 case OPT_m4_300_nofpu:
619 case OPT_m4_340:
620 case OPT_m4_400:
621 case OPT_m4_500:
622 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
623 return true;
625 case OPT_m4_single:
626 case OPT_m4_100_single:
627 case OPT_m4_200_single:
628 case OPT_m4_300_single:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
630 return true;
632 case OPT_m4_single_only:
633 case OPT_m4_100_single_only:
634 case OPT_m4_200_single_only:
635 case OPT_m4_300_single_only:
636 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
637 return true;
639 case OPT_m4a:
640 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
641 return true;
643 case OPT_m4a_nofpu:
644 case OPT_m4al:
645 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
646 return true;
648 case OPT_m4a_single:
649 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
650 return true;
652 case OPT_m4a_single_only:
653 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
654 return true;
656 case OPT_m5_32media:
657 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
658 return true;
660 case OPT_m5_32media_nofpu:
661 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
662 return true;
664 case OPT_m5_64media:
665 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
666 return true;
668 case OPT_m5_64media_nofpu:
669 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
670 return true;
672 case OPT_m5_compact:
673 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
674 return true;
676 case OPT_m5_compact_nofpu:
677 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
678 return true;
680 default:
681 return true;
685 /* Set default optimization options. */
686 void
687 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
689 if (level)
691 if (!size)
692 sh_div_str = "inv:minlat";
694 if (size)
696 target_flags |= MASK_SMALLCODE;
697 sh_div_str = SH_DIV_STR_FOR_SIZE ;
699 else
700 TARGET_CBRANCHDI4 = 1;
701 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
702 haven't been parsed yet, hence we'd read only the default.
703 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
704 it's OK to always set flag_branch_target_load_optimize. */
705 if (level > 1)
707 flag_branch_target_load_optimize = 1;
708 if (!size)
709 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
711 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
712 here, so leave it to OVERRIDE_OPTIONS to set
713 flag_finite_math_only. We set it to 2 here so we know if the user
714 explicitly requested this to be on or off. */
715 flag_finite_math_only = 2;
716 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
717 the user explicitly requested this to be on or off. */
718 if (flag_schedule_insns > 0)
719 flag_schedule_insns = 2;
721 set_param_value ("simultaneous-prefetches", 2);
724 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
725 options, and do some machine dependent initialization. */
726 void
727 sh_override_options (void)
729 int regno;
731 SUBTARGET_OVERRIDE_OPTIONS;
732 if (flag_finite_math_only == 2)
733 flag_finite_math_only
734 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
735 if (TARGET_SH2E && !flag_finite_math_only)
736 target_flags |= MASK_IEEE;
737 sh_cpu = PROCESSOR_SH1;
738 assembler_dialect = 0;
739 if (TARGET_SH2)
740 sh_cpu = PROCESSOR_SH2;
741 if (TARGET_SH2E)
742 sh_cpu = PROCESSOR_SH2E;
743 if (TARGET_SH2A)
744 sh_cpu = PROCESSOR_SH2A;
745 if (TARGET_SH3)
746 sh_cpu = PROCESSOR_SH3;
747 if (TARGET_SH3E)
748 sh_cpu = PROCESSOR_SH3E;
749 if (TARGET_SH4)
751 assembler_dialect = 1;
752 sh_cpu = PROCESSOR_SH4;
754 if (TARGET_SH4A_ARCH)
756 assembler_dialect = 1;
757 sh_cpu = PROCESSOR_SH4A;
759 if (TARGET_SH5)
761 sh_cpu = PROCESSOR_SH5;
762 target_flags |= MASK_ALIGN_DOUBLE;
763 if (TARGET_SHMEDIA_FPU)
764 target_flags |= MASK_FMOVD;
765 if (TARGET_SHMEDIA)
767 /* There are no delay slots on SHmedia. */
768 flag_delayed_branch = 0;
 769       /* Relaxation isn't yet supported for SHmedia.  */
770 target_flags &= ~MASK_RELAX;
 771       /* After reload, if-conversion does little good but can cause
772 ICEs:
773 - find_if_block doesn't do anything for SH because we don't
774 have conditional execution patterns. (We use conditional
775 move patterns, which are handled differently, and only
776 before reload).
777 - find_cond_trap doesn't do anything for the SH because we
778 don't have conditional traps.
779 - find_if_case_1 uses redirect_edge_and_branch_force in
780 the only path that does an optimization, and this causes
781 an ICE when branch targets are in registers.
782 - find_if_case_2 doesn't do anything for the SHmedia after
783 reload except when it can redirect a tablejump - and
784 that's rather rare. */
785 flag_if_conversion2 = 0;
786 if (! strcmp (sh_div_str, "call"))
787 sh_div_strategy = SH_DIV_CALL;
788 else if (! strcmp (sh_div_str, "call2"))
789 sh_div_strategy = SH_DIV_CALL2;
790 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
791 sh_div_strategy = SH_DIV_FP;
792 else if (! strcmp (sh_div_str, "inv"))
793 sh_div_strategy = SH_DIV_INV;
794 else if (! strcmp (sh_div_str, "inv:minlat"))
795 sh_div_strategy = SH_DIV_INV_MINLAT;
796 else if (! strcmp (sh_div_str, "inv20u"))
797 sh_div_strategy = SH_DIV_INV20U;
798 else if (! strcmp (sh_div_str, "inv20l"))
799 sh_div_strategy = SH_DIV_INV20L;
800 else if (! strcmp (sh_div_str, "inv:call2"))
801 sh_div_strategy = SH_DIV_INV_CALL2;
802 else if (! strcmp (sh_div_str, "inv:call"))
803 sh_div_strategy = SH_DIV_INV_CALL;
804 else if (! strcmp (sh_div_str, "inv:fp"))
806 if (TARGET_FPU_ANY)
807 sh_div_strategy = SH_DIV_INV_FP;
808 else
809 sh_div_strategy = SH_DIV_INV;
811 TARGET_CBRANCHDI4 = 0;
812 /* Assembler CFI isn't yet fully supported for SHmedia. */
813 flag_dwarf2_cfi_asm = 0;
816 else
 818       /* Only the sh64-elf assembler supports .quad properly.  */
819 targetm.asm_out.aligned_op.di = NULL;
820 targetm.asm_out.unaligned_op.di = NULL;
822 if (TARGET_SH1)
824 if (! strcmp (sh_div_str, "call-div1"))
825 sh_div_strategy = SH_DIV_CALL_DIV1;
826 else if (! strcmp (sh_div_str, "call-fp")
827 && (TARGET_FPU_DOUBLE
828 || (TARGET_HARD_SH4 && TARGET_SH2E)
829 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
830 sh_div_strategy = SH_DIV_CALL_FP;
831 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
832 sh_div_strategy = SH_DIV_CALL_TABLE;
833 else
834 /* Pick one that makes most sense for the target in general.
835 It is not much good to use different functions depending
836 on -Os, since then we'll end up with two different functions
837 when some of the code is compiled for size, and some for
838 speed. */
840 /* SH4 tends to emphasize speed. */
841 if (TARGET_HARD_SH4)
842 sh_div_strategy = SH_DIV_CALL_TABLE;
843 /* These have their own way of doing things. */
844 else if (TARGET_SH2A)
845 sh_div_strategy = SH_DIV_INTRINSIC;
846 /* ??? Should we use the integer SHmedia function instead? */
847 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
848 sh_div_strategy = SH_DIV_CALL_FP;
849 /* SH1 .. SH3 cores often go into small-footprint systems, so
850 default to the smallest implementation available. */
851 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
852 sh_div_strategy = SH_DIV_CALL_TABLE;
853 else
854 sh_div_strategy = SH_DIV_CALL_DIV1;
856 if (!TARGET_SH1)
857 TARGET_PRETEND_CMOVE = 0;
858 if (sh_divsi3_libfunc[0])
859 ; /* User supplied - leave it alone. */
860 else if (TARGET_DIVIDE_CALL_FP)
861 sh_divsi3_libfunc = "__sdivsi3_i4";
862 else if (TARGET_DIVIDE_CALL_TABLE)
863 sh_divsi3_libfunc = "__sdivsi3_i4i";
864 else if (TARGET_SH5)
865 sh_divsi3_libfunc = "__sdivsi3_1";
866 else
867 sh_divsi3_libfunc = "__sdivsi3";
868 if (sh_branch_cost == -1)
869 sh_branch_cost
870 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
872 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
873 if (! VALID_REGISTER_P (regno))
874 sh_register_names[regno][0] = '\0';
876 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
877 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
878 sh_additional_register_names[regno][0] = '\0';
880 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
882 if ((flag_pic && ! TARGET_PREFERGOT)
883 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
884 flag_no_function_cse = 1;
886 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
888 /* Never run scheduling before reload, since that can
 889          break global alloc and generate slower code anyway due
890 to the pressure on R0. */
 891       /* Enable sched1 for SH4 only if the user explicitly requests it.
 892          When sched1 is enabled, the ready queue will be reordered by
 893          the target hooks if pressure is high.  We cannot do this for
 894          PIC, or for SH3 and lower, as they give spill failures for R0.  */
895 if (!TARGET_HARD_SH4 || flag_pic)
896 flag_schedule_insns = 0;
897 /* ??? Current exception handling places basic block boundaries
 898          after call_insns.  This causes high pressure on R0 and gives
899 spill failures for R0 in reload. See PR 22553 and the thread
900 on gcc-patches
901 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
902 else if (flag_exceptions)
904 if (flag_schedule_insns == 1)
905 warning (0, "ignoring -fschedule-insns because of exception handling bug");
906 flag_schedule_insns = 0;
908 else if (flag_schedule_insns == 2)
909 flag_schedule_insns = 0;
912 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
913 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
915 /* Unwind info is not correct around the CFG unless either a frame
916 pointer is present or M_A_O_A is set. Fixing this requires rewriting
917 unwind info generation to be aware of the CFG and propagating states
918 around edges. */
919 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
920 || flag_exceptions || flag_non_call_exceptions)
921 && flag_omit_frame_pointer
922 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
924 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
925 warning (0, "unwind tables currently require either a frame pointer "
926 "or -maccumulate-outgoing-args for correctness");
927 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
930 /* Unwinding with -freorder-blocks-and-partition does not work on this
 931      architecture, because it requires far jumps to labels crossing between
 932      hot/cold sections, which are rejected on this architecture.  */
933 if (flag_reorder_blocks_and_partition)
935 if (flag_exceptions)
937 inform (input_location,
938 "-freorder-blocks-and-partition does not work with "
939 "exceptions on this architecture");
940 flag_reorder_blocks_and_partition = 0;
941 flag_reorder_blocks = 1;
943 else if (flag_unwind_tables)
945 inform (input_location,
946 "-freorder-blocks-and-partition does not support unwind "
947 "info on this architecture");
948 flag_reorder_blocks_and_partition = 0;
949 flag_reorder_blocks = 1;
953 if (align_loops == 0)
954 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
955 if (align_jumps == 0)
956 align_jumps = 1 << CACHE_LOG;
957 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
958 align_jumps = TARGET_SHMEDIA ? 4 : 2;
960 /* Allocation boundary (in *bytes*) for the code of a function.
961 SH1: 32 bit alignment is faster, because instructions are always
962 fetched as a pair from a longword boundary.
 963      SH2 .. SH5: align to cache line start.  */
964 if (align_functions == 0)
965 align_functions
966 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
967 /* The linker relaxation code breaks when a function contains
968 alignments that are larger than that at the start of a
969 compilation unit. */
970 if (TARGET_RELAX)
972 int min_align
973 = align_loops > align_jumps ? align_loops : align_jumps;
 975       /* Also take possible .long constants / mova tables into account.  */
976 if (min_align < 4)
977 min_align = 4;
978 if (align_functions < min_align)
979 align_functions = min_align;
982 if (sh_fixed_range_str)
983 sh_fix_range (sh_fixed_range_str);
985 /* This target defaults to strict volatile bitfields. */
986 if (flag_strict_volatile_bitfields < 0)
987 flag_strict_volatile_bitfields = 1;
990 /* Print the operand address in x to the stream. */
992 static void
993 sh_print_operand_address (FILE *stream, rtx x)
995 switch (GET_CODE (x))
997 case REG:
998 case SUBREG:
999 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1000 break;
1002 case PLUS:
1004 rtx base = XEXP (x, 0);
1005 rtx index = XEXP (x, 1);
1007 switch (GET_CODE (index))
1009 case CONST_INT:
1010 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1011 reg_names[true_regnum (base)]);
1012 break;
1014 case REG:
1015 case SUBREG:
1017 int base_num = true_regnum (base);
1018 int index_num = true_regnum (index);
1020 fprintf (stream, "@(r0,%s)",
1021 reg_names[MAX (base_num, index_num)]);
1022 break;
1025 default:
1026 gcc_unreachable ();
1029 break;
1031 case PRE_DEC:
1032 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1033 break;
1035 case POST_INC:
1036 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1037 break;
1039 default:
1040 x = mark_constant_pool_use (x);
1041 output_addr_const (stream, x);
1042 break;
1046 /* Print operand x (an rtx) in assembler syntax to file stream
1047 according to modifier code.
1049 '.' print a .s if insn needs delay slot
1050 ',' print LOCAL_LABEL_PREFIX
1051 '@' print trap, rte or rts depending upon pragma interruptness
1052 '#' output a nop if there is nothing to put in the delay slot
1053 ''' print likelihood suffix (/u for unlikely).
1054 '>' print branch target if -fverbose-asm
1055 'O' print a constant without the #
1056 'R' print the LSW of a dp value - changes if in little endian
1057 'S' print the MSW of a dp value - changes if in little endian
1058 'T' print the next word of a dp value - same as 'R' in big endian mode.
1059 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1060 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1061 'N' print 'r63' if the operand is (const_int 0).
1062 'd' print a V2SF reg as dN instead of fpN.
1063 'm' print a pair `base,offset' or `base,index', for LD and ST.
1064 'U' Likewise for {LD,ST}{HI,LO}.
1065 'V' print the position of a single bit set.
1066 'W' print the position of a single bit cleared.
1067 't' print a memory address which is a register.
1068 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1069 'o' output an operator. */
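/* Illustrative example (derived from the code below, not part of the
   original comment): for a DImode value in the general register pair
   r4/r5 on a little-endian target, '%S' prints "r5" (the most
   significant word) and '%R' prints "r4" (the least significant word);
   on big endian the two swap.  For a value in a floating point register
   pair the first register always holds the most significant half, so
   '%S' prints it and '%R' prints the following register.  */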
1071 static void
1072 sh_print_operand (FILE *stream, rtx x, int code)
1074 int regno;
1075 enum machine_mode mode;
1077 switch (code)
1079 tree trapa_attr;
1081 case '.':
1082 if (final_sequence
1083 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1084 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1085 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1086 break;
1087 case ',':
1088 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1089 break;
1090 case '@':
1091 trapa_attr = lookup_attribute ("trap_exit",
1092 DECL_ATTRIBUTES (current_function_decl));
1093 if (trapa_attr)
1094 fprintf (stream, "trapa #%ld",
1095 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1096 else if (sh_cfun_interrupt_handler_p ())
1098 if (sh_cfun_resbank_handler_p ())
1099 fprintf (stream, "resbank\n");
1100 fprintf (stream, "rte");
1102 else
1103 fprintf (stream, "rts");
1104 break;
1105 case '#':
1106 /* Output a nop if there's nothing in the delay slot. */
1107 if (dbr_sequence_length () == 0)
1108 fprintf (stream, "\n\tnop");
1109 break;
1110 case '\'':
1112 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1114 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1115 fputs ("/u", stream);
1116 break;
1118 case '>':
1119 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1121 fputs ("\t! target: ", stream);
1122 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1124 break;
1125 case 'O':
1126 x = mark_constant_pool_use (x);
1127 output_addr_const (stream, x);
1128 break;
1129 /* N.B.: %R / %S / %T adjust memory addresses by four.
1130 For SHMEDIA, that means they can be used to access the first and
1131 second 32 bit part of a 64 bit (or larger) value that
1132 might be held in floating point registers or memory.
1133 While they can be used to access 64 bit parts of a larger value
1134 held in general purpose registers, that won't work with memory -
1135 neither for fp registers, since the frxx names are used. */
1136 case 'R':
1137 if (REG_P (x) || GET_CODE (x) == SUBREG)
1139 regno = true_regnum (x);
1140 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1141 fputs (reg_names[regno], (stream));
1143 else if (MEM_P (x))
1145 x = adjust_address (x, SImode, 4 * LSW);
1146 sh_print_operand_address (stream, XEXP (x, 0));
1148 else
1150 rtx sub = NULL_RTX;
1152 mode = GET_MODE (x);
1153 if (mode == VOIDmode)
1154 mode = DImode;
1155 if (GET_MODE_SIZE (mode) >= 8)
1156 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1157 if (sub)
1158 sh_print_operand (stream, sub, 0);
1159 else
1160 output_operand_lossage ("invalid operand to %%R");
1162 break;
1163 case 'S':
1164 if (REG_P (x) || GET_CODE (x) == SUBREG)
1166 regno = true_regnum (x);
1167 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1168 fputs (reg_names[regno], (stream));
1170 else if (MEM_P (x))
1172 x = adjust_address (x, SImode, 4 * MSW);
1173 sh_print_operand_address (stream, XEXP (x, 0));
1175 else
1177 rtx sub = NULL_RTX;
1179 mode = GET_MODE (x);
1180 if (mode == VOIDmode)
1181 mode = DImode;
1182 if (GET_MODE_SIZE (mode) >= 8)
1183 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1184 if (sub)
1185 sh_print_operand (stream, sub, 0);
1186 else
1187 output_operand_lossage ("invalid operand to %%S");
1189 break;
1190 case 'T':
1191 /* Next word of a double. */
1192 switch (GET_CODE (x))
1194 case REG:
1195 fputs (reg_names[REGNO (x) + 1], (stream));
1196 break;
1197 case MEM:
1198 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1199 && GET_CODE (XEXP (x, 0)) != POST_INC)
1200 x = adjust_address (x, SImode, 4);
1201 sh_print_operand_address (stream, XEXP (x, 0));
1202 break;
1203 default:
1204 break;
1206 break;
1208 case 't':
1209 gcc_assert (MEM_P (x));
1210 x = XEXP (x, 0);
1211 switch (GET_CODE (x))
1213 case REG:
1214 case SUBREG:
1215 sh_print_operand (stream, x, 0);
1216 break;
1217 default:
1218 break;
1220 break;
1222 case 'o':
1223 switch (GET_CODE (x))
1225 case PLUS: fputs ("add", stream); break;
1226 case MINUS: fputs ("sub", stream); break;
1227 case MULT: fputs ("mul", stream); break;
1228 case DIV: fputs ("div", stream); break;
1229 case EQ: fputs ("eq", stream); break;
1230 case NE: fputs ("ne", stream); break;
1231 case GT: case LT: fputs ("gt", stream); break;
1232 case GE: case LE: fputs ("ge", stream); break;
1233 case GTU: case LTU: fputs ("gtu", stream); break;
1234 case GEU: case LEU: fputs ("geu", stream); break;
1235 default:
1236 break;
1238 break;
1239 case 'M':
1240 if (TARGET_SHMEDIA)
1242 if (MEM_P (x)
1243 && GET_CODE (XEXP (x, 0)) == PLUS
1244 && (REG_P (XEXP (XEXP (x, 0), 1))
1245 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1246 fputc ('x', stream);
1248 else
1250 if (MEM_P (x))
1252 switch (GET_MODE (x))
1254 case QImode: fputs (".b", stream); break;
1255 case HImode: fputs (".w", stream); break;
1256 case SImode: fputs (".l", stream); break;
1257 case SFmode: fputs (".s", stream); break;
1258 case DFmode: fputs (".d", stream); break;
1259 default: gcc_unreachable ();
1263 break;
1265 case 'm':
1266 gcc_assert (MEM_P (x));
1267 x = XEXP (x, 0);
1268 /* Fall through. */
1269 case 'U':
1270 switch (GET_CODE (x))
1272 case REG:
1273 case SUBREG:
1274 sh_print_operand (stream, x, 0);
1275 fputs (", 0", stream);
1276 break;
1278 case PLUS:
1279 sh_print_operand (stream, XEXP (x, 0), 0);
1280 fputs (", ", stream);
1281 sh_print_operand (stream, XEXP (x, 1), 0);
1282 break;
1284 default:
1285 gcc_unreachable ();
1287 break;
1289 case 'V':
1291 int num = exact_log2 (INTVAL (x));
1292 gcc_assert (num >= 0);
1293 fprintf (stream, "#%d", num);
1295 break;
1297 case 'W':
1299 int num = exact_log2 (~INTVAL (x));
1300 gcc_assert (num >= 0);
1301 fprintf (stream, "#%d", num);
1303 break;
1305 case 'd':
1306 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1308 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1309 break;
1311 case 'N':
1312 if (x == CONST0_RTX (GET_MODE (x)))
1314 fprintf ((stream), "r63");
1315 break;
1317 goto default_output;
1318 case 'u':
1319 if (CONST_INT_P (x))
1321 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1322 break;
1324 /* Fall through. */
1326 default_output:
1327 default:
1328 regno = 0;
1329 mode = GET_MODE (x);
1331 switch (GET_CODE (x))
1333 case TRUNCATE:
1335 rtx inner = XEXP (x, 0);
1336 int offset = 0;
1337 enum machine_mode inner_mode;
1339 /* We might see SUBREGs with vector mode registers inside. */
1340 if (GET_CODE (inner) == SUBREG
1341 && (GET_MODE_SIZE (GET_MODE (inner))
1342 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1343 && subreg_lowpart_p (inner))
1344 inner = SUBREG_REG (inner);
1345 if (CONST_INT_P (inner))
1347 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1348 goto default_output;
1350 inner_mode = GET_MODE (inner);
1351 if (GET_CODE (inner) == SUBREG
1352 && (GET_MODE_SIZE (GET_MODE (inner))
1353 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1354 && REG_P (SUBREG_REG (inner)))
1356 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1357 GET_MODE (SUBREG_REG (inner)),
1358 SUBREG_BYTE (inner),
1359 GET_MODE (inner));
1360 inner = SUBREG_REG (inner);
1362 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1363 abort ();
1364 /* Floating point register pairs are always big endian;
1365 general purpose registers are 64 bit wide. */
1366 regno = REGNO (inner);
1367 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1368 - HARD_REGNO_NREGS (regno, mode))
1369 + offset;
1370 x = inner;
1371 goto reg;
1373 case SIGN_EXTEND:
1374 x = XEXP (x, 0);
1375 goto reg;
1376 /* FIXME: We need this on SHmedia32 because reload generates
1377 some sign-extended HI or QI loads into DImode registers
1378 but, because Pmode is SImode, the address ends up with a
1379 subreg:SI of the DImode register. Maybe reload should be
1380 fixed so as to apply alter_subreg to such loads? */
1381 case IF_THEN_ELSE:
1382 gcc_assert (trapping_target_operand (x, VOIDmode));
1383 x = XEXP (XEXP (x, 2), 0);
1384 goto default_output;
1385 case SUBREG:
1386 gcc_assert (SUBREG_BYTE (x) == 0
1387 && REG_P (SUBREG_REG (x)));
1389 x = SUBREG_REG (x);
1390 /* Fall through. */
1392 reg:
1393 case REG:
1394 regno += REGNO (x);
1395 if (FP_REGISTER_P (regno)
1396 && mode == V16SFmode)
1397 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1398 else if (FP_REGISTER_P (REGNO (x))
1399 && mode == V4SFmode)
1400 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1401 else if (REG_P (x)
1402 && mode == V2SFmode)
1403 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1404 else if (FP_REGISTER_P (REGNO (x))
1405 && GET_MODE_SIZE (mode) > 4)
1406 fprintf ((stream), "d%s", reg_names[regno] + 1);
1407 else
1408 fputs (reg_names[regno], (stream));
1409 break;
1411 case MEM:
1412 output_address (XEXP (x, 0));
1413 break;
1415 default:
1416 if (TARGET_SH1)
1417 fputc ('#', stream);
1418 output_addr_const (stream, x);
1419 break;
1421 break;
1425 static bool
1426 sh_print_operand_punct_valid_p (unsigned char code)
1428 return (code == '.' || code == '#' || code == '@' || code == ','
1429 || code == '$' || code == '\'' || code == '>');
1433 /* Encode symbol attributes of a SYMBOL_REF into its
1434 SYMBOL_REF_FLAGS. */
1435 static void
1436 sh_encode_section_info (tree decl, rtx rtl, int first)
1438 default_encode_section_info (decl, rtl, first);
1440 if (TREE_CODE (decl) == FUNCTION_DECL
1441 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1442 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1445 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1446 static void
1447 force_into (rtx value, rtx target)
1449 value = force_operand (value, target);
1450 if (! rtx_equal_p (value, target))
1451 emit_insn (gen_move_insn (target, value));
1454 /* Emit code to perform a block move. Choose the best method.
1456 OPERANDS[0] is the destination.
1457 OPERANDS[1] is the source.
1458 OPERANDS[2] is the size.
1459 OPERANDS[3] is the alignment safe to use. */
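/* Illustrative example (derived from the code below): a constant,
   4-byte-aligned, 12-byte copy on SH4 loads the destination and source
   addresses into r4 and r5 and calls the __movmemSI12_i4 support
   routine, while a non-constant length makes this function return 0 so
   the caller falls back to a generic copy.  */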
1462 expand_block_move (rtx *operands)
1464 int align = INTVAL (operands[3]);
1465 int constp = (CONST_INT_P (operands[2]));
1466 int bytes = (constp ? INTVAL (operands[2]) : 0);
1468 if (! constp)
1469 return 0;
1471 /* If we could use mov.l to move words and dest is word-aligned, we
1472 can use movua.l for loads and still generate a relatively short
1473 and efficient sequence. */
1474 if (TARGET_SH4A_ARCH && align < 4
1475 && MEM_ALIGN (operands[0]) >= 32
1476 && can_move_by_pieces (bytes, 32))
1478 rtx dest = copy_rtx (operands[0]);
1479 rtx src = copy_rtx (operands[1]);
1480 /* We could use different pseudos for each copied word, but
1481 since movua can only load into r0, it's kind of
1482 pointless. */
1483 rtx temp = gen_reg_rtx (SImode);
1484 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1485 int copied = 0;
1487 while (copied + 4 <= bytes)
1489 rtx to = adjust_address (dest, SImode, copied);
1490 rtx from = adjust_automodify_address (src, BLKmode,
1491 src_addr, copied);
1493 set_mem_size (from, GEN_INT (4));
1494 emit_insn (gen_movua (temp, from));
1495 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1496 emit_move_insn (to, temp);
1497 copied += 4;
1500 if (copied < bytes)
1501 move_by_pieces (adjust_address (dest, BLKmode, copied),
1502 adjust_automodify_address (src, BLKmode,
1503 src_addr, copied),
1504 bytes - copied, align, 0);
1506 return 1;
1509 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1510 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1511 if (align < 4 || (bytes % 4 != 0))
1512 return 0;
1514 if (TARGET_HARD_SH4)
1516 if (bytes < 12)
1517 return 0;
1518 else if (bytes == 12)
1520 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1521 rtx r4 = gen_rtx_REG (SImode, 4);
1522 rtx r5 = gen_rtx_REG (SImode, 5);
1524 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1525 force_into (XEXP (operands[0], 0), r4);
1526 force_into (XEXP (operands[1], 0), r5);
1527 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1528 return 1;
1530 else if (! TARGET_SMALLCODE)
1532 const char *entry_name;
1533 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1534 int dwords;
1535 rtx r4 = gen_rtx_REG (SImode, 4);
1536 rtx r5 = gen_rtx_REG (SImode, 5);
1537 rtx r6 = gen_rtx_REG (SImode, 6);
1539 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1540 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1541 force_into (XEXP (operands[0], 0), r4);
1542 force_into (XEXP (operands[1], 0), r5);
1544 dwords = bytes >> 3;
1545 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1546 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1547 return 1;
1549 else
1550 return 0;
1552 if (bytes < 64)
1554 char entry[30];
1555 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1556 rtx r4 = gen_rtx_REG (SImode, 4);
1557 rtx r5 = gen_rtx_REG (SImode, 5);
1559 sprintf (entry, "__movmemSI%d", bytes);
1560 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1561 force_into (XEXP (operands[0], 0), r4);
1562 force_into (XEXP (operands[1], 0), r5);
1563 emit_insn (gen_block_move_real (func_addr_rtx));
1564 return 1;
 1567   /* This is the same number of bytes as a memcpy call, but to a different,
1568 less common function name, so this will occasionally use more space. */
1569 if (! TARGET_SMALLCODE)
1571 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1572 int final_switch, while_loop;
1573 rtx r4 = gen_rtx_REG (SImode, 4);
1574 rtx r5 = gen_rtx_REG (SImode, 5);
1575 rtx r6 = gen_rtx_REG (SImode, 6);
1577 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1578 force_into (XEXP (operands[0], 0), r4);
1579 force_into (XEXP (operands[1], 0), r5);
1581 /* r6 controls the size of the move. 16 is decremented from it
 1582         for each 64 bytes moved.  Then the negative value left over is used
1583 as an index into a list of move instructions. e.g., a 72 byte move
1584 would be set up with size(r6) = 14, for one iteration through the
1585 big while loop, and a switch of -2 for the last part. */
1587 final_switch = 16 - ((bytes / 4) % 16);
1588 while_loop = ((bytes / 4) / 16 - 1) * 16;
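      /* Worked example (added for illustration): for the 72 byte move from
         the comment above, bytes / 4 == 18, so final_switch = 16 - (18 % 16)
         = 14 and while_loop = (18 / 16 - 1) * 16 = 0.  r6 is thus loaded
         with 14; one pass through the 64 byte loop leaves 14 - 16 = -2 to
         index the final switch, matching the description above.  */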
1589 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1590 emit_insn (gen_block_lump_real (func_addr_rtx));
1591 return 1;
1594 return 0;
1597 /* Prepare operands for a move define_expand; specifically, one of the
1598 operands must be in a register. */
1601 prepare_move_operands (rtx operands[], enum machine_mode mode)
1603 if ((mode == SImode || mode == DImode)
1604 && flag_pic
1605 && ! ((mode == Pmode || mode == ptr_mode)
1606 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1608 rtx temp;
1609 if (SYMBOLIC_CONST_P (operands[1]))
1611 if (MEM_P (operands[0]))
1612 operands[1] = force_reg (Pmode, operands[1]);
1613 else if (TARGET_SHMEDIA
1614 && GET_CODE (operands[1]) == LABEL_REF
1615 && target_reg_operand (operands[0], mode))
1616 /* It's ok. */;
1617 else
1619 temp = (!can_create_pseudo_p ()
1620 ? operands[0]
1621 : gen_reg_rtx (Pmode));
1622 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1625 else if (GET_CODE (operands[1]) == CONST
1626 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1627 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1629 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1630 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1631 mode, temp);
1632 operands[1] = expand_binop (mode, add_optab, temp,
1633 XEXP (XEXP (operands[1], 0), 1),
1634 (!can_create_pseudo_p ()
1635 ? temp
1636 : gen_reg_rtx (Pmode)),
1637 0, OPTAB_LIB_WIDEN);
1641 if (! reload_in_progress && ! reload_completed)
 1643       /* Copy the source to a register if neither operand is a register.  */
1644 if (! register_operand (operands[0], mode)
1645 && ! sh_register_operand (operands[1], mode))
1646 operands[1] = copy_to_mode_reg (mode, operands[1]);
1648 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
 1650           /* This is like change_address_1 (operands[0], mode, 0, 1),
1651 except that we can't use that function because it is static. */
1652 rtx new_rtx = change_address (operands[0], mode, 0);
1653 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1654 operands[0] = new_rtx;
1657 /* This case can happen while generating code to move the result
1658 of a library call to the target. Reject `st r0,@(rX,rY)' because
1659 reload will fail to find a spill register for rX, since r0 is already
1660 being used for the source. */
1661 else if (TARGET_SH1
1662 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1663 && MEM_P (operands[0])
1664 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1665 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1666 operands[1] = copy_to_mode_reg (mode, operands[1]);
1669 if (mode == Pmode || mode == ptr_mode)
1671 rtx op0, op1, opc;
1672 enum tls_model tls_kind;
1674 op0 = operands[0];
1675 op1 = operands[1];
1676 if (GET_CODE (op1) == CONST
1677 && GET_CODE (XEXP (op1, 0)) == PLUS
1678 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1679 != TLS_MODEL_NONE))
1681 opc = XEXP (XEXP (op1, 0), 1);
1682 op1 = XEXP (XEXP (op1, 0), 0);
1684 else
1685 opc = NULL_RTX;
1687 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1689 rtx tga_op1, tga_ret, tmp, tmp2;
1691 switch (tls_kind)
1693 case TLS_MODEL_GLOBAL_DYNAMIC:
1694 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1695 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1696 op1 = tga_ret;
1697 break;
1699 case TLS_MODEL_LOCAL_DYNAMIC:
1700 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1701 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1703 tmp = gen_reg_rtx (Pmode);
1704 emit_move_insn (tmp, tga_ret);
1706 if (register_operand (op0, Pmode))
1707 tmp2 = op0;
1708 else
1709 tmp2 = gen_reg_rtx (Pmode);
1711 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1712 op1 = tmp2;
1713 break;
1715 case TLS_MODEL_INITIAL_EXEC:
1716 if (! flag_pic)
1718 /* Don't schedule insns for getting GOT address when
1719 the first scheduling is enabled, to avoid spill
1720 failures for R0. */
1721 if (flag_schedule_insns)
1722 emit_insn (gen_blockage ());
1723 emit_insn (gen_GOTaddr2picreg ());
1724 emit_use (gen_rtx_REG (SImode, PIC_REG));
1725 if (flag_schedule_insns)
1726 emit_insn (gen_blockage ());
1728 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1729 tmp = gen_sym2GOTTPOFF (op1);
1730 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1731 op1 = tga_op1;
1732 break;
1734 case TLS_MODEL_LOCAL_EXEC:
1735 tmp2 = gen_reg_rtx (Pmode);
1736 emit_insn (gen_load_gbr (tmp2));
1737 tmp = gen_reg_rtx (Pmode);
1738 emit_insn (gen_symTPOFF2reg (tmp, op1));
1740 if (register_operand (op0, Pmode))
1741 op1 = op0;
1742 else
1743 op1 = gen_reg_rtx (Pmode);
1745 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1746 break;
1748 default:
1749 gcc_unreachable ();
1751 if (opc)
1752 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1753 operands[1] = op1;
1757 return 0;
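/* Editorial illustration (added, not from the original sources):
   prepare_cbranch_operands below canonicalizes comparisons against
   constants so they map onto the compare instructions the SH actually has
   (cmp/eq, cmp/gt, cmp/ge, cmp/hi, cmp/hs).  For example
     (gt x -1)  becomes  (ge x 0)
     (ltu x 1)  becomes  (eq x 0)
   by adjusting the constant and the comparison code together.  */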
1760 enum rtx_code
1761 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1762 enum rtx_code comparison)
1764 rtx op1;
1765 rtx scratch = NULL_RTX;
1767 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1768 comparison = GET_CODE (operands[0]);
1769 else
1770 scratch = operands[4];
1771 if (CONST_INT_P (operands[1])
1772 && !CONST_INT_P (operands[2]))
1774 rtx tmp = operands[1];
1776 operands[1] = operands[2];
1777 operands[2] = tmp;
1778 comparison = swap_condition (comparison);
1780 if (CONST_INT_P (operands[2]))
1782 HOST_WIDE_INT val = INTVAL (operands[2]);
1783 if ((val == -1 || val == -0x81)
1784 && (comparison == GT || comparison == LE))
1786 comparison = (comparison == GT) ? GE : LT;
1787 operands[2] = gen_int_mode (val + 1, mode);
1789 else if ((val == 1 || val == 0x80)
1790 && (comparison == GE || comparison == LT))
1792 comparison = (comparison == GE) ? GT : LE;
1793 operands[2] = gen_int_mode (val - 1, mode);
1795 else if (val == 1 && (comparison == GEU || comparison == LTU))
1797 comparison = (comparison == GEU) ? NE : EQ;
1798 operands[2] = CONST0_RTX (mode);
1800 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1802 comparison = (comparison == GEU) ? GTU : LEU;
1803 operands[2] = gen_int_mode (val - 1, mode);
1805 else if (val == 0 && (comparison == GTU || comparison == LEU))
1806 comparison = (comparison == GTU) ? NE : EQ;
1807 else if (mode == SImode
1808 && ((val == 0x7fffffff
1809 && (comparison == GTU || comparison == LEU))
1810 || ((unsigned HOST_WIDE_INT) val
1811 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1812 && (comparison == GEU || comparison == LTU))))
1814 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1815 operands[2] = CONST0_RTX (mode);
1818 op1 = operands[1];
1819 if (can_create_pseudo_p ())
1820 operands[1] = force_reg (mode, op1);
1821 /* When we are handling DImode comparisons, we want to keep constants so
1822 that we can optimize the component comparisons; however, memory loads
1823 are better issued as a whole so that they can be scheduled well.
1824 SImode equality comparisons allow I08 constants, but only when they
1825 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1826 into a register, that register might as well be r0, and we allow the
1827 constant. If it is already in a register, this is likely to be
1828 allocated to a different hard register, thus we load the constant into
1829 a register unless it is zero. */
1830 if (!REG_P (operands[2])
1831 && (!CONST_INT_P (operands[2])
1832 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1833 && ((comparison != EQ && comparison != NE)
1834 || (REG_P (op1) && REGNO (op1) != R0_REG)
1835 || !satisfies_constraint_I08 (operands[2])))))
1837 if (scratch && GET_MODE (scratch) == mode)
1839 emit_move_insn (scratch, operands[2]);
1840 operands[2] = scratch;
1842 else if (can_create_pseudo_p ())
1843 operands[2] = force_reg (mode, operands[2]);
1845 return comparison;
1848 void
1849 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1851 rtx (*branch_expander) (rtx) = gen_branch_true;
1852 rtx jump;
1854 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1855 switch (comparison)
1857 case NE: case LT: case LE: case LTU: case LEU:
1858 comparison = reverse_condition (comparison);
1859 branch_expander = gen_branch_false;
1860 default: ;
1862 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1863 gen_rtx_fmt_ee (comparison, SImode,
1864 operands[1], operands[2])));
1865 jump = emit_jump_insn (branch_expander (operands[3]));
1866 if (probability >= 0)
1867 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1871 /* ??? How should we distribute probabilities when more than one branch
1872 is generated? So far we only have some ad-hoc observations:
1873 - If the operands are random, they are likely to differ in both parts.
1874 - If comparing items in a hash chain, the operands are random or equal;
1875 operation should be EQ or NE.
1876 - If items are searched in an ordered tree from the root, we can expect
1877 the highpart to be unequal about half of the time; operation should be
1878 an inequality comparison, operands non-constant, and overall probability
1879 about 50%. Likewise for quicksort.
1880 - Range checks will be often made against constants. Even if we assume for
1881 simplicity an even distribution of the non-constant operand over a
1882 sub-range here, the same probability could be generated with differently
1883 wide sub-ranges - as long as the ratio of the part of the subrange that
1884 is before the threshold to the part that comes after the threshold stays
1885 the same. Thus, we can't really tell anything here;
1886 assuming random distribution is at least simple.
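/* Added sketch (editorial, not taken from the original sources): for a
   signed DImode comparison such as (gt op1 op2), expand_cbranchdi4 picks
     msw_taken = GT   -- branch to the target if the high words compare GT,
     msw_skip  = LT   -- branch past the low-word test if they compare LT,
     lsw_taken = GTU  -- with equal high words, the unsigned low-word
                         comparison decides.
   Each component comparison is then emitted through expand_cbranchsi4.  */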
1889 bool
1890 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1892 enum rtx_code msw_taken, msw_skip, lsw_taken;
1893 rtx skip_label = NULL_RTX;
1894 rtx op1h, op1l, op2h, op2l;
1895 int num_branches;
1896 int prob, rev_prob;
1897 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1898 rtx scratch = operands[4];
1900 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1901 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1902 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1903 op1l = gen_lowpart (SImode, operands[1]);
1904 op2l = gen_lowpart (SImode, operands[2]);
1905 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1906 prob = split_branch_probability;
1907 rev_prob = REG_BR_PROB_BASE - prob;
1908 switch (comparison)
1910 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1911 That costs 1 cycle more when the first branch can be predicted taken,
1912 but saves us mispredicts because only one branch needs prediction.
1913 It also enables generating the cmpeqdi_t-1 pattern. */
1914 case EQ:
1915 if (TARGET_CMPEQDI_T)
1917 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1918 emit_jump_insn (gen_branch_true (operands[3]));
1919 return true;
1921 msw_skip = NE;
1922 lsw_taken = EQ;
1923 if (prob >= 0)
1925 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1927 msw_skip_prob = rev_prob;
1928 if (REG_BR_PROB_BASE <= 65535)
1929 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1930 else
1932 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1933 lsw_taken_prob
1934 = (prob
1935 ? (REG_BR_PROB_BASE
1936 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1937 / ((HOST_WIDEST_INT) prob << 32)))
1938 : 0);
1941 break;
1942 case NE:
1943 if (TARGET_CMPEQDI_T)
1945 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1946 emit_jump_insn (gen_branch_false (operands[3]));
1947 return true;
1949 msw_taken = NE;
1950 msw_taken_prob = prob;
1951 lsw_taken = NE;
1952 lsw_taken_prob = 0;
1953 break;
1954 case GTU: case GT:
1955 msw_taken = comparison;
1956 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1957 break;
1958 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1959 msw_skip = swap_condition (msw_taken);
1960 lsw_taken = GTU;
1961 break;
1962 case GEU: case GE:
1963 if (op2l == CONST0_RTX (SImode))
1964 msw_taken = comparison;
1965 else
1967 msw_taken = comparison == GE ? GT : GTU;
1968 msw_skip = swap_condition (msw_taken);
1969 lsw_taken = GEU;
1971 break;
1972 case LTU: case LT:
1973 msw_taken = comparison;
1974 if (op2l == CONST0_RTX (SImode))
1975 break;
1976 msw_skip = swap_condition (msw_taken);
1977 lsw_taken = LTU;
1978 break;
1979 case LEU: case LE:
1980 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1981 msw_taken = comparison;
1982 else
1984 lsw_taken = LEU;
1985 if (comparison == LE)
1986 msw_taken = LT;
1987 else if (op2h != CONST0_RTX (SImode))
1988 msw_taken = LTU;
1989 else
1990 break;
1991 msw_skip = swap_condition (msw_taken);
1993 break;
1994 default: return false;
1996 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1997 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1998 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1999 if (comparison != EQ && comparison != NE && num_branches > 1)
2001 if (!CONSTANT_P (operands[2])
2002 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2003 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2005 msw_taken_prob = prob / 2U;
2006 msw_skip_prob
2007 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2008 lsw_taken_prob = prob;
2010 else
2012 msw_taken_prob = prob;
2013 msw_skip_prob = REG_BR_PROB_BASE;
2014 /* ??? If we have a constant op2h, should we use that when
2015 calculating lsw_taken_prob? */
2016 lsw_taken_prob = prob;
2019 operands[1] = op1h;
2020 operands[2] = op2h;
2021 operands[4] = NULL_RTX;
2022 if (reload_completed
2023 && ! arith_reg_or_0_operand (op2h, SImode)
2024 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2025 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2026 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2028 emit_move_insn (scratch, operands[2]);
2029 operands[2] = scratch;
2031 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2032 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2033 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2035 rtx taken_label = operands[3];
2037 /* Operands were possibly modified, but msw_skip doesn't expect this.
2038 Always use the original ones. */
2039 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2041 operands[1] = op1h;
2042 operands[2] = op2h;
2045 operands[3] = skip_label = gen_label_rtx ();
2046 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2047 operands[3] = taken_label;
2049 operands[1] = op1l;
2050 operands[2] = op2l;
2051 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2053 if (reload_completed
2054 && ! arith_reg_or_0_operand (op2l, SImode)
2055 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2057 emit_move_insn (scratch, operands[2]);
2058 operands[2] = scratch;
2060 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2062 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2063 emit_label (skip_label);
2064 return true;
2067 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2069 static void
2070 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2072 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2074 insn = gen_rtx_PARALLEL (VOIDmode,
2075 gen_rtvec (2, insn,
2076 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2077 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2079 else
2080 emit_insn (insn);
2083 /* Prepare the operands for an scc instruction; make sure that the
2084 compare has been done and the result is in T_REG. */
2085 void
2086 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2088 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2089 enum rtx_code oldcode = code;
2090 enum machine_mode mode;
2092 /* First need a compare insn. */
2093 switch (code)
2095 case NE:
2096 /* It isn't possible to handle this case. */
2097 gcc_unreachable ();
2098 case LT:
2099 code = GT;
2100 break;
2101 case LE:
2102 code = GE;
2103 break;
2104 case LTU:
2105 code = GTU;
2106 break;
2107 case LEU:
2108 code = GEU;
2109 break;
2110 default:
2111 break;
2113 if (code != oldcode)
2115 rtx tmp = op0;
2116 op0 = op1;
2117 op1 = tmp;
2120 mode = GET_MODE (op0);
2121 if (mode == VOIDmode)
2122 mode = GET_MODE (op1);
2124 op0 = force_reg (mode, op0);
2125 if ((code != EQ && code != NE
2126 && (op1 != const0_rtx
2127 || code == GTU || code == GEU || code == LTU || code == LEU))
2128 || (mode == DImode && op1 != const0_rtx)
2129 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2130 op1 = force_reg (mode, op1);
2132 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2133 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2134 mode);
2138 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2139 rtx op0, rtx op1)
2141 rtx target = gen_reg_rtx (SImode);
2142 rtx tmp;
2144 gcc_assert (TARGET_SHMEDIA);
2145 switch (code)
2147 case EQ:
2148 case GT:
2149 case LT:
2150 case UNORDERED:
2151 case GTU:
2152 case LTU:
2153 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2154 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2155 code = NE;
2156 break;
2158 case NE:
2159 case GE:
2160 case LE:
2161 case ORDERED:
2162 case GEU:
2163 case LEU:
2164 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2165 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2166 code = EQ;
2167 break;
2169 case UNEQ:
2170 case UNGE:
2171 case UNGT:
2172 case UNLE:
2173 case UNLT:
2174 case LTGT:
2175 return NULL_RTX;
2177 default:
2178 gcc_unreachable ();
2181 if (mode == DImode)
2183 rtx t2 = gen_reg_rtx (DImode);
2184 emit_insn (gen_extendsidi2 (t2, target));
2185 target = t2;
2188 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2191 /* Called from the md file, set up the operands of a compare instruction. */
2193 void
2194 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2196 enum rtx_code code = GET_CODE (operands[0]);
2197 enum rtx_code branch_code;
2198 rtx op0 = operands[1];
2199 rtx op1 = operands[2];
2200 rtx insn, tem;
2201 bool need_ccmpeq = false;
2203 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2205 op0 = force_reg (mode, op0);
2206 op1 = force_reg (mode, op1);
2208 else
2210 if (code != EQ || mode == DImode)
2212 /* Force args into regs, since we can't use constants here. */
2213 op0 = force_reg (mode, op0);
2214 if (op1 != const0_rtx || code == GTU || code == GEU)
2215 op1 = force_reg (mode, op1);
2219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2221 if (code == LT
2222 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2223 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2225 tem = op0, op0 = op1, op1 = tem;
2226 code = swap_condition (code);
2229 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2230 if (code == GE)
2232 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2233 need_ccmpeq = true;
2234 code = GT;
2237 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2238 to EQ/GT respectively. */
2239 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2242 switch (code)
2244 case EQ:
2245 case GT:
2246 case GE:
2247 case GTU:
2248 case GEU:
2249 branch_code = code;
2250 break;
2251 case NE:
2252 case LT:
2253 case LE:
2254 case LTU:
2255 case LEU:
2256 branch_code = reverse_condition (code);
2257 break;
2258 default:
2259 gcc_unreachable ();
2262 insn = gen_rtx_SET (VOIDmode,
2263 gen_rtx_REG (SImode, T_REG),
2264 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2266 sh_emit_set_t_insn (insn, mode);
2267 if (need_ccmpeq)
2268 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2270 if (branch_code == code)
2271 emit_jump_insn (gen_branch_true (operands[3]));
2272 else
2273 emit_jump_insn (gen_branch_false (operands[3]));
2276 void
2277 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2279 enum rtx_code code = GET_CODE (operands[1]);
2280 rtx op0 = operands[2];
2281 rtx op1 = operands[3];
2282 rtx lab = NULL_RTX;
2283 bool invert = false;
2284 rtx tem;
2286 op0 = force_reg (mode, op0);
2287 if ((code != EQ && code != NE
2288 && (op1 != const0_rtx
2289 || code == GTU || code == GEU || code == LTU || code == LEU))
2290 || (mode == DImode && op1 != const0_rtx)
2291 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2292 op1 = force_reg (mode, op1);
2294 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2296 if (code == LT || code == LE)
2298 code = swap_condition (code);
2299 tem = op0, op0 = op1, op1 = tem;
2301 if (code == GE)
2303 if (TARGET_IEEE)
2305 lab = gen_label_rtx ();
2306 sh_emit_scc_to_t (EQ, op0, op1);
2307 emit_jump_insn (gen_branch_true (lab));
2308 code = GT;
2310 else
2312 code = LT;
2313 invert = true;
2318 if (code == NE)
2320 code = EQ;
2321 invert = true;
2324 sh_emit_scc_to_t (code, op0, op1);
2325 if (lab)
2326 emit_label (lab);
2327 if (invert)
2328 emit_insn (gen_movnegt (operands[0]));
2329 else
2330 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2333 /* Functions to output assembly code. */
2335 /* Return a sequence of instructions to perform DI or DF move.
2337 Since the SH cannot move a DI or DF in one instruction, we have
2338 to take care when we see overlapping source and dest registers. */
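/* Added note (editorial): the overlap handling below simply orders the two
   word moves.  If the source's second register is the destination's first
   (REGNO (src) + 1 == REGNO (dst)), the second word is moved before the
   first; likewise, when loading through a pointer register that is also the
   destination's first register, the high-address word is fetched first so
   the pointer is clobbered only by the last instruction.  */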
2340 const char *
2341 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2342 enum machine_mode mode)
2344 rtx dst = operands[0];
2345 rtx src = operands[1];
2347 if (MEM_P (dst)
2348 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2349 return "mov.l %T1,%0\n\tmov.l %1,%0";
2351 if (register_operand (dst, mode)
2352 && register_operand (src, mode))
2354 if (REGNO (src) == MACH_REG)
2355 return "sts mach,%S0\n\tsts macl,%R0";
2357 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2358 when mov.d r1,r0 do r1->r0 then r2->r1. */
2360 if (REGNO (src) + 1 == REGNO (dst))
2361 return "mov %T1,%T0\n\tmov %1,%0";
2362 else
2363 return "mov %1,%0\n\tmov %T1,%T0";
2365 else if (CONST_INT_P (src))
2367 if (INTVAL (src) < 0)
2368 output_asm_insn ("mov #-1,%S0", operands);
2369 else
2370 output_asm_insn ("mov #0,%S0", operands);
2372 return "mov %1,%R0";
2374 else if (MEM_P (src))
2376 int ptrreg = -1;
2377 int dreg = REGNO (dst);
2378 rtx inside = XEXP (src, 0);
2380 switch (GET_CODE (inside))
2382 case REG:
2383 ptrreg = REGNO (inside);
2384 break;
2386 case SUBREG:
2387 ptrreg = subreg_regno (inside);
2388 break;
2390 case PLUS:
2391 ptrreg = REGNO (XEXP (inside, 0));
2392 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2393 an offsettable address. Unfortunately, offsettable addresses use
2394 QImode to check the offset, and a QImode offsettable address
2395 requires r0 for the other operand, which is not currently
2396 supported, so we can't use the 'o' constraint.
2397 Thus we must check for and handle r0+REG addresses here.
2398 We punt for now, since this is likely very rare. */
2399 gcc_assert (!REG_P (XEXP (inside, 1)));
2400 break;
2402 case LABEL_REF:
2403 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2404 case POST_INC:
2405 return "mov.l %1,%0\n\tmov.l %1,%T0";
2406 default:
2407 gcc_unreachable ();
2410 /* Work out the safe way to copy. Copy into the second half first. */
2411 if (dreg == ptrreg)
2412 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2415 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2418 /* Print an instruction which would have gone into a delay slot after
2419 another instruction, but couldn't because the other instruction expanded
2420 into a sequence where putting the slot insn at the end wouldn't work. */
2422 static void
2423 print_slot (rtx insn)
2425 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2427 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2430 const char *
2431 output_far_jump (rtx insn, rtx op)
2433 struct { rtx lab, reg, op; } this_jmp;
2434 rtx braf_base_lab = NULL_RTX;
2435 const char *jump;
2436 int far;
2437 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2438 rtx prev;
2440 this_jmp.lab = gen_label_rtx ();
2442 if (TARGET_SH2
2443 && offset >= -32764
2444 && offset - get_attr_length (insn) <= 32766)
2446 far = 0;
2447 jump = "mov.w %O0,%1; braf %1";
2449 else
2451 far = 1;
2452 if (flag_pic)
2454 if (TARGET_SH2)
2455 jump = "mov.l %O0,%1; braf %1";
2456 else
2457 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2459 else
2460 jump = "mov.l %O0,%1; jmp @%1";
2462 /* If we have a scratch register available, use it. */
2463 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2464 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2466 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2467 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2468 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2469 output_asm_insn (jump, &this_jmp.lab);
2470 if (dbr_sequence_length ())
2471 print_slot (final_sequence);
2472 else
2473 output_asm_insn ("nop", 0);
2475 else
2477 /* Output the delay slot insn first if any. */
2478 if (dbr_sequence_length ())
2479 print_slot (final_sequence);
2481 this_jmp.reg = gen_rtx_REG (SImode, 13);
2482 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2483 Fortunately, MACL is fixed and call-clobbered, and we never
2484 need its value across jumps, so save r13 in it instead of in
2485 the stack. */
2486 if (TARGET_SH5)
2487 output_asm_insn ("lds r13, macl", 0);
2488 else
2489 output_asm_insn ("mov.l r13,@-r15", 0);
2490 output_asm_insn (jump, &this_jmp.lab);
2491 if (TARGET_SH5)
2492 output_asm_insn ("sts macl, r13", 0);
2493 else
2494 output_asm_insn ("mov.l @r15+,r13", 0);
2496 if (far && flag_pic && TARGET_SH2)
2498 braf_base_lab = gen_label_rtx ();
2499 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2500 CODE_LABEL_NUMBER (braf_base_lab));
2502 if (far)
2503 output_asm_insn (".align 2", 0);
2504 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2505 this_jmp.op = op;
2506 if (far && flag_pic)
2508 if (TARGET_SH2)
2509 this_jmp.lab = braf_base_lab;
2510 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2512 else
2513 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2514 return "";
2517 /* Local label counter, used for constants in the pool and inside
2518 pattern branches. */
2520 static int lf = 100;
2522 /* Output code for ordinary branches. */
2524 const char *
2525 output_branch (int logic, rtx insn, rtx *operands)
2527 switch (get_attr_length (insn))
2529 case 6:
2530 /* This can happen if filling the delay slot has caused a forward
2531 branch to exceed its range (we could reverse it, but only
2532 when we know we won't overextend other branches; this should
2533 best be handled by relaxation).
2534 It can also happen when other condbranches hoist delay slot insn
2535 from their destination, thus leading to code size increase.
2536 But the branch will still be in the range -4092..+4098 bytes. */
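	/* Added sketch (editorial): without relaxation the condition is
	   inverted so that it skips an unconditional branch that can reach
	   the real target, roughly
		bf	.LF<n>		! inverted conditional branch
		bra	<target>	! bra has a +-4K range
		nop
	   .LF<n>:
	   and, when possible, the original delay-slot insn rides in the
	   inverted branch's delay slot.  */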
2538 if (! TARGET_RELAX)
2540 int label = lf++;
2541 /* The call to print_slot will clobber the operands. */
2542 rtx op0 = operands[0];
2544 /* If the instruction in the delay slot is annulled (true), then
2545 there is no delay slot where we can put it now. The only safe
2546 place for it is after the label. final will do that by default. */
2548 if (final_sequence
2549 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2550 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2552 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2553 ASSEMBLER_DIALECT ? "/" : ".", label);
2554 print_slot (final_sequence);
2556 else
2557 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2559 output_asm_insn ("bra\t%l0", &op0);
2560 fprintf (asm_out_file, "\tnop\n");
2561 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2563 return "";
2565 /* When relaxing, handle this like a short branch. The linker
2566 will fix it up if it still doesn't fit after relaxation. */
2567 case 2:
2568 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2570 /* These are for SH2e, in which we have to account for the
2571 extra nop because of the hardware bug in annulled branches. */
2572 case 8:
2573 if (! TARGET_RELAX)
2575 int label = lf++;
2577 gcc_assert (!final_sequence
2578 || !(INSN_ANNULLED_BRANCH_P
2579 (XVECEXP (final_sequence, 0, 0))));
2580 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2581 logic ? "f" : "t",
2582 ASSEMBLER_DIALECT ? "/" : ".", label);
2583 fprintf (asm_out_file, "\tnop\n");
2584 output_asm_insn ("bra\t%l0", operands);
2585 fprintf (asm_out_file, "\tnop\n");
2586 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2588 return "";
2590 /* When relaxing, fall through. */
2591 case 4:
2593 char buffer[10];
2595 sprintf (buffer, "b%s%ss\t%%l0",
2596 logic ? "t" : "f",
2597 ASSEMBLER_DIALECT ? "/" : ".");
2598 output_asm_insn (buffer, &operands[0]);
2599 return "nop";
2602 default:
2603 /* There should be no longer branches now - that would
2604 indicate that something has destroyed the branches set
2605 up in machine_dependent_reorg. */
2606 gcc_unreachable ();
2610 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2611 fill in operands[9] as a label to the successor insn.
2612 We try to use jump threading where possible.
2613 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2614 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2615 follow jmp and bt, if the address is in range. */
2616 const char *
2617 output_branchy_insn (enum rtx_code code, const char *templ,
2618 rtx insn, rtx *operands)
2620 rtx next_insn = NEXT_INSN (insn);
2622 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2624 rtx src = SET_SRC (PATTERN (next_insn));
2625 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2627 /* Following branch not taken */
2628 operands[9] = gen_label_rtx ();
2629 emit_label_after (operands[9], next_insn);
2630 INSN_ADDRESSES_NEW (operands[9],
2631 INSN_ADDRESSES (INSN_UID (next_insn))
2632 + get_attr_length (next_insn));
2633 return templ;
2635 else
2637 int offset = (branch_dest (next_insn)
2638 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2639 if (offset >= -252 && offset <= 258)
2641 if (GET_CODE (src) == IF_THEN_ELSE)
2642 /* branch_true */
2643 src = XEXP (src, 1);
2644 operands[9] = src;
2645 return templ;
2649 operands[9] = gen_label_rtx ();
2650 emit_label_after (operands[9], insn);
2651 INSN_ADDRESSES_NEW (operands[9],
2652 INSN_ADDRESSES (INSN_UID (insn))
2653 + get_attr_length (insn));
2654 return templ;
2657 const char *
2658 output_ieee_ccmpeq (rtx insn, rtx *operands)
2660 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2661 insn, operands);
2664 /* Output the start of the assembler file. */
2666 static void
2667 sh_file_start (void)
2669 default_file_start ();
2671 #ifdef SYMBIAN
2672 /* Declare the .directive section before it is used. */
2673 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2674 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2675 #endif
2677 if (TARGET_ELF)
2678 /* We need to show the text section with the proper
2679 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2680 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2681 will complain. We can teach GAS specifically about the
2682 default attributes for our choice of text section, but
2683 then we would have to change GAS again if/when we change
2684 the text section name. */
2685 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2686 else
2687 /* Switch to the data section so that the coffsem symbol
2688 isn't in the text section. */
2689 switch_to_section (data_section);
2691 if (TARGET_LITTLE_ENDIAN)
2692 fputs ("\t.little\n", asm_out_file);
2694 if (!TARGET_ELF)
2696 if (TARGET_SHCOMPACT)
2697 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2698 else if (TARGET_SHMEDIA)
2699 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2700 TARGET_SHMEDIA64 ? 64 : 32);
2704 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2706 static bool
2707 unspec_caller_rtx_p (rtx pat)
2709 rtx base, offset;
2710 int i;
2712 split_const (pat, &base, &offset);
2713 if (GET_CODE (base) == UNSPEC)
2715 if (XINT (base, 1) == UNSPEC_CALLER)
2716 return true;
2717 for (i = 0; i < XVECLEN (base, 0); i++)
2718 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2719 return true;
2721 return false;
2724 /* Indicate that INSN cannot be duplicated. This is true for insn
2725 that generates a unique label. */
2727 static bool
2728 sh_cannot_copy_insn_p (rtx insn)
2730 rtx pat;
2732 if (!reload_completed || !flag_pic)
2733 return false;
2735 if (!NONJUMP_INSN_P (insn))
2736 return false;
2737 if (asm_noperands (insn) >= 0)
2738 return false;
2740 pat = PATTERN (insn);
2741 if (GET_CODE (pat) != SET)
2742 return false;
2743 pat = SET_SRC (pat);
2745 if (unspec_caller_rtx_p (pat))
2746 return true;
2748 return false;
2751 /* Actual number of instructions used to make a shift by N. */
2752 static const char ashiftrt_insns[] =
2753 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2755 /* Left shift and logical right shift are the same. */
2756 static const char shift_insns[] =
2757 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2759 /* Individual shift amounts needed to get the above length sequences.
2760 One bit right shifts clobber the T bit, so when possible, put one bit
2761 shifts in the middle of the sequence, so the ends are eligible for
2762 branch delay slots. */
2763 static const short shift_amounts[32][5] = {
2764 {0}, {1}, {2}, {2, 1},
2765 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2766 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2767 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2768 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2769 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2770 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2771 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
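/* Added example (editorial): the SH only has immediate shifts by 1, 2, 8 and
   16, so e.g. a left shift by 15 uses shift_amounts[15] = {8, -1, 8}, i.e.
	shll8	rN
	shlr	rN
	shll8	rN
   three insns instead of fifteen single-bit shifts; the bit dropped by the
   intermediate right shift is one of the zeros introduced by the first
   shll8, so no data is lost.  */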
2773 /* Likewise, but for shift amounts < 16, up to three highmost bits
2774 might be clobbered. This is typically used when combined with some
2775 kind of sign or zero extension. */
2777 static const char ext_shift_insns[] =
2778 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2780 static const short ext_shift_amounts[32][4] = {
2781 {0}, {1}, {2}, {2, 1},
2782 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2783 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2784 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2785 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2786 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2787 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2788 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2790 /* Assuming we have a value that has been sign-extended by at least one bit,
2791 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2792 to shift it by N without data loss, and quicker than by other means? */
2793 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
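/* Editorial note (added): the macro is true exactly for n == 7 and n == 15,
   the two cases where ext_shift_amounts ends in a single one-bit right shift
   ({8, -1} and {16, -1}) that can be turned into an arithmetic shift.  */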
2795 /* This is used in length attributes in sh.md to help compute the length
2796 of arbitrary constant shift instructions. */
2799 shift_insns_rtx (rtx insn)
2801 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2802 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2803 enum rtx_code shift_code = GET_CODE (set_src);
2805 switch (shift_code)
2807 case ASHIFTRT:
2808 return ashiftrt_insns[shift_count];
2809 case LSHIFTRT:
2810 case ASHIFT:
2811 return shift_insns[shift_count];
2812 default:
2813 gcc_unreachable ();
2817 /* Return the cost of a shift. */
2819 static inline int
2820 shiftcosts (rtx x)
2822 int value;
2824 if (TARGET_SHMEDIA)
2825 return 1;
2827 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2829 if (GET_MODE (x) == DImode
2830 && CONST_INT_P (XEXP (x, 1))
2831 && INTVAL (XEXP (x, 1)) == 1)
2832 return 2;
2834 /* Everything else is invalid, because there is no pattern for it. */
2835 return MAX_COST;
2837 /* If shifting by a non-constant, then this will be expensive. */
2838 if (!CONST_INT_P (XEXP (x, 1)))
2839 return SH_DYNAMIC_SHIFT_COST;
2841 /* Otherwise, return the true cost in instructions. Cope with out of range
2842 shift counts more or less arbitrarily. */
2843 value = INTVAL (XEXP (x, 1)) & 31;
2845 if (GET_CODE (x) == ASHIFTRT)
2847 int cost = ashiftrt_insns[value];
2848 /* If SH3, then we put the constant in a reg and use shad. */
2849 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2850 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2851 return cost;
2853 else
2854 return shift_insns[value];
2857 /* Return the cost of an AND operation. */
2859 static inline int
2860 andcosts (rtx x)
2862 int i;
2864 /* Anding with a register is a single-cycle AND instruction. */
2865 if (!CONST_INT_P (XEXP (x, 1)))
2866 return 1;
2868 i = INTVAL (XEXP (x, 1));
2870 if (TARGET_SHMEDIA)
2872 if (satisfies_constraint_I10 (XEXP (x, 1))
2873 || satisfies_constraint_J16 (XEXP (x, 1)))
2874 return 1;
2875 else
2876 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2879 /* These constants are single cycle extu.[bw] instructions. */
2880 if (i == 0xff || i == 0xffff)
2881 return 1;
2882 /* Constants that can be used in an and immediate instruction in a single
2883 cycle, but this requires r0, so make it a little more expensive. */
2884 if (CONST_OK_FOR_K08 (i))
2885 return 2;
2886 /* Constants that can be loaded with a mov immediate and an and.
2887 This case is probably unnecessary. */
2888 if (CONST_OK_FOR_I08 (i))
2889 return 2;
2890 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2891 This case is probably unnecessary. */
2892 return 3;
2895 /* Return the cost of an addition or a subtraction. */
2897 static inline int
2898 addsubcosts (rtx x)
2900 /* Adding a register is a single cycle insn. */
2901 if (REG_P (XEXP (x, 1))
2902 || GET_CODE (XEXP (x, 1)) == SUBREG)
2903 return 1;
2905 /* Likewise for small constants. */
2906 if (CONST_INT_P (XEXP (x, 1))
2907 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2908 return 1;
2910 if (TARGET_SHMEDIA)
2911 switch (GET_CODE (XEXP (x, 1)))
2913 case CONST:
2914 case LABEL_REF:
2915 case SYMBOL_REF:
2916 return TARGET_SHMEDIA64 ? 5 : 3;
2918 case CONST_INT:
2919 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2920 return 2;
2921 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2922 return 3;
2923 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2924 return 4;
2926 /* Fall through. */
2927 default:
2928 return 5;
2931 /* Any other constant requires a 2 cycle pc-relative load plus an
2932 addition. */
2933 return 3;
2936 /* Return the cost of a multiply. */
2937 static inline int
2938 multcosts (rtx x ATTRIBUTE_UNUSED)
2940 if (sh_multcost >= 0)
2941 return sh_multcost;
2942 if (TARGET_SHMEDIA)
2943 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2944 accept constants. Ideally, we would use a cost of one or two and
2945 add the cost of the operand, but disregard the latter when inside loops
2946 and loop invariant code motion is still to follow.
2947 Using a multiply first and splitting it later if it's a loss
2948 doesn't work because of different sign / zero extension semantics
2949 of multiplies vs. shifts. */
2950 return TARGET_SMALLCODE ? 2 : 3;
2952 if (TARGET_SH2)
2954 /* We have a mul insn, so we can never take more than the mul and the
2955 read of the mac reg, but count more because of the latency and extra
2956 reg usage. */
2957 if (TARGET_SMALLCODE)
2958 return 2;
2959 return 3;
2962 /* If we're aiming at small code, then just count the number of
2963 insns in a multiply call sequence. */
2964 if (TARGET_SMALLCODE)
2965 return 5;
2967 /* Otherwise count all the insns in the routine we'd be calling too. */
2968 return 20;
2971 /* Compute a (partial) cost for rtx X. Return true if the complete
2972 cost has been computed, and false if subexpressions should be
2973 scanned. In either case, *TOTAL contains the cost result. */
2975 static bool
2976 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2977 bool speed ATTRIBUTE_UNUSED)
2979 switch (code)
2981 case CONST_INT:
2982 if (TARGET_SHMEDIA)
2984 if (INTVAL (x) == 0)
2985 *total = 0;
2986 else if (outer_code == AND && and_operand ((x), DImode))
2987 *total = 0;
2988 else if ((outer_code == IOR || outer_code == XOR
2989 || outer_code == PLUS)
2990 && CONST_OK_FOR_I10 (INTVAL (x)))
2991 *total = 0;
2992 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2993 *total = COSTS_N_INSNS (outer_code != SET);
2994 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2995 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2996 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2997 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2998 else
2999 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3000 return true;
3002 if (CONST_OK_FOR_I08 (INTVAL (x)))
3003 *total = 0;
3004 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3005 && CONST_OK_FOR_K08 (INTVAL (x)))
3006 *total = 1;
3007 /* prepare_cmp_insn will force costly constants into registers before
3008 the cbranch[sd]i4 patterns can see them, so preserve potentially
3009 interesting ones not covered by I08 above. */
3010 else if (outer_code == COMPARE
3011 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3012 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3013 || INTVAL (x) == 0x7fffffff
3014 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3015 *total = 1;
3016 else
3017 *total = 8;
3018 return true;
3020 case CONST:
3021 case LABEL_REF:
3022 case SYMBOL_REF:
3023 if (TARGET_SHMEDIA64)
3024 *total = COSTS_N_INSNS (4);
3025 else if (TARGET_SHMEDIA32)
3026 *total = COSTS_N_INSNS (2);
3027 else
3028 *total = 5;
3029 return true;
3031 case CONST_DOUBLE:
3032 if (TARGET_SHMEDIA)
3033 *total = COSTS_N_INSNS (4);
3034 /* prepare_cmp_insn will force costly constants into registers before
3035 the cbranchdi4 pattern can see them, so preserve potentially
3036 interesting ones. */
3037 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3038 *total = 1;
3039 else
3040 *total = 10;
3041 return true;
3042 case CONST_VECTOR:
3043 if (x == CONST0_RTX (GET_MODE (x)))
3044 *total = 0;
3045 else if (sh_1el_vec (x, VOIDmode))
3046 *total = outer_code != SET;
3047 if (sh_rep_vec (x, VOIDmode))
3048 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3049 + (outer_code != SET));
3050 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3051 return true;
3053 case PLUS:
3054 case MINUS:
3055 *total = COSTS_N_INSNS (addsubcosts (x));
3056 return true;
3058 case AND:
3059 *total = COSTS_N_INSNS (andcosts (x));
3060 return true;
3062 case MULT:
3063 *total = COSTS_N_INSNS (multcosts (x));
3064 return true;
3066 case ASHIFT:
3067 case ASHIFTRT:
3068 case LSHIFTRT:
3069 *total = COSTS_N_INSNS (shiftcosts (x));
3070 return true;
3072 case DIV:
3073 case UDIV:
3074 case MOD:
3075 case UMOD:
3076 *total = COSTS_N_INSNS (20);
3077 return true;
3079 case PARALLEL:
3080 if (sh_1el_vec (x, VOIDmode))
3081 *total = outer_code != SET;
3082 if (sh_rep_vec (x, VOIDmode))
3083 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3084 + (outer_code != SET));
3085 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3086 return true;
3088 case FLOAT:
3089 case FIX:
3090 *total = 100;
3091 return true;
3093 default:
3094 return false;
3098 /* Compute the cost of an address. For the SH, all valid addresses are
3099 the same cost. Use a slightly higher cost for reg + reg addressing,
3100 since it increases pressure on r0. */
3102 static int
3103 sh_address_cost (rtx X,
3104 bool speed ATTRIBUTE_UNUSED)
3106 return (GET_CODE (X) == PLUS
3107 && ! CONSTANT_P (XEXP (X, 1))
3108 && ! TARGET_SHMEDIA ? 1 : 0);
3111 /* Code to expand a shift. */
3113 void
3114 gen_ashift (int type, int n, rtx reg)
3116 /* Negative values here come from the shift_amounts array. */
3117 if (n < 0)
3119 if (type == ASHIFT)
3120 type = LSHIFTRT;
3121 else
3122 type = ASHIFT;
3123 n = -n;
3126 switch (type)
3128 case ASHIFTRT:
3129 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3130 break;
3131 case LSHIFTRT:
3132 if (n == 1)
3133 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3134 else
3135 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3136 break;
3137 case ASHIFT:
3138 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3139 break;
3143 /* Same for HImode */
3145 void
3146 gen_ashift_hi (int type, int n, rtx reg)
3148 /* Negative values here come from the shift_amounts array. */
3149 if (n < 0)
3151 if (type == ASHIFT)
3152 type = LSHIFTRT;
3153 else
3154 type = ASHIFT;
3155 n = -n;
3158 switch (type)
3160 case ASHIFTRT:
3161 case LSHIFTRT:
3162 /* We don't have HImode right shift operations because using the
3163 ordinary 32 bit shift instructions for that doesn't generate proper
3164 zero/sign extension.
3165 gen_ashift_hi is only called in contexts where we know that the
3166 sign extension works out correctly. */
3168 int offset = 0;
3169 if (GET_CODE (reg) == SUBREG)
3171 offset = SUBREG_BYTE (reg);
3172 reg = SUBREG_REG (reg);
3174 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3175 break;
3177 case ASHIFT:
3178 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3179 break;
3183 /* Output RTL to split a constant shift into its component SH constant
3184 shift instructions. */
3186 void
3187 gen_shifty_op (int code, rtx *operands)
3189 int value = INTVAL (operands[2]);
3190 int max, i;
3192 /* Truncate the shift count in case it is out of bounds. */
3193 value = value & 31;
3195 if (value == 31)
3197 if (code == LSHIFTRT)
3199 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3200 emit_insn (gen_movt (operands[0]));
3201 return;
3203 else if (code == ASHIFT)
3205 /* There is a two instruction sequence for 31 bit left shifts,
3206 but it requires r0. */
3207 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3209 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3210 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3211 return;
3215 else if (value == 0)
3217 /* This can happen even when optimizing, if there were subregs before
3218 reload. Don't output a nop here, as this is never optimized away;
3219 use a no-op move instead. */
3220 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3221 return;
3224 max = shift_insns[value];
3225 for (i = 0; i < max; i++)
3226 gen_ashift (code, shift_amounts[value][i], operands[0]);
3229 /* Same as above, but optimized for values where the topmost bits don't
3230 matter. */
3232 void
3233 gen_shifty_hi_op (int code, rtx *operands)
3235 int value = INTVAL (operands[2]);
3236 int max, i;
3237 void (*gen_fun) (int, int, rtx);
3239 /* This operation is used by and_shl for SImode values with a few
3240 high bits known to be cleared. */
3241 value &= 31;
3242 if (value == 0)
3244 emit_insn (gen_nop ());
3245 return;
3248 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3249 if (code == ASHIFT)
3251 max = ext_shift_insns[value];
3252 for (i = 0; i < max; i++)
3253 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3255 else
3256 /* When shifting right, emit the shifts in reverse order, so that
3257 solitary negative values come first. */
3258 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3259 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3262 /* Output RTL for an arithmetic right shift. */
3264 /* ??? Rewrite to use super-optimizer sequences. */
3267 expand_ashiftrt (rtx *operands)
3269 rtx wrk;
3270 char func[18];
3271 int value;
3273 if (TARGET_SH3)
3275 if (!CONST_INT_P (operands[2]))
3277 rtx count = copy_to_mode_reg (SImode, operands[2]);
3278 emit_insn (gen_negsi2 (count, count));
3279 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3280 return 1;
3282 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3283 > 1 + SH_DYNAMIC_SHIFT_COST)
3285 rtx count
3286 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3287 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3288 return 1;
3291 if (!CONST_INT_P (operands[2]))
3292 return 0;
3294 value = INTVAL (operands[2]) & 31;
3296 if (value == 31)
3298 /* If we are called from abs expansion, arrange things so that we
3299 can use a single MT instruction that doesn't clobber the source,
3300 if LICM can hoist out the load of the constant zero. */
3301 if (currently_expanding_to_rtl)
3303 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3304 operands[1]));
3305 emit_insn (gen_mov_neg_si_t (operands[0]));
3306 return 1;
3308 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3309 return 1;
3311 else if (value >= 16 && value <= 19)
3313 wrk = gen_reg_rtx (SImode);
3314 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3315 value -= 16;
3316 while (value--)
3317 gen_ashift (ASHIFTRT, 1, wrk);
3318 emit_move_insn (operands[0], wrk);
3319 return 1;
3321 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3322 else if (value <= 5)
3324 wrk = gen_reg_rtx (SImode);
3325 emit_move_insn (wrk, operands[1]);
3326 while (value--)
3327 gen_ashift (ASHIFTRT, 1, wrk);
3328 emit_move_insn (operands[0], wrk);
3329 return 1;
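  /* Added note (editorial): the remaining shift counts fall back to the
     static helper routines __ashiftrt_r4_<n>, which take the value in r4
     and leave the shifted result there.  */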
3332 wrk = gen_reg_rtx (Pmode);
3334 /* Load the value into an arg reg and call a helper. */
3335 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3336 sprintf (func, "__ashiftrt_r4_%d", value);
3337 function_symbol (wrk, func, SFUNC_STATIC);
3338 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3339 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3340 return 1;
3344 sh_dynamicalize_shift_p (rtx count)
3346 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3349 /* Try to find a good way to implement the combiner pattern
3350 [(set (match_operand:SI 0 "register_operand" "r")
3351 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3352 (match_operand:SI 2 "const_int_operand" "n"))
3353 (match_operand:SI 3 "const_int_operand" "n"))) .
3354 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3355 return 0 for simple right / left or left/right shift combination.
3356 return 1 for a combination of shifts with zero_extend.
3357 return 2 for a combination of shifts with an AND that needs r0.
3358 return 3 for a combination of shifts with an AND that needs an extra
3359 scratch register, when the three highmost bits of the AND mask are clear.
3360 return 4 for a combination of shifts with an AND that needs an extra
3361 scratch register, when any of the three highmost bits of the AND mask
3362 is set.
3363 If ATTRP is set, store an initial right shift width in ATTRP[0],
3364 and the instruction length in ATTRP[1] . These values are not valid
3365 when returning 0.
3366 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3367 shift_amounts for the last shift value that is to be used before the
3368 sign extend. */
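/* Added worked example (editorial): for ((x << 2) & 0x3fc), i.e. LEFT_RTX = 2
   and MASK_RTX = 0x3fc, the mask shifted down is 0xff, so the zero-extend
   alternative wins: shl_and_kind returns 1 with a cost of two insns, and
   gen_shl_and emits an extu.b followed by a shll2.  */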
3370 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3372 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3373 int left = INTVAL (left_rtx), right;
3374 int best = 0;
3375 int cost, best_cost = 10000;
3376 int best_right = 0, best_len = 0;
3377 int i;
3378 int can_ext;
3380 if (left < 0 || left > 31)
3381 return 0;
3382 if (CONST_INT_P (mask_rtx))
3383 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3384 else
3385 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3386 /* Can this be expressed as a right shift / left shift pair? */
3387 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3388 right = exact_log2 (lsb);
3389 mask2 = ~(mask + lsb - 1);
3390 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3391 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3392 if (! mask2)
3393 best_cost = shift_insns[right] + shift_insns[right + left];
3394 /* mask has no trailing zeroes <==> ! right */
3395 else if (! right && mask2 == ~(lsb2 - 1))
3397 int late_right = exact_log2 (lsb2);
3398 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3400 /* Try to use zero extend. */
3401 if (mask2 == ~(lsb2 - 1))
3403 int width, first;
3405 for (width = 8; width <= 16; width += 8)
3407 /* Can we zero-extend right away? */
3408 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3410 cost
3411 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3412 if (cost < best_cost)
3414 best = 1;
3415 best_cost = cost;
3416 best_right = right;
3417 best_len = cost;
3418 if (attrp)
3419 attrp[2] = -1;
3421 continue;
3423 /* ??? Could try to put zero extend into initial right shift,
3424 or even shift a bit left before the right shift. */
3425 /* Determine value of first part of left shift, to get to the
3426 zero extend cut-off point. */
3427 first = width - exact_log2 (lsb2) + right;
3428 if (first >= 0 && right + left - first >= 0)
3430 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3431 + ext_shift_insns[right + left - first];
3432 if (cost < best_cost)
3434 best = 1;
3435 best_cost = cost;
3436 best_right = right;
3437 best_len = cost;
3438 if (attrp)
3439 attrp[2] = first;
3444 /* Try to use r0 AND pattern */
3445 for (i = 0; i <= 2; i++)
3447 if (i > right)
3448 break;
3449 if (! CONST_OK_FOR_K08 (mask >> i))
3450 continue;
3451 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3452 if (cost < best_cost)
3454 best = 2;
3455 best_cost = cost;
3456 best_right = i;
3457 best_len = cost - 1;
3460 /* Try to use a scratch register to hold the AND operand. */
3461 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3462 for (i = 0; i <= 2; i++)
3464 if (i > right)
3465 break;
3466 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3467 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3468 if (cost < best_cost)
3470 best = 4 - can_ext;
3471 best_cost = cost;
3472 best_right = i;
3473 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3477 if (attrp)
3479 attrp[0] = best_right;
3480 attrp[1] = best_len;
3482 return best;
3485 /* This is used in length attributes of the unnamed instructions
3486 corresponding to shl_and_kind return values of 1 and 2. */
3488 shl_and_length (rtx insn)
3490 rtx set_src, left_rtx, mask_rtx;
3491 int attributes[3];
3493 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3494 left_rtx = XEXP (XEXP (set_src, 0), 1);
3495 mask_rtx = XEXP (set_src, 1);
3496 shl_and_kind (left_rtx, mask_rtx, attributes);
3497 return attributes[1];
3500 /* This is used in length attribute of the and_shl_scratch instruction. */
3503 shl_and_scr_length (rtx insn)
3505 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3506 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3507 rtx op = XEXP (set_src, 0);
3508 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3509 op = XEXP (XEXP (op, 0), 0);
3510 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3513 /* Generate rtl for instructions for which shl_and_kind advised a particular
3514 method of generating them, i.e. returned zero. */
3517 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3519 int attributes[3];
3520 unsigned HOST_WIDE_INT mask;
3521 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3522 int right, total_shift;
3523 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3525 right = attributes[0];
3526 total_shift = INTVAL (left_rtx) + right;
3527 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3528 switch (kind)
3530 default:
3531 return -1;
3532 case 1:
3534 int first = attributes[2];
3535 rtx operands[3];
3537 if (first < 0)
3539 emit_insn ((mask << right) <= 0xff
3540 ? gen_zero_extendqisi2 (dest,
3541 gen_lowpart (QImode, source))
3542 : gen_zero_extendhisi2 (dest,
3543 gen_lowpart (HImode, source)));
3544 source = dest;
3546 if (source != dest)
3547 emit_insn (gen_movsi (dest, source));
3548 operands[0] = dest;
3549 if (right)
3551 operands[2] = GEN_INT (right);
3552 gen_shifty_hi_op (LSHIFTRT, operands);
3554 if (first > 0)
3556 operands[2] = GEN_INT (first);
3557 gen_shifty_hi_op (ASHIFT, operands);
3558 total_shift -= first;
3559 mask <<= first;
3561 if (first >= 0)
3562 emit_insn (mask <= 0xff
3563 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3564 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3565 if (total_shift > 0)
3567 operands[2] = GEN_INT (total_shift);
3568 gen_shifty_hi_op (ASHIFT, operands);
3570 break;
3572 case 4:
3573 shift_gen_fun = gen_shifty_op;
3574 case 3:
3575 /* If the topmost bit that matters is set, set the topmost bits
3576 that don't matter. This way, we might be able to get a shorter
3577 signed constant. */
3578 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3579 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3580 case 2:
3581 /* Don't expand fine-grained when combining, because that will
3582 make the pattern fail. */
3583 if (currently_expanding_to_rtl
3584 || reload_in_progress || reload_completed)
3586 rtx operands[3];
3588 /* Cases 3 and 4 should be handled by this split
3589 only while combining */
3590 gcc_assert (kind <= 2);
3591 if (right)
3593 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3594 source = dest;
3596 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3597 if (total_shift)
3599 operands[0] = dest;
3600 operands[1] = dest;
3601 operands[2] = GEN_INT (total_shift);
3602 shift_gen_fun (ASHIFT, operands);
3604 break;
3606 else
3608 int neg = 0;
3609 if (kind != 4 && total_shift < 16)
3611 neg = -ext_shift_amounts[total_shift][1];
3612 if (neg > 0)
3613 neg -= ext_shift_amounts[total_shift][2];
3614 else
3615 neg = 0;
3617 emit_insn (gen_and_shl_scratch (dest, source,
3618 GEN_INT (right),
3619 GEN_INT (mask),
3620 GEN_INT (total_shift + neg),
3621 GEN_INT (neg)));
3622 emit_insn (gen_movsi (dest, dest));
3623 break;
3626 return 0;
3629 /* Try to find a good way to implement the combiner pattern
3630 [(set (match_operand:SI 0 "register_operand" "=r")
3631 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3632 (match_operand:SI 2 "const_int_operand" "n")
3633 (match_operand:SI 3 "const_int_operand" "n")
3634 (const_int 0)))
3635 (clobber (reg:SI T_REG))]
3636 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3637 return 0 for simple left / right shift combination.
3638 return 1 for left shift / 8 bit sign extend / left shift.
3639 return 2 for left shift / 16 bit sign extend / left shift.
3640 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3641 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3642 return 5 for left shift / 16 bit sign extend / right shift
3643 return 6 for < 8 bit sign extend / left shift.
3644 return 7 for < 8 bit sign extend / left shift / single right shift.
3645 If COSTP is nonzero, assign the calculated cost to *COSTP. */
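/* Added worked example (editorial): for LEFT_RTX = 1 and SIZE_RTX = 9, i.e.
   extracting the low 9 bits of (x << 1) as a signed value, insize is 8, so
   an 8-bit sign extend followed by one more left shift suffices:
   shl_sext_kind returns 1 with a cost of 2, and gen_shl_sext emits an
   exts.b followed by a shll.  */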
3648 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3650 int left, size, insize, ext;
3651 int cost = 0, best_cost;
3652 int kind;
3654 left = INTVAL (left_rtx);
3655 size = INTVAL (size_rtx);
3656 insize = size - left;
3657 gcc_assert (insize > 0);
3658 /* Default to left / right shift. */
3659 kind = 0;
3660 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3661 if (size <= 16)
3663 /* 16 bit shift / sign extend / 16 bit shift */
3664 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3665 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3666 below, by alternative 3 or something even better. */
3667 if (cost < best_cost)
3669 kind = 5;
3670 best_cost = cost;
3673 /* Try a plain sign extend between two shifts. */
3674 for (ext = 16; ext >= insize; ext -= 8)
3676 if (ext <= size)
3678 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3679 if (cost < best_cost)
3681 kind = ext / (unsigned) 8;
3682 best_cost = cost;
3685 /* Check if we can do a sloppy shift with a final signed shift
3686 restoring the sign. */
3687 if (EXT_SHIFT_SIGNED (size - ext))
3688 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3689 /* If not, maybe it's still cheaper to do the second shift sloppy,
3690 and do a final sign extend? */
3691 else if (size <= 16)
3692 cost = ext_shift_insns[ext - insize] + 1
3693 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3694 else
3695 continue;
3696 if (cost < best_cost)
3698 kind = ext / (unsigned) 8 + 2;
3699 best_cost = cost;
3702 /* Check if we can sign extend in r0 */
3703 if (insize < 8)
3705 cost = 3 + shift_insns[left];
3706 if (cost < best_cost)
3708 kind = 6;
3709 best_cost = cost;
3711 /* Try the same with a final signed shift. */
3712 if (left < 31)
3714 cost = 3 + ext_shift_insns[left + 1] + 1;
3715 if (cost < best_cost)
3717 kind = 7;
3718 best_cost = cost;
3722 if (TARGET_SH3)
3724 /* Try to use a dynamic shift. */
3725 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3726 if (cost < best_cost)
3728 kind = 0;
3729 best_cost = cost;
3732 if (costp)
3733 *costp = cost;
3734 return kind;
3737 /* Function to be used in the length attribute of the instructions
3738 implementing this pattern. */
3741 shl_sext_length (rtx insn)
3743 rtx set_src, left_rtx, size_rtx;
3744 int cost;
3746 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3747 left_rtx = XEXP (XEXP (set_src, 0), 1);
3748 size_rtx = XEXP (set_src, 1);
3749 shl_sext_kind (left_rtx, size_rtx, &cost);
3750 return cost;
3753 /* Generate rtl for this pattern. */
3756 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3758 int kind;
3759 int left, size, insize, cost;
3760 rtx operands[3];
3762 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3763 left = INTVAL (left_rtx);
3764 size = INTVAL (size_rtx);
3765 insize = size - left;
3766 switch (kind)
3768 case 1:
3769 case 2:
3770 case 3:
3771 case 4:
3773 int ext = kind & 1 ? 8 : 16;
3774 int shift2 = size - ext;
3776 /* Don't expand fine-grained when combining, because that will
3777 make the pattern fail. */
3778 if (! currently_expanding_to_rtl
3779 && ! reload_in_progress && ! reload_completed)
3781 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3782 emit_insn (gen_movsi (dest, source));
3783 break;
3785 if (dest != source)
3786 emit_insn (gen_movsi (dest, source));
3787 operands[0] = dest;
3788 if (ext - insize)
3790 operands[2] = GEN_INT (ext - insize);
3791 gen_shifty_hi_op (ASHIFT, operands);
3793 emit_insn (kind & 1
3794 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3795 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3796 if (kind <= 2)
3798 if (shift2)
3800 operands[2] = GEN_INT (shift2);
3801 gen_shifty_op (ASHIFT, operands);
3804 else
3806 if (shift2 > 0)
3808 if (EXT_SHIFT_SIGNED (shift2))
3810 operands[2] = GEN_INT (shift2 + 1);
3811 gen_shifty_op (ASHIFT, operands);
3812 operands[2] = const1_rtx;
3813 gen_shifty_op (ASHIFTRT, operands);
3814 break;
3816 operands[2] = GEN_INT (shift2);
3817 gen_shifty_hi_op (ASHIFT, operands);
3819 else if (shift2)
3821 operands[2] = GEN_INT (-shift2);
3822 gen_shifty_hi_op (LSHIFTRT, operands);
3824 emit_insn (size <= 8
3825 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3826 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3828 break;
3830 case 5:
3832 int i = 16 - size;
3833 if (! currently_expanding_to_rtl
3834 && ! reload_in_progress && ! reload_completed)
3835 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3836 else
3838 operands[0] = dest;
3839 operands[2] = GEN_INT (16 - insize);
3840 gen_shifty_hi_op (ASHIFT, operands);
3841 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3843 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3844 while (--i >= 0)
3845 gen_ashift (ASHIFTRT, 1, dest);
3846 break;
3848 case 6:
3849 case 7:
3850 /* Don't expand fine-grained when combining, because that will
3851 make the pattern fail. */
3852 if (! currently_expanding_to_rtl
3853 && ! reload_in_progress && ! reload_completed)
3855 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3856 emit_insn (gen_movsi (dest, source));
3857 break;
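/* Sign-extend the low INSIZE bits of SOURCE into DEST without a matching
   extend instruction, using the classic mask / xor / subtract idiom:
   ((x & ((1 << insize) - 1)) ^ (1 << (insize - 1))) - (1 << (insize - 1)).  */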
3859 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3860 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3861 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3862 operands[0] = dest;
3863 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3864 gen_shifty_op (ASHIFT, operands);
3865 if (kind == 7)
3866 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3867 break;
3868 default:
3869 return -1;
3871 return 0;
3874 /* Prefix a symbol_ref name with "datalabel". */
3877 gen_datalabel_ref (rtx sym)
3879 const char *str;
3881 if (GET_CODE (sym) == LABEL_REF)
3882 return gen_rtx_CONST (GET_MODE (sym),
3883 gen_rtx_UNSPEC (GET_MODE (sym),
3884 gen_rtvec (1, sym),
3885 UNSPEC_DATALABEL));
3887 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3889 str = XSTR (sym, 0);
3890 /* Share all SYMBOL_REF strings with the same value - that is important
3891 for cse. */
3892 str = IDENTIFIER_POINTER (get_identifier (str));
3893 XSTR (sym, 0) = str;
3895 return sym;
3899 static alloc_pool label_ref_list_pool;
3901 typedef struct label_ref_list_d
3903 rtx label;
3904 struct label_ref_list_d *next;
3905 } *label_ref_list_t;
3907 /* The SH cannot load a large constant into a register; constants have to
3908 come from a pc relative load. The reference of a pc relative load
3909 instruction must be less than 1k in front of the instruction. This
3910 means that we often have to dump a constant inside a function, and
3911 generate code to branch around it.
3913 It is important to minimize this, since the branches will slow things
3914 down and make things bigger.
3916 Worst case code looks like:
3918 mov.l L1,rn
3919 bra L2
3921 align
3922 L1: .long value
3926 mov.l L3,rn
3927 bra L4
3929 align
3930 L3: .long value
3934 We fix this by performing a scan before scheduling, which notices which
3935 instructions need to have their operands fetched from the constant table
3936 and builds the table.
3938 The algorithm is:
3940 Scan and find an instruction which needs a pcrel move. Look forward, find the
3941 last barrier which is within MAX_COUNT bytes of the requirement.
3942 If there isn't one, make one. Process all the instructions between
3943 the found instruction and the barrier.
3945 In the above example, we can tell that L3 is within 1k of L1, so
3946 the first move can be shrunk from the 3 insn+constant sequence into
3947 just 1 insn, and the constant moved to L3 to make:
3949 mov.l L1,rn
3951 mov.l L3,rn
3952 bra L4
3954 align
3955 L3:.long value
3956 L4:.long value
3958 Then the second move becomes the target for the shortening process. */
3960 typedef struct
3962 rtx value; /* Value in table. */
3963 rtx label; /* Label of value. */
3964 label_ref_list_t wend; /* End of window. */
3965 enum machine_mode mode; /* Mode of value. */
3967 /* True if this constant is accessed as part of a post-increment
3968 sequence. Note that HImode constants are never accessed in this way. */
3969 bool part_of_sequence_p;
3970 } pool_node;
3972 /* The maximum number of constants that can fit into one pool, since
3973 constants in the range 0..510 are at least 2 bytes long, and those in the
3974 range from there to 1018 are at least 4 bytes long.
3976 #define MAX_POOL_SIZE 372
3977 static pool_node pool_vector[MAX_POOL_SIZE];
3978 static int pool_size;
3979 static rtx pool_window_label;
3980 static int pool_window_last;
3982 static int max_labelno_before_reorg;
3984 /* ??? If we need a constant in HImode which is the truncated value of a
3985 constant we need in SImode, we could combine the two entries thus saving
3986 two bytes. Is this common enough to be worth the effort of implementing
3987 it? */
3989 /* ??? This stuff should be done at the same time that we shorten branches.
3990 As it is now, we must assume that all branches are the maximum size, and
3991 this causes us to almost always output constant pools sooner than
3992 necessary. */
3994 /* Add a constant to the pool and return its label. */
3996 static rtx
3997 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3999 int i;
4000 rtx lab, new_rtx;
4001 label_ref_list_t ref, newref;
4003 /* First see if we've already got it. */
4004 for (i = 0; i < pool_size; i++)
4006 if (x->code == pool_vector[i].value->code
4007 && mode == pool_vector[i].mode)
4009 if (x->code == CODE_LABEL)
4011 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4012 continue;
4014 if (rtx_equal_p (x, pool_vector[i].value))
4016 lab = new_rtx = 0;
4017 if (! last_value
4018 || ! i
4019 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4021 new_rtx = gen_label_rtx ();
4022 LABEL_REFS (new_rtx) = pool_vector[i].label;
4023 pool_vector[i].label = lab = new_rtx;
4025 if (lab && pool_window_label)
4027 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4028 newref->label = pool_window_label;
4029 ref = pool_vector[pool_window_last].wend;
4030 newref->next = ref;
4031 pool_vector[pool_window_last].wend = newref;
4033 if (new_rtx)
4034 pool_window_label = new_rtx;
4035 pool_window_last = i;
4036 return lab;
4041 /* Need a new one. */
4042 pool_vector[pool_size].value = x;
4043 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4045 lab = 0;
4046 pool_vector[pool_size - 1].part_of_sequence_p = true;
4048 else
4049 lab = gen_label_rtx ();
4050 pool_vector[pool_size].mode = mode;
4051 pool_vector[pool_size].label = lab;
4052 pool_vector[pool_size].wend = NULL;
4053 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4054 if (lab && pool_window_label)
4056 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4057 newref->label = pool_window_label;
4058 ref = pool_vector[pool_window_last].wend;
4059 newref->next = ref;
4060 pool_vector[pool_window_last].wend = newref;
4062 if (lab)
4063 pool_window_label = lab;
4064 pool_window_last = pool_size;
4065 pool_size++;
4066 return lab;
4069 /* Output the literal table. START, if nonzero, is the first instruction
4070 this table is needed for, and also indicates that there is at least one
4071 casesi_worker_2 instruction; we have to emit the operand3 labels from
4072 these insns at a 4-byte aligned position. BARRIER is the barrier
4073 after which we are to place the table. */
4075 static void
4076 dump_table (rtx start, rtx barrier)
4078 rtx scan = barrier;
4079 int i;
4080 int need_align = 1;
4081 rtx lab;
4082 label_ref_list_t ref;
4083 int have_df = 0;
4085 /* Do two passes; the first time, dump out the HI sized constants. */
4087 for (i = 0; i < pool_size; i++)
4089 pool_node *p = &pool_vector[i];
4091 if (p->mode == HImode)
4093 if (need_align)
4095 scan = emit_insn_after (gen_align_2 (), scan);
4096 need_align = 0;
4098 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4099 scan = emit_label_after (lab, scan);
4100 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4101 scan);
4102 for (ref = p->wend; ref; ref = ref->next)
4104 lab = ref->label;
4105 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4108 else if (p->mode == DFmode)
4109 have_df = 1;
4112 need_align = 1;
4114 if (start)
4116 scan = emit_insn_after (gen_align_4 (), scan);
4117 need_align = 0;
4118 for (; start != barrier; start = NEXT_INSN (start))
4119 if (NONJUMP_INSN_P (start)
4120 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4122 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4123 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4125 scan = emit_label_after (lab, scan);
4128 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4130 rtx align_insn = NULL_RTX;
4132 scan = emit_label_after (gen_label_rtx (), scan);
4133 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4134 need_align = 0;
4136 for (i = 0; i < pool_size; i++)
4138 pool_node *p = &pool_vector[i];
4140 switch (p->mode)
4142 case HImode:
4143 break;
4144 case SImode:
4145 case SFmode:
4146 if (align_insn && !p->part_of_sequence_p)
4148 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4149 emit_label_before (lab, align_insn);
4150 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4151 align_insn);
4152 for (ref = p->wend; ref; ref = ref->next)
4154 lab = ref->label;
4155 emit_insn_before (gen_consttable_window_end (lab),
4156 align_insn);
4158 delete_insn (align_insn);
4159 align_insn = NULL_RTX;
4160 continue;
4162 else
4164 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4165 scan = emit_label_after (lab, scan);
4166 scan = emit_insn_after (gen_consttable_4 (p->value,
4167 const0_rtx), scan);
4168 need_align = ! need_align;
4170 break;
4171 case DFmode:
4172 if (need_align)
4174 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4175 align_insn = scan;
4176 need_align = 0;
4178 case DImode:
4179 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4180 scan = emit_label_after (lab, scan);
4181 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4182 scan);
4183 break;
4184 default:
4185 gcc_unreachable ();
4188 if (p->mode != HImode)
4190 for (ref = p->wend; ref; ref = ref->next)
4192 lab = ref->label;
4193 scan = emit_insn_after (gen_consttable_window_end (lab),
4194 scan);
4199 pool_size = 0;
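/* Second pass: output the remaining SImode/SFmode and DFmode/DImode
   constants.  If the aligned-double path above was taken, it has already
   emitted everything and cleared pool_size, so this loop does nothing.  */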
4202 for (i = 0; i < pool_size; i++)
4204 pool_node *p = &pool_vector[i];
4206 switch (p->mode)
4208 case HImode:
4209 break;
4210 case SImode:
4211 case SFmode:
4212 if (need_align)
4214 need_align = 0;
4215 scan = emit_label_after (gen_label_rtx (), scan);
4216 scan = emit_insn_after (gen_align_4 (), scan);
4218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4219 scan = emit_label_after (lab, scan);
4220 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4221 scan);
4222 break;
4223 case DFmode:
4224 case DImode:
4225 if (need_align)
4227 need_align = 0;
4228 scan = emit_label_after (gen_label_rtx (), scan);
4229 scan = emit_insn_after (gen_align_4 (), scan);
4231 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4232 scan = emit_label_after (lab, scan);
4233 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4234 scan);
4235 break;
4236 default:
4237 gcc_unreachable ();
4240 if (p->mode != HImode)
4242 for (ref = p->wend; ref; ref = ref->next)
4244 lab = ref->label;
4245 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4250 scan = emit_insn_after (gen_consttable_end (), scan);
4251 scan = emit_barrier_after (scan);
4252 pool_size = 0;
4253 pool_window_label = NULL_RTX;
4254 pool_window_last = 0;
4257 /* Return nonzero if constant would be an ok source for a
4258 mov.w instead of a mov.l. */
4260 static int
4261 hi_const (rtx src)
4263 return (CONST_INT_P (src)
4264 && INTVAL (src) >= -32768
4265 && INTVAL (src) <= 32767);
4268 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4270 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4272 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4273 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4274 need to fix it if the input value is CONST_OK_FOR_I08. */
4276 static int
4277 broken_move (rtx insn)
4279 if (NONJUMP_INSN_P (insn))
4281 rtx pat = PATTERN (insn);
4282 if (GET_CODE (pat) == PARALLEL)
4283 pat = XVECEXP (pat, 0, 0);
4284 if (GET_CODE (pat) == SET
4285 /* We can load any 8-bit value if we don't care what the high
4286 order bits end up as. */
4287 && GET_MODE (SET_DEST (pat)) != QImode
4288 && (CONSTANT_P (SET_SRC (pat))
4289 /* Match mova_const. */
4290 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4291 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4292 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4293 && ! (TARGET_SH2E
4294 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4295 && (fp_zero_operand (SET_SRC (pat))
4296 || fp_one_operand (SET_SRC (pat)))
4297 /* In general we don't know the current setting of fpscr, so disable fldi.
4298 There is an exception if this was a register-register move
4299 before reload - and hence it was ascertained that we have
4300 single precision setting - and in a post-reload optimization
4301 we changed this to do a constant load. In that case
4302 we don't have an r0 clobber, hence we must use fldi. */
4303 && (TARGET_FMOVD
4304 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4305 == SCRATCH))
4306 && REG_P (SET_DEST (pat))
4307 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4308 && ! (TARGET_SH2A
4309 && GET_MODE (SET_DEST (pat)) == SImode
4310 && (satisfies_constraint_I20 (SET_SRC (pat))
4311 || satisfies_constraint_I28 (SET_SRC (pat))))
4312 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4313 return 1;
4316 return 0;
4319 static int
4320 mova_p (rtx insn)
4322 return (NONJUMP_INSN_P (insn)
4323 && GET_CODE (PATTERN (insn)) == SET
4324 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4325 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4326 /* Don't match mova_const. */
4327 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4330 /* Fix up a mova from a switch that went out of range. */
4331 static void
4332 fixup_mova (rtx mova)
4334 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4335 if (! flag_pic)
4337 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4338 INSN_CODE (mova) = -1;
4340 else
4342 rtx worker = mova;
4343 rtx lab = gen_label_rtx ();
4344 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4348 worker = NEXT_INSN (worker);
4349 gcc_assert (worker
4350 && !LABEL_P (worker)
4351 && !JUMP_P (worker));
4352 } while (NOTE_P (worker)
4353 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4354 wpat = PATTERN (worker);
4355 wpat0 = XVECEXP (wpat, 0, 0);
4356 wpat1 = XVECEXP (wpat, 0, 1);
4357 wsrc = SET_SRC (wpat0);
4358 PATTERN (worker) = (gen_casesi_worker_2
4359 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4360 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4361 XEXP (wpat1, 0)));
4362 INSN_CODE (worker) = -1;
4363 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4364 base = gen_rtx_LABEL_REF (Pmode, lab);
4365 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4366 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4367 INSN_CODE (mova) = -1;
4371 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4372 *num_mova, and check if the new mova is not nested within the first one.
4373 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4374 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4375 static int
4376 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4378 int n_addr = 0; /* Initialization to shut up spurious warning. */
4379 int f_target, n_target = 0; /* Likewise. */
4381 if (optimize)
4383 /* If NEW_MOVA has no address yet, it will be handled later. */
4384 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4385 return -1;
4387 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4388 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4389 if (n_addr > n_target || n_addr + 1022 < n_target)
4391 /* Change the mova into a load.
4392 broken_move will then return true for it. */
4393 fixup_mova (new_mova);
4394 return 1;
4397 if (!(*num_mova)++)
4399 *first_mova = new_mova;
4400 return 2;
4402 if (!optimize
4403 || ((f_target
4404 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4405 >= n_target))
4406 return -1;
4408 (*num_mova)--;
4409 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4410 > n_target - n_addr)
4412 fixup_mova (*first_mova);
4413 return 0;
4415 else
4417 fixup_mova (new_mova);
4418 return 1;
4422 /* Find the last barrier from insn FROM which is close enough to hold the
4423 constant pool. If we can't find one, then create one near the end of
4424 the range. */
4426 static rtx
4427 find_barrier (int num_mova, rtx mova, rtx from)
4429 int count_si = 0;
4430 int count_hi = 0;
4431 int found_hi = 0;
4432 int found_si = 0;
4433 int found_di = 0;
4434 int hi_align = 2;
4435 int si_align = 2;
4436 int leading_mova = num_mova;
4437 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4438 int si_limit;
4439 int hi_limit;
4440 rtx orig = from;
4441 rtx last_got = NULL_RTX;
4442 rtx last_symoff = NULL_RTX;
4444 /* For HImode: range is 510, add 4 because pc counts from address of
4445 second instruction after this one, subtract 2 for the jump instruction
4446 that we may need to emit before the table, subtract 2 for the instruction
4447 that fills the jump delay slot (in very rare cases, reorg will take an
4448 instruction from after the constant pool or will leave the delay slot
4449 empty). This gives 510.
4450 For SImode: range is 1020, add 4 because pc counts from address of
4451 second instruction after this one, subtract 2 in case pc is 2 byte
4452 aligned, subtract 2 for the jump instruction that we may need to emit
4453 before the table, subtract 2 for the instruction that fills the jump
4454 delay slot. This gives 1018. */
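/* That is, hi_limit = 510 + 4 - 2 - 2 = 510, and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the assignments below.  */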
4456 /* The branch will always be shortened now that the reference address for
4457 forward branches is the successor address, thus we need no longer make
4458 adjustments to the [sh]i_limit for -O0. */
4460 si_limit = 1018;
4461 hi_limit = 510;
4463 while (from && count_si < si_limit && count_hi < hi_limit)
4465 int inc = get_attr_length (from);
4466 int new_align = 1;
4468 /* If this is a label that existed at the time of the compute_alignments
4469 call, determine the alignment. N.B. When find_barrier recurses for
4470 an out-of-reach mova, we might see labels at the start of previously
4471 inserted constant tables. */
4472 if (LABEL_P (from)
4473 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4475 if (optimize)
4476 new_align = 1 << label_to_alignment (from);
4477 else if (BARRIER_P (prev_nonnote_insn (from)))
4478 new_align = 1 << barrier_align (from);
4479 else
4480 new_align = 1;
4481 inc = 0;
4483 /* In case we are scanning a constant table because of recursion, check
4484 for explicit alignments. If the table is long, we might be forced
4485 to emit the new table in front of it; the length of the alignment
4486 might be the last straw. */
4487 else if (NONJUMP_INSN_P (from)
4488 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4489 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4490 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4491 /* When we find the end of a constant table, paste the new constant
4492 at the end. That is better than putting it in front because
4493 this way, we don't need extra alignment for adding a 4-byte-aligned
4494 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4495 else if (NONJUMP_INSN_P (from)
4496 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4497 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4498 return from;
4500 if (BARRIER_P (from))
4502 rtx next;
4504 found_barrier = from;
4506 /* If we are at the end of the function, or in front of an alignment
4507 instruction, we need not insert an extra alignment. We prefer
4508 this kind of barrier. */
4509 if (barrier_align (from) > 2)
4510 good_barrier = from;
4512 /* If we are at the end of a hot/cold block, dump the constants
4513 here. */
4514 next = NEXT_INSN (from);
4515 if (next
4516 && NOTE_P (next)
4517 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4518 break;
4521 if (broken_move (from))
4523 rtx pat, src, dst;
4524 enum machine_mode mode;
4526 pat = PATTERN (from);
4527 if (GET_CODE (pat) == PARALLEL)
4528 pat = XVECEXP (pat, 0, 0);
4529 src = SET_SRC (pat);
4530 dst = SET_DEST (pat);
4531 mode = GET_MODE (dst);
4533 /* A GOT pc-relative setting comes in a pair of
4534 mova .L8,r0
4535 mov.l .L8,r12
4536 instructions. (plus add r0,r12).
4537 Remember if we see one without the other. */
4538 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4539 last_got = last_got ? NULL_RTX : from;
4540 else if (PIC_ADDR_P (src))
4541 last_got = last_got ? NULL_RTX : from;
4543 /* We must explicitly check the mode, because sometimes the
4544 front end will generate code to load unsigned constants into
4545 HImode targets without properly sign extending them. */
4546 if (mode == HImode
4547 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4549 found_hi += 2;
4550 /* We put the short constants before the long constants, so
4551 we must count the length of short constants in the range
4552 for the long constants. */
4553 /* ??? This isn't optimal, but is easy to do. */
4554 si_limit -= 2;
4556 else
4558 /* We dump DF/DI constants before SF/SI ones, because
4559 the limit is the same, but the alignment requirements
4560 are higher. We may waste up to 4 additional bytes
4561 for alignment, and the DF/DI constant may have
4562 another SF/SI constant placed before it. */
4563 if (TARGET_SHCOMPACT
4564 && ! found_di
4565 && (mode == DFmode || mode == DImode))
4567 found_di = 1;
4568 si_limit -= 8;
4570 while (si_align > 2 && found_si + si_align - 2 > count_si)
4571 si_align >>= 1;
4572 if (found_si > count_si)
4573 count_si = found_si;
4574 found_si += GET_MODE_SIZE (mode);
4575 if (num_mova)
4576 si_limit -= GET_MODE_SIZE (mode);
4580 if (mova_p (from))
4582 switch (untangle_mova (&num_mova, &mova, from))
4584 case 1:
4585 if (flag_pic)
4587 rtx src = SET_SRC (PATTERN (from));
4588 if (GET_CODE (src) == CONST
4589 && GET_CODE (XEXP (src, 0)) == UNSPEC
4590 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4591 last_symoff = from;
4593 break;
4594 case 0: return find_barrier (0, 0, mova);
4595 case 2:
4597 leading_mova = 0;
4598 barrier_before_mova
4599 = good_barrier ? good_barrier : found_barrier;
4601 default: break;
4603 if (found_si > count_si)
4604 count_si = found_si;
4606 else if (JUMP_TABLE_DATA_P (from))
4608 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4609 || (num_mova
4610 && (prev_nonnote_insn (from)
4611 == XEXP (MOVA_LABELREF (mova), 0))))
4612 num_mova--;
4613 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4615 /* We have just passed the barrier in front of the
4616 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4617 the ADDR_DIFF_VEC is accessed as data, just like our pool
4618 constants, this is a good opportunity to accommodate what
4619 we have gathered so far.
4620 If we waited any longer, we could end up at a barrier in
4621 front of code, which gives worse cache usage for separated
4622 instruction / data caches. */
4623 good_barrier = found_barrier;
4624 break;
4626 else
4628 rtx body = PATTERN (from);
4629 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4632 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4633 else if (JUMP_P (from)
4634 && ! TARGET_SH2
4635 && ! TARGET_SMALLCODE)
4636 new_align = 4;
4638 /* There is a possibility that a bf is transformed into a bf/s by the
4639 delay slot scheduler. */
4640 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4641 && get_attr_type (from) == TYPE_CBRANCH
4642 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4643 inc += 2;
4645 if (found_si)
4647 count_si += inc;
4648 if (new_align > si_align)
4650 si_limit -= (count_si - 1) & (new_align - si_align);
4651 si_align = new_align;
4653 count_si = (count_si + new_align - 1) & -new_align;
4655 if (found_hi)
4657 count_hi += inc;
4658 if (new_align > hi_align)
4660 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4661 hi_align = new_align;
4663 count_hi = (count_hi + new_align - 1) & -new_align;
4665 from = NEXT_INSN (from);
4668 if (num_mova)
4670 if (leading_mova)
4672 /* Try as we might, the leading mova is out of range. Change
4673 it into a load (which will become a pcload) and retry. */
4674 fixup_mova (mova);
4675 return find_barrier (0, 0, mova);
4677 else
4679 /* Insert the constant pool table before the mova instruction,
4680 to prevent the mova label reference from going out of range. */
4681 from = mova;
4682 good_barrier = found_barrier = barrier_before_mova;
4686 if (found_barrier)
4688 if (good_barrier && next_real_insn (found_barrier))
4689 found_barrier = good_barrier;
4691 else
4693 /* We didn't find a barrier in time to dump our stuff,
4694 so we'll make one. */
4695 rtx label = gen_label_rtx ();
4697 /* Don't emit a constant table in the middle of insns for
4698 casesi_worker_2. This is a bit of overkill, but is enough
4699 because casesi_worker_2 doesn't appear very frequently. */
4700 if (last_symoff)
4701 from = last_symoff;
4703 /* If we exceeded the range, then we must back up over the last
4704 instruction we looked at. Otherwise, we just need to undo the
4705 NEXT_INSN at the end of the loop. */
4706 if (PREV_INSN (from) != orig
4707 && (count_hi > hi_limit || count_si > si_limit))
4708 from = PREV_INSN (PREV_INSN (from));
4709 else
4710 from = PREV_INSN (from);
4712 /* Don't emit a constant table in the middle of global pointer setting,
4713 since that would move the addressing base GOT into another table.
4714 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4715 in the pool anyway, so just move up the whole constant pool. */
4716 if (last_got)
4717 from = PREV_INSN (last_got);
4719 /* Don't insert the constant pool table at the position which
4720 may be the landing pad. */
4721 if (flag_exceptions
4722 && CALL_P (from)
4723 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4724 from = PREV_INSN (from);
4726 /* Walk back to be just before any jump or label.
4727 Putting it before a label reduces the number of times the branch
4728 around the constant pool table will be hit. Putting it before
4729 a jump makes it more likely that the bra delay slot will be
4730 filled. */
4731 while (NOTE_P (from) || JUMP_P (from)
4732 || LABEL_P (from))
4733 from = PREV_INSN (from);
4735 from = emit_jump_insn_after (gen_jump (label), from);
4736 JUMP_LABEL (from) = label;
4737 LABEL_NUSES (label) = 1;
4738 found_barrier = emit_barrier_after (from);
4739 emit_label_after (label, found_barrier);
4742 return found_barrier;
4745 /* If the instruction INSN is implemented by a special function, and we can
4746 positively find the register that is used to call the sfunc, and this
4747 register is not used anywhere else in this instruction - except as the
4748 destination of a set, return this register; else, return 0. */
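/* In practice (and as the scan below assumes), such a pattern carries a
   (use (reg:SI ...)) naming the register that holds the callee's address;
   that USE is what identifies the register returned here.  */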
4750 sfunc_uses_reg (rtx insn)
4752 int i;
4753 rtx pattern, part, reg_part, reg;
4755 if (!NONJUMP_INSN_P (insn))
4756 return 0;
4757 pattern = PATTERN (insn);
4758 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4759 return 0;
4761 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4763 part = XVECEXP (pattern, 0, i);
4764 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4765 reg_part = part;
4767 if (! reg_part)
4768 return 0;
4769 reg = XEXP (reg_part, 0);
4770 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4772 part = XVECEXP (pattern, 0, i);
4773 if (part == reg_part || GET_CODE (part) == CLOBBER)
4774 continue;
4775 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4776 && REG_P (SET_DEST (part)))
4777 ? SET_SRC (part) : part)))
4778 return 0;
4780 return reg;
4783 /* See if the only way in which INSN uses REG is by calling it, or by
4784 setting it while calling it. Set *SET to a SET rtx if the register
4785 is set by INSN. */
4787 static int
4788 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4790 rtx pattern, reg2;
4792 *set = NULL_RTX;
4794 reg2 = sfunc_uses_reg (insn);
4795 if (reg2 && REGNO (reg2) == REGNO (reg))
4797 pattern = single_set (insn);
4798 if (pattern
4799 && REG_P (SET_DEST (pattern))
4800 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4801 *set = pattern;
4802 return 0;
4804 if (!CALL_P (insn))
4806 /* We don't use rtx_equal_p because we don't care if the mode is
4807 different. */
4808 pattern = single_set (insn);
4809 if (pattern
4810 && REG_P (SET_DEST (pattern))
4811 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4813 rtx par, part;
4814 int i;
4816 *set = pattern;
4817 par = PATTERN (insn);
4818 if (GET_CODE (par) == PARALLEL)
4819 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4821 part = XVECEXP (par, 0, i);
4822 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4823 return 1;
4825 return reg_mentioned_p (reg, SET_SRC (pattern));
4828 return 1;
4831 pattern = PATTERN (insn);
4833 if (GET_CODE (pattern) == PARALLEL)
4835 int i;
4837 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4838 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4839 return 1;
4840 pattern = XVECEXP (pattern, 0, 0);
4843 if (GET_CODE (pattern) == SET)
4845 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4847 /* We don't use rtx_equal_p, because we don't care if the
4848 mode is different. */
4849 if (!REG_P (SET_DEST (pattern))
4850 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4851 return 1;
4853 *set = pattern;
4856 pattern = SET_SRC (pattern);
4859 if (GET_CODE (pattern) != CALL
4860 || !MEM_P (XEXP (pattern, 0))
4861 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4862 return 1;
4864 return 0;
4867 /* Given X, a pattern of an insn or a part of it, return a mask of used
4868 general registers. Bits 0..15 mean that the respective registers
4869 are used as inputs in the instruction. Bits 16..31 mean that the
4870 registers 0..15, respectively, are used as outputs, or are clobbered.
4871 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
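/* For example, (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) yields
   0x0002000c: bits 2 and 3 for the input registers, and bit 1 + 16 for
   the output register.  */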
4873 regs_used (rtx x, int is_dest)
4875 enum rtx_code code;
4876 const char *fmt;
4877 int i, used = 0;
4879 if (! x)
4880 return used;
4881 code = GET_CODE (x);
4882 switch (code)
4884 case REG:
4885 if (REGNO (x) < 16)
4886 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4887 << (REGNO (x) + is_dest));
4888 return 0;
4889 case SUBREG:
4891 rtx y = SUBREG_REG (x);
4893 if (!REG_P (y))
4894 break;
4895 if (REGNO (y) < 16)
4896 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4897 << (REGNO (y) +
4898 subreg_regno_offset (REGNO (y),
4899 GET_MODE (y),
4900 SUBREG_BYTE (x),
4901 GET_MODE (x)) + is_dest));
4902 return 0;
4904 case SET:
4905 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4906 case RETURN:
4907 /* If there was a return value, it must have been indicated with USE. */
4908 return 0x00ffff00;
4909 case CLOBBER:
4910 is_dest = 1;
4911 break;
4912 case MEM:
4913 is_dest = 0;
4914 break;
4915 case CALL:
4916 used |= 0x00ff00f0;
4917 break;
4918 default:
4919 break;
4922 fmt = GET_RTX_FORMAT (code);
4924 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4926 if (fmt[i] == 'E')
4928 register int j;
4929 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4930 used |= regs_used (XVECEXP (x, i, j), is_dest);
4932 else if (fmt[i] == 'e')
4933 used |= regs_used (XEXP (x, i), is_dest);
4935 return used;
4938 /* Create an instruction that prevents redirection of a conditional branch
4939 to the destination of the JUMP with address ADDR.
4940 If the branch needs to be implemented as an indirect jump, try to find
4941 a scratch register for it.
4942 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4943 If any preceding insn that doesn't fit into a delay slot is good enough,
4944 pass 1. Pass 2 if a definite blocking insn is needed.
4945 -1 is used internally to avoid deep recursion.
4946 If a blocking instruction is made or recognized, return it. */
4948 static rtx
4949 gen_block_redirect (rtx jump, int addr, int need_block)
4951 int dead = 0;
4952 rtx prev = prev_nonnote_insn (jump);
4953 rtx dest;
4955 /* First, check if we already have an instruction that satisfies our need. */
4956 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4958 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4959 return prev;
4960 if (GET_CODE (PATTERN (prev)) == USE
4961 || GET_CODE (PATTERN (prev)) == CLOBBER
4962 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4963 prev = jump;
4964 else if ((need_block &= ~1) < 0)
4965 return prev;
4966 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4967 need_block = 0;
4969 if (GET_CODE (PATTERN (jump)) == RETURN)
4971 if (! need_block)
4972 return prev;
4973 /* Reorg even does nasty things with return insns that cause branches
4974 to go out of range - see find_end_label and callers. */
4975 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4977 /* We can't use JUMP_LABEL here because it might be undefined
4978 when not optimizing. */
4979 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4980 /* If the branch is out of range, try to find a scratch register for it. */
4981 if (optimize
4982 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4983 > 4092 + 4098))
4985 rtx scan;
4986 /* Don't look for the stack pointer as a scratch register;
4987 it would cause trouble if an interrupt occurred. */
4988 unsigned attempt = 0x7fff, used;
4989 int jump_left = flag_expensive_optimizations + 1;
4991 /* It is likely that the most recent eligible instruction is wanted for
4992 the delay slot. Therefore, find out which registers it uses, and
4993 try to avoid using them. */
4995 for (scan = jump; (scan = PREV_INSN (scan)); )
4997 enum rtx_code code;
4999 if (INSN_DELETED_P (scan))
5000 continue;
5001 code = GET_CODE (scan);
5002 if (code == CODE_LABEL || code == JUMP_INSN)
5003 break;
5004 if (code == INSN
5005 && GET_CODE (PATTERN (scan)) != USE
5006 && GET_CODE (PATTERN (scan)) != CLOBBER
5007 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5009 attempt &= ~regs_used (PATTERN (scan), 0);
5010 break;
5013 for (used = dead = 0, scan = JUMP_LABEL (jump);
5014 (scan = NEXT_INSN (scan)); )
5016 enum rtx_code code;
5018 if (INSN_DELETED_P (scan))
5019 continue;
5020 code = GET_CODE (scan);
5021 if (INSN_P (scan))
5023 used |= regs_used (PATTERN (scan), 0);
5024 if (code == CALL_INSN)
5025 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5026 dead |= (used >> 16) & ~used;
5027 if (dead & attempt)
5029 dead &= attempt;
5030 break;
5032 if (code == JUMP_INSN)
5034 if (jump_left-- && simplejump_p (scan))
5035 scan = JUMP_LABEL (scan);
5036 else
5037 break;
5041 /* Mask out the stack pointer again, in case it was
5042 the only 'free' register we have found. */
5043 dead &= 0x7fff;
5045 /* If the immediate destination is still in range, check for possible
5046 threading with a jump beyond the delay slot insn.
5047 Don't check if we are called recursively; the jump has been or will be
5048 checked by a different invocation in that case. */
5050 else if (optimize && need_block >= 0)
5052 rtx next = next_active_insn (next_active_insn (dest));
5053 if (next && JUMP_P (next)
5054 && GET_CODE (PATTERN (next)) == SET
5055 && recog_memoized (next) == CODE_FOR_jump_compact)
5057 dest = JUMP_LABEL (next);
5058 if (dest
5059 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5060 > 4092 + 4098))
5061 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5065 if (dead)
5067 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5069 /* It would be nice if we could convert the jump into an indirect
5070 jump / far branch right now, and thus exposing all constituent
5071 instructions to further optimization. However, reorg uses
5072 simplejump_p to determine if there is an unconditional jump where
5073 it should try to schedule instructions from the target of the
5074 branch; simplejump_p fails for indirect jumps even if they have
5075 a JUMP_LABEL. */
5076 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5077 (reg, GEN_INT (unspec_bbr_uid++)),
5078 jump);
5079 /* ??? We would like this to have the scope of the jump, but that
5080 scope will change when a delay slot insn of an inner scope is added.
5081 Hence, after delay slot scheduling, we'll have to expect
5082 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5083 the jump. */
5085 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5086 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5087 return insn;
5089 else if (need_block)
5090 /* We can't use JUMP_LABEL here because it might be undefined
5091 when not optimizing. */
5092 return emit_insn_before (gen_block_branch_redirect
5093 (GEN_INT (unspec_bbr_uid++)),
5094 jump);
5095 return prev;
5098 #define CONDJUMP_MIN -252
5099 #define CONDJUMP_MAX 262
5100 struct far_branch
5102 /* A label (to be placed) in front of the jump
5103 that jumps to our ultimate destination. */
5104 rtx near_label;
5105 /* Where we are going to insert it if we cannot move the jump any farther,
5106 or the jump itself if we have picked up an existing jump. */
5107 rtx insert_place;
5108 /* The ultimate destination. */
5109 rtx far_label;
5110 struct far_branch *prev;
5111 /* If the branch has already been created, its address;
5112 else the address of its first prospective user. */
5113 int address;
5116 static void gen_far_branch (struct far_branch *);
5117 enum mdep_reorg_phase_e mdep_reorg_phase;
5118 static void
5119 gen_far_branch (struct far_branch *bp)
5121 rtx insn = bp->insert_place;
5122 rtx jump;
5123 rtx label = gen_label_rtx ();
5124 int ok;
5126 emit_label_after (label, insn);
5127 if (bp->far_label)
5129 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5130 LABEL_NUSES (bp->far_label)++;
5132 else
5133 jump = emit_jump_insn_after (gen_return (), insn);
5134 /* Emit a barrier so that reorg knows that any following instructions
5135 are not reachable via a fall-through path.
5136 But don't do this when not optimizing, since we wouldn't suppress the
5137 alignment for the barrier then, and could end up with out-of-range
5138 pc-relative loads. */
5139 if (optimize)
5140 emit_barrier_after (jump);
5141 emit_label_after (bp->near_label, insn);
5142 JUMP_LABEL (jump) = bp->far_label;
5143 ok = invert_jump (insn, label, 1);
5144 gcc_assert (ok);
5146 /* If we are branching around a jump (rather than a return), prevent
5147 reorg from using an insn from the jump target as the delay slot insn -
5148 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5149 and it could cause branches to go out of range. */
5150 if (bp->far_label)
5151 (emit_insn_after
5152 (gen_stuff_delay_slot
5153 (GEN_INT (unspec_bbr_uid++),
5154 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5155 insn));
5156 /* Prevent reorg from undoing our splits. */
5157 gen_block_redirect (jump, bp->address += 2, 2);
5160 /* Fix up ADDR_DIFF_VECs. */
5161 void
5162 fixup_addr_diff_vecs (rtx first)
5164 rtx insn;
5166 for (insn = first; insn; insn = NEXT_INSN (insn))
5168 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5170 if (!JUMP_P (insn)
5171 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5172 continue;
5173 pat = PATTERN (insn);
5174 vec_lab = XEXP (XEXP (pat, 0), 0);
5176 /* Search backward for the matching casesi_jump_2. */
5177 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5179 if (!JUMP_P (prev))
5180 continue;
5181 prevpat = PATTERN (prev);
5182 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5183 continue;
5184 x = XVECEXP (prevpat, 0, 1);
5185 if (GET_CODE (x) != USE)
5186 continue;
5187 x = XEXP (x, 0);
5188 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5189 break;
5191 /* FIXME: This is a bug in the optimizer, but it seems harmless
5192 to just avoid panicking. */
5193 if (!prev)
5194 continue;
5196 /* Emit the reference label of the braf where it belongs, right after
5197 the casesi_jump_2 (i.e. braf). */
5198 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5199 emit_label_after (braf_label, prev);
5201 /* Fix up the ADDR_DIFF_VEC to be relative
5202 to the reference address of the braf. */
5203 XEXP (XEXP (pat, 0), 0) = braf_label;
5207 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5208 a barrier. Return the base 2 logarithm of the desired alignment. */
5210 barrier_align (rtx barrier_or_label)
5212 rtx next = next_real_insn (barrier_or_label), pat, prev;
5213 int slot, credit, jump_to_next = 0;
5215 if (! next)
5216 return 0;
5218 pat = PATTERN (next);
5220 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5221 return 2;
5223 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5224 /* This is a barrier in front of a constant table. */
5225 return 0;
5227 prev = prev_real_insn (barrier_or_label);
5228 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5230 pat = PATTERN (prev);
5231 /* If this is a very small table, we want to keep the alignment after
5232 the table to the minimum for proper code alignment. */
5233 return ((TARGET_SMALLCODE
5234 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5235 <= (unsigned) 1 << (CACHE_LOG - 2)))
5236 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5239 if (TARGET_SMALLCODE)
5240 return 0;
5242 if (! TARGET_SH2 || ! optimize)
5243 return align_jumps_log;
5245 /* When fixing up pcloads, a constant table might be inserted just before
5246 the basic block that ends with the barrier. Thus, we can't trust the
5247 instruction lengths before that. */
5248 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5250 /* Check if there is an immediately preceding branch to the insn beyond
5251 the barrier. We must weigh the cost of discarding useful information
5252 from the current cache line when executing this branch and there is
5253 an alignment, against that of fetching unneeded insns in front of the
5254 branch target when there is no alignment. */
5256 /* There are two delay_slot cases to consider. One is the simple case
5257 where the preceding branch is to the insn beyond the barrier (simple
5258 delay slot filling), and the other is where the preceding branch has
5259 a delay slot that is a duplicate of the insn after the barrier
5260 (fill_eager_delay_slots) and the branch is to the insn after the insn
5261 after the barrier. */
5263 /* PREV is presumed to be the JUMP_INSN for the barrier under
5264 investigation. Skip to the insn before it. */
5265 prev = prev_real_insn (prev);
5267 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5268 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5269 prev = prev_real_insn (prev))
5271 jump_to_next = 0;
5272 if (GET_CODE (PATTERN (prev)) == USE
5273 || GET_CODE (PATTERN (prev)) == CLOBBER)
5274 continue;
5275 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5277 prev = XVECEXP (PATTERN (prev), 0, 1);
5278 if (INSN_UID (prev) == INSN_UID (next))
5280 /* Delay slot was filled with insn at jump target. */
5281 jump_to_next = 1;
5282 continue;
5286 if (slot &&
5287 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5288 slot = 0;
5289 credit -= get_attr_length (prev);
5291 if (prev
5292 && JUMP_P (prev)
5293 && JUMP_LABEL (prev))
5295 rtx x;
5296 if (jump_to_next
5297 || next_real_insn (JUMP_LABEL (prev)) == next
5298 /* If relax_delay_slots() decides NEXT was redundant
5299 with some previous instruction, it will have
5300 redirected PREV's jump to the following insn. */
5301 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5302 /* There is no upper bound on redundant instructions
5303 that might have been skipped, but we must not put an
5304 alignment where none had been before. */
5305 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5306 (INSN_P (x)
5307 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5308 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5309 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5311 rtx pat = PATTERN (prev);
5312 if (GET_CODE (pat) == PARALLEL)
5313 pat = XVECEXP (pat, 0, 0);
5314 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5315 return 0;
5320 return align_jumps_log;
5323 /* If we are inside a phony loop, almost any kind of label can turn up as the
5324 first one in the loop. Aligning a braf label causes incorrect switch
5325 destination addresses; we can detect braf labels because they are
5326 followed by a BARRIER.
5327 Applying loop alignment to small constant or switch tables is a waste
5328 of space, so we suppress this too. */
5330 sh_loop_align (rtx label)
5332 rtx next = label;
5335 next = next_nonnote_insn (next);
5336 while (next && LABEL_P (next));
5338 if (! next
5339 || ! INSN_P (next)
5340 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5341 || recog_memoized (next) == CODE_FOR_consttable_2)
5342 return 0;
5344 return align_loops_log;
5347 /* Do a final pass over the function, just before delayed branch
5348 scheduling. */
5350 static void
5351 sh_reorg (void)
5353 rtx first, insn, mova = NULL_RTX;
5354 int num_mova;
5355 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5356 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5358 first = get_insns ();
5359 max_labelno_before_reorg = max_label_num ();
5361 /* We must split call insns before introducing `mova's. If we're
5362 optimizing, they'll have already been split. Otherwise, make
5363 sure we don't split them too late. */
5364 if (! optimize)
5365 split_all_insns_noflow ();
5367 if (TARGET_SHMEDIA)
5368 return;
5370 /* If relaxing, generate pseudo-ops to associate function calls with
5371 the symbols they call. It does no harm to not generate these
5372 pseudo-ops. However, when we can generate them, it enables the
5373 linker to potentially relax the jsr to a bsr, and eliminate the
5374 register load and, possibly, the constant pool entry. */
5376 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5377 if (TARGET_RELAX)
5379 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5380 own purposes. This works because none of the remaining passes
5381 need to look at them.
5383 ??? But it may break in the future. We should use a machine
5384 dependent REG_NOTE, or some other approach entirely. */
5385 for (insn = first; insn; insn = NEXT_INSN (insn))
5387 if (INSN_P (insn))
5389 rtx note;
5391 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5392 NULL_RTX)) != 0)
5393 remove_note (insn, note);
5397 for (insn = first; insn; insn = NEXT_INSN (insn))
5399 rtx pattern, reg, link, set, scan, dies, label;
5400 int rescan = 0, foundinsn = 0;
5402 if (CALL_P (insn))
5404 pattern = PATTERN (insn);
5406 if (GET_CODE (pattern) == PARALLEL)
5407 pattern = XVECEXP (pattern, 0, 0);
5408 if (GET_CODE (pattern) == SET)
5409 pattern = SET_SRC (pattern);
5411 if (GET_CODE (pattern) != CALL
5412 || !MEM_P (XEXP (pattern, 0)))
5413 continue;
5415 reg = XEXP (XEXP (pattern, 0), 0);
5417 else
5419 reg = sfunc_uses_reg (insn);
5420 if (! reg)
5421 continue;
5424 if (!REG_P (reg))
5425 continue;
5427 /* Try scanning backward to find where the register is set. */
5428 link = NULL;
5429 for (scan = PREV_INSN (insn);
5430 scan && !LABEL_P (scan);
5431 scan = PREV_INSN (scan))
5433 if (! INSN_P (scan))
5434 continue;
5436 if (! reg_mentioned_p (reg, scan))
5437 continue;
5439 if (noncall_uses_reg (reg, scan, &set))
5440 break;
5442 if (set)
5444 link = scan;
5445 break;
5449 if (! link)
5450 continue;
5452 /* The register is set at LINK. */
5454 /* We can only optimize the function call if the register is
5455 being set to a symbol. In theory, we could sometimes
5456 optimize calls to a constant location, but the assembler
5457 and linker do not support that at present. */
5458 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5459 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5460 continue;
5462 /* Scan forward from LINK to the place where REG dies, and
5463 make sure that the only insns which use REG are
5464 themselves function calls. */
5466 /* ??? This doesn't work for call targets that were allocated
5467 by reload, since there may not be a REG_DEAD note for the
5468 register. */
5470 dies = NULL_RTX;
5471 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5473 rtx scanset;
5475 /* Don't try to trace forward past a CODE_LABEL if we haven't
5476 seen INSN yet. Ordinarily, we will only find the setting insn
5477 if it is in the same basic block. However,
5478 cross-jumping can insert code labels in between the load and
5479 the call, and can result in situations where a single call
5480 insn may have two targets depending on where we came from. */
5482 if (LABEL_P (scan) && ! foundinsn)
5483 break;
5485 if (! INSN_P (scan))
5486 continue;
5488 /* Don't try to trace forward past a JUMP. To optimize
5489 safely, we would have to check that all the
5490 instructions at the jump destination did not use REG. */
5492 if (JUMP_P (scan))
5493 break;
5495 if (! reg_mentioned_p (reg, scan))
5496 continue;
5498 if (noncall_uses_reg (reg, scan, &scanset))
5499 break;
5501 if (scan == insn)
5502 foundinsn = 1;
5504 if (scan != insn
5505 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5507 /* There is a function call to this register other
5508 than the one we are checking. If we optimize
5509 this call, we need to rescan again below. */
5510 rescan = 1;
5513 /* ??? We shouldn't have to worry about SCANSET here.
5514 We should just be able to check for a REG_DEAD note
5515 on a function call. However, the REG_DEAD notes are
5516 apparently not dependable around libcalls; c-torture
5517 execute/920501-2 is a test case. If SCANSET is set,
5518 then this insn sets the register, so it must have
5519 died earlier. Unfortunately, this will only handle
5520 the cases in which the register is, in fact, set in a
5521 later insn. */
5523 /* ??? We shouldn't have to use FOUNDINSN here.
5524 This dates back to when we used LOG_LINKS to find
5525 the most recent insn which sets the register. */
5527 if (foundinsn
5528 && (scanset
5529 || find_reg_note (scan, REG_DEAD, reg)))
5531 dies = scan;
5532 break;
5536 if (! dies)
5538 /* Either there was a branch, or some insn used REG
5539 other than as a function call address. */
5540 continue;
5543 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5544 on the insn which sets the register, and on each call insn
5545 which uses the register. In final_prescan_insn we look for
5546 the REG_LABEL_OPERAND notes, and output the appropriate label
5547 or pseudo-op. */
5549 label = gen_label_rtx ();
5550 add_reg_note (link, REG_LABEL_OPERAND, label);
5551 add_reg_note (insn, REG_LABEL_OPERAND, label);
5552 if (rescan)
5554 scan = link;
5557 rtx reg2;
5559 scan = NEXT_INSN (scan);
5560 if (scan != insn
5561 && ((CALL_P (scan)
5562 && reg_mentioned_p (reg, scan))
5563 || ((reg2 = sfunc_uses_reg (scan))
5564 && REGNO (reg2) == REGNO (reg))))
5565 add_reg_note (scan, REG_LABEL_OPERAND, label);
5567 while (scan != dies);
5572 if (TARGET_SH2)
5573 fixup_addr_diff_vecs (first);
5575 if (optimize)
5577 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5578 shorten_branches (first);
5581 /* Scan the function looking for move instructions which have to be
5582 changed to pc-relative loads and insert the literal tables. */
5583 label_ref_list_pool = create_alloc_pool ("label references list",
5584 sizeof (struct label_ref_list_d),
5585 30);
5586 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5587 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5589 if (mova_p (insn))
5591 /* ??? Basic block reordering can move a switch table dispatch
5592 below the switch table. Check if that has happened.
5593 We only have the addresses available when optimizing; but then,
5594 this check shouldn't be needed when not optimizing. */
5595 if (!untangle_mova (&num_mova, &mova, insn))
5597 insn = mova;
5598 num_mova = 0;
5601 else if (JUMP_P (insn)
5602 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5603 && num_mova
5604 /* ??? loop invariant motion can also move a mova out of a
5605 loop. Since loop does this code motion anyway, maybe we
5606 should wrap UNSPEC_MOVA into a CONST, so that reload can
5607 move it back. */
5608 && ((num_mova > 1
5609 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5610 || (prev_nonnote_insn (insn)
5611 == XEXP (MOVA_LABELREF (mova), 0))))
5613 rtx scan;
5614 int total;
5616 num_mova--;
5618 /* Some code might have been inserted between the mova and
5619 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5620 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5621 total += get_attr_length (scan);
5623 /* range of mova is 1020, add 4 because pc counts from address of
5624 second instruction after this one, subtract 2 in case pc is 2
5625 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5626 cancels out with alignment effects of the mova itself. */
5627 if (total > 1022)
5629 /* Change the mova into a load, and restart scanning
5630 there. broken_move will then return true for mova. */
5631 fixup_mova (mova);
5632 insn = mova;
5635 if (broken_move (insn)
5636 || (NONJUMP_INSN_P (insn)
5637 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5639 rtx scan;
5640 /* Scan ahead looking for a barrier to stick the constant table
5641 behind. */
5642 rtx barrier = find_barrier (num_mova, mova, insn);
5643 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5644 int need_aligned_label = 0;
5646 if (num_mova && ! mova_p (mova))
5648 /* find_barrier had to change the first mova into a
5649 pcload; thus, we have to start with this new pcload. */
5650 insn = mova;
5651 num_mova = 0;
5653 /* Now find all the moves between the points and modify them. */
5654 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5656 if (LABEL_P (scan))
5657 last_float = 0;
5658 if (NONJUMP_INSN_P (scan)
5659 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5660 need_aligned_label = 1;
5661 if (broken_move (scan))
5663 rtx *patp = &PATTERN (scan), pat = *patp;
5664 rtx src, dst;
5665 rtx lab;
5666 rtx newsrc;
5667 enum machine_mode mode;
5669 if (GET_CODE (pat) == PARALLEL)
5670 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5671 src = SET_SRC (pat);
5672 dst = SET_DEST (pat);
5673 mode = GET_MODE (dst);
5675 if (mode == SImode && hi_const (src)
5676 && REGNO (dst) != FPUL_REG)
5678 int offset = 0;
5680 mode = HImode;
5681 while (GET_CODE (dst) == SUBREG)
5683 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5684 GET_MODE (SUBREG_REG (dst)),
5685 SUBREG_BYTE (dst),
5686 GET_MODE (dst));
5687 dst = SUBREG_REG (dst);
5689 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5691 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5693 /* This must be an insn that clobbers r0. */
5694 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5695 XVECLEN (PATTERN (scan), 0)
5696 - 1);
5697 rtx clobber = *clobberp;
5699 gcc_assert (GET_CODE (clobber) == CLOBBER
5700 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5702 if (last_float
5703 && reg_set_between_p (r0_rtx, last_float_move, scan))
5704 last_float = 0;
5705 if (last_float
5706 && TARGET_SHCOMPACT
5707 && GET_MODE_SIZE (mode) != 4
5708 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5709 last_float = 0;
5710 lab = add_constant (src, mode, last_float);
5711 if (lab)
5712 emit_insn_before (gen_mova (lab), scan);
5713 else
5715 		      /* There will be a REG_UNUSED note for r0 on
5716 			 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5717 			 otherwise reorg:mark_target_live_regs will not
5718 			 consider r0 to be used, and we may end up with a delay
5719 			 slot insn in front of SCAN that clobbers r0. */
5720 rtx note
5721 = find_regno_note (last_float_move, REG_UNUSED, 0);
5723 /* If we are not optimizing, then there may not be
5724 a note. */
5725 if (note)
5726 PUT_REG_NOTE_KIND (note, REG_INC);
5728 *last_float_addr = r0_inc_rtx;
5730 last_float_move = scan;
5731 last_float = src;
5732 newsrc = gen_const_mem (mode,
5733 (((TARGET_SH4 && ! TARGET_FMOVD)
5734 || REGNO (dst) == FPUL_REG)
5735 ? r0_inc_rtx
5736 : r0_rtx));
5737 last_float_addr = &XEXP (newsrc, 0);
5739 /* Remove the clobber of r0. */
5740 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5741 gen_rtx_SCRATCH (Pmode));
5743 /* This is a mova needing a label. Create it. */
5744 else if (GET_CODE (src) == UNSPEC
5745 && XINT (src, 1) == UNSPEC_MOVA
5746 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5748 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5749 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5750 newsrc = gen_rtx_UNSPEC (SImode,
5751 gen_rtvec (1, newsrc),
5752 UNSPEC_MOVA);
5754 else
5756 lab = add_constant (src, mode, 0);
5757 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5758 newsrc = gen_const_mem (mode, newsrc);
5760 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5761 INSN_CODE (scan) = -1;
5764 dump_table (need_aligned_label ? insn : 0, barrier);
5765 insn = barrier;
5768 free_alloc_pool (label_ref_list_pool);
5769 for (insn = first; insn; insn = NEXT_INSN (insn))
5770 PUT_MODE (insn, VOIDmode);
5772 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5773 INSN_ADDRESSES_FREE ();
5774 split_branches (first);
5776 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5777 also has an effect on the register that holds the address of the sfunc.
5778 Insert an extra dummy insn in front of each sfunc that pretends to
5779 use this register. */
5780 if (flag_delayed_branch)
5782 for (insn = first; insn; insn = NEXT_INSN (insn))
5784 rtx reg = sfunc_uses_reg (insn);
5786 if (! reg)
5787 continue;
5788 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5791 #if 0
5792 /* fpscr is not actually a user variable, but we pretend it is for the
5793 sake of the previous optimization passes, since we want it handled like
5794 one. However, we don't have any debugging information for it, so turn
5795 it into a non-user variable now. */
5796 if (TARGET_SH4)
5797 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5798 #endif
5799 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5803 get_dest_uid (rtx label, int max_uid)
5805 rtx dest = next_real_insn (label);
5806 int dest_uid;
5807 if (! dest)
5808 /* This can happen for an undefined label. */
5809 return 0;
5810 dest_uid = INSN_UID (dest);
5811 /* If this is a newly created branch redirection blocking instruction,
5812 we cannot index the branch_uid or insn_addresses arrays with its
5813 uid. But then, we won't need to, because the actual destination is
5814 the following branch. */
5815 while (dest_uid >= max_uid)
5817 dest = NEXT_INSN (dest);
5818 dest_uid = INSN_UID (dest);
5820 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5821 return 0;
5822 return dest_uid;
5825 /* Split condbranches that are out of range. Also add clobbers for
5826 scratch registers that are needed in far jumps.
5827 We do this before delay slot scheduling, so that it can take our
5828 newly created instructions into account. It also allows us to
5829 find branches with common targets more easily. */
5831 static void
5832 split_branches (rtx first)
5834 rtx insn;
5835 struct far_branch **uid_branch, *far_branch_list = 0;
5836 int max_uid = get_max_uid ();
5837 int ok;
5839 /* Find out which branches are out of range. */
5840 shorten_branches (first);
5842 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5843 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5845 for (insn = first; insn; insn = NEXT_INSN (insn))
5846 if (! INSN_P (insn))
5847 continue;
5848 else if (INSN_DELETED_P (insn))
5850 /* Shorten_branches would split this instruction again,
5851 so transform it into a note. */
5852 SET_INSN_DELETED (insn);
5854 else if (JUMP_P (insn)
5855 /* Don't mess with ADDR_DIFF_VEC */
5856 && (GET_CODE (PATTERN (insn)) == SET
5857 || GET_CODE (PATTERN (insn)) == RETURN))
5859 enum attr_type type = get_attr_type (insn);
5860 if (type == TYPE_CBRANCH)
5862 rtx next, beyond;
5864 if (get_attr_length (insn) > 4)
5866 rtx src = SET_SRC (PATTERN (insn));
5867 rtx olabel = XEXP (XEXP (src, 1), 0);
5868 int addr = INSN_ADDRESSES (INSN_UID (insn));
5869 rtx label = 0;
5870 int dest_uid = get_dest_uid (olabel, max_uid);
5871 struct far_branch *bp = uid_branch[dest_uid];
5873 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5874 the label if the LABEL_NUSES count drops to zero. There is
5875 always a jump_optimize pass that sets these values, but it
5876 proceeds to delete unreferenced code, and then if not
5877 optimizing, to un-delete the deleted instructions, thus
5878 leaving labels with too low uses counts. */
5879 if (! optimize)
5881 JUMP_LABEL (insn) = olabel;
5882 LABEL_NUSES (olabel)++;
5884 if (! bp)
5886 bp = (struct far_branch *) alloca (sizeof *bp);
5887 uid_branch[dest_uid] = bp;
5888 bp->prev = far_branch_list;
5889 far_branch_list = bp;
5890 bp->far_label
5891 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5892 LABEL_NUSES (bp->far_label)++;
5894 else
5896 label = bp->near_label;
5897 if (! label && bp->address - addr >= CONDJUMP_MIN)
5899 rtx block = bp->insert_place;
5901 if (GET_CODE (PATTERN (block)) == RETURN)
5902 block = PREV_INSN (block);
5903 else
5904 block = gen_block_redirect (block,
5905 bp->address, 2);
5906 label = emit_label_after (gen_label_rtx (),
5907 PREV_INSN (block));
5908 bp->near_label = label;
5910 else if (label && ! NEXT_INSN (label))
5912 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5913 bp->insert_place = insn;
5914 else
5915 gen_far_branch (bp);
5918 if (! label
5919 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5921 bp->near_label = label = gen_label_rtx ();
5922 bp->insert_place = insn;
5923 bp->address = addr;
5925 ok = redirect_jump (insn, label, 0);
5926 gcc_assert (ok);
5928 else
5930 /* get_attr_length (insn) == 2 */
5931 /* Check if we have a pattern where reorg wants to redirect
5932 the branch to a label from an unconditional branch that
5933 is too far away. */
5934 /* We can't use JUMP_LABEL here because it might be undefined
5935 when not optimizing. */
5936 /* A syntax error might cause beyond to be NULL_RTX. */
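	      /* Sketch of the range test used below (an informal reading, not
		 from the original sources): with delta = INSN_ADDRESSES of the
		 target minus INSN_ADDRESSES of this insn, the unsigned comparison
		 (unsigned) (delta + 252) > 252 + 258 + 2 is the usual
		 single-compare range check; it is true exactly when delta < -252
		 or delta > 260, i.e. when the target is presumably out of reach
		 of a short branch.  */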
5937 beyond
5938 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5939 0));
5941 if (beyond
5942 && (JUMP_P (beyond)
5943 || ((beyond = next_active_insn (beyond))
5944 && JUMP_P (beyond)))
5945 && GET_CODE (PATTERN (beyond)) == SET
5946 && recog_memoized (beyond) == CODE_FOR_jump_compact
5947 && ((INSN_ADDRESSES
5948 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5949 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5950 > 252 + 258 + 2))
5951 gen_block_redirect (beyond,
5952 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5955 next = next_active_insn (insn);
5957 if (next
5958 && (JUMP_P (next)
5959 || ((next = next_active_insn (next))
5960 && JUMP_P (next)))
5961 && GET_CODE (PATTERN (next)) == SET
5962 && recog_memoized (next) == CODE_FOR_jump_compact
5963 && ((INSN_ADDRESSES
5964 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5965 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5966 > 252 + 258 + 2))
5967 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5969 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5971 int addr = INSN_ADDRESSES (INSN_UID (insn));
5972 rtx far_label = 0;
5973 int dest_uid = 0;
5974 struct far_branch *bp;
5976 if (type == TYPE_JUMP)
5978 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5979 dest_uid = get_dest_uid (far_label, max_uid);
5980 if (! dest_uid)
5982 /* Parse errors can lead to labels outside
5983 the insn stream. */
5984 if (! NEXT_INSN (far_label))
5985 continue;
5987 if (! optimize)
5989 JUMP_LABEL (insn) = far_label;
5990 LABEL_NUSES (far_label)++;
5992 redirect_jump (insn, NULL_RTX, 1);
5993 far_label = 0;
5996 bp = uid_branch[dest_uid];
5997 if (! bp)
5999 bp = (struct far_branch *) alloca (sizeof *bp);
6000 uid_branch[dest_uid] = bp;
6001 bp->prev = far_branch_list;
6002 far_branch_list = bp;
6003 bp->near_label = 0;
6004 bp->far_label = far_label;
6005 if (far_label)
6006 LABEL_NUSES (far_label)++;
6008 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6009 if (addr - bp->address <= CONDJUMP_MAX)
6010 emit_label_after (bp->near_label, PREV_INSN (insn));
6011 else
6013 gen_far_branch (bp);
6014 bp->near_label = 0;
6016 else
6017 bp->near_label = 0;
6018 bp->address = addr;
6019 bp->insert_place = insn;
6020 if (! far_label)
6021 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6022 else
6023 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6026 /* Generate all pending far branches,
6027 and free our references to the far labels. */
6028 while (far_branch_list)
6030 if (far_branch_list->near_label
6031 && ! NEXT_INSN (far_branch_list->near_label))
6032 gen_far_branch (far_branch_list);
6033 if (optimize
6034 && far_branch_list->far_label
6035 && ! --LABEL_NUSES (far_branch_list->far_label))
6036 delete_insn (far_branch_list->far_label);
6037 far_branch_list = far_branch_list->prev;
6040 /* Instruction length information is no longer valid due to the new
6041 instructions that have been generated. */
6042 init_insn_lengths ();
6045 /* Dump out instruction addresses, which is useful for debugging the
6046 constant pool table stuff.
6048 If relaxing, output the label and pseudo-ops used to link together
6049 calls and the instruction which set the registers. */
6051 /* ??? The addresses printed by this routine for insns are nonsense for
6052 insns which are inside of a sequence where none of the inner insns have
6053 variable length. This is because the second pass of shorten_branches
6054 does not bother to update them. */
6056 void
6057 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6058 int noperands ATTRIBUTE_UNUSED)
6060 if (TARGET_DUMPISIZE)
6061 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6063 if (TARGET_RELAX)
6065 rtx note;
6067 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6068 if (note)
6070 rtx pattern;
6072 pattern = PATTERN (insn);
6073 if (GET_CODE (pattern) == PARALLEL)
6074 pattern = XVECEXP (pattern, 0, 0);
6075 switch (GET_CODE (pattern))
6077 case SET:
6078 if (GET_CODE (SET_SRC (pattern)) != CALL
6079 && get_attr_type (insn) != TYPE_SFUNC)
6081 targetm.asm_out.internal_label
6082 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6083 break;
6085 /* else FALLTHROUGH */
6086 case CALL:
6087 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6088 CODE_LABEL_NUMBER (XEXP (note, 0)));
6089 break;
6091 default:
6092 gcc_unreachable ();
6098 /* Dump out any constants accumulated in the final pass. These will
6099 only be labels. */
6101 const char *
6102 output_jump_label_table (void)
6104 int i;
6106 if (pool_size)
6108 fprintf (asm_out_file, "\t.align 2\n");
6109 for (i = 0; i < pool_size; i++)
6111 pool_node *p = &pool_vector[i];
6113 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6114 CODE_LABEL_NUMBER (p->label));
6115 output_asm_insn (".long %O0", &p->value);
6117 pool_size = 0;
6120 return "";
6123 /* A full frame looks like:
6125 arg-5
6126 arg-4
6127 [ if current_function_anonymous_args
6128 arg-3
6129 arg-2
6130 arg-1
6131 arg-0 ]
6132 saved-fp
6133 saved-r10
6134 saved-r11
6135 saved-r12
6136 saved-pr
6137 local-n
6139 local-1
6140 local-0 <- fp points here. */
6142 /* Number of bytes pushed for anonymous args, used to pass information
6143 between expand_prologue and expand_epilogue. */
6145 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6146 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6147 for an epilogue and a negative value means that it's for a sibcall
6148 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6149 all the registers that are about to be restored, and hence dead. */
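/* Usage sketch (illustrative only; the exact arguments are simplified from
   the calls further down in this file): the prologue allocates the frame with
   something like
     output_stack_adjust (-rounded_frame_size (d), stack_pointer_rtx,
			  0, NULL, true);
   while the epilogue releases it by passing a positive SIZE and a nonzero
   (negative, for a sibcall) EPILOGUE_P.  */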
6151 static void
6152 output_stack_adjust (int size, rtx reg, int epilogue_p,
6153 HARD_REG_SET *live_regs_mask, bool frame_p)
6155 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6156 if (size)
6158 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6160 /* This test is bogus, as output_stack_adjust is used to re-align the
6161 stack. */
6162 #if 0
6163 gcc_assert (!(size % align));
6164 #endif
6166 if (CONST_OK_FOR_ADD (size))
6167 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6168 /* Try to do it with two partial adjustments; however, we must make
6169 sure that the stack is properly aligned at all times, in case
6170 an interrupt occurs between the two partial adjustments. */
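	 /* Worked example (hypothetical numbers): assuming SIZE = 200 is outside
	    the add-immediate range and the stack boundary is 4 bytes, the first
	    step adds 200/2 & -4 = 100 and the second adds the remaining 100;
	    each partial value is a multiple of the alignment, so the stack stays
	    aligned even if an interrupt fires between the two additions.  */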
6171 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6172 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6174 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6175 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6177 else
6179 rtx const_reg;
6180 rtx insn;
6181 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6182 int i;
6184 /* If TEMP is invalid, we could temporarily save a general
6185 register to MACL. However, there is currently no need
6186 to handle this case, so just die when we see it. */
6187 if (epilogue_p < 0
6188 || current_function_interrupt
6189 || ! call_really_used_regs[temp] || fixed_regs[temp])
6190 temp = -1;
6191 if (temp < 0 && ! current_function_interrupt
6192 && (TARGET_SHMEDIA || epilogue_p >= 0))
6194 HARD_REG_SET temps;
6195 COPY_HARD_REG_SET (temps, call_used_reg_set);
6196 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6197 if (epilogue_p > 0)
6199 int nreg = 0;
6200 if (crtl->return_rtx)
6202 enum machine_mode mode;
6203 mode = GET_MODE (crtl->return_rtx);
6204 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6205 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6207 for (i = 0; i < nreg; i++)
6208 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6209 if (crtl->calls_eh_return)
6211 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6212 for (i = 0; i <= 3; i++)
6213 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6216 if (TARGET_SHMEDIA && epilogue_p < 0)
6217 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6218 CLEAR_HARD_REG_BIT (temps, i);
6219 if (epilogue_p <= 0)
6221 for (i = FIRST_PARM_REG;
6222 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6223 CLEAR_HARD_REG_BIT (temps, i);
6224 if (cfun->static_chain_decl != NULL)
6225 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6227 temp = scavenge_reg (&temps);
6229 if (temp < 0 && live_regs_mask)
6231 HARD_REG_SET temps;
6233 COPY_HARD_REG_SET (temps, *live_regs_mask);
6234 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6235 temp = scavenge_reg (&temps);
6237 if (temp < 0)
6239 rtx adj_reg, tmp_reg, mem;
6241 /* If we reached here, the most likely case is the (sibcall)
6242 	     epilogue for non-SHmedia.  Put a special push/pop sequence
6243 	     for such a case as the last resort.  This looks lengthy but
6244 	     should not be a problem because it seems to be very
6245 	     rare. */
6247 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6250 /* ??? There is still the slight possibility that r4 or
6251 r5 have been reserved as fixed registers or assigned
6252 as global registers, and they change during an
6253 interrupt. There are possible ways to handle this:
6255 - If we are adjusting the frame pointer (r14), we can do
6256 with a single temp register and an ordinary push / pop
6257 on the stack.
6258 - Grab any call-used or call-saved registers (i.e. not
6259 fixed or globals) for the temps we need. We might
6260 also grab r14 if we are adjusting the stack pointer.
6261 If we can't find enough available registers, issue
6262 a diagnostic and die - the user must have reserved
6263 way too many registers.
6264 But since all this is rather unlikely to happen and
6265 would require extra testing, we just die if r4 / r5
6266 are not available. */
6267 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6268 && !global_regs[4] && !global_regs[5]);
6270 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6271 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6272 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6273 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6274 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6275 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6276 emit_move_insn (mem, tmp_reg);
6277 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6278 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6279 emit_move_insn (mem, tmp_reg);
6280 emit_move_insn (reg, adj_reg);
6281 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6282 emit_move_insn (adj_reg, mem);
6283 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6284 emit_move_insn (tmp_reg, mem);
6285 /* Tell flow the insns that pop r4/r5 aren't dead. */
6286 emit_use (tmp_reg);
6287 emit_use (adj_reg);
6288 return;
6290 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6292 /* If SIZE is negative, subtract the positive value.
6293 This sometimes allows a constant pool entry to be shared
6294 between prologue and epilogue code. */
6295 if (size < 0)
6297 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6298 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6300 else
6302 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6303 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6305 if (! epilogue_p)
6306 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6307 gen_rtx_SET (VOIDmode, reg,
6308 gen_rtx_PLUS (SImode, reg,
6309 GEN_INT (size))));
6314 static rtx
6315 frame_insn (rtx x)
6317 x = emit_insn (x);
6318 RTX_FRAME_RELATED_P (x) = 1;
6319 return x;
6322 /* Output RTL to push register RN onto the stack. */
6324 static rtx
6325 push (int rn)
6327 rtx x;
6328 if (rn == FPUL_REG)
6329 x = gen_push_fpul ();
6330 else if (rn == FPSCR_REG)
6331 x = gen_push_fpscr ();
6332 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6333 && FP_OR_XD_REGISTER_P (rn))
6335 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6336 return NULL_RTX;
6337 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6339 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6340 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6341 else
6342 x = gen_push (gen_rtx_REG (SImode, rn));
6344 x = frame_insn (x);
6345 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6346 return x;
6349 /* Output RTL to pop register RN from the stack. */
6351 static void
6352 pop (int rn)
6354 rtx x;
6355 if (rn == FPUL_REG)
6356 x = gen_pop_fpul ();
6357 else if (rn == FPSCR_REG)
6358 x = gen_pop_fpscr ();
6359 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6360 && FP_OR_XD_REGISTER_P (rn))
6362 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6363 return;
6364 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6366 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6367 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6368 else
6369 x = gen_pop (gen_rtx_REG (SImode, rn));
6371 x = emit_insn (x);
6372 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6375 /* Generate code to push the regs specified in the mask. */
6377 static void
6378 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6380 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6381 int skip_fpscr = 0;
6383 /* Push PR last; this gives better latencies after the prologue, and
6384 candidates for the return delay slot when there are no general
6385 registers pushed. */
6386 for (; i < FIRST_PSEUDO_REGISTER; i++)
6388 /* If this is an interrupt handler, and the SZ bit varies,
6389 and we have to push any floating point register, we need
6390 to switch to the correct precision first. */
6391 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6392 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6394 HARD_REG_SET unsaved;
6396 push (FPSCR_REG);
6397 COMPL_HARD_REG_SET (unsaved, *mask);
6398 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6399 skip_fpscr = 1;
6401 if (i != PR_REG
6402 && (i != FPSCR_REG || ! skip_fpscr)
6403 && TEST_HARD_REG_BIT (*mask, i))
6405 /* If the ISR has RESBANK attribute assigned, don't push any of
6406 the following registers - R0-R14, MACH, MACL and GBR. */
6407 if (! (sh_cfun_resbank_handler_p ()
6408 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6409 || i == MACH_REG
6410 || i == MACL_REG
6411 || i == GBR_REG)))
6412 push (i);
6416 /* Push banked registers last to improve delay slot opportunities. */
6417 if (interrupt_handler)
6419 bool use_movml = false;
6421 if (TARGET_SH2A)
6423 unsigned int count = 0;
6425 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6426 if (TEST_HARD_REG_BIT (*mask, i))
6427 count++;
6428 else
6429 break;
6431 /* Use movml when all banked registers are pushed. */
6432 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6433 use_movml = true;
6436 if (use_movml)
6438 rtx x, mem, reg, set;
6439 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6441 	  /* We must avoid scheduling the multiple-store insn together
6442 	     with other insns. */
6443 emit_insn (gen_blockage ());
6444 x = gen_movml_push_banked (sp_reg);
6445 x = frame_insn (x);
6446 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6448 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6449 reg = gen_rtx_REG (SImode, i);
6450 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6453 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6454 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6455 emit_insn (gen_blockage ());
6457 else
6458 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6459 if (TEST_HARD_REG_BIT (*mask, i))
6460 push (i);
6463 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6464 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6465 push (PR_REG);
6468 /* Calculate how much extra space is needed to save all callee-saved
6469 target registers.
6470 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6472 static int
6473 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6475 int reg;
6476 int stack_space = 0;
6477 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6479 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6480 if ((! call_really_used_regs[reg] || interrupt_handler)
6481 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6482 /* Leave space to save this target register on the stack,
6483 in case target register allocation wants to use it. */
6484 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6485 return stack_space;
6488 /* Decide whether we should reserve space for callee-save target registers,
6489 in case target register allocation wants to use them. REGS_SAVED is
6490 the space, in bytes, that is already required for register saves.
6491 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6493 static int
6494 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6495 HARD_REG_SET *live_regs_mask)
6497 if (optimize_size)
6498 return 0;
6499 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6502 /* Decide how much space to reserve for callee-save target registers
6503 in case target register allocation wants to use them.
6504 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6506 static int
6507 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6509 if (shmedia_space_reserved_for_target_registers)
6510 return shmedia_target_regs_stack_space (live_regs_mask);
6511 else
6512 return 0;
6515 /* Work out the registers which need to be saved, both as a mask and a
6516 count of saved words. Return the count.
6518 If doing a pragma interrupt function, then push all regs used by the
6519 function, and if we call another function (we can tell by looking at PR),
6520 make sure that all the regs it clobbers are safe too. */
6522 static int
6523 calc_live_regs (HARD_REG_SET *live_regs_mask)
6525 unsigned int reg;
6526 int count;
6527 tree attrs;
6528 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6529 bool nosave_low_regs;
6530 int pr_live, has_call;
6532 attrs = DECL_ATTRIBUTES (current_function_decl);
6533 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6534 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6535 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6536 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6538 CLEAR_HARD_REG_SET (*live_regs_mask);
6539 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6540 && df_regs_ever_live_p (FPSCR_REG))
6541 target_flags &= ~MASK_FPU_SINGLE;
6542 /* If we can save a lot of saves by switching to double mode, do that. */
6543 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6544 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6545 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6546 && (! call_really_used_regs[reg]
6547 || interrupt_handler)
6548 && ++count > 2)
6550 target_flags &= ~MASK_FPU_SINGLE;
6551 break;
6553 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6554 knows how to use it. That means the pseudo originally allocated for
6555 the initial value can become the PR_MEDIA_REG hard register, as seen for
6556 execute/20010122-1.c:test9. */
6557 if (TARGET_SHMEDIA)
6558 /* ??? this function is called from initial_elimination_offset, hence we
6559 can't use the result of sh_media_register_for_return here. */
6560 pr_live = sh_pr_n_sets ();
6561 else
6563 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6564 pr_live = (pr_initial
6565 ? (!REG_P (pr_initial)
6566 || REGNO (pr_initial) != (PR_REG))
6567 : df_regs_ever_live_p (PR_REG));
6568       /* For SHcompact, if not optimizing, we end up with a memory reference
6569 using the return address pointer for __builtin_return_address even
6570 though there is no actual need to put the PR register on the stack. */
6571 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6573 /* Force PR to be live if the prologue has to call the SHmedia
6574 argument decoder or register saver. */
6575 if (TARGET_SHCOMPACT
6576 && ((crtl->args.info.call_cookie
6577 & ~ CALL_COOKIE_RET_TRAMP (1))
6578 || crtl->saves_all_registers))
6579 pr_live = 1;
6580 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6581 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6583 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6584 ? pr_live
6585 : interrupt_handler
6586 ? (/* Need to save all the regs ever live. */
6587 (df_regs_ever_live_p (reg)
6588 || (call_really_used_regs[reg]
6589 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6590 || reg == PIC_OFFSET_TABLE_REGNUM)
6591 && has_call)
6592 || (TARGET_SHMEDIA && has_call
6593 && REGISTER_NATURAL_MODE (reg) == SImode
6594 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6595 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6596 && reg != RETURN_ADDRESS_POINTER_REGNUM
6597 && reg != T_REG && reg != GBR_REG
6598 	     /* Push fpscr only on targets which have an FPU. */
6599 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6600 : (/* Only push those regs which are used and need to be saved. */
6601 (TARGET_SHCOMPACT
6602 && flag_pic
6603 && crtl->args.info.call_cookie
6604 && reg == PIC_OFFSET_TABLE_REGNUM)
6605 || (df_regs_ever_live_p (reg)
6606 && ((!call_really_used_regs[reg]
6607 && !(reg != PIC_OFFSET_TABLE_REGNUM
6608 && fixed_regs[reg] && call_used_regs[reg]))
6609 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6610 || (crtl->calls_eh_return
6611 && (reg == EH_RETURN_DATA_REGNO (0)
6612 || reg == EH_RETURN_DATA_REGNO (1)
6613 || reg == EH_RETURN_DATA_REGNO (2)
6614 || reg == EH_RETURN_DATA_REGNO (3)))
6615 || ((reg == MACL_REG || reg == MACH_REG)
6616 && df_regs_ever_live_p (reg)
6617 && sh_cfun_attr_renesas_p ())
6620 SET_HARD_REG_BIT (*live_regs_mask, reg);
6621 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6623 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6624 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6626 if (FP_REGISTER_P (reg))
6628 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6630 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6631 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6634 else if (XD_REGISTER_P (reg))
6636 /* Must switch to double mode to access these registers. */
6637 target_flags &= ~MASK_FPU_SINGLE;
6641 if (nosave_low_regs && reg == R8_REG)
6642 break;
6644 /* If we have a target register optimization pass after prologue / epilogue
6645 threading, we need to assume all target registers will be live even if
6646 they aren't now. */
6647 if (flag_branch_target_load_optimize2
6648 && TARGET_SAVE_ALL_TARGET_REGS
6649 && shmedia_space_reserved_for_target_registers)
6650 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6651 if ((! call_really_used_regs[reg] || interrupt_handler)
6652 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6654 SET_HARD_REG_BIT (*live_regs_mask, reg);
6655 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6657 /* If this is an interrupt handler, we don't have any call-clobbered
6658 registers we can conveniently use for target register save/restore.
6659 Make sure we save at least one general purpose register when we need
6660 to save target registers. */
6661 if (interrupt_handler
6662 && hard_reg_set_intersect_p (*live_regs_mask,
6663 reg_class_contents[TARGET_REGS])
6664 && ! hard_reg_set_intersect_p (*live_regs_mask,
6665 reg_class_contents[GENERAL_REGS]))
6667 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6668 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6671 return count;
6674 /* Code to generate prologue and epilogue sequences */
6676 /* PUSHED is the number of bytes that are being pushed on the
6677 stack for register saves. Return the frame size, padded
6678 appropriately so that the stack stays properly aligned. */
6679 static HOST_WIDE_INT
6680 rounded_frame_size (int pushed)
6682 HOST_WIDE_INT size = get_frame_size ();
6683 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6685 if (ACCUMULATE_OUTGOING_ARGS)
6686 size += crtl->outgoing_args_size;
6688 return ((size + pushed + align - 1) & -align) - pushed;
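/* Illustrative example (hypothetical numbers): with get_frame_size () == 10,
   PUSHED == 4 and an 8-byte STACK_BOUNDARY,
   ((10 + 4 + 8 - 1) & -8) - 4 == 12, so the frame plus the pushed registers
   together occupy an aligned 16 bytes.  */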
6691 /* Choose a call-clobbered target-branch register that remains
6692 unchanged along the whole function. We set it up as the return
6693 value in the prologue. */
6695 sh_media_register_for_return (void)
6697 int regno;
6698 int tr0_used;
6700 if (! current_function_is_leaf)
6701 return -1;
6702 if (lookup_attribute ("interrupt_handler",
6703 DECL_ATTRIBUTES (current_function_decl)))
6704 return -1;
6705 if (sh_cfun_interrupt_handler_p ())
6706 return -1;
6708 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6710 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6711 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6712 return regno;
6714 return -1;
6717 /* The maximum registers we need to save are:
6718 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6719 - 32 floating point registers (for each pair, we save none,
6720 one single precision value, or a double precision value).
6721 - 8 target registers
6722 - add 1 entry for a delimiter. */
6723 #define MAX_SAVED_REGS (62+32+8)
6725 typedef struct save_entry_s
6727 unsigned char reg;
6728 unsigned char mode;
6729 short offset;
6730 } save_entry;
6732 #define MAX_TEMPS 4
6734 /* There will be a delimiter entry with VOIDmode both at the start and the
6735 end of a filled in schedule. The end delimiter has the offset of the
6736 save with the smallest (i.e. most negative) offset. */
6737 typedef struct save_schedule_s
6739 save_entry entries[MAX_SAVED_REGS + 2];
6740 int temps[MAX_TEMPS+1];
6741 } save_schedule;
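/* Illustrative sketch of a filled-in schedule (register numbers A and B are
   placeholders, not real SH register numbers):

     { reg = -1, mode = VOIDmode, offset =   0 }   start delimiter
     { reg =  A, mode = DImode,   offset =  -8 }   8-byte-aligned save
     { reg =  B, mode = SFmode,   offset = -12 }   4-byte save
     { reg = -1, mode = VOIDmode, offset = -12 }   end delimiter

   The end delimiter repeats the smallest (most negative) offset, as described
   above.  */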
6743 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6744 use reverse order. Returns the last entry written to (not counting
6745 the delimiter). OFFSET_BASE is a number to be added to all offset
6746 entries. */
6748 static save_entry *
6749 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6750 int offset_base)
6752 int align, i;
6753 save_entry *entry = schedule->entries;
6754 int tmpx = 0;
6755 int offset;
6757 if (! current_function_interrupt)
6758 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6759 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6760 && ! FUNCTION_ARG_REGNO_P (i)
6761 && i != FIRST_RET_REG
6762 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6763 && ! (crtl->calls_eh_return
6764 && (i == EH_RETURN_STACKADJ_REGNO
6765 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6766 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6767 schedule->temps[tmpx++] = i;
6768 entry->reg = -1;
6769 entry->mode = VOIDmode;
6770 entry->offset = offset_base;
6771 entry++;
6772 /* We loop twice: first, we save 8-byte aligned registers in the
6773 higher addresses, that are known to be aligned. Then, we
6774 proceed to saving 32-bit registers that don't need 8-byte
6775 alignment.
6776 If this is an interrupt function, all registers that need saving
6777      need to be saved in full.  Moreover, we need to postpone saving
6778 target registers till we have saved some general purpose registers
6779 we can then use as scratch registers. */
6780 offset = offset_base;
6781 for (align = 1; align >= 0; align--)
6783 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6784 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6786 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6787 int reg = i;
6789 if (current_function_interrupt)
6791 if (TARGET_REGISTER_P (i))
6792 continue;
6793 if (GENERAL_REGISTER_P (i))
6794 mode = DImode;
6796 if (mode == SFmode && (i % 2) == 1
6797 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6798 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6800 mode = DFmode;
6801 i--;
6802 reg--;
6805 /* If we're doing the aligned pass and this is not aligned,
6806 or we're doing the unaligned pass and this is aligned,
6807 skip it. */
6808 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6809 != align)
6810 continue;
6812 if (current_function_interrupt
6813 && GENERAL_REGISTER_P (i)
6814 && tmpx < MAX_TEMPS)
6815 schedule->temps[tmpx++] = i;
6817 offset -= GET_MODE_SIZE (mode);
6818 entry->reg = i;
6819 entry->mode = mode;
6820 entry->offset = offset;
6821 entry++;
6823 if (align && current_function_interrupt)
6824 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6825 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6827 offset -= GET_MODE_SIZE (DImode);
6828 entry->reg = i;
6829 entry->mode = DImode;
6830 entry->offset = offset;
6831 entry++;
6834 entry->reg = -1;
6835 entry->mode = VOIDmode;
6836 entry->offset = offset;
6837 schedule->temps[tmpx] = -1;
6838 return entry - 1;
6841 void
6842 sh_expand_prologue (void)
6844 HARD_REG_SET live_regs_mask;
6845 int d, i;
6846 int d_rounding = 0;
6847 int save_flags = target_flags;
6848 int pretend_args;
6849 tree sp_switch_attr
6850 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6852 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6854 /* We have pretend args if we had an object sent partially in registers
6855 and partially on the stack, e.g. a large structure. */
6856 pretend_args = crtl->args.pretend_args_size;
6857 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6858 && (NPARM_REGS(SImode)
6859 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6860 pretend_args = 0;
6861 /* Dwarf2 module doesn't expect frame related insns here. */
6862 output_stack_adjust (-pretend_args
6863 - crtl->args.info.stack_regs * 8,
6864 stack_pointer_rtx, 0, NULL, false);
6866 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6867 /* We're going to use the PIC register to load the address of the
6868 incoming-argument decoder and/or of the return trampoline from
6869 the GOT, so make sure the PIC register is preserved and
6870 initialized. */
6871 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6873 if (TARGET_SHCOMPACT
6874 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6876 int reg;
6878 /* First, make all registers with incoming arguments that will
6879 be pushed onto the stack live, so that register renaming
6880 doesn't overwrite them. */
6881 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6882 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6883 >= NPARM_REGS (SImode) - reg)
6884 for (; reg < NPARM_REGS (SImode); reg++)
6885 emit_insn (gen_shcompact_preserve_incoming_args
6886 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6887 else if (CALL_COOKIE_INT_REG_GET
6888 (crtl->args.info.call_cookie, reg) == 1)
6889 emit_insn (gen_shcompact_preserve_incoming_args
6890 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6892 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6893 stack_pointer_rtx);
6894 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6895 GEN_INT (crtl->args.info.call_cookie));
6896 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6897 gen_rtx_REG (SImode, R0_REG));
6899 else if (TARGET_SHMEDIA)
6901 int tr = sh_media_register_for_return ();
6903 if (tr >= 0)
6904 emit_move_insn (gen_rtx_REG (DImode, tr),
6905 gen_rtx_REG (DImode, PR_MEDIA_REG));
6908 /* Emit the code for SETUP_VARARGS. */
6909 if (cfun->stdarg)
6911 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6913 /* Push arg regs as if they'd been provided by caller in stack. */
6914 for (i = 0; i < NPARM_REGS(SImode); i++)
6916 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6918 if (i >= (NPARM_REGS(SImode)
6919 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6921 break;
6922 push (rn);
6927 /* If we're supposed to switch stacks at function entry, do so now. */
6928 if (sp_switch_attr)
6930 rtx lab, newsrc;
6931 /* The argument specifies a variable holding the address of the
6932 stack the interrupt function should switch to/from at entry/exit. */
6933 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6934 const char *s
6935 = ggc_strdup (TREE_STRING_POINTER (arg));
6936 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6938 lab = add_constant (sp_switch, SImode, 0);
6939 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6940 newsrc = gen_const_mem (SImode, newsrc);
6942 emit_insn (gen_sp_switch_1 (newsrc));
6945 d = calc_live_regs (&live_regs_mask);
6946 /* ??? Maybe we could save some switching if we can move a mode switch
6947 that already happens to be at the function start into the prologue. */
6948 if (target_flags != save_flags && ! current_function_interrupt)
6949 emit_insn (gen_toggle_sz ());
6951 if (TARGET_SH5)
6953 int offset_base, offset;
6954 rtx r0 = NULL_RTX;
6955 int offset_in_r0 = -1;
6956 int sp_in_r0 = 0;
6957 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6958 int total_size, save_size;
6959 save_schedule schedule;
6960 save_entry *entry;
6961 int *tmp_pnt;
6963 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6964 && ! current_function_interrupt)
6965 r0 = gen_rtx_REG (Pmode, R0_REG);
6967       /* D is the actual number of bytes that we need for saving registers;
6968 	 however, in initial_elimination_offset we have committed to using
6969 	 an additional TREGS_SPACE amount of bytes.  In order to keep both
6970 	 the addresses of arguments supplied by the caller and local variables
6971 	 valid, we must keep this gap.  Place it between the incoming
6972 arguments and the actually saved registers in a bid to optimize
6973 locality of reference. */
6974 total_size = d + tregs_space;
6975 total_size += rounded_frame_size (total_size);
6976 save_size = total_size - rounded_frame_size (d);
6977 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6978 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6979 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6981 /* If adjusting the stack in a single step costs nothing extra, do so.
6982 I.e. either if a single addi is enough, or we need a movi anyway,
6983 and we don't exceed the maximum offset range (the test for the
6984 latter is conservative for simplicity). */
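      /* Informal note (an interpretation, not from the original comments):
	 when the condition below holds, d_rounding is bumped so that the
	 stack adjustment just below allocates the full TOTAL_SIZE in one
	 step and the later rounded_frame_size adjustment degenerates to
	 zero.  */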
6985 if (TARGET_SHMEDIA
6986 && (CONST_OK_FOR_I10 (-total_size)
6987 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6988 && total_size <= 2044)))
6989 d_rounding = total_size - save_size;
6991 offset_base = d + d_rounding;
6993 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6994 0, NULL, true);
6996 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6997 tmp_pnt = schedule.temps;
6998 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7000 enum machine_mode mode = (enum machine_mode) entry->mode;
7001 unsigned int reg = entry->reg;
7002 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7003 rtx orig_reg_rtx;
7005 offset = entry->offset;
7007 reg_rtx = gen_rtx_REG (mode, reg);
7009 mem_rtx = gen_frame_mem (mode,
7010 gen_rtx_PLUS (Pmode,
7011 stack_pointer_rtx,
7012 GEN_INT (offset)));
7014 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7016 gcc_assert (r0);
7017 mem_rtx = NULL_RTX;
7020 if (HAVE_PRE_DECREMENT
7021 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7022 || mem_rtx == NULL_RTX
7023 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7025 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7027 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7028 pre_dec = NULL_RTX;
7029 else
7031 mem_rtx = NULL_RTX;
7032 offset += GET_MODE_SIZE (mode);
7036 if (mem_rtx != NULL_RTX)
7037 goto addr_ok;
7039 if (offset_in_r0 == -1)
7041 emit_move_insn (r0, GEN_INT (offset));
7042 offset_in_r0 = offset;
7044 else if (offset != offset_in_r0)
7046 emit_move_insn (r0,
7047 gen_rtx_PLUS
7048 (Pmode, r0,
7049 GEN_INT (offset - offset_in_r0)));
7050 offset_in_r0 += offset - offset_in_r0;
7053 if (pre_dec != NULL_RTX)
7055 if (! sp_in_r0)
7057 emit_move_insn (r0,
7058 gen_rtx_PLUS
7059 (Pmode, r0, stack_pointer_rtx));
7060 sp_in_r0 = 1;
7063 offset -= GET_MODE_SIZE (mode);
7064 offset_in_r0 -= GET_MODE_SIZE (mode);
7066 mem_rtx = pre_dec;
7068 else if (sp_in_r0)
7069 mem_rtx = gen_frame_mem (mode, r0);
7070 else
7071 mem_rtx = gen_frame_mem (mode,
7072 gen_rtx_PLUS (Pmode,
7073 stack_pointer_rtx,
7074 r0));
7076 /* We must not use an r0-based address for target-branch
7077 registers or for special registers without pre-dec
7078 memory addresses, since we store their values in r0
7079 first. */
7080 gcc_assert (!TARGET_REGISTER_P (reg)
7081 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7082 || mem_rtx == pre_dec));
7084 addr_ok:
7085 orig_reg_rtx = reg_rtx;
7086 if (TARGET_REGISTER_P (reg)
7087 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7088 && mem_rtx != pre_dec))
7090 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7092 emit_move_insn (tmp_reg, reg_rtx);
7094 if (REGNO (tmp_reg) == R0_REG)
7096 offset_in_r0 = -1;
7097 sp_in_r0 = 0;
7098 gcc_assert (!refers_to_regno_p
7099 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7102 if (*++tmp_pnt <= 0)
7103 tmp_pnt = schedule.temps;
7105 reg_rtx = tmp_reg;
7108 rtx insn;
7110 /* Mark as interesting for dwarf cfi generator */
7111 insn = emit_move_insn (mem_rtx, reg_rtx);
7112 RTX_FRAME_RELATED_P (insn) = 1;
7113 /* If we use an intermediate register for the save, we can't
7114 describe this exactly in cfi as a copy of the to-be-saved
7115 register into the temporary register and then the temporary
7116 register on the stack, because the temporary register can
7117 have a different natural size than the to-be-saved register.
7118 Thus, we gloss over the intermediate copy and pretend we do
7119 a direct save from the to-be-saved register. */
7120 if (REGNO (reg_rtx) != reg)
7122 rtx set;
7124 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7125 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7128 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7130 rtx reg_rtx = gen_rtx_REG (mode, reg);
7131 rtx set;
7132 rtx mem_rtx = gen_frame_mem (mode,
7133 gen_rtx_PLUS (Pmode,
7134 stack_pointer_rtx,
7135 GEN_INT (offset)));
7137 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7138 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7143 gcc_assert (entry->offset == d_rounding);
7145 else
7146 push_regs (&live_regs_mask, current_function_interrupt);
7148 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7149 emit_insn (gen_GOTaddr2picreg ());
7151 if (SHMEDIA_REGS_STACK_ADJUST ())
7153 /* This must NOT go through the PLT, otherwise mach and macl
7154 may be clobbered. */
7155 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7156 (TARGET_FPU_ANY
7157 ? "__GCC_push_shmedia_regs"
7158 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7159 emit_insn (gen_shmedia_save_restore_regs_compact
7160 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7163 if (target_flags != save_flags && ! current_function_interrupt)
7164 emit_insn (gen_toggle_sz ());
7166 target_flags = save_flags;
7168 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7169 stack_pointer_rtx, 0, NULL, true);
7171 if (frame_pointer_needed)
7172 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7174 if (TARGET_SHCOMPACT
7175 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7177 /* This must NOT go through the PLT, otherwise mach and macl
7178 may be clobbered. */
7179 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7180 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7181 emit_insn (gen_shcompact_incoming_args ());
7185 void
7186 sh_expand_epilogue (bool sibcall_p)
7188 HARD_REG_SET live_regs_mask;
7189 int d, i;
7190 int d_rounding = 0;
7192 int save_flags = target_flags;
7193 int frame_size, save_size;
7194 int fpscr_deferred = 0;
7195 int e = sibcall_p ? -1 : 1;
7197 d = calc_live_regs (&live_regs_mask);
7199 save_size = d;
7200 frame_size = rounded_frame_size (d);
7202 if (TARGET_SH5)
7204 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7205 int total_size;
7206 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7207 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7208 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7210 total_size = d + tregs_space;
7211 total_size += rounded_frame_size (total_size);
7212 save_size = total_size - frame_size;
7214 /* If adjusting the stack in a single step costs nothing extra, do so.
7215 I.e. either if a single addi is enough, or we need a movi anyway,
7216 and we don't exceed the maximum offset range (the test for the
7217 latter is conservative for simplicity). */
7218 if (TARGET_SHMEDIA
7219 && ! frame_pointer_needed
7220 && (CONST_OK_FOR_I10 (total_size)
7221 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7222 && total_size <= 2044)))
7223 d_rounding = frame_size;
7225 frame_size -= d_rounding;
7228 if (frame_pointer_needed)
7230 /* We must avoid scheduling the epilogue with previous basic blocks.
7231 See PR/18032 and PR/40313. */
7232 emit_insn (gen_blockage ());
7233 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7234 &live_regs_mask, false);
7236 /* We must avoid moving the stack pointer adjustment past code
7237 which reads from the local frame, else an interrupt could
7238 occur after the SP adjustment and clobber data in the local
7239 frame. */
7240 emit_insn (gen_blockage ());
7241 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7243 else if (frame_size)
7245 /* We must avoid moving the stack pointer adjustment past code
7246 which reads from the local frame, else an interrupt could
7247 occur after the SP adjustment and clobber data in the local
7248 frame. */
7249 emit_insn (gen_blockage ());
7250 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7251 &live_regs_mask, false);
7254 if (SHMEDIA_REGS_STACK_ADJUST ())
7256 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7257 (TARGET_FPU_ANY
7258 ? "__GCC_pop_shmedia_regs"
7259 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7260 /* This must NOT go through the PLT, otherwise mach and macl
7261 may be clobbered. */
7262 emit_insn (gen_shmedia_save_restore_regs_compact
7263 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7266 /* Pop all the registers. */
7268 if (target_flags != save_flags && ! current_function_interrupt)
7269 emit_insn (gen_toggle_sz ());
7270 if (TARGET_SH5)
7272 int offset_base, offset;
7273 int offset_in_r0 = -1;
7274 int sp_in_r0 = 0;
7275 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7276 save_schedule schedule;
7277 save_entry *entry;
7278 int *tmp_pnt;
7280 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7281 offset_base = -entry[1].offset + d_rounding;
7282 tmp_pnt = schedule.temps;
7283 for (; entry->mode != VOIDmode; entry--)
7285 enum machine_mode mode = (enum machine_mode) entry->mode;
7286 int reg = entry->reg;
7287 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7289 offset = offset_base + entry->offset;
7290 reg_rtx = gen_rtx_REG (mode, reg);
7292 mem_rtx = gen_frame_mem (mode,
7293 gen_rtx_PLUS (Pmode,
7294 stack_pointer_rtx,
7295 GEN_INT (offset)));
7297 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7298 mem_rtx = NULL_RTX;
7300 if (HAVE_POST_INCREMENT
7301 && (offset == offset_in_r0
7302 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7303 && mem_rtx == NULL_RTX)
7304 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7306 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7308 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7309 post_inc = NULL_RTX;
7310 else
7311 mem_rtx = NULL_RTX;
7314 if (mem_rtx != NULL_RTX)
7315 goto addr_ok;
7317 if (offset_in_r0 == -1)
7319 emit_move_insn (r0, GEN_INT (offset));
7320 offset_in_r0 = offset;
7322 else if (offset != offset_in_r0)
7324 emit_move_insn (r0,
7325 gen_rtx_PLUS
7326 (Pmode, r0,
7327 GEN_INT (offset - offset_in_r0)));
7328 offset_in_r0 += offset - offset_in_r0;
7331 if (post_inc != NULL_RTX)
7333 if (! sp_in_r0)
7335 emit_move_insn (r0,
7336 gen_rtx_PLUS
7337 (Pmode, r0, stack_pointer_rtx));
7338 sp_in_r0 = 1;
7341 mem_rtx = post_inc;
7343 offset_in_r0 += GET_MODE_SIZE (mode);
7345 else if (sp_in_r0)
7346 mem_rtx = gen_frame_mem (mode, r0);
7347 else
7348 mem_rtx = gen_frame_mem (mode,
7349 gen_rtx_PLUS (Pmode,
7350 stack_pointer_rtx,
7351 r0));
7353 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7354 || mem_rtx == post_inc);
7356 addr_ok:
7357 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7358 && mem_rtx != post_inc)
7360 emit_move_insn (r0, mem_rtx);
7361 mem_rtx = r0;
7363 else if (TARGET_REGISTER_P (reg))
7365 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7367 /* Give the scheduler a bit of freedom by using up to
7368 MAX_TEMPS registers in a round-robin fashion. */
7369 emit_move_insn (tmp_reg, mem_rtx);
7370 mem_rtx = tmp_reg;
7371 if (*++tmp_pnt < 0)
7372 tmp_pnt = schedule.temps;
7375 emit_move_insn (reg_rtx, mem_rtx);
7378 gcc_assert (entry->offset + offset_base == d + d_rounding);
7380 else /* ! TARGET_SH5 */
7382 int last_reg;
7384 save_size = 0;
7385 /* For an ISR with RESBANK attribute assigned, don't pop PR
7386 register. */
7387 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7388 && !sh_cfun_resbank_handler_p ())
7390 if (!frame_pointer_needed)
7391 emit_insn (gen_blockage ());
7392 pop (PR_REG);
7395 /* Banked registers are popped first to avoid being scheduled in the
7396 	 delay slot.  RTE switches banks before the delay-slot instruction. */
7397 if (current_function_interrupt)
7399 bool use_movml = false;
7401 if (TARGET_SH2A)
7403 unsigned int count = 0;
7405 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7406 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7407 count++;
7408 else
7409 break;
7411 	      /* Use movml when all banked registers are popped. */
7412 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7413 use_movml = true;
7416 if (use_movml)
7418 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7420 	      /* We must avoid scheduling the multiple-load insn together
7421 		 with other insns. */
7422 emit_insn (gen_blockage ());
7423 emit_insn (gen_movml_pop_banked (sp_reg));
7424 emit_insn (gen_blockage ());
7426 else
7427 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7428 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7429 pop (i);
7431 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7433 else
7434 last_reg = FIRST_PSEUDO_REGISTER;
7436 for (i = 0; i < last_reg; i++)
7438 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7440 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7441 && hard_reg_set_intersect_p (live_regs_mask,
7442 reg_class_contents[DF_REGS]))
7443 fpscr_deferred = 1;
7444 /* For an ISR with RESBANK attribute assigned, don't pop
7445 following registers, R0-R14, MACH, MACL and GBR. */
7446 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7447 && ! (sh_cfun_resbank_handler_p ()
7448 && ((j >= FIRST_GENERAL_REG
7449 && j < LAST_GENERAL_REG)
7450 || j == MACH_REG
7451 || j == MACL_REG
7452 || j == GBR_REG)))
7453 pop (j);
7455 if (j == FIRST_FP_REG && fpscr_deferred)
7456 pop (FPSCR_REG);
7459 if (target_flags != save_flags && ! current_function_interrupt)
7460 emit_insn (gen_toggle_sz ());
7461 target_flags = save_flags;
7463 output_stack_adjust (crtl->args.pretend_args_size
7464 + save_size + d_rounding
7465 + crtl->args.info.stack_regs * 8,
7466 stack_pointer_rtx, e, NULL, false);
7468 if (crtl->calls_eh_return)
7469 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7470 EH_RETURN_STACKADJ_RTX));
7472 /* Switch back to the normal stack if necessary. */
7473 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7474 emit_insn (gen_sp_switch_2 ());
7476 /* Tell flow the insn that pops PR isn't dead. */
7477 /* PR_REG will never be live in SHmedia mode, and we don't need to
7478 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7479 by the return pattern. */
7480 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7481 emit_use (gen_rtx_REG (SImode, PR_REG));
7484 static int sh_need_epilogue_known = 0;
7487 sh_need_epilogue (void)
7489 if (! sh_need_epilogue_known)
7491 rtx epilogue;
7493 start_sequence ();
7494 sh_expand_epilogue (0);
7495 epilogue = get_insns ();
7496 end_sequence ();
7497 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7499 return sh_need_epilogue_known > 0;
7502 /* Emit code to change the current function's return address to RA.
7503 TEMP is available as a scratch register, if needed. */
7505 void
7506 sh_set_return_address (rtx ra, rtx tmp)
7508 HARD_REG_SET live_regs_mask;
7509 int d;
7510 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7511 int pr_offset;
7513 d = calc_live_regs (&live_regs_mask);
7515   /* If pr_reg isn't live, we can set it (or the register given in
7516      sh_media_register_for_return) directly. */
7517 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7519 rtx rr;
7521 if (TARGET_SHMEDIA)
7523 int rr_regno = sh_media_register_for_return ();
7525 if (rr_regno < 0)
7526 rr_regno = pr_reg;
7528 rr = gen_rtx_REG (DImode, rr_regno);
7530 else
7531 rr = gen_rtx_REG (SImode, pr_reg);
7533 emit_insn (GEN_MOV (rr, ra));
7534 /* Tell flow the register for return isn't dead. */
7535 emit_use (rr);
7536 return;
7539 if (TARGET_SH5)
7541 int offset;
7542 save_schedule schedule;
7543 save_entry *entry;
7545 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7546 offset = entry[1].offset;
7547 for (; entry->mode != VOIDmode; entry--)
7548 if (entry->reg == pr_reg)
7549 goto found;
7551       /* We can't find the pr register. */
7552 gcc_unreachable ();
7554 found:
7555 offset = entry->offset - offset;
7556 pr_offset = (rounded_frame_size (d) + offset
7557 + SHMEDIA_REGS_STACK_ADJUST ());
7559 else
7560 pr_offset = rounded_frame_size (d);
7562 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7564 if (frame_pointer_needed)
7565 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7566 else
7567 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7569 tmp = gen_frame_mem (Pmode, tmp);
7570 emit_insn (GEN_MOV (tmp, ra));
7571   /* Tell flow this store isn't dead. */
7572 emit_use (tmp);
7575 /* Clear variables at function end. */
7577 static void
7578 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7579 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7581 sh_need_epilogue_known = 0;
7584 static rtx
7585 sh_builtin_saveregs (void)
7587 /* First unnamed integer register. */
7588 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7589 /* Number of integer registers we need to save. */
7590 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7591 /* First unnamed SFmode float reg */
7592 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7593 /* Number of SFmode float regs to save. */
7594 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7595 rtx regbuf, fpregs;
7596 int bufsize, regno;
7597 alias_set_type alias_set;
7599 if (TARGET_SH5)
7601 if (n_intregs)
7603 int pushregs = n_intregs;
7605 while (pushregs < NPARM_REGS (SImode) - 1
7606 && (CALL_COOKIE_INT_REG_GET
7607 (crtl->args.info.call_cookie,
7608 NPARM_REGS (SImode) - pushregs)
7609 == 1))
7611 crtl->args.info.call_cookie
7612 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7613 - pushregs, 1);
7614 pushregs++;
7617 if (pushregs == NPARM_REGS (SImode))
7618 crtl->args.info.call_cookie
7619 |= (CALL_COOKIE_INT_REG (0, 1)
7620 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7621 else
7622 crtl->args.info.call_cookie
7623 |= CALL_COOKIE_STACKSEQ (pushregs);
7625 crtl->args.pretend_args_size += 8 * n_intregs;
7627 if (TARGET_SHCOMPACT)
7628 return const0_rtx;
7631 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7633 error ("__builtin_saveregs not supported by this subtarget");
7634 return const0_rtx;
7637 if (TARGET_SHMEDIA)
7638 n_floatregs = 0;
7640 /* Allocate block of memory for the regs. */
7641 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7642 Or can assign_stack_local accept a 0 SIZE argument? */
7643 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7645 if (TARGET_SHMEDIA)
7646 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7647 else if (n_floatregs & 1)
7649 rtx addr;
7651 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7652 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7653 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7654 regbuf = change_address (regbuf, BLKmode, addr);
7656 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7658 rtx addr, mask;
7660 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7661 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7662 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7663 emit_insn (gen_andsi3 (addr, addr, mask));
7664 regbuf = change_address (regbuf, BLKmode, addr);
7666 else
7667 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7668 alias_set = get_varargs_alias_set ();
7669 set_mem_alias_set (regbuf, alias_set);
7671 /* Save int args.
7672 This is optimized to only save the regs that are necessary. Explicitly
7673 named args need not be saved. */
7674 if (n_intregs > 0)
7675 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7676 adjust_address (regbuf, BLKmode,
7677 n_floatregs * UNITS_PER_WORD),
7678 n_intregs);
7680 if (TARGET_SHMEDIA)
7681 /* Return the address of the regbuf. */
7682 return XEXP (regbuf, 0);
7684 /* Save float args.
7685 This is optimized to only save the regs that are necessary. Explicitly
7686 named args need not be saved.
7687 We explicitly build a pointer to the buffer because it halves the insn
7688 count when not optimizing (otherwise the pointer is built for each reg
7689 saved).
7690 We emit the moves in reverse order so that we can use predecrement. */
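/* Illustrative layout of REGBUF (an editorial sketch, assuming a non-SHmedia
   target with UNITS_PER_WORD == 4; the offsets follow from the calls below):

       [0, n_floatregs * 4)             unnamed FP argument registers
       [n_floatregs * 4, bufsize)       unnamed integer argument registers

   FPREGS initially points just past the FP area and is pre-decremented
   before each store, which is why the loops below walk the registers
   backwards.  */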
7692 fpregs = copy_to_mode_reg (Pmode,
7693 plus_constant (XEXP (regbuf, 0),
7694 n_floatregs * UNITS_PER_WORD));
7695 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7697 rtx mem;
7698 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7700 emit_insn (gen_addsi3 (fpregs, fpregs,
7701 GEN_INT (-2 * UNITS_PER_WORD)));
7702 mem = change_address (regbuf, DFmode, fpregs);
7703 emit_move_insn (mem,
7704 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7706 regno = first_floatreg;
7707 if (regno & 1)
7709 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7710 mem = change_address (regbuf, SFmode, fpregs);
7711 emit_move_insn (mem,
7712 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7713 - (TARGET_LITTLE_ENDIAN != 0)));
7716 else
7717 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7719 rtx mem;
7721 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7722 mem = change_address (regbuf, SFmode, fpregs);
7723 emit_move_insn (mem,
7724 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7727 /* Return the address of the regbuf. */
7728 return XEXP (regbuf, 0);
7731 /* Define the `__builtin_va_list' type for the ABI. */
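/* For reference, the record built below corresponds roughly to this C type
   (an editorial sketch; on TARGET_SH5, Renesas-ABI and non-FPU targets a
   plain 'void *' is used instead):

       typedef struct __va_list_tag {
         void *__va_next_o;          next slot in the saved integer arg regs
         void *__va_next_o_limit;    end of the saved integer arg regs
         void *__va_next_fp;         next slot in the saved FP arg regs
         void *__va_next_fp_limit;   end of the saved FP arg regs
         void *__va_next_stack;      next stack-passed argument
       } __builtin_va_list;  */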
7733 static tree
7734 sh_build_builtin_va_list (void)
7736 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7737 tree record, type_decl;
7739 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7740 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7741 return ptr_type_node;
7743 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7744 type_decl = build_decl (BUILTINS_LOCATION,
7745 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7747 f_next_o = build_decl (BUILTINS_LOCATION,
7748 FIELD_DECL, get_identifier ("__va_next_o"),
7749 ptr_type_node);
7750 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7751 FIELD_DECL,
7752 get_identifier ("__va_next_o_limit"),
7753 ptr_type_node);
7754 f_next_fp = build_decl (BUILTINS_LOCATION,
7755 FIELD_DECL, get_identifier ("__va_next_fp"),
7756 ptr_type_node);
7757 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7758 FIELD_DECL,
7759 get_identifier ("__va_next_fp_limit"),
7760 ptr_type_node);
7761 f_next_stack = build_decl (BUILTINS_LOCATION,
7762 FIELD_DECL, get_identifier ("__va_next_stack"),
7763 ptr_type_node);
7765 DECL_FIELD_CONTEXT (f_next_o) = record;
7766 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7767 DECL_FIELD_CONTEXT (f_next_fp) = record;
7768 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7769 DECL_FIELD_CONTEXT (f_next_stack) = record;
7771 TREE_CHAIN (record) = type_decl;
7772 TYPE_NAME (record) = type_decl;
7773 TYPE_FIELDS (record) = f_next_o;
7774 DECL_CHAIN (f_next_o) = f_next_o_limit;
7775 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7776 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7777 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7779 layout_type (record);
7781 return record;
7784 /* Implement `va_start' for varargs and stdarg. */
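/* An editorial sketch of what the expansion below amounts to for the
   SH2E/SH4 non-Renesas case (UNITS_PER_WORD == 4; NFP and NINT are the
   counts of named FP and integer register arguments):

       regbuf                 = __builtin_saveregs ();
       ap->__va_next_fp       = regbuf;
       ap->__va_next_fp_limit = regbuf + 4 * (NFP  < 8 ? 8 - NFP  : 0);
       ap->__va_next_o        = ap->__va_next_fp_limit;
       ap->__va_next_o_limit  = ap->__va_next_o + 4 * (NINT < 4 ? 4 - NINT : 0);
       ap->__va_next_stack    = nextarg;  */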
7786 static void
7787 sh_va_start (tree valist, rtx nextarg)
7789 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7790 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7791 tree t, u;
7792 int nfp, nint;
7794 if (TARGET_SH5)
7796 expand_builtin_saveregs ();
7797 std_expand_builtin_va_start (valist, nextarg);
7798 return;
7801 if ((! TARGET_SH2E && ! TARGET_SH4)
7802 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7804 std_expand_builtin_va_start (valist, nextarg);
7805 return;
7808 f_next_o = TYPE_FIELDS (va_list_type_node);
7809 f_next_o_limit = DECL_CHAIN (f_next_o);
7810 f_next_fp = DECL_CHAIN (f_next_o_limit);
7811 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7812 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7814 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7815 NULL_TREE);
7816 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7817 valist, f_next_o_limit, NULL_TREE);
7818 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7819 NULL_TREE);
7820 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7821 valist, f_next_fp_limit, NULL_TREE);
7822 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7823 valist, f_next_stack, NULL_TREE);
7825 /* Call __builtin_saveregs. */
7826 u = make_tree (sizetype, expand_builtin_saveregs ());
7827 u = fold_convert (ptr_type_node, u);
7828 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7829 TREE_SIDE_EFFECTS (t) = 1;
7830 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7832 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7833 if (nfp < 8)
7834 nfp = 8 - nfp;
7835 else
7836 nfp = 0;
7837 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7838 size_int (UNITS_PER_WORD * nfp));
7839 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7840 TREE_SIDE_EFFECTS (t) = 1;
7841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7843 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7844 TREE_SIDE_EFFECTS (t) = 1;
7845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7847 nint = crtl->args.info.arg_count[SH_ARG_INT];
7848 if (nint < 4)
7849 nint = 4 - nint;
7850 else
7851 nint = 0;
7852 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7853 size_int (UNITS_PER_WORD * nint));
7854 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7855 TREE_SIDE_EFFECTS (t) = 1;
7856 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7858 u = make_tree (ptr_type_node, nextarg);
7859 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7860 TREE_SIDE_EFFECTS (t) = 1;
7861 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7864 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7865 member, return it. */
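/* For example (an editorial sketch), for

       struct wrapped_float { float f; };

   this returns the FIELD_DECL for 'f', so that sh_gimplify_va_arg_expr below
   can treat the struct like a bare float when choosing between the FP and
   integer register save areas.  */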
7866 static tree
7867 find_sole_member (tree type)
7869 tree field, member = NULL_TREE;
7871 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7873 if (TREE_CODE (field) != FIELD_DECL)
7874 continue;
7875 if (!DECL_SIZE (field))
7876 return NULL_TREE;
7877 if (integer_zerop (DECL_SIZE (field)))
7878 continue;
7879 if (member)
7880 return NULL_TREE;
7881 member = field;
7883 return member;
7885 /* Implement `va_arg'. */
7887 static tree
7888 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7889 gimple_seq *post_p ATTRIBUTE_UNUSED)
7891 HOST_WIDE_INT size, rsize;
7892 tree tmp, pptr_type_node;
7893 tree addr, lab_over = NULL, result = NULL;
7894 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7895 tree eff_type;
7897 if (pass_by_ref)
7898 type = build_pointer_type (type);
7900 size = int_size_in_bytes (type);
7901 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7902 pptr_type_node = build_pointer_type (ptr_type_node);
7904 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7905 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7907 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7908 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7909 int pass_as_float;
7910 tree lab_false;
7911 tree member;
7913 f_next_o = TYPE_FIELDS (va_list_type_node);
7914 f_next_o_limit = DECL_CHAIN (f_next_o);
7915 f_next_fp = DECL_CHAIN (f_next_o_limit);
7916 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7917 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7919 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7920 NULL_TREE);
7921 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7922 valist, f_next_o_limit, NULL_TREE);
7923 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7924 valist, f_next_fp, NULL_TREE);
7925 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7926 valist, f_next_fp_limit, NULL_TREE);
7927 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7928 valist, f_next_stack, NULL_TREE);
7930 /* Structures with a single member with a distinct mode are passed
7931 like their member. This is relevant if the latter has a REAL_TYPE
7932 or COMPLEX_TYPE type. */
7933 eff_type = type;
7934 while (TREE_CODE (eff_type) == RECORD_TYPE
7935 && (member = find_sole_member (eff_type))
7936 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7937 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7938 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7940 tree field_type = TREE_TYPE (member);
7942 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7943 eff_type = field_type;
7944 else
7946 gcc_assert ((TYPE_ALIGN (eff_type)
7947 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7948 || (TYPE_ALIGN (eff_type)
7949 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7950 break;
7954 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7956 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7957 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7958 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7959 && size <= 16));
7961 else
7963 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7966 addr = create_tmp_var (pptr_type_node, NULL);
7967 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7968 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7970 valist = build_simple_mem_ref (addr);
7972 if (pass_as_float)
7974 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7975 tree cmp;
7976 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7978 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7979 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7981 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7982 tmp = next_fp_limit;
7983 if (size > 4 && !is_double)
7984 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7985 unshare_expr (tmp), size_int (4 - size));
7986 tmp = build2 (GE_EXPR, boolean_type_node,
7987 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7988 cmp = build3 (COND_EXPR, void_type_node, tmp,
7989 build1 (GOTO_EXPR, void_type_node,
7990 unshare_expr (lab_false)), NULL_TREE);
7991 if (!is_double)
7992 gimplify_and_add (cmp, pre_p);
7994 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7995 || (is_double || size == 16))
7997 tmp = fold_convert (sizetype, next_fp_tmp);
7998 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7999 size_int (UNITS_PER_WORD));
8000 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8001 unshare_expr (next_fp_tmp), tmp);
8002 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8004 if (is_double)
8005 gimplify_and_add (cmp, pre_p);
8007 #ifdef FUNCTION_ARG_SCmode_WART
8008 if (TYPE_MODE (eff_type) == SCmode
8009 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8011 tree subtype = TREE_TYPE (eff_type);
8012 tree real, imag;
8014 imag
8015 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8016 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8018 real
8019 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8020 real = get_initialized_tmp_var (real, pre_p, NULL);
8022 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8023 if (type != eff_type)
8024 result = build1 (VIEW_CONVERT_EXPR, type, result);
8025 result = get_initialized_tmp_var (result, pre_p, NULL);
8027 #endif /* FUNCTION_ARG_SCmode_WART */
8029 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8030 gimplify_and_add (tmp, pre_p);
8032 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8033 gimplify_and_add (tmp, pre_p);
8035 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8036 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8037 gimplify_assign (unshare_expr (next_fp_tmp),
8038 unshare_expr (valist), pre_p);
8040 gimplify_assign (unshare_expr (valist),
8041 unshare_expr (next_fp_tmp), post_p);
8042 valist = next_fp_tmp;
8044 else
8046 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8047 unshare_expr (next_o), size_int (rsize));
8048 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8049 unshare_expr (next_o_limit));
8050 tmp = build3 (COND_EXPR, void_type_node, tmp,
8051 build1 (GOTO_EXPR, void_type_node,
8052 unshare_expr (lab_false)),
8053 NULL_TREE);
8054 gimplify_and_add (tmp, pre_p);
8056 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8057 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8059 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8060 gimplify_and_add (tmp, pre_p);
8062 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8063 gimplify_and_add (tmp, pre_p);
8065 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8066 gimplify_assign (unshare_expr (next_o),
8067 unshare_expr (next_o_limit), pre_p);
8069 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8070 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8073 if (!result)
8075 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8076 gimplify_and_add (tmp, pre_p);
8080 /* ??? In va-sh.h, there had been code to make values larger than
8081 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8083 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8084 if (result)
8086 gimplify_assign (result, tmp, pre_p);
8087 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8088 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8089 gimplify_and_add (tmp, pre_p);
8091 else
8092 result = tmp;
8094 if (pass_by_ref)
8095 result = build_va_arg_indirect_ref (result);
8097 return result;
8100 /* 64-bit floating point memory transfers are paired single precision loads
8101 or stores. So the DWARF information needs fixing for little endian (unless
8102 PR=SZ=1 in FPSCR). */
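/* Example (an editorial sketch): on a little-endian SH4, a DFmode value
   living in DR0 (regno FR0) is described to the debugger as the pair
   (FR1, FR0) of single-precision registers rather than as one 8-byte
   register.  */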
8104 sh_dwarf_register_span (rtx reg)
8106 unsigned regno = REGNO (reg);
8108 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8109 return NULL_RTX;
8111 return
8112 gen_rtx_PARALLEL (VOIDmode,
8113 gen_rtvec (2,
8114 gen_rtx_REG (SFmode,
8115 DBX_REGISTER_NUMBER (regno+1)),
8116 gen_rtx_REG (SFmode,
8117 DBX_REGISTER_NUMBER (regno))));
8120 static enum machine_mode
8121 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8122 int *punsignedp, const_tree funtype,
8123 int for_return ATTRIBUTE_UNUSED)
8125 if (sh_promote_prototypes (funtype))
8126 return promote_mode (type, mode, punsignedp);
8127 else
8128 return mode;
8131 static bool
8132 sh_promote_prototypes (const_tree type)
8134 if (TARGET_HITACHI)
8135 return 0;
8136 if (! type)
8137 return 1;
8138 return ! sh_attr_renesas_p (type);
8141 /* Whether an argument must be passed by reference. On SHcompact, we
8142 pretend arguments wider than 32 bits that would have been passed in
8143 registers are passed by reference, so that an SHmedia trampoline
8144 loads them into the full 64-bit registers. */
8146 static int
8147 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8148 const_tree type, bool named)
8150 unsigned HOST_WIDE_INT size;
8152 if (type)
8153 size = int_size_in_bytes (type);
8154 else
8155 size = GET_MODE_SIZE (mode);
8157 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8158 && (!named
8159 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8160 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8161 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8162 && size > 4
8163 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8164 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8165 return size;
8166 else
8167 return 0;
8170 static bool
8171 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8172 const_tree type, bool named)
8174 if (targetm.calls.must_pass_in_stack (mode, type))
8175 return true;
8177 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8178 wants to know about pass-by-reference semantics for incoming
8179 arguments. */
8180 if (! cum)
8181 return false;
8183 if (TARGET_SHCOMPACT)
8185 cum->byref = shcompact_byref (cum, mode, type, named);
8186 return cum->byref != 0;
8189 return false;
8192 static bool
8193 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8194 const_tree type, bool named ATTRIBUTE_UNUSED)
8196 /* ??? How can it possibly be correct to return true only on the
8197 caller side of the equation? Is there someplace else in the
8198 sh backend that's magically producing the copies? */
8199 return (cum->outgoing
8200 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8201 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8204 static int
8205 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8206 tree type, bool named ATTRIBUTE_UNUSED)
8208 int words = 0;
8210 if (!TARGET_SH5
8211 && PASS_IN_REG_P (*cum, mode, type)
8212 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8213 && (ROUND_REG (*cum, mode)
8214 + (mode != BLKmode
8215 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8216 : ROUND_ADVANCE (int_size_in_bytes (type)))
8217 > NPARM_REGS (mode)))
8218 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8220 else if (!TARGET_SHCOMPACT
8221 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8222 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8224 return words * UNITS_PER_WORD;
8228 /* Define where to put the arguments to a function.
8229 Value is zero to push the argument on the stack,
8230 or a hard register in which to store the argument.
8232 MODE is the argument's machine mode.
8233 TYPE is the data type of the argument (as a tree).
8234 This is null for libcalls where that information may
8235 not be available.
8236 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8237 the preceding args and about the function being called.
8238 NAMED is nonzero if this argument is a named parameter
8239 (otherwise it is an extra parameter matching an ellipsis).
8241 On SH the first args are normally in registers
8242 and the rest are pushed. Any arg that starts within the first
8243 NPARM_REGS words is at least partially passed in a register unless
8244 its data type forbids. */
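/* Example (an editorial sketch, SH4 non-Renesas ABI): for

       void f (int a, float b, double c, int d);

   'a' and 'd' are taken from the integer argument registers r4..r7,
   'b' from the single-precision argument registers fr4..fr11, and 'c'
   from a double-precision register pair; arguments that no longer fit
   in registers are pushed on the stack.  */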
8246 static rtx
8247 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8248 const_tree type, bool named)
8250 if (! TARGET_SH5 && mode == VOIDmode)
8251 return GEN_INT (ca->renesas_abi ? 1 : 0);
8253 if (! TARGET_SH5
8254 && PASS_IN_REG_P (*ca, mode, type)
8255 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8257 int regno;
8259 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8260 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8262 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8263 gen_rtx_REG (SFmode,
8264 BASE_ARG_REG (mode)
8265 + (ROUND_REG (*ca, mode) ^ 1)),
8266 const0_rtx);
8267 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8268 gen_rtx_REG (SFmode,
8269 BASE_ARG_REG (mode)
8270 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8271 GEN_INT (4));
8272 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8275 /* If the alignment of a DF value causes an SF register to be
8276 skipped, we will use that skipped register for the next SF
8277 value. */
8278 if ((TARGET_HITACHI || ca->renesas_abi)
8279 && ca->free_single_fp_reg
8280 && mode == SFmode)
8281 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8283 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8284 ^ (mode == SFmode && TARGET_SH4
8285 && TARGET_LITTLE_ENDIAN != 0
8286 && ! TARGET_HITACHI && ! ca->renesas_abi);
8287 return gen_rtx_REG (mode, regno);
8291 if (TARGET_SH5)
8293 if (mode == VOIDmode && TARGET_SHCOMPACT)
8294 return GEN_INT (ca->call_cookie);
8296 /* The following test assumes unnamed arguments are promoted to
8297 DFmode. */
8298 if (mode == SFmode && ca->free_single_fp_reg)
8299 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8301 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8302 && (named || ! ca->prototype_p)
8303 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8305 if (! ca->prototype_p && TARGET_SHMEDIA)
8306 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8308 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8309 FIRST_FP_PARM_REG
8310 + ca->arg_count[(int) SH_ARG_FLOAT]);
8313 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8314 && (! TARGET_SHCOMPACT
8315 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8316 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8317 type, named))))
8319 return gen_rtx_REG (mode, (FIRST_PARM_REG
8320 + ca->arg_count[(int) SH_ARG_INT]));
8323 return 0;
8326 return 0;
8329 /* Update the data in CUM to advance over an argument
8330 of mode MODE and data type TYPE.
8331 (TYPE is null for libcalls where that information may not be
8332 available.) */
8334 static void
8335 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8336 const_tree type, bool named)
8338 if (ca->force_mem)
8339 ca->force_mem = 0;
8340 else if (TARGET_SH5)
8342 const_tree type2 = (ca->byref && type
8343 ? TREE_TYPE (type)
8344 : type);
8345 enum machine_mode mode2 = (ca->byref && type
8346 ? TYPE_MODE (type2)
8347 : mode);
8348 int dwords = ((ca->byref
8349 ? ca->byref
8350 : mode2 == BLKmode
8351 ? int_size_in_bytes (type2)
8352 : GET_MODE_SIZE (mode2)) + 7) / 8;
8353 int numregs = MIN (dwords, NPARM_REGS (SImode)
8354 - ca->arg_count[(int) SH_ARG_INT]);
8356 if (numregs)
8358 ca->arg_count[(int) SH_ARG_INT] += numregs;
8359 if (TARGET_SHCOMPACT
8360 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8362 ca->call_cookie
8363 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8364 - numregs, 1);
8365 /* N.B. We want this also for outgoing. */
8366 ca->stack_regs += numregs;
8368 else if (ca->byref)
8370 if (! ca->outgoing)
8371 ca->stack_regs += numregs;
8372 ca->byref_regs += numregs;
8373 ca->byref = 0;
8375 ca->call_cookie
8376 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8377 - numregs, 2);
8378 while (--numregs);
8379 ca->call_cookie
8380 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8381 - 1, 1);
8383 else if (dwords > numregs)
8385 int pushregs = numregs;
8387 if (TARGET_SHCOMPACT)
8388 ca->stack_regs += numregs;
8389 while (pushregs < NPARM_REGS (SImode) - 1
8390 && (CALL_COOKIE_INT_REG_GET
8391 (ca->call_cookie,
8392 NPARM_REGS (SImode) - pushregs)
8393 == 1))
8395 ca->call_cookie
8396 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8397 - pushregs, 1);
8398 pushregs++;
8400 if (numregs == NPARM_REGS (SImode))
8401 ca->call_cookie
8402 |= CALL_COOKIE_INT_REG (0, 1)
8403 | CALL_COOKIE_STACKSEQ (numregs - 1);
8404 else
8405 ca->call_cookie
8406 |= CALL_COOKIE_STACKSEQ (numregs);
8409 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8410 && (named || ! ca->prototype_p))
8412 if (mode2 == SFmode && ca->free_single_fp_reg)
8413 ca->free_single_fp_reg = 0;
8414 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8415 < NPARM_REGS (SFmode))
8417 int numfpregs
8418 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8419 NPARM_REGS (SFmode)
8420 - ca->arg_count[(int) SH_ARG_FLOAT]);
8422 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8424 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8426 if (ca->outgoing && numregs > 0)
8429 ca->call_cookie
8430 |= (CALL_COOKIE_INT_REG
8431 (ca->arg_count[(int) SH_ARG_INT]
8432 - numregs + ((numfpregs - 2) / 2),
8433 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8434 - numfpregs) / 2));
8436 while (numfpregs -= 2);
8438 else if (mode2 == SFmode && (named)
8439 && (ca->arg_count[(int) SH_ARG_FLOAT]
8440 < NPARM_REGS (SFmode)))
8441 ca->free_single_fp_reg
8442 = FIRST_FP_PARM_REG - numfpregs
8443 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8446 return;
8449 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8451 /* Note that we've used the skipped register. */
8452 if (mode == SFmode && ca->free_single_fp_reg)
8454 ca->free_single_fp_reg = 0;
8455 return;
8457 /* When we have a DF after an SF, there's an SF register that gets
8458 skipped in order to align the DF value. We note this skipped
8459 register, because the next SF value will use it, and not the
8460 SF that follows the DF. */
8461 if (mode == DFmode
8462 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8464 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8465 + BASE_ARG_REG (mode));
8469 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8470 || PASS_IN_REG_P (*ca, mode, type))
8471 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8472 = (ROUND_REG (*ca, mode)
8473 + (mode == BLKmode
8474 ? ROUND_ADVANCE (int_size_in_bytes (type))
8475 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8478 /* The Renesas calling convention doesn't quite fit into this scheme since
8479 the address is passed like an invisible argument, but one that is always
8480 passed in memory. */
8481 static rtx
8482 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8484 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8485 return 0;
8486 return gen_rtx_REG (Pmode, 2);
8489 /* Worker function for TARGET_FUNCTION_VALUE.
8491 For the SH, this is like LIBCALL_VALUE, except that we must change the
8492 mode like PROMOTE_MODE does.
8493 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8494 tested here has to be kept in sync with the one in explow.c:promote_mode.
8497 static rtx
8498 sh_function_value (const_tree valtype,
8499 const_tree fn_decl_or_type,
8500 bool outgoing ATTRIBUTE_UNUSED)
8502 if (fn_decl_or_type
8503 && !DECL_P (fn_decl_or_type))
8504 fn_decl_or_type = NULL;
8506 return gen_rtx_REG (
8507 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8508 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8509 && (TREE_CODE (valtype) == INTEGER_TYPE
8510 || TREE_CODE (valtype) == ENUMERAL_TYPE
8511 || TREE_CODE (valtype) == BOOLEAN_TYPE
8512 || TREE_CODE (valtype) == REAL_TYPE
8513 || TREE_CODE (valtype) == OFFSET_TYPE))
8514 && sh_promote_prototypes (fn_decl_or_type)
8515 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8516 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8519 /* Worker function for TARGET_LIBCALL_VALUE. */
8521 static rtx
8522 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8524 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8527 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8529 bool
8530 sh_function_value_regno_p (const unsigned int regno)
8532 return ((regno) == FIRST_RET_REG
8533 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8534 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8537 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8539 static bool
8540 sh_return_in_memory (const_tree type, const_tree fndecl)
8542 if (TARGET_SH5)
8544 if (TYPE_MODE (type) == BLKmode)
8545 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8546 else
8547 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8549 else
8551 return (TYPE_MODE (type) == BLKmode
8552 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8553 && TREE_CODE (type) == RECORD_TYPE));
8557 /* We actually emit the code in sh_expand_prologue. We used to use
8558 a static variable to flag that we need to emit this code, but that
8559 doesn't work when inlining, when functions are deferred and then emitted
8560 later. Fortunately, we already have two flags that are part of struct
8561 function that tell if a function uses varargs or stdarg. */
8562 static void
8563 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8564 enum machine_mode mode,
8565 tree type,
8566 int *pretend_arg_size,
8567 int second_time ATTRIBUTE_UNUSED)
8569 gcc_assert (cfun->stdarg);
8570 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8572 int named_parm_regs, anon_parm_regs;
8574 named_parm_regs = (ROUND_REG (*ca, mode)
8575 + (mode == BLKmode
8576 ? ROUND_ADVANCE (int_size_in_bytes (type))
8577 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8578 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8579 if (anon_parm_regs > 0)
8580 *pretend_arg_size = anon_parm_regs * 4;
8584 static bool
8585 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8587 return TARGET_SH5;
8590 static bool
8591 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8593 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8597 /* Define the offset between two registers, one to be eliminated, and
8598 the other its replacement, at the start of a routine. */
8601 initial_elimination_offset (int from, int to)
8603 int regs_saved;
8604 int regs_saved_rounding = 0;
8605 int total_saved_regs_space;
8606 int total_auto_space;
8607 int save_flags = target_flags;
8608 int copy_flags;
8609 HARD_REG_SET live_regs_mask;
8611 shmedia_space_reserved_for_target_registers = false;
8612 regs_saved = calc_live_regs (&live_regs_mask);
8613 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8615 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8617 shmedia_space_reserved_for_target_registers = true;
8618 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8621 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8622 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8623 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8625 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8626 copy_flags = target_flags;
8627 target_flags = save_flags;
8629 total_saved_regs_space = regs_saved + regs_saved_rounding;
8631 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8632 return total_saved_regs_space + total_auto_space
8633 + crtl->args.info.byref_regs * 8;
8635 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8636 return total_saved_regs_space + total_auto_space
8637 + crtl->args.info.byref_regs * 8;
8639 /* Initial gap between fp and sp is 0. */
8640 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8641 return 0;
8643 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8644 return rounded_frame_size (0);
8646 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8647 return rounded_frame_size (0);
8649 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8650 && (to == HARD_FRAME_POINTER_REGNUM
8651 || to == STACK_POINTER_REGNUM));
8652 if (TARGET_SH5)
8654 int n = total_saved_regs_space;
8655 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8656 save_schedule schedule;
8657 save_entry *entry;
8659 n += total_auto_space;
8661 /* If it wasn't saved, there's not much we can do. */
8662 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8663 return n;
8665 target_flags = copy_flags;
8667 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8668 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8669 if (entry->reg == pr_reg)
8671 target_flags = save_flags;
8672 return entry->offset;
8674 gcc_unreachable ();
8676 else
8677 return total_auto_space;
8680 /* Parse the -mfixed-range= option string. */
8681 void
8682 sh_fix_range (const char *const_str)
8684 int i, first, last;
8685 char *str, *dash, *comma;
8687 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8688 REG2 are either register names or register numbers. The effect
8689 of this option is to mark the registers in the range from REG1 to
8690 REG2 as ``fixed'' so they won't be used by the compiler. */
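/* For example (an editorial sketch), -mfixed-range=r8-r10,r13-r13 marks
   r8, r9, r10 and r13 as fixed and call-used for the whole compilation.  */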
8692 i = strlen (const_str);
8693 str = (char *) alloca (i + 1);
8694 memcpy (str, const_str, i + 1);
8696 while (1)
8698 dash = strchr (str, '-');
8699 if (!dash)
8701 warning (0, "value of -mfixed-range must have form REG1-REG2");
8702 return;
8704 *dash = '\0';
8705 comma = strchr (dash + 1, ',');
8706 if (comma)
8707 *comma = '\0';
8709 first = decode_reg_name (str);
8710 if (first < 0)
8712 warning (0, "unknown register name: %s", str);
8713 return;
8716 last = decode_reg_name (dash + 1);
8717 if (last < 0)
8719 warning (0, "unknown register name: %s", dash + 1);
8720 return;
8723 *dash = '-';
8725 if (first > last)
8727 warning (0, "%s-%s is an empty range", str, dash + 1);
8728 return;
8731 for (i = first; i <= last; ++i)
8732 fixed_regs[i] = call_used_regs[i] = 1;
8734 if (!comma)
8735 break;
8737 *comma = ',';
8738 str = comma + 1;
8742 /* Insert any deferred function attributes from earlier pragmas. */
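/* For example (an editorial sketch of user source), with

       #pragma interrupt
       void isr (void);

   the pragma is recorded as a deferred interrupt_handler attribute and is
   attached to 'isr' here, the next function declaration processed.  */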
8743 static void
8744 sh_insert_attributes (tree node, tree *attributes)
8746 tree attrs;
8748 if (TREE_CODE (node) != FUNCTION_DECL)
8749 return;
8751 /* We are only interested in declarations. */
8752 if (!DECL_P (node))
8753 return;
8755 /* Append the attributes to the deferred attributes. */
8756 *sh_deferred_function_attributes_tail = *attributes;
8757 attrs = sh_deferred_function_attributes;
8758 if (!attrs)
8759 return;
8761 /* Some attributes imply or require the interrupt attribute. */
8762 if (!lookup_attribute ("interrupt_handler", attrs)
8763 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8765 /* If we have a trapa_handler, but no interrupt_handler attribute,
8766 insert an interrupt_handler attribute. */
8767 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8768 /* We can't use sh_pr_interrupt here because that's not in the
8769 java frontend. */
8770 attrs
8771 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8772 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8773 if the interrupt attribute is missing, we ignore the attribute
8774 and warn. */
8775 else if (lookup_attribute ("sp_switch", attrs)
8776 || lookup_attribute ("trap_exit", attrs)
8777 || lookup_attribute ("nosave_low_regs", attrs)
8778 || lookup_attribute ("resbank", attrs))
8780 tree *tail;
8782 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8784 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8785 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8786 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8787 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8788 warning (OPT_Wattributes,
8789 "%qE attribute only applies to interrupt functions",
8790 TREE_PURPOSE (attrs));
8791 else
8793 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8794 NULL_TREE);
8795 tail = &TREE_CHAIN (*tail);
8798 attrs = *attributes;
8802 /* Install the processed list. */
8803 *attributes = attrs;
8805 /* Clear deferred attributes. */
8806 sh_deferred_function_attributes = NULL_TREE;
8807 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8809 return;
8812 /* Supported attributes:
8814 interrupt_handler -- specifies this function is an interrupt handler.
8816 trapa_handler -- like above, but don't save all registers.
8818 sp_switch -- specifies an alternate stack for an interrupt handler
8819 to run on.
8821 trap_exit -- use a trapa to exit an interrupt function instead of
8822 an rte instruction.
8824 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8825 This is useful on the SH3 and upwards,
8826 which have a separate set of low regs for User and Supervisor modes.
8827 This should only be used for the lowest level of interrupts. Higher levels
8828 of interrupts must save the registers in case they themselves are
8829 interrupted.
8831 renesas -- use Renesas calling/layout conventions (functions and
8832 structures).
8834 resbank -- In case of an ISR, use a register bank to save registers
8835 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
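/* Example declarations using these attributes (an editorial sketch of user
   source, not part of this file):

       void isr (void) __attribute__ ((interrupt_handler));
       void isr_alt (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
       void isr_trap (void) __attribute__ ((interrupt_handler, trap_exit (11)));
       void isr_bank (void) __attribute__ ((interrupt_handler, resbank));
       int rfun (int) __attribute__ ((renesas));  */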
8838 /* Handle a 'resbank' attribute. */
8839 static tree
8840 sh_handle_resbank_handler_attribute (tree * node, tree name,
8841 tree args ATTRIBUTE_UNUSED,
8842 int flags ATTRIBUTE_UNUSED,
8843 bool * no_add_attrs)
8845 if (!TARGET_SH2A)
8847 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8848 name);
8849 *no_add_attrs = true;
8851 if (TREE_CODE (*node) != FUNCTION_DECL)
8853 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8854 name);
8855 *no_add_attrs = true;
8858 return NULL_TREE;
8861 /* Handle an "interrupt_handler" attribute; arguments as in
8862 struct attribute_spec.handler. */
8863 static tree
8864 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8865 tree args ATTRIBUTE_UNUSED,
8866 int flags ATTRIBUTE_UNUSED,
8867 bool *no_add_attrs)
8869 if (TREE_CODE (*node) != FUNCTION_DECL)
8871 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8872 name);
8873 *no_add_attrs = true;
8875 else if (TARGET_SHCOMPACT)
8877 error ("attribute interrupt_handler is not compatible with -m5-compact");
8878 *no_add_attrs = true;
8881 return NULL_TREE;
8884 /* Handle a 'function_vector' attribute; arguments as in
8885 struct attribute_spec.handler. */
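/* Example (an editorial sketch of user source): on SH2A,

       void vect18 (void) __attribute__ ((function_vector (18)));

   requests that calls to 'vect18' go through entry 18 of the TBR-relative
   function vector table; the argument must be a constant in 0..255.  */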
8886 static tree
8887 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8888 tree args ATTRIBUTE_UNUSED,
8889 int flags ATTRIBUTE_UNUSED,
8890 bool * no_add_attrs)
8892 if (!TARGET_SH2A)
8894 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8895 name);
8896 *no_add_attrs = true;
8898 else if (TREE_CODE (*node) != FUNCTION_DECL)
8900 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8901 name);
8902 *no_add_attrs = true;
8904 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8906 /* The argument must be a constant integer. */
8907 warning (OPT_Wattributes,
8908 "%qE attribute argument not an integer constant",
8909 name);
8910 *no_add_attrs = true;
8912 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8914 /* The argument value must be between 0 and 255. */
8915 warning (OPT_Wattributes,
8916 "%qE attribute argument should be between 0 to 255",
8917 name);
8918 *no_add_attrs = true;
8920 return NULL_TREE;
8923 /* Returns 1 if X is a SYMBOL_REF for a function that has been assigned
8924 the 'function_vector' attribute. */
8926 sh2a_is_function_vector_call (rtx x)
8928 if (GET_CODE (x) == SYMBOL_REF
8929 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8931 tree tr = SYMBOL_REF_DECL (x);
8933 if (sh2a_function_vector_p (tr))
8934 return 1;
8937 return 0;
8940 /* Returns the function vector number, if the attribute
8941 'function_vector' is assigned, otherwise returns zero. */
8943 sh2a_get_function_vector_number (rtx x)
8945 int num;
8946 tree list, t;
8948 if ((GET_CODE (x) == SYMBOL_REF)
8949 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8951 t = SYMBOL_REF_DECL (x);
8953 if (TREE_CODE (t) != FUNCTION_DECL)
8954 return 0;
8956 list = SH_ATTRIBUTES (t);
8957 while (list)
8959 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8961 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8962 return num;
8965 list = TREE_CHAIN (list);
8968 return 0;
8970 else
8971 return 0;
8974 /* Handle an "sp_switch" attribute; arguments as in
8975 struct attribute_spec.handler. */
8976 static tree
8977 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8978 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8980 if (TREE_CODE (*node) != FUNCTION_DECL)
8982 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8983 name);
8984 *no_add_attrs = true;
8986 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8988 /* The argument must be a constant string. */
8989 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8990 name);
8991 *no_add_attrs = true;
8994 return NULL_TREE;
8997 /* Handle a "trap_exit" attribute; arguments as in
8998 struct attribute_spec.handler. */
8999 static tree
9000 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9001 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9003 if (TREE_CODE (*node) != FUNCTION_DECL)
9005 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9006 name);
9007 *no_add_attrs = true;
9009 /* The argument specifies a trap number to be used in a trapa instruction
9010 at function exit (instead of an rte instruction). */
9011 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9013 /* The argument must be a constant integer. */
9014 warning (OPT_Wattributes, "%qE attribute argument not an "
9015 "integer constant", name);
9016 *no_add_attrs = true;
9019 return NULL_TREE;
9022 static tree
9023 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9024 tree name ATTRIBUTE_UNUSED,
9025 tree args ATTRIBUTE_UNUSED,
9026 int flags ATTRIBUTE_UNUSED,
9027 bool *no_add_attrs ATTRIBUTE_UNUSED)
9029 return NULL_TREE;
9032 /* True if __attribute__((renesas)) or -mrenesas. */
9034 sh_attr_renesas_p (const_tree td)
9036 if (TARGET_HITACHI)
9037 return 1;
9038 if (td == 0)
9039 return 0;
9040 if (DECL_P (td))
9041 td = TREE_TYPE (td);
9042 if (td == error_mark_node)
9043 return 0;
9044 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9045 != NULL_TREE);
9048 /* True if __attribute__((renesas)) or -mrenesas, for the current
9049 function. */
9051 sh_cfun_attr_renesas_p (void)
9053 return sh_attr_renesas_p (current_function_decl);
9057 sh_cfun_interrupt_handler_p (void)
9059 return (lookup_attribute ("interrupt_handler",
9060 DECL_ATTRIBUTES (current_function_decl))
9061 != NULL_TREE);
9064 /* Returns 1 if FUNC has been assigned the attribute
9065 "function_vector". */
9067 sh2a_function_vector_p (tree func)
9069 tree list;
9070 if (TREE_CODE (func) != FUNCTION_DECL)
9071 return 0;
9073 list = SH_ATTRIBUTES (func);
9074 while (list)
9076 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9077 return 1;
9079 list = TREE_CHAIN (list);
9081 return 0;
9084 /* Returns TRUE if the current function is an SH2A interrupt handler with the "resbank" attribute. */
9087 sh_cfun_resbank_handler_p (void)
9089 return ((lookup_attribute ("resbank",
9090 DECL_ATTRIBUTES (current_function_decl))
9091 != NULL_TREE)
9092 && (lookup_attribute ("interrupt_handler",
9093 DECL_ATTRIBUTES (current_function_decl))
9094 != NULL_TREE) && TARGET_SH2A);
9097 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9099 static const char *
9100 sh_check_pch_target_flags (int old_flags)
9102 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9103 | MASK_SH_E | MASK_HARD_SH4
9104 | MASK_FPU_SINGLE | MASK_SH4))
9105 return _("created and used with different architectures / ABIs");
9106 if ((old_flags ^ target_flags) & MASK_HITACHI)
9107 return _("created and used with different ABIs");
9108 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9109 return _("created and used with different endianness");
9110 return NULL;
9113 /* Predicates used by the templates. */
9115 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9116 Used only in general_movsrc_operand. */
9119 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9121 switch (REGNO (op))
9123 case PR_REG:
9124 case MACL_REG:
9125 case MACH_REG:
9126 return 1;
9128 return 0;
9131 /* Nonzero if OP is a floating point value with value 0.0. */
9134 fp_zero_operand (rtx op)
9136 REAL_VALUE_TYPE r;
9138 if (GET_MODE (op) != SFmode)
9139 return 0;
9141 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9142 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9145 /* Nonzero if OP is a floating point value with value 1.0. */
9148 fp_one_operand (rtx op)
9150 REAL_VALUE_TYPE r;
9152 if (GET_MODE (op) != SFmode)
9153 return 0;
9155 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9156 return REAL_VALUES_EQUAL (r, dconst1);
9159 /* In general mode switching is used. If we are
9160 compiling without -mfmovd, movsf_ie isn't taken into account for
9161 mode switching. We could check in machine_dependent_reorg for
9162 cases where we know we are in single precision mode, but there is
9163 no interface to find that out during reload, so we must avoid
9164 choosing an fldi alternative during reload and thus failing to
9165 allocate a scratch register for the constant loading. */
9167 fldi_ok (void)
9169 return 1;
9173 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9175 enum rtx_code code = GET_CODE (op);
9176 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9179 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9180 enum tls_model
9181 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9183 if (GET_CODE (op) != SYMBOL_REF)
9184 return TLS_MODEL_NONE;
9185 return SYMBOL_REF_TLS_MODEL (op);
9188 /* Return the destination address of a branch. */
9190 static int
9191 branch_dest (rtx branch)
9193 rtx dest = SET_SRC (PATTERN (branch));
9194 int dest_uid;
9196 if (GET_CODE (dest) == IF_THEN_ELSE)
9197 dest = XEXP (dest, 1);
9198 dest = XEXP (dest, 0);
9199 dest_uid = INSN_UID (dest);
9200 return INSN_ADDRESSES (dest_uid);
9203 /* Return nonzero if REG is not used after INSN.
9204 We assume REG is a reload reg, and therefore does
9205 not live past labels. It may live past calls or jumps though. */
9207 reg_unused_after (rtx reg, rtx insn)
9209 enum rtx_code code;
9210 rtx set;
9212 /* If the reg is set by this instruction, then it is safe for our
9213 case. Disregard the case where this is a store to memory, since
9214 we are checking a register used in the store address. */
9215 set = single_set (insn);
9216 if (set && !MEM_P (SET_DEST (set))
9217 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9218 return 1;
9220 while ((insn = NEXT_INSN (insn)))
9222 rtx set;
9223 if (!INSN_P (insn))
9224 continue;
9226 code = GET_CODE (insn);
9228 #if 0
9229 /* If this is a label that existed before reload, then the register
9230 is dead here. However, if this is a label added by reorg, then
9231 the register may still be live here. We can't tell the difference,
9232 so we just ignore labels completely. */
9233 if (code == CODE_LABEL)
9234 return 1;
9235 /* else */
9236 #endif
9238 if (code == JUMP_INSN)
9239 return 0;
9241 /* If this is a sequence, we must handle them all at once.
9242 We could have for instance a call that sets the target register,
9243 and an insn in a delay slot that uses the register. In this case,
9244 we must return 0. */
9245 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9247 int i;
9248 int retval = 0;
9250 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9252 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9253 rtx set = single_set (this_insn);
9255 if (CALL_P (this_insn))
9256 code = CALL_INSN;
9257 else if (JUMP_P (this_insn))
9259 if (INSN_ANNULLED_BRANCH_P (this_insn))
9260 return 0;
9261 code = JUMP_INSN;
9264 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9265 return 0;
9266 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9268 if (!MEM_P (SET_DEST (set)))
9269 retval = 1;
9270 else
9271 return 0;
9273 if (set == 0
9274 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9275 return 0;
9277 if (retval == 1)
9278 return 1;
9279 else if (code == JUMP_INSN)
9280 return 0;
9283 set = single_set (insn);
9284 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9285 return 0;
9286 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9287 return !MEM_P (SET_DEST (set));
9288 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9289 return 0;
9291 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9292 return 1;
9294 return 1;
9297 #include "ggc.h"
9299 static GTY(()) rtx fpscr_rtx;
9301 get_fpscr_rtx (void)
9303 if (! fpscr_rtx)
9305 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9306 REG_USERVAR_P (fpscr_rtx) = 1;
9307 mark_user_reg (fpscr_rtx);
9309 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9310 mark_user_reg (fpscr_rtx);
9311 return fpscr_rtx;
9314 static GTY(()) tree fpscr_values;
9316 static void
9317 emit_fpu_switch (rtx scratch, int index)
9319 rtx dst, src;
9321 if (fpscr_values == NULL)
9323 tree t;
9325 t = build_index_type (integer_one_node);
9326 t = build_array_type (integer_type_node, t);
9327 t = build_decl (BUILTINS_LOCATION,
9328 VAR_DECL, get_identifier ("__fpscr_values"), t);
9329 DECL_ARTIFICIAL (t) = 1;
9330 DECL_IGNORED_P (t) = 1;
9331 DECL_EXTERNAL (t) = 1;
9332 TREE_STATIC (t) = 1;
9333 TREE_PUBLIC (t) = 1;
9334 TREE_USED (t) = 1;
9336 fpscr_values = t;
9339 src = DECL_RTL (fpscr_values);
9340 if (!can_create_pseudo_p ())
9342 emit_move_insn (scratch, XEXP (src, 0));
9343 if (index != 0)
9344 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9345 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9347 else
9348 src = adjust_address (src, PSImode, index * 4);
9350 dst = get_fpscr_rtx ();
9351 emit_move_insn (dst, src);
9354 void
9355 emit_sf_insn (rtx pat)
9357 emit_insn (pat);
9360 void
9361 emit_df_insn (rtx pat)
9363 emit_insn (pat);
9366 void
9367 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9369 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9372 void
9373 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9375 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9376 get_fpscr_rtx ()));
9379 void
9380 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9382 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9385 void
9386 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9388 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9389 get_fpscr_rtx ()));
9392 static rtx get_free_reg (HARD_REG_SET);
9394 /* This function returns a register to use to load the address from which
9395 to load the fpscr. Currently it always returns r1 or r7, but when we are
9396 able to use pseudo registers after combine, or have a better mechanism
9397 for choosing a register, it should be done here. */
9398 /* REGS_LIVE is the liveness information for the point for which we
9399 need this allocation. In some bare-bones exit blocks, r1 is live at the
9400 start. We can even have all of r0..r3 being live:
9401 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9402 The insn before which the new insns are placed will clobber the register
9403 we return. If a basic block consists only of setting the return value
9404 register to a pseudo and using that register, the return value is not
9405 live before or after this block, yet we'll insert our insns right in
9406 the middle. */
9408 static rtx
9409 get_free_reg (HARD_REG_SET regs_live)
9411 if (! TEST_HARD_REG_BIT (regs_live, 1))
9412 return gen_rtx_REG (Pmode, 1);
9414 /* Hard reg 1 is live; since this is a small register classes target,
9415 there shouldn't be anything but a jump before the function end. */
9416 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9417 return gen_rtx_REG (Pmode, 7);
9420 /* This function will set the fpscr from memory.
9421 MODE is the mode we are setting it to. */
9422 void
9423 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9425 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9426 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9427 rtx addr_reg;
9429 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9430 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9433 /* Is the given character a logical line separator for the assembler? */
9434 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9435 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9436 #endif
9439 sh_insn_length_adjustment (rtx insn)
9441 /* Instructions with unfilled delay slots take up an extra two bytes for
9442 the nop in the delay slot. */
9443 if (((NONJUMP_INSN_P (insn)
9444 && GET_CODE (PATTERN (insn)) != USE
9445 && GET_CODE (PATTERN (insn)) != CLOBBER)
9446 || CALL_P (insn)
9447 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9448 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9449 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9450 return 2;
9452 /* SH2e has a bug that prevents the use of annulled branches, so if
9453 the delay slot is not filled, we'll have to put a NOP in it. */
9454 if (sh_cpu_attr == CPU_SH2E
9455 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9456 && get_attr_type (insn) == TYPE_CBRANCH
9457 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9458 return 2;
9460 /* sh-dsp parallel processing insns take four bytes instead of two. */
9462 if (NONJUMP_INSN_P (insn))
9464 int sum = 0;
9465 rtx body = PATTERN (insn);
9466 const char *templ;
9467 char c;
9468 int maybe_label = 1;
9470 if (GET_CODE (body) == ASM_INPUT)
9471 templ = XSTR (body, 0);
9472 else if (asm_noperands (body) >= 0)
9473 templ
9474 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9475 else
9476 return 0;
9479 int ppi_adjust = 0;
9482 c = *templ++;
9483 while (c == ' ' || c == '\t');
9484 /* all sh-dsp parallel-processing insns start with p.
9485 The only non-ppi sh insn starting with p is pref.
9486 The only ppi starting with pr is prnd. */
9487 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9488 ppi_adjust = 2;
9489 /* The repeat pseudo-insn expands to three insns, a total of
9490 six bytes in size. */
9491 else if ((c == 'r' || c == 'R')
9492 && ! strncasecmp ("epeat", templ, 5))
9493 ppi_adjust = 4;
9494 while (c && c != '\n'
9495 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9497 /* If this is a label, it is obviously not a ppi insn. */
9498 if (c == ':' && maybe_label)
9500 ppi_adjust = 0;
9501 break;
9503 else if (c == '\'' || c == '"')
9504 maybe_label = 0;
9505 c = *templ++;
9507 sum += ppi_adjust;
9508 maybe_label = c != ':';
9510 while (c);
9511 return sum;
9513 return 0;
9516 /* Return TRUE for a valid displacement for the REG+disp addressing
9517 with MODE. */
9519 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9520 into the FRx registers. We implement this by setting the maximum offset
9521 to zero when the value is SFmode. This also restricts loading of SFmode
9522 values into the integer registers, but that can't be helped. */
9524 /* The SH allows a displacement in a QImode or HImode address, but only when the
9525 other operand is R0. GCC doesn't handle this very well, so we forgo
9526 all of that.
9528 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9529 DI can be any number 0..60. */
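/* Examples (an editorial sketch, SImode on a non-SH2A target):

       mov.l   @(60,r4),r1      accepted: displacement in 0..60, multiple of 4
       mov.l   @(2,r4),r1       rejected: not a multiple of 4
       mov.l   @(64,r4),r1      rejected: displacement out of range  */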
9531 bool
9532 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9534 if (CONST_INT_P (op))
9536 if (TARGET_SHMEDIA)
9538 int size;
9540 /* Check if this is the address of an unaligned load / store. */
9541 if (mode == VOIDmode)
9542 return CONST_OK_FOR_I06 (INTVAL (op));
9544 size = GET_MODE_SIZE (mode);
9545 return (!(INTVAL (op) & (size - 1))
9546 && INTVAL (op) >= -512 * size
9547 && INTVAL (op) < 512 * size);
9550 if (TARGET_SH2A)
9552 if (GET_MODE_SIZE (mode) == 1
9553 && (unsigned) INTVAL (op) < 4096)
9554 return true;
9557 if ((GET_MODE_SIZE (mode) == 4
9558 && (unsigned) INTVAL (op) < 64
9559 && !(INTVAL (op) & 3)
9560 && !(TARGET_SH2E && mode == SFmode))
9561 || (GET_MODE_SIZE (mode) == 4
9562 && (unsigned) INTVAL (op) < 16383
9563 && !(INTVAL (op) & 3) && TARGET_SH2A))
9564 return true;
9566 if ((GET_MODE_SIZE (mode) == 8
9567 && (unsigned) INTVAL (op) < 60
9568 && !(INTVAL (op) & 3)
9569 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9570 || ((GET_MODE_SIZE (mode) == 8)
9571 && (unsigned) INTVAL (op) < 8192
9572 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9573 && (TARGET_SH2A && mode == DFmode)))
9574 return true;
9577 return false;
9580 /* Recognize an RTL expression that is a valid memory address for
9581 an instruction.
9582 The MODE argument is the machine mode for the MEM expression
9583 that wants to use this address.
9584 Allow REG
9585 REG+disp
9586 REG+r0
9587 REG++
9588 --REG */
9590 static bool
9591 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9593 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9594 return true;
9595 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9596 && ! TARGET_SHMEDIA
9597 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9598 return true;
9599 else if (GET_CODE (x) == PLUS
9600 && (mode != PSImode || reload_completed))
9602 rtx xop0 = XEXP (x, 0);
9603 rtx xop1 = XEXP (x, 1);
9605 if (GET_MODE_SIZE (mode) <= 8
9606 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9607 && sh_legitimate_index_p (mode, xop1))
9608 return true;
9610 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9611 || ((xop0 == stack_pointer_rtx
9612 || xop0 == hard_frame_pointer_rtx)
9613 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9614 || ((xop1 == stack_pointer_rtx
9615 || xop1 == hard_frame_pointer_rtx)
9616 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9617 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9618 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9619 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9620 && TARGET_FMOVD && mode == DFmode)))
9622 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9623 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9624 return true;
9625 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9626 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9627 return true;
9631 return false;
9634 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9635 isn't protected by a PIC unspec. */
9637 nonpic_symbol_mentioned_p (rtx x)
9639 register const char *fmt;
9640 register int i;
9642 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9643 || GET_CODE (x) == PC)
9644 return 1;
9646 /* We don't want to look into the possible MEM location of a
9647 CONST_DOUBLE, since we're not going to use it, in general. */
9648 if (GET_CODE (x) == CONST_DOUBLE)
9649 return 0;
9651 if (GET_CODE (x) == UNSPEC
9652 && (XINT (x, 1) == UNSPEC_PIC
9653 || XINT (x, 1) == UNSPEC_GOT
9654 || XINT (x, 1) == UNSPEC_GOTOFF
9655 || XINT (x, 1) == UNSPEC_GOTPLT
9656 || XINT (x, 1) == UNSPEC_GOTTPOFF
9657 || XINT (x, 1) == UNSPEC_DTPOFF
9658 || XINT (x, 1) == UNSPEC_TPOFF
9659 || XINT (x, 1) == UNSPEC_PLT
9660 || XINT (x, 1) == UNSPEC_SYMOFF
9661 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9662 return 0;
9664 fmt = GET_RTX_FORMAT (GET_CODE (x));
9665 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9667 if (fmt[i] == 'E')
9669 register int j;
9671 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9672 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9673 return 1;
9675 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9676 return 1;
9679 return 0;
9682 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9683 @GOTOFF in `reg'. */
9685 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9686 rtx reg)
9688 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9689 return orig;
9691 if (GET_CODE (orig) == LABEL_REF
9692 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9694 if (reg == 0)
9695 reg = gen_reg_rtx (Pmode);
9697 emit_insn (gen_symGOTOFF2reg (reg, orig));
9698 return reg;
9700 else if (GET_CODE (orig) == SYMBOL_REF)
9702 if (reg == 0)
9703 reg = gen_reg_rtx (Pmode);
9705 emit_insn (gen_symGOT2reg (reg, orig));
9706 return reg;
9708 return orig;
9711 /* Try machine-dependent ways of modifying an illegitimate address
9712 to be legitimate. If we find one, return the new, valid address.
9713 Otherwise, return X.
9715 For the SH, if X is almost suitable for indexing, but the offset is
9716 out of range, convert it into a normal form so that CSE has a chance
9717 of reducing the number of address registers used. */
9719 static rtx
9720 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9722 if (flag_pic)
9723 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9725 if (GET_CODE (x) == PLUS
9726 && (GET_MODE_SIZE (mode) == 4
9727 || GET_MODE_SIZE (mode) == 8)
9728 && CONST_INT_P (XEXP (x, 1))
9729 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9730 && ! TARGET_SHMEDIA
9731 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9732 && ! (TARGET_SH2E && mode == SFmode))
9734 rtx index_rtx = XEXP (x, 1);
9735 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9736 rtx sum;
9738 /* On rare occasions, we might get an unaligned pointer
9739 that is indexed in a way to give an aligned address.
9740 Therefore, keep the lower two bits in offset_base. */
9741 /* Instead of offset_base 128..131 use 124..127, so that a
9742 simple add suffices. */
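/* For example, an SImode access at offset 68 is split into a base
   addition of 64, which fits the add-immediate range, plus a residual
   displacement of 4, which is valid for @(disp,Rn).  */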
9743 if (offset > 127)
9744 offset_base = ((offset + 4) & ~60) - 4;
9745 else
9746 offset_base = offset & ~60;
9748 /* Sometimes the normal form does not suit DImode. We
9749 could avoid that by using smaller ranges, but that
9750 would give less optimized code when SImode is
9751 prevalent. */
9752 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9754 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9755 GEN_INT (offset_base), NULL_RTX, 0,
9756 OPTAB_LIB_WIDEN);
9758 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9762 return x;
9765 /* Attempt to replace *P, which is an address that needs reloading, with
9766 a valid memory address for an operand of mode MODE.
9767 Like for sh_legitimize_address, for the SH we try to get a normal form
9768 of the address. That will allow inheritance of the address reloads. */
9770 bool
9771 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9772 int itype)
9774 enum reload_type type = (enum reload_type) itype;
9776 if (GET_CODE (*p) == PLUS
9777 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9778 && CONST_INT_P (XEXP (*p, 1))
9779 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9780 && ! TARGET_SHMEDIA
9781 && ! (TARGET_SH4 && mode == DFmode)
9782 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9783 && (ALLOW_INDEXED_ADDRESS
9784 || XEXP (*p, 0) == stack_pointer_rtx
9785 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9787 rtx index_rtx = XEXP (*p, 1);
9788 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9789 rtx sum;
9791 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9793 push_reload (*p, NULL_RTX, p, NULL,
9794 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9795 goto win;
9797 if (TARGET_SH2E && mode == SFmode)
9799 *p = copy_rtx (*p);
9800 push_reload (*p, NULL_RTX, p, NULL,
9801 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9802 goto win;
9804 /* Instead of offset_base 128..131 use 124..127, so that a
9805 simple add suffices. */
9806 if (offset > 127)
9807 offset_base = ((offset + 4) & ~60) - 4;
9808 else
9809 offset_base = offset & ~60;
9810 /* Sometimes the normal form does not suit DImode. We could avoid
9811 that by using smaller ranges, but that would give less optimized
9812 code when SImode is prevalent. */
9813 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9815 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9816 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9817 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9818 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9819 goto win;
9822 /* We must re-recognize what we created before. */
9823 else if (GET_CODE (*p) == PLUS
9824 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9825 && GET_CODE (XEXP (*p, 0)) == PLUS
9826 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9827 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9828 && CONST_INT_P (XEXP (*p, 1))
9829 && ! TARGET_SHMEDIA
9830 && ! (TARGET_SH2E && mode == SFmode))
9832 /* Because this address is so complex, we know it must have
9833 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9834 it is already unshared, and needs no further unsharing. */
9835 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9836 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9837 goto win;
9840 return false;
9842 win:
9843 return true;
9846 /* Mark the use of a constant in the literal table. If the constant
9847 has multiple labels, make it unique. */
9848 static rtx
9849 mark_constant_pool_use (rtx x)
9851 rtx insn, lab, pattern;
9853 if (x == NULL)
9854 return x;
9856 switch (GET_CODE (x))
9858 case LABEL_REF:
9859 x = XEXP (x, 0);
9860 case CODE_LABEL:
9861 break;
9862 default:
9863 return x;
9866 /* Get the first label in the list of labels for the same constant
9867 and delete the other labels in the list. */
9868 lab = x;
9869 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9871 if (!LABEL_P (insn)
9872 || LABEL_REFS (insn) != NEXT_INSN (insn))
9873 break;
9874 lab = insn;
9877 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9878 INSN_DELETED_P (insn) = 1;
9880 /* Mark constants in a window. */
9881 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9883 if (!NONJUMP_INSN_P (insn))
9884 continue;
9886 pattern = PATTERN (insn);
9887 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9888 continue;
9890 switch (XINT (pattern, 1))
9892 case UNSPECV_CONST2:
9893 case UNSPECV_CONST4:
9894 case UNSPECV_CONST8:
9895 XVECEXP (pattern, 0, 1) = const1_rtx;
9896 break;
9897 case UNSPECV_WINDOW_END:
9898 if (XVECEXP (pattern, 0, 0) == x)
9899 return lab;
9900 break;
9901 case UNSPECV_CONST_END:
9902 return lab;
9903 default:
9904 break;
9908 return lab;
9911 /* Return true if it's possible to redirect BRANCH1 to the destination
9912 of an unconditional jump BRANCH2. We only want to do this if the
9913 resulting branch will have a short displacement. */
9915 sh_can_redirect_branch (rtx branch1, rtx branch2)
9917 if (flag_expensive_optimizations && simplejump_p (branch2))
9919 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9920 rtx insn;
9921 int distance;
9923 for (distance = 0, insn = NEXT_INSN (branch1);
9924 insn && distance < 256;
9925 insn = PREV_INSN (insn))
9927 if (insn == dest)
9928 return 1;
9929 else
9930 distance += get_attr_length (insn);
9932 for (distance = 0, insn = NEXT_INSN (branch1);
9933 insn && distance < 256;
9934 insn = NEXT_INSN (insn))
9936 if (insn == dest)
9937 return 1;
9938 else
9939 distance += get_attr_length (insn);
9942 return 0;
9945 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9947 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9948 unsigned int new_reg)
9950 /* Interrupt functions can only use registers that have already been
9951 saved by the prologue, even if they would normally be
9952 call-clobbered. */
9954 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9955 return 0;
9957 return 1;
9960 /* Function to update the integer COST
9961 based on the relationship between INSN that is dependent on
9962 DEP_INSN through the dependence LINK. The default is to make no
9963 adjustment to COST. This can be used for example to specify to
9964 the scheduler that an output- or anti-dependence does not incur
9965 the same cost as a data-dependence. The return value should be
9966 the new value for COST. */
9967 static int
9968 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9970 rtx reg, use_pat;
9972 if (TARGET_SHMEDIA)
9974 /* On SHmedia, if the dependence is an anti-dependence or
9975 output-dependence, there is no cost. */
9976 if (REG_NOTE_KIND (link) != 0)
9978 /* However, dependencies between target register loads and
9979 uses of the register in a subsequent block that are separated
9980 by a conditional branch are not modelled - we have to make do with
9981 the anti-dependency between the target register load and the
9982 conditional branch that ends the current block. */
9983 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9984 && GET_CODE (PATTERN (dep_insn)) == SET
9985 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9986 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9987 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9989 int orig_cost = cost;
9990 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9991 rtx target = ((! note
9992 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9993 ? insn : JUMP_LABEL (insn));
9994 /* On the likely path, the branch costs 1, on the unlikely path,
9995 it costs 3. */
9996 cost--;
9998 target = next_active_insn (target);
9999 while (target && ! flow_dependent_p (target, dep_insn)
10000 && --cost > 0);
10001 /* If two branches are executed in immediate succession, with the
10002 first branch properly predicted, this causes a stall at the
10003 second branch, hence we won't need the target for the
10004 second branch for two cycles after the launch of the first
10005 branch. */
10006 if (cost > orig_cost - 2)
10007 cost = orig_cost - 2;
10009 else
10010 cost = 0;
10013 else if (get_attr_is_mac_media (insn)
10014 && get_attr_is_mac_media (dep_insn))
10015 cost = 1;
10017 else if (! reload_completed
10018 && GET_CODE (PATTERN (insn)) == SET
10019 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10020 && GET_CODE (PATTERN (dep_insn)) == SET
10021 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10022 && cost < 4)
10023 cost = 4;
10024 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10025 that is needed at the target. */
10026 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10027 && ! flow_dependent_p (insn, dep_insn))
10028 cost--;
10030 else if (REG_NOTE_KIND (link) == 0)
10032 enum attr_type type;
10033 rtx dep_set;
10035 if (recog_memoized (insn) < 0
10036 || recog_memoized (dep_insn) < 0)
10037 return cost;
10039 dep_set = single_set (dep_insn);
10041 /* The latency that we specify in the scheduling description refers
10042 to the actual output, not to an auto-increment register; for that,
10043 the latency is one. */
10044 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10046 rtx set = single_set (insn);
10048 if (set
10049 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10050 && (!MEM_P (SET_DEST (set))
10051 || !reg_mentioned_p (SET_DEST (dep_set),
10052 XEXP (SET_DEST (set), 0))))
10053 cost = 1;
10055 /* The only input for a call that is timing-critical is the
10056 function's address. */
10057 if (CALL_P (insn))
10059 rtx call = PATTERN (insn);
10061 if (GET_CODE (call) == PARALLEL)
10062 call = XVECEXP (call, 0 ,0);
10063 if (GET_CODE (call) == SET)
10064 call = SET_SRC (call);
10065 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10066 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10067 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10068 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10069 cost -= TARGET_SH4_300 ? 3 : 6;
10071 /* Likewise, the most timing critical input for an sfuncs call
10072 is the function address. However, sfuncs typically start
10073 using their arguments pretty quickly.
10074 Assume a four cycle delay for SH4 before they are needed.
10075 Cached ST40-300 calls are quicker, so assume only a one
10076 cycle delay there.
10077 ??? Maybe we should encode the delays till input registers
10078 are needed by sfuncs into the sfunc call insn. */
10079 /* All sfunc calls are parallels with at least four components.
10080 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10081 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10082 && XVECLEN (PATTERN (insn), 0) >= 4
10083 && (reg = sfunc_uses_reg (insn)))
10085 if (! reg_set_p (reg, dep_insn))
10086 cost -= TARGET_SH4_300 ? 1 : 4;
10088 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10090 enum attr_type dep_type = get_attr_type (dep_insn);
10092 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10093 cost--;
10094 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10095 && (type = get_attr_type (insn)) != TYPE_CALL
10096 && type != TYPE_SFUNC)
10097 cost--;
10098 /* When the preceding instruction loads the shift amount of
10099 the following SHAD/SHLD, the latency of the load is increased
10100 by 1 cycle. */
10101 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10102 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10103 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10104 XEXP (SET_SRC (single_set (insn)),
10105 1)))
10106 cost++;
10107 /* When an LS group instruction with a latency of less than
10108 3 cycles is followed by a double-precision floating-point
10109 instruction, FIPR, or FTRV, the latency of the first
10110 instruction is increased to 3 cycles. */
10111 else if (cost < 3
10112 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10113 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10114 cost = 3;
10115 /* The lsw register of a double-precision computation is ready one
10116 cycle earlier. */
10117 else if (reload_completed
10118 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10119 && (use_pat = single_set (insn))
10120 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10121 SET_SRC (use_pat)))
10122 cost -= 1;
10124 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10125 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10126 cost -= 1;
10128 else if (TARGET_SH4_300)
10130 /* Stores need their input register two cycles later. */
10131 if (dep_set && cost >= 1
10132 && ((type = get_attr_type (insn)) == TYPE_STORE
10133 || type == TYPE_PSTORE
10134 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10136 rtx set = single_set (insn);
10138 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10139 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10141 cost -= 2;
10142 /* But don't reduce the cost below 1 if the address depends
10143 on a side effect of dep_insn. */
10144 if (cost < 1
10145 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10146 cost = 1;
10151 /* An anti-dependence penalty of two applies if the first insn is a double
10152 precision fadd / fsub / fmul. */
10153 else if (!TARGET_SH4_300
10154 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10155 && recog_memoized (dep_insn) >= 0
10156 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10157 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10158 /* A lot of alleged anti-flow dependences are fake,
10159 so check this one is real. */
10160 && flow_dependent_p (dep_insn, insn))
10161 cost = 2;
10163 return cost;
10166 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10167 if DEP_INSN is anti-flow dependent on INSN. */
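/* Roughly: note_stores walks every store in DEP_INSN and clears the shared
   pointer whenever a stored location is referenced by INSN's pattern, so a
   NULL pointer on return signals a true (flow) dependence.  */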
10168 static int
10169 flow_dependent_p (rtx insn, rtx dep_insn)
10171 rtx tmp = PATTERN (insn);
10173 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10174 return tmp == NULL_RTX;
10177 /* A helper function for flow_dependent_p called through note_stores. */
10178 static void
10179 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10181 rtx * pinsn = (rtx *) data;
10183 if (*pinsn && reg_referenced_p (x, *pinsn))
10184 *pinsn = NULL_RTX;
10187 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10188 'special function' patterns (type sfunc) that clobber pr, but that
10189 do not look like function calls to leaf_function_p. Hence we must
10190 do this extra check. */
10191 static int
10192 sh_pr_n_sets (void)
10194 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10197 /* Return where to allocate pseudo for a given hard register initial
10198 value. */
10199 static rtx
10200 sh_allocate_initial_value (rtx hard_reg)
10202 rtx x;
10204 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10206 if (current_function_is_leaf
10207 && ! sh_pr_n_sets ()
10208 && ! (TARGET_SHCOMPACT
10209 && ((crtl->args.info.call_cookie
10210 & ~ CALL_COOKIE_RET_TRAMP (1))
10211 || crtl->saves_all_registers)))
10212 x = hard_reg;
10213 else
10214 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10216 else
10217 x = NULL_RTX;
10219 return x;
10222 /* This function returns "2" to indicate dual issue for the SH4
10223 processor. To be used by the DFA pipeline description. */
10224 static int
10225 sh_issue_rate (void)
10227 if (TARGET_SUPERSCALAR)
10228 return 2;
10229 else
10230 return 1;
10233 /* Functions for ready queue reordering for sched1. */
10235 /* Get weight for mode for a set x. */
10236 static short
10237 find_set_regmode_weight (rtx x, enum machine_mode mode)
10239 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10240 return 1;
10241 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10243 if (REG_P (SET_DEST (x)))
10245 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10246 return 1;
10247 else
10248 return 0;
10250 return 1;
10252 return 0;
10255 /* Get regmode weight for insn. */
10256 static short
10257 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10259 short reg_weight = 0;
10260 rtx x;
10262 /* Increment weight for each register born here. */
10263 x = PATTERN (insn);
10264 reg_weight += find_set_regmode_weight (x, mode);
10265 if (GET_CODE (x) == PARALLEL)
10267 int j;
10268 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10270 x = XVECEXP (PATTERN (insn), 0, j);
10271 reg_weight += find_set_regmode_weight (x, mode);
10274 /* Decrement weight for each register that dies here. */
10275 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10277 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10279 rtx note = XEXP (x, 0);
10280 if (REG_P (note) && GET_MODE (note) == mode)
10281 reg_weight--;
10284 return reg_weight;
10287 /* Calculate regmode weights for all insns of a basic block. */
10288 static void
10289 find_regmode_weight (basic_block b, enum machine_mode mode)
10291 rtx insn, next_tail, head, tail;
10293 get_ebb_head_tail (b, b, &head, &tail);
10294 next_tail = NEXT_INSN (tail);
10296 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10298 /* Handle register life information. */
10299 if (!INSN_P (insn))
10300 continue;
10302 if (mode == SFmode)
10303 INSN_REGMODE_WEIGHT (insn, mode) =
10304 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10305 else if (mode == SImode)
10306 INSN_REGMODE_WEIGHT (insn, mode) =
10307 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
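/* A DFmode or DImode value occupies two SFmode / SImode registers on SH,
   hence the factor of two applied to the wider-mode weights above.  */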
10311 /* Comparison function for ready queue sorting. */
10312 static int
10313 rank_for_reorder (const void *x, const void *y)
10315 rtx tmp = *(const rtx *) y;
10316 rtx tmp2 = *(const rtx *) x;
10318 /* The insn in a schedule group should be issued first. */
10319 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10320 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10322 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10323 minimizes instruction movement, thus minimizing sched's effect on
10324 register pressure. */
10325 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10328 /* Resort the array A in which only the last element (index N-1) may be out of order. */
10329 static void
10330 swap_reorder (rtx *a, int n)
10332 rtx insn = a[n - 1];
10333 int i = n - 2;
10335 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10337 a[i + 1] = a[i];
10338 i -= 1;
10340 a[i + 1] = insn;
10343 #define SCHED_REORDER(READY, N_READY) \
10344 do \
10346 if ((N_READY) == 2) \
10347 swap_reorder (READY, N_READY); \
10348 else if ((N_READY) > 2) \
10349 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10351 while (0)
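/* A two-entry ready list needs at most one insertion step (swap_reorder);
   longer lists fall back to a full qsort with rank_for_reorder.  */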
10353 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10354 macro. */
10355 static void
10356 ready_reorder (rtx *ready, int nready)
10358 SCHED_REORDER (ready, nready);
10361 /* Count life regions of r0 for a block. */
10362 static int
10363 find_r0_life_regions (basic_block b)
10365 rtx end, insn;
10366 rtx pset;
10367 rtx r0_reg;
10368 int live;
10369 int set;
10370 int death = 0;
10372 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10374 set = 1;
10375 live = 1;
10377 else
10379 set = 0;
10380 live = 0;
10383 insn = BB_HEAD (b);
10384 end = BB_END (b);
10385 r0_reg = gen_rtx_REG (SImode, R0_REG);
10386 while (1)
10388 if (INSN_P (insn))
10390 if (find_regno_note (insn, REG_DEAD, R0_REG))
10392 death++;
10393 live = 0;
10395 if (!live
10396 && (pset = single_set (insn))
10397 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10398 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10400 set++;
10401 live = 1;
10404 if (insn == end)
10405 break;
10406 insn = NEXT_INSN (insn);
10408 return set - death;
10411 /* Calculate regmode weights for all insns of all basic blocks. */
10412 static void
10413 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10414 int verbose ATTRIBUTE_UNUSED,
10415 int old_max_uid)
10417 basic_block b;
10419 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10420 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10421 r0_life_regions = 0;
10423 FOR_EACH_BB_REVERSE (b)
10425 find_regmode_weight (b, SImode);
10426 find_regmode_weight (b, SFmode);
10427 if (!reload_completed)
10428 r0_life_regions += find_r0_life_regions (b);
10431 CURR_REGMODE_PRESSURE (SImode) = 0;
10432 CURR_REGMODE_PRESSURE (SFmode) = 0;
10436 /* Cleanup. */
10437 static void
10438 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10439 int verbose ATTRIBUTE_UNUSED)
10441 if (regmode_weight[0])
10443 free (regmode_weight[0]);
10444 regmode_weight[0] = NULL;
10446 if (regmode_weight[1])
10448 free (regmode_weight[1]);
10449 regmode_weight[1] = NULL;
10453 /* The supported scalar modes differ from the default version only for
10454 TImode on 32-bit SHMEDIA. */
10455 static bool
10456 sh_scalar_mode_supported_p (enum machine_mode mode)
10458 if (TARGET_SHMEDIA32 && mode == TImode)
10459 return false;
10461 return default_scalar_mode_supported_p (mode);
10464 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10465 keep count of register pressures on SImode and SFmode. */
10466 static int
10467 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10468 int sched_verbose ATTRIBUTE_UNUSED,
10469 rtx insn,
10470 int can_issue_more)
10472 if (GET_CODE (PATTERN (insn)) != USE
10473 && GET_CODE (PATTERN (insn)) != CLOBBER)
10474 cached_can_issue_more = can_issue_more - 1;
10475 else
10476 cached_can_issue_more = can_issue_more;
10478 if (reload_completed)
10479 return cached_can_issue_more;
10481 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10482 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10484 return cached_can_issue_more;
10487 static void
10488 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10489 int verbose ATTRIBUTE_UNUSED,
10490 int veclen ATTRIBUTE_UNUSED)
10492 CURR_REGMODE_PRESSURE (SImode) = 0;
10493 CURR_REGMODE_PRESSURE (SFmode) = 0;
10496 /* Some magic numbers. */
10497 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10498 functions that already have high pressure on r0. */
10499 #define R0_MAX_LIFE_REGIONS 2
10500 /* Register Pressure thresholds for SImode and SFmode registers. */
10501 #define SIMODE_MAX_WEIGHT 5
10502 #define SFMODE_MAX_WEIGHT 10
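/* These thresholds only gate the sched1 heuristics below (high_pressure,
   sh_reorder and sh_reorder2); once reload has completed the scheduler
   hooks return immediately and no reordering or cycle skipping is done.  */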
10504 /* Return true if the pressure is high for MODE. */
10505 static short
10506 high_pressure (enum machine_mode mode)
10508 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10509 functions that already have high pressure on r0. */
10510 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10511 return 1;
10513 if (mode == SFmode)
10514 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10515 else
10516 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10519 /* Reorder ready queue if register pressure is high. */
10520 static int
10521 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10522 int sched_verbose ATTRIBUTE_UNUSED,
10523 rtx *ready,
10524 int *n_readyp,
10525 int clock_var ATTRIBUTE_UNUSED)
10527 if (reload_completed)
10528 return sh_issue_rate ();
10530 if (high_pressure (SFmode) || high_pressure (SImode))
10532 ready_reorder (ready, *n_readyp);
10535 return sh_issue_rate ();
10538 /* Skip cycles if the current register pressure is high. */
10539 static int
10540 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10541 int sched_verbose ATTRIBUTE_UNUSED,
10542 rtx *ready ATTRIBUTE_UNUSED,
10543 int *n_readyp ATTRIBUTE_UNUSED,
10544 int clock_var ATTRIBUTE_UNUSED)
10546 if (reload_completed)
10547 return cached_can_issue_more;
10549 if (high_pressure(SFmode) || high_pressure (SImode))
10550 skip_cycles = 1;
10552 return cached_can_issue_more;
10555 /* Skip cycles without sorting the ready queue. This will move insns from
10556 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10557 ready queue by sh_reorder. */
10559 /* Generally, skipping this many cycles is sufficient for all insns to move
10560 from Q -> R. */
10561 #define MAX_SKIPS 8
10563 static int
10564 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10565 int sched_verbose ATTRIBUTE_UNUSED,
10566 rtx insn ATTRIBUTE_UNUSED,
10567 int last_clock_var,
10568 int clock_var,
10569 int *sort_p)
10571 if (reload_completed)
10572 return 0;
10574 if (skip_cycles)
10576 if ((clock_var - last_clock_var) < MAX_SKIPS)
10578 *sort_p = 0;
10579 return 1;
10581 /* If this is the last cycle we are skipping, allow reordering of R. */
10582 if ((clock_var - last_clock_var) == MAX_SKIPS)
10584 *sort_p = 1;
10585 return 1;
10589 skip_cycles = 0;
10591 return 0;
10594 /* SHmedia requires registers for branches, so we can't generate new
10595 branches past reload. */
10596 static bool
10597 sh_cannot_modify_jumps_p (void)
10599 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10602 static reg_class_t
10603 sh_target_reg_class (void)
10605 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10608 static bool
10609 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10611 HARD_REG_SET dummy;
10612 #if 0
10613 rtx insn;
10614 #endif
10616 if (! shmedia_space_reserved_for_target_registers)
10617 return 0;
10618 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10619 return 0;
10620 if (calc_live_regs (&dummy) >= 6 * 8)
10621 return 1;
10622 return 0;
10625 static bool
10626 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10628 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10632 /* On the SH1..SH4, the trampoline looks like
10633 2 0002 D202 mov.l l2,r2
10634 1 0000 D301 mov.l l1,r3
10635 3 0004 422B jmp @r2
10636 4 0006 0009 nop
10637 5 0008 00000000 l1: .long area
10638 6 000c 00000000 l2: .long function
10640 SH5 (compact) uses r1 instead of r3 for the static chain. */
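/* In the generic SH1..SH4 path of sh_trampoline_init below, the first two
   words written are exactly these four opcodes packed two per SImode word
   according to endianness (0xd202 mov.l l2,r2; 0xd301 mov.l l1,r3;
   0x422b jmp @r2; 0x0009 nop), followed by the static chain (l1) and the
   function address (l2).  */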
10643 /* Emit RTL insns to initialize the variable parts of a trampoline.
10644 FNADDR is an RTX for the address of the function's pure code.
10645 CXT is an RTX for the static chain value for the function. */
10647 static void
10648 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10650 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10651 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10653 if (TARGET_SHMEDIA64)
10655 rtx tramp_templ;
10656 int fixed_len;
10658 rtx movi1 = GEN_INT (0xcc000010);
10659 rtx shori1 = GEN_INT (0xc8000010);
10660 rtx src, dst;
10662 /* The following trampoline works within a +- 128 KB range for cxt:
10663 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10664 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10665 gettr tr1,r1; blink tr0,r63 */
10666 /* Address rounding makes it hard to compute the exact bounds of the
10667 offset for this trampoline, but we have a rather generous offset
10668 range, so frame_offset should do fine as an upper bound. */
10669 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10671 /* ??? could optimize this trampoline initialization
10672 by writing DImode words with two insns each. */
10673 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10674 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10675 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10676 insn = gen_rtx_AND (DImode, insn, mask);
10677 /* Or in ptb/u .,tr1 pattern */
10678 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10679 insn = force_operand (insn, NULL_RTX);
10680 insn = gen_lowpart (SImode, insn);
10681 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10682 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10683 insn = gen_rtx_AND (DImode, insn, mask);
10684 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10685 insn = gen_lowpart (SImode, insn);
10686 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10687 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10688 insn = gen_rtx_AND (DImode, insn, mask);
10689 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10690 insn = gen_lowpart (SImode, insn);
10691 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10692 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10693 insn = gen_rtx_AND (DImode, insn, mask);
10694 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10695 insn = gen_lowpart (SImode, insn);
10696 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10697 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10698 insn = gen_rtx_AND (DImode, insn, mask);
10699 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10700 insn = gen_lowpart (SImode, insn);
10701 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10702 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10703 GEN_INT (0x6bf10600));
10704 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10705 GEN_INT (0x4415fc10));
10706 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10707 GEN_INT (0x4401fff0));
10708 emit_insn (gen_ic_invalidate_line (tramp));
10709 return;
10711 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10712 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10714 tramp_templ = gen_datalabel_ref (tramp_templ);
10715 dst = tramp_mem;
10716 src = gen_const_mem (BLKmode, tramp_templ);
10717 set_mem_align (dst, 256);
10718 set_mem_align (src, 64);
10719 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10721 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10722 emit_move_insn (adjust_address (tramp_mem, Pmode,
10723 fixed_len + GET_MODE_SIZE (Pmode)),
10724 cxt);
10725 emit_insn (gen_ic_invalidate_line (tramp));
10726 return;
10728 else if (TARGET_SHMEDIA)
10730 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10731 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10732 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10733 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10734 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
10735 rotated 10 right, and the higher 16 bits of every 32 selected. */
10736 rtx movishori
10737 = force_reg (V2HImode, (simplify_gen_subreg
10738 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10739 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10740 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10742 fnaddr = force_reg (SImode, fnaddr);
10743 cxt = force_reg (SImode, cxt);
10744 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10745 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10746 movishori));
10747 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10748 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10749 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10750 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10751 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10752 gen_rtx_SUBREG (V2HImode, cxt, 0),
10753 movishori));
10754 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10755 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10756 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10757 if (TARGET_LITTLE_ENDIAN)
10759 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10760 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10762 else
10764 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10765 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10767 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10768 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10769 emit_insn (gen_ic_invalidate_line (tramp));
10770 return;
10772 else if (TARGET_SHCOMPACT)
10774 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10775 return;
10777 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10778 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10779 SImode));
10780 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10781 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10782 SImode));
10783 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10784 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10785 if (TARGET_HARVARD)
10787 if (!TARGET_INLINE_IC_INVALIDATE
10788 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10789 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10790 FUNCTION_ORDINARY),
10791 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10792 else
10793 emit_insn (gen_ic_invalidate_line (tramp));
10797 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10799 static rtx
10800 sh_trampoline_adjust_address (rtx tramp)
10802 if (TARGET_SHMEDIA)
10803 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10804 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10805 return tramp;
10808 /* FIXME: This is overly conservative. A SHcompact function that
10809 receives arguments ``by reference'' will have them stored in its
10810 own stack frame, so it must not pass pointers or references to
10811 these arguments to other functions by means of sibling calls. */
10812 /* If PIC, we cannot make sibling calls to global functions
10813 because the PLT requires r12 to be live. */
10814 static bool
10815 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10817 return (1
10818 && (! TARGET_SHCOMPACT
10819 || crtl->args.info.stack_regs == 0)
10820 && ! sh_cfun_interrupt_handler_p ()
10821 && (! flag_pic
10822 || (decl && ! TREE_PUBLIC (decl))
10823 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10826 /* Machine specific built-in functions. */
10828 struct builtin_description
10830 const enum insn_code icode;
10831 const char *const name;
10832 int signature;
10833 tree fndecl;
10836 /* Describe number and signedness of arguments; arg[0] == result
10837 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10838 /* 9: 64-bit pointer, 10: 32-bit pointer */
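/* For example, the SH_BLTIN_LDUA_L entry below is { 2, 10 }: a signed
   result computed from a single 32-bit pointer argument.  */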
10839 static const char signature_args[][4] =
10841 #define SH_BLTIN_V2SI2 0
10842 { 4, 4 },
10843 #define SH_BLTIN_V4HI2 1
10844 { 4, 4 },
10845 #define SH_BLTIN_V2SI3 2
10846 { 4, 4, 4 },
10847 #define SH_BLTIN_V4HI3 3
10848 { 4, 4, 4 },
10849 #define SH_BLTIN_V8QI3 4
10850 { 4, 4, 4 },
10851 #define SH_BLTIN_MAC_HISI 5
10852 { 1, 4, 4, 1 },
10853 #define SH_BLTIN_SH_HI 6
10854 { 4, 4, 1 },
10855 #define SH_BLTIN_SH_SI 7
10856 { 4, 4, 1 },
10857 #define SH_BLTIN_V4HI2V2SI 8
10858 { 4, 4, 4 },
10859 #define SH_BLTIN_V4HI2V8QI 9
10860 { 4, 4, 4 },
10861 #define SH_BLTIN_SISF 10
10862 { 4, 2 },
10863 #define SH_BLTIN_LDUA_L 11
10864 { 2, 10 },
10865 #define SH_BLTIN_LDUA_Q 12
10866 { 1, 10 },
10867 #define SH_BLTIN_STUA_L 13
10868 { 0, 10, 2 },
10869 #define SH_BLTIN_STUA_Q 14
10870 { 0, 10, 1 },
10871 #define SH_BLTIN_LDUA_L64 15
10872 { 2, 9 },
10873 #define SH_BLTIN_LDUA_Q64 16
10874 { 1, 9 },
10875 #define SH_BLTIN_STUA_L64 17
10876 { 0, 9, 2 },
10877 #define SH_BLTIN_STUA_Q64 18
10878 { 0, 9, 1 },
10879 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10880 #define SH_BLTIN_2 19
10881 #define SH_BLTIN_SU 19
10882 { 1, 2 },
10883 #define SH_BLTIN_3 20
10884 #define SH_BLTIN_SUS 20
10885 { 2, 2, 1 },
10886 #define SH_BLTIN_PSSV 21
10887 { 0, 8, 2, 2 },
10888 #define SH_BLTIN_XXUU 22
10889 #define SH_BLTIN_UUUU 22
10890 { 1, 1, 1, 1 },
10891 #define SH_BLTIN_PV 23
10892 { 0, 8 },
10894 /* mcmv: operands considered unsigned. */
10895 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10896 /* mperm: control value considered unsigned int. */
10897 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10898 /* mshards_q: returns signed short. */
10899 /* nsb: takes long long arg, returns unsigned char. */
10900 static struct builtin_description bdesc[] =
10902 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10903 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10904 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10905 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10906 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10907 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10908 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10909 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10910 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10911 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10912 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10913 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10914 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10915 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10916 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10917 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10918 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10919 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10920 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10921 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10922 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10923 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10924 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10925 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10926 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10927 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10928 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10929 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10930 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10931 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10932 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10933 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10934 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10935 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10936 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10937 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10938 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10939 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10940 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10941 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10942 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10943 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10944 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10945 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10946 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10947 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10948 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10949 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10950 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10951 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10952 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10953 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10954 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10955 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10956 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10957 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10958 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10959 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10960 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10961 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10962 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10963 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10964 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10965 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10966 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10967 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10968 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10969 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10970 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10971 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10972 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10973 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10974 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10975 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10976 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10977 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10978 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10979 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10980 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10981 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10982 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10983 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10984 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10985 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10988 static void
10989 sh_media_init_builtins (void)
10991 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10992 struct builtin_description *d;
10994 memset (shared, 0, sizeof shared);
10995 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10997 tree type, arg_type = 0;
10998 int signature = d->signature;
10999 int i;
11001 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11002 type = shared[signature];
11003 else
11005 int has_result = signature_args[signature][0] != 0;
11007 if ((signature_args[signature][1] & 8)
11008 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11009 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11010 continue;
11011 if (! TARGET_FPU_ANY
11012 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11013 continue;
11014 type = void_list_node;
11015 for (i = 3; ; i--)
11017 int arg = signature_args[signature][i];
11018 int opno = i - 1 + has_result;
11020 if (arg & 8)
11021 arg_type = ptr_type_node;
11022 else if (arg)
11023 arg_type = (*lang_hooks.types.type_for_mode)
11024 (insn_data[d->icode].operand[opno].mode,
11025 (arg & 1));
11026 else if (i)
11027 continue;
11028 else
11029 arg_type = void_type_node;
11030 if (i == 0)
11031 break;
11032 type = tree_cons (NULL_TREE, arg_type, type);
11034 type = build_function_type (arg_type, type);
11035 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11036 shared[signature] = type;
11038 d->fndecl =
11039 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11040 NULL, NULL_TREE);
11044 /* Returns the shmedia builtin decl for CODE. */
11046 static tree
11047 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11049 if (code >= ARRAY_SIZE (bdesc))
11050 return error_mark_node;
11052 return bdesc[code].fndecl;
11055 /* Implements target hook vector_mode_supported_p. */
11056 bool
11057 sh_vector_mode_supported_p (enum machine_mode mode)
11059 if (TARGET_FPU_ANY
11060 && ((mode == V2SFmode)
11061 || (mode == V4SFmode)
11062 || (mode == V16SFmode)))
11063 return true;
11065 else if (TARGET_SHMEDIA
11066 && ((mode == V8QImode)
11067 || (mode == V2HImode)
11068 || (mode == V4HImode)
11069 || (mode == V2SImode)))
11070 return true;
11072 return false;
11075 bool
11076 sh_frame_pointer_required (void)
11078 /* If needed override this in other tm.h files to cope with various OS
11079 lossage requiring a frame pointer. */
11080 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11081 return true;
11083 if (crtl->profile)
11084 return true;
11086 return false;
11089 /* Implements target hook dwarf_calling_convention. Return an enum
11090 of dwarf_calling_convention. */
11092 sh_dwarf_calling_convention (const_tree func)
11094 if (sh_attr_renesas_p (func))
11095 return DW_CC_GNU_renesas_sh;
11097 return DW_CC_normal;
11100 static void
11101 sh_init_builtins (void)
11103 if (TARGET_SHMEDIA)
11104 sh_media_init_builtins ();
11107 /* Returns the sh builtin decl for CODE. */
11109 static tree
11110 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11112 if (TARGET_SHMEDIA)
11113 return sh_media_builtin_decl (code, initialize_p);
11115 return error_mark_node;
11118 /* Expand an expression EXP that calls a built-in function,
11119 with result going to TARGET if that's convenient
11120 (and in mode MODE if that's convenient).
11121 SUBTARGET may be used as the target for computing one of EXP's operands.
11122 IGNORE is nonzero if the value is to be ignored. */
11124 static rtx
11125 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11126 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11128 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11129 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11130 const struct builtin_description *d = &bdesc[fcode];
11131 enum insn_code icode = d->icode;
11132 int signature = d->signature;
11133 enum machine_mode tmode = VOIDmode;
11134 int nop = 0, i;
11135 rtx op[4];
11136 rtx pat = 0;
11138 if (signature_args[signature][0])
11140 if (ignore)
11141 return 0;
11143 tmode = insn_data[icode].operand[0].mode;
11144 if (! target
11145 || GET_MODE (target) != tmode
11146 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11147 target = gen_reg_rtx (tmode);
11148 op[nop++] = target;
11150 else
11151 target = 0;
11153 for (i = 1; i <= 3; i++, nop++)
11155 tree arg;
11156 enum machine_mode opmode, argmode;
11157 tree optype;
11159 if (! signature_args[signature][i])
11160 break;
11161 arg = CALL_EXPR_ARG (exp, i - 1);
11162 if (arg == error_mark_node)
11163 return const0_rtx;
11164 if (signature_args[signature][i] & 8)
11166 opmode = ptr_mode;
11167 optype = ptr_type_node;
11169 else
11171 opmode = insn_data[icode].operand[nop].mode;
11172 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11174 argmode = TYPE_MODE (TREE_TYPE (arg));
11175 if (argmode != opmode)
11176 arg = build1 (NOP_EXPR, optype, arg);
11177 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11178 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11179 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11182 switch (nop)
11184 case 1:
11185 pat = (*insn_data[d->icode].genfun) (op[0]);
11186 break;
11187 case 2:
11188 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11189 break;
11190 case 3:
11191 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11192 break;
11193 case 4:
11194 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11195 break;
11196 default:
11197 gcc_unreachable ();
11199 if (! pat)
11200 return 0;
11201 emit_insn (pat);
11202 return target;
11205 void
11206 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11208 rtx sel0 = const0_rtx;
11209 rtx sel1 = const1_rtx;
11210 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11211 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11213 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11214 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11217 void
11218 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11220 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11222 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11223 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11226 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11227 We can allow any mode in any general register. The special registers
11228 only allow SImode. Don't allow any mode in the PR.
11230 We cannot hold DCmode values in the XD registers because alter_reg
11231 handles subregs of them incorrectly. We could work around this by
11232 spacing the XD registers like the DR registers, but this would require
11233 additional memory in every compilation to hold larger register vectors.
11234 We could hold SFmode / SCmode values in XD registers, but that
11235 would require a tertiary reload when reloading from / to memory,
11236 and a secondary reload to reload from / to general regs; that
11237 seems to be a losing proposition.
11239 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11240 it won't be ferried through GP registers first. */
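/* Concretely, the checks below allow DFmode in the FP bank only in
   even-numbered register pairs, let the XD registers hold DFmode only,
   and restrict PR and FPSCR to SImode and PSImode respectively.  */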
11242 bool
11243 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11245 if (SPECIAL_REGISTER_P (regno))
11246 return mode == SImode;
11248 if (regno == FPUL_REG)
11249 return (mode == SImode || mode == SFmode);
11251 if (FP_REGISTER_P (regno) && mode == SFmode)
11252 return true;
11254 if (mode == V2SFmode)
11256 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11257 || GENERAL_REGISTER_P (regno)))
11258 return true;
11259 else
11260 return false;
11263 if (mode == V4SFmode)
11265 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11266 || GENERAL_REGISTER_P (regno))
11267 return true;
11268 else
11269 return false;
11272 if (mode == V16SFmode)
11274 if (TARGET_SHMEDIA)
11276 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11277 return true;
11278 else
11279 return false;
11281 else
11282 return regno == FIRST_XD_REG;
11285 if (FP_REGISTER_P (regno))
11287 if (mode == SFmode
11288 || mode == SImode
11289 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11290 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11291 || mode == DCmode
11292 || (TARGET_SHMEDIA
11293 && (mode == DFmode || mode == DImode
11294 || mode == V2SFmode || mode == TImode)))
11295 && ((regno - FIRST_FP_REG) & 1) == 0)
11296 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11297 && ((regno - FIRST_FP_REG) & 3) == 0))
11298 return true;
11299 else
11300 return false;
11303 if (XD_REGISTER_P (regno))
11304 return mode == DFmode;
11306 if (TARGET_REGISTER_P (regno))
11307 return (mode == DImode || mode == SImode || mode == PDImode);
11309 if (regno == PR_REG)
11310 return mode == SImode;
11312 if (regno == FPSCR_REG)
11313 return mode == PSImode;
11315 /* FIXME. This works around PR target/37633 for -O0. */
11316 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11318 unsigned int n = GET_MODE_SIZE (mode) / 8;
11320 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11321 && regno <= FIRST_GENERAL_REG + 14)
11322 return false;
11325 return true;
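/* Editor's note: a few spot checks of the rules above (illustrative sketch,
   not part of the original source).  */
#if 0
static void
example_hard_regno_mode_ok_checks (void)
{
  gcc_assert (sh_hard_regno_mode_ok (PR_REG, SImode));      /* PR: SImode only.  */
  gcc_assert (! sh_hard_regno_mode_ok (PR_REG, DFmode));
  gcc_assert (sh_hard_regno_mode_ok (FPSCR_REG, PSImode));  /* FPSCR: PSImode only.  */
  gcc_assert (sh_hard_regno_mode_ok (FIRST_GENERAL_REG, DImode)); /* Any mode in r0.  */
}
#endif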
11328 /* Return true if a mode change from FROM to TO is invalid for registers
11329 in class RCLASS.  */
11330 bool
11331 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11332 enum reg_class rclass)
11334 /* We want to enable the use of SUBREGs as a means to
11335 VEC_SELECT a single element of a vector. */
11336 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11337 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11339 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11341 if (TARGET_LITTLE_ENDIAN)
11343 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11344 return reg_classes_intersect_p (DF_REGS, rclass);
11346 else
11348 if (GET_MODE_SIZE (from) < 8)
11349 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11352 return 0;
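/* Editor's worked example (assumes a little-endian, non-SHmedia target;
   not part of the original source): because DFmode and SImode differ in
   size and SImode is narrower than 8 bytes, the mode change is rejected
   for any class that intersects DF_REGS.  */
#if 0
static bool
example_df_to_si_change_invalid (void)
{
  return sh_cannot_change_mode_class (DFmode, SImode, DF_REGS);  /* true */
}
#endif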
11355 /* Return true if values in machine mode MODE will likely be
11356 allocated to registers in small register classes.  */
11358 bool
11359 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11361 return (! TARGET_SHMEDIA);
11364 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11365 that label is used. */
11367 void
11368 sh_mark_label (rtx address, int nuses)
11370 if (GOTOFF_P (address))
11372 /* Extract the label or symbol. */
11373 address = XEXP (address, 0);
11374 if (GET_CODE (address) == PLUS)
11375 address = XEXP (address, 0);
11376 address = XVECEXP (address, 0, 0);
11378 if (GET_CODE (address) == LABEL_REF
11379 && LABEL_P (XEXP (address, 0)))
11380 LABEL_NUSES (XEXP (address, 0)) += nuses;
11383 /* Compute extra cost of moving data between one register class
11384 and another. */
11386 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11387 uses this information. Hence, the general register <-> floating point
11388 register information here is not used for SFmode. */
11391 sh_register_move_cost (enum machine_mode mode,
11392 enum reg_class srcclass, enum reg_class dstclass)
11394 if (dstclass == T_REGS || dstclass == PR_REGS)
11395 return 10;
11397 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11398 return 4;
11400 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11401 && REGCLASS_HAS_FP_REG (srcclass)
11402 && REGCLASS_HAS_FP_REG (dstclass))
11403 return 4;
11405 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11406 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11408 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11409 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11410 return 9;
11412 if ((REGCLASS_HAS_FP_REG (dstclass)
11413 && REGCLASS_HAS_GENERAL_REG (srcclass))
11414 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11415 && REGCLASS_HAS_FP_REG (srcclass)))
11416 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11417 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11419 if ((dstclass == FPUL_REGS
11420 && REGCLASS_HAS_GENERAL_REG (srcclass))
11421 || (srcclass == FPUL_REGS
11422 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11423 return 5;
11425 if ((dstclass == FPUL_REGS
11426 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11427 || (srcclass == FPUL_REGS
11428 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11429 return 7;
11431 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11432 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11433 return 20;
11435 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11436 if (TARGET_SHMEDIA
11437 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11439 if (sh_gettrcost >= 0)
11440 return sh_gettrcost;
11441 else if (!TARGET_PT_FIXED)
11442 return 100;
11445 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11446 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11447 return 4;
11449 if (TARGET_SHMEDIA
11450 || (TARGET_FMOVD
11451 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11452 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11453 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11455 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
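/* Editor's worked example (not from the original source; assumes a
   non-SHmedia target without TARGET_FMOVD): the formulas above give
   12 * ((8 + 7) / 8U) == 12 for a DFmode move between general and FP
   registers, and the fallback 2 * ((4 + 3) / 4U) == 2 for an SImode move
   between general registers.  */
#if 0
static void
example_register_move_costs (void)
{
  gcc_assert (sh_register_move_cost (DFmode, GENERAL_REGS, FP_REGS) == 12);
  gcc_assert (sh_register_move_cost (SImode, GENERAL_REGS, GENERAL_REGS) == 2);
}
#endif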
11458 static rtx emit_load_ptr (rtx, rtx);
11460 static rtx
11461 emit_load_ptr (rtx reg, rtx addr)
11463 rtx mem = gen_const_mem (ptr_mode, addr);
11465 if (Pmode != ptr_mode)
11466 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11467 return emit_move_insn (reg, mem);
11470 static void
11471 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11472 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11473 tree function)
11475 CUMULATIVE_ARGS cum;
11476 int structure_value_byref = 0;
11477 rtx this_rtx, this_value, sibcall, insns, funexp;
11478 tree funtype = TREE_TYPE (function);
11479 int simple_add = CONST_OK_FOR_ADD (delta);
11480 int did_load = 0;
11481 rtx scratch0, scratch1, scratch2;
11482 unsigned i;
11484 reload_completed = 1;
11485 epilogue_completed = 1;
11486 current_function_uses_only_leaf_regs = 1;
11488 emit_note (NOTE_INSN_PROLOGUE_END);
11490 /* Find the "this" pointer. We have such a wide range of ABIs for the
11491 SH that it's best to do this completely machine independently.
11492 "this" is passed as first argument, unless a structure return pointer
11493 comes first, in which case "this" comes second. */
11494 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11495 #ifndef PCC_STATIC_STRUCT_RETURN
11496 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11497 structure_value_byref = 1;
11498 #endif /* not PCC_STATIC_STRUCT_RETURN */
11499 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11501 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11503 sh_function_arg_advance (&cum, Pmode, ptype, true);
11505 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11507 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11508 static chain pointer (even if you can't have nested virtual functions
11509 right now, someone might implement them sometime), and the rest of the
11510 registers are used for argument passing, are callee-saved, or reserved. */
11511 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11512 -ffixed-reg has been used.  */
11513 if (! call_used_regs[0] || fixed_regs[0])
11514 error ("r0 needs to be available as a call-clobbered register");
11515 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11516 if (! TARGET_SH5)
11518 if (call_used_regs[1] && ! fixed_regs[1])
11519 scratch1 = gen_rtx_REG (ptr_mode, 1);
11520 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11521 to the location where struct values are returned.  */
11522 if (call_used_regs[3] && ! fixed_regs[3])
11523 scratch2 = gen_rtx_REG (Pmode, 3);
11525 else if (TARGET_SHMEDIA)
11527 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11528 if (i != REGNO (scratch0) &&
11529 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11531 scratch1 = gen_rtx_REG (ptr_mode, i);
11532 break;
11534 if (scratch1 == scratch0)
11535 error ("Need a second call-clobbered general purpose register");
11536 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11537 if (call_used_regs[i] && ! fixed_regs[i])
11539 scratch2 = gen_rtx_REG (Pmode, i);
11540 break;
11542 if (scratch2 == scratch0)
11543 error ("Need a call-clobbered target register");
11546 this_value = plus_constant (this_rtx, delta);
11547 if (vcall_offset
11548 && (simple_add || scratch0 != scratch1)
11549 && strict_memory_address_p (ptr_mode, this_value))
11551 emit_load_ptr (scratch0, this_value);
11552 did_load = 1;
11555 if (!delta)
11556 ; /* Do nothing. */
11557 else if (simple_add)
11558 emit_move_insn (this_rtx, this_value);
11559 else
11561 emit_move_insn (scratch1, GEN_INT (delta));
11562 emit_insn (gen_add2_insn (this_rtx, scratch1));
11565 if (vcall_offset)
11567 rtx offset_addr;
11569 if (!did_load)
11570 emit_load_ptr (scratch0, this_rtx);
11572 offset_addr = plus_constant (scratch0, vcall_offset);
11573 if (strict_memory_address_p (ptr_mode, offset_addr))
11574 ; /* Do nothing. */
11575 else if (! TARGET_SH5 && scratch0 != scratch1)
11577 /* scratch0 != scratch1, and we have indexed loads.  Get a better
11578 schedule by loading the offset into r1 and using an indexed
11579 load - then the load of r1 can issue before the load from
11580 (this_rtx + delta) finishes. */
11581 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11582 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11584 else if (CONST_OK_FOR_ADD (vcall_offset))
11586 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11587 offset_addr = scratch0;
11589 else if (scratch0 != scratch1)
11591 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11592 emit_insn (gen_add2_insn (scratch0, scratch1));
11593 offset_addr = scratch0;
11595 else
11596 gcc_unreachable (); /* FIXME */
11597 emit_load_ptr (scratch0, offset_addr);
11599 if (Pmode != ptr_mode)
11600 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11601 emit_insn (gen_add2_insn (this_rtx, scratch0));
11604 /* Generate a tail call to the target function. */
11605 if (! TREE_USED (function))
11607 assemble_external (function);
11608 TREE_USED (function) = 1;
11610 funexp = XEXP (DECL_RTL (function), 0);
11611 /* If the function is overridden, so is the thunk, hence we don't
11612 need GOT addressing even if this is a public symbol. */
11613 #if 0
11614 if (TARGET_SH1 && ! flag_weak)
11615 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11616 else
11617 #endif
11618 if (TARGET_SH2 && flag_pic)
11620 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11621 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11623 else
11625 if (TARGET_SHMEDIA && flag_pic)
11627 funexp = gen_sym2PIC (funexp);
11628 PUT_MODE (funexp, Pmode);
11630 emit_move_insn (scratch2, funexp);
11631 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11632 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11634 sibcall = emit_call_insn (sibcall);
11635 SIBLING_CALL_P (sibcall) = 1;
11636 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11637 emit_barrier ();
11639 /* Run just enough of rest_of_compilation to do scheduling and get
11640 the insns emitted. Note that use_thunk calls
11641 assemble_start_function and assemble_end_function. */
11643 insn_locators_alloc ();
11644 insns = get_insns ();
11646 if (optimize > 0)
11648 if (! cfun->cfg)
11649 init_flow (cfun);
11650 split_all_insns_noflow ();
11653 sh_reorg ();
11655 if (optimize > 0 && flag_delayed_branch)
11656 dbr_schedule (insns);
11658 shorten_branches (insns);
11659 final_start_function (insns, file, 1);
11660 final (insns, file, 1);
11661 final_end_function ();
11663 reload_completed = 0;
11664 epilogue_completed = 0;
11668 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11670 rtx sym;
11672 /* If this is not an ordinary function, the name usually comes from a
11673 string literal or an sprintf buffer. Make sure we use the same
11674 string consistently, so that cse will be able to unify address loads. */
11675 if (kind != FUNCTION_ORDINARY)
11676 name = IDENTIFIER_POINTER (get_identifier (name));
11677 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11678 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11679 if (flag_pic)
11680 switch (kind)
11682 case FUNCTION_ORDINARY:
11683 break;
11684 case SFUNC_GOT:
11686 rtx reg = target ? target : gen_reg_rtx (Pmode);
11688 emit_insn (gen_symGOT2reg (reg, sym));
11689 sym = reg;
11690 break;
11692 case SFUNC_STATIC:
11694 /* ??? To allow cse to work, we use GOTOFF relocations.
11695 We could add combiner patterns to transform this into
11696 straight pc-relative calls with sym2PIC / bsrf when
11697 label load and function call are still 1:1 and in the
11698 same basic block during combine. */
11699 rtx reg = target ? target : gen_reg_rtx (Pmode);
11701 emit_insn (gen_symGOTOFF2reg (reg, sym));
11702 sym = reg;
11703 break;
11706 if (target && sym != target)
11708 emit_move_insn (target, sym);
11709 return target;
11711 return sym;
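/* Editor's sketch (not from the original source; the sfunc name below is
   made up): loading the address of a special library function so that
   repeated loads of the same name share one SYMBOL_REF and can be CSEd.
   Under -fpic, SFUNC_GOT and SFUNC_STATIC select GOT and GOTOFF
   addressing respectively.  */
#if 0
static rtx
example_load_sfunc_address (void)
{
  rtx addr = gen_reg_rtx (Pmode);

  return function_symbol (addr, "__example_sfunc", SFUNC_STATIC);
}
#endif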
11714 /* Find the number of the first general purpose register in S, or -1 if none.  */
11715 static int
11716 scavenge_reg (HARD_REG_SET *s)
11718 int r;
11719 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11720 if (TEST_HARD_REG_BIT (*s, r))
11721 return r;
11722 return -1;
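/* Editor's sketch (not from the original source): callers build a
   HARD_REG_SET of candidate registers and ask for any general register
   in it.  */
#if 0
static int
example_pick_scratch_reg (void)
{
  HARD_REG_SET avail;

  /* Call-clobbered, non-fixed registers are candidates here.  */
  COPY_HARD_REG_SET (avail, call_used_reg_set);
  AND_COMPL_HARD_REG_SET (avail, fixed_reg_set);
  return scavenge_reg (&avail);   /* Lowest general register, or -1.  */
}
#endif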
11726 sh_get_pr_initial_val (void)
11728 rtx val;
11730 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11731 PR register on SHcompact, because it might be clobbered by the prologue.
11732 We check first if that is known to be the case. */
11733 if (TARGET_SHCOMPACT
11734 && ((crtl->args.info.call_cookie
11735 & ~ CALL_COOKIE_RET_TRAMP (1))
11736 || crtl->saves_all_registers))
11737 return gen_frame_mem (SImode, return_address_pointer_rtx);
11739 /* If we haven't finished rtl generation, there might be a nonlocal label
11740 that we haven't seen yet.
11741 ??? get_hard_reg_initial_val fails if it is called after register
11742 allocation has started, unless it has been called before for the
11743 same register.  And even then, we end up in trouble if we didn't use
11744 the register in the same basic block before. So call
11745 get_hard_reg_initial_val now and wrap it in an unspec if we might
11746 need to replace it. */
11747 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11748 combine can put the pseudo returned by get_hard_reg_initial_val into
11749 instructions that need a general purpose register, which will fail to
11750 be recognized when the pseudo becomes allocated to PR. */
11751 val
11752 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11753 if (TARGET_SH1)
11754 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11755 return val;
11759 sh_expand_t_scc (rtx operands[])
11761 enum rtx_code code = GET_CODE (operands[1]);
11762 rtx target = operands[0];
11763 rtx op0 = operands[2];
11764 rtx op1 = operands[3];
11765 rtx result = target;
11766 HOST_WIDE_INT val;
11768 if (!REG_P (op0) || REGNO (op0) != T_REG
11769 || !CONST_INT_P (op1))
11770 return 0;
11771 if (!REG_P (result))
11772 result = gen_reg_rtx (SImode);
11773 val = INTVAL (op1);
11774 if ((code == EQ && val == 1) || (code == NE && val == 0))
11775 emit_insn (gen_movt (result));
11776 else if (TARGET_SH2A && ((code == EQ && val == 0)
11777 || (code == NE && val == 1)))
11778 emit_insn (gen_xorsi3_movrt (result));
11779 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11781 emit_clobber (result);
11782 emit_insn (gen_subc (result, result, result));
11783 emit_insn (gen_addsi3 (result, result, const1_rtx));
11785 else if (code == EQ || code == NE)
11786 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11787 else
11788 return 0;
11789 if (result != target)
11790 emit_move_insn (target, result);
11791 return 1;
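/* Editor's note (illustrative, not from the original source): the helper
   above recognizes comparisons of the T bit against 0 or 1 and emits the
   cheapest sequence it knows:
     T == 1  ->  movt  Rn
     T == 0  ->  movrt Rn                  (SH2A)
     T == 0  ->  subc  Rn,Rn; add #1,Rn    (other targets)
   A cstore-style expander in the machine description is assumed to try
   "if (sh_expand_t_scc (operands)) DONE;" and fall back to the generic
   expansion when the helper returns 0.  */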
11794 /* INSN is an sfunc; return the rtx that describes the address used. */
11795 static rtx
11796 extract_sfunc_addr (rtx insn)
11798 rtx pattern, part = NULL_RTX;
11799 int len, i;
11801 pattern = PATTERN (insn);
11802 len = XVECLEN (pattern, 0);
11803 for (i = 0; i < len; i++)
11805 part = XVECEXP (pattern, 0, i);
11806 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11807 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11808 return XEXP (part, 0);
11810 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11811 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11814 /* Verify that the register in use_sfunc_addr still agrees with the address
11815 used in the sfunc. This prevents fill_slots_from_thread from changing
11816 use_sfunc_addr.
11817 INSN is the use_sfunc_addr instruction, and REG is the register it
11818 guards. */
11820 check_use_sfunc_addr (rtx insn, rtx reg)
11822 /* Search for the sfunc. It should really come right after INSN. */
11823 while ((insn = NEXT_INSN (insn)))
11825 if (LABEL_P (insn) || JUMP_P (insn))
11826 break;
11827 if (! INSN_P (insn))
11828 continue;
11830 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11831 insn = XVECEXP (PATTERN (insn), 0, 0);
11832 if (GET_CODE (PATTERN (insn)) != PARALLEL
11833 || get_attr_type (insn) != TYPE_SFUNC)
11834 continue;
11835 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11837 gcc_unreachable ();
11840 /* This function returns a constant rtx that represents 2**15 / pi in
11841 SFmode.  It's used to scale SFmode angles, in radians, to a
11842 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11843 maps to 0x10000).  */
11845 static GTY(()) rtx sh_fsca_sf2int_rtx;
11848 sh_fsca_sf2int (void)
11850 if (! sh_fsca_sf2int_rtx)
11852 REAL_VALUE_TYPE rv;
11854 real_from_string (&rv, "10430.378350470453");
11855 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11858 return sh_fsca_sf2int_rtx;
11861 /* This function returns a constant rtx that represents 2**15 / pi in
11862 DFmode.  It's used to scale DFmode angles, in radians, to a
11863 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11864 maps to 0x10000).  */
11866 static GTY(()) rtx sh_fsca_df2int_rtx;
11869 sh_fsca_df2int (void)
11871 if (! sh_fsca_df2int_rtx)
11873 REAL_VALUE_TYPE rv;
11875 real_from_string (&rv, "10430.378350470453");
11876 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11879 return sh_fsca_df2int_rtx;
11882 /* This function returns a constant rtx that represents pi / 2**15 in
11883 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11884 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11885 2*pi).  */
11887 static GTY(()) rtx sh_fsca_int2sf_rtx;
11890 sh_fsca_int2sf (void)
11892 if (! sh_fsca_int2sf_rtx)
11894 REAL_VALUE_TYPE rv;
11896 real_from_string (&rv, "9.587379924285257e-5");
11897 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11900 return sh_fsca_int2sf_rtx;
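/* Editor's worked example (not from the original source): the two constants
   above are reciprocals of each other.  2**15 / pi ~= 10430.378 converts
   radians to the 16.16 circle fraction (pi/2 * 10430.378 ~= 16384 = 0x4000,
   a quarter circle), and pi / 2**15 ~= 9.5874e-5 converts back
   (0x4000 * 9.5874e-5 ~= 1.5708 ~= pi/2).  */
#if 0
static void
example_fsca_scaling (void)
{
  const double pi = 3.14159265358979323846;
  int quarter_circle = (int) ((pi / 2.0) * (32768.0 / pi) + 0.5); /* 0x4000 */
  double back = quarter_circle * (pi / 32768.0);                  /* ~ pi/2 */

  (void) back;
}
#endif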
11903 /* Initialize the CUMULATIVE_ARGS structure. */
11905 void
11906 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11907 tree fntype,
11908 rtx libname ATTRIBUTE_UNUSED,
11909 tree fndecl,
11910 signed int n_named_args,
11911 enum machine_mode mode)
11913 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11914 pcum->free_single_fp_reg = 0;
11915 pcum->stack_regs = 0;
11916 pcum->byref_regs = 0;
11917 pcum->byref = 0;
11918 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11920 /* XXX - Should we check TARGET_HITACHI here ??? */
11921 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11923 if (fntype)
11925 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11926 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11927 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11928 pcum->arg_count [(int) SH_ARG_INT]
11929 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11931 pcum->call_cookie
11932 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11933 && pcum->arg_count [(int) SH_ARG_INT] == 0
11934 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11935 ? int_size_in_bytes (TREE_TYPE (fntype))
11936 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11937 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11938 == FIRST_RET_REG));
11940 else
11942 pcum->arg_count [(int) SH_ARG_INT] = 0;
11943 pcum->prototype_p = FALSE;
11944 if (mode != VOIDmode)
11946 pcum->call_cookie =
11947 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11948 && GET_MODE_SIZE (mode) > 4
11949 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11951 /* If the default ABI is the Renesas ABI then all library
11952 calls must assume that the library will be using the
11953 Renesas ABI. So if the function would return its result
11954 in memory then we must force the address of this memory
11955 block onto the stack. Ideally we would like to call
11956 targetm.calls.return_in_memory() here but we do not have
11957 the TYPE or the FNDECL available so we synthesize the
11958 contents of that function as best we can. */
11959 pcum->force_mem =
11960 (TARGET_DEFAULT & MASK_HITACHI)
11961 && (mode == BLKmode
11962 || (GET_MODE_SIZE (mode) > 4
11963 && !(mode == DFmode
11964 && TARGET_FPU_DOUBLE)));
11966 else
11968 pcum->call_cookie = 0;
11969 pcum->force_mem = FALSE;
11974 /* Replace any occurrence of FROM(n) in X with TO(n).  The function does
11975 not descend into CONST_DOUBLE subexpressions when replacing.
11977 Note that copying is not done, so X must not be shared unless all copies
11978 are to be modified.
11980 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11981 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11982 replacements[n*2+1] - and that we take mode changes into account.
11984 If a replacement is ambiguous, return NULL_RTX.
11986 If MODIFY is zero, don't modify any rtl in place,
11987 just return zero or nonzero for failure / success. */
11990 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11992 int i, j;
11993 const char *fmt;
11995 /* The following prevents loops from occurring when we change a MEM inside
11996 a CONST_DOUBLE into the same CONST_DOUBLE.  */
11997 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11998 return x;
12000 for (i = n_replacements - 1; i >= 0 ; i--)
12001 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12002 return replacements[i*2+1];
12004 /* Allow this function to make replacements in EXPR_LISTs. */
12005 if (x == 0)
12006 return 0;
12008 if (GET_CODE (x) == SUBREG)
12010 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12011 n_replacements, modify);
12013 if (CONST_INT_P (new_rtx))
12015 x = simplify_subreg (GET_MODE (x), new_rtx,
12016 GET_MODE (SUBREG_REG (x)),
12017 SUBREG_BYTE (x));
12018 if (! x)
12019 abort ();
12021 else if (modify)
12022 SUBREG_REG (x) = new_rtx;
12024 return x;
12026 else if (REG_P (x))
12028 unsigned regno = REGNO (x);
12029 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12030 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12031 rtx result = NULL_RTX;
12033 for (i = n_replacements - 1; i >= 0; i--)
12035 rtx from = replacements[i*2];
12036 rtx to = replacements[i*2+1];
12037 unsigned from_regno, from_nregs, to_regno, new_regno;
12039 if (!REG_P (from))
12040 continue;
12041 from_regno = REGNO (from);
12042 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12043 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12044 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12046 if (regno < from_regno
12047 || regno + nregs > from_regno + nregs
12048 || !REG_P (to)
12049 || result)
12050 return NULL_RTX;
12051 to_regno = REGNO (to);
12052 if (to_regno < FIRST_PSEUDO_REGISTER)
12054 new_regno = regno + to_regno - from_regno;
12055 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12056 != nregs)
12057 return NULL_RTX;
12058 result = gen_rtx_REG (GET_MODE (x), new_regno);
12060 else if (GET_MODE (x) <= GET_MODE (to))
12061 result = gen_lowpart_common (GET_MODE (x), to);
12062 else
12063 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12066 return result ? result : x;
12068 else if (GET_CODE (x) == ZERO_EXTEND)
12070 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12071 n_replacements, modify);
12073 if (CONST_INT_P (new_rtx))
12075 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12076 new_rtx, GET_MODE (XEXP (x, 0)));
12077 if (! x)
12078 abort ();
12080 else if (modify)
12081 XEXP (x, 0) = new_rtx;
12083 return x;
12086 fmt = GET_RTX_FORMAT (GET_CODE (x));
12087 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12089 rtx new_rtx;
12091 if (fmt[i] == 'e')
12093 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12094 n_replacements, modify);
12095 if (!new_rtx)
12096 return NULL_RTX;
12097 if (modify)
12098 XEXP (x, i) = new_rtx;
12100 else if (fmt[i] == 'E')
12101 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12103 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12104 n_replacements, modify);
12105 if (!new_rtx)
12106 return NULL_RTX;
12107 if (modify)
12108 XVECEXP (x, i, j) = new_rtx;
12112 return x;
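/* Editor's sketch (not from the original source): the REPLACEMENTS array is
   laid out as pairs, replacements[2*n] = FROM(n) and replacements[2*n+1]
   = TO(n).  A caller can first do a dry run with MODIFY == 0 and only then
   rewrite X in place.  */
#if 0
static rtx
example_replace_two_regs (rtx x, rtx from0, rtx to0, rtx from1, rtx to1)
{
  rtx repl[4];

  repl[0] = from0, repl[1] = to0;
  repl[2] = from1, repl[3] = to1;
  if (! replace_n_hard_rtx (x, repl, 2, 0))   /* Check for ambiguity.  */
    return NULL_RTX;
  return replace_n_hard_rtx (x, repl, 2, 1);  /* Modify in place.  */
}
#endif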
12116 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12118 enum rtx_code code = TRUNCATE;
12120 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12122 rtx inner = XEXP (x, 0);
12123 enum machine_mode inner_mode = GET_MODE (inner);
12125 if (inner_mode == mode)
12126 return inner;
12127 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12128 x = inner;
12129 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12130 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12132 code = GET_CODE (x);
12133 x = inner;
12136 return gen_rtx_fmt_e (code, mode, x);
12139 /* Called via for_each_rtx after reload, to clean up truncates of
12140 registers that span multiple actual hard registers.  */
12142 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12144 rtx x = *p, reg;
12146 if (GET_CODE (x) != TRUNCATE)
12147 return 0;
12148 reg = XEXP (x, 0);
12149 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12151 enum machine_mode reg_mode = GET_MODE (reg);
12152 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12153 subreg_lowpart_offset (DImode, reg_mode));
12154 *(int*) n_changes += 1;
12155 return -1;
12157 return 0;
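/* Editor's sketch of a typical invocation (not from the original source):
   walk an insn pattern with for_each_rtx and count how many truncates were
   rewritten.  */
#if 0
static int
example_cleanup_truncates (rtx insn)
{
  int n_changes = 0;

  for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);
  return n_changes;
}
#endif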
12160 /* Load and store depend on the highpart of the address. However,
12161 set_attr_alternative does not give well-defined results before reload,
12162 so we must look at the rtl ourselves to see if any of the feeding
12163 registers is used in a memref. */
12165 /* Called by sh_contains_memref_p via for_each_rtx. */
12166 static int
12167 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12169 return (MEM_P (*loc));
12172 /* Return nonzero iff INSN contains a MEM. */
12174 sh_contains_memref_p (rtx insn)
12176 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12179 /* Return nonzero iff INSN loads a banked register. */
12181 sh_loads_bankedreg_p (rtx insn)
12183 if (GET_CODE (PATTERN (insn)) == SET)
12185 rtx op = SET_DEST (PATTERN(insn));
12186 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12187 return 1;
12190 return 0;
12193 /* FNADDR is the MEM expression from a call expander. Return an address
12194 to use in an SHmedia insn pattern. */
12196 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12198 int is_sym;
12200 fnaddr = XEXP (fnaddr, 0);
12201 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12202 if (flag_pic && is_sym)
12204 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12206 rtx reg = gen_reg_rtx (Pmode);
12208 /* We must not use GOTPLT for sibcalls, because PIC_REG
12209 must be restored before the PLT code gets to run. */
12210 if (is_sibcall)
12211 emit_insn (gen_symGOT2reg (reg, fnaddr));
12212 else
12213 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12214 fnaddr = reg;
12216 else
12218 fnaddr = gen_sym2PIC (fnaddr);
12219 PUT_MODE (fnaddr, Pmode);
12222 /* If ptabs might trap, make this visible to the rest of the compiler.
12223 We generally assume that symbols pertain to valid locations, but
12224 it is possible to generate invalid symbols with asm or linker tricks.
12225 In a list of functions where each returns its successor, an invalid
12226 symbol might denote an empty list. */
12227 if (!TARGET_PT_FIXED
12228 && (!is_sym || TARGET_INVALID_SYMBOLS)
12229 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12231 rtx tr = gen_reg_rtx (PDImode);
12233 emit_insn (gen_ptabs (tr, fnaddr));
12234 fnaddr = tr;
12236 else if (! target_reg_operand (fnaddr, Pmode))
12237 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12238 return fnaddr;
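/* Editor's sketch (not from the original source; the expander plumbing is
   assumed): a call expander rewrites its address operand with the helper
   above before emitting an SHmedia call pattern.  */
#if 0
static rtx
example_prepare_shmedia_call_address (rtx call_mem)
{
  return shmedia_prepare_call_address (call_mem, /*is_sibcall=*/ 0);
}
#endif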
12241 reg_class_t
12242 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12243 enum machine_mode mode, secondary_reload_info *sri)
12245 enum reg_class rclass = (enum reg_class) rclass_i;
12247 if (in_p)
12249 if (REGCLASS_HAS_FP_REG (rclass)
12250 && ! TARGET_SHMEDIA
12251 && immediate_operand ((x), mode)
12252 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12253 && mode == SFmode && fldi_ok ()))
12254 switch (mode)
12256 case SFmode:
12257 sri->icode = CODE_FOR_reload_insf__frn;
12258 return NO_REGS;
12259 case DFmode:
12260 sri->icode = CODE_FOR_reload_indf__frn;
12261 return NO_REGS;
12262 case SImode:
12263 /* ??? If we knew that we were in the appropriate mode -
12264 single precision - we could use a reload pattern directly. */
12265 return FPUL_REGS;
12266 default:
12267 abort ();
12269 if (rclass == FPUL_REGS
12270 && ((REG_P (x)
12271 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12272 || REGNO (x) == T_REG))
12273 || GET_CODE (x) == PLUS))
12274 return GENERAL_REGS;
12275 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12277 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12278 return GENERAL_REGS;
12279 else if (mode == SFmode)
12280 return FP_REGS;
12281 sri->icode = CODE_FOR_reload_insi__i_fpul;
12282 return NO_REGS;
12284 if (rclass == FPSCR_REGS
12285 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12286 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12287 return GENERAL_REGS;
12288 if (REGCLASS_HAS_FP_REG (rclass)
12289 && TARGET_SHMEDIA
12290 && immediate_operand (x, mode)
12291 && x != CONST0_RTX (GET_MODE (x))
12292 && GET_MODE (x) != V4SFmode)
12293 return GENERAL_REGS;
12294 if ((mode == QImode || mode == HImode)
12295 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12297 sri->icode = ((mode == QImode)
12298 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12299 return NO_REGS;
12301 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12302 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12303 return TARGET_REGS;
12304 } /* end of input-only processing. */
12306 if (((REGCLASS_HAS_FP_REG (rclass)
12307 && (REG_P (x)
12308 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12309 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12310 && TARGET_FMOVD))))
12311 || (REGCLASS_HAS_GENERAL_REG (rclass)
12312 && REG_P (x)
12313 && FP_REGISTER_P (REGNO (x))))
12314 && ! TARGET_SHMEDIA
12315 && (mode == SFmode || mode == SImode))
12316 return FPUL_REGS;
12317 if ((rclass == FPUL_REGS
12318 || (REGCLASS_HAS_FP_REG (rclass)
12319 && ! TARGET_SHMEDIA && mode == SImode))
12320 && (MEM_P (x)
12321 || (REG_P (x)
12322 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12323 || REGNO (x) == T_REG
12324 || system_reg_operand (x, VOIDmode)))))
12326 if (rclass == FPUL_REGS)
12327 return GENERAL_REGS;
12328 return FPUL_REGS;
12330 if ((rclass == TARGET_REGS
12331 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12332 && !satisfies_constraint_Csy (x)
12333 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12334 return GENERAL_REGS;
12335 if ((rclass == MAC_REGS || rclass == PR_REGS)
12336 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12337 && rclass != REGNO_REG_CLASS (REGNO (x)))
12338 return GENERAL_REGS;
12339 if (rclass != GENERAL_REGS && REG_P (x)
12340 && TARGET_REGISTER_P (REGNO (x)))
12341 return GENERAL_REGS;
12342 return NO_REGS;
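/* Editor's worked examples for the hook above (illustrative, non-SHmedia
   assumed; not part of the original source):
   - Loading an SFmode constant that fldi cannot supply into an FP-register
     class: sri->icode is set to CODE_FOR_reload_insf__frn and NO_REGS is
     returned, so the constant goes through the special reload pattern
     rather than an intermediate class.
   - Reloading an SImode value between an FP register and a general
     register returns FPUL_REGS, i.e. the value is staged through FPUL.  */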
12345 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12347 #include "gt-sh.h"