1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
59 #include "opts.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
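/* Illustration of MSW / LSW (a sketch based on the operand-printing code
further below, not new functionality): for a multi-word value x in memory,

  adjust_address (x, SImode, 4 * LSW)  selects the least significant word
  adjust_address (x, SImode, 4 * MSW)  selects the most significant word

independently of whether the target is little or big endian.  */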
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
/* The CPU we are scheduling for.  */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
115 reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
206 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
207 static void sh_insert_attributes (tree, tree *);
208 static const char *sh_check_pch_target_flags (int);
209 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
210 static int sh_adjust_cost (rtx, rtx, rtx, int);
211 static int sh_issue_rate (void);
212 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
213 static short find_set_regmode_weight (rtx, enum machine_mode);
214 static short find_insn_regmode_weight (rtx, enum machine_mode);
215 static void find_regmode_weight (basic_block, enum machine_mode);
216 static int find_r0_life_regions (basic_block);
217 static void sh_md_init_global (FILE *, int, int);
218 static void sh_md_finish_global (FILE *, int);
219 static int rank_for_reorder (const void *, const void *);
220 static void swap_reorder (rtx *, int);
221 static void ready_reorder (rtx *, int);
222 static short high_pressure (enum machine_mode);
223 static int sh_reorder (FILE *, int, rtx *, int *, int);
224 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
225 static void sh_md_init (FILE *, int, int);
226 static int sh_variable_issue (FILE *, int, rtx, int);
228 static bool sh_function_ok_for_sibcall (tree, tree);
230 static bool sh_cannot_modify_jumps_p (void);
231 static reg_class_t sh_target_reg_class (void);
232 static bool sh_optimize_target_register_callee_saved (bool);
233 static bool sh_ms_bitfield_layout_p (const_tree);
235 static void sh_init_builtins (void);
236 static tree sh_builtin_decl (unsigned, bool);
237 static void sh_media_init_builtins (void);
238 static tree sh_media_builtin_decl (unsigned, bool);
239 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
240 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
241 static void sh_file_start (void);
242 static int flow_dependent_p (rtx, rtx);
243 static void flow_dependent_p_1 (rtx, const_rtx, void *);
244 static int shiftcosts (rtx);
245 static int and_xor_ior_costs (rtx, int);
246 static int addsubcosts (rtx);
247 static int multcosts (rtx);
248 static bool unspec_caller_rtx_p (rtx);
249 static bool sh_cannot_copy_insn_p (rtx);
250 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
251 static int sh_address_cost (rtx, bool);
252 static int sh_pr_n_sets (void);
253 static rtx sh_allocate_initial_value (rtx);
254 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
255 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
256 enum machine_mode,
257 struct secondary_reload_info *);
258 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
259 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
260 static rtx sh_delegitimize_address (rtx);
261 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
262 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
263 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
264 static int scavenge_reg (HARD_REG_SET *s);
265 struct save_schedule_s;
266 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
267 struct save_schedule_s *, int);
269 static rtx sh_struct_value_rtx (tree, int);
270 static rtx sh_function_value (const_tree, const_tree, bool);
271 static bool sh_function_value_regno_p (const unsigned int);
272 static rtx sh_libcall_value (enum machine_mode, const_rtx);
273 static bool sh_return_in_memory (const_tree, const_tree);
274 static rtx sh_builtin_saveregs (void);
275 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode, tree, int *, int);
276 static bool sh_strict_argument_naming (cumulative_args_t);
277 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
278 static tree sh_build_builtin_va_list (void);
279 static void sh_va_start (tree, rtx);
280 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
281 static bool sh_promote_prototypes (const_tree);
282 static enum machine_mode sh_promote_function_mode (const_tree type,
283 enum machine_mode,
284 int *punsignedp,
285 const_tree funtype,
286 int for_return);
287 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
288 const_tree, bool);
289 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
290 const_tree, bool);
291 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
292 tree, bool);
293 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
294 const_tree, bool);
295 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
296 const_tree, bool);
297 static bool sh_scalar_mode_supported_p (enum machine_mode);
298 static int sh_dwarf_calling_convention (const_tree);
299 static void sh_encode_section_info (tree, rtx, int);
300 static int sh2a_function_vector_p (tree);
301 static void sh_trampoline_init (rtx, tree, rtx);
302 static rtx sh_trampoline_adjust_address (rtx);
303 static void sh_conditional_register_usage (void);
304 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
306 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
308 static const struct attribute_spec sh_attribute_table[] =
310 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
311 affects_type_identity } */
312 { "interrupt_handler", 0, 0, true, false, false,
313 sh_handle_interrupt_handler_attribute, false },
314 { "sp_switch", 1, 1, true, false, false,
315 sh_handle_sp_switch_attribute, false },
316 { "trap_exit", 1, 1, true, false, false,
317 sh_handle_trap_exit_attribute, false },
318 { "renesas", 0, 0, false, true, false,
319 sh_handle_renesas_attribute, false },
320 { "trapa_handler", 0, 0, true, false, false,
321 sh_handle_interrupt_handler_attribute, false },
322 { "nosave_low_regs", 0, 0, true, false, false,
323 sh_handle_interrupt_handler_attribute, false },
324 { "resbank", 0, 0, true, false, false,
325 sh_handle_resbank_handler_attribute, false },
326 { "function_vector", 1, 1, true, false, false,
327 sh2a_handle_function_vector_handler_attribute, false },
328 { NULL, 0, 0, false, false, false, NULL, false }
331 /* Initialize the GCC target structure. */
332 #undef TARGET_ATTRIBUTE_TABLE
333 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
335 /* The next two are used for debug info when compiling with -gdwarf. */
336 #undef TARGET_ASM_UNALIGNED_HI_OP
337 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
338 #undef TARGET_ASM_UNALIGNED_SI_OP
339 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
341 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
342 #undef TARGET_ASM_UNALIGNED_DI_OP
343 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
347 #undef TARGET_OPTION_OVERRIDE
348 #define TARGET_OPTION_OVERRIDE sh_option_override
350 #undef TARGET_PRINT_OPERAND
351 #define TARGET_PRINT_OPERAND sh_print_operand
352 #undef TARGET_PRINT_OPERAND_ADDRESS
353 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
354 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
355 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
356 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
357 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
359 #undef TARGET_ASM_FUNCTION_EPILOGUE
360 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
362 #undef TARGET_ASM_OUTPUT_MI_THUNK
363 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
365 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
366 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START sh_file_start
370 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
371 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
373 #undef TARGET_REGISTER_MOVE_COST
374 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
376 #undef TARGET_INSERT_ATTRIBUTES
377 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
379 #undef TARGET_SCHED_ADJUST_COST
380 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
382 #undef TARGET_SCHED_ISSUE_RATE
383 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented to re-enable sched1.  With the
help of these hooks we limit the movement of insns in sched1 in order to
reduce register pressure.  The overall idea is to keep count of the SImode
and SFmode regs required by already scheduled insns.  When these counts
cross some threshold values, we give priority to insns that free registers.
The insn that frees registers is most likely to be the insn with the lowest
LUID (original insn order); but such an insn might be sitting in the stalled
queue (Q) instead of the ready queue (R).  To solve this, we skip cycles,
up to a maximum of 8, so that such insns may move from Q -> R.

The hooks are described below:

TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler; it is
called inside the sched_init function just after the find_insn_reg_weights
function call.  It is used to calculate the SImode and SFmode weights of
the insns of basic blocks, much like find_insn_reg_weights does.
TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
indicated by TARGET_SCHED_REORDER2; doing this may move insns from
(Q)->(R).

TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
high, reorder the ready queue so that the insn with the lowest LUID will
be issued next.

TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
can be returned from TARGET_SCHED_REORDER2.

TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
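/* A rough sketch of how these hooks cooperate during the first scheduling
pass (an illustrative summary of the comment above, not authoritative; the
hook implementations further down are the reference):

  sh_md_init_global ()    allocate the regmode_weight[] arrays and count
                          the r0 life regions.
  sh_md_init ()           reset CURR_REGMODE_PRESSURE for SImode / SFmode.
  sh_reorder ()           if pressure is high, sort the ready queue so the
                          lowest-LUID insn is issued first.
  sh_reorder2 ()          if pressure is still high, request cycle skipping
                          and return the cached can_issue_more value.
  sh_dfa_new_cycle ()     honor the request, for at most 8 cycles, so that
                          stalled insns can move from Q to R.
  sh_variable_issue ()    cache can_issue_more for sh_reorder2.
  sh_md_finish_global ()  free the regmode_weight[] arrays.  */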
420 #undef TARGET_SCHED_DFA_NEW_CYCLE
421 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
423 #undef TARGET_SCHED_INIT_GLOBAL
424 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
426 #undef TARGET_SCHED_FINISH_GLOBAL
427 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
429 #undef TARGET_SCHED_VARIABLE_ISSUE
430 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
432 #undef TARGET_SCHED_REORDER
433 #define TARGET_SCHED_REORDER sh_reorder
435 #undef TARGET_SCHED_REORDER2
436 #define TARGET_SCHED_REORDER2 sh_reorder2
438 #undef TARGET_SCHED_INIT
439 #define TARGET_SCHED_INIT sh_md_init
441 #undef TARGET_DELEGITIMIZE_ADDRESS
442 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
444 #undef TARGET_LEGITIMIZE_ADDRESS
445 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
447 #undef TARGET_CANNOT_MODIFY_JUMPS_P
448 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
449 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
450 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
451 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
452 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
453 sh_optimize_target_register_callee_saved
455 #undef TARGET_MS_BITFIELD_LAYOUT_P
456 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
458 #undef TARGET_INIT_BUILTINS
459 #define TARGET_INIT_BUILTINS sh_init_builtins
460 #undef TARGET_BUILTIN_DECL
461 #define TARGET_BUILTIN_DECL sh_builtin_decl
462 #undef TARGET_EXPAND_BUILTIN
463 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
466 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
468 #undef TARGET_CANNOT_COPY_INSN_P
469 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
470 #undef TARGET_RTX_COSTS
471 #define TARGET_RTX_COSTS sh_rtx_costs
472 #undef TARGET_ADDRESS_COST
473 #define TARGET_ADDRESS_COST sh_address_cost
474 #undef TARGET_ALLOCATE_INITIAL_VALUE
475 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
477 #undef TARGET_MACHINE_DEPENDENT_REORG
478 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
480 #undef TARGET_DWARF_REGISTER_SPAN
481 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
483 #ifdef HAVE_AS_TLS
484 #undef TARGET_HAVE_TLS
485 #define TARGET_HAVE_TLS true
486 #endif
488 #undef TARGET_PROMOTE_PROTOTYPES
489 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
490 #undef TARGET_PROMOTE_FUNCTION_MODE
491 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
493 #undef TARGET_FUNCTION_VALUE
494 #define TARGET_FUNCTION_VALUE sh_function_value
495 #undef TARGET_FUNCTION_VALUE_REGNO_P
496 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
497 #undef TARGET_LIBCALL_VALUE
498 #define TARGET_LIBCALL_VALUE sh_libcall_value
499 #undef TARGET_STRUCT_VALUE_RTX
500 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
501 #undef TARGET_RETURN_IN_MEMORY
502 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
504 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
505 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
506 #undef TARGET_SETUP_INCOMING_VARARGS
507 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
508 #undef TARGET_STRICT_ARGUMENT_NAMING
509 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
510 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
511 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
512 #undef TARGET_MUST_PASS_IN_STACK
513 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
514 #undef TARGET_PASS_BY_REFERENCE
515 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
516 #undef TARGET_CALLEE_COPIES
517 #define TARGET_CALLEE_COPIES sh_callee_copies
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG sh_function_arg
522 #undef TARGET_FUNCTION_ARG_ADVANCE
523 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
525 #undef TARGET_BUILD_BUILTIN_VA_LIST
526 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
527 #undef TARGET_EXPAND_BUILTIN_VA_START
528 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
530 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
532 #undef TARGET_SCALAR_MODE_SUPPORTED_P
533 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
534 #undef TARGET_VECTOR_MODE_SUPPORTED_P
535 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
537 #undef TARGET_CHECK_PCH_TARGET_FLAGS
538 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
540 #undef TARGET_DWARF_CALLING_CONVENTION
541 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
543 #undef TARGET_FRAME_POINTER_REQUIRED
544 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
546 /* Return regmode weight for insn. */
547 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
549 /* Return current register pressure for regmode. */
550 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
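/* Illustration only (an assumption about how the hooks below use these
macros, not a quotation of them): as insns are scheduled, the running
totals are updated roughly like

  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);

and high_pressure () compares those totals against mode-specific
thresholds to decide whether the ready queue should be reordered.  */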
552 #undef TARGET_ENCODE_SECTION_INFO
553 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
555 #undef TARGET_SECONDARY_RELOAD
556 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
558 #undef TARGET_PREFERRED_RELOAD_CLASS
559 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
561 #undef TARGET_CONDITIONAL_REGISTER_USAGE
562 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
564 #undef TARGET_LEGITIMATE_ADDRESS_P
565 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
572 #undef TARGET_LEGITIMATE_CONSTANT_P
573 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
575 /* Machine-specific symbol_ref flags. */
576 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
578 struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement the TARGET_OPTION_OVERRIDE hook.  Validate and override
various options, and do some machine-dependent initialization.  */
582 static void
583 sh_option_override (void)
585 int regno;
587 SUBTARGET_OVERRIDE_OPTIONS;
588 if (optimize > 1 && !optimize_size)
589 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
590 if (flag_finite_math_only == 2)
591 flag_finite_math_only
592 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
593 if (TARGET_SH2E && !flag_finite_math_only)
594 target_flags |= MASK_IEEE;
595 sh_cpu = PROCESSOR_SH1;
596 assembler_dialect = 0;
597 if (TARGET_SH2)
598 sh_cpu = PROCESSOR_SH2;
599 if (TARGET_SH2E)
600 sh_cpu = PROCESSOR_SH2E;
601 if (TARGET_SH2A)
602 sh_cpu = PROCESSOR_SH2A;
603 if (TARGET_SH3)
604 sh_cpu = PROCESSOR_SH3;
605 if (TARGET_SH3E)
606 sh_cpu = PROCESSOR_SH3E;
607 if (TARGET_SH4)
609 assembler_dialect = 1;
610 sh_cpu = PROCESSOR_SH4;
612 if (TARGET_SH4A_ARCH)
614 assembler_dialect = 1;
615 sh_cpu = PROCESSOR_SH4A;
617 if (TARGET_SH5)
619 sh_cpu = PROCESSOR_SH5;
620 target_flags |= MASK_ALIGN_DOUBLE;
621 if (TARGET_SHMEDIA_FPU)
622 target_flags |= MASK_FMOVD;
623 if (TARGET_SHMEDIA)
625 /* There are no delay slots on SHmedia. */
626 flag_delayed_branch = 0;
/* Relaxation isn't yet supported for SHmedia.  */
628 target_flags &= ~MASK_RELAX;
629 /* After reload, if conversion does little good but can cause
630 ICEs:
631 - find_if_block doesn't do anything for SH because we don't
632 have conditional execution patterns. (We use conditional
633 move patterns, which are handled differently, and only
634 before reload).
635 - find_cond_trap doesn't do anything for the SH because we
636 don't have conditional traps.
637 - find_if_case_1 uses redirect_edge_and_branch_force in
638 the only path that does an optimization, and this causes
639 an ICE when branch targets are in registers.
640 - find_if_case_2 doesn't do anything for the SHmedia after
641 reload except when it can redirect a tablejump - and
642 that's rather rare. */
643 flag_if_conversion2 = 0;
644 if (! strcmp (sh_div_str, "call"))
645 sh_div_strategy = SH_DIV_CALL;
646 else if (! strcmp (sh_div_str, "call2"))
647 sh_div_strategy = SH_DIV_CALL2;
648 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
649 sh_div_strategy = SH_DIV_FP;
650 else if (! strcmp (sh_div_str, "inv"))
651 sh_div_strategy = SH_DIV_INV;
652 else if (! strcmp (sh_div_str, "inv:minlat"))
653 sh_div_strategy = SH_DIV_INV_MINLAT;
654 else if (! strcmp (sh_div_str, "inv20u"))
655 sh_div_strategy = SH_DIV_INV20U;
656 else if (! strcmp (sh_div_str, "inv20l"))
657 sh_div_strategy = SH_DIV_INV20L;
658 else if (! strcmp (sh_div_str, "inv:call2"))
659 sh_div_strategy = SH_DIV_INV_CALL2;
660 else if (! strcmp (sh_div_str, "inv:call"))
661 sh_div_strategy = SH_DIV_INV_CALL;
662 else if (! strcmp (sh_div_str, "inv:fp"))
664 if (TARGET_FPU_ANY)
665 sh_div_strategy = SH_DIV_INV_FP;
666 else
667 sh_div_strategy = SH_DIV_INV;
669 TARGET_CBRANCHDI4 = 0;
670 /* Assembler CFI isn't yet fully supported for SHmedia. */
671 flag_dwarf2_cfi_asm = 0;
674 else
676 /* Only the sh64-elf assembler fully supports .quad properly. */
677 targetm.asm_out.aligned_op.di = NULL;
678 targetm.asm_out.unaligned_op.di = NULL;
680 if (TARGET_SH1)
682 if (! strcmp (sh_div_str, "call-div1"))
683 sh_div_strategy = SH_DIV_CALL_DIV1;
684 else if (! strcmp (sh_div_str, "call-fp")
685 && (TARGET_FPU_DOUBLE
686 || (TARGET_HARD_SH4 && TARGET_SH2E)
687 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
688 sh_div_strategy = SH_DIV_CALL_FP;
689 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
690 sh_div_strategy = SH_DIV_CALL_TABLE;
691 else
/* Pick the one that makes the most sense for the target in general.
It is not much use to pick different functions depending on -Os, since
we would then end up with two different functions when some of the code
is compiled for size and some for speed.  */
698 /* SH4 tends to emphasize speed. */
699 if (TARGET_HARD_SH4)
700 sh_div_strategy = SH_DIV_CALL_TABLE;
701 /* These have their own way of doing things. */
702 else if (TARGET_SH2A)
703 sh_div_strategy = SH_DIV_INTRINSIC;
704 /* ??? Should we use the integer SHmedia function instead? */
705 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
706 sh_div_strategy = SH_DIV_CALL_FP;
707 /* SH1 .. SH3 cores often go into small-footprint systems, so
708 default to the smallest implementation available. */
709 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
710 sh_div_strategy = SH_DIV_CALL_TABLE;
711 else
712 sh_div_strategy = SH_DIV_CALL_DIV1;
714 if (!TARGET_SH1)
715 TARGET_PRETEND_CMOVE = 0;
716 if (sh_divsi3_libfunc[0])
717 ; /* User supplied - leave it alone. */
718 else if (TARGET_DIVIDE_CALL_FP)
719 sh_divsi3_libfunc = "__sdivsi3_i4";
720 else if (TARGET_DIVIDE_CALL_TABLE)
721 sh_divsi3_libfunc = "__sdivsi3_i4i";
722 else if (TARGET_SH5)
723 sh_divsi3_libfunc = "__sdivsi3_1";
724 else
725 sh_divsi3_libfunc = "__sdivsi3";
726 if (sh_branch_cost == -1)
727 sh_branch_cost
728 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
731 if (! VALID_REGISTER_P (regno))
732 sh_register_names[regno][0] = '\0';
734 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
735 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
736 sh_additional_register_names[regno][0] = '\0';
738 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
740 if ((flag_pic && ! TARGET_PREFERGOT)
741 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
742 flag_no_function_cse = 1;
if (targetm.small_register_classes_for_mode_p (VOIDmode))
/* Never run scheduling before reload, since that can break
global alloc and generates slower code anyway due to the
pressure on R0.  */
/* Enable sched1 for SH4 only if the user explicitly requests it.
When sched1 is enabled, the ready queue will be reordered by
the target hooks if pressure is high.  We cannot do this for
PIC, SH3 and lower, as they give spill failures for R0.  */
753 if (!TARGET_HARD_SH4 || flag_pic)
754 flag_schedule_insns = 0;
/* ??? Current exception handling places basic block boundaries
after call_insns.  This causes high pressure on R0 and gives
spill failures for R0 in reload.  See PR 22553 and the thread
on gcc-patches
<http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
760 else if (flag_exceptions)
762 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
763 warning (0, "ignoring -fschedule-insns because of exception handling bug");
764 flag_schedule_insns = 0;
766 else if (flag_schedule_insns
767 && !global_options_set.x_flag_schedule_insns)
768 flag_schedule_insns = 0;
771 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
772 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
774 /* Unwind info is not correct around the CFG unless either a frame
775 pointer is present or M_A_O_A is set. Fixing this requires rewriting
776 unwind info generation to be aware of the CFG and propagating states
777 around edges. */
778 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
779 || flag_exceptions || flag_non_call_exceptions)
780 && flag_omit_frame_pointer
781 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
783 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
784 warning (0, "unwind tables currently require either a frame pointer "
785 "or -maccumulate-outgoing-args for correctness");
786 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
789 /* Unwinding with -freorder-blocks-and-partition does not work on this
790 architecture, because it requires far jumps to label crossing between
791 hot/cold sections which are rejected on this architecture. */
792 if (flag_reorder_blocks_and_partition)
794 if (flag_exceptions)
796 inform (input_location,
797 "-freorder-blocks-and-partition does not work with "
798 "exceptions on this architecture");
799 flag_reorder_blocks_and_partition = 0;
800 flag_reorder_blocks = 1;
802 else if (flag_unwind_tables)
804 inform (input_location,
805 "-freorder-blocks-and-partition does not support unwind "
806 "info on this architecture");
807 flag_reorder_blocks_and_partition = 0;
808 flag_reorder_blocks = 1;
812 if (align_loops == 0)
813 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
814 if (align_jumps == 0)
815 align_jumps = 1 << CACHE_LOG;
816 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
817 align_jumps = TARGET_SHMEDIA ? 4 : 2;
819 /* Allocation boundary (in *bytes*) for the code of a function.
820 SH1: 32 bit alignment is faster, because instructions are always
821 fetched as a pair from a longword boundary.
822 SH2 .. SH5 : align to cache line start. */
823 if (align_functions == 0)
824 align_functions
825 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
826 /* The linker relaxation code breaks when a function contains
827 alignments that are larger than that at the start of a
828 compilation unit. */
829 if (TARGET_RELAX)
831 int min_align
832 = align_loops > align_jumps ? align_loops : align_jumps;
/* Also take possible .long constants / mova tables into account.  */
835 if (min_align < 4)
836 min_align = 4;
837 if (align_functions < min_align)
838 align_functions = min_align;
841 if (sh_fixed_range_str)
842 sh_fix_range (sh_fixed_range_str);
844 /* This target defaults to strict volatile bitfields. */
845 if (flag_strict_volatile_bitfields < 0)
846 flag_strict_volatile_bitfields = 1;
849 /* Print the operand address in x to the stream. */
851 static void
852 sh_print_operand_address (FILE *stream, rtx x)
854 switch (GET_CODE (x))
856 case REG:
857 case SUBREG:
858 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
859 break;
861 case PLUS:
863 rtx base = XEXP (x, 0);
864 rtx index = XEXP (x, 1);
866 switch (GET_CODE (index))
868 case CONST_INT:
869 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
870 reg_names[true_regnum (base)]);
871 break;
873 case REG:
874 case SUBREG:
876 int base_num = true_regnum (base);
877 int index_num = true_regnum (index);
879 fprintf (stream, "@(r0,%s)",
880 reg_names[MAX (base_num, index_num)]);
881 break;
884 default:
885 gcc_unreachable ();
888 break;
890 case PRE_DEC:
891 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
892 break;
894 case POST_INC:
895 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
896 break;
898 default:
899 x = mark_constant_pool_use (x);
900 output_addr_const (stream, x);
901 break;
905 /* Print operand x (an rtx) in assembler syntax to file stream
906 according to modifier code.
908 '.' print a .s if insn needs delay slot
909 ',' print LOCAL_LABEL_PREFIX
910 '@' print trap, rte or rts depending upon pragma interruptness
911 '#' output a nop if there is nothing to put in the delay slot
912 ''' print likelihood suffix (/u for unlikely).
913 '>' print branch target if -fverbose-asm
914 'O' print a constant without the #
915 'R' print the LSW of a dp value - changes if in little endian
916 'S' print the MSW of a dp value - changes if in little endian
917 'T' print the next word of a dp value - same as 'R' in big endian mode.
918 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
919 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
920 'N' print 'r63' if the operand is (const_int 0).
921 'd' print a V2SF reg as dN instead of fpN.
922 'm' print a pair `base,offset' or `base,index', for LD and ST.
923 'U' Likewise for {LD,ST}{HI,LO}.
924 'V' print the position of a single bit set.
925 'W' print the position of a single bit cleared.
926 't' print a memory address which is a register.
927 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
928 'o' output an operator. */
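/* Illustration (an assumed reading of the table above, not a quotation
from the machine description): in an insn output template these codes
follow a '%', so "%R0" prints the least significant word of operand 0,
"%S0" its most significant word, and the punctuation code "%#" emits a
nop when nothing could be placed in the delay slot.  */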
930 static void
931 sh_print_operand (FILE *stream, rtx x, int code)
933 int regno;
934 enum machine_mode mode;
936 switch (code)
938 tree trapa_attr;
940 case '.':
941 if (final_sequence
942 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
943 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
944 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
945 break;
946 case ',':
947 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
948 break;
949 case '@':
950 trapa_attr = lookup_attribute ("trap_exit",
951 DECL_ATTRIBUTES (current_function_decl));
952 if (trapa_attr)
953 fprintf (stream, "trapa #%ld",
954 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
955 else if (sh_cfun_interrupt_handler_p ())
957 if (sh_cfun_resbank_handler_p ())
958 fprintf (stream, "resbank\n");
959 fprintf (stream, "rte");
961 else
962 fprintf (stream, "rts");
963 break;
964 case '#':
965 /* Output a nop if there's nothing in the delay slot. */
966 if (dbr_sequence_length () == 0)
967 fprintf (stream, "\n\tnop");
968 break;
969 case '\'':
971 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
973 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
974 fputs ("/u", stream);
975 break;
977 case '>':
978 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
980 fputs ("\t! target: ", stream);
981 output_addr_const (stream, JUMP_LABEL (current_output_insn));
983 break;
984 case 'O':
985 x = mark_constant_pool_use (x);
986 output_addr_const (stream, x);
987 break;
988 /* N.B.: %R / %S / %T adjust memory addresses by four.
989 For SHMEDIA, that means they can be used to access the first and
990 second 32 bit part of a 64 bit (or larger) value that
991 might be held in floating point registers or memory.
992 While they can be used to access 64 bit parts of a larger value
993 held in general purpose registers, that won't work with memory -
994 neither for fp registers, since the frxx names are used. */
995 case 'R':
996 if (REG_P (x) || GET_CODE (x) == SUBREG)
998 regno = true_regnum (x);
999 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1000 fputs (reg_names[regno], (stream));
1002 else if (MEM_P (x))
1004 x = adjust_address (x, SImode, 4 * LSW);
1005 sh_print_operand_address (stream, XEXP (x, 0));
1007 else
1009 rtx sub = NULL_RTX;
1011 mode = GET_MODE (x);
1012 if (mode == VOIDmode)
1013 mode = DImode;
1014 if (GET_MODE_SIZE (mode) >= 8)
1015 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1016 if (sub)
1017 sh_print_operand (stream, sub, 0);
1018 else
1019 output_operand_lossage ("invalid operand to %%R");
1021 break;
1022 case 'S':
1023 if (REG_P (x) || GET_CODE (x) == SUBREG)
1025 regno = true_regnum (x);
1026 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1027 fputs (reg_names[regno], (stream));
1029 else if (MEM_P (x))
1031 x = adjust_address (x, SImode, 4 * MSW);
1032 sh_print_operand_address (stream, XEXP (x, 0));
1034 else
1036 rtx sub = NULL_RTX;
1038 mode = GET_MODE (x);
1039 if (mode == VOIDmode)
1040 mode = DImode;
1041 if (GET_MODE_SIZE (mode) >= 8)
1042 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1043 if (sub)
1044 sh_print_operand (stream, sub, 0);
1045 else
1046 output_operand_lossage ("invalid operand to %%S");
1048 break;
1049 case 'T':
1050 /* Next word of a double. */
1051 switch (GET_CODE (x))
1053 case REG:
1054 fputs (reg_names[REGNO (x) + 1], (stream));
1055 break;
1056 case MEM:
1057 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1058 && GET_CODE (XEXP (x, 0)) != POST_INC)
1059 x = adjust_address (x, SImode, 4);
1060 sh_print_operand_address (stream, XEXP (x, 0));
1061 break;
1062 default:
1063 break;
1065 break;
1067 case 't':
1068 gcc_assert (MEM_P (x));
1069 x = XEXP (x, 0);
1070 switch (GET_CODE (x))
1072 case REG:
1073 case SUBREG:
1074 sh_print_operand (stream, x, 0);
1075 break;
1076 default:
1077 break;
1079 break;
1081 case 'o':
1082 switch (GET_CODE (x))
1084 case PLUS: fputs ("add", stream); break;
1085 case MINUS: fputs ("sub", stream); break;
1086 case MULT: fputs ("mul", stream); break;
1087 case DIV: fputs ("div", stream); break;
1088 case EQ: fputs ("eq", stream); break;
1089 case NE: fputs ("ne", stream); break;
1090 case GT: case LT: fputs ("gt", stream); break;
1091 case GE: case LE: fputs ("ge", stream); break;
1092 case GTU: case LTU: fputs ("gtu", stream); break;
1093 case GEU: case LEU: fputs ("geu", stream); break;
1094 default:
1095 break;
1097 break;
1098 case 'M':
1099 if (TARGET_SHMEDIA)
1101 if (MEM_P (x)
1102 && GET_CODE (XEXP (x, 0)) == PLUS
1103 && (REG_P (XEXP (XEXP (x, 0), 1))
1104 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1105 fputc ('x', stream);
1107 else
1109 if (MEM_P (x))
1111 switch (GET_MODE (x))
1113 case QImode: fputs (".b", stream); break;
1114 case HImode: fputs (".w", stream); break;
1115 case SImode: fputs (".l", stream); break;
1116 case SFmode: fputs (".s", stream); break;
1117 case DFmode: fputs (".d", stream); break;
1118 default: gcc_unreachable ();
1122 break;
1124 case 'm':
1125 gcc_assert (MEM_P (x));
1126 x = XEXP (x, 0);
1127 /* Fall through. */
1128 case 'U':
1129 switch (GET_CODE (x))
1131 case REG:
1132 case SUBREG:
1133 sh_print_operand (stream, x, 0);
1134 fputs (", 0", stream);
1135 break;
1137 case PLUS:
1138 sh_print_operand (stream, XEXP (x, 0), 0);
1139 fputs (", ", stream);
1140 sh_print_operand (stream, XEXP (x, 1), 0);
1141 break;
1143 default:
1144 gcc_unreachable ();
1146 break;
1148 case 'V':
1150 int num = exact_log2 (INTVAL (x));
1151 gcc_assert (num >= 0);
1152 fprintf (stream, "#%d", num);
1154 break;
1156 case 'W':
1158 int num = exact_log2 (~INTVAL (x));
1159 gcc_assert (num >= 0);
1160 fprintf (stream, "#%d", num);
1162 break;
1164 case 'd':
1165 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1167 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1168 break;
1170 case 'N':
1171 if (x == CONST0_RTX (GET_MODE (x)))
1173 fprintf ((stream), "r63");
1174 break;
1176 goto default_output;
1177 case 'u':
1178 if (CONST_INT_P (x))
1180 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1181 break;
1183 /* Fall through. */
1185 default_output:
1186 default:
1187 regno = 0;
1188 mode = GET_MODE (x);
1190 switch (GET_CODE (x))
1192 case TRUNCATE:
1194 rtx inner = XEXP (x, 0);
1195 int offset = 0;
1196 enum machine_mode inner_mode;
1198 /* We might see SUBREGs with vector mode registers inside. */
1199 if (GET_CODE (inner) == SUBREG
1200 && (GET_MODE_SIZE (GET_MODE (inner))
1201 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1202 && subreg_lowpart_p (inner))
1203 inner = SUBREG_REG (inner);
1204 if (CONST_INT_P (inner))
1206 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1207 goto default_output;
1209 inner_mode = GET_MODE (inner);
1210 if (GET_CODE (inner) == SUBREG
1211 && (GET_MODE_SIZE (GET_MODE (inner))
1212 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1213 && REG_P (SUBREG_REG (inner)))
1215 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1216 GET_MODE (SUBREG_REG (inner)),
1217 SUBREG_BYTE (inner),
1218 GET_MODE (inner));
1219 inner = SUBREG_REG (inner);
1221 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1222 abort ();
1223 /* Floating point register pairs are always big endian;
1224 general purpose registers are 64 bit wide. */
1225 regno = REGNO (inner);
1226 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1227 - HARD_REGNO_NREGS (regno, mode))
1228 + offset;
1229 x = inner;
1230 goto reg;
1232 case SIGN_EXTEND:
1233 x = XEXP (x, 0);
1234 goto reg;
1235 /* FIXME: We need this on SHmedia32 because reload generates
1236 some sign-extended HI or QI loads into DImode registers
1237 but, because Pmode is SImode, the address ends up with a
1238 subreg:SI of the DImode register. Maybe reload should be
1239 fixed so as to apply alter_subreg to such loads? */
1240 case IF_THEN_ELSE:
1241 gcc_assert (trapping_target_operand (x, VOIDmode));
1242 x = XEXP (XEXP (x, 2), 0);
1243 goto default_output;
1244 case SUBREG:
1245 gcc_assert (SUBREG_BYTE (x) == 0
1246 && REG_P (SUBREG_REG (x)));
1248 x = SUBREG_REG (x);
1249 /* Fall through. */
1251 reg:
1252 case REG:
1253 regno += REGNO (x);
1254 if (FP_REGISTER_P (regno)
1255 && mode == V16SFmode)
1256 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1257 else if (FP_REGISTER_P (REGNO (x))
1258 && mode == V4SFmode)
1259 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1260 else if (REG_P (x)
1261 && mode == V2SFmode)
1262 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1263 else if (FP_REGISTER_P (REGNO (x))
1264 && GET_MODE_SIZE (mode) > 4)
1265 fprintf ((stream), "d%s", reg_names[regno] + 1);
1266 else
1267 fputs (reg_names[regno], (stream));
1268 break;
1270 case MEM:
1271 output_address (XEXP (x, 0));
1272 break;
1274 default:
1275 if (TARGET_SH1)
1276 fputc ('#', stream);
1277 output_addr_const (stream, x);
1278 break;
1280 break;
1284 static bool
1285 sh_print_operand_punct_valid_p (unsigned char code)
1287 return (code == '.' || code == '#' || code == '@' || code == ','
1288 || code == '$' || code == '\'' || code == '>');
1291 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1293 static bool
1294 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1296 if (GET_CODE (x) == UNSPEC)
1298 switch (XINT (x, 1))
1300 case UNSPEC_DATALABEL:
1301 fputs ("datalabel ", file);
1302 output_addr_const (file, XVECEXP (x, 0, 0));
1303 break;
1304 case UNSPEC_PIC:
1305 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1306 output_addr_const (file, XVECEXP (x, 0, 0));
1307 break;
1308 case UNSPEC_GOT:
1309 output_addr_const (file, XVECEXP (x, 0, 0));
1310 fputs ("@GOT", file);
1311 break;
1312 case UNSPEC_GOTOFF:
1313 output_addr_const (file, XVECEXP (x, 0, 0));
1314 fputs ("@GOTOFF", file);
1315 break;
1316 case UNSPEC_PLT:
1317 output_addr_const (file, XVECEXP (x, 0, 0));
1318 fputs ("@PLT", file);
1319 break;
1320 case UNSPEC_GOTPLT:
1321 output_addr_const (file, XVECEXP (x, 0, 0));
1322 fputs ("@GOTPLT", file);
1323 break;
1324 case UNSPEC_DTPOFF:
1325 output_addr_const (file, XVECEXP (x, 0, 0));
1326 fputs ("@DTPOFF", file);
1327 break;
1328 case UNSPEC_GOTTPOFF:
1329 output_addr_const (file, XVECEXP (x, 0, 0));
1330 fputs ("@GOTTPOFF", file);
1331 break;
1332 case UNSPEC_TPOFF:
1333 output_addr_const (file, XVECEXP (x, 0, 0));
1334 fputs ("@TPOFF", file);
1335 break;
1336 case UNSPEC_CALLER:
1338 char name[32];
1339 /* LPCS stands for Label for PIC Call Site. */
1340 targetm.asm_out.generate_internal_label (name, "LPCS",
1341 INTVAL (XVECEXP (x, 0, 0)));
1342 assemble_name (file, name);
1344 break;
1345 case UNSPEC_EXTRACT_S16:
1346 case UNSPEC_EXTRACT_U16:
1348 rtx val, shift;
1350 val = XVECEXP (x, 0, 0);
1351 shift = XVECEXP (x, 0, 1);
1352 fputc ('(', file);
1353 if (shift != const0_rtx)
1354 fputc ('(', file);
1355 if (GET_CODE (val) == CONST
1356 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1358 fputc ('(', file);
1359 output_addr_const (file, val);
1360 fputc (')', file);
1362 else
1363 output_addr_const (file, val);
1364 if (shift != const0_rtx)
1366 fputs (" >> ", file);
1367 output_addr_const (file, shift);
1368 fputc (')', file);
1370 fputs (" & 65535)", file);
1372 break;
1373 case UNSPEC_SYMOFF:
1374 output_addr_const (file, XVECEXP (x, 0, 0));
1375 fputc ('-', file);
1376 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1378 fputc ('(', file);
1379 output_addr_const (file, XVECEXP (x, 0, 1));
1380 fputc (')', file);
1382 else
1383 output_addr_const (file, XVECEXP (x, 0, 1));
1384 break;
1385 case UNSPEC_PCREL_SYMOFF:
1386 output_addr_const (file, XVECEXP (x, 0, 0));
1387 fputs ("-(", file);
1388 output_addr_const (file, XVECEXP (x, 0, 1));
1389 fputs ("-.)", file);
1390 break;
1391 default:
1392 return false;
1394 return true;
1396 else
1397 return false;
1401 /* Encode symbol attributes of a SYMBOL_REF into its
1402 SYMBOL_REF_FLAGS. */
1403 static void
1404 sh_encode_section_info (tree decl, rtx rtl, int first)
1406 default_encode_section_info (decl, rtl, first);
1408 if (TREE_CODE (decl) == FUNCTION_DECL
1409 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1410 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1413 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1414 static void
1415 force_into (rtx value, rtx target)
1417 value = force_operand (value, target);
1418 if (! rtx_equal_p (value, target))
1419 emit_insn (gen_move_insn (target, value));
1422 /* Emit code to perform a block move. Choose the best method.
1424 OPERANDS[0] is the destination.
1425 OPERANDS[1] is the source.
1426 OPERANDS[2] is the size.
1427 OPERANDS[3] is the alignment safe to use. */
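/* A note on the calling convention, inferred from the uses below rather
than stated here: the function returns 1 when it has emitted the block
move itself and 0 when the caller (presumably the movmem expander in the
machine description) should fall back to the generic code.  */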
int
expand_block_move (rtx *operands)
1432 int align = INTVAL (operands[3]);
1433 int constp = (CONST_INT_P (operands[2]));
1434 int bytes = (constp ? INTVAL (operands[2]) : 0);
1436 if (! constp)
1437 return 0;
1439 /* If we could use mov.l to move words and dest is word-aligned, we
1440 can use movua.l for loads and still generate a relatively short
1441 and efficient sequence. */
1442 if (TARGET_SH4A_ARCH && align < 4
1443 && MEM_ALIGN (operands[0]) >= 32
1444 && can_move_by_pieces (bytes, 32))
1446 rtx dest = copy_rtx (operands[0]);
1447 rtx src = copy_rtx (operands[1]);
1448 /* We could use different pseudos for each copied word, but
1449 since movua can only load into r0, it's kind of
1450 pointless. */
1451 rtx temp = gen_reg_rtx (SImode);
1452 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1453 int copied = 0;
1455 while (copied + 4 <= bytes)
1457 rtx to = adjust_address (dest, SImode, copied);
1458 rtx from = adjust_automodify_address (src, BLKmode,
1459 src_addr, copied);
1461 set_mem_size (from, 4);
1462 emit_insn (gen_movua (temp, from));
1463 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1464 emit_move_insn (to, temp);
1465 copied += 4;
1468 if (copied < bytes)
1469 move_by_pieces (adjust_address (dest, BLKmode, copied),
1470 adjust_automodify_address (src, BLKmode,
1471 src_addr, copied),
1472 bytes - copied, align, 0);
1474 return 1;
1477 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1478 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1479 if (align < 4 || (bytes % 4 != 0))
1480 return 0;
1482 if (TARGET_HARD_SH4)
1484 if (bytes < 12)
1485 return 0;
1486 else if (bytes == 12)
1488 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1489 rtx r4 = gen_rtx_REG (SImode, 4);
1490 rtx r5 = gen_rtx_REG (SImode, 5);
1492 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1493 force_into (XEXP (operands[0], 0), r4);
1494 force_into (XEXP (operands[1], 0), r5);
1495 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1496 return 1;
1498 else if (! optimize_size)
1500 const char *entry_name;
1501 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1502 int dwords;
1503 rtx r4 = gen_rtx_REG (SImode, 4);
1504 rtx r5 = gen_rtx_REG (SImode, 5);
1505 rtx r6 = gen_rtx_REG (SImode, 6);
1507 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1508 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1509 force_into (XEXP (operands[0], 0), r4);
1510 force_into (XEXP (operands[1], 0), r5);
1512 dwords = bytes >> 3;
1513 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1514 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1515 return 1;
1517 else
1518 return 0;
1520 if (bytes < 64)
1522 char entry[30];
1523 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1524 rtx r4 = gen_rtx_REG (SImode, 4);
1525 rtx r5 = gen_rtx_REG (SImode, 5);
1527 sprintf (entry, "__movmemSI%d", bytes);
1528 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1529 force_into (XEXP (operands[0], 0), r4);
1530 force_into (XEXP (operands[1], 0), r5);
1531 emit_insn (gen_block_move_real (func_addr_rtx));
1532 return 1;
/* This is the same number of bytes as a memcpy call, but to a different,
less common function name, so this will occasionally use more space.  */
1537 if (! optimize_size)
1539 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1540 int final_switch, while_loop;
1541 rtx r4 = gen_rtx_REG (SImode, 4);
1542 rtx r5 = gen_rtx_REG (SImode, 5);
1543 rtx r6 = gen_rtx_REG (SImode, 6);
1545 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1546 force_into (XEXP (operands[0], 0), r4);
1547 force_into (XEXP (operands[1], 0), r5);
1549 /* r6 controls the size of the move. 16 is decremented from it
1550 for each 64 bytes moved. Then the negative bit left over is used
1551 as an index into a list of move instructions. e.g., a 72 byte move
1552 would be set up with size(r6) = 14, for one iteration through the
1553 big while loop, and a switch of -2 for the last part. */
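/* Worked arithmetic for the 72 byte example above (illustrative only):
72 bytes are 18 longwords, so final_switch = 16 - (18 % 16) = 14 and
while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14.  One pass through
the big loop subtracts 16, leaving -2 to index the trailing switch.  */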
1555 final_switch = 16 - ((bytes / 4) % 16);
1556 while_loop = ((bytes / 4) / 16 - 1) * 16;
1557 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1558 emit_insn (gen_block_lump_real (func_addr_rtx));
1559 return 1;
1562 return 0;
1565 /* Prepare operands for a move define_expand; specifically, one of the
1566 operands must be in a register. */
int
prepare_move_operands (rtx operands[], enum machine_mode mode)
1571 if ((mode == SImode || mode == DImode)
1572 && flag_pic
1573 && ! ((mode == Pmode || mode == ptr_mode)
1574 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1576 rtx temp;
1577 if (SYMBOLIC_CONST_P (operands[1]))
1579 if (MEM_P (operands[0]))
1580 operands[1] = force_reg (Pmode, operands[1]);
1581 else if (TARGET_SHMEDIA
1582 && GET_CODE (operands[1]) == LABEL_REF
1583 && target_reg_operand (operands[0], mode))
1584 /* It's ok. */;
1585 else
1587 temp = (!can_create_pseudo_p ()
1588 ? operands[0]
1589 : gen_reg_rtx (Pmode));
1590 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1593 else if (GET_CODE (operands[1]) == CONST
1594 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1595 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1597 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1598 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1599 mode, temp);
1600 operands[1] = expand_binop (mode, add_optab, temp,
1601 XEXP (XEXP (operands[1], 0), 1),
1602 (!can_create_pseudo_p ()
1603 ? temp
1604 : gen_reg_rtx (Pmode)),
1605 0, OPTAB_LIB_WIDEN);
1609 if (! reload_in_progress && ! reload_completed)
/* Copy the source to a register if neither operand is a register.  */
1612 if (! register_operand (operands[0], mode)
1613 && ! sh_register_operand (operands[1], mode))
1614 operands[1] = copy_to_mode_reg (mode, operands[1]);
1616 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
/* This is like change_address_1 (operands[0], mode, 0, 1),
except that we can't use that function because it is static.  */
1620 rtx new_rtx = change_address (operands[0], mode, 0);
1621 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1622 operands[0] = new_rtx;
1625 /* This case can happen while generating code to move the result
1626 of a library call to the target. Reject `st r0,@(rX,rY)' because
1627 reload will fail to find a spill register for rX, since r0 is already
1628 being used for the source. */
1629 else if (TARGET_SH1
1630 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1631 && MEM_P (operands[0])
1632 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1633 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1634 operands[1] = copy_to_mode_reg (mode, operands[1]);
1637 if (mode == Pmode || mode == ptr_mode)
1639 rtx op0, op1, opc;
1640 enum tls_model tls_kind;
1642 op0 = operands[0];
1643 op1 = operands[1];
1644 if (GET_CODE (op1) == CONST
1645 && GET_CODE (XEXP (op1, 0)) == PLUS
1646 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1647 != TLS_MODEL_NONE))
1649 opc = XEXP (XEXP (op1, 0), 1);
1650 op1 = XEXP (XEXP (op1, 0), 0);
1652 else
1653 opc = NULL_RTX;
1655 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1657 rtx tga_op1, tga_ret, tmp, tmp2;
1659 if (! flag_pic
1660 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1661 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1662 || tls_kind == TLS_MODEL_INITIAL_EXEC))
/* Don't schedule the insns that load the GOT address when the first
scheduling pass is enabled, to avoid spill failures for R0.  */
1667 if (flag_schedule_insns)
1668 emit_insn (gen_blockage ());
1669 emit_insn (gen_GOTaddr2picreg ());
1670 emit_use (gen_rtx_REG (SImode, PIC_REG));
1671 if (flag_schedule_insns)
1672 emit_insn (gen_blockage ());
1675 switch (tls_kind)
1677 case TLS_MODEL_GLOBAL_DYNAMIC:
1678 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1679 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1680 tmp = gen_reg_rtx (Pmode);
1681 emit_move_insn (tmp, tga_ret);
1682 op1 = tmp;
1683 break;
1685 case TLS_MODEL_LOCAL_DYNAMIC:
1686 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1687 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1689 tmp = gen_reg_rtx (Pmode);
1690 emit_move_insn (tmp, tga_ret);
1692 if (register_operand (op0, Pmode))
1693 tmp2 = op0;
1694 else
1695 tmp2 = gen_reg_rtx (Pmode);
1697 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1698 op1 = tmp2;
1699 break;
1701 case TLS_MODEL_INITIAL_EXEC:
1702 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1703 tmp = gen_sym2GOTTPOFF (op1);
1704 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1705 op1 = tga_op1;
1706 break;
1708 case TLS_MODEL_LOCAL_EXEC:
1709 tmp2 = gen_reg_rtx (Pmode);
1710 emit_insn (gen_load_gbr (tmp2));
1711 tmp = gen_reg_rtx (Pmode);
1712 emit_insn (gen_symTPOFF2reg (tmp, op1));
1714 if (register_operand (op0, Pmode))
1715 op1 = op0;
1716 else
1717 op1 = gen_reg_rtx (Pmode);
1719 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1720 break;
1722 default:
1723 gcc_unreachable ();
1725 if (opc)
1726 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1727 operands[1] = op1;
1731 return 0;
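/* As an illustration of the TLS handling above: for the local-exec model,
   moving the address of a thread-local symbol SYM expands roughly into

     tmp2 = <gbr>           ;; load_gbr: the thread pointer
     tmp  = TPOFF (SYM)     ;; symTPOFF2reg: offset of SYM in the TLS block
     op1  = tmp + tmp2      ;; addsi3

   i.e. the final address is the thread pointer plus the symbol's TPOFF
   offset; the temporary names here are purely illustrative.  */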
1734 enum rtx_code
1735 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1736 enum rtx_code comparison)
1738 rtx op1;
1739 rtx scratch = NULL_RTX;
1741 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1742 comparison = GET_CODE (operands[0]);
1743 else
1744 scratch = operands[4];
1745 if (CONST_INT_P (operands[1])
1746 && !CONST_INT_P (operands[2]))
1748 rtx tmp = operands[1];
1750 operands[1] = operands[2];
1751 operands[2] = tmp;
1752 comparison = swap_condition (comparison);
1754 if (CONST_INT_P (operands[2]))
1756 HOST_WIDE_INT val = INTVAL (operands[2]);
1757 if ((val == -1 || val == -0x81)
1758 && (comparison == GT || comparison == LE))
1760 comparison = (comparison == GT) ? GE : LT;
1761 operands[2] = gen_int_mode (val + 1, mode);
1763 else if ((val == 1 || val == 0x80)
1764 && (comparison == GE || comparison == LT))
1766 comparison = (comparison == GE) ? GT : LE;
1767 operands[2] = gen_int_mode (val - 1, mode);
1769 else if (val == 1 && (comparison == GEU || comparison == LTU))
1771 comparison = (comparison == GEU) ? NE : EQ;
1772 operands[2] = CONST0_RTX (mode);
1774 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1776 comparison = (comparison == GEU) ? GTU : LEU;
1777 operands[2] = gen_int_mode (val - 1, mode);
1779 else if (val == 0 && (comparison == GTU || comparison == LEU))
1780 comparison = (comparison == GTU) ? NE : EQ;
1781 else if (mode == SImode
1782 && ((val == 0x7fffffff
1783 && (comparison == GTU || comparison == LEU))
1784 || ((unsigned HOST_WIDE_INT) val
1785 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1786 && (comparison == GEU || comparison == LTU))))
1788 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1789 operands[2] = CONST0_RTX (mode);
1792 op1 = operands[1];
1793 if (can_create_pseudo_p ())
1794 operands[1] = force_reg (mode, op1);
1795 /* When we are handling DImode comparisons, we want to keep constants so
1796 that we can optimize the component comparisons; however, memory loads
1797 are better issued as a whole so that they can be scheduled well.
1798 SImode equality comparisons allow I08 constants, but only when they
1799 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1800 into a register, that register might as well be r0, and we allow the
1801 constant. If it is already in a register, this is likely to be
1802 allocated to a different hard register, thus we load the constant into
1803 a register unless it is zero. */
1804 if (!REG_P (operands[2])
1805 && (!CONST_INT_P (operands[2])
1806 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1807 && ((comparison != EQ && comparison != NE)
1808 || (REG_P (op1) && REGNO (op1) != R0_REG)
1809 || !satisfies_constraint_I08 (operands[2])))))
1811 if (scratch && GET_MODE (scratch) == mode)
1813 emit_move_insn (scratch, operands[2]);
1814 operands[2] = scratch;
1816 else if (can_create_pseudo_p ())
1817 operands[2] = force_reg (mode, operands[2]);
1819 return comparison;
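/* For example, the constant adjustments above turn
     (gt  x -1) into (ge x 0)
     (ge  x  1) into (gt x 0)
     (geu x  1) into (ne x 0)
     (gtu x  0) into (ne x 0)
   so that the comparison can be done against zero (cmp/pz, cmp/pl or tst)
   instead of having to materialize the constant in a register.  */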
1822 void
1823 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1825 rtx (*branch_expander) (rtx) = gen_branch_true;
1826 rtx jump;
1828 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1829 switch (comparison)
1831 case NE: case LT: case LE: case LTU: case LEU:
1832 comparison = reverse_condition (comparison);
1833 branch_expander = gen_branch_false;
1834 default: ;
1836 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1837 gen_rtx_fmt_ee (comparison, SImode,
1838 operands[1], operands[2])));
1839 jump = emit_jump_insn (branch_expander (operands[3]));
1840 if (probability >= 0)
1841 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1845 /* ??? How should we distribute probabilities when more than one branch
1846 is generated?  So far we only have some ad-hoc observations:
1847 - If the operands are random, they are likely to differ in both parts.
1848 - If comparing items in a hash chain, the operands are random or equal;
1849 operation should be EQ or NE.
1850 - If items are searched in an ordered tree from the root, we can expect
1851 the highpart to be unequal about half of the time; operation should be
1852 an inequality comparison, operands non-constant, and overall probability
1853 about 50%. Likewise for quicksort.
1854 - Range checks will often be made against constants.  Even if we assume for
1855 simplicity an even distribution of the non-constant operand over a
1856 sub-range here, the same probability could be generated with differently
1857 wide sub-ranges - as long as the ratio of the part of the subrange that
1858 is before the threshold to the part that comes after the threshold stays
1859 the same. Thus, we can't really tell anything here;
1860 assuming random distribution is at least simple.
1863 bool
1864 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1866 enum rtx_code msw_taken, msw_skip, lsw_taken;
1867 rtx skip_label = NULL_RTX;
1868 rtx op1h, op1l, op2h, op2l;
1869 int num_branches;
1870 int prob, rev_prob;
1871 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1872 rtx scratch = operands[4];
1874 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1875 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1876 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1877 op1l = gen_lowpart (SImode, operands[1]);
1878 op2l = gen_lowpart (SImode, operands[2]);
1879 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1880 prob = split_branch_probability;
1881 rev_prob = REG_BR_PROB_BASE - prob;
1882 switch (comparison)
1884 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1885 That costs 1 cycle more when the first branch can be predicted taken,
1886 but saves us mispredicts because only one branch needs prediction.
1887 It also enables generating the cmpeqdi_t-1 pattern. */
1888 case EQ:
1889 if (TARGET_CMPEQDI_T)
1891 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1892 emit_jump_insn (gen_branch_true (operands[3]));
1893 return true;
1895 msw_skip = NE;
1896 lsw_taken = EQ;
1897 if (prob >= 0)
1899 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
1901 msw_skip_prob = rev_prob;
1902 if (REG_BR_PROB_BASE <= 65535)
1903 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1904 else
1906 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1907 lsw_taken_prob
1908 = (prob
1909 ? (REG_BR_PROB_BASE
1910 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1911 / ((HOST_WIDEST_INT) prob << 32)))
1912 : 0);
1915 break;
1916 case NE:
1917 if (TARGET_CMPEQDI_T)
1919 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1920 emit_jump_insn (gen_branch_false (operands[3]));
1921 return true;
1923 msw_taken = NE;
1924 msw_taken_prob = prob;
1925 lsw_taken = NE;
1926 lsw_taken_prob = 0;
1927 break;
1928 case GTU: case GT:
1929 msw_taken = comparison;
1930 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1931 break;
1932 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1933 msw_skip = swap_condition (msw_taken);
1934 lsw_taken = GTU;
1935 break;
1936 case GEU: case GE:
1937 if (op2l == CONST0_RTX (SImode))
1938 msw_taken = comparison;
1939 else
1941 msw_taken = comparison == GE ? GT : GTU;
1942 msw_skip = swap_condition (msw_taken);
1943 lsw_taken = GEU;
1945 break;
1946 case LTU: case LT:
1947 msw_taken = comparison;
1948 if (op2l == CONST0_RTX (SImode))
1949 break;
1950 msw_skip = swap_condition (msw_taken);
1951 lsw_taken = LTU;
1952 break;
1953 case LEU: case LE:
1954 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1955 msw_taken = comparison;
1956 else
1958 lsw_taken = LEU;
1959 if (comparison == LE)
1960 msw_taken = LT;
1961 else if (op2h != CONST0_RTX (SImode))
1962 msw_taken = LTU;
1963 else
1965 msw_skip = swap_condition (LTU);
1966 break;
1968 msw_skip = swap_condition (msw_taken);
1970 break;
1971 default: return false;
1973 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1974 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1975 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1976 if (comparison != EQ && comparison != NE && num_branches > 1)
1978 if (!CONSTANT_P (operands[2])
1979 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1980 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1982 msw_taken_prob = prob / 2U;
1983 msw_skip_prob
1984 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1985 lsw_taken_prob = prob;
1987 else
1989 msw_taken_prob = prob;
1990 msw_skip_prob = REG_BR_PROB_BASE;
1991 /* ??? If we have a constant op2h, should we use that when
1992 calculating lsw_taken_prob? */
1993 lsw_taken_prob = prob;
1996 operands[1] = op1h;
1997 operands[2] = op2h;
1998 operands[4] = NULL_RTX;
1999 if (reload_completed
2000 && ! arith_reg_or_0_operand (op2h, SImode)
2001 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2002 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2003 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2005 emit_move_insn (scratch, operands[2]);
2006 operands[2] = scratch;
2008 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2009 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2010 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2012 rtx taken_label = operands[3];
2014 /* Operands were possibly modified, but msw_skip doesn't expect this.
2015 Always use the original ones. */
2016 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2018 operands[1] = op1h;
2019 operands[2] = op2h;
2020 if (reload_completed
2021 && ! arith_reg_or_0_operand (op2h, SImode)
2022 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2024 emit_move_insn (scratch, operands[2]);
2025 operands[2] = scratch;
2029 operands[3] = skip_label = gen_label_rtx ();
2030 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2031 operands[3] = taken_label;
2033 operands[1] = op1l;
2034 operands[2] = op2l;
2035 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2037 if (reload_completed
2038 && ! arith_reg_or_0_operand (op2l, SImode)
2039 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2041 emit_move_insn (scratch, operands[2]);
2042 operands[2] = scratch;
2044 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2046 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2047 emit_label (skip_label);
2048 return true;
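/* As an illustration of the splitting above: a DImode (gtu a b) branch
   with non-constant operands (and without TARGET_CMPEQDI_T) gets
   msw_taken = GTU, msw_skip = LTU and lsw_taken = GTU, i.e. roughly

     if (hi(a) >u hi(b)) goto taken;    ;; msw_taken branch
     if (hi(a) <u hi(b)) goto skip;     ;; msw_skip branch
     if (lo(a) >u lo(b)) goto taken;    ;; lsw_taken branch
   skip:

   so the low parts are only compared when the high parts are equal.  */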
2051 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2053 static void
2054 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2056 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2058 insn = gen_rtx_PARALLEL (VOIDmode,
2059 gen_rtvec (2, insn,
2060 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2061 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2063 else
2064 emit_insn (insn);
2067 /* Prepare the operands for an scc instruction; make sure that the
2068 compare has been done and the result is in T_REG. */
2069 void
2070 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2072 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2073 enum rtx_code oldcode = code;
2074 enum machine_mode mode;
2076 /* First need a compare insn. */
2077 switch (code)
2079 case NE:
2080 /* It isn't possible to handle this case. */
2081 gcc_unreachable ();
2082 case LT:
2083 code = GT;
2084 break;
2085 case LE:
2086 code = GE;
2087 break;
2088 case LTU:
2089 code = GTU;
2090 break;
2091 case LEU:
2092 code = GEU;
2093 break;
2094 default:
2095 break;
2097 if (code != oldcode)
2099 rtx tmp = op0;
2100 op0 = op1;
2101 op1 = tmp;
2104 mode = GET_MODE (op0);
2105 if (mode == VOIDmode)
2106 mode = GET_MODE (op1);
2108 op0 = force_reg (mode, op0);
2109 if ((code != EQ && code != NE
2110 && (op1 != const0_rtx
2111 || code == GTU || code == GEU || code == LTU || code == LEU))
2112 || (mode == DImode && op1 != const0_rtx)
2113 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2114 op1 = force_reg (mode, op1);
2116 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2117 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2118 mode);
2122 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2123 rtx op0, rtx op1)
2125 rtx target = gen_reg_rtx (SImode);
2126 rtx tmp;
2128 gcc_assert (TARGET_SHMEDIA);
2129 switch (code)
2131 case EQ:
2132 case GT:
2133 case LT:
2134 case UNORDERED:
2135 case GTU:
2136 case LTU:
2137 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2138 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2139 code = NE;
2140 break;
2142 case NE:
2143 case GE:
2144 case LE:
2145 case ORDERED:
2146 case GEU:
2147 case LEU:
2148 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2149 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2150 code = EQ;
2151 break;
2153 case UNEQ:
2154 case UNGE:
2155 case UNGT:
2156 case UNLE:
2157 case UNLT:
2158 case LTGT:
2159 return NULL_RTX;
2161 default:
2162 gcc_unreachable ();
2165 if (mode == DImode)
2167 rtx t2 = gen_reg_rtx (DImode);
2168 emit_insn (gen_extendsidi2 (t2, target));
2169 target = t2;
2172 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2175 /* Called from the md file, set up the operands of a compare instruction. */
2177 void
2178 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2180 enum rtx_code code = GET_CODE (operands[0]);
2181 enum rtx_code branch_code;
2182 rtx op0 = operands[1];
2183 rtx op1 = operands[2];
2184 rtx insn, tem;
2185 bool need_ccmpeq = false;
2187 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2189 op0 = force_reg (mode, op0);
2190 op1 = force_reg (mode, op1);
2192 else
2194 if (code != EQ || mode == DImode)
2196 /* Force args into regs, since we can't use constants here. */
2197 op0 = force_reg (mode, op0);
2198 if (op1 != const0_rtx || code == GTU || code == GEU)
2199 op1 = force_reg (mode, op1);
2203 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2205 if (code == LT
2206 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2207 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2209 tem = op0, op0 = op1, op1 = tem;
2210 code = swap_condition (code);
2213 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2214 if (code == GE)
2216 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2217 need_ccmpeq = true;
2218 code = GT;
2221 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2222 to EQ/GT respectively. */
2223 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2226 switch (code)
2228 case EQ:
2229 case GT:
2230 case GE:
2231 case GTU:
2232 case GEU:
2233 branch_code = code;
2234 break;
2235 case NE:
2236 case LT:
2237 case LE:
2238 case LTU:
2239 case LEU:
2240 branch_code = reverse_condition (code);
2241 break;
2242 default:
2243 gcc_unreachable ();
2246 insn = gen_rtx_SET (VOIDmode,
2247 gen_rtx_REG (SImode, T_REG),
2248 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2250 sh_emit_set_t_insn (insn, mode);
2251 if (need_ccmpeq)
2252 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2254 if (branch_code == code)
2255 emit_jump_insn (gen_branch_true (operands[3]));
2256 else
2257 emit_jump_insn (gen_branch_false (operands[3]));
2260 void
2261 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2263 enum rtx_code code = GET_CODE (operands[1]);
2264 rtx op0 = operands[2];
2265 rtx op1 = operands[3];
2266 rtx lab = NULL_RTX;
2267 bool invert = false;
2268 rtx tem;
2270 op0 = force_reg (mode, op0);
2271 if ((code != EQ && code != NE
2272 && (op1 != const0_rtx
2273 || code == GTU || code == GEU || code == LTU || code == LEU))
2274 || (mode == DImode && op1 != const0_rtx)
2275 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2276 op1 = force_reg (mode, op1);
2278 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2280 if (code == LT || code == LE)
2282 code = swap_condition (code);
2283 tem = op0, op0 = op1, op1 = tem;
2285 if (code == GE)
2287 if (TARGET_IEEE)
2289 lab = gen_label_rtx ();
2290 sh_emit_scc_to_t (EQ, op0, op1);
2291 emit_jump_insn (gen_branch_true (lab));
2292 code = GT;
2294 else
2296 code = LT;
2297 invert = true;
2302 if (code == NE)
2304 code = EQ;
2305 invert = true;
2308 sh_emit_scc_to_t (code, op0, op1);
2309 if (lab)
2310 emit_label (lab);
2311 if (invert)
2312 emit_insn (gen_movnegt (operands[0]));
2313 else
2314 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2317 /* Functions to output assembly code. */
2319 /* Return a sequence of instructions to perform DI or DF move.
2321 Since the SH cannot move a DI or DF in one instruction, we have
2322 to take care when we see overlapping source and dest registers. */
2324 const char *
2325 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2326 enum machine_mode mode)
2328 rtx dst = operands[0];
2329 rtx src = operands[1];
2331 if (MEM_P (dst)
2332 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2333 return "mov.l %T1,%0\n\tmov.l %1,%0";
2335 if (register_operand (dst, mode)
2336 && register_operand (src, mode))
2338 if (REGNO (src) == MACH_REG)
2339 return "sts mach,%S0\n\tsts macl,%R0";
2341 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2342 when mov.d r1,r0 do r1->r0 then r2->r1. */
2344 if (REGNO (src) + 1 == REGNO (dst))
2345 return "mov %T1,%T0\n\tmov %1,%0";
2346 else
2347 return "mov %1,%0\n\tmov %T1,%T0";
2349 else if (CONST_INT_P (src))
2351 if (INTVAL (src) < 0)
2352 output_asm_insn ("mov #-1,%S0", operands);
2353 else
2354 output_asm_insn ("mov #0,%S0", operands);
2356 return "mov %1,%R0";
2358 else if (MEM_P (src))
2360 int ptrreg = -1;
2361 int dreg = REGNO (dst);
2362 rtx inside = XEXP (src, 0);
2364 switch (GET_CODE (inside))
2366 case REG:
2367 ptrreg = REGNO (inside);
2368 break;
2370 case SUBREG:
2371 ptrreg = subreg_regno (inside);
2372 break;
2374 case PLUS:
2375 ptrreg = REGNO (XEXP (inside, 0));
2376 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2377 an offsettable address. Unfortunately, offsettable addresses use
2378 QImode to check the offset, and a QImode offsettable address
2379 requires r0 for the other operand, which is not currently
2380 supported, so we can't use the 'o' constraint.
2381 Thus we must check for and handle r0+REG addresses here.
2382 We punt for now, since this is likely very rare. */
2383 gcc_assert (!REG_P (XEXP (inside, 1)));
2384 break;
2386 case LABEL_REF:
2387 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2388 case POST_INC:
2389 return "mov.l %1,%0\n\tmov.l %1,%T0";
2390 default:
2391 gcc_unreachable ();
2394 /* Work out the safe way to copy. Copy into the second half first. */
2395 if (dreg == ptrreg)
2396 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2399 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2402 /* Print an instruction which would have gone into a delay slot after
2403 another instruction, but couldn't because the other instruction expanded
2404 into a sequence where putting the slot insn at the end wouldn't work. */
2406 static void
2407 print_slot (rtx insn)
2409 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2411 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2414 const char *
2415 output_far_jump (rtx insn, rtx op)
2417 struct { rtx lab, reg, op; } this_jmp;
2418 rtx braf_base_lab = NULL_RTX;
2419 const char *jump;
2420 int far;
2421 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2422 rtx prev;
2424 this_jmp.lab = gen_label_rtx ();
2426 if (TARGET_SH2
2427 && offset >= -32764
2428 && offset - get_attr_length (insn) <= 32766)
2430 far = 0;
2431 jump = "mov.w %O0,%1; braf %1";
2433 else
2435 far = 1;
2436 if (flag_pic)
2438 if (TARGET_SH2)
2439 jump = "mov.l %O0,%1; braf %1";
2440 else
2441 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2443 else
2444 jump = "mov.l %O0,%1; jmp @%1";
2446 /* If we have a scratch register available, use it. */
2447 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2448 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2450 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2451 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2452 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2453 output_asm_insn (jump, &this_jmp.lab);
2454 if (dbr_sequence_length ())
2455 print_slot (final_sequence);
2456 else
2457 output_asm_insn ("nop", 0);
2459 else
2461 /* Output the delay slot insn first if any. */
2462 if (dbr_sequence_length ())
2463 print_slot (final_sequence);
2465 this_jmp.reg = gen_rtx_REG (SImode, 13);
2466 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2467 Fortunately, MACL is fixed and call-clobbered, and we never
2468 need its value across jumps, so save r13 in it instead of in
2469 the stack. */
2470 if (TARGET_SH5)
2471 output_asm_insn ("lds r13, macl", 0);
2472 else
2473 output_asm_insn ("mov.l r13,@-r15", 0);
2474 output_asm_insn (jump, &this_jmp.lab);
2475 if (TARGET_SH5)
2476 output_asm_insn ("sts macl, r13", 0);
2477 else
2478 output_asm_insn ("mov.l @r15+,r13", 0);
2480 if (far && flag_pic && TARGET_SH2)
2482 braf_base_lab = gen_label_rtx ();
2483 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2484 CODE_LABEL_NUMBER (braf_base_lab));
2486 if (far)
2487 output_asm_insn (".align 2", 0);
2488 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2489 this_jmp.op = op;
2490 if (far && flag_pic)
2492 if (TARGET_SH2)
2493 this_jmp.lab = braf_base_lab;
2494 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2496 else
2497 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2498 return "";
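/* For instance, the far non-PIC case above, without a scratch register
   and not on SH5, emits roughly

	mov.l	r13,@-r15
	mov.l	.Lpool,r13
	jmp	@r13
	mov.l	@r15+,r13	! restore runs in the jmp delay slot
	.align	2
     .Lpool:
	.long	<jump destination>

   where .Lpool stands for the %O0 label and the .long holds the %O2
   destination; the label name is only illustrative.  */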
2501 /* Local label counter, used for constants in the pool and inside
2502 pattern branches. */
2504 static int lf = 100;
2506 /* Output code for ordinary branches. */
2508 const char *
2509 output_branch (int logic, rtx insn, rtx *operands)
2511 switch (get_attr_length (insn))
2513 case 6:
2514 /* This can happen if filling the delay slot has caused a forward
2515 branch to exceed its range (we could reverse it, but only
2516 when we know we won't overextend other branches; this should
2517 best be handled by relaxation).
2518 It can also happen when other condbranches hoist delay slot insn
2519 from their destination, thus leading to code size increase.
2520 But the branch will still be in the range -4092..+4098 bytes. */
2522 if (! TARGET_RELAX)
2524 int label = lf++;
2525 /* The call to print_slot will clobber the operands. */
2526 rtx op0 = operands[0];
2528 /* If the instruction in the delay slot is annulled (true), then
2529 there is no delay slot where we can put it now. The only safe
2530 place for it is after the label. final will do that by default. */
2532 if (final_sequence
2533 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2534 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2536 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2537 ASSEMBLER_DIALECT ? "/" : ".", label);
2538 print_slot (final_sequence);
2540 else
2541 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2543 output_asm_insn ("bra\t%l0", &op0);
2544 fprintf (asm_out_file, "\tnop\n");
2545 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2547 return "";
2549 /* When relaxing, handle this like a short branch. The linker
2550 will fix it up if it still doesn't fit after relaxation. */
2551 case 2:
2552 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2554 /* These are for SH2e, in which we have to account for the
2555 extra nop because of the hardware bug in annulled branches. */
2556 case 8:
2557 if (! TARGET_RELAX)
2559 int label = lf++;
2561 gcc_assert (!final_sequence
2562 || !(INSN_ANNULLED_BRANCH_P
2563 (XVECEXP (final_sequence, 0, 0))));
2564 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2565 logic ? "f" : "t",
2566 ASSEMBLER_DIALECT ? "/" : ".", label);
2567 fprintf (asm_out_file, "\tnop\n");
2568 output_asm_insn ("bra\t%l0", operands);
2569 fprintf (asm_out_file, "\tnop\n");
2570 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2572 return "";
2574 /* When relaxing, fall through. */
2575 case 4:
2577 char buffer[10];
2579 sprintf (buffer, "b%s%ss\t%%l0",
2580 logic ? "t" : "f",
2581 ASSEMBLER_DIALECT ? "/" : ".");
2582 output_asm_insn (buffer, &operands[0]);
2583 return "nop";
2586 default:
2587 /* There should be no longer branches now - that would
2588 indicate that something has destroyed the branches set
2589 up in machine_dependent_reorg. */
2590 gcc_unreachable ();
2594 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2595 fill in operand 9 as a label to the successor insn.
2596 We try to use jump threading where possible.
2597 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2598 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2599 follow jmp and bt, if the address is in range. */
2600 const char *
2601 output_branchy_insn (enum rtx_code code, const char *templ,
2602 rtx insn, rtx *operands)
2604 rtx next_insn = NEXT_INSN (insn);
2606 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2608 rtx src = SET_SRC (PATTERN (next_insn));
2609 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2611 /* Following branch not taken */
2612 operands[9] = gen_label_rtx ();
2613 emit_label_after (operands[9], next_insn);
2614 INSN_ADDRESSES_NEW (operands[9],
2615 INSN_ADDRESSES (INSN_UID (next_insn))
2616 + get_attr_length (next_insn));
2617 return templ;
2619 else
2621 int offset = (branch_dest (next_insn)
2622 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2623 if (offset >= -252 && offset <= 258)
2625 if (GET_CODE (src) == IF_THEN_ELSE)
2626 /* branch_true */
2627 src = XEXP (src, 1);
2628 operands[9] = src;
2629 return templ;
2633 operands[9] = gen_label_rtx ();
2634 emit_label_after (operands[9], insn);
2635 INSN_ADDRESSES_NEW (operands[9],
2636 INSN_ADDRESSES (INSN_UID (insn))
2637 + get_attr_length (insn));
2638 return templ;
2641 const char *
2642 output_ieee_ccmpeq (rtx insn, rtx *operands)
2644 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2645 insn, operands);
2648 /* Output the start of the assembler file. */
2650 static void
2651 sh_file_start (void)
2653 default_file_start ();
2655 if (TARGET_ELF)
2656 /* We need to show the text section with the proper
2657 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2658 emits it without attributes; otherwise GAS will
2659 complain. We can teach GAS specifically about the
2660 default attributes for our choice of text section, but
2661 then we would have to change GAS again if/when we change
2662 the text section name. */
2663 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2664 else
2665 /* Switch to the data section so that the coffsem symbol
2666 isn't in the text section. */
2667 switch_to_section (data_section);
2669 if (TARGET_LITTLE_ENDIAN)
2670 fputs ("\t.little\n", asm_out_file);
2672 if (!TARGET_ELF)
2674 if (TARGET_SHCOMPACT)
2675 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2676 else if (TARGET_SHMEDIA)
2677 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2678 TARGET_SHMEDIA64 ? 64 : 32);
2682 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2684 static bool
2685 unspec_caller_rtx_p (rtx pat)
2687 rtx base, offset;
2688 int i;
2690 split_const (pat, &base, &offset);
2691 if (GET_CODE (base) == UNSPEC)
2693 if (XINT (base, 1) == UNSPEC_CALLER)
2694 return true;
2695 for (i = 0; i < XVECLEN (base, 0); i++)
2696 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2697 return true;
2699 return false;
2702 /* Indicate that INSN cannot be duplicated. This is true for an insn
2703 that generates a unique label. */
2705 static bool
2706 sh_cannot_copy_insn_p (rtx insn)
2708 rtx pat;
2710 if (!reload_completed || !flag_pic)
2711 return false;
2713 if (!NONJUMP_INSN_P (insn))
2714 return false;
2715 if (asm_noperands (insn) >= 0)
2716 return false;
2718 pat = PATTERN (insn);
2719 if (GET_CODE (pat) != SET)
2720 return false;
2721 pat = SET_SRC (pat);
2723 if (unspec_caller_rtx_p (pat))
2724 return true;
2726 return false;
2729 /* Actual number of instructions used to make a shift by N. */
2730 static const char ashiftrt_insns[] =
2731 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2733 /* Left shift and logical right shift are the same. */
2734 static const char shift_insns[] =
2735 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2737 /* Individual shift amounts needed to get the above length sequences.
2738 One bit right shifts clobber the T bit, so when possible, put one bit
2739 shifts in the middle of the sequence, so the ends are eligible for
2740 branch delay slots. */
2741 static const short shift_amounts[32][5] = {
2742 {0}, {1}, {2}, {2, 1},
2743 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2744 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2745 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2746 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2747 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2748 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2749 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
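/* For example, a constant left shift by 13 uses the row {8, 2, 1, 2}
   above and is emitted roughly as

	shll8	rN
	shll2	rN
	shll	rN	! the one-bit shift sits in the middle
	shll2	rN

   matching shift_insns[13] == 4.  Negative entries, as in {8, -2, 8}
   for a shift by 14, mean a shift in the opposite direction.  */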
2751 /* Likewise, but for shift amounts < 16, up to three highmost bits
2752 might be clobbered. This is typically used when combined with some
2753 kind of sign or zero extension. */
2755 static const char ext_shift_insns[] =
2756 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2758 static const short ext_shift_amounts[32][4] = {
2759 {0}, {1}, {2}, {2, 1},
2760 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2761 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2762 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2763 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2764 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2765 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2766 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2768 /* Assuming we have a value that has been sign-extended by at least one bit,
2769 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2770 to shift it by N without data loss, and quicker than by other means? */
2771 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
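/* EXT_SHIFT_SIGNED holds exactly for N == 7 and N == 15; e.g. for N == 7
   the row {8, -1} above becomes "shift left by 8, then one right shift",
   and that final one-bit right shift can be made arithmetic without data
   loss when the value was sign-extended by at least one bit.  */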
2773 /* This is used in length attributes in sh.md to help compute the length
2774 of arbitrary constant shift instructions. */
2777 shift_insns_rtx (rtx insn)
2779 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2780 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2781 enum rtx_code shift_code = GET_CODE (set_src);
2783 switch (shift_code)
2785 case ASHIFTRT:
2786 return ashiftrt_insns[shift_count];
2787 case LSHIFTRT:
2788 case ASHIFT:
2789 return shift_insns[shift_count];
2790 default:
2791 gcc_unreachable ();
2795 /* Return the cost of a shift. */
2797 static inline int
2798 shiftcosts (rtx x)
2800 int value;
2802 if (TARGET_SHMEDIA)
2803 return 1;
2805 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2807 if (GET_MODE (x) == DImode
2808 && CONST_INT_P (XEXP (x, 1))
2809 && INTVAL (XEXP (x, 1)) == 1)
2810 return 2;
2812 /* Everything else is invalid, because there is no pattern for it. */
2813 return MAX_COST;
2815 /* If shifting by a non-constant amount, this will be expensive. */
2816 if (!CONST_INT_P (XEXP (x, 1)))
2817 return SH_DYNAMIC_SHIFT_COST;
2819 /* Otherwise, return the true cost in instructions. Cope with out of range
2820 shift counts more or less arbitrarily. */
2821 value = INTVAL (XEXP (x, 1)) & 31;
2823 if (GET_CODE (x) == ASHIFTRT)
2825 int cost = ashiftrt_insns[value];
2826 /* If SH3, then we put the constant in a reg and use shad. */
2827 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2828 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2829 return cost;
2831 else
2832 return shift_insns[value];
2835 /* Return the cost of an AND/XOR/IOR operation. */
2837 static inline int
2838 and_xor_ior_costs (rtx x, int code)
2840 int i;
2842 /* A logical operation with two registers is a single cycle
2843 instruction. */
2844 if (!CONST_INT_P (XEXP (x, 1)))
2845 return 1;
2847 i = INTVAL (XEXP (x, 1));
2849 if (TARGET_SHMEDIA)
2851 if (satisfies_constraint_I10 (XEXP (x, 1))
2852 || satisfies_constraint_J16 (XEXP (x, 1)))
2853 return 1;
2854 else
2855 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
2858 /* These constants are single cycle extu.[bw] instructions. */
2859 if ((i == 0xff || i == 0xffff) && code == AND)
2860 return 1;
2861 /* Constants that can be used in an instruction as an immediate are
2862 a single cycle, but this requires r0, so make it a little more
2863 expensive. */
2864 if (CONST_OK_FOR_K08 (i))
2865 return 2;
2866 /* Constants that can be loaded with a mov immediate need one more cycle.
2867 This case is probably unnecessary. */
2868 if (CONST_OK_FOR_I08 (i))
2869 return 2;
2870 /* Any other constant requires an additional 2 cycle pc-relative load.
2871 This case is probably unnecessary. */
2872 return 3;
2875 /* Return the cost of an addition or a subtraction. */
2877 static inline int
2878 addsubcosts (rtx x)
2880 /* Adding a register is a single cycle insn. */
2881 if (REG_P (XEXP (x, 1))
2882 || GET_CODE (XEXP (x, 1)) == SUBREG)
2883 return 1;
2885 /* Likewise for small constants. */
2886 if (CONST_INT_P (XEXP (x, 1))
2887 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2888 return 1;
2890 if (TARGET_SHMEDIA)
2891 switch (GET_CODE (XEXP (x, 1)))
2893 case CONST:
2894 case LABEL_REF:
2895 case SYMBOL_REF:
2896 return TARGET_SHMEDIA64 ? 5 : 3;
2898 case CONST_INT:
2899 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2900 return 2;
2901 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2902 return 3;
2903 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2904 return 4;
2906 /* Fall through. */
2907 default:
2908 return 5;
2911 /* Any other constant requires a 2 cycle pc-relative load plus an
2912 addition. */
2913 return 3;
2916 /* Return the cost of a multiply. */
2917 static inline int
2918 multcosts (rtx x ATTRIBUTE_UNUSED)
2920 if (sh_multcost >= 0)
2921 return sh_multcost;
2922 if (TARGET_SHMEDIA)
2923 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2924 accept constants. Ideally, we would use a cost of one or two and
2925 add the cost of the operand, but disregard the latter when inside loops
2926 and loop invariant code motion is still to follow.
2927 Using a multiply first and splitting it later if it's a loss
2928 doesn't work because of different sign / zero extension semantics
2929 of multiplies vs. shifts. */
2930 return optimize_size ? 2 : 3;
2932 if (TARGET_SH2)
2934 /* We have a mul insn, so we can never take more than the mul and the
2935 read of the mac reg, but count more because of the latency and extra
2936 reg usage. */
2937 if (optimize_size)
2938 return 2;
2939 return 3;
2942 /* If we're aiming at small code, then just count the number of
2943 insns in a multiply call sequence. */
2944 if (optimize_size)
2945 return 5;
2947 /* Otherwise count all the insns in the routine we'd be calling too. */
2948 return 20;
2951 /* Compute a (partial) cost for rtx X. Return true if the complete
2952 cost has been computed, and false if subexpressions should be
2953 scanned. In either case, *TOTAL contains the cost result. */
2955 static bool
2956 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2957 int *total, bool speed ATTRIBUTE_UNUSED)
2959 switch (code)
2961 case CONST_INT:
2962 if (TARGET_SHMEDIA)
2964 if (INTVAL (x) == 0)
2965 *total = 0;
2966 else if (outer_code == AND && and_operand ((x), DImode))
2967 *total = 0;
2968 else if ((outer_code == IOR || outer_code == XOR
2969 || outer_code == PLUS)
2970 && CONST_OK_FOR_I10 (INTVAL (x)))
2971 *total = 0;
2972 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2973 *total = COSTS_N_INSNS (outer_code != SET);
2974 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2975 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2976 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2977 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2978 else
2979 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2980 return true;
2982 if (CONST_OK_FOR_I08 (INTVAL (x)))
2983 *total = 0;
2984 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2985 && CONST_OK_FOR_K08 (INTVAL (x)))
2986 *total = 1;
2987 /* prepare_cmp_insn will force costly constants into registers before
2988 the cbranch[sd]i4 patterns can see them, so preserve potentially
2989 interesting ones not covered by I08 above. */
2990 else if (outer_code == COMPARE
2991 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2992 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2993 || INTVAL (x) == 0x7fffffff
2994 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2995 *total = 1;
2996 else
2997 *total = 8;
2998 return true;
3000 case EQ:
3001 /* An and with a constant compared against zero is
3002 most likely going to be a TST #imm, R0 instruction.
3003 Notice that this does not catch the zero_extract variants from
3004 the md file. */
3005 if (GET_CODE (XEXP (x, 0)) == AND
3006 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3008 *total = 1;
3009 return true;
3011 else
3012 return false;
3014 case CONST:
3015 case LABEL_REF:
3016 case SYMBOL_REF:
3017 if (TARGET_SHMEDIA64)
3018 *total = COSTS_N_INSNS (4);
3019 else if (TARGET_SHMEDIA32)
3020 *total = COSTS_N_INSNS (2);
3021 else
3022 *total = 5;
3023 return true;
3025 case CONST_DOUBLE:
3026 if (TARGET_SHMEDIA)
3027 *total = COSTS_N_INSNS (4);
3028 /* prepare_cmp_insn will force costly constants into registers before
3029 the cbranchdi4 pattern can see them, so preserve potentially
3030 interesting ones. */
3031 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3032 *total = 1;
3033 else
3034 *total = 10;
3035 return true;
3036 case CONST_VECTOR:
3037 if (x == CONST0_RTX (GET_MODE (x)))
3038 *total = 0;
3039 else if (sh_1el_vec (x, VOIDmode))
3040 *total = outer_code != SET;
3041 if (sh_rep_vec (x, VOIDmode))
3042 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3043 + (outer_code != SET));
3044 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3045 return true;
3047 case PLUS:
3048 case MINUS:
3049 *total = COSTS_N_INSNS (addsubcosts (x));
3050 return true;
3052 case AND:
3053 case XOR:
3054 case IOR:
3055 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3056 return true;
3058 case MULT:
3059 *total = COSTS_N_INSNS (multcosts (x));
3060 return true;
3062 case ASHIFT:
3063 case ASHIFTRT:
3064 case LSHIFTRT:
3065 *total = COSTS_N_INSNS (shiftcosts (x));
3066 return true;
3068 case DIV:
3069 case UDIV:
3070 case MOD:
3071 case UMOD:
3072 *total = COSTS_N_INSNS (20);
3073 return true;
3075 case PARALLEL:
3076 if (sh_1el_vec (x, VOIDmode))
3077 *total = outer_code != SET;
3078 if (sh_rep_vec (x, VOIDmode))
3079 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3080 + (outer_code != SET));
3081 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3082 return true;
3084 case FLOAT:
3085 case FIX:
3086 *total = 100;
3087 return true;
3089 default:
3090 return false;
3094 /* Compute the cost of an address. For the SH, all valid addresses are
3095 the same cost. Use a slightly higher cost for reg + reg addressing,
3096 since it increases pressure on r0. */
3098 static int
3099 sh_address_cost (rtx X,
3100 bool speed ATTRIBUTE_UNUSED)
3102 return (GET_CODE (X) == PLUS
3103 && ! CONSTANT_P (XEXP (X, 1))
3104 && ! TARGET_SHMEDIA ? 1 : 0);
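/* E.g. an indexed address like @(r0,r4) is costed 1 here, while @(4,r4)
   and plain @r4 are costed 0, mildly discouraging the reg + reg form
   that ties up r0.  */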
3107 /* Code to expand a shift. */
3109 void
3110 gen_ashift (int type, int n, rtx reg)
3112 /* Negative values here come from the shift_amounts array. */
3113 if (n < 0)
3115 if (type == ASHIFT)
3116 type = LSHIFTRT;
3117 else
3118 type = ASHIFT;
3119 n = -n;
3122 switch (type)
3124 case ASHIFTRT:
3125 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3126 break;
3127 case LSHIFTRT:
3128 if (n == 1)
3129 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3130 else
3131 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3132 break;
3133 case ASHIFT:
3134 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3135 break;
3139 /* Same for HImode */
3141 void
3142 gen_ashift_hi (int type, int n, rtx reg)
3144 /* Negative values here come from the shift_amounts array. */
3145 if (n < 0)
3147 if (type == ASHIFT)
3148 type = LSHIFTRT;
3149 else
3150 type = ASHIFT;
3151 n = -n;
3154 switch (type)
3156 case ASHIFTRT:
3157 case LSHIFTRT:
3158 /* We don't have HImode right shift operations because using the
3159 ordinary 32 bit shift instructions for that doesn't generate proper
3160 zero/sign extension.
3161 gen_ashift_hi is only called in contexts where we know that the
3162 sign extension works out correctly. */
3164 int offset = 0;
3165 if (GET_CODE (reg) == SUBREG)
3167 offset = SUBREG_BYTE (reg);
3168 reg = SUBREG_REG (reg);
3170 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3171 break;
3173 case ASHIFT:
3174 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3175 break;
3179 /* Output RTL to split a constant shift into its component SH constant
3180 shift instructions. */
3182 void
3183 gen_shifty_op (int code, rtx *operands)
3185 int value = INTVAL (operands[2]);
3186 int max, i;
3188 /* Truncate the shift count in case it is out of bounds. */
3189 value = value & 31;
3191 if (value == 31)
3193 if (code == LSHIFTRT)
3195 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3196 emit_insn (gen_movt (operands[0]));
3197 return;
3199 else if (code == ASHIFT)
3201 /* There is a two instruction sequence for 31 bit left shifts,
3202 but it requires r0. */
3203 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3205 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3206 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3207 return;
3211 else if (value == 0)
3213 /* This can happen even when optimizing, if there were subregs before
3214 reload. Don't output a nop here, as this is never optimized away;
3215 use a no-op move instead. */
3216 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3217 return;
3220 max = shift_insns[value];
3221 for (i = 0; i < max; i++)
3222 gen_ashift (code, shift_amounts[value][i], operands[0]);
3225 /* Same as above, but optimized for values where the topmost bits don't
3226 matter. */
3228 void
3229 gen_shifty_hi_op (int code, rtx *operands)
3231 int value = INTVAL (operands[2]);
3232 int max, i;
3233 void (*gen_fun) (int, int, rtx);
3235 /* This operation is used by and_shl for SImode values with a few
3236 high bits known to be cleared. */
3237 value &= 31;
3238 if (value == 0)
3240 emit_insn (gen_nop ());
3241 return;
3244 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3245 if (code == ASHIFT)
3247 max = ext_shift_insns[value];
3248 for (i = 0; i < max; i++)
3249 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3251 else
3252 /* When shifting right, emit the shifts in reverse order, so that
3253 solitary negative values come first. */
3254 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3255 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3258 /* Output RTL for an arithmetic right shift. */
3260 /* ??? Rewrite to use super-optimizer sequences. */
3263 expand_ashiftrt (rtx *operands)
3265 rtx wrk;
3266 char func[18];
3267 int value;
3269 if (TARGET_SH3 || TARGET_SH2A)
3271 if (!CONST_INT_P (operands[2]))
3273 rtx count = copy_to_mode_reg (SImode, operands[2]);
3274 emit_insn (gen_negsi2 (count, count));
3275 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3276 return 1;
3278 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3279 > 1 + SH_DYNAMIC_SHIFT_COST)
3281 rtx count
3282 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3283 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3284 return 1;
3287 if (!CONST_INT_P (operands[2]))
3288 return 0;
3290 value = INTVAL (operands[2]) & 31;
3292 if (value == 31)
3294 /* If we are called from abs expansion, arrange things so that we
3295 can use a single MT instruction that doesn't clobber the source,
3296 if LICM can hoist out the load of the constant zero. */
3297 if (currently_expanding_to_rtl)
3299 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3300 operands[1]));
3301 emit_insn (gen_mov_neg_si_t (operands[0]));
3302 return 1;
3304 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3305 return 1;
3307 else if (value >= 16 && value <= 19)
3309 wrk = gen_reg_rtx (SImode);
3310 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3311 value -= 16;
3312 while (value--)
3313 gen_ashift (ASHIFTRT, 1, wrk);
3314 emit_move_insn (operands[0], wrk);
3315 return 1;
3317 /* Expand a short sequence inline; for longer shifts, call a magic routine. */
3318 else if (value <= 5)
3320 wrk = gen_reg_rtx (SImode);
3321 emit_move_insn (wrk, operands[1]);
3322 while (value--)
3323 gen_ashift (ASHIFTRT, 1, wrk);
3324 emit_move_insn (operands[0], wrk);
3325 return 1;
3328 wrk = gen_reg_rtx (Pmode);
3330 /* Load the value into an arg reg and call a helper. */
3331 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3332 sprintf (func, "__ashiftrt_r4_%d", value);
3333 function_symbol (wrk, func, SFUNC_STATIC);
3334 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3335 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3336 return 1;
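/* As an example of the value == 31 case above: when expanding to RTL,
   dest = x >> 31 (arithmetic) becomes roughly

     T    = (0 > x)    ;; cmpgtsi_t, T ends up as the sign bit of x
     dest = -T         ;; mov_neg_si_t, giving 0 or -1

   which needs no shift instructions and leaves the source intact.  */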
3340 sh_dynamicalize_shift_p (rtx count)
3342 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3345 /* Try to find a good way to implement the combiner pattern
3346 [(set (match_operand:SI 0 "register_operand" "r")
3347 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3348 (match_operand:SI 2 "const_int_operand" "n"))
3349 (match_operand:SI 3 "const_int_operand" "n"))) .
3350 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3351 return 0 for simple right / left or left/right shift combination.
3352 return 1 for a combination of shifts with zero_extend.
3353 return 2 for a combination of shifts with an AND that needs r0.
3354 return 3 for a combination of shifts with an AND that needs an extra
3355 scratch register, when the three highmost bits of the AND mask are clear.
3356 return 4 for a combination of shifts with an AND that needs an extra
3357 scratch register, when any of the three highmost bits of the AND mask
3358 is set.
3359 If ATTRP is set, store an initial right shift width in ATTRP[0],
3360 and the instruction length in ATTRP[1] . These values are not valid
3361 when returning 0.
3362 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3363 shift_amounts for the last shift value that is to be used before the
3364 sign extend. */
3366 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3368 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3369 int left = INTVAL (left_rtx), right;
3370 int best = 0;
3371 int cost, best_cost = 10000;
3372 int best_right = 0, best_len = 0;
3373 int i;
3374 int can_ext;
3376 if (left < 0 || left > 31)
3377 return 0;
3378 if (CONST_INT_P (mask_rtx))
3379 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3380 else
3381 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3382 /* Can this be expressed as a right shift / left shift pair? */
3383 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3384 right = exact_log2 (lsb);
3385 mask2 = ~(mask + lsb - 1);
3386 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3387 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3388 if (! mask2)
3389 best_cost = shift_insns[right] + shift_insns[right + left];
3390 /* mask has no trailing zeroes <==> ! right */
3391 else if (! right && mask2 == ~(lsb2 - 1))
3393 int late_right = exact_log2 (lsb2);
3394 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3396 /* Try to use zero extend. */
3397 if (mask2 == ~(lsb2 - 1))
3399 int width, first;
3401 for (width = 8; width <= 16; width += 8)
3403 /* Can we zero-extend right away? */
3404 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3406 cost
3407 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3408 if (cost < best_cost)
3410 best = 1;
3411 best_cost = cost;
3412 best_right = right;
3413 best_len = cost;
3414 if (attrp)
3415 attrp[2] = -1;
3417 continue;
3419 /* ??? Could try to put zero extend into initial right shift,
3420 or even shift a bit left before the right shift. */
3421 /* Determine value of first part of left shift, to get to the
3422 zero extend cut-off point. */
3423 first = width - exact_log2 (lsb2) + right;
3424 if (first >= 0 && right + left - first >= 0)
3426 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3427 + ext_shift_insns[right + left - first];
3428 if (cost < best_cost)
3430 best = 1;
3431 best_cost = cost;
3432 best_right = right;
3433 best_len = cost;
3434 if (attrp)
3435 attrp[2] = first;
3440 /* Try to use r0 AND pattern */
3441 for (i = 0; i <= 2; i++)
3443 if (i > right)
3444 break;
3445 if (! CONST_OK_FOR_K08 (mask >> i))
3446 continue;
3447 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3448 if (cost < best_cost)
3450 best = 2;
3451 best_cost = cost;
3452 best_right = i;
3453 best_len = cost - 1;
3456 /* Try to use a scratch register to hold the AND operand. */
3457 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3458 for (i = 0; i <= 2; i++)
3460 if (i > right)
3461 break;
3462 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3463 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3464 if (cost < best_cost)
3466 best = 4 - can_ext;
3467 best_cost = cost;
3468 best_right = i;
3469 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3473 if (attrp)
3475 attrp[0] = best_right;
3476 attrp[1] = best_len;
3478 return best;
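/* A worked example: for left = 2 and mask = 0x3fc, i.e. (x << 2) & 0x3fc,
   the mask shifted right by LEFT is 0xff, so the zero-extend alternative
   wins with a cost of 2; shl_and_kind returns 1 and gen_shl_and then
   emits extu.b followed by shll2.  */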
3481 /* This is used in length attributes of the unnamed instructions
3482 corresponding to shl_and_kind return values of 1 and 2. */
3484 shl_and_length (rtx insn)
3486 rtx set_src, left_rtx, mask_rtx;
3487 int attributes[3];
3489 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3490 left_rtx = XEXP (XEXP (set_src, 0), 1);
3491 mask_rtx = XEXP (set_src, 1);
3492 shl_and_kind (left_rtx, mask_rtx, attributes);
3493 return attributes[1];
3496 /* This is used in length attribute of the and_shl_scratch instruction. */
3499 shl_and_scr_length (rtx insn)
3501 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3502 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3503 rtx op = XEXP (set_src, 0);
3504 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3505 op = XEXP (XEXP (op, 0), 0);
3506 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3509 /* Generate rtl for instructions for which shl_and_kind advised a particular
3510 method of generating them, i.e. returned zero. */
3513 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3515 int attributes[3];
3516 unsigned HOST_WIDE_INT mask;
3517 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3518 int right, total_shift;
3519 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3521 right = attributes[0];
3522 total_shift = INTVAL (left_rtx) + right;
3523 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3524 switch (kind)
3526 default:
3527 return -1;
3528 case 1:
3530 int first = attributes[2];
3531 rtx operands[3];
3533 if (first < 0)
3535 emit_insn ((mask << right) <= 0xff
3536 ? gen_zero_extendqisi2 (dest,
3537 gen_lowpart (QImode, source))
3538 : gen_zero_extendhisi2 (dest,
3539 gen_lowpart (HImode, source)));
3540 source = dest;
3542 if (source != dest)
3543 emit_insn (gen_movsi (dest, source));
3544 operands[0] = dest;
3545 if (right)
3547 operands[2] = GEN_INT (right);
3548 gen_shifty_hi_op (LSHIFTRT, operands);
3550 if (first > 0)
3552 operands[2] = GEN_INT (first);
3553 gen_shifty_hi_op (ASHIFT, operands);
3554 total_shift -= first;
3555 mask <<= first;
3557 if (first >= 0)
3558 emit_insn (mask <= 0xff
3559 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3560 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3561 if (total_shift > 0)
3563 operands[2] = GEN_INT (total_shift);
3564 gen_shifty_hi_op (ASHIFT, operands);
3566 break;
3568 case 4:
3569 shift_gen_fun = gen_shifty_op;
3570 case 3:
3571 /* If the topmost bit that matters is set, set the topmost bits
3572 that don't matter. This way, we might be able to get a shorter
3573 signed constant. */
3574 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3575 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3576 case 2:
3577 /* Don't expand fine-grained when combining, because that will
3578 make the pattern fail. */
3579 if (currently_expanding_to_rtl
3580 || reload_in_progress || reload_completed)
3582 rtx operands[3];
3584 /* Cases 3 and 4 should be handled by this split
3585 only while combining */
3586 gcc_assert (kind <= 2);
3587 if (right)
3589 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3590 source = dest;
3592 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3593 if (total_shift)
3595 operands[0] = dest;
3596 operands[1] = dest;
3597 operands[2] = GEN_INT (total_shift);
3598 shift_gen_fun (ASHIFT, operands);
3600 break;
3602 else
3604 int neg = 0;
3605 if (kind != 4 && total_shift < 16)
3607 neg = -ext_shift_amounts[total_shift][1];
3608 if (neg > 0)
3609 neg -= ext_shift_amounts[total_shift][2];
3610 else
3611 neg = 0;
3613 emit_insn (gen_and_shl_scratch (dest, source,
3614 GEN_INT (right),
3615 GEN_INT (mask),
3616 GEN_INT (total_shift + neg),
3617 GEN_INT (neg)));
3618 emit_insn (gen_movsi (dest, dest));
3619 break;
3622 return 0;
3625 /* Try to find a good way to implement the combiner pattern
3626 [(set (match_operand:SI 0 "register_operand" "=r")
3627 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3628 (match_operand:SI 2 "const_int_operand" "n")
3629 (match_operand:SI 3 "const_int_operand" "n")
3630 (const_int 0)))
3631 (clobber (reg:SI T_REG))]
3632 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3633 return 0 for simple left / right shift combination.
3634 return 1 for left shift / 8 bit sign extend / left shift.
3635 return 2 for left shift / 16 bit sign extend / left shift.
3636 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3637 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3638 return 5 for left shift / 16 bit sign extend / right shift
3639 return 6 for < 8 bit sign extend / left shift.
3640 return 7 for < 8 bit sign extend / left shift / single right shift.
3641 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3644 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3646 int left, size, insize, ext;
3647 int cost = 0, best_cost;
3648 int kind;
3650 left = INTVAL (left_rtx);
3651 size = INTVAL (size_rtx);
3652 insize = size - left;
3653 gcc_assert (insize > 0);
3654 /* Default to left / right shift. */
3655 kind = 0;
3656 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3657 if (size <= 16)
3659 /* 16 bit shift / sign extend / 16 bit shift */
3660 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3661 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3662 below, by alternative 3 or something even better. */
3663 if (cost < best_cost)
3665 kind = 5;
3666 best_cost = cost;
3669 /* Try a plain sign extend between two shifts. */
3670 for (ext = 16; ext >= insize; ext -= 8)
3672 if (ext <= size)
3674 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3675 if (cost < best_cost)
3677 kind = ext / (unsigned) 8;
3678 best_cost = cost;
3681 /* Check if we can do a sloppy shift with a final signed shift
3682 restoring the sign. */
3683 if (EXT_SHIFT_SIGNED (size - ext))
3684 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3685 /* If not, maybe it's still cheaper to do the second shift sloppy,
3686 and do a final sign extend? */
3687 else if (size <= 16)
3688 cost = ext_shift_insns[ext - insize] + 1
3689 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3690 else
3691 continue;
3692 if (cost < best_cost)
3694 kind = ext / (unsigned) 8 + 2;
3695 best_cost = cost;
3698 /* Check if we can sign extend in r0 */
3699 if (insize < 8)
3701 cost = 3 + shift_insns[left];
3702 if (cost < best_cost)
3704 kind = 6;
3705 best_cost = cost;
3707 /* Try the same with a final signed shift. */
3708 if (left < 31)
3710 cost = 3 + ext_shift_insns[left + 1] + 1;
3711 if (cost < best_cost)
3713 kind = 7;
3714 best_cost = cost;
3718 if (TARGET_SH3 || TARGET_SH2A)
3720 /* Try to use a dynamic shift. */
3721 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3722 if (cost < best_cost)
3724 kind = 0;
3725 best_cost = cost;
3728 if (costp)
3729 *costp = cost;
3730 return kind;
3733 /* Function to be used in the length attribute of the instructions
3734 implementing this pattern. */
3737 shl_sext_length (rtx insn)
3739 rtx set_src, left_rtx, size_rtx;
3740 int cost;
3742 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3743 left_rtx = XEXP (XEXP (set_src, 0), 1);
3744 size_rtx = XEXP (set_src, 1);
3745 shl_sext_kind (left_rtx, size_rtx, &cost);
3746 return cost;
3749 /* Generate rtl for this pattern */
3752 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3754 int kind;
3755 int left, size, insize, cost;
3756 rtx operands[3];
3758 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3759 left = INTVAL (left_rtx);
3760 size = INTVAL (size_rtx);
3761 insize = size - left;
3762 switch (kind)
3764 case 1:
3765 case 2:
3766 case 3:
3767 case 4:
3769 int ext = kind & 1 ? 8 : 16;
3770 int shift2 = size - ext;
3772 /* Don't expand fine-grained when combining, because that will
3773 make the pattern fail. */
3774 if (! currently_expanding_to_rtl
3775 && ! reload_in_progress && ! reload_completed)
3777 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3778 emit_insn (gen_movsi (dest, source));
3779 break;
3781 if (dest != source)
3782 emit_insn (gen_movsi (dest, source));
3783 operands[0] = dest;
3784 if (ext - insize)
3786 operands[2] = GEN_INT (ext - insize);
3787 gen_shifty_hi_op (ASHIFT, operands);
3789 emit_insn (kind & 1
3790 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3791 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3792 if (kind <= 2)
3794 if (shift2)
3796 operands[2] = GEN_INT (shift2);
3797 gen_shifty_op (ASHIFT, operands);
3800 else
3802 if (shift2 > 0)
3804 if (EXT_SHIFT_SIGNED (shift2))
3806 operands[2] = GEN_INT (shift2 + 1);
3807 gen_shifty_op (ASHIFT, operands);
3808 operands[2] = const1_rtx;
3809 gen_shifty_op (ASHIFTRT, operands);
3810 break;
3812 operands[2] = GEN_INT (shift2);
3813 gen_shifty_hi_op (ASHIFT, operands);
3815 else if (shift2)
3817 operands[2] = GEN_INT (-shift2);
3818 gen_shifty_hi_op (LSHIFTRT, operands);
3820 emit_insn (size <= 8
3821 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3822 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3824 break;
3826 case 5:
3828 int i = 16 - size;
3829 if (! currently_expanding_to_rtl
3830 && ! reload_in_progress && ! reload_completed)
3831 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3832 else
3834 operands[0] = dest;
3835 operands[2] = GEN_INT (16 - insize);
3836 gen_shifty_hi_op (ASHIFT, operands);
3837 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3839 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3840 while (--i >= 0)
3841 gen_ashift (ASHIFTRT, 1, dest);
3842 break;
3844 case 6:
3845 case 7:
3846 /* Don't expand fine-grained when combining, because that will
3847 make the pattern fail. */
3848 if (! currently_expanding_to_rtl
3849 && ! reload_in_progress && ! reload_completed)
3851 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3852 emit_insn (gen_movsi (dest, source));
3853 break;
3855 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3856 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3857 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3858 operands[0] = dest;
3859 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3860 gen_shifty_op (ASHIFT, operands);
3861 if (kind == 7)
3862 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3863 break;
3864 default:
3865 return -1;
3867 return 0;
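/* A minimal illustrative sketch (sign_extend_field is an assumed helper,
   not used elsewhere) of the and / xor / add trick emitted above for
   kinds 6 and 7: it sign-extends an INSIZE-bit field in plain C.  */
static int
sign_extend_field (int x, int insize)
{
  int sign_bit = 1 << (insize - 1);

  x &= (1 << insize) - 1;		/* Keep only the low INSIZE bits.  */
  return (x ^ sign_bit) - sign_bit;	/* Flip the sign bit, then subtract it.  */
}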
3870 /* Prefix a symbol_ref name with "datalabel". */
3873 gen_datalabel_ref (rtx sym)
3875 const char *str;
3877 if (GET_CODE (sym) == LABEL_REF)
3878 return gen_rtx_CONST (GET_MODE (sym),
3879 gen_rtx_UNSPEC (GET_MODE (sym),
3880 gen_rtvec (1, sym),
3881 UNSPEC_DATALABEL));
3883 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3885 str = XSTR (sym, 0);
3886 /* Share all SYMBOL_REF strings with the same value - that is important
3887 for cse. */
3888 str = IDENTIFIER_POINTER (get_identifier (str));
3889 XSTR (sym, 0) = str;
3891 return sym;
3895 static alloc_pool label_ref_list_pool;
3897 typedef struct label_ref_list_d
3899 rtx label;
3900 struct label_ref_list_d *next;
3901 } *label_ref_list_t;
3903 /* The SH cannot load a large constant into a register; constants have to
3904 come from a pc-relative load. The reference of a pc-relative load
3905 instruction must be less than 1k in front of the instruction. This
3906 means that we often have to dump a constant inside a function, and
3907 generate code to branch around it.
3909 It is important to minimize this, since the branches will slow things
3910 down and make things bigger.
3912 Worst case code looks like:
3914 mov.l L1,rn
3915 bra L2
3917 align
3918 L1: .long value
3922 mov.l L3,rn
3923 bra L4
3925 align
3926 L3: .long value
3930 We fix this by performing a scan before scheduling, which notices which
3931 instructions need to have their operands fetched from the constant table
3932 and builds the table.
3934 The algorithm is:
3936 scan, find an instruction which needs a pcrel move. Look forward, find the
3937 last barrier which is within MAX_COUNT bytes of the requirement.
3938 If there isn't one, make one. Process all the instructions between
3939 the find and the barrier.
3941 In the above example, we can tell that L3 is within 1k of L1, so
3942 the first move can be shrunk from the 3 insn+constant sequence into
3943 just 1 insn, and the constant moved to L3 to make:
3945 mov.l L1,rn
3947 mov.l L3,rn
3948 bra L4
3950 align
3951 L3:.long value
3952 L4:.long value
3954 Then the second move becomes the target for the shortening process. */
3956 typedef struct
3958 rtx value; /* Value in table. */
3959 rtx label; /* Label of value. */
3960 label_ref_list_t wend; /* End of window. */
3961 enum machine_mode mode; /* Mode of value. */
3963 /* True if this constant is accessed as part of a post-increment
3964 sequence. Note that HImode constants are never accessed in this way. */
3965 bool part_of_sequence_p;
3966 } pool_node;
3968 /* The maximum number of constants that can fit into one pool, since
3969 constants in the range 0..510 are at least 2 bytes long, and in the
3970 range from there to 1018 at least 4 bytes. */
3972 #define MAX_POOL_SIZE 372
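/* As a rough cross-check of that bound: 510 / 2 + (1018 - 510) / 4
   = 255 + 127 = 382 entries at best, so 372 presumably leaves a little
   slack for alignment padding and the branch around the table.  */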
3973 static pool_node pool_vector[MAX_POOL_SIZE];
3974 static int pool_size;
3975 static rtx pool_window_label;
3976 static int pool_window_last;
3978 static int max_labelno_before_reorg;
3980 /* ??? If we need a constant in HImode which is the truncated value of a
3981 constant we need in SImode, we could combine the two entries thus saving
3982 two bytes. Is this common enough to be worth the effort of implementing
3983 it? */
3985 /* ??? This stuff should be done at the same time that we shorten branches.
3986 As it is now, we must assume that all branches are the maximum size, and
3987 this causes us to almost always output constant pools sooner than
3988 necessary. */
3990 /* Add a constant to the pool and return its label. */
3992 static rtx
3993 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3995 int i;
3996 rtx lab, new_rtx;
3997 label_ref_list_t ref, newref;
3999 /* First see if we've already got it. */
4000 for (i = 0; i < pool_size; i++)
4002 if (x->code == pool_vector[i].value->code
4003 && mode == pool_vector[i].mode)
4005 if (x->code == CODE_LABEL)
4007 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4008 continue;
4010 if (rtx_equal_p (x, pool_vector[i].value))
4012 lab = new_rtx = 0;
4013 if (! last_value
4014 || ! i
4015 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4017 new_rtx = gen_label_rtx ();
4018 LABEL_REFS (new_rtx) = pool_vector[i].label;
4019 pool_vector[i].label = lab = new_rtx;
4021 if (lab && pool_window_label)
4023 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4024 newref->label = pool_window_label;
4025 ref = pool_vector[pool_window_last].wend;
4026 newref->next = ref;
4027 pool_vector[pool_window_last].wend = newref;
4029 if (new_rtx)
4030 pool_window_label = new_rtx;
4031 pool_window_last = i;
4032 return lab;
4037 /* Need a new one. */
4038 pool_vector[pool_size].value = x;
4039 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4041 lab = 0;
4042 pool_vector[pool_size - 1].part_of_sequence_p = true;
4044 else
4045 lab = gen_label_rtx ();
4046 pool_vector[pool_size].mode = mode;
4047 pool_vector[pool_size].label = lab;
4048 pool_vector[pool_size].wend = NULL;
4049 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4050 if (lab && pool_window_label)
4052 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4053 newref->label = pool_window_label;
4054 ref = pool_vector[pool_window_last].wend;
4055 newref->next = ref;
4056 pool_vector[pool_window_last].wend = newref;
4058 if (lab)
4059 pool_window_label = lab;
4060 pool_window_last = pool_size;
4061 pool_size++;
4062 return lab;
4065 /* Output the literal table. START, if nonzero, is the first instruction
4066 this table is needed for, and also indicates that there is at least one
4067 casesi_worker_2 instruction; we have to emit the operand3 labels from
4068 these insns at a 4-byte aligned position. BARRIER is the barrier
4069 after which we are to place the table. */
4071 static void
4072 dump_table (rtx start, rtx barrier)
4074 rtx scan = barrier;
4075 int i;
4076 int need_align = 1;
4077 rtx lab;
4078 label_ref_list_t ref;
4079 int have_df = 0;
4081 /* Do two passes, first time dump out the HI sized constants. */
4083 for (i = 0; i < pool_size; i++)
4085 pool_node *p = &pool_vector[i];
4087 if (p->mode == HImode)
4089 if (need_align)
4091 scan = emit_insn_after (gen_align_2 (), scan);
4092 need_align = 0;
4094 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4095 scan = emit_label_after (lab, scan);
4096 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4097 scan);
4098 for (ref = p->wend; ref; ref = ref->next)
4100 lab = ref->label;
4101 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4104 else if (p->mode == DFmode)
4105 have_df = 1;
4108 need_align = 1;
4110 if (start)
4112 scan = emit_insn_after (gen_align_4 (), scan);
4113 need_align = 0;
4114 for (; start != barrier; start = NEXT_INSN (start))
4115 if (NONJUMP_INSN_P (start)
4116 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4118 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4119 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4121 scan = emit_label_after (lab, scan);
4124 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4126 rtx align_insn = NULL_RTX;
4128 scan = emit_label_after (gen_label_rtx (), scan);
4129 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4130 need_align = 0;
4132 for (i = 0; i < pool_size; i++)
4134 pool_node *p = &pool_vector[i];
4136 switch (p->mode)
4138 case HImode:
4139 break;
4140 case SImode:
4141 case SFmode:
4142 if (align_insn && !p->part_of_sequence_p)
4144 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4145 emit_label_before (lab, align_insn);
4146 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4147 align_insn);
4148 for (ref = p->wend; ref; ref = ref->next)
4150 lab = ref->label;
4151 emit_insn_before (gen_consttable_window_end (lab),
4152 align_insn);
4154 delete_insn (align_insn);
4155 align_insn = NULL_RTX;
4156 continue;
4158 else
4160 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4161 scan = emit_label_after (lab, scan);
4162 scan = emit_insn_after (gen_consttable_4 (p->value,
4163 const0_rtx), scan);
4164 need_align = ! need_align;
4166 break;
4167 case DFmode:
4168 if (need_align)
4170 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4171 align_insn = scan;
4172 need_align = 0;
4174 case DImode:
4175 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4176 scan = emit_label_after (lab, scan);
4177 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4178 scan);
4179 break;
4180 default:
4181 gcc_unreachable ();
4184 if (p->mode != HImode)
4186 for (ref = p->wend; ref; ref = ref->next)
4188 lab = ref->label;
4189 scan = emit_insn_after (gen_consttable_window_end (lab),
4190 scan);
4195 pool_size = 0;
4198 for (i = 0; i < pool_size; i++)
4200 pool_node *p = &pool_vector[i];
4202 switch (p->mode)
4204 case HImode:
4205 break;
4206 case SImode:
4207 case SFmode:
4208 if (need_align)
4210 need_align = 0;
4211 scan = emit_label_after (gen_label_rtx (), scan);
4212 scan = emit_insn_after (gen_align_4 (), scan);
4214 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4215 scan = emit_label_after (lab, scan);
4216 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4217 scan);
4218 break;
4219 case DFmode:
4220 case DImode:
4221 if (need_align)
4223 need_align = 0;
4224 scan = emit_label_after (gen_label_rtx (), scan);
4225 scan = emit_insn_after (gen_align_4 (), scan);
4227 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4228 scan = emit_label_after (lab, scan);
4229 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4230 scan);
4231 break;
4232 default:
4233 gcc_unreachable ();
4236 if (p->mode != HImode)
4238 for (ref = p->wend; ref; ref = ref->next)
4240 lab = ref->label;
4241 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4246 scan = emit_insn_after (gen_consttable_end (), scan);
4247 scan = emit_barrier_after (scan);
4248 pool_size = 0;
4249 pool_window_label = NULL_RTX;
4250 pool_window_last = 0;
4253 /* Return nonzero if constant would be an ok source for a
4254 mov.w instead of a mov.l. */
4256 static int
4257 hi_const (rtx src)
4259 return (CONST_INT_P (src)
4260 && INTVAL (src) >= -32768
4261 && INTVAL (src) <= 32767);
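/* A minimal sketch (fits_mov_w_range is only an illustration, taking a
   plain integer instead of an rtx): the same test is a round trip
   through a signed 16-bit value, since mov.w sign-extends the 2-byte
   table entry it loads.  Assumes a 16-bit 'short', as on typical hosts.  */
static int
fits_mov_w_range (long val)
{
  return (short) val == val;	/* True iff val is in [-32768, 32767].  */
}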
4264 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
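/* The movas handled here have the shape
   (set (reg) (unspec [(label_ref L)] UNSPEC_MOVA)) -- or a CONST as the
   vector element in the mova_const case -- so MOVA_LABELREF extracts
   that first element of the UNSPEC.  */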
4266 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4268 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4269 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4270 need to fix it if the input value is CONST_OK_FOR_I08. */
4272 static int
4273 broken_move (rtx insn)
4275 if (NONJUMP_INSN_P (insn))
4277 rtx pat = PATTERN (insn);
4278 if (GET_CODE (pat) == PARALLEL)
4279 pat = XVECEXP (pat, 0, 0);
4280 if (GET_CODE (pat) == SET
4281 /* We can load any 8-bit value if we don't care what the high
4282 order bits end up as. */
4283 && GET_MODE (SET_DEST (pat)) != QImode
4284 && (CONSTANT_P (SET_SRC (pat))
4285 /* Match mova_const. */
4286 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4287 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4288 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4289 && ! (TARGET_SH2E
4290 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4291 && (fp_zero_operand (SET_SRC (pat))
4292 || fp_one_operand (SET_SRC (pat)))
4293 /* In general we don't know the current setting of fpscr, so disable fldi.
4294 There is an exception if this was a register-register move
4295 before reload - and hence it was ascertained that we have
4296 single precision setting - and in a post-reload optimization
4297 we changed this to do a constant load. In that case
4298 we don't have an r0 clobber, hence we must use fldi. */
4299 && (TARGET_FMOVD
4300 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4301 == SCRATCH))
4302 && REG_P (SET_DEST (pat))
4303 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4304 && ! (TARGET_SH2A
4305 && GET_MODE (SET_DEST (pat)) == SImode
4306 && (satisfies_constraint_I20 (SET_SRC (pat))
4307 || satisfies_constraint_I28 (SET_SRC (pat))))
4308 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4309 return 1;
4312 return 0;
4315 static int
4316 mova_p (rtx insn)
4318 return (NONJUMP_INSN_P (insn)
4319 && GET_CODE (PATTERN (insn)) == SET
4320 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4321 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4322 /* Don't match mova_const. */
4323 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4326 /* Fix up a mova from a switch that went out of range. */
4327 static void
4328 fixup_mova (rtx mova)
4330 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4331 if (! flag_pic)
4333 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4334 INSN_CODE (mova) = -1;
4336 else
4338 rtx worker = mova;
4339 rtx lab = gen_label_rtx ();
4340 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4344 worker = NEXT_INSN (worker);
4345 gcc_assert (worker
4346 && !LABEL_P (worker)
4347 && !JUMP_P (worker));
4348 } while (NOTE_P (worker)
4349 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4350 wpat = PATTERN (worker);
4351 wpat0 = XVECEXP (wpat, 0, 0);
4352 wpat1 = XVECEXP (wpat, 0, 1);
4353 wsrc = SET_SRC (wpat0);
4354 PATTERN (worker) = (gen_casesi_worker_2
4355 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4356 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4357 XEXP (wpat1, 0)));
4358 INSN_CODE (worker) = -1;
4359 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4360 base = gen_rtx_LABEL_REF (Pmode, lab);
4361 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4362 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4363 INSN_CODE (mova) = -1;
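/* At this point the mova source has the shape
   (const (unspec [TARGET BASE] UNSPEC_SYMOFF)), i.e. the offset of the
   switch-table label from the new reference label placed in front of
   the casesi_worker_2, which stays valid under PIC.  */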
4367 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4368 *num_mova, and check if the new mova is not nested within the first one.
4369 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4370 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4371 static int
4372 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4374 int n_addr = 0; /* Initialization to shut up spurious warning. */
4375 int f_target, n_target = 0; /* Likewise. */
4377 if (optimize)
4379 /* If NEW_MOVA has no address yet, it will be handled later. */
4380 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4381 return -1;
4383 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4384 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
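/* 1022 below is the usable mova range: 1020, plus 4 because the pc
   counts from the address of the second instruction after the mova,
   minus 2 in case the pc is only 2-byte aligned (the same arithmetic
   appears in sh_reorg below).  */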
4385 if (n_addr > n_target || n_addr + 1022 < n_target)
4387 /* Change the mova into a load.
4388 broken_move will then return true for it. */
4389 fixup_mova (new_mova);
4390 return 1;
4393 if (!(*num_mova)++)
4395 *first_mova = new_mova;
4396 return 2;
4398 if (!optimize
4399 || ((f_target
4400 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4401 >= n_target))
4402 return -1;
4404 (*num_mova)--;
4405 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4406 > n_target - n_addr)
4408 fixup_mova (*first_mova);
4409 return 0;
4411 else
4413 fixup_mova (new_mova);
4414 return 1;
4418 /* Find the last barrier from insn FROM which is close enough to hold the
4419 constant pool. If we can't find one, then create one near the end of
4420 the range. */
4422 static rtx
4423 find_barrier (int num_mova, rtx mova, rtx from)
4425 int count_si = 0;
4426 int count_hi = 0;
4427 int found_hi = 0;
4428 int found_si = 0;
4429 int found_di = 0;
4430 int hi_align = 2;
4431 int si_align = 2;
4432 int leading_mova = num_mova;
4433 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4434 int si_limit;
4435 int hi_limit;
4436 rtx orig = from;
4437 rtx last_got = NULL_RTX;
4438 rtx last_symoff = NULL_RTX;
4440 /* For HImode: range is 510, add 4 because pc counts from address of
4441 second instruction after this one, subtract 2 for the jump instruction
4442 that we may need to emit before the table, subtract 2 for the instruction
4443 that fills the jump delay slot (in very rare cases, reorg will take an
4444 instruction from after the constant pool or will leave the delay slot
4445 empty). This gives 510.
4446 For SImode: range is 1020, add 4 because pc counts from address of
4447 second instruction after this one, subtract 2 in case pc is 2 byte
4448 aligned, subtract 2 for the jump instruction that we may need to emit
4449 before the table, subtract 2 for the instruction that fills the jump
4450 delay slot. This gives 1018. */
4452 /* The branch will always be shortened now that the reference address for
4453 forward branches is the successor address, thus we need no longer make
4454 adjustments to the [sh]i_limit for -O0. */
4456 si_limit = 1018;
4457 hi_limit = 510;
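/* That is, hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018.  */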
4459 while (from && count_si < si_limit && count_hi < hi_limit)
4461 int inc = get_attr_length (from);
4462 int new_align = 1;
4464 /* If this is a label that existed at the time of the compute_alignments
4465 call, determine the alignment. N.B. When find_barrier recurses for
4466 an out-of-reach mova, we might see labels at the start of previously
4467 inserted constant tables. */
4468 if (LABEL_P (from)
4469 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4471 if (optimize)
4472 new_align = 1 << label_to_alignment (from);
4473 else if (BARRIER_P (prev_nonnote_insn (from)))
4474 new_align = 1 << barrier_align (from);
4475 else
4476 new_align = 1;
4477 inc = 0;
4479 /* In case we are scanning a constant table because of recursion, check
4480 for explicit alignments. If the table is long, we might be forced
4481 to emit the new table in front of it; the length of the alignment
4482 might be the last straw. */
4483 else if (NONJUMP_INSN_P (from)
4484 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4485 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4486 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4487 /* When we find the end of a constant table, paste the new constant
4488 at the end. That is better than putting it in front because
4489 this way, we don't need extra alignment for adding a 4-byte-aligned
4490 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4491 else if (NONJUMP_INSN_P (from)
4492 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4493 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4494 return from;
4496 if (BARRIER_P (from))
4498 rtx next;
4500 found_barrier = from;
4502 /* If we are at the end of the function, or in front of an alignment
4503 instruction, we need not insert an extra alignment. We prefer
4504 this kind of barrier. */
4505 if (barrier_align (from) > 2)
4506 good_barrier = from;
4508 /* If we are at the end of a hot/cold block, dump the constants
4509 here. */
4510 next = NEXT_INSN (from);
4511 if (next
4512 && NOTE_P (next)
4513 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4514 break;
4517 if (broken_move (from))
4519 rtx pat, src, dst;
4520 enum machine_mode mode;
4522 pat = PATTERN (from);
4523 if (GET_CODE (pat) == PARALLEL)
4524 pat = XVECEXP (pat, 0, 0);
4525 src = SET_SRC (pat);
4526 dst = SET_DEST (pat);
4527 mode = GET_MODE (dst);
4529 /* A GOT pc-relative setting comes in a pair of
4530 mova .L8,r0
4531 mov.l .L8,r12
4532 instructions (plus an add r0,r12).
4533 Remember if we see one without the other. */
4534 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4535 last_got = last_got ? NULL_RTX : from;
4536 else if (PIC_ADDR_P (src))
4537 last_got = last_got ? NULL_RTX : from;
4539 /* We must explicitly check the mode, because sometimes the
4540 front end will generate code to load unsigned constants into
4541 HImode targets without properly sign extending them. */
4542 if (mode == HImode
4543 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4545 found_hi += 2;
4546 /* We put the short constants before the long constants, so
4547 we must count the length of short constants in the range
4548 for the long constants. */
4549 /* ??? This isn't optimal, but is easy to do. */
4550 si_limit -= 2;
4552 else
4554 /* We dump DF/DI constants before SF/SI ones, because
4555 the limit is the same, but the alignment requirements
4556 are higher. We may waste up to 4 additional bytes
4557 for alignment, and the DF/DI constant may have
4558 another SF/SI constant placed before it. */
4559 if (TARGET_SHCOMPACT
4560 && ! found_di
4561 && (mode == DFmode || mode == DImode))
4563 found_di = 1;
4564 si_limit -= 8;
4566 while (si_align > 2 && found_si + si_align - 2 > count_si)
4567 si_align >>= 1;
4568 if (found_si > count_si)
4569 count_si = found_si;
4570 found_si += GET_MODE_SIZE (mode);
4571 if (num_mova)
4572 si_limit -= GET_MODE_SIZE (mode);
4576 if (mova_p (from))
4578 switch (untangle_mova (&num_mova, &mova, from))
4580 case 1:
4581 if (flag_pic)
4583 rtx src = SET_SRC (PATTERN (from));
4584 if (GET_CODE (src) == CONST
4585 && GET_CODE (XEXP (src, 0)) == UNSPEC
4586 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4587 last_symoff = from;
4589 break;
4590 case 0: return find_barrier (0, 0, mova);
4591 case 2:
4593 leading_mova = 0;
4594 barrier_before_mova
4595 = good_barrier ? good_barrier : found_barrier;
4597 default: break;
4599 if (found_si > count_si)
4600 count_si = found_si;
4602 else if (JUMP_TABLE_DATA_P (from))
4604 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4605 || (num_mova
4606 && (prev_nonnote_insn (from)
4607 == XEXP (MOVA_LABELREF (mova), 0))))
4608 num_mova--;
4609 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4611 /* We have just passed the barrier in front of the
4612 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4613 the ADDR_DIFF_VEC is accessed as data, just like our pool
4614 constants, this is a good opportunity to accommodate what
4615 we have gathered so far.
4616 If we waited any longer, we could end up at a barrier in
4617 front of code, which gives worse cache usage for separated
4618 instruction / data caches. */
4619 good_barrier = found_barrier;
4620 break;
4622 else
4624 rtx body = PATTERN (from);
4625 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4628 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4629 else if (JUMP_P (from)
4630 && ! TARGET_SH2
4631 && ! optimize_size)
4632 new_align = 4;
4634 /* There is a possibility that a bf is transformed into a bf/s by the
4635 delay slot scheduler. */
4636 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4637 && get_attr_type (from) == TYPE_CBRANCH
4638 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4639 inc += 2;
4641 if (found_si)
4643 count_si += inc;
4644 if (new_align > si_align)
4646 si_limit -= (count_si - 1) & (new_align - si_align);
4647 si_align = new_align;
4649 count_si = (count_si + new_align - 1) & -new_align;
4651 if (found_hi)
4653 count_hi += inc;
4654 if (new_align > hi_align)
4656 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4657 hi_align = new_align;
4659 count_hi = (count_hi + new_align - 1) & -new_align;
4661 from = NEXT_INSN (from);
4664 if (num_mova)
4666 if (leading_mova)
4668 /* Try as we might, the leading mova is out of range. Change
4669 it into a load (which will become a pcload) and retry. */
4670 fixup_mova (mova);
4671 return find_barrier (0, 0, mova);
4673 else
4675 /* Insert the constant pool table before the mova instruction,
4676 to prevent the mova label reference from going out of range. */
4677 from = mova;
4678 good_barrier = found_barrier = barrier_before_mova;
4682 if (found_barrier)
4684 if (good_barrier && next_real_insn (found_barrier))
4685 found_barrier = good_barrier;
4687 else
4689 /* We didn't find a barrier in time to dump our stuff,
4690 so we'll make one. */
4691 rtx label = gen_label_rtx ();
4693 /* Don't emit a constant table in the middle of insns for
4694 casesi_worker_2. This is a bit of overkill, but it is enough
4695 because casesi_worker_2 does not appear very frequently. */
4696 if (last_symoff)
4697 from = last_symoff;
4699 /* If we exceeded the range, then we must back up over the last
4700 instruction we looked at. Otherwise, we just need to undo the
4701 NEXT_INSN at the end of the loop. */
4702 if (PREV_INSN (from) != orig
4703 && (count_hi > hi_limit || count_si > si_limit))
4704 from = PREV_INSN (PREV_INSN (from));
4705 else
4706 from = PREV_INSN (from);
4708 /* Don't emit a constant table in the middle of global pointer setting,
4709 since that would move the addressing base GOT into another table.
4710 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4711 in the pool anyway, so just move up the whole constant pool. */
4712 if (last_got)
4713 from = PREV_INSN (last_got);
4715 /* Don't insert the constant pool table at the position which
4716 may be the landing pad. */
4717 if (flag_exceptions
4718 && CALL_P (from)
4719 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4720 from = PREV_INSN (from);
4722 /* Walk back to be just before any jump or label.
4723 Putting it before a label reduces the number of times the branch
4724 around the constant pool table will be hit. Putting it before
4725 a jump makes it more likely that the bra delay slot will be
4726 filled. */
4727 while (NOTE_P (from) || JUMP_P (from)
4728 || LABEL_P (from))
4729 from = PREV_INSN (from);
4731 /* Make sure we do not split between a call and its corresponding
4732 CALL_ARG_LOCATION note. */
4733 if (CALL_P (from))
4735 rtx next = NEXT_INSN (from);
4736 if (next && NOTE_P (next)
4737 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
4738 from = next;
4741 from = emit_jump_insn_after (gen_jump (label), from);
4742 JUMP_LABEL (from) = label;
4743 LABEL_NUSES (label) = 1;
4744 found_barrier = emit_barrier_after (from);
4745 emit_label_after (label, found_barrier);
4748 return found_barrier;
4751 /* If the instruction INSN is implemented by a special function, and we can
4752 positively find the register that is used to call the sfunc, and this
4753 register is not used anywhere else in this instruction - except as the
4754 destination of a set, return this register; else, return 0. */
4756 sfunc_uses_reg (rtx insn)
4758 int i;
4759 rtx pattern, part, reg_part, reg;
4761 if (!NONJUMP_INSN_P (insn))
4762 return 0;
4763 pattern = PATTERN (insn);
4764 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4765 return 0;
4767 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4769 part = XVECEXP (pattern, 0, i);
4770 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4771 reg_part = part;
4773 if (! reg_part)
4774 return 0;
4775 reg = XEXP (reg_part, 0);
4776 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4778 part = XVECEXP (pattern, 0, i);
4779 if (part == reg_part || GET_CODE (part) == CLOBBER)
4780 continue;
4781 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4782 && REG_P (SET_DEST (part)))
4783 ? SET_SRC (part) : part)))
4784 return 0;
4786 return reg;
4789 /* See if the only way in which INSN uses REG is by calling it, or by
4790 setting it while calling it. Set *SET to a SET rtx if the register
4791 is set by INSN. */
4793 static int
4794 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4796 rtx pattern, reg2;
4798 *set = NULL_RTX;
4800 reg2 = sfunc_uses_reg (insn);
4801 if (reg2 && REGNO (reg2) == REGNO (reg))
4803 pattern = single_set (insn);
4804 if (pattern
4805 && REG_P (SET_DEST (pattern))
4806 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4807 *set = pattern;
4808 return 0;
4810 if (!CALL_P (insn))
4812 /* We don't use rtx_equal_p because we don't care if the mode is
4813 different. */
4814 pattern = single_set (insn);
4815 if (pattern
4816 && REG_P (SET_DEST (pattern))
4817 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4819 rtx par, part;
4820 int i;
4822 *set = pattern;
4823 par = PATTERN (insn);
4824 if (GET_CODE (par) == PARALLEL)
4825 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4827 part = XVECEXP (par, 0, i);
4828 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4829 return 1;
4831 return reg_mentioned_p (reg, SET_SRC (pattern));
4834 return 1;
4837 pattern = PATTERN (insn);
4839 if (GET_CODE (pattern) == PARALLEL)
4841 int i;
4843 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4844 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4845 return 1;
4846 pattern = XVECEXP (pattern, 0, 0);
4849 if (GET_CODE (pattern) == SET)
4851 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4853 /* We don't use rtx_equal_p, because we don't care if the
4854 mode is different. */
4855 if (!REG_P (SET_DEST (pattern))
4856 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4857 return 1;
4859 *set = pattern;
4862 pattern = SET_SRC (pattern);
4865 if (GET_CODE (pattern) != CALL
4866 || !MEM_P (XEXP (pattern, 0))
4867 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4868 return 1;
4870 return 0;
4873 /* Given X, a pattern of an insn or a part of it, return a mask of used
4874 general registers. Bits 0..15 mean that the respective registers
4875 are used as inputs in the instruction. Bits 16..31 mean that the
4876 registers 0..15, respectively, are used as outputs, or are clobbered.
4877 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4879 regs_used (rtx x, int is_dest)
4881 enum rtx_code code;
4882 const char *fmt;
4883 int i, used = 0;
4885 if (! x)
4886 return used;
4887 code = GET_CODE (x);
4888 switch (code)
4890 case REG:
4891 if (REGNO (x) < 16)
4892 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4893 << (REGNO (x) + is_dest));
4894 return 0;
4895 case SUBREG:
4897 rtx y = SUBREG_REG (x);
4899 if (!REG_P (y))
4900 break;
4901 if (REGNO (y) < 16)
4902 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4903 << (REGNO (y) +
4904 subreg_regno_offset (REGNO (y),
4905 GET_MODE (y),
4906 SUBREG_BYTE (x),
4907 GET_MODE (x)) + is_dest));
4908 return 0;
4910 case SET:
4911 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4912 case RETURN:
4913 /* If there was a return value, it must have been indicated with USE. */
4914 return 0x00ffff00;
4915 case CLOBBER:
4916 is_dest = 1;
4917 break;
4918 case MEM:
4919 is_dest = 0;
4920 break;
4921 case CALL:
4922 used |= 0x00ff00f0;
4923 break;
4924 default:
4925 break;
4928 fmt = GET_RTX_FORMAT (code);
4930 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4932 if (fmt[i] == 'E')
4934 register int j;
4935 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4936 used |= regs_used (XVECEXP (x, i, j), is_dest);
4938 else if (fmt[i] == 'e')
4939 used |= regs_used (XEXP (x, i), is_dest);
4941 return used;
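/* A minimal sketch of the resulting bit layout (example_regs_used_mask
   is only an illustration, assuming SImode values occupy one hard
   register): for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))),
   regs_used returns r1 marked as an output and r2/r3 as inputs.  */
static unsigned int
example_regs_used_mask (void)
{
  unsigned int outputs = 1u << (16 + 1);	 /* r1 is written.  */
  unsigned int inputs = (1u << 2) | (1u << 3);	 /* r2 and r3 are read.  */
  return outputs | inputs;
}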
4944 /* Create an instruction that prevents redirection of a conditional branch
4945 to the destination of the JUMP with address ADDR.
4946 If the branch needs to be implemented as an indirect jump, try to find
4947 a scratch register for it.
4948 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4949 If any preceding insn that doesn't fit into a delay slot is good enough,
4950 pass 1. Pass 2 if a definite blocking insn is needed.
4951 -1 is used internally to avoid deep recursion.
4952 If a blocking instruction is made or recognized, return it. */
4954 static rtx
4955 gen_block_redirect (rtx jump, int addr, int need_block)
4957 int dead = 0;
4958 rtx prev = prev_nonnote_insn (jump);
4959 rtx dest;
4961 /* First, check if we already have an instruction that satisfies our need. */
4962 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4964 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4965 return prev;
4966 if (GET_CODE (PATTERN (prev)) == USE
4967 || GET_CODE (PATTERN (prev)) == CLOBBER
4968 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4969 prev = jump;
4970 else if ((need_block &= ~1) < 0)
4971 return prev;
4972 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4973 need_block = 0;
4975 if (GET_CODE (PATTERN (jump)) == RETURN)
4977 if (! need_block)
4978 return prev;
4979 /* Reorg even does nasty things with return insns that cause branches
4980 to go out of range - see find_end_label and callers. */
4981 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4983 /* We can't use JUMP_LABEL here because it might be undefined
4984 when not optimizing. */
4985 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4986 /* If the branch is out of range, try to find a scratch register for it. */
4987 if (optimize
4988 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4989 > 4092 + 4098))
4991 rtx scan;
4992 /* Don't look for the stack pointer as a scratch register;
4993 it would cause trouble if an interrupt occurred. */
4994 unsigned attempt = 0x7fff, used;
4995 int jump_left = flag_expensive_optimizations + 1;
4997 /* It is likely that the most recent eligible instruction is wanted for
4998 the delay slot. Therefore, find out which registers it uses, and
4999 try to avoid using them. */
5001 for (scan = jump; (scan = PREV_INSN (scan)); )
5003 enum rtx_code code;
5005 if (INSN_DELETED_P (scan))
5006 continue;
5007 code = GET_CODE (scan);
5008 if (code == CODE_LABEL || code == JUMP_INSN)
5009 break;
5010 if (code == INSN
5011 && GET_CODE (PATTERN (scan)) != USE
5012 && GET_CODE (PATTERN (scan)) != CLOBBER
5013 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5015 attempt &= ~regs_used (PATTERN (scan), 0);
5016 break;
5019 for (used = dead = 0, scan = JUMP_LABEL (jump);
5020 (scan = NEXT_INSN (scan)); )
5022 enum rtx_code code;
5024 if (INSN_DELETED_P (scan))
5025 continue;
5026 code = GET_CODE (scan);
5027 if (INSN_P (scan))
5029 used |= regs_used (PATTERN (scan), 0);
5030 if (code == CALL_INSN)
5031 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5032 dead |= (used >> 16) & ~used;
5033 if (dead & attempt)
5035 dead &= attempt;
5036 break;
5038 if (code == JUMP_INSN)
5040 if (jump_left-- && simplejump_p (scan))
5041 scan = JUMP_LABEL (scan);
5042 else
5043 break;
5047 /* Mask out the stack pointer again, in case it was
5048 the only 'free' register we have found. */
5049 dead &= 0x7fff;
5051 /* If the immediate destination is still in range, check for possible
5052 threading with a jump beyond the delay slot insn.
5053 Don't check if we are called recursively; the jump has been or will be
5054 checked in a different invocation then. */
5056 else if (optimize && need_block >= 0)
5058 rtx next = next_active_insn (next_active_insn (dest));
5059 if (next && JUMP_P (next)
5060 && GET_CODE (PATTERN (next)) == SET
5061 && recog_memoized (next) == CODE_FOR_jump_compact)
5063 dest = JUMP_LABEL (next);
5064 if (dest
5065 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5066 > 4092 + 4098))
5067 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5071 if (dead)
5073 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
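/* DEAD & -DEAD isolates the lowest set bit of DEAD, so exact_log2
   yields the number of the lowest-numbered register found to be dead,
   and REG becomes our scratch register.  */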
5075 /* It would be nice if we could convert the jump into an indirect
5076 jump / far branch right now, and thus exposing all constituent
5077 instructions to further optimization. However, reorg uses
5078 simplejump_p to determine if there is an unconditional jump where
5079 it should try to schedule instructions from the target of the
5080 branch; simplejump_p fails for indirect jumps even if they have
5081 a JUMP_LABEL. */
5082 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5083 (reg, GEN_INT (unspec_bbr_uid++)),
5084 jump);
5085 /* ??? We would like this to have the scope of the jump, but that
5086 scope will change when a delay slot insn of an inner scope is added.
5087 Hence, after delay slot scheduling, we'll have to expect
5088 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5089 the jump. */
5091 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5092 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5093 return insn;
5095 else if (need_block)
5096 /* We can't use JUMP_LABEL here because it might be undefined
5097 when not optimizing. */
5098 return emit_insn_before (gen_block_branch_redirect
5099 (GEN_INT (unspec_bbr_uid++)),
5100 jump);
5101 return prev;
5104 #define CONDJUMP_MIN -252
5105 #define CONDJUMP_MAX 262
5106 struct far_branch
5108 /* A label (to be placed) in front of the jump
5109 that jumps to our ultimate destination. */
5110 rtx near_label;
5111 /* Where we are going to insert it if we cannot move the jump any farther,
5112 or the jump itself if we have picked up an existing jump. */
5113 rtx insert_place;
5114 /* The ultimate destination. */
5115 rtx far_label;
5116 struct far_branch *prev;
5117 /* If the branch has already been created, its address;
5118 else the address of its first prospective user. */
5119 int address;
5122 static void gen_far_branch (struct far_branch *);
5123 enum mdep_reorg_phase_e mdep_reorg_phase;
5124 static void
5125 gen_far_branch (struct far_branch *bp)
5127 rtx insn = bp->insert_place;
5128 rtx jump;
5129 rtx label = gen_label_rtx ();
5130 int ok;
5132 emit_label_after (label, insn);
5133 if (bp->far_label)
5135 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5136 LABEL_NUSES (bp->far_label)++;
5138 else
5139 jump = emit_jump_insn_after (gen_return (), insn);
5140 /* Emit a barrier so that reorg knows that any following instructions
5141 are not reachable via a fall-through path.
5142 But don't do this when not optimizing, since we wouldn't suppress the
5143 alignment for the barrier then, and could end up with out-of-range
5144 pc-relative loads. */
5145 if (optimize)
5146 emit_barrier_after (jump);
5147 emit_label_after (bp->near_label, insn);
5148 JUMP_LABEL (jump) = bp->far_label;
5149 ok = invert_jump (insn, label, 1);
5150 gcc_assert (ok);
5152 /* If we are branching around a jump (rather than a return), prevent
5153 reorg from using an insn from the jump target as the delay slot insn -
5154 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5155 and it could cause branches to go out of range. */
5156 if (bp->far_label)
5157 (emit_insn_after
5158 (gen_stuff_delay_slot
5159 (GEN_INT (unspec_bbr_uid++),
5160 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5161 insn));
5162 /* Prevent reorg from undoing our splits. */
5163 gen_block_redirect (jump, bp->address += 2, 2);
5166 /* Fix up ADDR_DIFF_VECs. */
5167 void
5168 fixup_addr_diff_vecs (rtx first)
5170 rtx insn;
5172 for (insn = first; insn; insn = NEXT_INSN (insn))
5174 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5176 if (!JUMP_P (insn)
5177 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5178 continue;
5179 pat = PATTERN (insn);
5180 vec_lab = XEXP (XEXP (pat, 0), 0);
5182 /* Search the matching casesi_jump_2. */
5183 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5185 if (!JUMP_P (prev))
5186 continue;
5187 prevpat = PATTERN (prev);
5188 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5189 continue;
5190 x = XVECEXP (prevpat, 0, 1);
5191 if (GET_CODE (x) != USE)
5192 continue;
5193 x = XEXP (x, 0);
5194 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5195 break;
5197 /* FIXME: This is a bug in the optimizer, but it seems harmless
5198 to just avoid panicking. */
5199 if (!prev)
5200 continue;
5202 /* Emit the reference label of the braf where it belongs, right after
5203 the casesi_jump_2 (i.e. braf). */
5204 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5205 emit_label_after (braf_label, prev);
5207 /* Fix up the ADDR_DIFF_VEC to be relative
5208 to the reference address of the braf. */
5209 XEXP (XEXP (pat, 0), 0) = braf_label;
5213 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5214 a barrier. Return the base 2 logarithm of the desired alignment. */
5216 barrier_align (rtx barrier_or_label)
5218 rtx next = next_real_insn (barrier_or_label), pat, prev;
5219 int slot, credit, jump_to_next = 0;
5221 if (! next)
5222 return 0;
5224 pat = PATTERN (next);
5226 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5227 return 2;
5229 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5230 /* This is a barrier in front of a constant table. */
5231 return 0;
5233 prev = prev_real_insn (barrier_or_label);
5234 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5236 pat = PATTERN (prev);
5237 /* If this is a very small table, we want to keep the alignment after
5238 the table to the minimum for proper code alignment. */
5239 return ((optimize_size
5240 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5241 <= (unsigned) 1 << (CACHE_LOG - 2)))
5242 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5245 if (optimize_size)
5246 return 0;
5248 if (! TARGET_SH2 || ! optimize)
5249 return align_jumps_log;
5251 /* When fixing up pcloads, a constant table might be inserted just before
5252 the basic block that ends with the barrier. Thus, we can't trust the
5253 instruction lengths before that. */
5254 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5256 /* Check if there is an immediately preceding branch to the insn beyond
5257 the barrier. We must weigh the cost of discarding useful information
5258 from the current cache line when executing this branch and there is
5259 an alignment, against that of fetching unneeded insns in front of the
5260 branch target when there is no alignment. */
5262 /* There are two delay_slot cases to consider. One is the simple case
5263 where the preceding branch is to the insn beyond the barrier (simple
5264 delay slot filling), and the other is where the preceding branch has
5265 a delay slot that is a duplicate of the insn after the barrier
5266 (fill_eager_delay_slots) and the branch is to the insn after the insn
5267 after the barrier. */
5269 /* PREV is presumed to be the JUMP_INSN for the barrier under
5270 investigation. Skip to the insn before it. */
5271 prev = prev_real_insn (prev);
5273 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5274 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5275 prev = prev_real_insn (prev))
5277 jump_to_next = 0;
5278 if (GET_CODE (PATTERN (prev)) == USE
5279 || GET_CODE (PATTERN (prev)) == CLOBBER)
5280 continue;
5281 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5283 prev = XVECEXP (PATTERN (prev), 0, 1);
5284 if (INSN_UID (prev) == INSN_UID (next))
5286 /* Delay slot was filled with insn at jump target. */
5287 jump_to_next = 1;
5288 continue;
5292 if (slot &&
5293 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5294 slot = 0;
5295 credit -= get_attr_length (prev);
5297 if (prev && jump_to_label_p (prev))
5299 rtx x;
5300 if (jump_to_next
5301 || next_real_insn (JUMP_LABEL (prev)) == next
5302 /* If relax_delay_slots() decides NEXT was redundant
5303 with some previous instruction, it will have
5304 redirected PREV's jump to the following insn. */
5305 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5306 /* There is no upper bound on redundant instructions
5307 that might have been skipped, but we must not put an
5308 alignment where none had been before. */
5309 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5310 (INSN_P (x)
5311 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5312 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5313 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5315 rtx pat = PATTERN (prev);
5316 if (GET_CODE (pat) == PARALLEL)
5317 pat = XVECEXP (pat, 0, 0);
5318 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5319 return 0;
5324 return align_jumps_log;
5327 /* If we are inside a phony loop, almost any kind of label can turn up as the
5328 first one in the loop. Aligning a braf label causes incorrect switch
5329 destination addresses; we can detect braf labels because they are
5330 followed by a BARRIER.
5331 Applying loop alignment to small constant or switch tables is a waste
5332 of space, so we suppress this too. */
5334 sh_loop_align (rtx label)
5336 rtx next = label;
5339 next = next_nonnote_insn (next);
5340 while (next && LABEL_P (next));
5342 if (! next
5343 || ! INSN_P (next)
5344 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5345 || recog_memoized (next) == CODE_FOR_consttable_2)
5346 return 0;
5348 return align_loops_log;
5351 /* Do a final pass over the function, just before delayed branch
5352 scheduling. */
5354 static void
5355 sh_reorg (void)
5357 rtx first, insn, mova = NULL_RTX;
5358 int num_mova;
5359 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5360 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5362 first = get_insns ();
5363 max_labelno_before_reorg = max_label_num ();
5365 /* We must split call insns before introducing `mova's. If we're
5366 optimizing, they'll have already been split. Otherwise, make
5367 sure we don't split them too late. */
5368 if (! optimize)
5369 split_all_insns_noflow ();
5371 if (TARGET_SHMEDIA)
5372 return;
5374 /* If relaxing, generate pseudo-ops to associate function calls with
5375 the symbols they call. It does no harm to not generate these
5376 pseudo-ops. However, when we can generate them, it enables the
5377 linker to potentially relax the jsr to a bsr, and eliminate the
5378 register load and, possibly, the constant pool entry. */
5380 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5381 if (TARGET_RELAX)
5383 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5384 own purposes. This works because none of the remaining passes
5385 need to look at them.
5387 ??? But it may break in the future. We should use a machine
5388 dependent REG_NOTE, or some other approach entirely. */
5389 for (insn = first; insn; insn = NEXT_INSN (insn))
5391 if (INSN_P (insn))
5393 rtx note;
5395 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5396 NULL_RTX)) != 0)
5397 remove_note (insn, note);
5401 for (insn = first; insn; insn = NEXT_INSN (insn))
5403 rtx pattern, reg, link, set, scan, dies, label;
5404 int rescan = 0, foundinsn = 0;
5406 if (CALL_P (insn))
5408 pattern = PATTERN (insn);
5410 if (GET_CODE (pattern) == PARALLEL)
5411 pattern = XVECEXP (pattern, 0, 0);
5412 if (GET_CODE (pattern) == SET)
5413 pattern = SET_SRC (pattern);
5415 if (GET_CODE (pattern) != CALL
5416 || !MEM_P (XEXP (pattern, 0)))
5417 continue;
5419 reg = XEXP (XEXP (pattern, 0), 0);
5421 else
5423 reg = sfunc_uses_reg (insn);
5424 if (! reg)
5425 continue;
5428 if (!REG_P (reg))
5429 continue;
5431 /* Try scanning backward to find where the register is set. */
5432 link = NULL;
5433 for (scan = PREV_INSN (insn);
5434 scan && !LABEL_P (scan);
5435 scan = PREV_INSN (scan))
5437 if (! INSN_P (scan))
5438 continue;
5440 if (! reg_mentioned_p (reg, scan))
5441 continue;
5443 if (noncall_uses_reg (reg, scan, &set))
5444 break;
5446 if (set)
5448 link = scan;
5449 break;
5453 if (! link)
5454 continue;
5456 /* The register is set at LINK. */
5458 /* We can only optimize the function call if the register is
5459 being set to a symbol. In theory, we could sometimes
5460 optimize calls to a constant location, but the assembler
5461 and linker do not support that at present. */
5462 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5463 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5464 continue;
5466 /* Scan forward from LINK to the place where REG dies, and
5467 make sure that the only insns which use REG are
5468 themselves function calls. */
5470 /* ??? This doesn't work for call targets that were allocated
5471 by reload, since there may not be a REG_DEAD note for the
5472 register. */
5474 dies = NULL_RTX;
5475 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5477 rtx scanset;
5479 /* Don't try to trace forward past a CODE_LABEL if we haven't
5480 seen INSN yet. Ordinarily, we will only find the setting insn
5481 if it is in the same basic block. However,
5482 cross-jumping can insert code labels in between the load and
5483 the call, and can result in situations where a single call
5484 insn may have two targets depending on where we came from. */
5486 if (LABEL_P (scan) && ! foundinsn)
5487 break;
5489 if (! INSN_P (scan))
5490 continue;
5492 /* Don't try to trace forward past a JUMP. To optimize
5493 safely, we would have to check that all the
5494 instructions at the jump destination did not use REG. */
5496 if (JUMP_P (scan))
5497 break;
5499 if (! reg_mentioned_p (reg, scan))
5500 continue;
5502 if (noncall_uses_reg (reg, scan, &scanset))
5503 break;
5505 if (scan == insn)
5506 foundinsn = 1;
5508 if (scan != insn
5509 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5511 /* There is a function call to this register other
5512 than the one we are checking. If we optimize
5513 this call, we need to rescan again below. */
5514 rescan = 1;
5517 /* ??? We shouldn't have to worry about SCANSET here.
5518 We should just be able to check for a REG_DEAD note
5519 on a function call. However, the REG_DEAD notes are
5520 apparently not dependable around libcalls; c-torture
5521 execute/920501-2 is a test case. If SCANSET is set,
5522 then this insn sets the register, so it must have
5523 died earlier. Unfortunately, this will only handle
5524 the cases in which the register is, in fact, set in a
5525 later insn. */
5527 /* ??? We shouldn't have to use FOUNDINSN here.
5528 This dates back to when we used LOG_LINKS to find
5529 the most recent insn which sets the register. */
5531 if (foundinsn
5532 && (scanset
5533 || find_reg_note (scan, REG_DEAD, reg)))
5535 dies = scan;
5536 break;
5540 if (! dies)
5542 /* Either there was a branch, or some insn used REG
5543 other than as a function call address. */
5544 continue;
5547 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5548 on the insn which sets the register, and on each call insn
5549 which uses the register. In final_prescan_insn we look for
5550 the REG_LABEL_OPERAND notes, and output the appropriate label
5551 or pseudo-op. */
5553 label = gen_label_rtx ();
5554 add_reg_note (link, REG_LABEL_OPERAND, label);
5555 add_reg_note (insn, REG_LABEL_OPERAND, label);
5556 if (rescan)
5558 scan = link;
5561 rtx reg2;
5563 scan = NEXT_INSN (scan);
5564 if (scan != insn
5565 && ((CALL_P (scan)
5566 && reg_mentioned_p (reg, scan))
5567 || ((reg2 = sfunc_uses_reg (scan))
5568 && REGNO (reg2) == REGNO (reg))))
5569 add_reg_note (scan, REG_LABEL_OPERAND, label);
5571 while (scan != dies);
5576 if (TARGET_SH2)
5577 fixup_addr_diff_vecs (first);
5579 if (optimize)
5581 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5582 shorten_branches (first);
5585 /* Scan the function looking for move instructions which have to be
5586 changed to pc-relative loads and insert the literal tables. */
5587 label_ref_list_pool = create_alloc_pool ("label references list",
5588 sizeof (struct label_ref_list_d),
5589 30);
5590 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5591 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5593 if (mova_p (insn))
5595 /* ??? basic block reordering can move a switch table dispatch
5596 below the switch table. Check if that has happened.
5597 We only have the addresses available when optimizing; but then,
5598 this check shouldn't be needed when not optimizing. */
5599 if (!untangle_mova (&num_mova, &mova, insn))
5601 insn = mova;
5602 num_mova = 0;
5605 else if (JUMP_P (insn)
5606 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5607 && num_mova
5608 /* ??? loop invariant motion can also move a mova out of a
5609 loop. Since loop does this code motion anyway, maybe we
5610 should wrap UNSPEC_MOVA into a CONST, so that reload can
5611 move it back. */
5612 && ((num_mova > 1
5613 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5614 || (prev_nonnote_insn (insn)
5615 == XEXP (MOVA_LABELREF (mova), 0))))
5617 rtx scan;
5618 int total;
5620 num_mova--;
5622 /* Some code might have been inserted between the mova and
5623 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5624 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5625 total += get_attr_length (scan);
5627 /* range of mova is 1020, add 4 because pc counts from address of
5628 second instruction after this one, subtract 2 in case pc is 2
5629 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5630 cancels out with alignment effects of the mova itself. */
5631 if (total > 1022)
5633 /* Change the mova into a load, and restart scanning
5634 there. broken_move will then return true for mova. */
5635 fixup_mova (mova);
5636 insn = mova;
5639 if (broken_move (insn)
5640 || (NONJUMP_INSN_P (insn)
5641 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5643 rtx scan;
5644 /* Scan ahead looking for a barrier to stick the constant table
5645 behind. */
5646 rtx barrier = find_barrier (num_mova, mova, insn);
5647 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5648 int need_aligned_label = 0;
5650 if (num_mova && ! mova_p (mova))
5652 /* find_barrier had to change the first mova into a
5653 pcload; thus, we have to start with this new pcload. */
5654 insn = mova;
5655 num_mova = 0;
5657 /* Now find all the moves between the points and modify them. */
5658 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5660 if (LABEL_P (scan))
5661 last_float = 0;
5662 if (NONJUMP_INSN_P (scan)
5663 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5664 need_aligned_label = 1;
5665 if (broken_move (scan))
5667 rtx *patp = &PATTERN (scan), pat = *patp;
5668 rtx src, dst;
5669 rtx lab;
5670 rtx newsrc;
5671 enum machine_mode mode;
5673 if (GET_CODE (pat) == PARALLEL)
5674 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5675 src = SET_SRC (pat);
5676 dst = SET_DEST (pat);
5677 mode = GET_MODE (dst);
5679 if (mode == SImode && hi_const (src)
5680 && REGNO (dst) != FPUL_REG)
5682 int offset = 0;
5684 mode = HImode;
5685 while (GET_CODE (dst) == SUBREG)
5687 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5688 GET_MODE (SUBREG_REG (dst)),
5689 SUBREG_BYTE (dst),
5690 GET_MODE (dst));
5691 dst = SUBREG_REG (dst);
5693 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5695 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5697 /* This must be an insn that clobbers r0. */
5698 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5699 XVECLEN (PATTERN (scan), 0)
5700 - 1);
5701 rtx clobber = *clobberp;
5703 gcc_assert (GET_CODE (clobber) == CLOBBER
5704 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5706 if (last_float
5707 && reg_set_between_p (r0_rtx, last_float_move, scan))
5708 last_float = 0;
5709 if (last_float
5710 && TARGET_SHCOMPACT
5711 && GET_MODE_SIZE (mode) != 4
5712 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5713 last_float = 0;
5714 lab = add_constant (src, mode, last_float);
5715 if (lab)
5716 emit_insn_before (gen_mova (lab), scan);
5717 else
5719 /* There will be a REG_UNUSED note for r0 on
5720 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5721 otherwise reorg:mark_target_live_regs will not
5722 consider r0 to be used, and we could end up with a
5723 delay slot insn in front of SCAN that clobbers r0. */
5724 rtx note
5725 = find_regno_note (last_float_move, REG_UNUSED, 0);
5727 /* If we are not optimizing, then there may not be
5728 a note. */
5729 if (note)
5730 PUT_REG_NOTE_KIND (note, REG_INC);
5732 *last_float_addr = r0_inc_rtx;
5734 last_float_move = scan;
5735 last_float = src;
5736 newsrc = gen_const_mem (mode,
5737 (((TARGET_SH4 && ! TARGET_FMOVD)
5738 || REGNO (dst) == FPUL_REG)
5739 ? r0_inc_rtx
5740 : r0_rtx));
5741 last_float_addr = &XEXP (newsrc, 0);
5743 /* Remove the clobber of r0. */
5744 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5745 gen_rtx_SCRATCH (Pmode));
5747 /* This is a mova needing a label. Create it. */
5748 else if (GET_CODE (src) == UNSPEC
5749 && XINT (src, 1) == UNSPEC_MOVA
5750 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5752 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5753 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5754 newsrc = gen_rtx_UNSPEC (SImode,
5755 gen_rtvec (1, newsrc),
5756 UNSPEC_MOVA);
5758 else
5760 lab = add_constant (src, mode, 0);
5761 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5762 newsrc = gen_const_mem (mode, newsrc);
5764 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5765 INSN_CODE (scan) = -1;
5768 dump_table (need_aligned_label ? insn : 0, barrier);
5769 insn = barrier;
5772 free_alloc_pool (label_ref_list_pool);
5773 for (insn = first; insn; insn = NEXT_INSN (insn))
5774 PUT_MODE (insn, VOIDmode);
5776 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5777 INSN_ADDRESSES_FREE ();
5778 split_branches (first);
5780 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5781 also has an effect on the register that holds the address of the sfunc.
5782 Insert an extra dummy insn in front of each sfunc that pretends to
5783 use this register. */
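/* Concretely, the loop below scans every insn, asks sfunc_uses_reg for the
   register holding the sfunc address, and emits a (use ...) of that register
   via gen_use_sfunc_addr immediately before the sfunc call.  */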
5784 if (flag_delayed_branch)
5786 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 rtx reg = sfunc_uses_reg (insn);
5790 if (! reg)
5791 continue;
5792 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5795 #if 0
5796 /* fpscr is not actually a user variable, but we pretend it is for the
5797 sake of the previous optimization passes, since we want it handled like
5798 one. However, we don't have any debugging information for it, so turn
5799 it into a non-user variable now. */
5800 if (TARGET_SH4)
5801 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5802 #endif
5803 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5806 int
5807 get_dest_uid (rtx label, int max_uid)
5809 rtx dest = next_real_insn (label);
5810 int dest_uid;
5811 if (! dest)
5812 /* This can happen for an undefined label. */
5813 return 0;
5814 dest_uid = INSN_UID (dest);
5815 /* If this is a newly created branch redirection blocking instruction,
5816 we cannot index the branch_uid or insn_addresses arrays with its
5817 uid. But then, we won't need to, because the actual destination is
5818 the following branch. */
5819 while (dest_uid >= max_uid)
5821 dest = NEXT_INSN (dest);
5822 dest_uid = INSN_UID (dest);
5824 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5825 return 0;
5826 return dest_uid;
5829 /* Split condbranches that are out of range. Also add clobbers for
5830 scratch registers that are needed in far jumps.
5831 We do this before delay slot scheduling, so that it can take our
5832 newly created instructions into account. It also allows us to
5833 find branches with common targets more easily. */
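/* Roughly, the pass below keeps one struct far_branch record per destination
   uid in the alloca'd UID_BRANCH array, chains the records on
   FAR_BRANCH_LIST, and finally materializes any still-pending far branches
   before calling init_insn_lengths.  */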
5835 static void
5836 split_branches (rtx first)
5838 rtx insn;
5839 struct far_branch **uid_branch, *far_branch_list = 0;
5840 int max_uid = get_max_uid ();
5841 int ok;
5843 /* Find out which branches are out of range. */
5844 shorten_branches (first);
5846 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5847 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5849 for (insn = first; insn; insn = NEXT_INSN (insn))
5850 if (! INSN_P (insn))
5851 continue;
5852 else if (INSN_DELETED_P (insn))
5854 /* Shorten_branches would split this instruction again,
5855 so transform it into a note. */
5856 SET_INSN_DELETED (insn);
5858 else if (JUMP_P (insn)
5859 /* Don't mess with ADDR_DIFF_VEC */
5860 && (GET_CODE (PATTERN (insn)) == SET
5861 || GET_CODE (PATTERN (insn)) == RETURN))
5863 enum attr_type type = get_attr_type (insn);
5864 if (type == TYPE_CBRANCH)
5866 rtx next, beyond;
5868 if (get_attr_length (insn) > 4)
5870 rtx src = SET_SRC (PATTERN (insn));
5871 rtx olabel = XEXP (XEXP (src, 1), 0);
5872 int addr = INSN_ADDRESSES (INSN_UID (insn));
5873 rtx label = 0;
5874 int dest_uid = get_dest_uid (olabel, max_uid);
5875 struct far_branch *bp = uid_branch[dest_uid];
5877 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5878 the label if the LABEL_NUSES count drops to zero. There is
5879 always a jump_optimize pass that sets these values, but it
5880 proceeds to delete unreferenced code, and then if not
5881 optimizing, to un-delete the deleted instructions, thus
5882 leaving labels with use counts that are too low. */
5883 if (! optimize)
5885 JUMP_LABEL (insn) = olabel;
5886 LABEL_NUSES (olabel)++;
5888 if (! bp)
5890 bp = (struct far_branch *) alloca (sizeof *bp);
5891 uid_branch[dest_uid] = bp;
5892 bp->prev = far_branch_list;
5893 far_branch_list = bp;
5894 bp->far_label
5895 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5896 LABEL_NUSES (bp->far_label)++;
5898 else
5900 label = bp->near_label;
5901 if (! label && bp->address - addr >= CONDJUMP_MIN)
5903 rtx block = bp->insert_place;
5905 if (GET_CODE (PATTERN (block)) == RETURN)
5906 block = PREV_INSN (block);
5907 else
5908 block = gen_block_redirect (block,
5909 bp->address, 2);
5910 label = emit_label_after (gen_label_rtx (),
5911 PREV_INSN (block));
5912 bp->near_label = label;
5914 else if (label && ! NEXT_INSN (label))
5916 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5917 bp->insert_place = insn;
5918 else
5919 gen_far_branch (bp);
5922 if (! label
5923 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5925 bp->near_label = label = gen_label_rtx ();
5926 bp->insert_place = insn;
5927 bp->address = addr;
5929 ok = redirect_jump (insn, label, 0);
5930 gcc_assert (ok);
5932 else
5934 /* get_attr_length (insn) == 2 */
5935 /* Check if we have a pattern where reorg wants to redirect
5936 the branch to a label from an unconditional branch that
5937 is too far away. */
5938 /* We can't use JUMP_LABEL here because it might be undefined
5939 when not optimizing. */
5940 /* A syntax error might cause beyond to be NULL_RTX. */
5941 beyond
5942 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5943 0));
5945 if (beyond
5946 && (JUMP_P (beyond)
5947 || ((beyond = next_active_insn (beyond))
5948 && JUMP_P (beyond)))
5949 && GET_CODE (PATTERN (beyond)) == SET
5950 && recog_memoized (beyond) == CODE_FOR_jump_compact
5951 && ((INSN_ADDRESSES
5952 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5953 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5954 > 252 + 258 + 2))
5955 gen_block_redirect (beyond,
5956 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5959 next = next_active_insn (insn);
5961 if (next
5962 && (JUMP_P (next)
5963 || ((next = next_active_insn (next))
5964 && JUMP_P (next)))
5965 && GET_CODE (PATTERN (next)) == SET
5966 && recog_memoized (next) == CODE_FOR_jump_compact
5967 && ((INSN_ADDRESSES
5968 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5969 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5970 > 252 + 258 + 2))
5971 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5973 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5975 int addr = INSN_ADDRESSES (INSN_UID (insn));
5976 rtx far_label = 0;
5977 int dest_uid = 0;
5978 struct far_branch *bp;
5980 if (type == TYPE_JUMP)
5982 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5983 dest_uid = get_dest_uid (far_label, max_uid);
5984 if (! dest_uid)
5986 /* Parse errors can lead to labels outside
5987 the insn stream. */
5988 if (! NEXT_INSN (far_label))
5989 continue;
5991 if (! optimize)
5993 JUMP_LABEL (insn) = far_label;
5994 LABEL_NUSES (far_label)++;
5996 redirect_jump (insn, ret_rtx, 1);
5997 far_label = 0;
6000 bp = uid_branch[dest_uid];
6001 if (! bp)
6003 bp = (struct far_branch *) alloca (sizeof *bp);
6004 uid_branch[dest_uid] = bp;
6005 bp->prev = far_branch_list;
6006 far_branch_list = bp;
6007 bp->near_label = 0;
6008 bp->far_label = far_label;
6009 if (far_label)
6010 LABEL_NUSES (far_label)++;
6012 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6013 if (addr - bp->address <= CONDJUMP_MAX)
6014 emit_label_after (bp->near_label, PREV_INSN (insn));
6015 else
6017 gen_far_branch (bp);
6018 bp->near_label = 0;
6020 else
6021 bp->near_label = 0;
6022 bp->address = addr;
6023 bp->insert_place = insn;
6024 if (! far_label)
6025 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6026 else
6027 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6030 /* Generate all pending far branches,
6031 and free our references to the far labels. */
6032 while (far_branch_list)
6034 if (far_branch_list->near_label
6035 && ! NEXT_INSN (far_branch_list->near_label))
6036 gen_far_branch (far_branch_list);
6037 if (optimize
6038 && far_branch_list->far_label
6039 && ! --LABEL_NUSES (far_branch_list->far_label))
6040 delete_insn (far_branch_list->far_label);
6041 far_branch_list = far_branch_list->prev;
6044 /* Instruction length information is no longer valid due to the new
6045 instructions that have been generated. */
6046 init_insn_lengths ();
6049 /* Dump out instruction addresses, which is useful for debugging the
6050 constant pool table stuff.
6052 If relaxing, output the label and pseudo-ops used to link together
6053 calls and the instruction which set the registers. */
6055 /* ??? The addresses printed by this routine for insns are nonsense for
6056 insns which are inside of a sequence where none of the inner insns have
6057 variable length. This is because the second pass of shorten_branches
6058 does not bother to update them. */
6060 void
6061 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6062 int noperands ATTRIBUTE_UNUSED)
6064 if (TARGET_DUMPISIZE)
6065 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
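/* For illustration, this emits an assembler comment such as "! at 01a4"
   before each insn, giving the insn's byte address in hex.  */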
6067 if (TARGET_RELAX)
6069 rtx note;
6071 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6072 if (note)
6074 rtx pattern;
6076 pattern = PATTERN (insn);
6077 if (GET_CODE (pattern) == PARALLEL)
6078 pattern = XVECEXP (pattern, 0, 0);
6079 switch (GET_CODE (pattern))
6081 case SET:
6082 if (GET_CODE (SET_SRC (pattern)) != CALL
6083 && get_attr_type (insn) != TYPE_SFUNC)
6085 targetm.asm_out.internal_label
6086 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6087 break;
6089 /* else FALLTHROUGH */
6090 case CALL:
6091 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6092 CODE_LABEL_NUMBER (XEXP (note, 0)));
6093 break;
6095 default:
6096 gcc_unreachable ();
6102 /* Dump out any constants accumulated in the final pass. These will
6103 only be labels. */
6105 const char *
6106 output_jump_label_table (void)
6108 int i;
6110 if (pool_size)
6112 fprintf (asm_out_file, "\t.align 2\n");
6113 for (i = 0; i < pool_size; i++)
6115 pool_node *p = &pool_vector[i];
6117 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6118 CODE_LABEL_NUMBER (p->label));
6119 output_asm_insn (".long %O0", &p->value);
6121 pool_size = 0;
6124 return "";
6127 /* A full frame looks like:
6129 arg-5
6130 arg-4
6131 [ if current_function_anonymous_args
6132 arg-3
6133 arg-2
6134 arg-1
6135 arg-0 ]
6136 saved-fp
6137 saved-r10
6138 saved-r11
6139 saved-r12
6140 saved-pr
6141 local-n
6143 local-1
6144 local-0 <- fp points here. */
6146 /* Number of bytes pushed for anonymous args, used to pass information
6147 between expand_prologue and expand_epilogue. */
6149 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6150 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6151 for an epilogue and a negative value means that it's for a sibcall
6152 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6153 all the registers that are about to be restored, and hence dead. */
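/* A typical prologue use, seen further down in sh_expand_prologue, is
   roughly:

     output_stack_adjust (-pretend_args - crtl->args.info.stack_regs * 8,
                          stack_pointer_rtx, 0, NULL, true);

   i.e. a negative SIZE grows the stack, an EPILOGUE_P of 0 marks a prologue,
   and FRAME_P requests the frame-related (CFI) annotation done by
   frame_insn.  */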
6155 static void
6156 output_stack_adjust (int size, rtx reg, int epilogue_p,
6157 HARD_REG_SET *live_regs_mask, bool frame_p)
6159 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6160 if (size)
6162 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6164 /* This test is bogus, as output_stack_adjust is used to re-align the
6165 stack. */
6166 #if 0
6167 gcc_assert (!(size % align));
6168 #endif
6170 if (CONST_OK_FOR_ADD (size))
6171 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6172 /* Try to do it with two partial adjustments; however, we must make
6173 sure that the stack is properly aligned at all times, in case
6174 an interrupt occurs between the two partial adjustments. */
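/* Rough example, assuming a 4 byte STACK_BOUNDARY and the usual non-SHmedia
   8-bit add-immediate range of -128..127: size == -192 does not fit in a
   single add, but (size / 2 & -align) == -96 and the remaining -96 both fit,
   and the intermediate stack pointer stays 4 byte aligned.  */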
6175 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6176 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6178 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6179 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6181 else
6183 rtx const_reg;
6184 rtx insn;
6185 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6186 int i;
6188 /* If TEMP is invalid, we could temporarily save a general
6189 register to MACL. However, there is currently no need
6190 to handle this case, so just die when we see it. */
6191 if (epilogue_p < 0
6192 || current_function_interrupt
6193 || ! call_really_used_regs[temp] || fixed_regs[temp])
6194 temp = -1;
6195 if (temp < 0 && ! current_function_interrupt
6196 && (TARGET_SHMEDIA || epilogue_p >= 0))
6198 HARD_REG_SET temps;
6199 COPY_HARD_REG_SET (temps, call_used_reg_set);
6200 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6201 if (epilogue_p > 0)
6203 int nreg = 0;
6204 if (crtl->return_rtx)
6206 enum machine_mode mode;
6207 mode = GET_MODE (crtl->return_rtx);
6208 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6209 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6211 for (i = 0; i < nreg; i++)
6212 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6213 if (crtl->calls_eh_return)
6215 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6216 for (i = 0; i <= 3; i++)
6217 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6220 if (TARGET_SHMEDIA && epilogue_p < 0)
6221 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6222 CLEAR_HARD_REG_BIT (temps, i);
6223 if (epilogue_p <= 0)
6225 for (i = FIRST_PARM_REG;
6226 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6227 CLEAR_HARD_REG_BIT (temps, i);
6228 if (cfun->static_chain_decl != NULL)
6229 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6231 temp = scavenge_reg (&temps);
6233 if (temp < 0 && live_regs_mask)
6235 HARD_REG_SET temps;
6237 COPY_HARD_REG_SET (temps, *live_regs_mask);
6238 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6239 temp = scavenge_reg (&temps);
6241 if (temp < 0)
6243 rtx adj_reg, tmp_reg, mem;
6245 /* If we reached here, the most likely case is the (sibcall)
6246 epilogue for non-SHmedia. Put a special push/pop sequence
6247 for such a case as a last resort. This looks lengthy but
6248 would not be a problem because it seems to be very
6249 rare. */
6251 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6254 /* ??? There is still the slight possibility that r4 or
6255 r5 have been reserved as fixed registers or assigned
6256 as global registers, and they change during an
6257 interrupt. There are possible ways to handle this:
6259 - If we are adjusting the frame pointer (r14), we can do
6260 with a single temp register and an ordinary push / pop
6261 on the stack.
6262 - Grab any call-used or call-saved registers (i.e. not
6263 fixed or globals) for the temps we need. We might
6264 also grab r14 if we are adjusting the stack pointer.
6265 If we can't find enough available registers, issue
6266 a diagnostic and die - the user must have reserved
6267 way too many registers.
6268 But since all this is rather unlikely to happen and
6269 would require extra testing, we just die if r4 / r5
6270 are not available. */
6271 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6272 && !global_regs[4] && !global_regs[5]);
6274 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6275 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6276 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6277 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6278 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6279 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6280 emit_move_insn (mem, tmp_reg);
6281 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6282 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6283 emit_move_insn (mem, tmp_reg);
6284 emit_move_insn (reg, adj_reg);
6285 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6286 emit_move_insn (adj_reg, mem);
6287 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6288 emit_move_insn (tmp_reg, mem);
6289 /* Tell flow the insns that pop r4/r5 aren't dead. */
6290 emit_use (tmp_reg);
6291 emit_use (adj_reg);
6292 return;
6294 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6296 /* If SIZE is negative, subtract the positive value.
6297 This sometimes allows a constant pool entry to be shared
6298 between prologue and epilogue code. */
6299 if (size < 0)
6301 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6302 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6304 else
6306 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6307 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6309 if (! epilogue_p)
6310 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6311 gen_rtx_SET (VOIDmode, reg,
6312 gen_rtx_PLUS (SImode, reg,
6313 GEN_INT (size))));
6318 static rtx
6319 frame_insn (rtx x)
6321 x = emit_insn (x);
6322 RTX_FRAME_RELATED_P (x) = 1;
6323 return x;
6326 /* Output RTL to push register RN onto the stack. */
6328 static rtx
6329 push (int rn)
6331 rtx x;
6332 if (rn == FPUL_REG)
6333 x = gen_push_fpul ();
6334 else if (rn == FPSCR_REG)
6335 x = gen_push_fpscr ();
6336 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6337 && FP_OR_XD_REGISTER_P (rn))
6339 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6340 return NULL_RTX;
6341 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6343 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6344 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6345 else
6346 x = gen_push (gen_rtx_REG (SImode, rn));
6348 x = frame_insn (x);
6349 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6350 return x;
6353 /* Output RTL to pop register RN from the stack. */
6355 static void
6356 pop (int rn)
6358 rtx x;
6359 if (rn == FPUL_REG)
6360 x = gen_pop_fpul ();
6361 else if (rn == FPSCR_REG)
6362 x = gen_pop_fpscr ();
6363 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6364 && FP_OR_XD_REGISTER_P (rn))
6366 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6367 return;
6368 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6370 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6371 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6372 else
6373 x = gen_pop (gen_rtx_REG (SImode, rn));
6375 x = emit_insn (x);
6376 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6379 /* Generate code to push the regs specified in the mask. */
6381 static void
6382 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6384 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6385 int skip_fpscr = 0;
6387 /* Push PR last; this gives better latencies after the prologue, and
6388 candidates for the return delay slot when there are no general
6389 registers pushed. */
6390 for (; i < FIRST_PSEUDO_REGISTER; i++)
6392 /* If this is an interrupt handler, and the SZ bit varies,
6393 and we have to push any floating point register, we need
6394 to switch to the correct precision first. */
6395 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6396 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6398 HARD_REG_SET unsaved;
6400 push (FPSCR_REG);
6401 COMPL_HARD_REG_SET (unsaved, *mask);
6402 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6403 skip_fpscr = 1;
6405 if (i != PR_REG
6406 && (i != FPSCR_REG || ! skip_fpscr)
6407 && TEST_HARD_REG_BIT (*mask, i))
6409 /* If the ISR has RESBANK attribute assigned, don't push any of
6410 the following registers - R0-R14, MACH, MACL and GBR. */
6411 if (! (sh_cfun_resbank_handler_p ()
6412 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6413 || i == MACH_REG
6414 || i == MACL_REG
6415 || i == GBR_REG)))
6416 push (i);
6420 /* Push banked registers last to improve delay slot opportunities. */
6421 if (interrupt_handler)
6423 bool use_movml = false;
6425 if (TARGET_SH2A)
6427 unsigned int count = 0;
6429 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6430 if (TEST_HARD_REG_BIT (*mask, i))
6431 count++;
6432 else
6433 break;
6435 /* Use movml when all banked registers are pushed. */
6436 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6437 use_movml = true;
6440 if (use_movml)
6442 rtx x, mem, reg, set;
6443 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6445 /* We must avoid scheduling the multiple store insns together
6446 with other insns. */
6447 emit_insn (gen_blockage ());
6448 x = gen_movml_push_banked (sp_reg);
6449 x = frame_insn (x);
6450 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6452 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6453 reg = gen_rtx_REG (SImode, i);
6454 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6457 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6458 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6459 emit_insn (gen_blockage ());
6461 else
6462 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6463 if (TEST_HARD_REG_BIT (*mask, i))
6464 push (i);
6467 /* Don't push the PR register for an ISR with RESBANK attribute assigned. */
6468 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6469 push (PR_REG);
6472 /* Calculate how much extra space is needed to save all callee-saved
6473 target registers.
6474 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6476 static int
6477 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6479 int reg;
6480 int stack_space = 0;
6481 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6483 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6484 if ((! call_really_used_regs[reg] || interrupt_handler)
6485 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6486 /* Leave space to save this target register on the stack,
6487 in case target register allocation wants to use it. */
6488 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6489 return stack_space;
6492 /* Decide whether we should reserve space for callee-save target registers,
6493 in case target register allocation wants to use them. REGS_SAVED is
6494 the space, in bytes, that is already required for register saves.
6495 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6497 static int
6498 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6499 HARD_REG_SET *live_regs_mask)
6501 if (optimize_size)
6502 return 0;
6503 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6506 /* Decide how much space to reserve for callee-save target registers
6507 in case target register allocation wants to use them.
6508 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6510 static int
6511 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6513 if (shmedia_space_reserved_for_target_registers)
6514 return shmedia_target_regs_stack_space (live_regs_mask);
6515 else
6516 return 0;
6519 /* Work out the registers which need to be saved, both as a mask and a
6520 count of saved words. Return the count.
6522 If doing a pragma interrupt function, then push all regs used by the
6523 function, and if we call another function (we can tell by looking at PR),
6524 make sure that all the regs it clobbers are safe too. */
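/* Note that the count accumulated below is in bytes (GET_MODE_SIZE of each
   saved register's natural mode); sh_expand_prologue adds it directly to
   stack_usage.  */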
6526 static int
6527 calc_live_regs (HARD_REG_SET *live_regs_mask)
6529 unsigned int reg;
6530 int count;
6531 tree attrs;
6532 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6533 bool nosave_low_regs;
6534 int pr_live, has_call;
6536 attrs = DECL_ATTRIBUTES (current_function_decl);
6537 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6538 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6539 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6540 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6542 CLEAR_HARD_REG_SET (*live_regs_mask);
6543 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6544 && df_regs_ever_live_p (FPSCR_REG))
6545 target_flags &= ~MASK_FPU_SINGLE;
6546 /* If switching to double mode would save a lot of save insns, do that. */
6547 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6548 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6549 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6550 && (! call_really_used_regs[reg]
6551 || interrupt_handler)
6552 && ++count > 2)
6554 target_flags &= ~MASK_FPU_SINGLE;
6555 break;
6557 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6558 knows how to use it. That means the pseudo originally allocated for
6559 the initial value can become the PR_MEDIA_REG hard register, as seen for
6560 execute/20010122-1.c:test9. */
6561 if (TARGET_SHMEDIA)
6562 /* ??? this function is called from initial_elimination_offset, hence we
6563 can't use the result of sh_media_register_for_return here. */
6564 pr_live = sh_pr_n_sets ();
6565 else
6567 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6568 pr_live = (pr_initial
6569 ? (!REG_P (pr_initial)
6570 || REGNO (pr_initial) != (PR_REG))
6571 : df_regs_ever_live_p (PR_REG));
6572 /* For Shcompact, if not optimizing, we end up with a memory reference
6573 using the return address pointer for __builtin_return_address even
6574 though there is no actual need to put the PR register on the stack. */
6575 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6577 /* Force PR to be live if the prologue has to call the SHmedia
6578 argument decoder or register saver. */
6579 if (TARGET_SHCOMPACT
6580 && ((crtl->args.info.call_cookie
6581 & ~ CALL_COOKIE_RET_TRAMP (1))
6582 || crtl->saves_all_registers))
6583 pr_live = 1;
6584 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6585 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6587 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6588 ? pr_live
6589 : interrupt_handler
6590 ? (/* Need to save all the regs ever live. */
6591 (df_regs_ever_live_p (reg)
6592 || (call_really_used_regs[reg]
6593 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6594 || reg == PIC_OFFSET_TABLE_REGNUM)
6595 && has_call)
6596 || (TARGET_SHMEDIA && has_call
6597 && REGISTER_NATURAL_MODE (reg) == SImode
6598 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6599 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6600 && reg != RETURN_ADDRESS_POINTER_REGNUM
6601 && reg != T_REG && reg != GBR_REG
6602 /* Push fpscr only on targets which have an FPU. */
6603 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6604 : (/* Only push those regs which are used and need to be saved. */
6605 (TARGET_SHCOMPACT
6606 && flag_pic
6607 && crtl->args.info.call_cookie
6608 && reg == PIC_OFFSET_TABLE_REGNUM)
6609 || (df_regs_ever_live_p (reg)
6610 && ((!call_really_used_regs[reg]
6611 && !(reg != PIC_OFFSET_TABLE_REGNUM
6612 && fixed_regs[reg] && call_used_regs[reg]))
6613 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6614 || (crtl->calls_eh_return
6615 && (reg == EH_RETURN_DATA_REGNO (0)
6616 || reg == EH_RETURN_DATA_REGNO (1)
6617 || reg == EH_RETURN_DATA_REGNO (2)
6618 || reg == EH_RETURN_DATA_REGNO (3)))
6619 || ((reg == MACL_REG || reg == MACH_REG)
6620 && df_regs_ever_live_p (reg)
6621 && sh_cfun_attr_renesas_p ())
6624 SET_HARD_REG_BIT (*live_regs_mask, reg);
6625 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6627 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6628 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6630 if (FP_REGISTER_P (reg))
6632 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6634 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6635 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6638 else if (XD_REGISTER_P (reg))
6640 /* Must switch to double mode to access these registers. */
6641 target_flags &= ~MASK_FPU_SINGLE;
6645 if (nosave_low_regs && reg == R8_REG)
6646 break;
6648 /* If we have a target register optimization pass after prologue / epilogue
6649 threading, we need to assume all target registers will be live even if
6650 they aren't now. */
6651 if (flag_branch_target_load_optimize2
6652 && TARGET_SAVE_ALL_TARGET_REGS
6653 && shmedia_space_reserved_for_target_registers)
6654 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6655 if ((! call_really_used_regs[reg] || interrupt_handler)
6656 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6658 SET_HARD_REG_BIT (*live_regs_mask, reg);
6659 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6661 /* If this is an interrupt handler, we don't have any call-clobbered
6662 registers we can conveniently use for target register save/restore.
6663 Make sure we save at least one general purpose register when we need
6664 to save target registers. */
6665 if (interrupt_handler
6666 && hard_reg_set_intersect_p (*live_regs_mask,
6667 reg_class_contents[TARGET_REGS])
6668 && ! hard_reg_set_intersect_p (*live_regs_mask,
6669 reg_class_contents[GENERAL_REGS]))
6671 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6672 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6675 return count;
6678 /* Code to generate prologue and epilogue sequences */
6680 /* PUSHED is the number of bytes that are being pushed on the
6681 stack for register saves. Return the frame size, padded
6682 appropriately so that the stack stays properly aligned. */
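/* For illustration: with PUSHED == 20, a raw frame size of 30 and a 4 byte
   STACK_BOUNDARY, the computation below yields ((30 + 20 + 3) & -4) - 20
   == 32, so the 52 bytes of pushed registers plus frame stay 4 byte
   aligned.  */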
6683 static HOST_WIDE_INT
6684 rounded_frame_size (int pushed)
6686 HOST_WIDE_INT size = get_frame_size ();
6687 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6689 if (ACCUMULATE_OUTGOING_ARGS)
6690 size += crtl->outgoing_args_size;
6692 return ((size + pushed + align - 1) & -align) - pushed;
6695 /* Choose a call-clobbered target-branch register that remains
6696 unchanged along the whole function. We set it up as the return
6697 value in the prologue. */
6698 int
6699 sh_media_register_for_return (void)
6701 int regno;
6702 int tr0_used;
6704 if (! current_function_is_leaf)
6705 return -1;
6706 if (lookup_attribute ("interrupt_handler",
6707 DECL_ATTRIBUTES (current_function_decl)))
6708 return -1;
6709 if (sh_cfun_interrupt_handler_p ())
6710 return -1;
6712 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6714 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6715 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6716 return regno;
6718 return -1;
6721 /* The maximum registers we need to save are:
6722 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6723 - 32 floating point registers (for each pair, we save none,
6724 one single precision value, or a double precision value).
6725 - 8 target registers
6726 - add 1 entry for a delimiter. */
6727 #define MAX_SAVED_REGS (62+32+8)
6729 typedef struct save_entry_s
6731 unsigned char reg;
6732 unsigned char mode;
6733 short offset;
6734 } save_entry;
6736 #define MAX_TEMPS 4
6738 /* There will be a delimiter entry with VOIDmode both at the start and the
6739 end of a filled in schedule. The end delimiter has the offset of the
6740 save with the smallest (i.e. most negative) offset. */
6741 typedef struct save_schedule_s
6743 save_entry entries[MAX_SAVED_REGS + 2];
6744 int temps[MAX_TEMPS+1];
6745 } save_schedule;
6747 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6748 use reverse order. Returns the last entry written to (not counting
6749 the delimiter). OFFSET_BASE is a number to be added to all offset
6750 entries. */
6752 static save_entry *
6753 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6754 int offset_base)
6756 int align, i;
6757 save_entry *entry = schedule->entries;
6758 int tmpx = 0;
6759 int offset;
6761 if (! current_function_interrupt)
6762 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6763 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6764 && ! FUNCTION_ARG_REGNO_P (i)
6765 && i != FIRST_RET_REG
6766 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6767 && ! (crtl->calls_eh_return
6768 && (i == EH_RETURN_STACKADJ_REGNO
6769 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6770 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6771 schedule->temps[tmpx++] = i;
6772 entry->reg = -1;
6773 entry->mode = VOIDmode;
6774 entry->offset = offset_base;
6775 entry++;
6776 /* We loop twice: first, we save 8-byte aligned registers in the
6777 higher addresses, which are known to be aligned. Then, we
6778 proceed to saving 32-bit registers that don't need 8-byte
6779 alignment.
6780 If this is an interrupt function, all registers that need saving
6781 need to be saved in full. Moreover, we need to postpone saving
6782 target registers till we have saved some general purpose registers
6783 we can then use as scratch registers. */
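/* The ALIGN == 1 pass below picks the saves whose mode size is a multiple
   of the stack boundary (STACK_BOUNDARY / BITS_PER_UNIT bytes), e.g. DImode
   and DFmode saves; the ALIGN == 0 pass then handles the remaining 32-bit
   saves.  */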
6784 offset = offset_base;
6785 for (align = 1; align >= 0; align--)
6787 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6788 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6790 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6791 int reg = i;
6793 if (current_function_interrupt)
6795 if (TARGET_REGISTER_P (i))
6796 continue;
6797 if (GENERAL_REGISTER_P (i))
6798 mode = DImode;
6800 if (mode == SFmode && (i % 2) == 1
6801 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6802 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6804 mode = DFmode;
6805 i--;
6806 reg--;
6809 /* If we're doing the aligned pass and this is not aligned,
6810 or we're doing the unaligned pass and this is aligned,
6811 skip it. */
6812 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6813 != align)
6814 continue;
6816 if (current_function_interrupt
6817 && GENERAL_REGISTER_P (i)
6818 && tmpx < MAX_TEMPS)
6819 schedule->temps[tmpx++] = i;
6821 offset -= GET_MODE_SIZE (mode);
6822 entry->reg = i;
6823 entry->mode = mode;
6824 entry->offset = offset;
6825 entry++;
6827 if (align && current_function_interrupt)
6828 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6829 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6831 offset -= GET_MODE_SIZE (DImode);
6832 entry->reg = i;
6833 entry->mode = DImode;
6834 entry->offset = offset;
6835 entry++;
6838 entry->reg = -1;
6839 entry->mode = VOIDmode;
6840 entry->offset = offset;
6841 schedule->temps[tmpx] = -1;
6842 return entry - 1;
6845 void
6846 sh_expand_prologue (void)
6848 HARD_REG_SET live_regs_mask;
6849 int d, i;
6850 int d_rounding = 0;
6851 int save_flags = target_flags;
6852 int pretend_args;
6853 int stack_usage;
6854 tree sp_switch_attr
6855 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6857 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6859 /* We have pretend args if we had an object sent partially in registers
6860 and partially on the stack, e.g. a large structure. */
6861 pretend_args = crtl->args.pretend_args_size;
6862 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6863 && (NPARM_REGS(SImode)
6864 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6865 pretend_args = 0;
6867 output_stack_adjust (-pretend_args
6868 - crtl->args.info.stack_regs * 8,
6869 stack_pointer_rtx, 0, NULL, true);
6870 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6872 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6873 /* We're going to use the PIC register to load the address of the
6874 incoming-argument decoder and/or of the return trampoline from
6875 the GOT, so make sure the PIC register is preserved and
6876 initialized. */
6877 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6879 if (TARGET_SHCOMPACT
6880 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6882 int reg;
6884 /* First, make all registers with incoming arguments that will
6885 be pushed onto the stack live, so that register renaming
6886 doesn't overwrite them. */
6887 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6888 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6889 >= NPARM_REGS (SImode) - reg)
6890 for (; reg < NPARM_REGS (SImode); reg++)
6891 emit_insn (gen_shcompact_preserve_incoming_args
6892 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6893 else if (CALL_COOKIE_INT_REG_GET
6894 (crtl->args.info.call_cookie, reg) == 1)
6895 emit_insn (gen_shcompact_preserve_incoming_args
6896 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6898 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6899 stack_pointer_rtx);
6900 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6901 GEN_INT (crtl->args.info.call_cookie));
6902 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6903 gen_rtx_REG (SImode, R0_REG));
6905 else if (TARGET_SHMEDIA)
6907 int tr = sh_media_register_for_return ();
6909 if (tr >= 0)
6910 emit_move_insn (gen_rtx_REG (DImode, tr),
6911 gen_rtx_REG (DImode, PR_MEDIA_REG));
6914 /* Emit the code for SETUP_VARARGS. */
6915 if (cfun->stdarg)
6917 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6919 /* Push arg regs as if they'd been provided by caller in stack. */
6920 for (i = 0; i < NPARM_REGS(SImode); i++)
6922 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6924 if (i >= (NPARM_REGS(SImode)
6925 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6927 break;
6928 push (rn);
6929 stack_usage += GET_MODE_SIZE (SImode);
6934 /* If we're supposed to switch stacks at function entry, do so now. */
6935 if (sp_switch_attr)
6937 rtx lab, newsrc;
6938 /* The argument specifies a variable holding the address of the
6939 stack the interrupt function should switch to/from at entry/exit. */
6940 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6941 const char *s
6942 = ggc_strdup (TREE_STRING_POINTER (arg));
6943 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6945 lab = add_constant (sp_switch, SImode, 0);
6946 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6947 newsrc = gen_const_mem (SImode, newsrc);
6949 emit_insn (gen_sp_switch_1 (newsrc));
6952 d = calc_live_regs (&live_regs_mask);
6953 /* ??? Maybe we could save some switching if we can move a mode switch
6954 that already happens to be at the function start into the prologue. */
6955 if (target_flags != save_flags && ! current_function_interrupt)
6956 emit_insn (gen_toggle_sz ());
6958 if (TARGET_SH5)
6960 int offset_base, offset;
6961 rtx r0 = NULL_RTX;
6962 int offset_in_r0 = -1;
6963 int sp_in_r0 = 0;
6964 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6965 int total_size, save_size;
6966 save_schedule schedule;
6967 save_entry *entry;
6968 int *tmp_pnt;
6970 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6971 && ! current_function_interrupt)
6972 r0 = gen_rtx_REG (Pmode, R0_REG);
6974 /* D is the actual number of bytes that we need for saving registers;
6975 however, in initial_elimination_offset we have committed to using
6976 an additional TREGS_SPACE amount of bytes - in order to keep both
6977 addresses to arguments supplied by the caller and local variables
6978 valid, we must keep this gap. Place it between the incoming
6979 arguments and the actually saved registers in a bid to optimize
6980 locality of reference. */
6981 total_size = d + tregs_space;
6982 total_size += rounded_frame_size (total_size);
6983 save_size = total_size - rounded_frame_size (d);
6984 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6985 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6986 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6988 /* If adjusting the stack in a single step costs nothing extra, do so.
6989 I.e. either if a single addi is enough, or we need a movi anyway,
6990 and we don't exceed the maximum offset range (the test for the
6991 latter is conservative for simplicity). */
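/* For example, assuming CONST_OK_FOR_I10 accepts signed 10-bit constants,
   a total_size of 400 can be allocated with one addi of -400, so the
   rounding slack is folded into the single adjustment below.  */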
6992 if (TARGET_SHMEDIA
6993 && (CONST_OK_FOR_I10 (-total_size)
6994 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6995 && total_size <= 2044)))
6996 d_rounding = total_size - save_size;
6998 offset_base = d + d_rounding;
7000 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7001 0, NULL, true);
7002 stack_usage += save_size + d_rounding;
7004 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7005 tmp_pnt = schedule.temps;
7006 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7008 enum machine_mode mode = (enum machine_mode) entry->mode;
7009 unsigned int reg = entry->reg;
7010 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7011 rtx orig_reg_rtx;
7013 offset = entry->offset;
7015 reg_rtx = gen_rtx_REG (mode, reg);
7017 mem_rtx = gen_frame_mem (mode,
7018 gen_rtx_PLUS (Pmode,
7019 stack_pointer_rtx,
7020 GEN_INT (offset)));
7022 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7024 gcc_assert (r0);
7025 mem_rtx = NULL_RTX;
7028 if (HAVE_PRE_DECREMENT
7029 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7030 || mem_rtx == NULL_RTX
7031 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7033 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7035 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7036 pre_dec = NULL_RTX;
7037 else
7039 mem_rtx = NULL_RTX;
7040 offset += GET_MODE_SIZE (mode);
7044 if (mem_rtx != NULL_RTX)
7045 goto addr_ok;
7047 if (offset_in_r0 == -1)
7049 emit_move_insn (r0, GEN_INT (offset));
7050 offset_in_r0 = offset;
7052 else if (offset != offset_in_r0)
7054 emit_move_insn (r0,
7055 gen_rtx_PLUS
7056 (Pmode, r0,
7057 GEN_INT (offset - offset_in_r0)));
7058 offset_in_r0 += offset - offset_in_r0;
7061 if (pre_dec != NULL_RTX)
7063 if (! sp_in_r0)
7065 emit_move_insn (r0,
7066 gen_rtx_PLUS
7067 (Pmode, r0, stack_pointer_rtx));
7068 sp_in_r0 = 1;
7071 offset -= GET_MODE_SIZE (mode);
7072 offset_in_r0 -= GET_MODE_SIZE (mode);
7074 mem_rtx = pre_dec;
7076 else if (sp_in_r0)
7077 mem_rtx = gen_frame_mem (mode, r0);
7078 else
7079 mem_rtx = gen_frame_mem (mode,
7080 gen_rtx_PLUS (Pmode,
7081 stack_pointer_rtx,
7082 r0));
7084 /* We must not use an r0-based address for target-branch
7085 registers or for special registers without pre-dec
7086 memory addresses, since we store their values in r0
7087 first. */
7088 gcc_assert (!TARGET_REGISTER_P (reg)
7089 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7090 || mem_rtx == pre_dec));
7092 addr_ok:
7093 orig_reg_rtx = reg_rtx;
7094 if (TARGET_REGISTER_P (reg)
7095 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7096 && mem_rtx != pre_dec))
7098 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7100 emit_move_insn (tmp_reg, reg_rtx);
7102 if (REGNO (tmp_reg) == R0_REG)
7104 offset_in_r0 = -1;
7105 sp_in_r0 = 0;
7106 gcc_assert (!refers_to_regno_p
7107 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7110 if (*++tmp_pnt <= 0)
7111 tmp_pnt = schedule.temps;
7113 reg_rtx = tmp_reg;
7116 rtx insn;
7118 /* Mark as interesting for dwarf cfi generator */
7119 insn = emit_move_insn (mem_rtx, reg_rtx);
7120 RTX_FRAME_RELATED_P (insn) = 1;
7121 /* If we use an intermediate register for the save, we can't
7122 describe this exactly in cfi as a copy of the to-be-saved
7123 register into the temporary register and then a copy of the
7124 temporary register to the stack, because the temporary register can
7125 have a different natural size than the to-be-saved register.
7126 Thus, we gloss over the intermediate copy and pretend we do
7127 a direct save from the to-be-saved register. */
7128 if (REGNO (reg_rtx) != reg)
7130 rtx set;
7132 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7133 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7136 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7138 rtx reg_rtx = gen_rtx_REG (mode, reg);
7139 rtx set;
7140 rtx mem_rtx = gen_frame_mem (mode,
7141 gen_rtx_PLUS (Pmode,
7142 stack_pointer_rtx,
7143 GEN_INT (offset)));
7145 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7146 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7151 gcc_assert (entry->offset == d_rounding);
7153 else
7155 push_regs (&live_regs_mask, current_function_interrupt);
7156 stack_usage += d;
7159 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7160 emit_insn (gen_GOTaddr2picreg ());
7162 if (SHMEDIA_REGS_STACK_ADJUST ())
7164 /* This must NOT go through the PLT, otherwise mach and macl
7165 may be clobbered. */
7166 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7167 (TARGET_FPU_ANY
7168 ? "__GCC_push_shmedia_regs"
7169 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7170 emit_insn (gen_shmedia_save_restore_regs_compact
7171 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7174 if (target_flags != save_flags && ! current_function_interrupt)
7175 emit_insn (gen_toggle_sz ());
7177 target_flags = save_flags;
7179 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7180 stack_pointer_rtx, 0, NULL, true);
7181 stack_usage += rounded_frame_size (d) - d_rounding;
7183 if (frame_pointer_needed)
7184 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7186 if (TARGET_SHCOMPACT
7187 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7189 /* This must NOT go through the PLT, otherwise mach and macl
7190 may be clobbered. */
7191 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7192 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7193 emit_insn (gen_shcompact_incoming_args ());
7196 if (flag_stack_usage_info)
7197 current_function_static_stack_size = stack_usage;
7200 void
7201 sh_expand_epilogue (bool sibcall_p)
7203 HARD_REG_SET live_regs_mask;
7204 int d, i;
7205 int d_rounding = 0;
7207 int save_flags = target_flags;
7208 int frame_size, save_size;
7209 int fpscr_deferred = 0;
7210 int e = sibcall_p ? -1 : 1;
7212 d = calc_live_regs (&live_regs_mask);
7214 save_size = d;
7215 frame_size = rounded_frame_size (d);
7217 if (TARGET_SH5)
7219 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7220 int total_size;
7221 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7222 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7223 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7225 total_size = d + tregs_space;
7226 total_size += rounded_frame_size (total_size);
7227 save_size = total_size - frame_size;
7229 /* If adjusting the stack in a single step costs nothing extra, do so.
7230 I.e. either if a single addi is enough, or we need a movi anyway,
7231 and we don't exceed the maximum offset range (the test for the
7232 latter is conservative for simplicity). */
7233 if (TARGET_SHMEDIA
7234 && ! frame_pointer_needed
7235 && (CONST_OK_FOR_I10 (total_size)
7236 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7237 && total_size <= 2044)))
7238 d_rounding = frame_size;
7240 frame_size -= d_rounding;
7243 if (frame_pointer_needed)
7245 /* We must avoid scheduling the epilogue with previous basic blocks.
7246 See PR/18032 and PR/40313. */
7247 emit_insn (gen_blockage ());
7248 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7249 &live_regs_mask, false);
7251 /* We must avoid moving the stack pointer adjustment past code
7252 which reads from the local frame, else an interrupt could
7253 occur after the SP adjustment and clobber data in the local
7254 frame. */
7255 emit_insn (gen_blockage ());
7256 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7258 else if (frame_size)
7260 /* We must avoid moving the stack pointer adjustment past code
7261 which reads from the local frame, else an interrupt could
7262 occur after the SP adjustment and clobber data in the local
7263 frame. */
7264 emit_insn (gen_blockage ());
7265 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7266 &live_regs_mask, false);
7269 if (SHMEDIA_REGS_STACK_ADJUST ())
7271 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7272 (TARGET_FPU_ANY
7273 ? "__GCC_pop_shmedia_regs"
7274 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7275 /* This must NOT go through the PLT, otherwise mach and macl
7276 may be clobbered. */
7277 emit_insn (gen_shmedia_save_restore_regs_compact
7278 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7281 /* Pop all the registers. */
7283 if (target_flags != save_flags && ! current_function_interrupt)
7284 emit_insn (gen_toggle_sz ());
7285 if (TARGET_SH5)
7287 int offset_base, offset;
7288 int offset_in_r0 = -1;
7289 int sp_in_r0 = 0;
7290 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7291 save_schedule schedule;
7292 save_entry *entry;
7293 int *tmp_pnt;
7295 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7296 offset_base = -entry[1].offset + d_rounding;
7297 tmp_pnt = schedule.temps;
7298 for (; entry->mode != VOIDmode; entry--)
7300 enum machine_mode mode = (enum machine_mode) entry->mode;
7301 int reg = entry->reg;
7302 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7304 offset = offset_base + entry->offset;
7305 reg_rtx = gen_rtx_REG (mode, reg);
7307 mem_rtx = gen_frame_mem (mode,
7308 gen_rtx_PLUS (Pmode,
7309 stack_pointer_rtx,
7310 GEN_INT (offset)));
7312 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7313 mem_rtx = NULL_RTX;
7315 if (HAVE_POST_INCREMENT
7316 && (offset == offset_in_r0
7317 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7318 && mem_rtx == NULL_RTX)
7319 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7321 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7323 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7324 post_inc = NULL_RTX;
7325 else
7326 mem_rtx = NULL_RTX;
7329 if (mem_rtx != NULL_RTX)
7330 goto addr_ok;
7332 if (offset_in_r0 == -1)
7334 emit_move_insn (r0, GEN_INT (offset));
7335 offset_in_r0 = offset;
7337 else if (offset != offset_in_r0)
7339 emit_move_insn (r0,
7340 gen_rtx_PLUS
7341 (Pmode, r0,
7342 GEN_INT (offset - offset_in_r0)));
7343 offset_in_r0 += offset - offset_in_r0;
7346 if (post_inc != NULL_RTX)
7348 if (! sp_in_r0)
7350 emit_move_insn (r0,
7351 gen_rtx_PLUS
7352 (Pmode, r0, stack_pointer_rtx));
7353 sp_in_r0 = 1;
7356 mem_rtx = post_inc;
7358 offset_in_r0 += GET_MODE_SIZE (mode);
7360 else if (sp_in_r0)
7361 mem_rtx = gen_frame_mem (mode, r0);
7362 else
7363 mem_rtx = gen_frame_mem (mode,
7364 gen_rtx_PLUS (Pmode,
7365 stack_pointer_rtx,
7366 r0));
7368 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7369 || mem_rtx == post_inc);
7371 addr_ok:
7372 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7373 && mem_rtx != post_inc)
7375 emit_move_insn (r0, mem_rtx);
7376 mem_rtx = r0;
7378 else if (TARGET_REGISTER_P (reg))
7380 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7382 /* Give the scheduler a bit of freedom by using up to
7383 MAX_TEMPS registers in a round-robin fashion. */
7384 emit_move_insn (tmp_reg, mem_rtx);
7385 mem_rtx = tmp_reg;
7386 if (*++tmp_pnt < 0)
7387 tmp_pnt = schedule.temps;
7390 emit_move_insn (reg_rtx, mem_rtx);
7393 gcc_assert (entry->offset + offset_base == d + d_rounding);
7395 else /* ! TARGET_SH5 */
7397 int last_reg;
7399 save_size = 0;
7400 /* For an ISR with RESBANK attribute assigned, don't pop the PR
7401 register. */
7402 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7403 && !sh_cfun_resbank_handler_p ())
7405 if (!frame_pointer_needed)
7406 emit_insn (gen_blockage ());
7407 pop (PR_REG);
7410 /* Banked registers are popped first to avoid being scheduled in the
7411 delay slot. RTE switches banks before the ds instruction. */
7412 if (current_function_interrupt)
7414 bool use_movml = false;
7416 if (TARGET_SH2A)
7418 unsigned int count = 0;
7420 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7421 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7422 count++;
7423 else
7424 break;
7426 /* Use movml when all banked registers are popped. */
7427 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7428 use_movml = true;
7431 if (use_movml)
7433 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7435 /* We must avoid scheduling the multiple load insns together
7436 with other insns. */
7437 emit_insn (gen_blockage ());
7438 emit_insn (gen_movml_pop_banked (sp_reg));
7439 emit_insn (gen_blockage ());
7441 else
7442 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7443 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7444 pop (i);
7446 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7448 else
7449 last_reg = FIRST_PSEUDO_REGISTER;
7451 for (i = 0; i < last_reg; i++)
7453 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7455 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7456 && hard_reg_set_intersect_p (live_regs_mask,
7457 reg_class_contents[DF_REGS]))
7458 fpscr_deferred = 1;
7459 /* For an ISR with RESBANK attribute assigned, don't pop the
7460 following registers - R0-R14, MACH, MACL and GBR. */
7461 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7462 && ! (sh_cfun_resbank_handler_p ()
7463 && ((j >= FIRST_GENERAL_REG
7464 && j < LAST_GENERAL_REG)
7465 || j == MACH_REG
7466 || j == MACL_REG
7467 || j == GBR_REG)))
7468 pop (j);
7470 if (j == FIRST_FP_REG && fpscr_deferred)
7471 pop (FPSCR_REG);
7474 if (target_flags != save_flags && ! current_function_interrupt)
7475 emit_insn (gen_toggle_sz ());
7476 target_flags = save_flags;
7478 output_stack_adjust (crtl->args.pretend_args_size
7479 + save_size + d_rounding
7480 + crtl->args.info.stack_regs * 8,
7481 stack_pointer_rtx, e, NULL, false);
7483 if (crtl->calls_eh_return)
7484 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7485 EH_RETURN_STACKADJ_RTX));
7487 /* Switch back to the normal stack if necessary. */
7488 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7489 emit_insn (gen_sp_switch_2 ());
7491 /* Tell flow the insn that pops PR isn't dead. */
7492 /* PR_REG will never be live in SHmedia mode, and we don't need to
7493 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7494 by the return pattern. */
7495 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7496 emit_use (gen_rtx_REG (SImode, PR_REG));
7499 static int sh_need_epilogue_known = 0;
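/* Cached result of sh_need_epilogue: 0 means not yet computed, 1 means an
   epilogue is needed, -1 means it would be empty.  */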
7501 int
7502 sh_need_epilogue (void)
7504 if (! sh_need_epilogue_known)
7506 rtx epilogue;
7508 start_sequence ();
7509 sh_expand_epilogue (0);
7510 epilogue = get_insns ();
7511 end_sequence ();
7512 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7514 return sh_need_epilogue_known > 0;
7517 /* Emit code to change the current function's return address to RA.
7518 TEMP is available as a scratch register, if needed. */
7520 void
7521 sh_set_return_address (rtx ra, rtx tmp)
7523 HARD_REG_SET live_regs_mask;
7524 int d;
7525 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7526 int pr_offset;
7528 d = calc_live_regs (&live_regs_mask);
7530 /* If pr_reg isn't live, we can set it (or the register given in
7531 sh_media_register_for_return) directly. */
7532 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7534 rtx rr;
7536 if (TARGET_SHMEDIA)
7538 int rr_regno = sh_media_register_for_return ();
7540 if (rr_regno < 0)
7541 rr_regno = pr_reg;
7543 rr = gen_rtx_REG (DImode, rr_regno);
7545 else
7546 rr = gen_rtx_REG (SImode, pr_reg);
7548 emit_insn (GEN_MOV (rr, ra));
7549 /* Tell flow the register for return isn't dead. */
7550 emit_use (rr);
7551 return;
7554 if (TARGET_SH5)
7556 int offset;
7557 save_schedule schedule;
7558 save_entry *entry;
7560 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7561 offset = entry[1].offset;
7562 for (; entry->mode != VOIDmode; entry--)
7563 if (entry->reg == pr_reg)
7564 goto found;
7566 /* We can't find the pr register. */
7567 gcc_unreachable ();
7569 found:
7570 offset = entry->offset - offset;
7571 pr_offset = (rounded_frame_size (d) + offset
7572 + SHMEDIA_REGS_STACK_ADJUST ());
7574 else
7575 pr_offset = rounded_frame_size (d);
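/* The code below forms frame-pointer-or-stack-pointer + PR_OFFSET in TMP
   and stores RA through it, overwriting the stack slot in which the
   prologue saved PR.  */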
7577 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7579 if (frame_pointer_needed)
7580 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7581 else
7582 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7584 tmp = gen_frame_mem (Pmode, tmp);
7585 emit_insn (GEN_MOV (tmp, ra));
7586 /* Tell flow this store isn't dead. */
7587 emit_use (tmp);
7590 /* Clear variables at function end. */
7592 static void
7593 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7594 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7596 sh_need_epilogue_known = 0;
7599 static rtx
7600 sh_builtin_saveregs (void)
7602 /* First unnamed integer register. */
7603 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7604 /* Number of integer registers we need to save. */
7605 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7606 /* First unnamed SFmode float reg */
7607 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7608 /* Number of SFmode float regs to save. */
7609 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7610 rtx regbuf, fpregs;
7611 int bufsize, regno;
7612 alias_set_type alias_set;
7614 if (TARGET_SH5)
7616 if (n_intregs)
7618 int pushregs = n_intregs;
7620 while (pushregs < NPARM_REGS (SImode) - 1
7621 && (CALL_COOKIE_INT_REG_GET
7622 (crtl->args.info.call_cookie,
7623 NPARM_REGS (SImode) - pushregs)
7624 == 1))
7626 crtl->args.info.call_cookie
7627 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7628 - pushregs, 1);
7629 pushregs++;
7632 if (pushregs == NPARM_REGS (SImode))
7633 crtl->args.info.call_cookie
7634 |= (CALL_COOKIE_INT_REG (0, 1)
7635 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7636 else
7637 crtl->args.info.call_cookie
7638 |= CALL_COOKIE_STACKSEQ (pushregs);
7640 crtl->args.pretend_args_size += 8 * n_intregs;
7642 if (TARGET_SHCOMPACT)
7643 return const0_rtx;
7646 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7648 error ("__builtin_saveregs not supported by this subtarget");
7649 return const0_rtx;
7652 if (TARGET_SHMEDIA)
7653 n_floatregs = 0;
7655 /* Allocate block of memory for the regs. */
7656 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7657 Or can assign_stack_local accept a 0 SIZE argument? */
7658 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7660 if (TARGET_SHMEDIA)
7661 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7662 else if (n_floatregs & 1)
7664 rtx addr;
7666 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7667 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7668 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7669 regbuf = change_address (regbuf, BLKmode, addr);
7671 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7673 rtx addr, mask;
7675 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7676 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7677 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7678 emit_insn (gen_andsi3 (addr, addr, mask));
7679 regbuf = change_address (regbuf, BLKmode, addr);
7681 else
7682 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7683 alias_set = get_varargs_alias_set ();
7684 set_mem_alias_set (regbuf, alias_set);
7686 /* Save int args.
7687 This is optimized to only save the regs that are necessary. Explicitly
7688 named args need not be saved. */
7689 if (n_intregs > 0)
7690 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7691 adjust_address (regbuf, BLKmode,
7692 n_floatregs * UNITS_PER_WORD),
7693 n_intregs);
7695 if (TARGET_SHMEDIA)
7696 /* Return the address of the regbuf. */
7697 return XEXP (regbuf, 0);
7699 /* Save float args.
7700 This is optimized to only save the regs that are necessary. Explicitly
7701 named args need not be saved.
7702 We explicitly build a pointer to the buffer because it halves the insn
7703 count when not optimizing (otherwise the pointer is built for each reg
7704 saved).
7705 We emit the moves in reverse order so that we can use predecrement. */
7707 fpregs = copy_to_mode_reg (Pmode,
7708 plus_constant (XEXP (regbuf, 0),
7709 n_floatregs * UNITS_PER_WORD));
7710 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7712 rtx mem;
7713 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7715 emit_insn (gen_addsi3 (fpregs, fpregs,
7716 GEN_INT (-2 * UNITS_PER_WORD)));
7717 mem = change_address (regbuf, DFmode, fpregs);
7718 emit_move_insn (mem,
7719 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7721 regno = first_floatreg;
7722 if (regno & 1)
7724 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7725 mem = change_address (regbuf, SFmode, fpregs);
7726 emit_move_insn (mem,
7727 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7728 - (TARGET_LITTLE_ENDIAN != 0)));
7731 else
7732 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7734 rtx mem;
7736 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7737 mem = change_address (regbuf, SFmode, fpregs);
7738 emit_move_insn (mem,
7739 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7742 /* Return the address of the regbuf. */
7743 return XEXP (regbuf, 0);
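/* Illustrative layout sketch, not part of the original source: from the
   code above, the varargs register-save buffer ends up roughly as

       regbuf + 0 .. n_floatregs * UNITS_PER_WORD       unnamed FP args,
                                                        stored downward from
                                                        the top of this area
       regbuf + n_floatregs * UNITS_PER_WORD ..         unnamed integer args,
              + n_intregs * UNITS_PER_WORD              one word each

   sh_va_start below relies on this split when it initializes
   __va_next_fp and __va_next_o.  */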
7746 /* Define the `__builtin_va_list' type for the ABI. */
7748 static tree
7749 sh_build_builtin_va_list (void)
7751 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7752 tree record, type_decl;
7754 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7755 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7756 return ptr_type_node;
7758 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7759 type_decl = build_decl (BUILTINS_LOCATION,
7760 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7762 f_next_o = build_decl (BUILTINS_LOCATION,
7763 FIELD_DECL, get_identifier ("__va_next_o"),
7764 ptr_type_node);
7765 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7766 FIELD_DECL,
7767 get_identifier ("__va_next_o_limit"),
7768 ptr_type_node);
7769 f_next_fp = build_decl (BUILTINS_LOCATION,
7770 FIELD_DECL, get_identifier ("__va_next_fp"),
7771 ptr_type_node);
7772 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7773 FIELD_DECL,
7774 get_identifier ("__va_next_fp_limit"),
7775 ptr_type_node);
7776 f_next_stack = build_decl (BUILTINS_LOCATION,
7777 FIELD_DECL, get_identifier ("__va_next_stack"),
7778 ptr_type_node);
7780 DECL_FIELD_CONTEXT (f_next_o) = record;
7781 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7782 DECL_FIELD_CONTEXT (f_next_fp) = record;
7783 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7784 DECL_FIELD_CONTEXT (f_next_stack) = record;
7786 TYPE_STUB_DECL (record) = type_decl;
7787 TYPE_NAME (record) = type_decl;
7788 TYPE_FIELDS (record) = f_next_o;
7789 DECL_CHAIN (f_next_o) = f_next_o_limit;
7790 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7791 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7792 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7794 layout_type (record);
7796 return record;
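/* Illustrative sketch, not part of the original source: the record built
   above corresponds roughly to the following C declaration (field names as
   used above; the exact layout is whatever layout_type decides).  */
#if 0
struct __va_list_tag
{
  void *__va_next_o;         /* next unnamed integer argument          */
  void *__va_next_o_limit;   /* end of the integer register save area  */
  void *__va_next_fp;        /* next unnamed FP argument               */
  void *__va_next_fp_limit;  /* end of the FP register save area       */
  void *__va_next_stack;     /* next stack-passed argument             */
};
#endif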
7799 /* Implement `va_start' for varargs and stdarg. */
7801 static void
7802 sh_va_start (tree valist, rtx nextarg)
7804 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7805 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7806 tree t, u;
7807 int nfp, nint;
7809 if (TARGET_SH5)
7811 expand_builtin_saveregs ();
7812 std_expand_builtin_va_start (valist, nextarg);
7813 return;
7816 if ((! TARGET_SH2E && ! TARGET_SH4)
7817 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7819 std_expand_builtin_va_start (valist, nextarg);
7820 return;
7823 f_next_o = TYPE_FIELDS (va_list_type_node);
7824 f_next_o_limit = DECL_CHAIN (f_next_o);
7825 f_next_fp = DECL_CHAIN (f_next_o_limit);
7826 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7827 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7829 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7830 NULL_TREE);
7831 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7832 valist, f_next_o_limit, NULL_TREE);
7833 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7834 NULL_TREE);
7835 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7836 valist, f_next_fp_limit, NULL_TREE);
7837 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7838 valist, f_next_stack, NULL_TREE);
7840 /* Call __builtin_saveregs. */
7841 u = make_tree (sizetype, expand_builtin_saveregs ());
7842 u = fold_convert (ptr_type_node, u);
7843 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7844 TREE_SIDE_EFFECTS (t) = 1;
7845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7847 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7848 if (nfp < 8)
7849 nfp = 8 - nfp;
7850 else
7851 nfp = 0;
7852 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7853 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7854 TREE_SIDE_EFFECTS (t) = 1;
7855 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7857 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7858 TREE_SIDE_EFFECTS (t) = 1;
7859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7861 nint = crtl->args.info.arg_count[SH_ARG_INT];
7862 if (nint < 4)
7863 nint = 4 - nint;
7864 else
7865 nint = 0;
7866 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7867 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7868 TREE_SIDE_EFFECTS (t) = 1;
7869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7871 u = make_tree (ptr_type_node, nextarg);
7872 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7873 TREE_SIDE_EFFECTS (t) = 1;
7874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7877 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7878 member, return it. */
7879 static tree
7880 find_sole_member (tree type)
7882 tree field, member = NULL_TREE;
7884 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7886 if (TREE_CODE (field) != FIELD_DECL)
7887 continue;
7888 if (!DECL_SIZE (field))
7889 return NULL_TREE;
7890 if (integer_zerop (DECL_SIZE (field)))
7891 continue;
7892 if (member)
7893 return NULL_TREE;
7894 member = field;
7896 return member;
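/* Illustrative example, not part of the original source: given

     struct wrapped { double d; };

   find_sole_member returns the FIELD_DECL for 'd', so sh_gimplify_va_arg_expr
   below treats a 'struct wrapped' argument like a plain double when choosing
   between the FP and the integer save areas.  */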
7898 /* Implement `va_arg'. */
7900 static tree
7901 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7902 gimple_seq *post_p ATTRIBUTE_UNUSED)
7904 HOST_WIDE_INT size, rsize;
7905 tree tmp, pptr_type_node;
7906 tree addr, lab_over = NULL, result = NULL;
7907 bool pass_by_ref;
7908 tree eff_type;
7910 if (!VOID_TYPE_P (type))
7911 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7912 else
7913 pass_by_ref = false;
7915 if (pass_by_ref)
7916 type = build_pointer_type (type);
7918 size = int_size_in_bytes (type);
7919 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7920 pptr_type_node = build_pointer_type (ptr_type_node);
7922 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7923 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7925 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7926 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7927 int pass_as_float;
7928 tree lab_false;
7929 tree member;
7931 f_next_o = TYPE_FIELDS (va_list_type_node);
7932 f_next_o_limit = DECL_CHAIN (f_next_o);
7933 f_next_fp = DECL_CHAIN (f_next_o_limit);
7934 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7935 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7937 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7938 NULL_TREE);
7939 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7940 valist, f_next_o_limit, NULL_TREE);
7941 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7942 valist, f_next_fp, NULL_TREE);
7943 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7944 valist, f_next_fp_limit, NULL_TREE);
7945 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7946 valist, f_next_stack, NULL_TREE);
7948 /* Structures with a single member with a distinct mode are passed
7949 like their member. This is relevant if the latter has a REAL_TYPE
7950 or COMPLEX_TYPE type. */
7951 eff_type = type;
7952 while (TREE_CODE (eff_type) == RECORD_TYPE
7953 && (member = find_sole_member (eff_type))
7954 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7955 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7956 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7958 tree field_type = TREE_TYPE (member);
7960 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7961 eff_type = field_type;
7962 else
7964 gcc_assert ((TYPE_ALIGN (eff_type)
7965 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7966 || (TYPE_ALIGN (eff_type)
7967 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7968 break;
7972 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7974 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7975 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7976 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7977 && size <= 16));
7979 else
7981 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7984 addr = create_tmp_var (pptr_type_node, NULL);
7985 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7986 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7988 valist = build_simple_mem_ref (addr);
7990 if (pass_as_float)
7992 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7993 tree cmp;
7994 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7996 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7997 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7999 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8000 tmp = next_fp_limit;
8001 if (size > 4 && !is_double)
8002 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8003 tmp = build2 (GE_EXPR, boolean_type_node,
8004 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8005 cmp = build3 (COND_EXPR, void_type_node, tmp,
8006 build1 (GOTO_EXPR, void_type_node,
8007 unshare_expr (lab_false)), NULL_TREE);
8008 if (!is_double)
8009 gimplify_and_add (cmp, pre_p);
8011 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8012 || (is_double || size == 16))
8014 tmp = fold_convert (sizetype, next_fp_tmp);
8015 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8016 size_int (UNITS_PER_WORD));
8017 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8018 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8020 if (is_double)
8021 gimplify_and_add (cmp, pre_p);
8023 #ifdef FUNCTION_ARG_SCmode_WART
8024 if (TYPE_MODE (eff_type) == SCmode
8025 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8027 tree subtype = TREE_TYPE (eff_type);
8028 tree real, imag;
8030 imag
8031 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8032 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8034 real
8035 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8036 real = get_initialized_tmp_var (real, pre_p, NULL);
8038 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8039 if (type != eff_type)
8040 result = build1 (VIEW_CONVERT_EXPR, type, result);
8041 result = get_initialized_tmp_var (result, pre_p, NULL);
8043 #endif /* FUNCTION_ARG_SCmode_WART */
8045 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8046 gimplify_and_add (tmp, pre_p);
8048 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8049 gimplify_and_add (tmp, pre_p);
8051 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8052 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8053 gimplify_assign (unshare_expr (next_fp_tmp),
8054 unshare_expr (valist), pre_p);
8056 gimplify_assign (unshare_expr (valist),
8057 unshare_expr (next_fp_tmp), post_p);
8058 valist = next_fp_tmp;
8060 else
8062 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8063 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8064 unshare_expr (next_o_limit));
8065 tmp = build3 (COND_EXPR, void_type_node, tmp,
8066 build1 (GOTO_EXPR, void_type_node,
8067 unshare_expr (lab_false)),
8068 NULL_TREE);
8069 gimplify_and_add (tmp, pre_p);
8071 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8072 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8074 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8075 gimplify_and_add (tmp, pre_p);
8077 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8078 gimplify_and_add (tmp, pre_p);
8080 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8081 gimplify_assign (unshare_expr (next_o),
8082 unshare_expr (next_o_limit), pre_p);
8084 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8085 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8088 if (!result)
8090 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8091 gimplify_and_add (tmp, pre_p);
8095 /* ??? In va-sh.h, there had been code to make values larger than
8096 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8098 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8099 if (result)
8101 gimplify_assign (result, tmp, pre_p);
8102 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8103 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8104 gimplify_and_add (tmp, pre_p);
8106 else
8107 result = tmp;
8109 if (pass_by_ref)
8110 result = build_va_arg_indirect_ref (result);
8112 return result;
8115 /* 64-bit floating-point memory transfers are paired single-precision loads
8116 or stores. So DWARF information needs fixing in little endian mode (unless
8117 PR=SZ=1 in FPSCR). */
8119 sh_dwarf_register_span (rtx reg)
8121 unsigned regno = REGNO (reg);
8123 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8124 return NULL_RTX;
8126 return
8127 gen_rtx_PARALLEL (VOIDmode,
8128 gen_rtvec (2,
8129 gen_rtx_REG (SFmode,
8130 DBX_REGISTER_NUMBER (regno+1)),
8131 gen_rtx_REG (SFmode,
8132 DBX_REGISTER_NUMBER (regno))));
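/* Illustrative example (assumption, little-endian with double-precision FP):
   for a DFmode value living in the register pair starting at fr4, the span
   built above is roughly

     (parallel [(reg:SF fr5) (reg:SF fr4)])

   so the debug info describes the two single-precision halves in the order
   they actually sit in memory.  */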
8135 static enum machine_mode
8136 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8137 int *punsignedp, const_tree funtype,
8138 int for_return)
8140 if (sh_promote_prototypes (funtype))
8141 return promote_mode (type, mode, punsignedp);
8142 else
8143 return default_promote_function_mode (type, mode, punsignedp, funtype,
8144 for_return);
8147 static bool
8148 sh_promote_prototypes (const_tree type)
8150 if (TARGET_HITACHI)
8151 return 0;
8152 if (! type)
8153 return 1;
8154 return ! sh_attr_renesas_p (type);
8157 /* Whether an argument must be passed by reference. On SHcompact, we
8158 pretend arguments wider than 32 bits that would have been passed in
8159 registers are passed by reference, so that an SHmedia trampoline
8160 loads them into the full 64-bit registers. */
8162 static int
8163 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8164 const_tree type, bool named)
8166 unsigned HOST_WIDE_INT size;
8168 if (type)
8169 size = int_size_in_bytes (type);
8170 else
8171 size = GET_MODE_SIZE (mode);
8173 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8174 && (!named
8175 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8176 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8177 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8178 && size > 4
8179 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8180 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8181 return size;
8182 else
8183 return 0;
8186 static bool
8187 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8188 const_tree type, bool named)
8190 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8192 if (targetm.calls.must_pass_in_stack (mode, type))
8193 return true;
8195 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8196 wants to know about pass-by-reference semantics for incoming
8197 arguments. */
8198 if (! cum)
8199 return false;
8201 if (TARGET_SHCOMPACT)
8203 cum->byref = shcompact_byref (cum, mode, type, named);
8204 return cum->byref != 0;
8207 return false;
8210 static bool
8211 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8212 const_tree type, bool named ATTRIBUTE_UNUSED)
8214 /* ??? How can it possibly be correct to return true only on the
8215 caller side of the equation? Is there someplace else in the
8216 sh backend that's magically producing the copies? */
8217 return (get_cumulative_args (cum)->outgoing
8218 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8219 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8222 static int
8223 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8224 tree type, bool named ATTRIBUTE_UNUSED)
8226 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8227 int words = 0;
8229 if (!TARGET_SH5
8230 && PASS_IN_REG_P (*cum, mode, type)
8231 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8232 && (ROUND_REG (*cum, mode)
8233 + (mode != BLKmode
8234 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8235 : ROUND_ADVANCE (int_size_in_bytes (type)))
8236 > NPARM_REGS (mode)))
8237 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8239 else if (!TARGET_SHCOMPACT
8240 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8241 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8243 return words * UNITS_PER_WORD;
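/* Illustrative example (assumption, SH1/SH2 without FP argument passing):
   with r4..r7 as the integer argument registers, a DImode argument whose
   first word falls in r7 no longer fits entirely in registers, so the code
   above reports 4 partial bytes -- one word in r7, the rest on the stack.  */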
8247 /* Define where to put the arguments to a function.
8248 Value is zero to push the argument on the stack,
8249 or a hard register in which to store the argument.
8251 MODE is the argument's machine mode.
8252 TYPE is the data type of the argument (as a tree).
8253 This is null for libcalls where that information may
8254 not be available.
8255 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8256 the preceding args and about the function being called.
8257 NAMED is nonzero if this argument is a named parameter
8258 (otherwise it is an extra parameter matching an ellipsis).
8260 On SH the first args are normally in registers
8261 and the rest are pushed. Any arg that starts within the first
8262 NPARM_REGS words is at least partially passed in a register unless
8263 its data type forbids it. */
8265 static rtx
8266 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8267 const_tree type, bool named)
8269 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8271 if (! TARGET_SH5 && mode == VOIDmode)
8272 return GEN_INT (ca->renesas_abi ? 1 : 0);
8274 if (! TARGET_SH5
8275 && PASS_IN_REG_P (*ca, mode, type)
8276 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8278 int regno;
8280 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8281 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8283 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8284 gen_rtx_REG (SFmode,
8285 BASE_ARG_REG (mode)
8286 + (ROUND_REG (*ca, mode) ^ 1)),
8287 const0_rtx);
8288 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8289 gen_rtx_REG (SFmode,
8290 BASE_ARG_REG (mode)
8291 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8292 GEN_INT (4));
8293 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8296 /* If the alignment of a DF value causes an SF register to be
8297 skipped, we will use that skipped register for the next SF
8298 value. */
8299 if ((TARGET_HITACHI || ca->renesas_abi)
8300 && ca->free_single_fp_reg
8301 && mode == SFmode)
8302 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8304 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8305 ^ (mode == SFmode && TARGET_SH4
8306 && TARGET_LITTLE_ENDIAN != 0
8307 && ! TARGET_HITACHI && ! ca->renesas_abi);
8308 return gen_rtx_REG (mode, regno);
8312 if (TARGET_SH5)
8314 if (mode == VOIDmode && TARGET_SHCOMPACT)
8315 return GEN_INT (ca->call_cookie);
8317 /* The following test assumes unnamed arguments are promoted to
8318 DFmode. */
8319 if (mode == SFmode && ca->free_single_fp_reg)
8320 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8322 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8323 && (named || ! ca->prototype_p)
8324 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8326 if (! ca->prototype_p && TARGET_SHMEDIA)
8327 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8329 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8330 FIRST_FP_PARM_REG
8331 + ca->arg_count[(int) SH_ARG_FLOAT]);
8334 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8335 && (! TARGET_SHCOMPACT
8336 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8337 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8338 type, named))))
8340 return gen_rtx_REG (mode, (FIRST_PARM_REG
8341 + ca->arg_count[(int) SH_ARG_INT]));
8344 return 0;
8347 return 0;
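/* Illustrative example (assumption, non-SH5, default ABI): with r4..r7 as
   the integer argument registers, for

     int f (int a, int b, int c, int d, int e);

   the code above returns r4..r7 for A..D and 0 (i.e. "push it") for E.
   Float arguments on SH2E/SH4 are handed out from the FP argument
   registers in the same spirit.  */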
8350 /* Update the data in CUM to advance over an argument
8351 of mode MODE and data type TYPE.
8352 (TYPE is null for libcalls where that information may not be
8353 available.) */
8355 static void
8356 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8357 const_tree type, bool named)
8359 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8361 if (ca->force_mem)
8362 ca->force_mem = 0;
8363 else if (TARGET_SH5)
8365 const_tree type2 = (ca->byref && type
8366 ? TREE_TYPE (type)
8367 : type);
8368 enum machine_mode mode2 = (ca->byref && type
8369 ? TYPE_MODE (type2)
8370 : mode);
8371 int dwords = ((ca->byref
8372 ? ca->byref
8373 : mode2 == BLKmode
8374 ? int_size_in_bytes (type2)
8375 : GET_MODE_SIZE (mode2)) + 7) / 8;
8376 int numregs = MIN (dwords, NPARM_REGS (SImode)
8377 - ca->arg_count[(int) SH_ARG_INT]);
8379 if (numregs)
8381 ca->arg_count[(int) SH_ARG_INT] += numregs;
8382 if (TARGET_SHCOMPACT
8383 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8385 ca->call_cookie
8386 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8387 - numregs, 1);
8388 /* N.B. We want this also for outgoing. */
8389 ca->stack_regs += numregs;
8391 else if (ca->byref)
8393 if (! ca->outgoing)
8394 ca->stack_regs += numregs;
8395 ca->byref_regs += numregs;
8396 ca->byref = 0;
8398 ca->call_cookie
8399 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8400 - numregs, 2);
8401 while (--numregs);
8402 ca->call_cookie
8403 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8404 - 1, 1);
8406 else if (dwords > numregs)
8408 int pushregs = numregs;
8410 if (TARGET_SHCOMPACT)
8411 ca->stack_regs += numregs;
8412 while (pushregs < NPARM_REGS (SImode) - 1
8413 && (CALL_COOKIE_INT_REG_GET
8414 (ca->call_cookie,
8415 NPARM_REGS (SImode) - pushregs)
8416 == 1))
8418 ca->call_cookie
8419 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8420 - pushregs, 1);
8421 pushregs++;
8423 if (numregs == NPARM_REGS (SImode))
8424 ca->call_cookie
8425 |= CALL_COOKIE_INT_REG (0, 1)
8426 | CALL_COOKIE_STACKSEQ (numregs - 1);
8427 else
8428 ca->call_cookie
8429 |= CALL_COOKIE_STACKSEQ (numregs);
8432 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8433 && (named || ! ca->prototype_p))
8435 if (mode2 == SFmode && ca->free_single_fp_reg)
8436 ca->free_single_fp_reg = 0;
8437 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8438 < NPARM_REGS (SFmode))
8440 int numfpregs
8441 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8442 NPARM_REGS (SFmode)
8443 - ca->arg_count[(int) SH_ARG_FLOAT]);
8445 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8447 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8449 if (ca->outgoing && numregs > 0)
8452 ca->call_cookie
8453 |= (CALL_COOKIE_INT_REG
8454 (ca->arg_count[(int) SH_ARG_INT]
8455 - numregs + ((numfpregs - 2) / 2),
8456 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8457 - numfpregs) / 2));
8459 while (numfpregs -= 2);
8461 else if (mode2 == SFmode && (named)
8462 && (ca->arg_count[(int) SH_ARG_FLOAT]
8463 < NPARM_REGS (SFmode)))
8464 ca->free_single_fp_reg
8465 = FIRST_FP_PARM_REG - numfpregs
8466 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8469 return;
8472 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8474 /* Note that we've used the skipped register. */
8475 if (mode == SFmode && ca->free_single_fp_reg)
8477 ca->free_single_fp_reg = 0;
8478 return;
8480 /* When we have a DF after an SF, there's an SF register that gets
8481 skipped in order to align the DF value. We note this skipped
8482 register, because the next SF value will use it, and not the
8483 SF that follows the DF. */
8484 if (mode == DFmode
8485 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8487 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8488 + BASE_ARG_REG (mode));
8492 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8493 || PASS_IN_REG_P (*ca, mode, type))
8494 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8495 = (ROUND_REG (*ca, mode)
8496 + (mode == BLKmode
8497 ? ROUND_ADVANCE (int_size_in_bytes (type))
8498 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8501 /* The Renesas calling convention doesn't quite fit into this scheme since
8502 the address is passed like an invisible argument, but one that is always
8503 passed in memory. */
8504 static rtx
8505 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8507 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8508 return 0;
8509 return gen_rtx_REG (Pmode, 2);
8512 /* Worker function for TARGET_FUNCTION_VALUE.
8514 For the SH, this is like LIBCALL_VALUE, except that we must change the
8515 mode like PROMOTE_MODE does.
8516 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8517 tested here has to be kept in sync with the one in explow.c:promote_mode.
8520 static rtx
8521 sh_function_value (const_tree valtype,
8522 const_tree fn_decl_or_type,
8523 bool outgoing ATTRIBUTE_UNUSED)
8525 if (fn_decl_or_type
8526 && !DECL_P (fn_decl_or_type))
8527 fn_decl_or_type = NULL;
8529 return gen_rtx_REG (
8530 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8531 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8532 && (TREE_CODE (valtype) == INTEGER_TYPE
8533 || TREE_CODE (valtype) == ENUMERAL_TYPE
8534 || TREE_CODE (valtype) == BOOLEAN_TYPE
8535 || TREE_CODE (valtype) == REAL_TYPE
8536 || TREE_CODE (valtype) == OFFSET_TYPE))
8537 && sh_promote_prototypes (fn_decl_or_type)
8538 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8539 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
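/* Illustrative example (assumption): for 'short f (void)' with the default
   (non-Renesas) ABI, the code above widens the return mode to SImode, so the
   value comes back in the full first return register (r0) rather than as a
   bare HImode value.  */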
8542 /* Worker function for TARGET_LIBCALL_VALUE. */
8544 static rtx
8545 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8547 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8550 /* Return true if N is a possible register number of function value. */
8552 static bool
8553 sh_function_value_regno_p (const unsigned int regno)
8555 return ((regno) == FIRST_RET_REG
8556 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8557 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8560 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8562 static bool
8563 sh_return_in_memory (const_tree type, const_tree fndecl)
8565 if (TARGET_SH5)
8567 if (TYPE_MODE (type) == BLKmode)
8568 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8569 else
8570 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8572 else
8574 return (TYPE_MODE (type) == BLKmode
8575 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8576 && TREE_CODE (type) == RECORD_TYPE));
8580 /* We actually emit the code in sh_expand_prologue. We used to use
8581 a static variable to flag that we need to emit this code, but that
8582 doesn't work when inlining, when functions are deferred and then emitted
8583 later. Fortunately, we already have two flags that are part of struct
8584 function that tell if a function uses varargs or stdarg. */
8585 static void
8586 sh_setup_incoming_varargs (cumulative_args_t ca,
8587 enum machine_mode mode,
8588 tree type,
8589 int *pretend_arg_size,
8590 int second_time ATTRIBUTE_UNUSED)
8592 gcc_assert (cfun->stdarg);
8593 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8595 int named_parm_regs, anon_parm_regs;
8597 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
8598 + (mode == BLKmode
8599 ? ROUND_ADVANCE (int_size_in_bytes (type))
8600 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8601 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8602 if (anon_parm_regs > 0)
8603 *pretend_arg_size = anon_parm_regs * 4;
8607 static bool
8608 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8610 return TARGET_SH5;
8613 static bool
8614 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8616 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8618 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8622 /* Define the offset between two registers, one to be eliminated, and
8623 the other its replacement, at the start of a routine. */
8626 initial_elimination_offset (int from, int to)
8628 int regs_saved;
8629 int regs_saved_rounding = 0;
8630 int total_saved_regs_space;
8631 int total_auto_space;
8632 int save_flags = target_flags;
8633 int copy_flags;
8634 HARD_REG_SET live_regs_mask;
8636 shmedia_space_reserved_for_target_registers = false;
8637 regs_saved = calc_live_regs (&live_regs_mask);
8638 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8640 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8642 shmedia_space_reserved_for_target_registers = true;
8643 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8646 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8647 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8648 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8650 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8651 copy_flags = target_flags;
8652 target_flags = save_flags;
8654 total_saved_regs_space = regs_saved + regs_saved_rounding;
8656 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8657 return total_saved_regs_space + total_auto_space
8658 + crtl->args.info.byref_regs * 8;
8660 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8661 return total_saved_regs_space + total_auto_space
8662 + crtl->args.info.byref_regs * 8;
8664 /* Initial gap between fp and sp is 0. */
8665 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8666 return 0;
8668 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8669 return rounded_frame_size (0);
8671 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8672 return rounded_frame_size (0);
8674 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8675 && (to == HARD_FRAME_POINTER_REGNUM
8676 || to == STACK_POINTER_REGNUM));
8677 if (TARGET_SH5)
8679 int n = total_saved_regs_space;
8680 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8681 save_schedule schedule;
8682 save_entry *entry;
8684 n += total_auto_space;
8686 /* If it wasn't saved, there's not much we can do. */
8687 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8688 return n;
8690 target_flags = copy_flags;
8692 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8693 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8694 if (entry->reg == pr_reg)
8696 target_flags = save_flags;
8697 return entry->offset;
8699 gcc_unreachable ();
8701 else
8702 return total_auto_space;
8705 /* Parse the -mfixed-range= option string. */
8706 void
8707 sh_fix_range (const char *const_str)
8709 int i, first, last;
8710 char *str, *dash, *comma;
8712 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8713 REG2 are either register names or register numbers. The effect
8714 of this option is to mark the registers in the range from REG1 to
8715 REG2 as ``fixed'' so they won't be used by the compiler. */
8717 i = strlen (const_str);
8718 str = (char *) alloca (i + 1);
8719 memcpy (str, const_str, i + 1);
8721 while (1)
8723 dash = strchr (str, '-');
8724 if (!dash)
8726 warning (0, "value of -mfixed-range must have form REG1-REG2");
8727 return;
8729 *dash = '\0';
8730 comma = strchr (dash + 1, ',');
8731 if (comma)
8732 *comma = '\0';
8734 first = decode_reg_name (str);
8735 if (first < 0)
8737 warning (0, "unknown register name: %s", str);
8738 return;
8741 last = decode_reg_name (dash + 1);
8742 if (last < 0)
8744 warning (0, "unknown register name: %s", dash + 1);
8745 return;
8748 *dash = '-';
8750 if (first > last)
8752 warning (0, "%s-%s is an empty range", str, dash + 1);
8753 return;
8756 for (i = first; i <= last; ++i)
8757 fixed_regs[i] = call_used_regs[i] = 1;
8759 if (!comma)
8760 break;
8762 *comma = ',';
8763 str = comma + 1;
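/* Illustrative usage (assumption, not from this file): the string parsed
   above normally comes from the command line, e.g.

     -mfixed-range=r8-r10,r13-r13

   which marks r8, r9, r10 and r13 as fixed and call-used so the compiler
   never allocates them.  */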
8767 /* Insert any deferred function attributes from earlier pragmas. */
8768 static void
8769 sh_insert_attributes (tree node, tree *attributes)
8771 tree attrs;
8773 if (TREE_CODE (node) != FUNCTION_DECL)
8774 return;
8776 /* We are only interested in fields. */
8777 if (!DECL_P (node))
8778 return;
8780 /* Append the attributes to the deferred attributes. */
8781 *sh_deferred_function_attributes_tail = *attributes;
8782 attrs = sh_deferred_function_attributes;
8783 if (!attrs)
8784 return;
8786 /* Some attributes imply or require the interrupt attribute. */
8787 if (!lookup_attribute ("interrupt_handler", attrs)
8788 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8790 /* If we have a trapa_handler, but no interrupt_handler attribute,
8791 insert an interrupt_handler attribute. */
8792 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8793 /* We can't use sh_pr_interrupt here because that's not in the
8794 java frontend. */
8795 attrs
8796 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8797 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8798 if the interrupt attribute is missing, we ignore the attribute
8799 and warn. */
8800 else if (lookup_attribute ("sp_switch", attrs)
8801 || lookup_attribute ("trap_exit", attrs)
8802 || lookup_attribute ("nosave_low_regs", attrs)
8803 || lookup_attribute ("resbank", attrs))
8805 tree *tail;
8807 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8809 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8810 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8811 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8812 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8813 warning (OPT_Wattributes,
8814 "%qE attribute only applies to interrupt functions",
8815 TREE_PURPOSE (attrs));
8816 else
8818 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8819 NULL_TREE);
8820 tail = &TREE_CHAIN (*tail);
8823 attrs = *attributes;
8827 /* Install the processed list. */
8828 *attributes = attrs;
8830 /* Clear deferred attributes. */
8831 sh_deferred_function_attributes = NULL_TREE;
8832 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8834 return;
8837 /* Supported attributes:
8839 interrupt_handler -- specifies this function is an interrupt handler.
8841 trapa_handler - like above, but don't save all registers.
8843 sp_switch -- specifies an alternate stack for an interrupt handler
8844 to run on.
8846 trap_exit -- use a trapa to exit an interrupt function instead of
8847 an rte instruction.
8849 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8850 This is useful on the SH3 and upwards,
8851 which have a separate set of low regs for User and Supervisor modes.
8852 This should only be used for the lowest level of interrupts. Higher levels
8853 of interrupts must save the registers in case they themselves are
8854 interrupted.
8856 renesas -- use Renesas calling/layout conventions (functions and
8857 structures).
8859 resbank -- In case of an ISR, use a register bank to save registers
8860 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
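/* Illustrative usage sketch, not part of the original source; the
   identifiers below are made up.  */
#if 0
/* Interrupt handler running on an alternate stack, exiting via 'trapa #12'.  */
void isr (void) __attribute__ ((interrupt_handler,
				sp_switch ("isr_stack"),
				trap_exit (12)));

/* SH2A interrupt handler that saves registers via a register bank.  */
void fast_isr (void) __attribute__ ((interrupt_handler, resbank));

/* SH2A function reachable through function vector entry 42.  */
void vec_fn (void) __attribute__ ((function_vector (42)));
#endif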
8863 /* Handle a 'resbank' attribute. */
8864 static tree
8865 sh_handle_resbank_handler_attribute (tree * node, tree name,
8866 tree args ATTRIBUTE_UNUSED,
8867 int flags ATTRIBUTE_UNUSED,
8868 bool * no_add_attrs)
8870 if (!TARGET_SH2A)
8872 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8873 name);
8874 *no_add_attrs = true;
8876 if (TREE_CODE (*node) != FUNCTION_DECL)
8878 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8879 name);
8880 *no_add_attrs = true;
8883 return NULL_TREE;
8886 /* Handle an "interrupt_handler" attribute; arguments as in
8887 struct attribute_spec.handler. */
8888 static tree
8889 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8890 tree args ATTRIBUTE_UNUSED,
8891 int flags ATTRIBUTE_UNUSED,
8892 bool *no_add_attrs)
8894 if (TREE_CODE (*node) != FUNCTION_DECL)
8896 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8897 name);
8898 *no_add_attrs = true;
8900 else if (TARGET_SHCOMPACT)
8902 error ("attribute interrupt_handler is not compatible with -m5-compact");
8903 *no_add_attrs = true;
8906 return NULL_TREE;
8909 /* Handle a 'function_vector' attribute; arguments as in
8910 struct attribute_spec.handler. */
8911 static tree
8912 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8913 tree args ATTRIBUTE_UNUSED,
8914 int flags ATTRIBUTE_UNUSED,
8915 bool * no_add_attrs)
8917 if (!TARGET_SH2A)
8919 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8920 name);
8921 *no_add_attrs = true;
8923 else if (TREE_CODE (*node) != FUNCTION_DECL)
8925 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8926 name);
8927 *no_add_attrs = true;
8929 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8931 /* The argument must be a constant integer. */
8932 warning (OPT_Wattributes,
8933 "%qE attribute argument not an integer constant",
8934 name);
8935 *no_add_attrs = true;
8937 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8939 /* The argument value must be between 0 and 255. */
8940 warning (OPT_Wattributes,
8941 "%qE attribute argument should be between 0 and 255",
8942 name);
8943 *no_add_attrs = true;
8945 return NULL_TREE;
8948 /* Returns 1 if the function referred to by symbol X has been assigned
8949 the attribute 'function_vector'. */
8951 sh2a_is_function_vector_call (rtx x)
8953 if (GET_CODE (x) == SYMBOL_REF
8954 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8956 tree tr = SYMBOL_REF_DECL (x);
8958 if (sh2a_function_vector_p (tr))
8959 return 1;
8962 return 0;
8965 /* Returns the function vector number, if the attribute
8966 'function_vector' is assigned, otherwise returns zero. */
8968 sh2a_get_function_vector_number (rtx x)
8970 int num;
8971 tree list, t;
8973 if ((GET_CODE (x) == SYMBOL_REF)
8974 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8976 t = SYMBOL_REF_DECL (x);
8978 if (TREE_CODE (t) != FUNCTION_DECL)
8979 return 0;
8981 list = SH_ATTRIBUTES (t);
8982 while (list)
8984 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8986 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8987 return num;
8990 list = TREE_CHAIN (list);
8993 return 0;
8995 else
8996 return 0;
8999 /* Handle an "sp_switch" attribute; arguments as in
9000 struct attribute_spec.handler. */
9001 static tree
9002 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9003 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9005 if (TREE_CODE (*node) != FUNCTION_DECL)
9007 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9008 name);
9009 *no_add_attrs = true;
9011 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9013 /* The argument must be a constant string. */
9014 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9015 name);
9016 *no_add_attrs = true;
9019 return NULL_TREE;
9022 /* Handle an "trap_exit" attribute; arguments as in
9023 struct attribute_spec.handler. */
9024 static tree
9025 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9026 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9028 if (TREE_CODE (*node) != FUNCTION_DECL)
9030 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9031 name);
9032 *no_add_attrs = true;
9034 /* The argument specifies a trap number to be used in a trapa instruction
9035 at function exit (instead of an rte instruction). */
9036 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9038 /* The argument must be a constant integer. */
9039 warning (OPT_Wattributes, "%qE attribute argument not an "
9040 "integer constant", name);
9041 *no_add_attrs = true;
9044 return NULL_TREE;
9047 static tree
9048 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9049 tree name ATTRIBUTE_UNUSED,
9050 tree args ATTRIBUTE_UNUSED,
9051 int flags ATTRIBUTE_UNUSED,
9052 bool *no_add_attrs ATTRIBUTE_UNUSED)
9054 return NULL_TREE;
9057 /* True if __attribute__((renesas)) or -mrenesas. */
9059 sh_attr_renesas_p (const_tree td)
9061 if (TARGET_HITACHI)
9062 return 1;
9063 if (td == 0)
9064 return 0;
9065 if (DECL_P (td))
9066 td = TREE_TYPE (td);
9067 if (td == error_mark_node)
9068 return 0;
9069 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9070 != NULL_TREE);
9073 /* True if __attribute__((renesas)) or -mrenesas, for the current
9074 function. */
9076 sh_cfun_attr_renesas_p (void)
9078 return sh_attr_renesas_p (current_function_decl);
9082 sh_cfun_interrupt_handler_p (void)
9084 return (lookup_attribute ("interrupt_handler",
9085 DECL_ATTRIBUTES (current_function_decl))
9086 != NULL_TREE);
9089 /* Returns 1 if FUNC has been assigned the attribute
9090 "function_vector". */
9092 sh2a_function_vector_p (tree func)
9094 tree list;
9095 if (TREE_CODE (func) != FUNCTION_DECL)
9096 return 0;
9098 list = SH_ATTRIBUTES (func);
9099 while (list)
9101 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9102 return 1;
9104 list = TREE_CHAIN (list);
9106 return 0;
9109 /* Returns TRUE if the current function has the "resbank" attribute and is an SH2A interrupt handler. */
9112 sh_cfun_resbank_handler_p (void)
9114 return ((lookup_attribute ("resbank",
9115 DECL_ATTRIBUTES (current_function_decl))
9116 != NULL_TREE)
9117 && (lookup_attribute ("interrupt_handler",
9118 DECL_ATTRIBUTES (current_function_decl))
9119 != NULL_TREE) && TARGET_SH2A);
9122 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9124 static const char *
9125 sh_check_pch_target_flags (int old_flags)
9127 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9128 | MASK_SH_E | MASK_HARD_SH4
9129 | MASK_FPU_SINGLE | MASK_SH4))
9130 return _("created and used with different architectures / ABIs");
9131 if ((old_flags ^ target_flags) & MASK_HITACHI)
9132 return _("created and used with different ABIs");
9133 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9134 return _("created and used with different endianness");
9135 return NULL;
9138 /* Predicates used by the templates. */
9140 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9141 Used only in general_movsrc_operand. */
9144 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9146 switch (REGNO (op))
9148 case PR_REG:
9149 case MACL_REG:
9150 case MACH_REG:
9151 return 1;
9153 return 0;
9156 /* Nonzero if OP is a floating point value with value 0.0. */
9159 fp_zero_operand (rtx op)
9161 REAL_VALUE_TYPE r;
9163 if (GET_MODE (op) != SFmode)
9164 return 0;
9166 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9167 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9170 /* Nonzero if OP is a floating point value with value 1.0. */
9173 fp_one_operand (rtx op)
9175 REAL_VALUE_TYPE r;
9177 if (GET_MODE (op) != SFmode)
9178 return 0;
9180 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9181 return REAL_VALUES_EQUAL (r, dconst1);
9184 /* In general mode switching is used. If we are
9185 compiling without -mfmovd, movsf_ie isn't taken into account for
9186 mode switching. We could check in machine_dependent_reorg for
9187 cases where we know we are in single precision mode, but there is
9188 no interface to find that out during reload, so we must avoid
9189 choosing an fldi alternative during reload and thus failing to
9190 allocate a scratch register for the constant loading. */
9192 fldi_ok (void)
9194 return 1;
9198 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9200 enum rtx_code code = GET_CODE (op);
9201 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9204 /* Return the TLS type for TLS symbols; return TLS_MODEL_NONE otherwise. */
9205 enum tls_model
9206 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9208 if (GET_CODE (op) != SYMBOL_REF)
9209 return TLS_MODEL_NONE;
9210 return SYMBOL_REF_TLS_MODEL (op);
9213 /* Return the destination address of a branch. */
9215 static int
9216 branch_dest (rtx branch)
9218 rtx dest = SET_SRC (PATTERN (branch));
9219 int dest_uid;
9221 if (GET_CODE (dest) == IF_THEN_ELSE)
9222 dest = XEXP (dest, 1);
9223 dest = XEXP (dest, 0);
9224 dest_uid = INSN_UID (dest);
9225 return INSN_ADDRESSES (dest_uid);
9228 /* Return nonzero if REG is not used after INSN.
9229 We assume REG is a reload reg, and therefore does
9230 not live past labels. It may live past calls or jumps though. */
9232 reg_unused_after (rtx reg, rtx insn)
9234 enum rtx_code code;
9235 rtx set;
9237 /* If the reg is set by this instruction, then it is safe for our
9238 case. Disregard the case where this is a store to memory, since
9239 we are checking a register used in the store address. */
9240 set = single_set (insn);
9241 if (set && !MEM_P (SET_DEST (set))
9242 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9243 return 1;
9245 while ((insn = NEXT_INSN (insn)))
9247 rtx set;
9248 if (!INSN_P (insn))
9249 continue;
9251 code = GET_CODE (insn);
9253 #if 0
9254 /* If this is a label that existed before reload, then the register
9255 is dead here. However, if this is a label added by reorg, then
9256 the register may still be live here. We can't tell the difference,
9257 so we just ignore labels completely. */
9258 if (code == CODE_LABEL)
9259 return 1;
9260 /* else */
9261 #endif
9263 if (code == JUMP_INSN)
9264 return 0;
9266 /* If this is a sequence, we must handle them all at once.
9267 We could have for instance a call that sets the target register,
9268 and an insn in a delay slot that uses the register. In this case,
9269 we must return 0. */
9270 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9272 int i;
9273 int retval = 0;
9275 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9277 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9278 rtx set = single_set (this_insn);
9280 if (CALL_P (this_insn))
9281 code = CALL_INSN;
9282 else if (JUMP_P (this_insn))
9284 if (INSN_ANNULLED_BRANCH_P (this_insn))
9285 return 0;
9286 code = JUMP_INSN;
9289 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9290 return 0;
9291 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9293 if (!MEM_P (SET_DEST (set)))
9294 retval = 1;
9295 else
9296 return 0;
9298 if (set == 0
9299 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9300 return 0;
9302 if (retval == 1)
9303 return 1;
9304 else if (code == JUMP_INSN)
9305 return 0;
9308 set = single_set (insn);
9309 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9310 return 0;
9311 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9312 return !MEM_P (SET_DEST (set));
9313 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9314 return 0;
9316 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9317 return 1;
9319 return 1;
9322 #include "ggc.h"
9324 static GTY(()) rtx fpscr_rtx;
9326 get_fpscr_rtx (void)
9328 if (! fpscr_rtx)
9330 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9331 REG_USERVAR_P (fpscr_rtx) = 1;
9332 mark_user_reg (fpscr_rtx);
9334 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9335 mark_user_reg (fpscr_rtx);
9336 return fpscr_rtx;
9339 static GTY(()) tree fpscr_values;
9341 static void
9342 emit_fpu_switch (rtx scratch, int index)
9344 rtx dst, src;
9346 if (fpscr_values == NULL)
9348 tree t;
9350 t = build_index_type (integer_one_node);
9351 t = build_array_type (integer_type_node, t);
9352 t = build_decl (BUILTINS_LOCATION,
9353 VAR_DECL, get_identifier ("__fpscr_values"), t);
9354 DECL_ARTIFICIAL (t) = 1;
9355 DECL_IGNORED_P (t) = 1;
9356 DECL_EXTERNAL (t) = 1;
9357 TREE_STATIC (t) = 1;
9358 TREE_PUBLIC (t) = 1;
9359 TREE_USED (t) = 1;
9361 fpscr_values = t;
9364 src = DECL_RTL (fpscr_values);
9365 if (!can_create_pseudo_p ())
9367 emit_move_insn (scratch, XEXP (src, 0));
9368 if (index != 0)
9369 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9370 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9372 else
9373 src = adjust_address (src, PSImode, index * 4);
9375 dst = get_fpscr_rtx ();
9376 emit_move_insn (dst, src);
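/* Illustrative sketch (assumption): __fpscr_values as referenced above is
   expected to be supplied by the runtime as two words holding the FPSCR
   settings for the two FP modes, conceptually

     const int __fpscr_values[2] = { <FPSCR for mode 0>, <FPSCR for mode 1> };

   the INDEX argument then selects which of the two gets loaded into FPSCR.  */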
9379 void
9380 emit_sf_insn (rtx pat)
9382 emit_insn (pat);
9385 void
9386 emit_df_insn (rtx pat)
9388 emit_insn (pat);
9391 void
9392 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9394 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9397 void
9398 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9400 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9401 get_fpscr_rtx ()));
9404 void
9405 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9407 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9410 void
9411 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9413 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9414 get_fpscr_rtx ()));
9417 static rtx get_free_reg (HARD_REG_SET);
9419 /* This function returns a register to use for loading the address from
9420 which to load the fpscr. Currently it always returns r1 or r7, but when we are
9421 able to use pseudo registers after combine, or have a better mechanism
9422 for choosing a register, it should be done here. */
9423 /* REGS_LIVE is the liveness information for the point for which we
9424 need this allocation. In some bare-bones exit blocks, r1 is live at the
9425 start. We can even have all of r0..r3 being live:
9426 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9427 The INSN before which new insns are placed will clobber the register
9428 we return. If a basic block consists only of setting the return value
9429 register to a pseudo and using that register, the return value is not
9430 live before or after this block, yet we'll insert our insns right in
9431 the middle. */
9433 static rtx
9434 get_free_reg (HARD_REG_SET regs_live)
9436 if (! TEST_HARD_REG_BIT (regs_live, 1))
9437 return gen_rtx_REG (Pmode, 1);
9439 /* Hard reg 1 is live; since this is a small register classes target,
9440 there shouldn't be anything but a jump before the function end. */
9441 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9442 return gen_rtx_REG (Pmode, 7);
9445 /* This function will set the fpscr from memory.
9446 MODE is the mode we are setting it to. */
9447 void
9448 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9450 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9451 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9452 rtx addr_reg;
9454 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9455 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9458 /* Is the given character a logical line separator for the assembler? */
9459 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9460 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9461 #endif
9464 sh_insn_length_adjustment (rtx insn)
9466 /* Instructions with unfilled delay slots take up an extra two bytes for
9467 the nop in the delay slot. */
9468 if (((NONJUMP_INSN_P (insn)
9469 && GET_CODE (PATTERN (insn)) != USE
9470 && GET_CODE (PATTERN (insn)) != CLOBBER)
9471 || CALL_P (insn)
9472 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9473 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9474 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9475 return 2;
9477 /* SH2e has a bug that prevents the use of annulled branches, so if
9478 the delay slot is not filled, we'll have to put a NOP in it. */
9479 if (sh_cpu_attr == CPU_SH2E
9480 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9481 && get_attr_type (insn) == TYPE_CBRANCH
9482 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9483 return 2;
9485 /* sh-dsp parallel processing insns take four bytes instead of two. */
9487 if (NONJUMP_INSN_P (insn))
9489 int sum = 0;
9490 rtx body = PATTERN (insn);
9491 const char *templ;
9492 char c;
9493 int maybe_label = 1;
9495 if (GET_CODE (body) == ASM_INPUT)
9496 templ = XSTR (body, 0);
9497 else if (asm_noperands (body) >= 0)
9498 templ
9499 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9500 else
9501 return 0;
9504 int ppi_adjust = 0;
9507 c = *templ++;
9508 while (c == ' ' || c == '\t');
9509 /* all sh-dsp parallel-processing insns start with p.
9510 The only non-ppi sh insn starting with p is pref.
9511 The only ppi starting with pr is prnd. */
9512 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9513 ppi_adjust = 2;
9514 /* The repeat pseudo-insn expands to three insns, a total of
9515 six bytes in size. */
9516 else if ((c == 'r' || c == 'R')
9517 && ! strncasecmp ("epeat", templ, 5))
9518 ppi_adjust = 4;
9519 while (c && c != '\n'
9520 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9522 /* If this is a label, it is obviously not a ppi insn. */
9523 if (c == ':' && maybe_label)
9525 ppi_adjust = 0;
9526 break;
9528 else if (c == '\'' || c == '"')
9529 maybe_label = 0;
9530 c = *templ++;
9532 sum += ppi_adjust;
9533 maybe_label = c != ':';
9535 while (c);
9536 return sum;
9538 return 0;
9541 /* Return TRUE for a valid displacement for the REG+disp addressing
9542 with MODE. */
9544 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9545 into the FRx registers. We implement this by setting the maximum offset
9546 to zero when the value is SFmode. This also restricts loading of SFmode
9547 values into the integer registers, but that can't be helped. */
9549 /* The SH allows a displacement in a QI or HI mode address, but only when the
9550 other operand is R0. GCC doesn't handle this very well, so we forgo
9551 all of that.
9553 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9554 DI can be any number 0..60. */
9556 bool
9557 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9559 if (CONST_INT_P (op))
9561 if (TARGET_SHMEDIA)
9563 int size;
9565 /* Check if this is the address of an unaligned load / store. */
9566 if (mode == VOIDmode)
9567 return CONST_OK_FOR_I06 (INTVAL (op));
9569 size = GET_MODE_SIZE (mode);
9570 return (!(INTVAL (op) & (size - 1))
9571 && INTVAL (op) >= -512 * size
9572 && INTVAL (op) < 512 * size);
9575 if (TARGET_SH2A)
9577 if (GET_MODE_SIZE (mode) == 1
9578 && (unsigned) INTVAL (op) < 4096)
9579 return true;
9582 if ((GET_MODE_SIZE (mode) == 4
9583 && (unsigned) INTVAL (op) < 64
9584 && !(INTVAL (op) & 3)
9585 && !(TARGET_SH2E && mode == SFmode))
9586 || (GET_MODE_SIZE (mode) == 4
9587 && (unsigned) INTVAL (op) < 16383
9588 && !(INTVAL (op) & 3) && TARGET_SH2A))
9589 return true;
9591 if ((GET_MODE_SIZE (mode) == 8
9592 && (unsigned) INTVAL (op) < 60
9593 && !(INTVAL (op) & 3)
9594 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9595 || ((GET_MODE_SIZE (mode) == 8)
9596 && (unsigned) INTVAL (op) < 8192
9597 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9598 && (TARGET_SH2A && mode == DFmode)))
9599 return true;
9602 return false;
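/* As an informal illustration of the ranges accepted above: on plain SH,
   SImode displacements 0, 4, ..., 60 and DImode displacements 0, 4, ..., 56
   are legitimate, anything larger or misaligned is not; SH2E rejects
   REG+disp for SFmode altogether (see the comment above), while SH2A
   extends the aligned SImode range up to 16380 and allows QImode
   displacements 0..4095.  */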
9605 /* Recognize an RTL expression that is a valid memory address for
9606 an instruction.
9607 The MODE argument is the machine mode for the MEM expression
9608 that wants to use this address.
9609 Allow REG
9610 REG+disp
9611 REG+r0
9612 REG++
9613 --REG */
9615 static bool
9616 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9618 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9619 return true;
9620 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9621 && ! TARGET_SHMEDIA
9622 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9623 return true;
9624 else if (GET_CODE (x) == PLUS
9625 && (mode != PSImode || reload_completed))
9627 rtx xop0 = XEXP (x, 0);
9628 rtx xop1 = XEXP (x, 1);
9630 if (GET_MODE_SIZE (mode) <= 8
9631 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9632 && sh_legitimate_index_p (mode, xop1))
9633 return true;
9635 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9636 || ((xop0 == stack_pointer_rtx
9637 || xop0 == hard_frame_pointer_rtx)
9638 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9639 || ((xop1 == stack_pointer_rtx
9640 || xop1 == hard_frame_pointer_rtx)
9641 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9642 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9643 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9644 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9645 && TARGET_FMOVD && mode == DFmode)))
9647 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9648 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9649 return true;
9650 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9651 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9652 return true;
9656 return false;
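/* In SH assembler terms, the forms accepted above correspond to @Rn,
   @(disp,Rn), @(R0,Rn), @Rn+ (POST_INC) and @-Rn (PRE_DEC).  */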
9659 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9660 isn't protected by a PIC unspec. */
9662 nonpic_symbol_mentioned_p (rtx x)
9664 register const char *fmt;
9665 register int i;
9667 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9668 || GET_CODE (x) == PC)
9669 return 1;
9671 /* We don't want to look into the possible MEM location of a
9672 CONST_DOUBLE, since we're not going to use it, in general. */
9673 if (GET_CODE (x) == CONST_DOUBLE)
9674 return 0;
9676 if (GET_CODE (x) == UNSPEC
9677 && (XINT (x, 1) == UNSPEC_PIC
9678 || XINT (x, 1) == UNSPEC_GOT
9679 || XINT (x, 1) == UNSPEC_GOTOFF
9680 || XINT (x, 1) == UNSPEC_GOTPLT
9681 || XINT (x, 1) == UNSPEC_GOTTPOFF
9682 || XINT (x, 1) == UNSPEC_DTPOFF
9683 || XINT (x, 1) == UNSPEC_TPOFF
9684 || XINT (x, 1) == UNSPEC_PLT
9685 || XINT (x, 1) == UNSPEC_SYMOFF
9686 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9687 return 0;
9689 fmt = GET_RTX_FORMAT (GET_CODE (x));
9690 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9692 if (fmt[i] == 'E')
9694 register int j;
9696 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9697 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9698 return 1;
9700 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9701 return 1;
9704 return 0;
9707 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9708 @GOTOFF in `reg'. */
9710 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9711 rtx reg)
9713 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9714 return orig;
9716 if (GET_CODE (orig) == LABEL_REF
9717 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9719 if (reg == 0)
9720 reg = gen_reg_rtx (Pmode);
9722 emit_insn (gen_symGOTOFF2reg (reg, orig));
9723 return reg;
9725 else if (GET_CODE (orig) == SYMBOL_REF)
9727 if (reg == 0)
9728 reg = gen_reg_rtx (Pmode);
9730 emit_insn (gen_symGOT2reg (reg, orig));
9731 return reg;
9733 return orig;
9736 /* Try machine-dependent ways of modifying an illegitimate address
9737 to be legitimate. If we find one, return the new, valid address.
9738 Otherwise, return X.
9740 For the SH, if X is almost suitable for indexing, but the offset is
9741 out of range, convert it into a normal form so that CSE has a chance
9742 of reducing the number of address registers used. */
9744 static rtx
9745 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9747 if (flag_pic)
9748 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9750 if (GET_CODE (x) == PLUS
9751 && (GET_MODE_SIZE (mode) == 4
9752 || GET_MODE_SIZE (mode) == 8)
9753 && CONST_INT_P (XEXP (x, 1))
9754 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9755 && ! TARGET_SHMEDIA
9756 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9757 && ! (TARGET_SH2E && mode == SFmode))
9759 rtx index_rtx = XEXP (x, 1);
9760 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9761 rtx sum;
9763 /* On rare occasions, we might get an unaligned pointer
9764 that is indexed in a way to give an aligned address.
9765 Therefore, keep the lower two bits in offset_base. */
9766 /* Instead of offset_base 128..131 use 124..127, so that
9767 simple add suffices. */
9768 if (offset > 127)
9769 offset_base = ((offset + 4) & ~60) - 4;
9770 else
9771 offset_base = offset & ~60;
9773 /* Sometimes the normal form does not suit DImode. We
9774 could avoid that by using smaller ranges, but that
9775 would give less optimized code when SImode is
9776 prevalent. */
9777 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9779 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9780 GEN_INT (offset_base), NULL_RTX, 0,
9781 OPTAB_LIB_WIDEN);
9783 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9787 return x;
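/* A worked example of the normalization above, assuming an SImode access:
   r4 + 68 does not fit the 0..60 displacement range, so it is rewritten as
   (r4 + 64) + 4, since 68 & ~60 == 64; for offsets just above 127, e.g.
   130, the 124..127 form is used instead (130 becomes 126 + 4) so that the
   base can still be formed with a simple immediate add.  */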
9790 /* Attempt to replace *P, which is an address that needs reloading, with
9791 a valid memory address for an operand of mode MODE.
9792 Like for sh_legitimize_address, for the SH we try to get a normal form
9793 of the address. That will allow inheritance of the address reloads. */
9795 bool
9796 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9797 int itype)
9799 enum reload_type type = (enum reload_type) itype;
9801 if (GET_CODE (*p) == PLUS
9802 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9803 && CONST_INT_P (XEXP (*p, 1))
9804 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9805 && ! TARGET_SHMEDIA
9806 && ! (TARGET_SH4 && mode == DFmode)
9807 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9808 && (ALLOW_INDEXED_ADDRESS
9809 || XEXP (*p, 0) == stack_pointer_rtx
9810 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9812 rtx index_rtx = XEXP (*p, 1);
9813 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9814 rtx sum;
9816 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9818 push_reload (*p, NULL_RTX, p, NULL,
9819 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9820 goto win;
9822 if (TARGET_SH2E && mode == SFmode)
9824 *p = copy_rtx (*p);
9825 push_reload (*p, NULL_RTX, p, NULL,
9826 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9827 goto win;
9829 /* Instead of offset_base 128..131 use 124..127, so that
9830 simple add suffices. */
9831 if (offset > 127)
9832 offset_base = ((offset + 4) & ~60) - 4;
9833 else
9834 offset_base = offset & ~60;
9835 /* Sometimes the normal form does not suit DImode. We could avoid
9836 that by using smaller ranges, but that would give less optimized
9837 code when SImode is prevalent. */
9838 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9840 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9841 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9842 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9843 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9844 goto win;
9847 /* We must re-recognize what we created before. */
9848 else if (GET_CODE (*p) == PLUS
9849 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9850 && GET_CODE (XEXP (*p, 0)) == PLUS
9851 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9852 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9853 && CONST_INT_P (XEXP (*p, 1))
9854 && ! TARGET_SHMEDIA
9855 && ! (TARGET_SH2E && mode == SFmode))
9857 /* Because this address is so complex, we know it must have
9858 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9859 it is already unshared, and needs no further unsharing. */
9860 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9861 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9862 goto win;
9865 return false;
9867 win:
9868 return true;
9871 /* In the name of slightly smaller debug output, and to cater to
9872 general assembler lossage, recognize various UNSPEC sequences
9873 and turn them back into a direct symbol reference. */
9875 static rtx
9876 sh_delegitimize_address (rtx orig_x)
9878 rtx x, y;
9880 orig_x = delegitimize_mem_from_attrs (orig_x);
9882 x = orig_x;
9883 if (MEM_P (x))
9884 x = XEXP (x, 0);
9885 if (GET_CODE (x) == CONST)
9887 y = XEXP (x, 0);
9888 if (GET_CODE (y) == UNSPEC)
9890 if (XINT (y, 1) == UNSPEC_GOT
9891 || XINT (y, 1) == UNSPEC_GOTOFF
9892 || XINT (y, 1) == UNSPEC_SYMOFF)
9893 return XVECEXP (y, 0, 0);
9894 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9896 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9898 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9900 if (GET_CODE (symplt) == UNSPEC
9901 && XINT (symplt, 1) == UNSPEC_PLT)
9902 return XVECEXP (symplt, 0, 0);
9905 else if (TARGET_SHMEDIA
9906 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
9907 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
9909 rtx offset = XVECEXP (y, 0, 1);
9911 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
9912 if (MEM_P (orig_x))
9913 x = replace_equiv_address_nv (orig_x, x);
9914 return x;
9919 return orig_x;
9922 /* Mark the use of a constant in the literal table. If the constant
9923 has multiple labels, make it unique. */
9924 static rtx
9925 mark_constant_pool_use (rtx x)
9927 rtx insn, lab, pattern;
9929 if (x == NULL)
9930 return x;
9932 switch (GET_CODE (x))
9934 case LABEL_REF:
9935 x = XEXP (x, 0);
9936 case CODE_LABEL:
9937 break;
9938 default:
9939 return x;
9942 /* Get the first label in the list of labels for the same constant
9943 and delete the other labels in the list. */
9944 lab = x;
9945 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9947 if (!LABEL_P (insn)
9948 || LABEL_REFS (insn) != NEXT_INSN (insn))
9949 break;
9950 lab = insn;
9953 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9954 INSN_DELETED_P (insn) = 1;
9956 /* Mark constants in a window. */
9957 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9959 if (!NONJUMP_INSN_P (insn))
9960 continue;
9962 pattern = PATTERN (insn);
9963 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9964 continue;
9966 switch (XINT (pattern, 1))
9968 case UNSPECV_CONST2:
9969 case UNSPECV_CONST4:
9970 case UNSPECV_CONST8:
9971 XVECEXP (pattern, 0, 1) = const1_rtx;
9972 break;
9973 case UNSPECV_WINDOW_END:
9974 if (XVECEXP (pattern, 0, 0) == x)
9975 return lab;
9976 break;
9977 case UNSPECV_CONST_END:
9978 return lab;
9979 default:
9980 break;
9984 return lab;
9987 /* Return true if it's possible to redirect BRANCH1 to the destination
9988 of an unconditional jump BRANCH2. We only want to do this if the
9989 resulting branch will have a short displacement. */
9991 sh_can_redirect_branch (rtx branch1, rtx branch2)
9993 if (flag_expensive_optimizations && simplejump_p (branch2))
9995 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9996 rtx insn;
9997 int distance;
9999 for (distance = 0, insn = NEXT_INSN (branch1);
10000 insn && distance < 256;
10001 insn = PREV_INSN (insn))
10003 if (insn == dest)
10004 return 1;
10005 else
10006 distance += get_attr_length (insn);
10008 for (distance = 0, insn = NEXT_INSN (branch1);
10009 insn && distance < 256;
10010 insn = NEXT_INSN (insn))
10012 if (insn == dest)
10013 return 1;
10014 else
10015 distance += get_attr_length (insn);
10018 return 0;
10021 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10023 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10024 unsigned int new_reg)
10026 /* Interrupt functions can only use registers that have already been
10027 saved by the prologue, even if they would normally be
10028 call-clobbered. */
10030 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10031 return 0;
10033 return 1;
10036 /* Function to update the integer COST
10037 based on the relationship between INSN that is dependent on
10038 DEP_INSN through the dependence LINK. The default is to make no
10039 adjustment to COST. This can be used for example to specify to
10040 the scheduler that an output- or anti-dependence does not incur
10041 the same cost as a data-dependence. The return value should be
10042 the new value for COST. */
10043 static int
10044 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10046 rtx reg, use_pat;
10048 if (TARGET_SHMEDIA)
10050 /* On SHmedia, if the dependence is an anti-dependence or
10051 output-dependence, there is no cost. */
10052 if (REG_NOTE_KIND (link) != 0)
10054 /* However, dependencies between target register loads and
10055 uses of the register in a subsequent block that are separated
10056 by a conditional branch are not modelled - we have to make do with
10057 the anti-dependency between the target register load and the
10058 conditional branch that ends the current block. */
10059 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10060 && GET_CODE (PATTERN (dep_insn)) == SET
10061 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10062 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10063 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10065 int orig_cost = cost;
10066 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10067 rtx target = ((! note
10068 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10069 ? insn : JUMP_LABEL (insn));
10070 /* On the likely path, the branch costs 1, on the unlikely path,
10071 it costs 3. */
10072 cost--;
10074 target = next_active_insn (target);
10075 while (target && ! flow_dependent_p (target, dep_insn)
10076 && --cost > 0);
10077 /* If two branches are executed in immediate succession, with the
10078 first branch properly predicted, this causes a stall at the
10079 second branch, hence we won't need the target for the
10080 second branch for two cycles after the launch of the first
10081 branch. */
10082 if (cost > orig_cost - 2)
10083 cost = orig_cost - 2;
10085 else
10086 cost = 0;
10089 else if (get_attr_is_mac_media (insn)
10090 && get_attr_is_mac_media (dep_insn))
10091 cost = 1;
10093 else if (! reload_completed
10094 && GET_CODE (PATTERN (insn)) == SET
10095 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10096 && GET_CODE (PATTERN (dep_insn)) == SET
10097 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10098 && cost < 4)
10099 cost = 4;
10100 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10101 that is needed at the target. */
10102 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10103 && ! flow_dependent_p (insn, dep_insn))
10104 cost--;
10106 else if (REG_NOTE_KIND (link) == 0)
10108 enum attr_type type;
10109 rtx dep_set;
10111 if (recog_memoized (insn) < 0
10112 || recog_memoized (dep_insn) < 0)
10113 return cost;
10115 dep_set = single_set (dep_insn);
10117 /* The latency that we specify in the scheduling description refers
10118 to the actual output, not to an auto-increment register; for that,
10119 the latency is one. */
10120 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10122 rtx set = single_set (insn);
10124 if (set
10125 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10126 && (!MEM_P (SET_DEST (set))
10127 || !reg_mentioned_p (SET_DEST (dep_set),
10128 XEXP (SET_DEST (set), 0))))
10129 cost = 1;
10131 /* The only input for a call that is timing-critical is the
10132 function's address. */
10133 if (CALL_P (insn))
10135 rtx call = PATTERN (insn);
10137 if (GET_CODE (call) == PARALLEL)
10138 call = XVECEXP (call, 0, 0);
10139 if (GET_CODE (call) == SET)
10140 call = SET_SRC (call);
10141 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10142 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10143 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10144 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10145 cost -= TARGET_SH4_300 ? 3 : 6;
10147 /* Likewise, the most timing critical input for an sfuncs call
10148 is the function address. However, sfuncs typically start
10149 using their arguments pretty quickly.
10150 Assume a four cycle delay for SH4 before they are needed.
10151 Cached ST40-300 calls are quicker, so assume only a one
10152 cycle delay there.
10153 ??? Maybe we should encode the delays till input registers
10154 are needed by sfuncs into the sfunc call insn. */
10155 /* All sfunc calls are parallels with at least four components.
10156 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10157 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10158 && XVECLEN (PATTERN (insn), 0) >= 4
10159 && (reg = sfunc_uses_reg (insn)))
10161 if (! reg_set_p (reg, dep_insn))
10162 cost -= TARGET_SH4_300 ? 1 : 4;
10164 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10166 enum attr_type dep_type = get_attr_type (dep_insn);
10168 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10169 cost--;
10170 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10171 && (type = get_attr_type (insn)) != TYPE_CALL
10172 && type != TYPE_SFUNC)
10173 cost--;
10174 /* When the preceding instruction loads the shift amount of
10175 the following SHAD/SHLD, the latency of the load is increased
10176 by 1 cycle. */
10177 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10178 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10179 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10180 XEXP (SET_SRC (single_set (insn)),
10181 1)))
10182 cost++;
10183 /* When an LS group instruction with a latency of less than
10184 3 cycles is followed by a double-precision floating-point
10185 instruction, FIPR, or FTRV, the latency of the first
10186 instruction is increased to 3 cycles. */
10187 else if (cost < 3
10188 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10189 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10190 cost = 3;
10191 /* The lsw register of a double-precision computation is ready one
10192 cycle earlier. */
10193 else if (reload_completed
10194 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10195 && (use_pat = single_set (insn))
10196 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10197 SET_SRC (use_pat)))
10198 cost -= 1;
10200 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10201 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10202 cost -= 1;
10204 else if (TARGET_SH4_300)
10206 /* Stores need their input register two cycles later. */
10207 if (dep_set && cost >= 1
10208 && ((type = get_attr_type (insn)) == TYPE_STORE
10209 || type == TYPE_PSTORE
10210 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10212 rtx set = single_set (insn);
10214 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10215 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10217 cost -= 2;
10218 /* But don't reduce the cost below 1 if the address depends
10219 on a side effect of dep_insn. */
10220 if (cost < 1
10221 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10222 cost = 1;
10227 /* An anti-dependence penalty of two applies if the first insn is a double
10228 precision fadd / fsub / fmul. */
10229 else if (!TARGET_SH4_300
10230 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10231 && recog_memoized (dep_insn) >= 0
10232 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10233 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10234 /* A lot of alleged anti-flow dependences are fake,
10235 so check this one is real. */
10236 && flow_dependent_p (dep_insn, insn))
10237 cost = 2;
10239 return cost;
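/* A concrete reading of some of the rules above: when DEP_INSN only
   produces an argument of a following call, and not the call address
   itself, the dependence cost is cut by 6 cycles on SH4 (3 on SH4-300),
   since the address is the only timing-critical input of a call;
   conversely, on hard SH4 a load whose result is used as the shift amount
   of a following SHAD / SHLD is charged one extra cycle of latency.  */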
10242 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10243 if DEP_INSN is anti-flow dependent on INSN. */
10244 static int
10245 flow_dependent_p (rtx insn, rtx dep_insn)
10247 rtx tmp = PATTERN (insn);
10249 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10250 return tmp == NULL_RTX;
10253 /* A helper function for flow_dependent_p called through note_stores. */
10254 static void
10255 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10257 rtx * pinsn = (rtx *) data;
10259 if (*pinsn && reg_referenced_p (x, *pinsn))
10260 *pinsn = NULL_RTX;
10263 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10264 'special function' patterns (type sfunc) that clobber pr, but that
10265 do not look like function calls to leaf_function_p. Hence we must
10266 do this extra check. */
10267 static int
10268 sh_pr_n_sets (void)
10270 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10273 /* Return where to allocate pseudo for a given hard register initial
10274 value. */
10275 static rtx
10276 sh_allocate_initial_value (rtx hard_reg)
10278 rtx x;
10280 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10282 if (current_function_is_leaf
10283 && ! sh_pr_n_sets ()
10284 && ! (TARGET_SHCOMPACT
10285 && ((crtl->args.info.call_cookie
10286 & ~ CALL_COOKIE_RET_TRAMP (1))
10287 || crtl->saves_all_registers)))
10288 x = hard_reg;
10289 else
10290 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10292 else
10293 x = NULL_RTX;
10295 return x;
10298 /* This function returns "2" to indicate dual issue for the SH4
10299 processor. To be used by the DFA pipeline description. */
10300 static int
10301 sh_issue_rate (void)
10303 if (TARGET_SUPERSCALAR)
10304 return 2;
10305 else
10306 return 1;
10309 /* Functions for ready queue reordering for sched1. */
10311 /* Get weight for mode for a set x. */
10312 static short
10313 find_set_regmode_weight (rtx x, enum machine_mode mode)
10315 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10316 return 1;
10317 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10319 if (REG_P (SET_DEST (x)))
10321 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10322 return 1;
10323 else
10324 return 0;
10326 return 1;
10328 return 0;
10331 /* Get regmode weight for insn. */
10332 static short
10333 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10335 short reg_weight = 0;
10336 rtx x;
10338 /* Increment weight for each register born here. */
10339 x = PATTERN (insn);
10340 reg_weight += find_set_regmode_weight (x, mode);
10341 if (GET_CODE (x) == PARALLEL)
10343 int j;
10344 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10346 x = XVECEXP (PATTERN (insn), 0, j);
10347 reg_weight += find_set_regmode_weight (x, mode);
10350 /* Decrement weight for each register that dies here. */
10351 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10353 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10355 rtx note = XEXP (x, 0);
10356 if (REG_P (note) && GET_MODE (note) == mode)
10357 reg_weight--;
10360 return reg_weight;
10363 /* Calculate regmode weights for all insns of a basic block. */
10364 static void
10365 find_regmode_weight (basic_block b, enum machine_mode mode)
10367 rtx insn, next_tail, head, tail;
10369 get_ebb_head_tail (b, b, &head, &tail);
10370 next_tail = NEXT_INSN (tail);
10372 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10374 /* Handle register life information. */
10375 if (!INSN_P (insn))
10376 continue;
10378 if (mode == SFmode)
10379 INSN_REGMODE_WEIGHT (insn, mode) =
10380 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10381 else if (mode == SImode)
10382 INSN_REGMODE_WEIGHT (insn, mode) =
10383 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10387 /* Comparison function for ready queue sorting. */
10388 static int
10389 rank_for_reorder (const void *x, const void *y)
10391 rtx tmp = *(const rtx *) y;
10392 rtx tmp2 = *(const rtx *) x;
10394 /* The insn in a schedule group should be issued first. */
10395 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10396 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10398 /* If insns are equally good, sort by INSN_LUID (original insn order). This
10399 minimizes instruction movement, thus minimizing sched's effect on
10400 register pressure. */
10401 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10404 /* Resort the array A in which only element at index N may be out of order. */
10405 static void
10406 swap_reorder (rtx *a, int n)
10408 rtx insn = a[n - 1];
10409 int i = n - 2;
10411 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10413 a[i + 1] = a[i];
10414 i -= 1;
10416 a[i + 1] = insn;
10419 #define SCHED_REORDER(READY, N_READY) \
10420 do \
10422 if ((N_READY) == 2) \
10423 swap_reorder (READY, N_READY); \
10424 else if ((N_READY) > 2) \
10425 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10427 while (0)
10429 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10430 macro. */
10431 static void
10432 ready_reorder (rtx *ready, int nready)
10434 SCHED_REORDER (ready, nready);
10437 /* Count life regions of r0 for a block. */
10438 static int
10439 find_r0_life_regions (basic_block b)
10441 rtx end, insn;
10442 rtx pset;
10443 rtx r0_reg;
10444 int live;
10445 int set;
10446 int death = 0;
10448 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10450 set = 1;
10451 live = 1;
10453 else
10455 set = 0;
10456 live = 0;
10459 insn = BB_HEAD (b);
10460 end = BB_END (b);
10461 r0_reg = gen_rtx_REG (SImode, R0_REG);
10462 while (1)
10464 if (INSN_P (insn))
10466 if (find_regno_note (insn, REG_DEAD, R0_REG))
10468 death++;
10469 live = 0;
10471 if (!live
10472 && (pset = single_set (insn))
10473 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10474 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10476 set++;
10477 live = 1;
10480 if (insn == end)
10481 break;
10482 insn = NEXT_INSN (insn);
10484 return set - death;
10487 /* Calculate regmode weights for all insns of all basic blocks. */
10488 static void
10489 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10490 int verbose ATTRIBUTE_UNUSED,
10491 int old_max_uid)
10493 basic_block b;
10495 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10496 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10497 r0_life_regions = 0;
10499 FOR_EACH_BB_REVERSE (b)
10501 find_regmode_weight (b, SImode);
10502 find_regmode_weight (b, SFmode);
10503 if (!reload_completed)
10504 r0_life_regions += find_r0_life_regions (b);
10507 CURR_REGMODE_PRESSURE (SImode) = 0;
10508 CURR_REGMODE_PRESSURE (SFmode) = 0;
10512 /* Cleanup. */
10513 static void
10514 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10515 int verbose ATTRIBUTE_UNUSED)
10517 if (regmode_weight[0])
10519 free (regmode_weight[0]);
10520 regmode_weight[0] = NULL;
10522 if (regmode_weight[1])
10524 free (regmode_weight[1]);
10525 regmode_weight[1] = NULL;
10529 /* The set of scalar modes supported differs from the default version in TImode
10530 for 32-bit SHMEDIA. */
10531 static bool
10532 sh_scalar_mode_supported_p (enum machine_mode mode)
10534 if (TARGET_SHMEDIA32 && mode == TImode)
10535 return false;
10537 return default_scalar_mode_supported_p (mode);
10540 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10541 keep count of register pressures on SImode and SFmode. */
10542 static int
10543 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10544 int sched_verbose ATTRIBUTE_UNUSED,
10545 rtx insn,
10546 int can_issue_more)
10548 if (GET_CODE (PATTERN (insn)) != USE
10549 && GET_CODE (PATTERN (insn)) != CLOBBER)
10550 cached_can_issue_more = can_issue_more - 1;
10551 else
10552 cached_can_issue_more = can_issue_more;
10554 if (reload_completed)
10555 return cached_can_issue_more;
10557 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10558 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10560 return cached_can_issue_more;
10563 static void
10564 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10565 int verbose ATTRIBUTE_UNUSED,
10566 int veclen ATTRIBUTE_UNUSED)
10568 CURR_REGMODE_PRESSURE (SImode) = 0;
10569 CURR_REGMODE_PRESSURE (SFmode) = 0;
10572 /* Some magic numbers. */
10573 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10574 functions that already have high pressure on r0. */
10575 #define R0_MAX_LIFE_REGIONS 2
10576 /* Register Pressure thresholds for SImode and SFmode registers. */
10577 #define SIMODE_MAX_WEIGHT 5
10578 #define SFMODE_MAX_WEIGHT 10
10580 /* Return true if the pressure is high for MODE. */
10581 static short
10582 high_pressure (enum machine_mode mode)
10584 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10585 functions that already have high pressure on r0. */
10586 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10587 return 1;
10589 if (mode == SFmode)
10590 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10591 else
10592 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
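/* So, for instance, once the SImode weight of the already-issued insns
   exceeds 5, the SFmode weight exceeds 10, or r0 has two or more life
   regions, sh_reorder re-sorts the ready list by original insn order
   (INSN_LUID) to minimize instruction movement and hence register
   pressure.  */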
10595 /* Reorder ready queue if register pressure is high. */
10596 static int
10597 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10598 int sched_verbose ATTRIBUTE_UNUSED,
10599 rtx *ready,
10600 int *n_readyp,
10601 int clock_var ATTRIBUTE_UNUSED)
10603 if (reload_completed)
10604 return sh_issue_rate ();
10606 if (high_pressure (SFmode) || high_pressure (SImode))
10608 ready_reorder (ready, *n_readyp);
10611 return sh_issue_rate ();
10614 /* Skip cycles if the current register pressure is high. */
10615 static int
10616 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10617 int sched_verbose ATTRIBUTE_UNUSED,
10618 rtx *ready ATTRIBUTE_UNUSED,
10619 int *n_readyp ATTRIBUTE_UNUSED,
10620 int clock_var ATTRIBUTE_UNUSED)
10622 if (reload_completed)
10623 return cached_can_issue_more;
10625 if (high_pressure (SFmode) || high_pressure (SImode))
10626 skip_cycles = 1;
10628 return cached_can_issue_more;
10631 /* Skip cycles without sorting the ready queue. This will move insns from
10632 Q -> R. If this is the last cycle we are skipping, allow sorting of the ready
10633 queue by sh_reorder. */
10635 /* Generally, skipping this many cycles is sufficient for all insns to move
10636 from Q -> R. */
10637 #define MAX_SKIPS 8
10639 static int
10640 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10641 int sched_verbose ATTRIBUTE_UNUSED,
10642 rtx insn ATTRIBUTE_UNUSED,
10643 int last_clock_var,
10644 int clock_var,
10645 int *sort_p)
10647 if (reload_completed)
10648 return 0;
10650 if (skip_cycles)
10652 if ((clock_var - last_clock_var) < MAX_SKIPS)
10654 *sort_p = 0;
10655 return 1;
10657 /* If this is the last cycle we are skipping, allow reordering of R. */
10658 if ((clock_var - last_clock_var) == MAX_SKIPS)
10660 *sort_p = 1;
10661 return 1;
10665 skip_cycles = 0;
10667 return 0;
10670 /* SHmedia requires registers for branches, so we can't generate new
10671 branches past reload. */
10672 static bool
10673 sh_cannot_modify_jumps_p (void)
10675 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10678 static reg_class_t
10679 sh_target_reg_class (void)
10681 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10684 static bool
10685 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10687 HARD_REG_SET dummy;
10688 #if 0
10689 rtx insn;
10690 #endif
10692 if (! shmedia_space_reserved_for_target_registers)
10693 return 0;
10694 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10695 return 0;
10696 if (calc_live_regs (&dummy) >= 6 * 8)
10697 return 1;
10698 return 0;
10701 static bool
10702 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10704 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10708 On the SH1..SH4, the trampoline looks like
10709 2 0002 D202 mov.l l2,r2
10710 1 0000 D301 mov.l l1,r3
10711 3 0004 422B jmp @r2
10712 4 0006 0009 nop
10713 5 0008 00000000 l1: .long area
10714 6 000c 00000000 l2: .long function
10716 SH5 (compact) uses r1 instead of r3 for the static chain. */
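/* In terms of the generic SH1..SH4 code at the end of sh_trampoline_init
   below, those four instruction words are emitted as two SImode constants,
   0xd301d202 and 0x0009422b on little-endian targets (0xd202d301 and
   0x422b0009 on big-endian ones), followed by the static chain value at
   offset 8 and the function address at offset 12.  */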
10719 /* Emit RTL insns to initialize the variable parts of a trampoline.
10720 FNADDR is an RTX for the address of the function's pure code.
10721 CXT is an RTX for the static chain value for the function. */
10723 static void
10724 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10726 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10727 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10729 if (TARGET_SHMEDIA64)
10731 rtx tramp_templ;
10732 int fixed_len;
10734 rtx movi1 = GEN_INT (0xcc000010);
10735 rtx shori1 = GEN_INT (0xc8000010);
10736 rtx src, dst;
10738 /* The following trampoline works within a +- 128 KB range for cxt:
10739 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10740 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10741 gettr tr1,r1; blink tr0,r63 */
10742 /* Address rounding makes it hard to compute the exact bounds of the
10743 offset for this trampoline, but we have a rather generous offset
10744 range, so frame_offset should do fine as an upper bound. */
10745 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10747 /* ??? could optimize this trampoline initialization
10748 by writing DImode words with two insns each. */
10749 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10750 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10751 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10752 insn = gen_rtx_AND (DImode, insn, mask);
10753 /* Or in ptb/u .,tr1 pattern */
10754 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10755 insn = force_operand (insn, NULL_RTX);
10756 insn = gen_lowpart (SImode, insn);
10757 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10758 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10759 insn = gen_rtx_AND (DImode, insn, mask);
10760 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10761 insn = gen_lowpart (SImode, insn);
10762 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10763 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10764 insn = gen_rtx_AND (DImode, insn, mask);
10765 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10766 insn = gen_lowpart (SImode, insn);
10767 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10768 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10769 insn = gen_rtx_AND (DImode, insn, mask);
10770 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10771 insn = gen_lowpart (SImode, insn);
10772 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10773 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10774 insn = gen_rtx_AND (DImode, insn, mask);
10775 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10776 insn = gen_lowpart (SImode, insn);
10777 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10778 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10779 GEN_INT (0x6bf10600));
10780 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10781 GEN_INT (0x4415fc10));
10782 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10783 GEN_INT (0x4401fff0));
10784 emit_insn (gen_ic_invalidate_line (tramp));
10785 return;
10787 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10788 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10790 tramp_templ = gen_datalabel_ref (tramp_templ);
10791 dst = tramp_mem;
10792 src = gen_const_mem (BLKmode, tramp_templ);
10793 set_mem_align (dst, 256);
10794 set_mem_align (src, 64);
10795 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10797 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10798 emit_move_insn (adjust_address (tramp_mem, Pmode,
10799 fixed_len + GET_MODE_SIZE (Pmode)),
10800 cxt);
10801 emit_insn (gen_ic_invalidate_line (tramp));
10802 return;
10804 else if (TARGET_SHMEDIA)
10806 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10807 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10808 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10809 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10810 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10811 rotated 10 right, and the higher 16 bits of every 32 selected. */
10812 rtx movishori
10813 = force_reg (V2HImode, (simplify_gen_subreg
10814 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10815 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10816 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10818 fnaddr = force_reg (SImode, fnaddr);
10819 cxt = force_reg (SImode, cxt);
10820 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10821 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10822 movishori));
10823 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10824 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10825 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10826 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10827 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10828 gen_rtx_SUBREG (V2HImode, cxt, 0),
10829 movishori));
10830 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10831 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10832 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10833 if (TARGET_LITTLE_ENDIAN)
10835 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10836 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10838 else
10840 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10841 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10843 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10844 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10845 emit_insn (gen_ic_invalidate_line (tramp));
10846 return;
10848 else if (TARGET_SHCOMPACT)
10850 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10851 return;
10853 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10854 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10855 SImode));
10856 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10857 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10858 SImode));
10859 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10860 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10861 if (TARGET_HARVARD)
10863 if (!TARGET_INLINE_IC_INVALIDATE
10864 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10865 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10866 FUNCTION_ORDINARY),
10867 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10868 else
10869 emit_insn (gen_ic_invalidate_line (tramp));
10873 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10875 static rtx
10876 sh_trampoline_adjust_address (rtx tramp)
10878 if (TARGET_SHMEDIA)
10879 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10880 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10881 return tramp;
10884 /* FIXME: This is overly conservative. A SHcompact function that
10885 receives arguments ``by reference'' will have them stored in its
10886 own stack frame, so it must not pass pointers or references to
10887 these arguments to other functions by means of sibling calls. */
10888 /* If PIC, we cannot make sibling calls to global functions
10889 because the PLT requires r12 to be live. */
10890 static bool
10891 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10893 return (1
10894 && (! TARGET_SHCOMPACT
10895 || crtl->args.info.stack_regs == 0)
10896 && ! sh_cfun_interrupt_handler_p ()
10897 && (! flag_pic
10898 || (decl && ! TREE_PUBLIC (decl))
10899 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10902 /* Machine specific built-in functions. */
10904 struct builtin_description
10906 const enum insn_code icode;
10907 const char *const name;
10908 int signature;
10909 tree fndecl;
10912 /* Describe number and signedness of arguments; arg[0] == result
10913 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10914 /* 9: 64-bit pointer, 10: 32-bit pointer */
10915 static const char signature_args[][4] =
10917 #define SH_BLTIN_V2SI2 0
10918 { 4, 4 },
10919 #define SH_BLTIN_V4HI2 1
10920 { 4, 4 },
10921 #define SH_BLTIN_V2SI3 2
10922 { 4, 4, 4 },
10923 #define SH_BLTIN_V4HI3 3
10924 { 4, 4, 4 },
10925 #define SH_BLTIN_V8QI3 4
10926 { 4, 4, 4 },
10927 #define SH_BLTIN_MAC_HISI 5
10928 { 1, 4, 4, 1 },
10929 #define SH_BLTIN_SH_HI 6
10930 { 4, 4, 1 },
10931 #define SH_BLTIN_SH_SI 7
10932 { 4, 4, 1 },
10933 #define SH_BLTIN_V4HI2V2SI 8
10934 { 4, 4, 4 },
10935 #define SH_BLTIN_V4HI2V8QI 9
10936 { 4, 4, 4 },
10937 #define SH_BLTIN_SISF 10
10938 { 4, 2 },
10939 #define SH_BLTIN_LDUA_L 11
10940 { 2, 10 },
10941 #define SH_BLTIN_LDUA_Q 12
10942 { 1, 10 },
10943 #define SH_BLTIN_STUA_L 13
10944 { 0, 10, 2 },
10945 #define SH_BLTIN_STUA_Q 14
10946 { 0, 10, 1 },
10947 #define SH_BLTIN_LDUA_L64 15
10948 { 2, 9 },
10949 #define SH_BLTIN_LDUA_Q64 16
10950 { 1, 9 },
10951 #define SH_BLTIN_STUA_L64 17
10952 { 0, 9, 2 },
10953 #define SH_BLTIN_STUA_Q64 18
10954 { 0, 9, 1 },
10955 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10956 #define SH_BLTIN_2 19
10957 #define SH_BLTIN_SU 19
10958 { 1, 2 },
10959 #define SH_BLTIN_3 20
10960 #define SH_BLTIN_SUS 20
10961 { 2, 2, 1 },
10962 #define SH_BLTIN_PSSV 21
10963 { 0, 8, 2, 2 },
10964 #define SH_BLTIN_XXUU 22
10965 #define SH_BLTIN_UUUU 22
10966 { 1, 1, 1, 1 },
10967 #define SH_BLTIN_PV 23
10968 { 0, 8 },
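/* To read the encoding above, for example: SH_BLTIN_SH_HI is { 4, 4, 1 },
   i.e. the result and first operand take whatever mode the insn pattern
   specifies while the shift / control operand is unsigned int, and
   SH_BLTIN_LDUA_L is { 2, 10 }, a signed result loaded through a 32-bit
   pointer argument (the unaligned long-word loads).  */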
10970 /* mcmv: operands considered unsigned. */
10971 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10972 /* mperm: control value considered unsigned int. */
10973 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10974 /* mshards_q: returns signed short. */
10975 /* nsb: takes long long arg, returns unsigned char. */
10976 static struct builtin_description bdesc[] =
10978 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10979 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10980 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10981 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10982 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10983 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10984 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10985 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10986 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10987 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10988 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10990 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10991 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10992 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10993 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10994 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10995 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10996 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10997 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10998 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10999 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11000 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11001 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11002 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11003 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11004 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11005 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11006 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11007 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11008 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11009 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11010 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11011 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11012 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11013 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11014 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11015 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11016 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11017 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11018 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11019 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11020 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11021 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11022 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11023 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11024 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11025 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11026 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11027 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11028 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11029 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11030 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11031 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11032 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11033 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11034 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11035 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11036 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11037 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11038 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11039 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11040 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11041 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11042 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11043 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11044 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11045 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11046 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11047 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11048 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11049 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11050 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11051 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11052 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11053 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11054 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11055 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11056 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11057 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11058 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11059 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11060 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11061 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11064 static void
11065 sh_media_init_builtins (void)
11067 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11068 struct builtin_description *d;
11070 memset (shared, 0, sizeof shared);
11071 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11073 tree type, arg_type = 0;
11074 int signature = d->signature;
11075 int i;
11077 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11078 type = shared[signature];
11079 else
11081 int has_result = signature_args[signature][0] != 0;
11082 tree args[3];
11084 if ((signature_args[signature][1] & 8)
11085 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11086 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11087 continue;
11088 if (! TARGET_FPU_ANY
11089 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11090 continue;
11091 for (i = 0; i < (int) ARRAY_SIZE (args); i++)
11092 args[i] = NULL_TREE;
11093 for (i = 3; ; i--)
11095 int arg = signature_args[signature][i];
11096 int opno = i - 1 + has_result;
11098 if (arg & 8)
11099 arg_type = ptr_type_node;
11100 else if (arg)
11101 arg_type = (*lang_hooks.types.type_for_mode)
11102 (insn_data[d->icode].operand[opno].mode,
11103 (arg & 1));
11104 else if (i)
11105 continue;
11106 else
11107 arg_type = void_type_node;
11108 if (i == 0)
11109 break;
11110 args[i-1] = arg_type;
11112 type = build_function_type_list (arg_type, args[0], args[1],
11113 args[2], NULL_TREE);
11114 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11115 shared[signature] = type;
11117 d->fndecl =
11118 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11119 NULL, NULL_TREE);
11123 /* Returns the shmedia builtin decl for CODE. */
11125 static tree
11126 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11128 if (code >= ARRAY_SIZE (bdesc))
11129 return error_mark_node;
11131 return bdesc[code].fndecl;
11134 /* Implements target hook vector_mode_supported_p. */
11135 bool
11136 sh_vector_mode_supported_p (enum machine_mode mode)
11138 if (TARGET_FPU_ANY
11139 && ((mode == V2SFmode)
11140 || (mode == V4SFmode)
11141 || (mode == V16SFmode)))
11142 return true;
11144 else if (TARGET_SHMEDIA
11145 && ((mode == V8QImode)
11146 || (mode == V2HImode)
11147 || (mode == V4HImode)
11148 || (mode == V2SImode)))
11149 return true;
11151 return false;
11154 bool
11155 sh_frame_pointer_required (void)
11157 /* If needed, override this in other tm.h files to cope with various OS
11158 lossage requiring a frame pointer. */
11159 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11160 return true;
11162 if (crtl->profile)
11163 return true;
11165 return false;
11168 /* Implements target hook dwarf_calling_convention. Return an enum
11169 of dwarf_calling_convention. */
11171 sh_dwarf_calling_convention (const_tree func)
11173 if (sh_attr_renesas_p (func))
11174 return DW_CC_GNU_renesas_sh;
11176 return DW_CC_normal;
11179 static void
11180 sh_init_builtins (void)
11182 if (TARGET_SHMEDIA)
11183 sh_media_init_builtins ();
11186 /* Returns the sh builtin decl for CODE. */
11188 static tree
11189 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11191 if (TARGET_SHMEDIA)
11192 return sh_media_builtin_decl (code, initialize_p);
11194 return error_mark_node;
11197 /* Expand an expression EXP that calls a built-in function,
11198 with result going to TARGET if that's convenient
11199 (and in mode MODE if that's convenient).
11200 SUBTARGET may be used as the target for computing one of EXP's operands.
11201 IGNORE is nonzero if the value is to be ignored. */
11203 static rtx
11204 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11205 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11207 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11208 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11209 const struct builtin_description *d = &bdesc[fcode];
11210 enum insn_code icode = d->icode;
11211 int signature = d->signature;
11212 enum machine_mode tmode = VOIDmode;
11213 int nop = 0, i;
11214 rtx op[4];
11215 rtx pat = 0;
11217 if (signature_args[signature][0])
11219 if (ignore)
11220 return 0;
11222 tmode = insn_data[icode].operand[0].mode;
11223 if (! target
11224 || GET_MODE (target) != tmode
11225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11226 target = gen_reg_rtx (tmode);
11227 op[nop++] = target;
11229 else
11230 target = 0;
11232 for (i = 1; i <= 3; i++, nop++)
11234 tree arg;
11235 enum machine_mode opmode, argmode;
11236 tree optype;
11238 if (! signature_args[signature][i])
11239 break;
11240 arg = CALL_EXPR_ARG (exp, i - 1);
11241 if (arg == error_mark_node)
11242 return const0_rtx;
11243 if (signature_args[signature][i] & 8)
11245 opmode = ptr_mode;
11246 optype = ptr_type_node;
11248 else
11250 opmode = insn_data[icode].operand[nop].mode;
11251 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11253 argmode = TYPE_MODE (TREE_TYPE (arg));
11254 if (argmode != opmode)
11255 arg = build1 (NOP_EXPR, optype, arg);
11256 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11257 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11258 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11261 switch (nop)
11263 case 1:
11264 pat = (*insn_data[d->icode].genfun) (op[0]);
11265 break;
11266 case 2:
11267 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11268 break;
11269 case 3:
11270 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11271 break;
11272 case 4:
11273 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11274 break;
11275 default:
11276 gcc_unreachable ();
11278 if (! pat)
11279 return 0;
11280 emit_insn (pat);
11281 return target;
11284 void
11285 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11287 rtx sel0 = const0_rtx;
11288 rtx sel1 = const1_rtx;
11289 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11290 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11292 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11293 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11296 void
11297 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11299 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11301 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11302 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11305 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11306 We can allow any mode in any general register. The special registers
11307 only allow SImode. Don't allow any mode in the PR.
11309 We cannot hold DCmode values in the XD registers because alter_reg
11310 handles subregs of them incorrectly. We could work around this by
11311 spacing the XD registers like the DR registers, but this would require
11312 additional memory in every compilation to hold larger register vectors.
11313 We could hold SFmode / SCmode values in XD registers, but that
11314 would require a tertiary reload when reloading from / to memory,
11315 and a secondary reload to reload from / to general regs; that
11316 seems to be a losing proposition.
11318 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11319 it won't be ferried through GP registers first. */
11321 bool
11322 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11324 if (SPECIAL_REGISTER_P (regno))
11325 return mode == SImode;
11327 if (regno == FPUL_REG)
11328 return (mode == SImode || mode == SFmode);
11330 if (FP_REGISTER_P (regno) && mode == SFmode)
11331 return true;
11333 if (mode == V2SFmode)
11335 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11336 || GENERAL_REGISTER_P (regno)))
11337 return true;
11338 else
11339 return false;
11342 if (mode == V4SFmode)
11344 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11345 || GENERAL_REGISTER_P (regno))
11346 return true;
11347 else
11348 return false;
11351 if (mode == V16SFmode)
11353 if (TARGET_SHMEDIA)
11355 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11356 return true;
11357 else
11358 return false;
11360 else
11361 return regno == FIRST_XD_REG;
11364 if (FP_REGISTER_P (regno))
11366 if (mode == SFmode
11367 || mode == SImode
11368 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11369 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11370 || mode == DCmode
11371 || (TARGET_SHMEDIA
11372 && (mode == DFmode || mode == DImode
11373 || mode == V2SFmode || mode == TImode)))
11374 && ((regno - FIRST_FP_REG) & 1) == 0)
11375 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11376 && ((regno - FIRST_FP_REG) & 3) == 0))
11377 return true;
11378 else
11379 return false;
11382 if (XD_REGISTER_P (regno))
11383 return mode == DFmode;
11385 if (TARGET_REGISTER_P (regno))
11386 return (mode == DImode || mode == SImode || mode == PDImode);
11388 if (regno == PR_REG)
11389 return mode == SImode;
11391 if (regno == FPSCR_REG)
11392 return mode == PSImode;
11394 /* FIXME. This works around PR target/37633 for -O0. */
11395 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11397 unsigned int n = GET_MODE_SIZE (mode) / 8;
11399 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11400 && regno <= FIRST_GENERAL_REG + 14)
11401 return false;
11404 return true;
11407 /* Return true if a mode change from FROM to TO is invalid for a register
11408 in class RCLASS. */
11409 bool
11410 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11411 enum reg_class rclass)
11413 /* We want to enable the use of SUBREGs as a means to
11414 VEC_SELECT a single element of a vector. */
11415 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11416 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11418 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11420 if (TARGET_LITTLE_ENDIAN)
11422 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11423 return reg_classes_intersect_p (DF_REGS, rclass);
11425 else
11427 if (GET_MODE_SIZE (from) < 8)
11428 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11431 return 0;
11434 /* Return true if registers in machine mode MODE will likely be
11435 allocated to registers in small register classes. */
11437 bool
11438 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11440 return (! TARGET_SHMEDIA);
11443 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11444 that label is used. */
11446 void
11447 sh_mark_label (rtx address, int nuses)
11449 if (GOTOFF_P (address))
11451 /* Extract the label or symbol. */
11452 address = XEXP (address, 0);
11453 if (GET_CODE (address) == PLUS)
11454 address = XEXP (address, 0);
11455 address = XVECEXP (address, 0, 0);
11457 if (GET_CODE (address) == LABEL_REF
11458 && LABEL_P (XEXP (address, 0)))
11459 LABEL_NUSES (XEXP (address, 0)) += nuses;
11462 /* Compute extra cost of moving data between one register class
11463 and another. */
11465 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11466 uses this information. Hence, the general register <-> floating point
11467 register information here is not used for SFmode. */
11469 static int
11470 sh_register_move_cost (enum machine_mode mode,
11471 reg_class_t srcclass, reg_class_t dstclass)
11473 if (dstclass == T_REGS || dstclass == PR_REGS)
11474 return 10;
11476 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11477 return 4;
11479 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11480 && REGCLASS_HAS_FP_REG (srcclass)
11481 && REGCLASS_HAS_FP_REG (dstclass))
11482 return 4;
11484 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11485 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11487 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11488 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11489 return 9;
11491 if ((REGCLASS_HAS_FP_REG (dstclass)
11492 && REGCLASS_HAS_GENERAL_REG (srcclass))
11493 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11494 && REGCLASS_HAS_FP_REG (srcclass)))
11495 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11496 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11498 if ((dstclass == FPUL_REGS
11499 && REGCLASS_HAS_GENERAL_REG (srcclass))
11500 || (srcclass == FPUL_REGS
11501 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11502 return 5;
11504 if ((dstclass == FPUL_REGS
11505 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11506 || (srcclass == FPUL_REGS
11507 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11508 return 7;
11510 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11511 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11512 return 20;
11514 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11515 if (TARGET_SHMEDIA
11516 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11518 if (sh_gettrcost >= 0)
11519 return sh_gettrcost;
11520 else if (!TARGET_PT_FIXED)
11521 return 100;
11524 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11525 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11526 return 4;
11528 if (TARGET_SHMEDIA
11529 || (TARGET_FMOVD
11530 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11531 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11532 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
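/* Default: cost 2 for each SImode-sized (4-byte) chunk that has to be
moved.  */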
11534 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11537 static rtx emit_load_ptr (rtx, rtx);
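/* Emit a load of the ptr_mode value at ADDR into REG, sign-extending it
to Pmode when Pmode is wider than ptr_mode.  */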
11539 static rtx
11540 emit_load_ptr (rtx reg, rtx addr)
11542 rtx mem = gen_const_mem (ptr_mode, addr);
11544 if (Pmode != ptr_mode)
11545 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11546 return emit_move_insn (reg, mem);
11549 static void
11550 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11551 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11552 tree function)
11554 CUMULATIVE_ARGS cum;
11555 int structure_value_byref = 0;
11556 rtx this_rtx, this_value, sibcall, insns, funexp;
11557 tree funtype = TREE_TYPE (function);
11558 int simple_add = CONST_OK_FOR_ADD (delta);
11559 int did_load = 0;
11560 rtx scratch0, scratch1, scratch2;
11561 unsigned i;
11563 reload_completed = 1;
11564 epilogue_completed = 1;
11565 current_function_uses_only_leaf_regs = 1;
11567 emit_note (NOTE_INSN_PROLOGUE_END);
11569 /* Find the "this" pointer. We have such a wide range of ABIs for the
11570 SH that it's best to do this completely machine independently.
11571 "this" is passed as first argument, unless a structure return pointer
11572 comes first, in which case "this" comes second. */
11573 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11574 #ifndef PCC_STATIC_STRUCT_RETURN
11575 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11576 structure_value_byref = 1;
11577 #endif /* not PCC_STATIC_STRUCT_RETURN */
11578 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11580 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11582 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
11584 this_rtx
11585 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
11587 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11588 static chain pointer (even if you can't have nested virtual functions
11589 right now, someone might implement them sometime), and the rest of the
11590 registers are used for argument passing, are callee-saved, or reserved. */
11591 We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11592 -ffixed-reg has been used. */
11593 if (! call_used_regs[0] || fixed_regs[0])
11594 error ("r0 needs to be available as a call-clobbered register");
11595 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11596 if (! TARGET_SH5)
11598 if (call_used_regs[1] && ! fixed_regs[1])
11599 scratch1 = gen_rtx_REG (ptr_mode, 1);
11600 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11601 to the location where struct values are returned. */
11602 if (call_used_regs[3] && ! fixed_regs[3])
11603 scratch2 = gen_rtx_REG (Pmode, 3);
11605 else if (TARGET_SHMEDIA)
11607 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11608 if (i != REGNO (scratch0) &&
11609 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11611 scratch1 = gen_rtx_REG (ptr_mode, i);
11612 break;
11614 if (scratch1 == scratch0)
11615 error ("need a second call-clobbered general purpose register");
11616 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11617 if (call_used_regs[i] && ! fixed_regs[i])
11619 scratch2 = gen_rtx_REG (Pmode, i);
11620 break;
11622 if (scratch2 == scratch0)
11623 error ("need a call-clobbered target register");
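/* Adjust the incoming "this" pointer: first add the constant DELTA, then,
if VCALL_OFFSET is nonzero, add the value loaded from *(*this + vcall_offset)
before tail-calling FUNCTION.  */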
11626 this_value = plus_constant (this_rtx, delta);
11627 if (vcall_offset
11628 && (simple_add || scratch0 != scratch1)
11629 && strict_memory_address_p (ptr_mode, this_value))
11631 emit_load_ptr (scratch0, this_value);
11632 did_load = 1;
11635 if (!delta)
11636 ; /* Do nothing. */
11637 else if (simple_add)
11638 emit_move_insn (this_rtx, this_value);
11639 else
11641 emit_move_insn (scratch1, GEN_INT (delta));
11642 emit_insn (gen_add2_insn (this_rtx, scratch1));
11645 if (vcall_offset)
11647 rtx offset_addr;
11649 if (!did_load)
11650 emit_load_ptr (scratch0, this_rtx);
11652 offset_addr = plus_constant (scratch0, vcall_offset);
11653 if (strict_memory_address_p (ptr_mode, offset_addr))
11654 ; /* Do nothing. */
11655 else if (! TARGET_SH5 && scratch0 != scratch1)
11657 /* scratch0 != scratch1, and we have indexed loads. Get a better
11658 schedule by loading the offset into r1 and using an indexed
11659 load - then the load of r1 can issue before the load from
11660 (this_rtx + delta) finishes. */
11661 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11662 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11664 else if (CONST_OK_FOR_ADD (vcall_offset))
11666 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11667 offset_addr = scratch0;
11669 else if (scratch0 != scratch1)
11671 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11672 emit_insn (gen_add2_insn (scratch0, scratch1));
11673 offset_addr = scratch0;
11675 else
11676 gcc_unreachable (); /* FIXME */
11677 emit_load_ptr (scratch0, offset_addr);
11679 if (Pmode != ptr_mode)
11680 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11681 emit_insn (gen_add2_insn (this_rtx, scratch0));
11684 /* Generate a tail call to the target function. */
11685 if (! TREE_USED (function))
11687 assemble_external (function);
11688 TREE_USED (function) = 1;
11690 funexp = XEXP (DECL_RTL (function), 0);
11691 /* If the function is overridden, so is the thunk, hence we don't
11692 need GOT addressing even if this is a public symbol. */
11693 #if 0
11694 if (TARGET_SH1 && ! flag_weak)
11695 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11696 else
11697 #endif
11698 if (TARGET_SH2 && flag_pic)
11700 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11701 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11703 else
11705 if (TARGET_SHMEDIA && flag_pic)
11707 funexp = gen_sym2PIC (funexp);
11708 PUT_MODE (funexp, Pmode);
11710 emit_move_insn (scratch2, funexp);
11711 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11712 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11714 sibcall = emit_call_insn (sibcall);
11715 SIBLING_CALL_P (sibcall) = 1;
11716 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11717 emit_barrier ();
11719 /* Run just enough of rest_of_compilation to do scheduling and get
11720 the insns emitted. Note that use_thunk calls
11721 assemble_start_function and assemble_end_function. */
11723 insn_locators_alloc ();
11724 insns = get_insns ();
11726 if (optimize > 0)
11728 if (! cfun->cfg)
11729 init_flow (cfun);
11730 split_all_insns_noflow ();
11733 sh_reorg ();
11734 shorten_branches (insns);
11735 final_start_function (insns, file, 1);
11736 final (insns, file, 1);
11737 final_end_function ();
11739 reload_completed = 0;
11740 epilogue_completed = 0;
11744 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11746 rtx sym;
11748 /* If this is not an ordinary function, the name usually comes from a
11749 string literal or an sprintf buffer. Make sure we use the same
11750 string consistently, so that cse will be able to unify address loads. */
11751 if (kind != FUNCTION_ORDINARY)
11752 name = IDENTIFIER_POINTER (get_identifier (name));
11753 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11754 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11755 if (flag_pic)
11756 switch (kind)
11758 case FUNCTION_ORDINARY:
11759 break;
11760 case SFUNC_GOT:
11762 rtx reg = target ? target : gen_reg_rtx (Pmode);
11764 emit_insn (gen_symGOT2reg (reg, sym));
11765 sym = reg;
11766 break;
11768 case SFUNC_STATIC:
11770 /* ??? To allow cse to work, we use GOTOFF relocations.
11771 We could add combiner patterns to transform this into
11772 straight pc-relative calls with sym2PIC / bsrf when
11773 label load and function call are still 1:1 and in the
11774 same basic block during combine. */
11775 rtx reg = target ? target : gen_reg_rtx (Pmode);
11777 emit_insn (gen_symGOTOFF2reg (reg, sym));
11778 sym = reg;
11779 break;
11782 if (target && sym != target)
11784 emit_move_insn (target, sym);
11785 return target;
11787 return sym;
11790 /* Find the number of a general purpose register in S. */
11791 static int
11792 scavenge_reg (HARD_REG_SET *s)
11794 int r;
11795 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11796 if (TEST_HARD_REG_BIT (*s, r))
11797 return r;
11798 return -1;
11802 sh_get_pr_initial_val (void)
11804 rtx val;
11806 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11807 PR register on SHcompact, because it might be clobbered by the prologue.
11808 We check first if that is known to be the case. */
11809 if (TARGET_SHCOMPACT
11810 && ((crtl->args.info.call_cookie
11811 & ~ CALL_COOKIE_RET_TRAMP (1))
11812 || crtl->saves_all_registers))
11813 return gen_frame_mem (SImode, return_address_pointer_rtx);
11815 /* If we haven't finished rtl generation, there might be a nonlocal label
11816 that we haven't seen yet.
11817 ??? get_hard_reg_initial_val fails if it is called after register
11818 allocation has started, unless it has been called before for the
11819 same register. And even then, we end up in trouble if we didn't use
11820 the register in the same basic block before. So call
11821 get_hard_reg_initial_val now and wrap it in an unspec if we might
11822 need to replace it. */
11823 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11824 combine can put the pseudo returned by get_hard_reg_initial_val into
11825 instructions that need a general purpose register, which will fail to
11826 be recognized when the pseudo becomes allocated to PR. */
11828 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11829 if (TARGET_SH1)
11830 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11831 return val;
11835 sh_expand_t_scc (rtx operands[])
11837 enum rtx_code code = GET_CODE (operands[1]);
11838 rtx target = operands[0];
11839 rtx op0 = operands[2];
11840 rtx op1 = operands[3];
11841 rtx result = target;
11842 HOST_WIDE_INT val;
11844 if (!REG_P (op0) || REGNO (op0) != T_REG
11845 || !CONST_INT_P (op1))
11846 return 0;
11847 if (!REG_P (result))
11848 result = gen_reg_rtx (SImode);
11849 val = INTVAL (op1);
11850 if ((code == EQ && val == 1) || (code == NE && val == 0))
11851 emit_insn (gen_movt (result));
11852 else if (TARGET_SH2A && ((code == EQ && val == 0)
11853 || (code == NE && val == 1)))
11854 emit_insn (gen_xorsi3_movrt (result));
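/* Without movrt, compute 1 - T: subc subtracts its operands together
with the T bit, so with identical operands it leaves -T, and adding 1
then yields the negation of the T bit.  */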
11855 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11857 emit_clobber (result);
11858 emit_insn (gen_subc (result, result, result));
11859 emit_insn (gen_addsi3 (result, result, const1_rtx));
11861 else if (code == EQ || code == NE)
11862 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11863 else
11864 return 0;
11865 if (result != target)
11866 emit_move_insn (target, result);
11867 return 1;
11870 /* INSN is an sfunc; return the rtx that describes the address used. */
11871 static rtx
11872 extract_sfunc_addr (rtx insn)
11874 rtx pattern, part = NULL_RTX;
11875 int len, i;
11877 pattern = PATTERN (insn);
11878 len = XVECLEN (pattern, 0);
11879 for (i = 0; i < len; i++)
11881 part = XVECEXP (pattern, 0, i);
11882 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11883 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11884 return XEXP (part, 0);
11886 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11887 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11890 /* Verify that the register in use_sfunc_addr still agrees with the address
11891 used in the sfunc. This prevents fill_slots_from_thread from changing
11892 use_sfunc_addr.
11893 INSN is the use_sfunc_addr instruction, and REG is the register it
11894 guards. */
11896 check_use_sfunc_addr (rtx insn, rtx reg)
11898 /* Search for the sfunc. It should really come right after INSN. */
11899 while ((insn = NEXT_INSN (insn)))
11901 if (LABEL_P (insn) || JUMP_P (insn))
11902 break;
11903 if (! INSN_P (insn))
11904 continue;
11906 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11907 insn = XVECEXP (PATTERN (insn), 0, 0);
11908 if (GET_CODE (PATTERN (insn)) != PARALLEL
11909 || get_attr_type (insn) != TYPE_SFUNC)
11910 continue;
11911 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11913 gcc_unreachable ();
11916 /* This function returns a constant rtx that represents 2**15 / pi in
11917 SFmode. It's used to scale SFmode angles, in radians, to a
11918 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11919 maps to 0x10000). */
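/* For example, an angle of pi/2 radians scaled by 2**15 / pi becomes
2**14 = 0x4000, one quarter of a full circle in this encoding.  */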
11921 static GTY(()) rtx sh_fsca_sf2int_rtx;
11924 sh_fsca_sf2int (void)
11926 if (! sh_fsca_sf2int_rtx)
11928 REAL_VALUE_TYPE rv;
11930 real_from_string (&rv, "10430.378350470453");
11931 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11934 return sh_fsca_sf2int_rtx;
11937 /* This function returns a constant rtx that represents 2**15 / pi in
11938 DFmode. It's used to scale DFmode angles, in radians, to a
11939 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11940 maps to 0x10000). */
11942 static GTY(()) rtx sh_fsca_df2int_rtx;
11945 sh_fsca_df2int (void)
11947 if (! sh_fsca_df2int_rtx)
11949 REAL_VALUE_TYPE rv;
11951 real_from_string (&rv, "10430.378350470453");
11952 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11955 return sh_fsca_df2int_rtx;
11958 /* This function returns a constant rtx that represents pi / 2**15 in
11959 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11960 of a full circle back to an SFmode value (i.e., 0x10000 maps to
11961 2*pi). */
11963 static GTY(()) rtx sh_fsca_int2sf_rtx;
11966 sh_fsca_int2sf (void)
11968 if (! sh_fsca_int2sf_rtx)
11970 REAL_VALUE_TYPE rv;
11972 real_from_string (&rv, "9.587379924285257e-5");
11973 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11976 return sh_fsca_int2sf_rtx;
11979 /* Initialize the CUMULATIVE_ARGS structure. */
11981 void
11982 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11983 tree fntype,
11984 rtx libname ATTRIBUTE_UNUSED,
11985 tree fndecl,
11986 signed int n_named_args,
11987 enum machine_mode mode)
11989 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11990 pcum->free_single_fp_reg = 0;
11991 pcum->stack_regs = 0;
11992 pcum->byref_regs = 0;
11993 pcum->byref = 0;
11994 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11996 /* XXX - Should we check TARGET_HITACHI here ??? */
11997 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11999 if (fntype)
12001 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12002 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12003 pcum->prototype_p = prototype_p (fntype);
12004 pcum->arg_count [(int) SH_ARG_INT]
12005 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12007 pcum->call_cookie
12008 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12009 && pcum->arg_count [(int) SH_ARG_INT] == 0
12010 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12011 ? int_size_in_bytes (TREE_TYPE (fntype))
12012 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12013 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12014 == FIRST_RET_REG));
12016 else
12018 pcum->arg_count [(int) SH_ARG_INT] = 0;
12019 pcum->prototype_p = FALSE;
12020 if (mode != VOIDmode)
12022 pcum->call_cookie =
12023 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12024 && GET_MODE_SIZE (mode) > 4
12025 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12027 /* If the default ABI is the Renesas ABI then all library
12028 calls must assume that the library will be using the
12029 Renesas ABI. So if the function would return its result
12030 in memory then we must force the address of this memory
12031 block onto the stack. Ideally we would like to call
12032 targetm.calls.return_in_memory() here but we do not have
12033 the TYPE or the FNDECL available so we synthesize the
12034 contents of that function as best we can. */
12035 pcum->force_mem =
12036 (TARGET_DEFAULT & MASK_HITACHI)
12037 && (mode == BLKmode
12038 || (GET_MODE_SIZE (mode) > 4
12039 && !(mode == DFmode
12040 && TARGET_FPU_DOUBLE)));
12042 else
12044 pcum->call_cookie = 0;
12045 pcum->force_mem = FALSE;
12050 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12051 not descend into CONST_DOUBLE when doing the replacement.
12053 Note that copying is not done so X must not be shared unless all copies
12054 are to be modified.
12056 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12057 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12058 replacements[n*2+1] - and that we take mode changes into account.
12060 If a replacement is ambiguous, return NULL_RTX.
12062 If MODIFY is zero, don't modify any rtl in place,
12063 just return zero or nonzero for failure / success. */
12066 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12068 int i, j;
12069 const char *fmt;
12071 /* The following prevents loops when we change a MEM inside a
12072 CONST_DOUBLE into the same CONST_DOUBLE. */
12073 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12074 return x;
12076 for (i = n_replacements - 1; i >= 0 ; i--)
12077 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12078 return replacements[i*2+1];
12080 /* Allow this function to make replacements in EXPR_LISTs. */
12081 if (x == 0)
12082 return 0;
12084 if (GET_CODE (x) == SUBREG)
12086 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12087 n_replacements, modify);
12089 if (CONST_INT_P (new_rtx))
12091 x = simplify_subreg (GET_MODE (x), new_rtx,
12092 GET_MODE (SUBREG_REG (x)),
12093 SUBREG_BYTE (x));
12094 if (! x)
12095 abort ();
12097 else if (modify)
12098 SUBREG_REG (x) = new_rtx;
12100 return x;
12102 else if (REG_P (x))
12104 unsigned regno = REGNO (x);
12105 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12106 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12107 rtx result = NULL_RTX;
12109 for (i = n_replacements - 1; i >= 0; i--)
12111 rtx from = replacements[i*2];
12112 rtx to = replacements[i*2+1];
12113 unsigned from_regno, from_nregs, to_regno, new_regno;
12115 if (!REG_P (from))
12116 continue;
12117 from_regno = REGNO (from);
12118 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12119 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12120 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12122 if (regno < from_regno
12123 || regno + nregs > from_regno + nregs
12124 || !REG_P (to)
12125 || result)
12126 return NULL_RTX;
12127 to_regno = REGNO (to);
12128 if (to_regno < FIRST_PSEUDO_REGISTER)
12130 new_regno = regno + to_regno - from_regno;
12131 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12132 != nregs)
12133 return NULL_RTX;
12134 result = gen_rtx_REG (GET_MODE (x), new_regno);
12136 else if (GET_MODE (x) <= GET_MODE (to))
12137 result = gen_lowpart_common (GET_MODE (x), to);
12138 else
12139 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12142 return result ? result : x;
12144 else if (GET_CODE (x) == ZERO_EXTEND)
12146 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12147 n_replacements, modify);
12149 if (CONST_INT_P (new_rtx))
12151 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12152 new_rtx, GET_MODE (XEXP (x, 0)));
12153 if (! x)
12154 abort ();
12156 else if (modify)
12157 XEXP (x, 0) = new_rtx;
12159 return x;
12162 fmt = GET_RTX_FORMAT (GET_CODE (x));
12163 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12165 rtx new_rtx;
12167 if (fmt[i] == 'e')
12169 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12170 n_replacements, modify);
12171 if (!new_rtx)
12172 return NULL_RTX;
12173 if (modify)
12174 XEXP (x, i) = new_rtx;
12176 else if (fmt[i] == 'E')
12177 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12179 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12180 n_replacements, modify);
12181 if (!new_rtx)
12182 return NULL_RTX;
12183 if (modify)
12184 XVECEXP (x, i, j) = new_rtx;
12188 return x;
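/* Generate a TRUNCATE of X to MODE. If X is itself a zero or sign
extension, operate on the inner value instead, so that the result is
either a direct truncation or a narrower extension; NEED_SIGN_EXT
restricts the re-extension case to sign extensions.  */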
12192 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12194 enum rtx_code code = TRUNCATE;
12196 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12198 rtx inner = XEXP (x, 0);
12199 enum machine_mode inner_mode = GET_MODE (inner);
12201 if (inner_mode == mode)
12202 return inner;
12203 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12204 x = inner;
12205 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12206 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12208 code = GET_CODE (x);
12209 x = inner;
12212 return gen_rtx_fmt_e (code, mode, x);
12215 /* Called via for_each_rtx after reload, to clean up truncates of
12216 registers that span multiple actual hard registers. */
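/* A rewritten TRUNCATE returns -1 so that for_each_rtx does not walk into
the replacement's sub-rtxes; the number of changes is accumulated through
N_CHANGES.  */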
12218 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12220 rtx x = *p, reg;
12222 if (GET_CODE (x) != TRUNCATE)
12223 return 0;
12224 reg = XEXP (x, 0);
12225 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12227 enum machine_mode reg_mode = GET_MODE (reg);
12228 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12229 subreg_lowpart_offset (DImode, reg_mode));
12230 *(int*) n_changes += 1;
12231 return -1;
12233 return 0;
12236 /* Load and store depend on the highpart of the address. However,
12237 set_attr_alternative does not give well-defined results before reload,
12238 so we must look at the rtl ourselves to see if any of the feeding
12239 registers is used in a memref. */
12241 /* Called by sh_contains_memref_p via for_each_rtx. */
12242 static int
12243 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12245 return (MEM_P (*loc));
12248 /* Return nonzero iff INSN contains a MEM. */
12250 sh_contains_memref_p (rtx insn)
12252 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12255 /* Return nonzero iff INSN loads a banked register. */
12257 sh_loads_bankedreg_p (rtx insn)
12259 if (GET_CODE (PATTERN (insn)) == SET)
12261 rtx op = SET_DEST (PATTERN(insn));
12262 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12263 return 1;
12266 return 0;
12269 /* FNADDR is the MEM expression from a call expander. Return an address
12270 to use in an SHmedia insn pattern. */
12272 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12274 int is_sym;
12276 fnaddr = XEXP (fnaddr, 0);
12277 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12278 if (flag_pic && is_sym)
12280 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12282 rtx reg = gen_reg_rtx (Pmode);
12284 /* We must not use GOTPLT for sibcalls, because PIC_REG
12285 must be restored before the PLT code gets to run. */
12286 if (is_sibcall)
12287 emit_insn (gen_symGOT2reg (reg, fnaddr));
12288 else
12289 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12290 fnaddr = reg;
12292 else
12294 fnaddr = gen_sym2PIC (fnaddr);
12295 PUT_MODE (fnaddr, Pmode);
12298 /* If ptabs might trap, make this visible to the rest of the compiler.
12299 We generally assume that symbols pertain to valid locations, but
12300 it is possible to generate invalid symbols with asm or linker tricks.
12301 In a list of functions where each returns its successor, an invalid
12302 symbol might denote an empty list. */
12303 if (!TARGET_PT_FIXED
12304 && (!is_sym || TARGET_INVALID_SYMBOLS)
12305 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12307 rtx tr = gen_reg_rtx (PDImode);
12309 emit_insn (gen_ptabs (tr, fnaddr));
12310 fnaddr = tr;
12312 else if (! target_reg_operand (fnaddr, Pmode))
12313 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12314 return fnaddr;
12317 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12319 static reg_class_t
12320 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12322 if (rclass == NO_REGS
12323 && TARGET_SHMEDIA
12324 && (CONST_DOUBLE_P (x)
12325 || GET_CODE (x) == SYMBOL_REF
12326 || PIC_ADDR_P (x)))
12327 return GENERAL_REGS;
12329 return rclass;
12332 /* Implement TARGET_SECONDARY_RELOAD. */
12334 static reg_class_t
12335 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12336 enum machine_mode mode, secondary_reload_info *sri)
12338 enum reg_class rclass = (enum reg_class) rclass_i;
12340 if (in_p)
12342 if (REGCLASS_HAS_FP_REG (rclass)
12343 && ! TARGET_SHMEDIA
12344 && immediate_operand ((x), mode)
12345 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12346 && mode == SFmode && fldi_ok ()))
12347 switch (mode)
12349 case SFmode:
12350 sri->icode = CODE_FOR_reload_insf__frn;
12351 return NO_REGS;
12352 case DFmode:
12353 sri->icode = CODE_FOR_reload_indf__frn;
12354 return NO_REGS;
12355 case SImode:
12356 /* ??? If we knew that we are in the appropriate mode -
12357 single precision - we could use a reload pattern directly. */
12358 return FPUL_REGS;
12359 default:
12360 abort ();
12362 if (rclass == FPUL_REGS
12363 && ((REG_P (x)
12364 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12365 || REGNO (x) == T_REG))
12366 || GET_CODE (x) == PLUS))
12367 return GENERAL_REGS;
12368 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12370 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12371 return GENERAL_REGS;
12372 else if (mode == SFmode)
12373 return FP_REGS;
12374 sri->icode = CODE_FOR_reload_insi__i_fpul;
12375 return NO_REGS;
12377 if (rclass == FPSCR_REGS
12378 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12379 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12380 return GENERAL_REGS;
12381 if (REGCLASS_HAS_FP_REG (rclass)
12382 && TARGET_SHMEDIA
12383 && immediate_operand (x, mode)
12384 && x != CONST0_RTX (GET_MODE (x))
12385 && GET_MODE (x) != V4SFmode)
12386 return GENERAL_REGS;
12387 if ((mode == QImode || mode == HImode)
12388 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12390 sri->icode = ((mode == QImode)
12391 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12392 return NO_REGS;
12394 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12395 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12396 return TARGET_REGS;
12397 } /* end of input-only processing. */
12399 if (((REGCLASS_HAS_FP_REG (rclass)
12400 && (REG_P (x)
12401 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12402 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12403 && TARGET_FMOVD))))
12404 || (REGCLASS_HAS_GENERAL_REG (rclass)
12405 && REG_P (x)
12406 && FP_REGISTER_P (REGNO (x))))
12407 && ! TARGET_SHMEDIA
12408 && (mode == SFmode || mode == SImode))
12409 return FPUL_REGS;
12410 if ((rclass == FPUL_REGS
12411 || (REGCLASS_HAS_FP_REG (rclass)
12412 && ! TARGET_SHMEDIA && mode == SImode))
12413 && (MEM_P (x)
12414 || (REG_P (x)
12415 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12416 || REGNO (x) == T_REG
12417 || system_reg_operand (x, VOIDmode)))))
12419 if (rclass == FPUL_REGS)
12420 return GENERAL_REGS;
12421 return FPUL_REGS;
12423 if ((rclass == TARGET_REGS
12424 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12425 && !satisfies_constraint_Csy (x)
12426 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12427 return GENERAL_REGS;
12428 if ((rclass == MAC_REGS || rclass == PR_REGS)
12429 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12430 && rclass != REGNO_REG_CLASS (REGNO (x)))
12431 return GENERAL_REGS;
12432 if (rclass != GENERAL_REGS && REG_P (x)
12433 && TARGET_REGISTER_P (REGNO (x)))
12434 return GENERAL_REGS;
12436 /* If we get here, fall back to loading the FPUL register through
12437 general registers. This case can happen when the movsi_ie insn is
12438 picked initially to load/store the FPUL register from/to another
12439 register, and then the other register is allocated on the stack. */
12440 if (rclass == FPUL_REGS && true_regnum (x) == -1)
12441 return GENERAL_REGS;
12443 return NO_REGS;
12446 static void
12447 sh_conditional_register_usage (void)
12449 int regno;
12450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
12451 if (! VALID_REGISTER_P (regno))
12452 fixed_regs[regno] = call_used_regs[regno] = 1;
12453 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
12454 if (TARGET_SH5)
12456 call_used_regs[FIRST_GENERAL_REG + 8]
12457 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
12458 call_really_used_regs[FIRST_GENERAL_REG + 8]
12459 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
12461 if (TARGET_SHMEDIA)
12463 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
12464 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
12465 regno_reg_class[FIRST_FP_REG] = FP_REGS;
12467 if (flag_pic)
12469 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12470 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12472 /* Renesas saves and restores mac registers on call. */
12473 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
12475 call_really_used_regs[MACH_REG] = 0;
12476 call_really_used_regs[MACL_REG] = 0;
12478 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
12479 regno <= LAST_FP_REG; regno += 2)
12480 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
12481 if (TARGET_SHMEDIA)
12483 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
12484 if (! fixed_regs[regno] && call_really_used_regs[regno])
12485 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12487 else
12488 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
12489 if (! fixed_regs[regno] && call_really_used_regs[regno])
12490 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12493 /* Implement TARGET_LEGITIMATE_CONSTANT_P
12495 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
12497 static bool
12498 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
12500 return (TARGET_SHMEDIA
12501 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
12502 || x == CONST0_RTX (mode)
12503 || !TARGET_SHMEDIA_FPU
12504 || TARGET_SHMEDIA64)
12505 : (GET_CODE (x) != CONST_DOUBLE
12506 || mode == DFmode || mode == SFmode
12507 || mode == DImode || GET_MODE (x) == VOIDmode));
12510 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12512 static void
12513 sh_init_sync_libfuncs (void)
12515 init_sync_libfuncs (UNITS_PER_WORD);
12518 #include "gt-sh.h"