* config/sh/sh.c (sh_expand_prologue): Remove unnecessary
[official-gcc.git] / gcc / config / sh / sh.c
blobefad46b12f1f33e48341e5e5c4167cb5f62b07a2
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
115 reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void sh_option_override (void);
186 static void sh_option_init_struct (struct gcc_options *);
187 static void sh_option_default_params (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static rtx mark_constant_pool_use (rtx);
197 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
198 static tree sh_handle_resbank_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
201 tree, int, bool *);
202 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
204 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
205 static void sh_print_operand (FILE *, rtx, int);
206 static void sh_print_operand_address (FILE *, rtx);
207 static bool sh_print_operand_punct_valid_p (unsigned char code);
208 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
209 static void sh_insert_attributes (tree, tree *);
210 static const char *sh_check_pch_target_flags (int);
211 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
212 static int sh_adjust_cost (rtx, rtx, rtx, int);
213 static int sh_issue_rate (void);
214 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
215 static short find_set_regmode_weight (rtx, enum machine_mode);
216 static short find_insn_regmode_weight (rtx, enum machine_mode);
217 static void find_regmode_weight (basic_block, enum machine_mode);
218 static int find_r0_life_regions (basic_block);
219 static void sh_md_init_global (FILE *, int, int);
220 static void sh_md_finish_global (FILE *, int);
221 static int rank_for_reorder (const void *, const void *);
222 static void swap_reorder (rtx *, int);
223 static void ready_reorder (rtx *, int);
224 static short high_pressure (enum machine_mode);
225 static int sh_reorder (FILE *, int, rtx *, int *, int);
226 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
227 static void sh_md_init (FILE *, int, int);
228 static int sh_variable_issue (FILE *, int, rtx, int);
230 static bool sh_function_ok_for_sibcall (tree, tree);
232 static bool sh_cannot_modify_jumps_p (void);
233 static reg_class_t sh_target_reg_class (void);
234 static bool sh_optimize_target_register_callee_saved (bool);
235 static bool sh_ms_bitfield_layout_p (const_tree);
237 static void sh_init_builtins (void);
238 static tree sh_builtin_decl (unsigned, bool);
239 static void sh_media_init_builtins (void);
240 static tree sh_media_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
243 static void sh_file_start (void);
244 static int flow_dependent_p (rtx, rtx);
245 static void flow_dependent_p_1 (rtx, const_rtx, void *);
246 static int shiftcosts (rtx);
247 static int andcosts (rtx);
248 static int addsubcosts (rtx);
249 static int multcosts (rtx);
250 static bool unspec_caller_rtx_p (rtx);
251 static bool sh_cannot_copy_insn_p (rtx);
252 static bool sh_rtx_costs (rtx, int, int, int *, bool);
253 static int sh_address_cost (rtx, bool);
254 static int sh_pr_n_sets (void);
255 static rtx sh_allocate_initial_value (rtx);
256 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
257 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
258 static rtx sh_delegitimize_address (rtx);
259 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
260 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
261 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
262 static int scavenge_reg (HARD_REG_SET *s);
263 struct save_schedule_s;
264 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
265 struct save_schedule_s *, int);
267 static rtx sh_struct_value_rtx (tree, int);
268 static rtx sh_function_value (const_tree, const_tree, bool);
269 static bool sh_function_value_regno_p (const unsigned int);
270 static rtx sh_libcall_value (enum machine_mode, const_rtx);
271 static bool sh_return_in_memory (const_tree, const_tree);
272 static rtx sh_builtin_saveregs (void);
273 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
274 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
275 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
276 static tree sh_build_builtin_va_list (void);
277 static void sh_va_start (tree, rtx);
278 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
279 static bool sh_promote_prototypes (const_tree);
280 static enum machine_mode sh_promote_function_mode (const_tree type,
281 enum machine_mode,
282 int *punsignedp,
283 const_tree funtype,
284 int for_return);
285 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
286 const_tree, bool);
287 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
288 const_tree, bool);
289 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
290 tree, bool);
291 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
292 const_tree, bool);
293 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
294 const_tree, bool);
295 static bool sh_scalar_mode_supported_p (enum machine_mode);
296 static int sh_dwarf_calling_convention (const_tree);
297 static void sh_encode_section_info (tree, rtx, int);
298 static int sh2a_function_vector_p (tree);
299 static void sh_trampoline_init (rtx, tree, rtx);
300 static rtx sh_trampoline_adjust_address (rtx);
302 static const struct attribute_spec sh_attribute_table[] =
304 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
305 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
306 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
307 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
308 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
309 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
310 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
311 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
312 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
313 #ifdef SYMBIAN
314 /* Symbian support adds three new attributes:
315 dllexport - for exporting a function/variable that will live in a dll
316 dllimport - for importing a function/variable from a dll
318 Microsoft allows multiple declspecs in one __declspec, separating
319 them with spaces. We do NOT support this. Instead, use __declspec
320 multiple times. */
321 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
322 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
323 #endif
324 { NULL, 0, 0, false, false, false, NULL }
327 /* Set default optimization options. */
328 static const struct default_options sh_option_optimization_table[] =
330 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
331 { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 },
332 { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 },
333 { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 },
334 { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 },
335 /* We can't meaningfully test TARGET_SHMEDIA here, because -m
336 options haven't been parsed yet, hence we'd read only the
337 default. sh_target_reg_class will return NO_REGS if this is
338 not SHMEDIA, so it's OK to always set
339 flag_branch_target_load_optimize. */
340 { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 },
341 { OPT_LEVELS_NONE, 0, NULL, 0 }
344 /* Initialize the GCC target structure. */
345 #undef TARGET_ATTRIBUTE_TABLE
346 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
348 /* The next two are used for debug info when compiling with -gdwarf. */
349 #undef TARGET_ASM_UNALIGNED_HI_OP
350 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
351 #undef TARGET_ASM_UNALIGNED_SI_OP
352 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
354 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
355 #undef TARGET_ASM_UNALIGNED_DI_OP
356 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
357 #undef TARGET_ASM_ALIGNED_DI_OP
358 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
360 #undef TARGET_OPTION_OVERRIDE
361 #define TARGET_OPTION_OVERRIDE sh_option_override
362 #undef TARGET_OPTION_OPTIMIZATION_TABLE
363 #define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table
364 #undef TARGET_OPTION_INIT_STRUCT
365 #define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
366 #undef TARGET_OPTION_DEFAULT_PARAMS
367 #define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
369 #undef TARGET_PRINT_OPERAND
370 #define TARGET_PRINT_OPERAND sh_print_operand
371 #undef TARGET_PRINT_OPERAND_ADDRESS
372 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
373 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
374 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
376 #undef TARGET_ASM_FUNCTION_EPILOGUE
377 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
379 #undef TARGET_ASM_OUTPUT_MI_THUNK
380 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
382 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
383 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START sh_file_start
387 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
388 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
390 #undef TARGET_DEFAULT_TARGET_FLAGS
391 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
392 #undef TARGET_HANDLE_OPTION
393 #define TARGET_HANDLE_OPTION sh_handle_option
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
398 #undef TARGET_INSERT_ATTRIBUTES
399 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
401 #undef TARGET_SCHED_ADJUST_COST
402 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
404 #undef TARGET_SCHED_ISSUE_RATE
405 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
407 /* The next 5 hooks have been implemented for reenabling sched1. With the
408 help of these macros we are limiting the movement of insns in sched1 to
409 reduce the register pressure. The overall idea is to keep count of SImode
410 and SFmode regs required by already scheduled insns. When these counts
411 cross some threshold values; give priority to insns that free registers.
412 The insn that frees registers is most likely to be the insn with lowest
413 LUID (original insn order); but such an insn might be there in the stalled
414 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
415 upto a max of 8 cycles so that such insns may move from Q -> R.
417 The description of the hooks are as below:
419 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
420 scheduler; it is called inside the sched_init function just after
421 find_insn_reg_weights function call. It is used to calculate the SImode
422 and SFmode weights of insns of basic blocks; much similar to what
423 find_insn_reg_weights does.
424 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
426 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
427 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
428 (Q)->(R).
430 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
431 high; reorder the ready queue so that the insn with lowest LUID will be
432 issued next.
434 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
435 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
437 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
438 can be returned from TARGET_SCHED_REORDER2.
440 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
442 #undef TARGET_SCHED_DFA_NEW_CYCLE
443 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
445 #undef TARGET_SCHED_INIT_GLOBAL
446 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
448 #undef TARGET_SCHED_FINISH_GLOBAL
449 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
451 #undef TARGET_SCHED_VARIABLE_ISSUE
452 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
454 #undef TARGET_SCHED_REORDER
455 #define TARGET_SCHED_REORDER sh_reorder
457 #undef TARGET_SCHED_REORDER2
458 #define TARGET_SCHED_REORDER2 sh_reorder2
460 #undef TARGET_SCHED_INIT
461 #define TARGET_SCHED_INIT sh_md_init
463 #undef TARGET_DELEGITIMIZE_ADDRESS
464 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
466 #undef TARGET_LEGITIMIZE_ADDRESS
467 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
469 #undef TARGET_CANNOT_MODIFY_JUMPS_P
470 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
471 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
472 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
473 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
474 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
475 sh_optimize_target_register_callee_saved
477 #undef TARGET_MS_BITFIELD_LAYOUT_P
478 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
480 #undef TARGET_INIT_BUILTINS
481 #define TARGET_INIT_BUILTINS sh_init_builtins
482 #undef TARGET_BUILTIN_DECL
483 #define TARGET_BUILTIN_DECL sh_builtin_decl
484 #undef TARGET_EXPAND_BUILTIN
485 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
490 #undef TARGET_CANNOT_COPY_INSN_P
491 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
492 #undef TARGET_RTX_COSTS
493 #define TARGET_RTX_COSTS sh_rtx_costs
494 #undef TARGET_ADDRESS_COST
495 #define TARGET_ADDRESS_COST sh_address_cost
496 #undef TARGET_ALLOCATE_INITIAL_VALUE
497 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
499 #undef TARGET_MACHINE_DEPENDENT_REORG
500 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
502 #undef TARGET_DWARF_REGISTER_SPAN
503 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
505 #ifdef HAVE_AS_TLS
506 #undef TARGET_HAVE_TLS
507 #define TARGET_HAVE_TLS true
508 #endif
510 #undef TARGET_PROMOTE_PROTOTYPES
511 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
512 #undef TARGET_PROMOTE_FUNCTION_MODE
513 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
515 #undef TARGET_FUNCTION_VALUE
516 #define TARGET_FUNCTION_VALUE sh_function_value
517 #undef TARGET_FUNCTION_VALUE_REGNO_P
518 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
519 #undef TARGET_LIBCALL_VALUE
520 #define TARGET_LIBCALL_VALUE sh_libcall_value
521 #undef TARGET_STRUCT_VALUE_RTX
522 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
523 #undef TARGET_RETURN_IN_MEMORY
524 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
526 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
527 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
528 #undef TARGET_SETUP_INCOMING_VARARGS
529 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
530 #undef TARGET_STRICT_ARGUMENT_NAMING
531 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
532 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
533 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
534 #undef TARGET_MUST_PASS_IN_STACK
535 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
536 #undef TARGET_PASS_BY_REFERENCE
537 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
538 #undef TARGET_CALLEE_COPIES
539 #define TARGET_CALLEE_COPIES sh_callee_copies
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG sh_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
547 #undef TARGET_BUILD_BUILTIN_VA_LIST
548 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
549 #undef TARGET_EXPAND_BUILTIN_VA_START
550 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
551 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
552 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
554 #undef TARGET_SCALAR_MODE_SUPPORTED_P
555 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
556 #undef TARGET_VECTOR_MODE_SUPPORTED_P
557 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
559 #undef TARGET_CHECK_PCH_TARGET_FLAGS
560 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
562 #undef TARGET_DWARF_CALLING_CONVENTION
563 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
565 #undef TARGET_FRAME_POINTER_REQUIRED
566 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
568 /* Return regmode weight for insn. */
569 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
571 /* Return current register pressure for regmode. */
572 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
574 #undef TARGET_ENCODE_SECTION_INFO
575 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
577 #ifdef SYMBIAN
579 #undef TARGET_ENCODE_SECTION_INFO
580 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
581 #undef TARGET_STRIP_NAME_ENCODING
582 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
583 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
584 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
586 #endif /* SYMBIAN */
588 #undef TARGET_SECONDARY_RELOAD
589 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
591 #undef TARGET_LEGITIMATE_ADDRESS_P
592 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
594 #undef TARGET_TRAMPOLINE_INIT
595 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
596 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
597 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
599 /* Machine-specific symbol_ref flags. */
600 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
602 struct gcc_target targetm = TARGET_INITIALIZER;
604 /* Implement TARGET_HANDLE_OPTION. */
606 static bool
607 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
608 int value ATTRIBUTE_UNUSED)
610 switch (code)
612 case OPT_m1:
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
614 return true;
616 case OPT_m2:
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
618 return true;
620 case OPT_m2a:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
622 return true;
624 case OPT_m2a_nofpu:
625 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
626 return true;
628 case OPT_m2a_single:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
630 return true;
632 case OPT_m2a_single_only:
633 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
634 return true;
636 case OPT_m2e:
637 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
638 return true;
640 case OPT_m3:
641 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
642 return true;
644 case OPT_m3e:
645 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
646 return true;
648 case OPT_m4:
649 case OPT_m4_100:
650 case OPT_m4_200:
651 case OPT_m4_300:
652 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
653 return true;
655 case OPT_m4_nofpu:
656 case OPT_m4_100_nofpu:
657 case OPT_m4_200_nofpu:
658 case OPT_m4_300_nofpu:
659 case OPT_m4_340:
660 case OPT_m4_400:
661 case OPT_m4_500:
662 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
663 return true;
665 case OPT_m4_single:
666 case OPT_m4_100_single:
667 case OPT_m4_200_single:
668 case OPT_m4_300_single:
669 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
670 return true;
672 case OPT_m4_single_only:
673 case OPT_m4_100_single_only:
674 case OPT_m4_200_single_only:
675 case OPT_m4_300_single_only:
676 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
677 return true;
679 case OPT_m4a:
680 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
681 return true;
683 case OPT_m4a_nofpu:
684 case OPT_m4al:
685 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
686 return true;
688 case OPT_m4a_single:
689 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
690 return true;
692 case OPT_m4a_single_only:
693 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
694 return true;
696 case OPT_m5_32media:
697 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
698 return true;
700 case OPT_m5_32media_nofpu:
701 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
702 return true;
704 case OPT_m5_64media:
705 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
706 return true;
708 case OPT_m5_64media_nofpu:
709 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
710 return true;
712 case OPT_m5_compact:
713 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
714 return true;
716 case OPT_m5_compact_nofpu:
717 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
718 return true;
720 default:
721 return true;
725 /* Implement TARGET_OPTION_INIT_STRUCT. */
726 static void
727 sh_option_init_struct (struct gcc_options *opts)
729 /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
730 here, so leave it to TARGET_OPTION_OVERRIDE to set
731 flag_finite_math_only. We set it to 2 here so we know if the user
732 explicitly requested this to be on or off. */
733 opts->x_flag_finite_math_only = 2;
736 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
737 static void
738 sh_option_default_params (void)
740 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
743 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
744 various options, and do some machine dependent initialization. */
745 static void
746 sh_option_override (void)
748 int regno;
750 SUBTARGET_OVERRIDE_OPTIONS;
751 if (optimize > 1 && !optimize_size)
752 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
753 if (flag_finite_math_only == 2)
754 flag_finite_math_only
755 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
756 if (TARGET_SH2E && !flag_finite_math_only)
757 target_flags |= MASK_IEEE;
758 sh_cpu = PROCESSOR_SH1;
759 assembler_dialect = 0;
760 if (TARGET_SH2)
761 sh_cpu = PROCESSOR_SH2;
762 if (TARGET_SH2E)
763 sh_cpu = PROCESSOR_SH2E;
764 if (TARGET_SH2A)
765 sh_cpu = PROCESSOR_SH2A;
766 if (TARGET_SH3)
767 sh_cpu = PROCESSOR_SH3;
768 if (TARGET_SH3E)
769 sh_cpu = PROCESSOR_SH3E;
770 if (TARGET_SH4)
772 assembler_dialect = 1;
773 sh_cpu = PROCESSOR_SH4;
775 if (TARGET_SH4A_ARCH)
777 assembler_dialect = 1;
778 sh_cpu = PROCESSOR_SH4A;
780 if (TARGET_SH5)
782 sh_cpu = PROCESSOR_SH5;
783 target_flags |= MASK_ALIGN_DOUBLE;
784 if (TARGET_SHMEDIA_FPU)
785 target_flags |= MASK_FMOVD;
786 if (TARGET_SHMEDIA)
788 /* There are no delay slots on SHmedia. */
789 flag_delayed_branch = 0;
790 /* Relaxation isn't yet supported for SHmedia */
791 target_flags &= ~MASK_RELAX;
792 /* After reload, if conversion does little good but can cause
793 ICEs:
794 - find_if_block doesn't do anything for SH because we don't
795 have conditional execution patterns. (We use conditional
796 move patterns, which are handled differently, and only
797 before reload).
798 - find_cond_trap doesn't do anything for the SH because we
799 don't have conditional traps.
800 - find_if_case_1 uses redirect_edge_and_branch_force in
801 the only path that does an optimization, and this causes
802 an ICE when branch targets are in registers.
803 - find_if_case_2 doesn't do anything for the SHmedia after
804 reload except when it can redirect a tablejump - and
805 that's rather rare. */
806 flag_if_conversion2 = 0;
807 if (! strcmp (sh_div_str, "call"))
808 sh_div_strategy = SH_DIV_CALL;
809 else if (! strcmp (sh_div_str, "call2"))
810 sh_div_strategy = SH_DIV_CALL2;
811 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
812 sh_div_strategy = SH_DIV_FP;
813 else if (! strcmp (sh_div_str, "inv"))
814 sh_div_strategy = SH_DIV_INV;
815 else if (! strcmp (sh_div_str, "inv:minlat"))
816 sh_div_strategy = SH_DIV_INV_MINLAT;
817 else if (! strcmp (sh_div_str, "inv20u"))
818 sh_div_strategy = SH_DIV_INV20U;
819 else if (! strcmp (sh_div_str, "inv20l"))
820 sh_div_strategy = SH_DIV_INV20L;
821 else if (! strcmp (sh_div_str, "inv:call2"))
822 sh_div_strategy = SH_DIV_INV_CALL2;
823 else if (! strcmp (sh_div_str, "inv:call"))
824 sh_div_strategy = SH_DIV_INV_CALL;
825 else if (! strcmp (sh_div_str, "inv:fp"))
827 if (TARGET_FPU_ANY)
828 sh_div_strategy = SH_DIV_INV_FP;
829 else
830 sh_div_strategy = SH_DIV_INV;
832 TARGET_CBRANCHDI4 = 0;
833 /* Assembler CFI isn't yet fully supported for SHmedia. */
834 flag_dwarf2_cfi_asm = 0;
837 else
839 /* Only the sh64-elf assembler fully supports .quad properly. */
840 targetm.asm_out.aligned_op.di = NULL;
841 targetm.asm_out.unaligned_op.di = NULL;
843 if (TARGET_SH1)
845 if (! strcmp (sh_div_str, "call-div1"))
846 sh_div_strategy = SH_DIV_CALL_DIV1;
847 else if (! strcmp (sh_div_str, "call-fp")
848 && (TARGET_FPU_DOUBLE
849 || (TARGET_HARD_SH4 && TARGET_SH2E)
850 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
851 sh_div_strategy = SH_DIV_CALL_FP;
852 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
853 sh_div_strategy = SH_DIV_CALL_TABLE;
854 else
855 /* Pick one that makes most sense for the target in general.
856 It is not much good to use different functions depending
857 on -Os, since then we'll end up with two different functions
858 when some of the code is compiled for size, and some for
859 speed. */
861 /* SH4 tends to emphasize speed. */
862 if (TARGET_HARD_SH4)
863 sh_div_strategy = SH_DIV_CALL_TABLE;
864 /* These have their own way of doing things. */
865 else if (TARGET_SH2A)
866 sh_div_strategy = SH_DIV_INTRINSIC;
867 /* ??? Should we use the integer SHmedia function instead? */
868 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
869 sh_div_strategy = SH_DIV_CALL_FP;
870 /* SH1 .. SH3 cores often go into small-footprint systems, so
871 default to the smallest implementation available. */
872 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
873 sh_div_strategy = SH_DIV_CALL_TABLE;
874 else
875 sh_div_strategy = SH_DIV_CALL_DIV1;
877 if (!TARGET_SH1)
878 TARGET_PRETEND_CMOVE = 0;
879 if (sh_divsi3_libfunc[0])
880 ; /* User supplied - leave it alone. */
881 else if (TARGET_DIVIDE_CALL_FP)
882 sh_divsi3_libfunc = "__sdivsi3_i4";
883 else if (TARGET_DIVIDE_CALL_TABLE)
884 sh_divsi3_libfunc = "__sdivsi3_i4i";
885 else if (TARGET_SH5)
886 sh_divsi3_libfunc = "__sdivsi3_1";
887 else
888 sh_divsi3_libfunc = "__sdivsi3";
889 if (sh_branch_cost == -1)
890 sh_branch_cost
891 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
893 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
894 if (! VALID_REGISTER_P (regno))
895 sh_register_names[regno][0] = '\0';
897 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
898 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
899 sh_additional_register_names[regno][0] = '\0';
901 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
903 if ((flag_pic && ! TARGET_PREFERGOT)
904 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
905 flag_no_function_cse = 1;
907 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
909 /* Never run scheduling before reload, since that can
910 break global alloc, and generates slower code anyway due
911 to the pressure on R0. */
912 /* Enable sched1 for SH4 if the user explicitly requests.
913 When sched1 is enabled, the ready queue will be reordered by
914 the target hooks if pressure is high. We can not do this for
915 PIC, SH3 and lower as they give spill failures for R0. */
916 if (!TARGET_HARD_SH4 || flag_pic)
917 flag_schedule_insns = 0;
918 /* ??? Current exception handling places basic block boundaries
919 after call_insns. It causes the high pressure on R0 and gives
920 spill failures for R0 in reload. See PR 22553 and the thread
921 on gcc-patches
922 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
923 else if (flag_exceptions)
925 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
926 warning (0, "ignoring -fschedule-insns because of exception handling bug");
927 flag_schedule_insns = 0;
929 else if (flag_schedule_insns
930 && !global_options_set.x_flag_schedule_insns)
931 flag_schedule_insns = 0;
934 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
935 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
937 /* Unwind info is not correct around the CFG unless either a frame
938 pointer is present or M_A_O_A is set. Fixing this requires rewriting
939 unwind info generation to be aware of the CFG and propagating states
940 around edges. */
941 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
942 || flag_exceptions || flag_non_call_exceptions)
943 && flag_omit_frame_pointer
944 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
946 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
947 warning (0, "unwind tables currently require either a frame pointer "
948 "or -maccumulate-outgoing-args for correctness");
949 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
952 /* Unwinding with -freorder-blocks-and-partition does not work on this
953 architecture, because it requires far jumps to label crossing between
954 hot/cold sections which are rejected on this architecture. */
955 if (flag_reorder_blocks_and_partition)
957 if (flag_exceptions)
959 inform (input_location,
960 "-freorder-blocks-and-partition does not work with "
961 "exceptions on this architecture");
962 flag_reorder_blocks_and_partition = 0;
963 flag_reorder_blocks = 1;
965 else if (flag_unwind_tables)
967 inform (input_location,
968 "-freorder-blocks-and-partition does not support unwind "
969 "info on this architecture");
970 flag_reorder_blocks_and_partition = 0;
971 flag_reorder_blocks = 1;
975 if (align_loops == 0)
976 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
977 if (align_jumps == 0)
978 align_jumps = 1 << CACHE_LOG;
979 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
980 align_jumps = TARGET_SHMEDIA ? 4 : 2;
982 /* Allocation boundary (in *bytes*) for the code of a function.
983 SH1: 32 bit alignment is faster, because instructions are always
984 fetched as a pair from a longword boundary.
985 SH2 .. SH5 : align to cache line start. */
986 if (align_functions == 0)
987 align_functions
988 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
989 /* The linker relaxation code breaks when a function contains
990 alignments that are larger than that at the start of a
991 compilation unit. */
992 if (TARGET_RELAX)
994 int min_align
995 = align_loops > align_jumps ? align_loops : align_jumps;
997 /* Also take possible .long constants / mova tables int account. */
998 if (min_align < 4)
999 min_align = 4;
1000 if (align_functions < min_align)
1001 align_functions = min_align;
1004 if (sh_fixed_range_str)
1005 sh_fix_range (sh_fixed_range_str);
1007 /* This target defaults to strict volatile bitfields. */
1008 if (flag_strict_volatile_bitfields < 0)
1009 flag_strict_volatile_bitfields = 1;
1012 /* Print the operand address in x to the stream. */
1014 static void
1015 sh_print_operand_address (FILE *stream, rtx x)
1017 switch (GET_CODE (x))
1019 case REG:
1020 case SUBREG:
1021 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1022 break;
1024 case PLUS:
1026 rtx base = XEXP (x, 0);
1027 rtx index = XEXP (x, 1);
1029 switch (GET_CODE (index))
1031 case CONST_INT:
1032 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1033 reg_names[true_regnum (base)]);
1034 break;
1036 case REG:
1037 case SUBREG:
1039 int base_num = true_regnum (base);
1040 int index_num = true_regnum (index);
1042 fprintf (stream, "@(r0,%s)",
1043 reg_names[MAX (base_num, index_num)]);
1044 break;
1047 default:
1048 gcc_unreachable ();
1051 break;
1053 case PRE_DEC:
1054 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1055 break;
1057 case POST_INC:
1058 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1059 break;
1061 default:
1062 x = mark_constant_pool_use (x);
1063 output_addr_const (stream, x);
1064 break;
1068 /* Print operand x (an rtx) in assembler syntax to file stream
1069 according to modifier code.
1071 '.' print a .s if insn needs delay slot
1072 ',' print LOCAL_LABEL_PREFIX
1073 '@' print trap, rte or rts depending upon pragma interruptness
1074 '#' output a nop if there is nothing to put in the delay slot
1075 ''' print likelihood suffix (/u for unlikely).
1076 '>' print branch target if -fverbose-asm
1077 'O' print a constant without the #
1078 'R' print the LSW of a dp value - changes if in little endian
1079 'S' print the MSW of a dp value - changes if in little endian
1080 'T' print the next word of a dp value - same as 'R' in big endian mode.
1081 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1082 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1083 'N' print 'r63' if the operand is (const_int 0).
1084 'd' print a V2SF reg as dN instead of fpN.
1085 'm' print a pair `base,offset' or `base,index', for LD and ST.
1086 'U' Likewise for {LD,ST}{HI,LO}.
1087 'V' print the position of a single bit set.
1088 'W' print the position of a single bit cleared.
1089 't' print a memory address which is a register.
1090 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1091 'o' output an operator. */
1093 static void
1094 sh_print_operand (FILE *stream, rtx x, int code)
1096 int regno;
1097 enum machine_mode mode;
1099 switch (code)
1101 tree trapa_attr;
1103 case '.':
1104 if (final_sequence
1105 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1106 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1107 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1108 break;
1109 case ',':
1110 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1111 break;
1112 case '@':
1113 trapa_attr = lookup_attribute ("trap_exit",
1114 DECL_ATTRIBUTES (current_function_decl));
1115 if (trapa_attr)
1116 fprintf (stream, "trapa #%ld",
1117 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1118 else if (sh_cfun_interrupt_handler_p ())
1120 if (sh_cfun_resbank_handler_p ())
1121 fprintf (stream, "resbank\n");
1122 fprintf (stream, "rte");
1124 else
1125 fprintf (stream, "rts");
1126 break;
1127 case '#':
1128 /* Output a nop if there's nothing in the delay slot. */
1129 if (dbr_sequence_length () == 0)
1130 fprintf (stream, "\n\tnop");
1131 break;
1132 case '\'':
1134 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1136 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1137 fputs ("/u", stream);
1138 break;
1140 case '>':
1141 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1143 fputs ("\t! target: ", stream);
1144 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1146 break;
1147 case 'O':
1148 x = mark_constant_pool_use (x);
1149 output_addr_const (stream, x);
1150 break;
1151 /* N.B.: %R / %S / %T adjust memory addresses by four.
1152 For SHMEDIA, that means they can be used to access the first and
1153 second 32 bit part of a 64 bit (or larger) value that
1154 might be held in floating point registers or memory.
1155 While they can be used to access 64 bit parts of a larger value
1156 held in general purpose registers, that won't work with memory -
1157 neither for fp registers, since the frxx names are used. */
1158 case 'R':
1159 if (REG_P (x) || GET_CODE (x) == SUBREG)
1161 regno = true_regnum (x);
1162 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1163 fputs (reg_names[regno], (stream));
1165 else if (MEM_P (x))
1167 x = adjust_address (x, SImode, 4 * LSW);
1168 sh_print_operand_address (stream, XEXP (x, 0));
1170 else
1172 rtx sub = NULL_RTX;
1174 mode = GET_MODE (x);
1175 if (mode == VOIDmode)
1176 mode = DImode;
1177 if (GET_MODE_SIZE (mode) >= 8)
1178 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1179 if (sub)
1180 sh_print_operand (stream, sub, 0);
1181 else
1182 output_operand_lossage ("invalid operand to %%R");
1184 break;
1185 case 'S':
1186 if (REG_P (x) || GET_CODE (x) == SUBREG)
1188 regno = true_regnum (x);
1189 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1190 fputs (reg_names[regno], (stream));
1192 else if (MEM_P (x))
1194 x = adjust_address (x, SImode, 4 * MSW);
1195 sh_print_operand_address (stream, XEXP (x, 0));
1197 else
1199 rtx sub = NULL_RTX;
1201 mode = GET_MODE (x);
1202 if (mode == VOIDmode)
1203 mode = DImode;
1204 if (GET_MODE_SIZE (mode) >= 8)
1205 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1206 if (sub)
1207 sh_print_operand (stream, sub, 0);
1208 else
1209 output_operand_lossage ("invalid operand to %%S");
1211 break;
1212 case 'T':
1213 /* Next word of a double. */
1214 switch (GET_CODE (x))
1216 case REG:
1217 fputs (reg_names[REGNO (x) + 1], (stream));
1218 break;
1219 case MEM:
1220 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1221 && GET_CODE (XEXP (x, 0)) != POST_INC)
1222 x = adjust_address (x, SImode, 4);
1223 sh_print_operand_address (stream, XEXP (x, 0));
1224 break;
1225 default:
1226 break;
1228 break;
1230 case 't':
1231 gcc_assert (MEM_P (x));
1232 x = XEXP (x, 0);
1233 switch (GET_CODE (x))
1235 case REG:
1236 case SUBREG:
1237 sh_print_operand (stream, x, 0);
1238 break;
1239 default:
1240 break;
1242 break;
1244 case 'o':
1245 switch (GET_CODE (x))
1247 case PLUS: fputs ("add", stream); break;
1248 case MINUS: fputs ("sub", stream); break;
1249 case MULT: fputs ("mul", stream); break;
1250 case DIV: fputs ("div", stream); break;
1251 case EQ: fputs ("eq", stream); break;
1252 case NE: fputs ("ne", stream); break;
1253 case GT: case LT: fputs ("gt", stream); break;
1254 case GE: case LE: fputs ("ge", stream); break;
1255 case GTU: case LTU: fputs ("gtu", stream); break;
1256 case GEU: case LEU: fputs ("geu", stream); break;
1257 default:
1258 break;
1260 break;
1261 case 'M':
1262 if (TARGET_SHMEDIA)
1264 if (MEM_P (x)
1265 && GET_CODE (XEXP (x, 0)) == PLUS
1266 && (REG_P (XEXP (XEXP (x, 0), 1))
1267 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1268 fputc ('x', stream);
1270 else
1272 if (MEM_P (x))
1274 switch (GET_MODE (x))
1276 case QImode: fputs (".b", stream); break;
1277 case HImode: fputs (".w", stream); break;
1278 case SImode: fputs (".l", stream); break;
1279 case SFmode: fputs (".s", stream); break;
1280 case DFmode: fputs (".d", stream); break;
1281 default: gcc_unreachable ();
1285 break;
1287 case 'm':
1288 gcc_assert (MEM_P (x));
1289 x = XEXP (x, 0);
1290 /* Fall through. */
1291 case 'U':
1292 switch (GET_CODE (x))
1294 case REG:
1295 case SUBREG:
1296 sh_print_operand (stream, x, 0);
1297 fputs (", 0", stream);
1298 break;
1300 case PLUS:
1301 sh_print_operand (stream, XEXP (x, 0), 0);
1302 fputs (", ", stream);
1303 sh_print_operand (stream, XEXP (x, 1), 0);
1304 break;
1306 default:
1307 gcc_unreachable ();
1309 break;
1311 case 'V':
1313 int num = exact_log2 (INTVAL (x));
1314 gcc_assert (num >= 0);
1315 fprintf (stream, "#%d", num);
1317 break;
1319 case 'W':
1321 int num = exact_log2 (~INTVAL (x));
1322 gcc_assert (num >= 0);
1323 fprintf (stream, "#%d", num);
1325 break;
1327 case 'd':
1328 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1330 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1331 break;
1333 case 'N':
1334 if (x == CONST0_RTX (GET_MODE (x)))
1336 fprintf ((stream), "r63");
1337 break;
1339 goto default_output;
1340 case 'u':
1341 if (CONST_INT_P (x))
1343 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1344 break;
1346 /* Fall through. */
1348 default_output:
1349 default:
1350 regno = 0;
1351 mode = GET_MODE (x);
1353 switch (GET_CODE (x))
1355 case TRUNCATE:
1357 rtx inner = XEXP (x, 0);
1358 int offset = 0;
1359 enum machine_mode inner_mode;
1361 /* We might see SUBREGs with vector mode registers inside. */
1362 if (GET_CODE (inner) == SUBREG
1363 && (GET_MODE_SIZE (GET_MODE (inner))
1364 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1365 && subreg_lowpart_p (inner))
1366 inner = SUBREG_REG (inner);
1367 if (CONST_INT_P (inner))
1369 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1370 goto default_output;
1372 inner_mode = GET_MODE (inner);
1373 if (GET_CODE (inner) == SUBREG
1374 && (GET_MODE_SIZE (GET_MODE (inner))
1375 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1376 && REG_P (SUBREG_REG (inner)))
1378 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1379 GET_MODE (SUBREG_REG (inner)),
1380 SUBREG_BYTE (inner),
1381 GET_MODE (inner));
1382 inner = SUBREG_REG (inner);
1384 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1385 abort ();
1386 /* Floating point register pairs are always big endian;
1387 general purpose registers are 64 bit wide. */
1388 regno = REGNO (inner);
1389 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1390 - HARD_REGNO_NREGS (regno, mode))
1391 + offset;
1392 x = inner;
1393 goto reg;
1395 case SIGN_EXTEND:
1396 x = XEXP (x, 0);
1397 goto reg;
1398 /* FIXME: We need this on SHmedia32 because reload generates
1399 some sign-extended HI or QI loads into DImode registers
1400 but, because Pmode is SImode, the address ends up with a
1401 subreg:SI of the DImode register. Maybe reload should be
1402 fixed so as to apply alter_subreg to such loads? */
1403 case IF_THEN_ELSE:
1404 gcc_assert (trapping_target_operand (x, VOIDmode));
1405 x = XEXP (XEXP (x, 2), 0);
1406 goto default_output;
1407 case SUBREG:
1408 gcc_assert (SUBREG_BYTE (x) == 0
1409 && REG_P (SUBREG_REG (x)));
1411 x = SUBREG_REG (x);
1412 /* Fall through. */
1414 reg:
1415 case REG:
1416 regno += REGNO (x);
1417 if (FP_REGISTER_P (regno)
1418 && mode == V16SFmode)
1419 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1420 else if (FP_REGISTER_P (REGNO (x))
1421 && mode == V4SFmode)
1422 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1423 else if (REG_P (x)
1424 && mode == V2SFmode)
1425 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1426 else if (FP_REGISTER_P (REGNO (x))
1427 && GET_MODE_SIZE (mode) > 4)
1428 fprintf ((stream), "d%s", reg_names[regno] + 1);
1429 else
1430 fputs (reg_names[regno], (stream));
1431 break;
1433 case MEM:
1434 output_address (XEXP (x, 0));
1435 break;
1437 default:
1438 if (TARGET_SH1)
1439 fputc ('#', stream);
1440 output_addr_const (stream, x);
1441 break;
1443 break;
1447 static bool
1448 sh_print_operand_punct_valid_p (unsigned char code)
1450 return (code == '.' || code == '#' || code == '@' || code == ','
1451 || code == '$' || code == '\'' || code == '>');
1455 /* Encode symbol attributes of a SYMBOL_REF into its
1456 SYMBOL_REF_FLAGS. */
1457 static void
1458 sh_encode_section_info (tree decl, rtx rtl, int first)
1460 default_encode_section_info (decl, rtl, first);
1462 if (TREE_CODE (decl) == FUNCTION_DECL
1463 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1464 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1467 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1468 static void
1469 force_into (rtx value, rtx target)
1471 value = force_operand (value, target);
1472 if (! rtx_equal_p (value, target))
1473 emit_insn (gen_move_insn (target, value));
1476 /* Emit code to perform a block move. Choose the best method.
1478 OPERANDS[0] is the destination.
1479 OPERANDS[1] is the source.
1480 OPERANDS[2] is the size.
1481 OPERANDS[3] is the alignment safe to use. */
1484 expand_block_move (rtx *operands)
1486 int align = INTVAL (operands[3]);
1487 int constp = (CONST_INT_P (operands[2]));
1488 int bytes = (constp ? INTVAL (operands[2]) : 0);
1490 if (! constp)
1491 return 0;
1493 /* If we could use mov.l to move words and dest is word-aligned, we
1494 can use movua.l for loads and still generate a relatively short
1495 and efficient sequence. */
1496 if (TARGET_SH4A_ARCH && align < 4
1497 && MEM_ALIGN (operands[0]) >= 32
1498 && can_move_by_pieces (bytes, 32))
1500 rtx dest = copy_rtx (operands[0]);
1501 rtx src = copy_rtx (operands[1]);
1502 /* We could use different pseudos for each copied word, but
1503 since movua can only load into r0, it's kind of
1504 pointless. */
1505 rtx temp = gen_reg_rtx (SImode);
1506 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1507 int copied = 0;
1509 while (copied + 4 <= bytes)
1511 rtx to = adjust_address (dest, SImode, copied);
1512 rtx from = adjust_automodify_address (src, BLKmode,
1513 src_addr, copied);
1515 set_mem_size (from, GEN_INT (4));
1516 emit_insn (gen_movua (temp, from));
1517 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1518 emit_move_insn (to, temp);
1519 copied += 4;
1522 if (copied < bytes)
1523 move_by_pieces (adjust_address (dest, BLKmode, copied),
1524 adjust_automodify_address (src, BLKmode,
1525 src_addr, copied),
1526 bytes - copied, align, 0);
1528 return 1;
1531 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1532 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1533 if (align < 4 || (bytes % 4 != 0))
1534 return 0;
1536 if (TARGET_HARD_SH4)
1538 if (bytes < 12)
1539 return 0;
1540 else if (bytes == 12)
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1546 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1547 force_into (XEXP (operands[0], 0), r4);
1548 force_into (XEXP (operands[1], 0), r5);
1549 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1550 return 1;
1552 else if (! optimize_size)
1554 const char *entry_name;
1555 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1556 int dwords;
1557 rtx r4 = gen_rtx_REG (SImode, 4);
1558 rtx r5 = gen_rtx_REG (SImode, 5);
1559 rtx r6 = gen_rtx_REG (SImode, 6);
1561 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1562 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1563 force_into (XEXP (operands[0], 0), r4);
1564 force_into (XEXP (operands[1], 0), r5);
1566 dwords = bytes >> 3;
1567 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1568 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1569 return 1;
1571 else
1572 return 0;
1574 if (bytes < 64)
1576 char entry[30];
1577 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1578 rtx r4 = gen_rtx_REG (SImode, 4);
1579 rtx r5 = gen_rtx_REG (SImode, 5);
1581 sprintf (entry, "__movmemSI%d", bytes);
1582 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1583 force_into (XEXP (operands[0], 0), r4);
1584 force_into (XEXP (operands[1], 0), r5);
1585 emit_insn (gen_block_move_real (func_addr_rtx));
1586 return 1;
1589 /* This is the same number of bytes as a memcpy call, but to a different
1590 less common function name, so this will occasionally use more space. */
1591 if (! optimize_size)
1593 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1594 int final_switch, while_loop;
1595 rtx r4 = gen_rtx_REG (SImode, 4);
1596 rtx r5 = gen_rtx_REG (SImode, 5);
1597 rtx r6 = gen_rtx_REG (SImode, 6);
1599 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1600 force_into (XEXP (operands[0], 0), r4);
1601 force_into (XEXP (operands[1], 0), r5);
1603 /* r6 controls the size of the move. 16 is decremented from it
1604 for each 64 bytes moved. Then the negative bit left over is used
1605 as an index into a list of move instructions. e.g., a 72 byte move
1606 would be set up with size(r6) = 14, for one iteration through the
1607 big while loop, and a switch of -2 for the last part. */
1609 final_switch = 16 - ((bytes / 4) % 16);
1610 while_loop = ((bytes / 4) / 16 - 1) * 16;
1611 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1612 emit_insn (gen_block_lump_real (func_addr_rtx));
1613 return 1;
1616 return 0;
1619 /* Prepare operands for a move define_expand; specifically, one of the
1620 operands must be in a register. */
1623 prepare_move_operands (rtx operands[], enum machine_mode mode)
1625 if ((mode == SImode || mode == DImode)
1626 && flag_pic
1627 && ! ((mode == Pmode || mode == ptr_mode)
1628 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1630 rtx temp;
1631 if (SYMBOLIC_CONST_P (operands[1]))
1633 if (MEM_P (operands[0]))
1634 operands[1] = force_reg (Pmode, operands[1]);
1635 else if (TARGET_SHMEDIA
1636 && GET_CODE (operands[1]) == LABEL_REF
1637 && target_reg_operand (operands[0], mode))
1638 /* It's ok. */;
1639 else
1641 temp = (!can_create_pseudo_p ()
1642 ? operands[0]
1643 : gen_reg_rtx (Pmode));
1644 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1647 else if (GET_CODE (operands[1]) == CONST
1648 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1649 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1651 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1652 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1653 mode, temp);
1654 operands[1] = expand_binop (mode, add_optab, temp,
1655 XEXP (XEXP (operands[1], 0), 1),
1656 (!can_create_pseudo_p ()
1657 ? temp
1658 : gen_reg_rtx (Pmode)),
1659 0, OPTAB_LIB_WIDEN);
1663 if (! reload_in_progress && ! reload_completed)
1665 /* Copy the source to a register if both operands aren't registers. */
1666 if (! register_operand (operands[0], mode)
1667 && ! sh_register_operand (operands[1], mode))
1668 operands[1] = copy_to_mode_reg (mode, operands[1]);
1670 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1672 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1673 except that we can't use that function because it is static. */
1674 rtx new_rtx = change_address (operands[0], mode, 0);
1675 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1676 operands[0] = new_rtx;
1679 /* This case can happen while generating code to move the result
1680 of a library call to the target. Reject `st r0,@(rX,rY)' because
1681 reload will fail to find a spill register for rX, since r0 is already
1682 being used for the source. */
1683 else if (TARGET_SH1
1684 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1685 && MEM_P (operands[0])
1686 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1687 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1688 operands[1] = copy_to_mode_reg (mode, operands[1]);
1691 if (mode == Pmode || mode == ptr_mode)
1693 rtx op0, op1, opc;
1694 enum tls_model tls_kind;
1696 op0 = operands[0];
1697 op1 = operands[1];
1698 if (GET_CODE (op1) == CONST
1699 && GET_CODE (XEXP (op1, 0)) == PLUS
1700 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1701 != TLS_MODEL_NONE))
1703 opc = XEXP (XEXP (op1, 0), 1);
1704 op1 = XEXP (XEXP (op1, 0), 0);
1706 else
1707 opc = NULL_RTX;
1709 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1711 rtx tga_op1, tga_ret, tmp, tmp2;
1713 switch (tls_kind)
1715 case TLS_MODEL_GLOBAL_DYNAMIC:
1716 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1717 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1718 op1 = tga_ret;
1719 break;
1721 case TLS_MODEL_LOCAL_DYNAMIC:
1722 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1723 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1725 tmp = gen_reg_rtx (Pmode);
1726 emit_move_insn (tmp, tga_ret);
1728 if (register_operand (op0, Pmode))
1729 tmp2 = op0;
1730 else
1731 tmp2 = gen_reg_rtx (Pmode);
1733 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1734 op1 = tmp2;
1735 break;
1737 case TLS_MODEL_INITIAL_EXEC:
1738 if (! flag_pic)
1740 /* Don't schedule insns for getting GOT address when
1741 the first scheduling is enabled, to avoid spill
1742 failures for R0. */
1743 if (flag_schedule_insns)
1744 emit_insn (gen_blockage ());
1745 emit_insn (gen_GOTaddr2picreg ());
1746 emit_use (gen_rtx_REG (SImode, PIC_REG));
1747 if (flag_schedule_insns)
1748 emit_insn (gen_blockage ());
1750 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1751 tmp = gen_sym2GOTTPOFF (op1);
1752 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1753 op1 = tga_op1;
1754 break;
1756 case TLS_MODEL_LOCAL_EXEC:
1757 tmp2 = gen_reg_rtx (Pmode);
1758 emit_insn (gen_load_gbr (tmp2));
1759 tmp = gen_reg_rtx (Pmode);
1760 emit_insn (gen_symTPOFF2reg (tmp, op1));
1762 if (register_operand (op0, Pmode))
1763 op1 = op0;
1764 else
1765 op1 = gen_reg_rtx (Pmode);
1767 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1768 break;
1770 default:
1771 gcc_unreachable ();
1773 if (opc)
1774 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1775 operands[1] = op1;
1779 return 0;
1782 enum rtx_code
1783 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1784 enum rtx_code comparison)
1786 rtx op1;
1787 rtx scratch = NULL_RTX;
1789 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1790 comparison = GET_CODE (operands[0]);
1791 else
1792 scratch = operands[4];
1793 if (CONST_INT_P (operands[1])
1794 && !CONST_INT_P (operands[2]))
1796 rtx tmp = operands[1];
1798 operands[1] = operands[2];
1799 operands[2] = tmp;
1800 comparison = swap_condition (comparison);
1802 if (CONST_INT_P (operands[2]))
1804 HOST_WIDE_INT val = INTVAL (operands[2]);
1805 if ((val == -1 || val == -0x81)
1806 && (comparison == GT || comparison == LE))
1808 comparison = (comparison == GT) ? GE : LT;
1809 operands[2] = gen_int_mode (val + 1, mode);
1811 else if ((val == 1 || val == 0x80)
1812 && (comparison == GE || comparison == LT))
1814 comparison = (comparison == GE) ? GT : LE;
1815 operands[2] = gen_int_mode (val - 1, mode);
1817 else if (val == 1 && (comparison == GEU || comparison == LTU))
1819 comparison = (comparison == GEU) ? NE : EQ;
1820 operands[2] = CONST0_RTX (mode);
1822 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1824 comparison = (comparison == GEU) ? GTU : LEU;
1825 operands[2] = gen_int_mode (val - 1, mode);
1827 else if (val == 0 && (comparison == GTU || comparison == LEU))
1828 comparison = (comparison == GTU) ? NE : EQ;
1829 else if (mode == SImode
1830 && ((val == 0x7fffffff
1831 && (comparison == GTU || comparison == LEU))
1832 || ((unsigned HOST_WIDE_INT) val
1833 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1834 && (comparison == GEU || comparison == LTU))))
1836 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1837 operands[2] = CONST0_RTX (mode);
1840 op1 = operands[1];
1841 if (can_create_pseudo_p ())
1842 operands[1] = force_reg (mode, op1);
1843 /* When we are handling DImode comparisons, we want to keep constants so
1844 that we can optimize the component comparisons; however, memory loads
1845 are better issued as a whole so that they can be scheduled well.
1846 SImode equality comparisons allow I08 constants, but only when they
1847 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1848 into a register, that register might as well be r0, and we allow the
1849 constant. If it is already in a register, this is likely to be
1850 allocated to a different hard register, thus we load the constant into
1851 a register unless it is zero. */
1852 if (!REG_P (operands[2])
1853 && (!CONST_INT_P (operands[2])
1854 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1855 && ((comparison != EQ && comparison != NE)
1856 || (REG_P (op1) && REGNO (op1) != R0_REG)
1857 || !satisfies_constraint_I08 (operands[2])))))
1859 if (scratch && GET_MODE (scratch) == mode)
1861 emit_move_insn (scratch, operands[2]);
1862 operands[2] = scratch;
1864 else if (can_create_pseudo_p ())
1865 operands[2] = force_reg (mode, operands[2]);
1867 return comparison;
1870 void
1871 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1873 rtx (*branch_expander) (rtx) = gen_branch_true;
1874 rtx jump;
1876 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1877 switch (comparison)
1879 case NE: case LT: case LE: case LTU: case LEU:
1880 comparison = reverse_condition (comparison);
1881 branch_expander = gen_branch_false;
1882 default: ;
1884 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1885 gen_rtx_fmt_ee (comparison, SImode,
1886 operands[1], operands[2])));
1887 jump = emit_jump_insn (branch_expander (operands[3]));
1888 if (probability >= 0)
1889 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1893 /* ??? How should we distribute probabilities when more than one branch
1894 is generated. So far we only have soem ad-hoc observations:
1895 - If the operands are random, they are likely to differ in both parts.
1896 - If comparing items in a hash chain, the operands are random or equal;
1897 operation should be EQ or NE.
1898 - If items are searched in an ordered tree from the root, we can expect
1899 the highpart to be unequal about half of the time; operation should be
1900 an inequality comparison, operands non-constant, and overall probability
1901 about 50%. Likewise for quicksort.
1902 - Range checks will be often made against constants. Even if we assume for
1903 simplicity an even distribution of the non-constant operand over a
1904 sub-range here, the same probability could be generated with differently
1905 wide sub-ranges - as long as the ratio of the part of the subrange that
1906 is before the threshold to the part that comes after the threshold stays
1907 the same. Thus, we can't really tell anything here;
1908 assuming random distribution is at least simple.
1911 bool
1912 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1914 enum rtx_code msw_taken, msw_skip, lsw_taken;
1915 rtx skip_label = NULL_RTX;
1916 rtx op1h, op1l, op2h, op2l;
1917 int num_branches;
1918 int prob, rev_prob;
1919 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1920 rtx scratch = operands[4];
1922 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1923 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1924 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1925 op1l = gen_lowpart (SImode, operands[1]);
1926 op2l = gen_lowpart (SImode, operands[2]);
1927 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1928 prob = split_branch_probability;
1929 rev_prob = REG_BR_PROB_BASE - prob;
1930 switch (comparison)
1932 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1933 That costs 1 cycle more when the first branch can be predicted taken,
1934 but saves us mispredicts because only one branch needs prediction.
1935 It also enables generating the cmpeqdi_t-1 pattern. */
1936 case EQ:
1937 if (TARGET_CMPEQDI_T)
1939 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1940 emit_jump_insn (gen_branch_true (operands[3]));
1941 return true;
1943 msw_skip = NE;
1944 lsw_taken = EQ;
1945 if (prob >= 0)
1947 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1949 msw_skip_prob = rev_prob;
1950 if (REG_BR_PROB_BASE <= 65535)
1951 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1952 else
1954 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1955 lsw_taken_prob
1956 = (prob
1957 ? (REG_BR_PROB_BASE
1958 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1959 / ((HOST_WIDEST_INT) prob << 32)))
1960 : 0);
1963 break;
1964 case NE:
1965 if (TARGET_CMPEQDI_T)
1967 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1968 emit_jump_insn (gen_branch_false (operands[3]));
1969 return true;
1971 msw_taken = NE;
1972 msw_taken_prob = prob;
1973 lsw_taken = NE;
1974 lsw_taken_prob = 0;
1975 break;
1976 case GTU: case GT:
1977 msw_taken = comparison;
1978 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1979 break;
1980 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1981 msw_skip = swap_condition (msw_taken);
1982 lsw_taken = GTU;
1983 break;
1984 case GEU: case GE:
1985 if (op2l == CONST0_RTX (SImode))
1986 msw_taken = comparison;
1987 else
1989 msw_taken = comparison == GE ? GT : GTU;
1990 msw_skip = swap_condition (msw_taken);
1991 lsw_taken = GEU;
1993 break;
1994 case LTU: case LT:
1995 msw_taken = comparison;
1996 if (op2l == CONST0_RTX (SImode))
1997 break;
1998 msw_skip = swap_condition (msw_taken);
1999 lsw_taken = LTU;
2000 break;
2001 case LEU: case LE:
2002 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2003 msw_taken = comparison;
2004 else
2006 lsw_taken = LEU;
2007 if (comparison == LE)
2008 msw_taken = LT;
2009 else if (op2h != CONST0_RTX (SImode))
2010 msw_taken = LTU;
2011 else
2012 break;
2013 msw_skip = swap_condition (msw_taken);
2015 break;
2016 default: return false;
2018 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2019 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2020 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2021 if (comparison != EQ && comparison != NE && num_branches > 1)
2023 if (!CONSTANT_P (operands[2])
2024 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2025 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2027 msw_taken_prob = prob / 2U;
2028 msw_skip_prob
2029 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2030 lsw_taken_prob = prob;
2032 else
2034 msw_taken_prob = prob;
2035 msw_skip_prob = REG_BR_PROB_BASE;
2036 /* ??? If we have a constant op2h, should we use that when
2037 calculating lsw_taken_prob? */
2038 lsw_taken_prob = prob;
2041 operands[1] = op1h;
2042 operands[2] = op2h;
2043 operands[4] = NULL_RTX;
2044 if (reload_completed
2045 && ! arith_reg_or_0_operand (op2h, SImode)
2046 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2047 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2048 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2050 emit_move_insn (scratch, operands[2]);
2051 operands[2] = scratch;
2053 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2054 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2055 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2057 rtx taken_label = operands[3];
2059 /* Operands were possibly modified, but msw_skip doesn't expect this.
2060 Always use the original ones. */
2061 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2063 operands[1] = op1h;
2064 operands[2] = op2h;
2067 operands[3] = skip_label = gen_label_rtx ();
2068 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2069 operands[3] = taken_label;
2071 operands[1] = op1l;
2072 operands[2] = op2l;
2073 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2075 if (reload_completed
2076 && ! arith_reg_or_0_operand (op2l, SImode)
2077 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2079 emit_move_insn (scratch, operands[2]);
2080 operands[2] = scratch;
2082 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2084 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2085 emit_label (skip_label);
2086 return true;
2089 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2091 static void
2092 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2094 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2096 insn = gen_rtx_PARALLEL (VOIDmode,
2097 gen_rtvec (2, insn,
2098 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2099 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2101 else
2102 emit_insn (insn);
2105 /* Prepare the operands for an scc instruction; make sure that the
2106 compare has been done and the result is in T_REG. */
2107 void
2108 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2110 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2111 enum rtx_code oldcode = code;
2112 enum machine_mode mode;
2114 /* First need a compare insn. */
2115 switch (code)
2117 case NE:
2118 /* It isn't possible to handle this case. */
2119 gcc_unreachable ();
2120 case LT:
2121 code = GT;
2122 break;
2123 case LE:
2124 code = GE;
2125 break;
2126 case LTU:
2127 code = GTU;
2128 break;
2129 case LEU:
2130 code = GEU;
2131 break;
2132 default:
2133 break;
2135 if (code != oldcode)
2137 rtx tmp = op0;
2138 op0 = op1;
2139 op1 = tmp;
2142 mode = GET_MODE (op0);
2143 if (mode == VOIDmode)
2144 mode = GET_MODE (op1);
2146 op0 = force_reg (mode, op0);
2147 if ((code != EQ && code != NE
2148 && (op1 != const0_rtx
2149 || code == GTU || code == GEU || code == LTU || code == LEU))
2150 || (mode == DImode && op1 != const0_rtx)
2151 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2152 op1 = force_reg (mode, op1);
2154 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2155 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2156 mode);
2160 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2161 rtx op0, rtx op1)
2163 rtx target = gen_reg_rtx (SImode);
2164 rtx tmp;
2166 gcc_assert (TARGET_SHMEDIA);
2167 switch (code)
2169 case EQ:
2170 case GT:
2171 case LT:
2172 case UNORDERED:
2173 case GTU:
2174 case LTU:
2175 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2176 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2177 code = NE;
2178 break;
2180 case NE:
2181 case GE:
2182 case LE:
2183 case ORDERED:
2184 case GEU:
2185 case LEU:
2186 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2187 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2188 code = EQ;
2189 break;
2191 case UNEQ:
2192 case UNGE:
2193 case UNGT:
2194 case UNLE:
2195 case UNLT:
2196 case LTGT:
2197 return NULL_RTX;
2199 default:
2200 gcc_unreachable ();
2203 if (mode == DImode)
2205 rtx t2 = gen_reg_rtx (DImode);
2206 emit_insn (gen_extendsidi2 (t2, target));
2207 target = t2;
2210 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2213 /* Called from the md file, set up the operands of a compare instruction. */
2215 void
2216 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2218 enum rtx_code code = GET_CODE (operands[0]);
2219 enum rtx_code branch_code;
2220 rtx op0 = operands[1];
2221 rtx op1 = operands[2];
2222 rtx insn, tem;
2223 bool need_ccmpeq = false;
2225 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2227 op0 = force_reg (mode, op0);
2228 op1 = force_reg (mode, op1);
2230 else
2232 if (code != EQ || mode == DImode)
2234 /* Force args into regs, since we can't use constants here. */
2235 op0 = force_reg (mode, op0);
2236 if (op1 != const0_rtx || code == GTU || code == GEU)
2237 op1 = force_reg (mode, op1);
2241 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2243 if (code == LT
2244 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2245 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2247 tem = op0, op0 = op1, op1 = tem;
2248 code = swap_condition (code);
2251 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2252 if (code == GE)
2254 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2255 need_ccmpeq = true;
2256 code = GT;
2259 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2260 to EQ/GT respectively. */
2261 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2264 switch (code)
2266 case EQ:
2267 case GT:
2268 case GE:
2269 case GTU:
2270 case GEU:
2271 branch_code = code;
2272 break;
2273 case NE:
2274 case LT:
2275 case LE:
2276 case LTU:
2277 case LEU:
2278 branch_code = reverse_condition (code);
2279 break;
2280 default:
2281 gcc_unreachable ();
2284 insn = gen_rtx_SET (VOIDmode,
2285 gen_rtx_REG (SImode, T_REG),
2286 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2288 sh_emit_set_t_insn (insn, mode);
2289 if (need_ccmpeq)
2290 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2292 if (branch_code == code)
2293 emit_jump_insn (gen_branch_true (operands[3]));
2294 else
2295 emit_jump_insn (gen_branch_false (operands[3]));
2298 void
2299 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2301 enum rtx_code code = GET_CODE (operands[1]);
2302 rtx op0 = operands[2];
2303 rtx op1 = operands[3];
2304 rtx lab = NULL_RTX;
2305 bool invert = false;
2306 rtx tem;
2308 op0 = force_reg (mode, op0);
2309 if ((code != EQ && code != NE
2310 && (op1 != const0_rtx
2311 || code == GTU || code == GEU || code == LTU || code == LEU))
2312 || (mode == DImode && op1 != const0_rtx)
2313 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2314 op1 = force_reg (mode, op1);
2316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2318 if (code == LT || code == LE)
2320 code = swap_condition (code);
2321 tem = op0, op0 = op1, op1 = tem;
2323 if (code == GE)
2325 if (TARGET_IEEE)
2327 lab = gen_label_rtx ();
2328 sh_emit_scc_to_t (EQ, op0, op1);
2329 emit_jump_insn (gen_branch_true (lab));
2330 code = GT;
2332 else
2334 code = LT;
2335 invert = true;
2340 if (code == NE)
2342 code = EQ;
2343 invert = true;
2346 sh_emit_scc_to_t (code, op0, op1);
2347 if (lab)
2348 emit_label (lab);
2349 if (invert)
2350 emit_insn (gen_movnegt (operands[0]));
2351 else
2352 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2355 /* Functions to output assembly code. */
2357 /* Return a sequence of instructions to perform DI or DF move.
2359 Since the SH cannot move a DI or DF in one instruction, we have
2360 to take care when we see overlapping source and dest registers. */
2362 const char *
2363 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2364 enum machine_mode mode)
2366 rtx dst = operands[0];
2367 rtx src = operands[1];
2369 if (MEM_P (dst)
2370 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2371 return "mov.l %T1,%0\n\tmov.l %1,%0";
2373 if (register_operand (dst, mode)
2374 && register_operand (src, mode))
2376 if (REGNO (src) == MACH_REG)
2377 return "sts mach,%S0\n\tsts macl,%R0";
2379 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2380 when mov.d r1,r0 do r1->r0 then r2->r1. */
2382 if (REGNO (src) + 1 == REGNO (dst))
2383 return "mov %T1,%T0\n\tmov %1,%0";
2384 else
2385 return "mov %1,%0\n\tmov %T1,%T0";
2387 else if (CONST_INT_P (src))
2389 if (INTVAL (src) < 0)
2390 output_asm_insn ("mov #-1,%S0", operands);
2391 else
2392 output_asm_insn ("mov #0,%S0", operands);
2394 return "mov %1,%R0";
2396 else if (MEM_P (src))
2398 int ptrreg = -1;
2399 int dreg = REGNO (dst);
2400 rtx inside = XEXP (src, 0);
2402 switch (GET_CODE (inside))
2404 case REG:
2405 ptrreg = REGNO (inside);
2406 break;
2408 case SUBREG:
2409 ptrreg = subreg_regno (inside);
2410 break;
2412 case PLUS:
2413 ptrreg = REGNO (XEXP (inside, 0));
2414 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2415 an offsettable address. Unfortunately, offsettable addresses use
2416 QImode to check the offset, and a QImode offsettable address
2417 requires r0 for the other operand, which is not currently
2418 supported, so we can't use the 'o' constraint.
2419 Thus we must check for and handle r0+REG addresses here.
2420 We punt for now, since this is likely very rare. */
2421 gcc_assert (!REG_P (XEXP (inside, 1)));
2422 break;
2424 case LABEL_REF:
2425 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2426 case POST_INC:
2427 return "mov.l %1,%0\n\tmov.l %1,%T0";
2428 default:
2429 gcc_unreachable ();
2432 /* Work out the safe way to copy. Copy into the second half first. */
2433 if (dreg == ptrreg)
2434 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2437 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2440 /* Print an instruction which would have gone into a delay slot after
2441 another instruction, but couldn't because the other instruction expanded
2442 into a sequence where putting the slot insn at the end wouldn't work. */
2444 static void
2445 print_slot (rtx insn)
2447 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2449 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2452 const char *
2453 output_far_jump (rtx insn, rtx op)
2455 struct { rtx lab, reg, op; } this_jmp;
2456 rtx braf_base_lab = NULL_RTX;
2457 const char *jump;
2458 int far;
2459 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2460 rtx prev;
2462 this_jmp.lab = gen_label_rtx ();
2464 if (TARGET_SH2
2465 && offset >= -32764
2466 && offset - get_attr_length (insn) <= 32766)
2468 far = 0;
2469 jump = "mov.w %O0,%1; braf %1";
2471 else
2473 far = 1;
2474 if (flag_pic)
2476 if (TARGET_SH2)
2477 jump = "mov.l %O0,%1; braf %1";
2478 else
2479 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2481 else
2482 jump = "mov.l %O0,%1; jmp @%1";
2484 /* If we have a scratch register available, use it. */
2485 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2486 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2488 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2489 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2490 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2491 output_asm_insn (jump, &this_jmp.lab);
2492 if (dbr_sequence_length ())
2493 print_slot (final_sequence);
2494 else
2495 output_asm_insn ("nop", 0);
2497 else
2499 /* Output the delay slot insn first if any. */
2500 if (dbr_sequence_length ())
2501 print_slot (final_sequence);
2503 this_jmp.reg = gen_rtx_REG (SImode, 13);
2504 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2505 Fortunately, MACL is fixed and call-clobbered, and we never
2506 need its value across jumps, so save r13 in it instead of in
2507 the stack. */
2508 if (TARGET_SH5)
2509 output_asm_insn ("lds r13, macl", 0);
2510 else
2511 output_asm_insn ("mov.l r13,@-r15", 0);
2512 output_asm_insn (jump, &this_jmp.lab);
2513 if (TARGET_SH5)
2514 output_asm_insn ("sts macl, r13", 0);
2515 else
2516 output_asm_insn ("mov.l @r15+,r13", 0);
2518 if (far && flag_pic && TARGET_SH2)
2520 braf_base_lab = gen_label_rtx ();
2521 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2522 CODE_LABEL_NUMBER (braf_base_lab));
2524 if (far)
2525 output_asm_insn (".align 2", 0);
2526 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2527 this_jmp.op = op;
2528 if (far && flag_pic)
2530 if (TARGET_SH2)
2531 this_jmp.lab = braf_base_lab;
2532 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2534 else
2535 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2536 return "";
2539 /* Local label counter, used for constants in the pool and inside
2540 pattern branches. */
2542 static int lf = 100;
2544 /* Output code for ordinary branches. */
2546 const char *
2547 output_branch (int logic, rtx insn, rtx *operands)
2549 switch (get_attr_length (insn))
2551 case 6:
2552 /* This can happen if filling the delay slot has caused a forward
2553 branch to exceed its range (we could reverse it, but only
2554 when we know we won't overextend other branches; this should
2555 best be handled by relaxation).
2556 It can also happen when other condbranches hoist delay slot insn
2557 from their destination, thus leading to code size increase.
2558 But the branch will still be in the range -4092..+4098 bytes. */
2560 if (! TARGET_RELAX)
2562 int label = lf++;
2563 /* The call to print_slot will clobber the operands. */
2564 rtx op0 = operands[0];
2566 /* If the instruction in the delay slot is annulled (true), then
2567 there is no delay slot where we can put it now. The only safe
2568 place for it is after the label. final will do that by default. */
2570 if (final_sequence
2571 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2572 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2574 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2575 ASSEMBLER_DIALECT ? "/" : ".", label);
2576 print_slot (final_sequence);
2578 else
2579 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2581 output_asm_insn ("bra\t%l0", &op0);
2582 fprintf (asm_out_file, "\tnop\n");
2583 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2585 return "";
2587 /* When relaxing, handle this like a short branch. The linker
2588 will fix it up if it still doesn't fit after relaxation. */
2589 case 2:
2590 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2592 /* These are for SH2e, in which we have to account for the
2593 extra nop because of the hardware bug in annulled branches. */
2594 case 8:
2595 if (! TARGET_RELAX)
2597 int label = lf++;
2599 gcc_assert (!final_sequence
2600 || !(INSN_ANNULLED_BRANCH_P
2601 (XVECEXP (final_sequence, 0, 0))));
2602 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2603 logic ? "f" : "t",
2604 ASSEMBLER_DIALECT ? "/" : ".", label);
2605 fprintf (asm_out_file, "\tnop\n");
2606 output_asm_insn ("bra\t%l0", operands);
2607 fprintf (asm_out_file, "\tnop\n");
2608 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2610 return "";
2612 /* When relaxing, fall through. */
2613 case 4:
2615 char buffer[10];
2617 sprintf (buffer, "b%s%ss\t%%l0",
2618 logic ? "t" : "f",
2619 ASSEMBLER_DIALECT ? "/" : ".");
2620 output_asm_insn (buffer, &operands[0]);
2621 return "nop";
2624 default:
2625 /* There should be no longer branches now - that would
2626 indicate that something has destroyed the branches set
2627 up in machine_dependent_reorg. */
2628 gcc_unreachable ();
2632 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2633 fill in operands 9 as a label to the successor insn.
2634 We try to use jump threading where possible.
2635 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2636 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2637 follow jmp and bt, if the address is in range. */
2638 const char *
2639 output_branchy_insn (enum rtx_code code, const char *templ,
2640 rtx insn, rtx *operands)
2642 rtx next_insn = NEXT_INSN (insn);
2644 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2646 rtx src = SET_SRC (PATTERN (next_insn));
2647 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2649 /* Following branch not taken */
2650 operands[9] = gen_label_rtx ();
2651 emit_label_after (operands[9], next_insn);
2652 INSN_ADDRESSES_NEW (operands[9],
2653 INSN_ADDRESSES (INSN_UID (next_insn))
2654 + get_attr_length (next_insn));
2655 return templ;
2657 else
2659 int offset = (branch_dest (next_insn)
2660 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2661 if (offset >= -252 && offset <= 258)
2663 if (GET_CODE (src) == IF_THEN_ELSE)
2664 /* branch_true */
2665 src = XEXP (src, 1);
2666 operands[9] = src;
2667 return templ;
2671 operands[9] = gen_label_rtx ();
2672 emit_label_after (operands[9], insn);
2673 INSN_ADDRESSES_NEW (operands[9],
2674 INSN_ADDRESSES (INSN_UID (insn))
2675 + get_attr_length (insn));
2676 return templ;
2679 const char *
2680 output_ieee_ccmpeq (rtx insn, rtx *operands)
2682 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2683 insn, operands);
2686 /* Output the start of the assembler file. */
2688 static void
2689 sh_file_start (void)
2691 default_file_start ();
2693 #ifdef SYMBIAN
2694 /* Declare the .directive section before it is used. */
2695 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2696 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2697 #endif
2699 if (TARGET_ELF)
2700 /* We need to show the text section with the proper
2701 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2702 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2703 will complain. We can teach GAS specifically about the
2704 default attributes for our choice of text section, but
2705 then we would have to change GAS again if/when we change
2706 the text section name. */
2707 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2708 else
2709 /* Switch to the data section so that the coffsem symbol
2710 isn't in the text section. */
2711 switch_to_section (data_section);
2713 if (TARGET_LITTLE_ENDIAN)
2714 fputs ("\t.little\n", asm_out_file);
2716 if (!TARGET_ELF)
2718 if (TARGET_SHCOMPACT)
2719 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2720 else if (TARGET_SHMEDIA)
2721 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2722 TARGET_SHMEDIA64 ? 64 : 32);
2726 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2728 static bool
2729 unspec_caller_rtx_p (rtx pat)
2731 rtx base, offset;
2732 int i;
2734 split_const (pat, &base, &offset);
2735 if (GET_CODE (base) == UNSPEC)
2737 if (XINT (base, 1) == UNSPEC_CALLER)
2738 return true;
2739 for (i = 0; i < XVECLEN (base, 0); i++)
2740 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2741 return true;
2743 return false;
2746 /* Indicate that INSN cannot be duplicated. This is true for insn
2747 that generates a unique label. */
2749 static bool
2750 sh_cannot_copy_insn_p (rtx insn)
2752 rtx pat;
2754 if (!reload_completed || !flag_pic)
2755 return false;
2757 if (!NONJUMP_INSN_P (insn))
2758 return false;
2759 if (asm_noperands (insn) >= 0)
2760 return false;
2762 pat = PATTERN (insn);
2763 if (GET_CODE (pat) != SET)
2764 return false;
2765 pat = SET_SRC (pat);
2767 if (unspec_caller_rtx_p (pat))
2768 return true;
2770 return false;
2773 /* Actual number of instructions used to make a shift by N. */
2774 static const char ashiftrt_insns[] =
2775 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2777 /* Left shift and logical right shift are the same. */
2778 static const char shift_insns[] =
2779 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2781 /* Individual shift amounts needed to get the above length sequences.
2782 One bit right shifts clobber the T bit, so when possible, put one bit
2783 shifts in the middle of the sequence, so the ends are eligible for
2784 branch delay slots. */
2785 static const short shift_amounts[32][5] = {
2786 {0}, {1}, {2}, {2, 1},
2787 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2788 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2789 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2790 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2791 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2792 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2793 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2795 /* Likewise, but for shift amounts < 16, up to three highmost bits
2796 might be clobbered. This is typically used when combined with some
2797 kind of sign or zero extension. */
2799 static const char ext_shift_insns[] =
2800 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2802 static const short ext_shift_amounts[32][4] = {
2803 {0}, {1}, {2}, {2, 1},
2804 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2805 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2806 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2807 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2808 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2809 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2810 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2812 /* Assuming we have a value that has been sign-extended by at least one bit,
2813 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2814 to shift it by N without data loss, and quicker than by other means? */
2815 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2817 /* This is used in length attributes in sh.md to help compute the length
2818 of arbitrary constant shift instructions. */
2821 shift_insns_rtx (rtx insn)
2823 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2824 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2825 enum rtx_code shift_code = GET_CODE (set_src);
2827 switch (shift_code)
2829 case ASHIFTRT:
2830 return ashiftrt_insns[shift_count];
2831 case LSHIFTRT:
2832 case ASHIFT:
2833 return shift_insns[shift_count];
2834 default:
2835 gcc_unreachable ();
2839 /* Return the cost of a shift. */
2841 static inline int
2842 shiftcosts (rtx x)
2844 int value;
2846 if (TARGET_SHMEDIA)
2847 return 1;
2849 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2851 if (GET_MODE (x) == DImode
2852 && CONST_INT_P (XEXP (x, 1))
2853 && INTVAL (XEXP (x, 1)) == 1)
2854 return 2;
2856 /* Everything else is invalid, because there is no pattern for it. */
2857 return MAX_COST;
2859 /* If shift by a non constant, then this will be expensive. */
2860 if (!CONST_INT_P (XEXP (x, 1)))
2861 return SH_DYNAMIC_SHIFT_COST;
2863 /* Otherwise, return the true cost in instructions. Cope with out of range
2864 shift counts more or less arbitrarily. */
2865 value = INTVAL (XEXP (x, 1)) & 31;
2867 if (GET_CODE (x) == ASHIFTRT)
2869 int cost = ashiftrt_insns[value];
2870 /* If SH3, then we put the constant in a reg and use shad. */
2871 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2872 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2873 return cost;
2875 else
2876 return shift_insns[value];
2879 /* Return the cost of an AND operation. */
2881 static inline int
2882 andcosts (rtx x)
2884 int i;
2886 /* Anding with a register is a single cycle and instruction. */
2887 if (!CONST_INT_P (XEXP (x, 1)))
2888 return 1;
2890 i = INTVAL (XEXP (x, 1));
2892 if (TARGET_SHMEDIA)
2894 if (satisfies_constraint_I10 (XEXP (x, 1))
2895 || satisfies_constraint_J16 (XEXP (x, 1)))
2896 return 1;
2897 else
2898 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2901 /* These constants are single cycle extu.[bw] instructions. */
2902 if (i == 0xff || i == 0xffff)
2903 return 1;
2904 /* Constants that can be used in an and immediate instruction in a single
2905 cycle, but this requires r0, so make it a little more expensive. */
2906 if (CONST_OK_FOR_K08 (i))
2907 return 2;
2908 /* Constants that can be loaded with a mov immediate and an and.
2909 This case is probably unnecessary. */
2910 if (CONST_OK_FOR_I08 (i))
2911 return 2;
2912 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2913 This case is probably unnecessary. */
2914 return 3;
2917 /* Return the cost of an addition or a subtraction. */
2919 static inline int
2920 addsubcosts (rtx x)
2922 /* Adding a register is a single cycle insn. */
2923 if (REG_P (XEXP (x, 1))
2924 || GET_CODE (XEXP (x, 1)) == SUBREG)
2925 return 1;
2927 /* Likewise for small constants. */
2928 if (CONST_INT_P (XEXP (x, 1))
2929 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2930 return 1;
2932 if (TARGET_SHMEDIA)
2933 switch (GET_CODE (XEXP (x, 1)))
2935 case CONST:
2936 case LABEL_REF:
2937 case SYMBOL_REF:
2938 return TARGET_SHMEDIA64 ? 5 : 3;
2940 case CONST_INT:
2941 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2942 return 2;
2943 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2944 return 3;
2945 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2946 return 4;
2948 /* Fall through. */
2949 default:
2950 return 5;
2953 /* Any other constant requires a 2 cycle pc-relative load plus an
2954 addition. */
2955 return 3;
2958 /* Return the cost of a multiply. */
2959 static inline int
2960 multcosts (rtx x ATTRIBUTE_UNUSED)
2962 if (sh_multcost >= 0)
2963 return sh_multcost;
2964 if (TARGET_SHMEDIA)
2965 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2966 accept constants. Ideally, we would use a cost of one or two and
2967 add the cost of the operand, but disregard the latter when inside loops
2968 and loop invariant code motion is still to follow.
2969 Using a multiply first and splitting it later if it's a loss
2970 doesn't work because of different sign / zero extension semantics
2971 of multiplies vs. shifts. */
2972 return optimize_size ? 2 : 3;
2974 if (TARGET_SH2)
2976 /* We have a mul insn, so we can never take more than the mul and the
2977 read of the mac reg, but count more because of the latency and extra
2978 reg usage. */
2979 if (optimize_size)
2980 return 2;
2981 return 3;
2984 /* If we're aiming at small code, then just count the number of
2985 insns in a multiply call sequence. */
2986 if (optimize_size)
2987 return 5;
2989 /* Otherwise count all the insns in the routine we'd be calling too. */
2990 return 20;
2993 /* Compute a (partial) cost for rtx X. Return true if the complete
2994 cost has been computed, and false if subexpressions should be
2995 scanned. In either case, *TOTAL contains the cost result. */
2997 static bool
2998 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2999 bool speed ATTRIBUTE_UNUSED)
3001 switch (code)
3003 case CONST_INT:
3004 if (TARGET_SHMEDIA)
3006 if (INTVAL (x) == 0)
3007 *total = 0;
3008 else if (outer_code == AND && and_operand ((x), DImode))
3009 *total = 0;
3010 else if ((outer_code == IOR || outer_code == XOR
3011 || outer_code == PLUS)
3012 && CONST_OK_FOR_I10 (INTVAL (x)))
3013 *total = 0;
3014 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3015 *total = COSTS_N_INSNS (outer_code != SET);
3016 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3017 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3018 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3019 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3020 else
3021 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3022 return true;
3024 if (CONST_OK_FOR_I08 (INTVAL (x)))
3025 *total = 0;
3026 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3027 && CONST_OK_FOR_K08 (INTVAL (x)))
3028 *total = 1;
3029 /* prepare_cmp_insn will force costly constants int registers before
3030 the cbranch[sd]i4 patterns can see them, so preserve potentially
3031 interesting ones not covered by I08 above. */
3032 else if (outer_code == COMPARE
3033 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3034 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3035 || INTVAL (x) == 0x7fffffff
3036 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3037 *total = 1;
3038 else
3039 *total = 8;
3040 return true;
3042 case CONST:
3043 case LABEL_REF:
3044 case SYMBOL_REF:
3045 if (TARGET_SHMEDIA64)
3046 *total = COSTS_N_INSNS (4);
3047 else if (TARGET_SHMEDIA32)
3048 *total = COSTS_N_INSNS (2);
3049 else
3050 *total = 5;
3051 return true;
3053 case CONST_DOUBLE:
3054 if (TARGET_SHMEDIA)
3055 *total = COSTS_N_INSNS (4);
3056 /* prepare_cmp_insn will force costly constants int registers before
3057 the cbranchdi4 pattern can see them, so preserve potentially
3058 interesting ones. */
3059 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3060 *total = 1;
3061 else
3062 *total = 10;
3063 return true;
3064 case CONST_VECTOR:
3065 if (x == CONST0_RTX (GET_MODE (x)))
3066 *total = 0;
3067 else if (sh_1el_vec (x, VOIDmode))
3068 *total = outer_code != SET;
3069 if (sh_rep_vec (x, VOIDmode))
3070 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3071 + (outer_code != SET));
3072 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3073 return true;
3075 case PLUS:
3076 case MINUS:
3077 *total = COSTS_N_INSNS (addsubcosts (x));
3078 return true;
3080 case AND:
3081 *total = COSTS_N_INSNS (andcosts (x));
3082 return true;
3084 case MULT:
3085 *total = COSTS_N_INSNS (multcosts (x));
3086 return true;
3088 case ASHIFT:
3089 case ASHIFTRT:
3090 case LSHIFTRT:
3091 *total = COSTS_N_INSNS (shiftcosts (x));
3092 return true;
3094 case DIV:
3095 case UDIV:
3096 case MOD:
3097 case UMOD:
3098 *total = COSTS_N_INSNS (20);
3099 return true;
3101 case PARALLEL:
3102 if (sh_1el_vec (x, VOIDmode))
3103 *total = outer_code != SET;
3104 if (sh_rep_vec (x, VOIDmode))
3105 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3106 + (outer_code != SET));
3107 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3108 return true;
3110 case FLOAT:
3111 case FIX:
3112 *total = 100;
3113 return true;
3115 default:
3116 return false;
3120 /* Compute the cost of an address. For the SH, all valid addresses are
3121 the same cost. Use a slightly higher cost for reg + reg addressing,
3122 since it increases pressure on r0. */
3124 static int
3125 sh_address_cost (rtx X,
3126 bool speed ATTRIBUTE_UNUSED)
3128 return (GET_CODE (X) == PLUS
3129 && ! CONSTANT_P (XEXP (X, 1))
3130 && ! TARGET_SHMEDIA ? 1 : 0);
3133 /* Code to expand a shift. */
3135 void
3136 gen_ashift (int type, int n, rtx reg)
3138 /* Negative values here come from the shift_amounts array. */
3139 if (n < 0)
3141 if (type == ASHIFT)
3142 type = LSHIFTRT;
3143 else
3144 type = ASHIFT;
3145 n = -n;
3148 switch (type)
3150 case ASHIFTRT:
3151 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3152 break;
3153 case LSHIFTRT:
3154 if (n == 1)
3155 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3156 else
3157 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3158 break;
3159 case ASHIFT:
3160 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3161 break;
3165 /* Same for HImode */
3167 void
3168 gen_ashift_hi (int type, int n, rtx reg)
3170 /* Negative values here come from the shift_amounts array. */
3171 if (n < 0)
3173 if (type == ASHIFT)
3174 type = LSHIFTRT;
3175 else
3176 type = ASHIFT;
3177 n = -n;
3180 switch (type)
3182 case ASHIFTRT:
3183 case LSHIFTRT:
3184 /* We don't have HImode right shift operations because using the
3185 ordinary 32 bit shift instructions for that doesn't generate proper
3186 zero/sign extension.
3187 gen_ashift_hi is only called in contexts where we know that the
3188 sign extension works out correctly. */
3190 int offset = 0;
3191 if (GET_CODE (reg) == SUBREG)
3193 offset = SUBREG_BYTE (reg);
3194 reg = SUBREG_REG (reg);
3196 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3197 break;
3199 case ASHIFT:
3200 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3201 break;
3205 /* Output RTL to split a constant shift into its component SH constant
3206 shift instructions. */
3208 void
3209 gen_shifty_op (int code, rtx *operands)
3211 int value = INTVAL (operands[2]);
3212 int max, i;
3214 /* Truncate the shift count in case it is out of bounds. */
3215 value = value & 31;
3217 if (value == 31)
3219 if (code == LSHIFTRT)
3221 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3222 emit_insn (gen_movt (operands[0]));
3223 return;
3225 else if (code == ASHIFT)
3227 /* There is a two instruction sequence for 31 bit left shifts,
3228 but it requires r0. */
3229 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3231 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3232 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3233 return;
3237 else if (value == 0)
3239 /* This can happen even when optimizing, if there were subregs before
3240 reload. Don't output a nop here, as this is never optimized away;
3241 use a no-op move instead. */
3242 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3243 return;
3246 max = shift_insns[value];
3247 for (i = 0; i < max; i++)
3248 gen_ashift (code, shift_amounts[value][i], operands[0]);
3251 /* Same as above, but optimized for values where the topmost bits don't
3252 matter. */
3254 void
3255 gen_shifty_hi_op (int code, rtx *operands)
3257 int value = INTVAL (operands[2]);
3258 int max, i;
3259 void (*gen_fun) (int, int, rtx);
3261 /* This operation is used by and_shl for SImode values with a few
3262 high bits known to be cleared. */
3263 value &= 31;
3264 if (value == 0)
3266 emit_insn (gen_nop ());
3267 return;
3270 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3271 if (code == ASHIFT)
3273 max = ext_shift_insns[value];
3274 for (i = 0; i < max; i++)
3275 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3277 else
3278 /* When shifting right, emit the shifts in reverse order, so that
3279 solitary negative values come first. */
3280 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3281 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3284 /* Output RTL for an arithmetic right shift. */
3286 /* ??? Rewrite to use super-optimizer sequences. */
3289 expand_ashiftrt (rtx *operands)
3291 rtx wrk;
3292 char func[18];
3293 int value;
3295 if (TARGET_SH3)
3297 if (!CONST_INT_P (operands[2]))
3299 rtx count = copy_to_mode_reg (SImode, operands[2]);
3300 emit_insn (gen_negsi2 (count, count));
3301 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3302 return 1;
3304 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3305 > 1 + SH_DYNAMIC_SHIFT_COST)
3307 rtx count
3308 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3309 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3310 return 1;
3313 if (!CONST_INT_P (operands[2]))
3314 return 0;
3316 value = INTVAL (operands[2]) & 31;
3318 if (value == 31)
3320 /* If we are called from abs expansion, arrange things so that we
3321 we can use a single MT instruction that doesn't clobber the source,
3322 if LICM can hoist out the load of the constant zero. */
3323 if (currently_expanding_to_rtl)
3325 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3326 operands[1]));
3327 emit_insn (gen_mov_neg_si_t (operands[0]));
3328 return 1;
3330 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3331 return 1;
3333 else if (value >= 16 && value <= 19)
3335 wrk = gen_reg_rtx (SImode);
3336 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3337 value -= 16;
3338 while (value--)
3339 gen_ashift (ASHIFTRT, 1, wrk);
3340 emit_move_insn (operands[0], wrk);
3341 return 1;
3343 /* Expand a short sequence inline, longer call a magic routine. */
3344 else if (value <= 5)
3346 wrk = gen_reg_rtx (SImode);
3347 emit_move_insn (wrk, operands[1]);
3348 while (value--)
3349 gen_ashift (ASHIFTRT, 1, wrk);
3350 emit_move_insn (operands[0], wrk);
3351 return 1;
3354 wrk = gen_reg_rtx (Pmode);
3356 /* Load the value into an arg reg and call a helper. */
3357 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3358 sprintf (func, "__ashiftrt_r4_%d", value);
3359 function_symbol (wrk, func, SFUNC_STATIC);
3360 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3361 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3362 return 1;
3366 sh_dynamicalize_shift_p (rtx count)
3368 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3371 /* Try to find a good way to implement the combiner pattern
3372 [(set (match_operand:SI 0 "register_operand" "r")
3373 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3374 (match_operand:SI 2 "const_int_operand" "n"))
3375 (match_operand:SI 3 "const_int_operand" "n"))) .
3376 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3377 return 0 for simple right / left or left/right shift combination.
3378 return 1 for a combination of shifts with zero_extend.
3379 return 2 for a combination of shifts with an AND that needs r0.
3380 return 3 for a combination of shifts with an AND that needs an extra
3381 scratch register, when the three highmost bits of the AND mask are clear.
3382 return 4 for a combination of shifts with an AND that needs an extra
3383 scratch register, when any of the three highmost bits of the AND mask
3384 is set.
3385 If ATTRP is set, store an initial right shift width in ATTRP[0],
3386 and the instruction length in ATTRP[1] . These values are not valid
3387 when returning 0.
3388 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3389 shift_amounts for the last shift value that is to be used before the
3390 sign extend. */
3392 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3394 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3395 int left = INTVAL (left_rtx), right;
3396 int best = 0;
3397 int cost, best_cost = 10000;
3398 int best_right = 0, best_len = 0;
3399 int i;
3400 int can_ext;
3402 if (left < 0 || left > 31)
3403 return 0;
3404 if (CONST_INT_P (mask_rtx))
3405 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3406 else
3407 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3408 /* Can this be expressed as a right shift / left shift pair? */
3409 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3410 right = exact_log2 (lsb);
3411 mask2 = ~(mask + lsb - 1);
3412 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3413 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3414 if (! mask2)
3415 best_cost = shift_insns[right] + shift_insns[right + left];
3416 /* mask has no trailing zeroes <==> ! right */
3417 else if (! right && mask2 == ~(lsb2 - 1))
3419 int late_right = exact_log2 (lsb2);
3420 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3422 /* Try to use zero extend. */
3423 if (mask2 == ~(lsb2 - 1))
3425 int width, first;
3427 for (width = 8; width <= 16; width += 8)
3429 /* Can we zero-extend right away? */
3430 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3432 cost
3433 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3434 if (cost < best_cost)
3436 best = 1;
3437 best_cost = cost;
3438 best_right = right;
3439 best_len = cost;
3440 if (attrp)
3441 attrp[2] = -1;
3443 continue;
3445 /* ??? Could try to put zero extend into initial right shift,
3446 or even shift a bit left before the right shift. */
3447 /* Determine value of first part of left shift, to get to the
3448 zero extend cut-off point. */
3449 first = width - exact_log2 (lsb2) + right;
3450 if (first >= 0 && right + left - first >= 0)
3452 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3453 + ext_shift_insns[right + left - first];
3454 if (cost < best_cost)
3456 best = 1;
3457 best_cost = cost;
3458 best_right = right;
3459 best_len = cost;
3460 if (attrp)
3461 attrp[2] = first;
3466 /* Try to use r0 AND pattern */
3467 for (i = 0; i <= 2; i++)
3469 if (i > right)
3470 break;
3471 if (! CONST_OK_FOR_K08 (mask >> i))
3472 continue;
3473 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3474 if (cost < best_cost)
3476 best = 2;
3477 best_cost = cost;
3478 best_right = i;
3479 best_len = cost - 1;
3482 /* Try to use a scratch register to hold the AND operand. */
3483 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3484 for (i = 0; i <= 2; i++)
3486 if (i > right)
3487 break;
3488 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3489 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3490 if (cost < best_cost)
3492 best = 4 - can_ext;
3493 best_cost = cost;
3494 best_right = i;
3495 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3499 if (attrp)
3501 attrp[0] = best_right;
3502 attrp[1] = best_len;
3504 return best;
3507 /* This is used in length attributes of the unnamed instructions
3508 corresponding to shl_and_kind return values of 1 and 2. */
3510 shl_and_length (rtx insn)
3512 rtx set_src, left_rtx, mask_rtx;
3513 int attributes[3];
3515 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3516 left_rtx = XEXP (XEXP (set_src, 0), 1);
3517 mask_rtx = XEXP (set_src, 1);
3518 shl_and_kind (left_rtx, mask_rtx, attributes);
3519 return attributes[1];
3522 /* This is used in length attribute of the and_shl_scratch instruction. */
3525 shl_and_scr_length (rtx insn)
3527 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3528 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3529 rtx op = XEXP (set_src, 0);
3530 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3531 op = XEXP (XEXP (op, 0), 0);
3532 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3535 /* Generate rtl for instructions for which shl_and_kind advised a particular
3536 method of generating them, i.e. returned zero. */
3539 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3541 int attributes[3];
3542 unsigned HOST_WIDE_INT mask;
3543 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3544 int right, total_shift;
3545 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3547 right = attributes[0];
3548 total_shift = INTVAL (left_rtx) + right;
3549 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3550 switch (kind)
3552 default:
3553 return -1;
3554 case 1:
3556 int first = attributes[2];
3557 rtx operands[3];
3559 if (first < 0)
3561 emit_insn ((mask << right) <= 0xff
3562 ? gen_zero_extendqisi2 (dest,
3563 gen_lowpart (QImode, source))
3564 : gen_zero_extendhisi2 (dest,
3565 gen_lowpart (HImode, source)));
3566 source = dest;
3568 if (source != dest)
3569 emit_insn (gen_movsi (dest, source));
3570 operands[0] = dest;
3571 if (right)
3573 operands[2] = GEN_INT (right);
3574 gen_shifty_hi_op (LSHIFTRT, operands);
3576 if (first > 0)
3578 operands[2] = GEN_INT (first);
3579 gen_shifty_hi_op (ASHIFT, operands);
3580 total_shift -= first;
3581 mask <<= first;
3583 if (first >= 0)
3584 emit_insn (mask <= 0xff
3585 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3586 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3587 if (total_shift > 0)
3589 operands[2] = GEN_INT (total_shift);
3590 gen_shifty_hi_op (ASHIFT, operands);
3592 break;
3594 case 4:
3595 shift_gen_fun = gen_shifty_op;
3596 case 3:
3597 /* If the topmost bit that matters is set, set the topmost bits
3598 that don't matter. This way, we might be able to get a shorter
3599 signed constant. */
3600 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3601 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3602 case 2:
3603 /* Don't expand fine-grained when combining, because that will
3604 make the pattern fail. */
3605 if (currently_expanding_to_rtl
3606 || reload_in_progress || reload_completed)
3608 rtx operands[3];
3610 /* Cases 3 and 4 should be handled by this split
3611 only while combining */
3612 gcc_assert (kind <= 2);
3613 if (right)
3615 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3616 source = dest;
3618 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3619 if (total_shift)
3621 operands[0] = dest;
3622 operands[1] = dest;
3623 operands[2] = GEN_INT (total_shift);
3624 shift_gen_fun (ASHIFT, operands);
3626 break;
3628 else
3630 int neg = 0;
3631 if (kind != 4 && total_shift < 16)
3633 neg = -ext_shift_amounts[total_shift][1];
3634 if (neg > 0)
3635 neg -= ext_shift_amounts[total_shift][2];
3636 else
3637 neg = 0;
3639 emit_insn (gen_and_shl_scratch (dest, source,
3640 GEN_INT (right),
3641 GEN_INT (mask),
3642 GEN_INT (total_shift + neg),
3643 GEN_INT (neg)));
3644 emit_insn (gen_movsi (dest, dest));
3645 break;
3648 return 0;
3651 /* Try to find a good way to implement the combiner pattern
3652 [(set (match_operand:SI 0 "register_operand" "=r")
3653 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3654 (match_operand:SI 2 "const_int_operand" "n")
3655 (match_operand:SI 3 "const_int_operand" "n")
3656 (const_int 0)))
3657 (clobber (reg:SI T_REG))]
3658 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3659 return 0 for simple left / right shift combination.
3660 return 1 for left shift / 8 bit sign extend / left shift.
3661 return 2 for left shift / 16 bit sign extend / left shift.
3662 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3663 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3664 return 5 for left shift / 16 bit sign extend / right shift
3665 return 6 for < 8 bit sign extend / left shift.
3666 return 7 for < 8 bit sign extend / left shift / single right shift.
3667 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3670 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3672 int left, size, insize, ext;
3673 int cost = 0, best_cost;
3674 int kind;
3676 left = INTVAL (left_rtx);
3677 size = INTVAL (size_rtx);
3678 insize = size - left;
3679 gcc_assert (insize > 0);
3680 /* Default to left / right shift. */
3681 kind = 0;
3682 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3683 if (size <= 16)
3685 /* 16 bit shift / sign extend / 16 bit shift */
3686 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3687 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3688 below, by alternative 3 or something even better. */
3689 if (cost < best_cost)
3691 kind = 5;
3692 best_cost = cost;
3695 /* Try a plain sign extend between two shifts. */
3696 for (ext = 16; ext >= insize; ext -= 8)
3698 if (ext <= size)
3700 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3701 if (cost < best_cost)
3703 kind = ext / (unsigned) 8;
3704 best_cost = cost;
3707 /* Check if we can do a sloppy shift with a final signed shift
3708 restoring the sign. */
3709 if (EXT_SHIFT_SIGNED (size - ext))
3710 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3711 /* If not, maybe it's still cheaper to do the second shift sloppy,
3712 and do a final sign extend? */
3713 else if (size <= 16)
3714 cost = ext_shift_insns[ext - insize] + 1
3715 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3716 else
3717 continue;
3718 if (cost < best_cost)
3720 kind = ext / (unsigned) 8 + 2;
3721 best_cost = cost;
3724 /* Check if we can sign extend in r0 */
3725 if (insize < 8)
3727 cost = 3 + shift_insns[left];
3728 if (cost < best_cost)
3730 kind = 6;
3731 best_cost = cost;
3733 /* Try the same with a final signed shift. */
3734 if (left < 31)
3736 cost = 3 + ext_shift_insns[left + 1] + 1;
3737 if (cost < best_cost)
3739 kind = 7;
3740 best_cost = cost;
3744 if (TARGET_SH3)
3746 /* Try to use a dynamic shift. */
3747 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3748 if (cost < best_cost)
3750 kind = 0;
3751 best_cost = cost;
3754 if (costp)
3755 *costp = cost;
3756 return kind;
3759 /* Function to be used in the length attribute of the instructions
3760 implementing this pattern. */
3763 shl_sext_length (rtx insn)
3765 rtx set_src, left_rtx, size_rtx;
3766 int cost;
3768 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3769 left_rtx = XEXP (XEXP (set_src, 0), 1);
3770 size_rtx = XEXP (set_src, 1);
3771 shl_sext_kind (left_rtx, size_rtx, &cost);
3772 return cost;
3775 /* Generate rtl for this pattern */
3778 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3780 int kind;
3781 int left, size, insize, cost;
3782 rtx operands[3];
3784 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3785 left = INTVAL (left_rtx);
3786 size = INTVAL (size_rtx);
3787 insize = size - left;
3788 switch (kind)
3790 case 1:
3791 case 2:
3792 case 3:
3793 case 4:
3795 int ext = kind & 1 ? 8 : 16;
3796 int shift2 = size - ext;
3798 /* Don't expand fine-grained when combining, because that will
3799 make the pattern fail. */
3800 if (! currently_expanding_to_rtl
3801 && ! reload_in_progress && ! reload_completed)
3803 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3804 emit_insn (gen_movsi (dest, source));
3805 break;
3807 if (dest != source)
3808 emit_insn (gen_movsi (dest, source));
3809 operands[0] = dest;
3810 if (ext - insize)
3812 operands[2] = GEN_INT (ext - insize);
3813 gen_shifty_hi_op (ASHIFT, operands);
3815 emit_insn (kind & 1
3816 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3817 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3818 if (kind <= 2)
3820 if (shift2)
3822 operands[2] = GEN_INT (shift2);
3823 gen_shifty_op (ASHIFT, operands);
3826 else
3828 if (shift2 > 0)
3830 if (EXT_SHIFT_SIGNED (shift2))
3832 operands[2] = GEN_INT (shift2 + 1);
3833 gen_shifty_op (ASHIFT, operands);
3834 operands[2] = const1_rtx;
3835 gen_shifty_op (ASHIFTRT, operands);
3836 break;
3838 operands[2] = GEN_INT (shift2);
3839 gen_shifty_hi_op (ASHIFT, operands);
3841 else if (shift2)
3843 operands[2] = GEN_INT (-shift2);
3844 gen_shifty_hi_op (LSHIFTRT, operands);
3846 emit_insn (size <= 8
3847 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3848 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3850 break;
3852 case 5:
3854 int i = 16 - size;
3855 if (! currently_expanding_to_rtl
3856 && ! reload_in_progress && ! reload_completed)
3857 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3858 else
3860 operands[0] = dest;
3861 operands[2] = GEN_INT (16 - insize);
3862 gen_shifty_hi_op (ASHIFT, operands);
3863 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3865 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3866 while (--i >= 0)
3867 gen_ashift (ASHIFTRT, 1, dest);
3868 break;
3870 case 6:
3871 case 7:
3872 /* Don't expand fine-grained when combining, because that will
3873 make the pattern fail. */
3874 if (! currently_expanding_to_rtl
3875 && ! reload_in_progress && ! reload_completed)
3877 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3878 emit_insn (gen_movsi (dest, source));
3879 break;
3881 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3882 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3883 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3884 operands[0] = dest;
3885 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3886 gen_shifty_op (ASHIFT, operands);
3887 if (kind == 7)
3888 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3889 break;
3890 default:
3891 return -1;
3893 return 0;
3896 /* Prefix a symbol_ref name with "datalabel". */
3899 gen_datalabel_ref (rtx sym)
3901 const char *str;
3903 if (GET_CODE (sym) == LABEL_REF)
3904 return gen_rtx_CONST (GET_MODE (sym),
3905 gen_rtx_UNSPEC (GET_MODE (sym),
3906 gen_rtvec (1, sym),
3907 UNSPEC_DATALABEL));
3909 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3911 str = XSTR (sym, 0);
3912 /* Share all SYMBOL_REF strings with the same value - that is important
3913 for cse. */
3914 str = IDENTIFIER_POINTER (get_identifier (str));
3915 XSTR (sym, 0) = str;
3917 return sym;
3921 static alloc_pool label_ref_list_pool;
3923 typedef struct label_ref_list_d
3925 rtx label;
3926 struct label_ref_list_d *next;
3927 } *label_ref_list_t;
3929 /* The SH cannot load a large constant into a register, constants have to
3930 come from a pc relative load. The reference of a pc relative load
3931 instruction must be less than 1k in front of the instruction. This
3932 means that we often have to dump a constant inside a function, and
3933 generate code to branch around it.
3935 It is important to minimize this, since the branches will slow things
3936 down and make things bigger.
3938 Worst case code looks like:
3940 mov.l L1,rn
3941 bra L2
3943 align
3944 L1: .long value
3948 mov.l L3,rn
3949 bra L4
3951 align
3952 L3: .long value
3956 We fix this by performing a scan before scheduling, which notices which
3957 instructions need to have their operands fetched from the constant table
3958 and builds the table.
3960 The algorithm is:
3962 scan, find an instruction which needs a pcrel move. Look forward, find the
3963 last barrier which is within MAX_COUNT bytes of the requirement.
3964 If there isn't one, make one. Process all the instructions between
3965 the find and the barrier.
3967 In the above example, we can tell that L3 is within 1k of L1, so
3968 the first move can be shrunk from the 3 insn+constant sequence into
3969 just 1 insn, and the constant moved to L3 to make:
3971 mov.l L1,rn
3973 mov.l L3,rn
3974 bra L4
3976 align
3977 L3:.long value
3978 L4:.long value
3980 Then the second move becomes the target for the shortening process. */
3982 typedef struct
3984 rtx value; /* Value in table. */
3985 rtx label; /* Label of value. */
3986 label_ref_list_t wend; /* End of window. */
3987 enum machine_mode mode; /* Mode of value. */
3989 /* True if this constant is accessed as part of a post-increment
3990 sequence. Note that HImode constants are never accessed in this way. */
3991 bool part_of_sequence_p;
3992 } pool_node;
3994 /* The maximum number of constants that can fit into one pool, since
3995 constants in the range 0..510 are at least 2 bytes long, and in the
3996 range from there to 1018 at least 4 bytes. */
3998 #define MAX_POOL_SIZE 372
3999 static pool_node pool_vector[MAX_POOL_SIZE];
4000 static int pool_size;
4001 static rtx pool_window_label;
4002 static int pool_window_last;
4004 static int max_labelno_before_reorg;
4006 /* ??? If we need a constant in HImode which is the truncated value of a
4007 constant we need in SImode, we could combine the two entries thus saving
4008 two bytes. Is this common enough to be worth the effort of implementing
4009 it? */
4011 /* ??? This stuff should be done at the same time that we shorten branches.
4012 As it is now, we must assume that all branches are the maximum size, and
4013 this causes us to almost always output constant pools sooner than
4014 necessary. */
4016 /* Add a constant to the pool and return its label. */
4018 static rtx
4019 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4021 int i;
4022 rtx lab, new_rtx;
4023 label_ref_list_t ref, newref;
4025 /* First see if we've already got it. */
4026 for (i = 0; i < pool_size; i++)
4028 if (x->code == pool_vector[i].value->code
4029 && mode == pool_vector[i].mode)
4031 if (x->code == CODE_LABEL)
4033 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4034 continue;
4036 if (rtx_equal_p (x, pool_vector[i].value))
4038 lab = new_rtx = 0;
4039 if (! last_value
4040 || ! i
4041 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4043 new_rtx = gen_label_rtx ();
4044 LABEL_REFS (new_rtx) = pool_vector[i].label;
4045 pool_vector[i].label = lab = new_rtx;
4047 if (lab && pool_window_label)
4049 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4050 newref->label = pool_window_label;
4051 ref = pool_vector[pool_window_last].wend;
4052 newref->next = ref;
4053 pool_vector[pool_window_last].wend = newref;
4055 if (new_rtx)
4056 pool_window_label = new_rtx;
4057 pool_window_last = i;
4058 return lab;
4063 /* Need a new one. */
4064 pool_vector[pool_size].value = x;
4065 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4067 lab = 0;
4068 pool_vector[pool_size - 1].part_of_sequence_p = true;
4070 else
4071 lab = gen_label_rtx ();
4072 pool_vector[pool_size].mode = mode;
4073 pool_vector[pool_size].label = lab;
4074 pool_vector[pool_size].wend = NULL;
4075 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4076 if (lab && pool_window_label)
4078 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4079 newref->label = pool_window_label;
4080 ref = pool_vector[pool_window_last].wend;
4081 newref->next = ref;
4082 pool_vector[pool_window_last].wend = newref;
4084 if (lab)
4085 pool_window_label = lab;
4086 pool_window_last = pool_size;
4087 pool_size++;
4088 return lab;
4091 /* Output the literal table. START, if nonzero, is the first instruction
4092 this table is needed for, and also indicates that there is at least one
4093 casesi_worker_2 instruction; We have to emit the operand3 labels from
4094 these insns at a 4-byte aligned position. BARRIER is the barrier
4095 after which we are to place the table. */
4097 static void
4098 dump_table (rtx start, rtx barrier)
4100 rtx scan = barrier;
4101 int i;
4102 int need_align = 1;
4103 rtx lab;
4104 label_ref_list_t ref;
4105 int have_df = 0;
4107 /* Do two passes, first time dump out the HI sized constants. */
4109 for (i = 0; i < pool_size; i++)
4111 pool_node *p = &pool_vector[i];
4113 if (p->mode == HImode)
4115 if (need_align)
4117 scan = emit_insn_after (gen_align_2 (), scan);
4118 need_align = 0;
4120 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4121 scan = emit_label_after (lab, scan);
4122 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4123 scan);
4124 for (ref = p->wend; ref; ref = ref->next)
4126 lab = ref->label;
4127 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4130 else if (p->mode == DFmode)
4131 have_df = 1;
4134 need_align = 1;
4136 if (start)
4138 scan = emit_insn_after (gen_align_4 (), scan);
4139 need_align = 0;
4140 for (; start != barrier; start = NEXT_INSN (start))
4141 if (NONJUMP_INSN_P (start)
4142 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4144 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4145 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4147 scan = emit_label_after (lab, scan);
4150 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4152 rtx align_insn = NULL_RTX;
4154 scan = emit_label_after (gen_label_rtx (), scan);
4155 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4156 need_align = 0;
4158 for (i = 0; i < pool_size; i++)
4160 pool_node *p = &pool_vector[i];
4162 switch (p->mode)
4164 case HImode:
4165 break;
4166 case SImode:
4167 case SFmode:
4168 if (align_insn && !p->part_of_sequence_p)
4170 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4171 emit_label_before (lab, align_insn);
4172 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4173 align_insn);
4174 for (ref = p->wend; ref; ref = ref->next)
4176 lab = ref->label;
4177 emit_insn_before (gen_consttable_window_end (lab),
4178 align_insn);
4180 delete_insn (align_insn);
4181 align_insn = NULL_RTX;
4182 continue;
4184 else
4186 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4187 scan = emit_label_after (lab, scan);
4188 scan = emit_insn_after (gen_consttable_4 (p->value,
4189 const0_rtx), scan);
4190 need_align = ! need_align;
4192 break;
4193 case DFmode:
4194 if (need_align)
4196 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4197 align_insn = scan;
4198 need_align = 0;
4200 case DImode:
4201 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4202 scan = emit_label_after (lab, scan);
4203 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4204 scan);
4205 break;
4206 default:
4207 gcc_unreachable ();
4210 if (p->mode != HImode)
4212 for (ref = p->wend; ref; ref = ref->next)
4214 lab = ref->label;
4215 scan = emit_insn_after (gen_consttable_window_end (lab),
4216 scan);
4221 pool_size = 0;
4224 for (i = 0; i < pool_size; i++)
4226 pool_node *p = &pool_vector[i];
4228 switch (p->mode)
4230 case HImode:
4231 break;
4232 case SImode:
4233 case SFmode:
4234 if (need_align)
4236 need_align = 0;
4237 scan = emit_label_after (gen_label_rtx (), scan);
4238 scan = emit_insn_after (gen_align_4 (), scan);
4240 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4241 scan = emit_label_after (lab, scan);
4242 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4243 scan);
4244 break;
4245 case DFmode:
4246 case DImode:
4247 if (need_align)
4249 need_align = 0;
4250 scan = emit_label_after (gen_label_rtx (), scan);
4251 scan = emit_insn_after (gen_align_4 (), scan);
4253 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4254 scan = emit_label_after (lab, scan);
4255 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4256 scan);
4257 break;
4258 default:
4259 gcc_unreachable ();
4262 if (p->mode != HImode)
4264 for (ref = p->wend; ref; ref = ref->next)
4266 lab = ref->label;
4267 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4272 scan = emit_insn_after (gen_consttable_end (), scan);
4273 scan = emit_barrier_after (scan);
4274 pool_size = 0;
4275 pool_window_label = NULL_RTX;
4276 pool_window_last = 0;
4279 /* Return nonzero if constant would be an ok source for a
4280 mov.w instead of a mov.l. */
4282 static int
4283 hi_const (rtx src)
4285 return (CONST_INT_P (src)
4286 && INTVAL (src) >= -32768
4287 && INTVAL (src) <= 32767);
4290 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4292 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4294 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4295 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4296 need to fix it if the input value is CONST_OK_FOR_I08. */
4298 static int
4299 broken_move (rtx insn)
4301 if (NONJUMP_INSN_P (insn))
4303 rtx pat = PATTERN (insn);
4304 if (GET_CODE (pat) == PARALLEL)
4305 pat = XVECEXP (pat, 0, 0);
4306 if (GET_CODE (pat) == SET
4307 /* We can load any 8-bit value if we don't care what the high
4308 order bits end up as. */
4309 && GET_MODE (SET_DEST (pat)) != QImode
4310 && (CONSTANT_P (SET_SRC (pat))
4311 /* Match mova_const. */
4312 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4313 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4314 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4315 && ! (TARGET_SH2E
4316 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4317 && (fp_zero_operand (SET_SRC (pat))
4318 || fp_one_operand (SET_SRC (pat)))
4319 /* In general we don't know the current setting of fpscr, so disable fldi.
4320 There is an exception if this was a register-register move
4321 before reload - and hence it was ascertained that we have
4322 single precision setting - and in a post-reload optimization
4323 we changed this to do a constant load. In that case
4324 we don't have an r0 clobber, hence we must use fldi. */
4325 && (TARGET_FMOVD
4326 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4327 == SCRATCH))
4328 && REG_P (SET_DEST (pat))
4329 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4330 && ! (TARGET_SH2A
4331 && GET_MODE (SET_DEST (pat)) == SImode
4332 && (satisfies_constraint_I20 (SET_SRC (pat))
4333 || satisfies_constraint_I28 (SET_SRC (pat))))
4334 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4335 return 1;
4338 return 0;
4341 static int
4342 mova_p (rtx insn)
4344 return (NONJUMP_INSN_P (insn)
4345 && GET_CODE (PATTERN (insn)) == SET
4346 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4347 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4348 /* Don't match mova_const. */
4349 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4352 /* Fix up a mova from a switch that went out of range. */
4353 static void
4354 fixup_mova (rtx mova)
4356 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4357 if (! flag_pic)
4359 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4360 INSN_CODE (mova) = -1;
4362 else
4364 rtx worker = mova;
4365 rtx lab = gen_label_rtx ();
4366 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4370 worker = NEXT_INSN (worker);
4371 gcc_assert (worker
4372 && !LABEL_P (worker)
4373 && !JUMP_P (worker));
4374 } while (NOTE_P (worker)
4375 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4376 wpat = PATTERN (worker);
4377 wpat0 = XVECEXP (wpat, 0, 0);
4378 wpat1 = XVECEXP (wpat, 0, 1);
4379 wsrc = SET_SRC (wpat0);
4380 PATTERN (worker) = (gen_casesi_worker_2
4381 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4382 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4383 XEXP (wpat1, 0)));
4384 INSN_CODE (worker) = -1;
4385 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4386 base = gen_rtx_LABEL_REF (Pmode, lab);
4387 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4388 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4389 INSN_CODE (mova) = -1;
4393 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4394 *num_mova, and check if the new mova is not nested within the first one.
4395 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4396 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4397 static int
4398 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4400 int n_addr = 0; /* Initialization to shut up spurious warning. */
4401 int f_target, n_target = 0; /* Likewise. */
4403 if (optimize)
4405 /* If NEW_MOVA has no address yet, it will be handled later. */
4406 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4407 return -1;
4409 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4410 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4411 if (n_addr > n_target || n_addr + 1022 < n_target)
4413 /* Change the mova into a load.
4414 broken_move will then return true for it. */
4415 fixup_mova (new_mova);
4416 return 1;
4419 if (!(*num_mova)++)
4421 *first_mova = new_mova;
4422 return 2;
4424 if (!optimize
4425 || ((f_target
4426 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4427 >= n_target))
4428 return -1;
4430 (*num_mova)--;
4431 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4432 > n_target - n_addr)
4434 fixup_mova (*first_mova);
4435 return 0;
4437 else
4439 fixup_mova (new_mova);
4440 return 1;
4444 /* Find the last barrier from insn FROM which is close enough to hold the
4445 constant pool. If we can't find one, then create one near the end of
4446 the range. */
4448 static rtx
4449 find_barrier (int num_mova, rtx mova, rtx from)
4451 int count_si = 0;
4452 int count_hi = 0;
4453 int found_hi = 0;
4454 int found_si = 0;
4455 int found_di = 0;
4456 int hi_align = 2;
4457 int si_align = 2;
4458 int leading_mova = num_mova;
4459 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4460 int si_limit;
4461 int hi_limit;
4462 rtx orig = from;
4463 rtx last_got = NULL_RTX;
4464 rtx last_symoff = NULL_RTX;
4466 /* For HImode: range is 510, add 4 because pc counts from address of
4467 second instruction after this one, subtract 2 for the jump instruction
4468 that we may need to emit before the table, subtract 2 for the instruction
4469 that fills the jump delay slot (in very rare cases, reorg will take an
4470 instruction from after the constant pool or will leave the delay slot
4471 empty). This gives 510.
4472 For SImode: range is 1020, add 4 because pc counts from address of
4473 second instruction after this one, subtract 2 in case pc is 2 byte
4474 aligned, subtract 2 for the jump instruction that we may need to emit
4475 before the table, subtract 2 for the instruction that fills the jump
4476 delay slot. This gives 1018. */
4478 /* The branch will always be shortened now that the reference address for
4479 forward branches is the successor address, thus we need no longer make
4480 adjustments to the [sh]i_limit for -O0. */
4482 si_limit = 1018;
4483 hi_limit = 510;
4485 while (from && count_si < si_limit && count_hi < hi_limit)
4487 int inc = get_attr_length (from);
4488 int new_align = 1;
4490 /* If this is a label that existed at the time of the compute_alignments
4491 call, determine the alignment. N.B. When find_barrier recurses for
4492 an out-of-reach mova, we might see labels at the start of previously
4493 inserted constant tables. */
4494 if (LABEL_P (from)
4495 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4497 if (optimize)
4498 new_align = 1 << label_to_alignment (from);
4499 else if (BARRIER_P (prev_nonnote_insn (from)))
4500 new_align = 1 << barrier_align (from);
4501 else
4502 new_align = 1;
4503 inc = 0;
4505 /* In case we are scanning a constant table because of recursion, check
4506 for explicit alignments. If the table is long, we might be forced
4507 to emit the new table in front of it; the length of the alignment
4508 might be the last straw. */
4509 else if (NONJUMP_INSN_P (from)
4510 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4511 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4512 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4513 /* When we find the end of a constant table, paste the new constant
4514 at the end. That is better than putting it in front because
4515 this way, we don't need extra alignment for adding a 4-byte-aligned
4516 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4517 else if (NONJUMP_INSN_P (from)
4518 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4519 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4520 return from;
4522 if (BARRIER_P (from))
4524 rtx next;
4526 found_barrier = from;
4528 /* If we are at the end of the function, or in front of an alignment
4529 instruction, we need not insert an extra alignment. We prefer
4530 this kind of barrier. */
4531 if (barrier_align (from) > 2)
4532 good_barrier = from;
4534 /* If we are at the end of a hot/cold block, dump the constants
4535 here. */
4536 next = NEXT_INSN (from);
4537 if (next
4538 && NOTE_P (next)
4539 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4540 break;
4543 if (broken_move (from))
4545 rtx pat, src, dst;
4546 enum machine_mode mode;
4548 pat = PATTERN (from);
4549 if (GET_CODE (pat) == PARALLEL)
4550 pat = XVECEXP (pat, 0, 0);
4551 src = SET_SRC (pat);
4552 dst = SET_DEST (pat);
4553 mode = GET_MODE (dst);
4555 /* GOT pcrelat setting comes in pair of
4556 mova .L8,r0
4557 mov.l .L8,r12
4558 instructions. (plus add r0,r12).
4559 Remember if we see one without the other. */
4560 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4561 last_got = last_got ? NULL_RTX : from;
4562 else if (PIC_ADDR_P (src))
4563 last_got = last_got ? NULL_RTX : from;
4565 /* We must explicitly check the mode, because sometimes the
4566 front end will generate code to load unsigned constants into
4567 HImode targets without properly sign extending them. */
4568 if (mode == HImode
4569 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4571 found_hi += 2;
4572 /* We put the short constants before the long constants, so
4573 we must count the length of short constants in the range
4574 for the long constants. */
4575 /* ??? This isn't optimal, but is easy to do. */
4576 si_limit -= 2;
4578 else
4580 /* We dump DF/DI constants before SF/SI ones, because
4581 the limit is the same, but the alignment requirements
4582 are higher. We may waste up to 4 additional bytes
4583 for alignment, and the DF/DI constant may have
4584 another SF/SI constant placed before it. */
4585 if (TARGET_SHCOMPACT
4586 && ! found_di
4587 && (mode == DFmode || mode == DImode))
4589 found_di = 1;
4590 si_limit -= 8;
4592 while (si_align > 2 && found_si + si_align - 2 > count_si)
4593 si_align >>= 1;
4594 if (found_si > count_si)
4595 count_si = found_si;
4596 found_si += GET_MODE_SIZE (mode);
4597 if (num_mova)
4598 si_limit -= GET_MODE_SIZE (mode);
4602 if (mova_p (from))
4604 switch (untangle_mova (&num_mova, &mova, from))
4606 case 1:
4607 if (flag_pic)
4609 rtx src = SET_SRC (PATTERN (from));
4610 if (GET_CODE (src) == CONST
4611 && GET_CODE (XEXP (src, 0)) == UNSPEC
4612 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4613 last_symoff = from;
4615 break;
4616 case 0: return find_barrier (0, 0, mova);
4617 case 2:
4619 leading_mova = 0;
4620 barrier_before_mova
4621 = good_barrier ? good_barrier : found_barrier;
4623 default: break;
4625 if (found_si > count_si)
4626 count_si = found_si;
4628 else if (JUMP_TABLE_DATA_P (from))
4630 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4631 || (num_mova
4632 && (prev_nonnote_insn (from)
4633 == XEXP (MOVA_LABELREF (mova), 0))))
4634 num_mova--;
4635 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4637 /* We have just passed the barrier in front of the
4638 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4639 the ADDR_DIFF_VEC is accessed as data, just like our pool
4640 constants, this is a good opportunity to accommodate what
4641 we have gathered so far.
4642 If we waited any longer, we could end up at a barrier in
4643 front of code, which gives worse cache usage for separated
4644 instruction / data caches. */
4645 good_barrier = found_barrier;
4646 break;
4648 else
4650 rtx body = PATTERN (from);
4651 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4654 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4655 else if (JUMP_P (from)
4656 && ! TARGET_SH2
4657 && ! optimize_size)
4658 new_align = 4;
4660 /* There is a possibility that a bf is transformed into a bf/s by the
4661 delay slot scheduler. */
4662 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4663 && get_attr_type (from) == TYPE_CBRANCH
4664 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4665 inc += 2;
4667 if (found_si)
4669 count_si += inc;
4670 if (new_align > si_align)
4672 si_limit -= (count_si - 1) & (new_align - si_align);
4673 si_align = new_align;
4675 count_si = (count_si + new_align - 1) & -new_align;
4677 if (found_hi)
4679 count_hi += inc;
4680 if (new_align > hi_align)
4682 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4683 hi_align = new_align;
4685 count_hi = (count_hi + new_align - 1) & -new_align;
4687 from = NEXT_INSN (from);
4690 if (num_mova)
4692 if (leading_mova)
4694 /* Try as we might, the leading mova is out of range. Change
4695 it into a load (which will become a pcload) and retry. */
4696 fixup_mova (mova);
4697 return find_barrier (0, 0, mova);
4699 else
4701 /* Insert the constant pool table before the mova instruction,
4702 to prevent the mova label reference from going out of range. */
4703 from = mova;
4704 good_barrier = found_barrier = barrier_before_mova;
4708 if (found_barrier)
4710 if (good_barrier && next_real_insn (found_barrier))
4711 found_barrier = good_barrier;
4713 else
4715 /* We didn't find a barrier in time to dump our stuff,
4716 so we'll make one. */
4717 rtx label = gen_label_rtx ();
4719 /* Don't emit a constant table in the middle of insns for
4720 casesi_worker_2. This is a bit overkill but is enough
4721 because casesi_worker_2 wouldn't appear so frequently. */
4722 if (last_symoff)
4723 from = last_symoff;
4725 /* If we exceeded the range, then we must back up over the last
4726 instruction we looked at. Otherwise, we just need to undo the
4727 NEXT_INSN at the end of the loop. */
4728 if (PREV_INSN (from) != orig
4729 && (count_hi > hi_limit || count_si > si_limit))
4730 from = PREV_INSN (PREV_INSN (from));
4731 else
4732 from = PREV_INSN (from);
4734 /* Don't emit a constant table int the middle of global pointer setting,
4735 since that that would move the addressing base GOT into another table.
4736 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4737 in the pool anyway, so just move up the whole constant pool. */
4738 if (last_got)
4739 from = PREV_INSN (last_got);
4741 /* Don't insert the constant pool table at the position which
4742 may be the landing pad. */
4743 if (flag_exceptions
4744 && CALL_P (from)
4745 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4746 from = PREV_INSN (from);
4748 /* Walk back to be just before any jump or label.
4749 Putting it before a label reduces the number of times the branch
4750 around the constant pool table will be hit. Putting it before
4751 a jump makes it more likely that the bra delay slot will be
4752 filled. */
4753 while (NOTE_P (from) || JUMP_P (from)
4754 || LABEL_P (from))
4755 from = PREV_INSN (from);
4757 from = emit_jump_insn_after (gen_jump (label), from);
4758 JUMP_LABEL (from) = label;
4759 LABEL_NUSES (label) = 1;
4760 found_barrier = emit_barrier_after (from);
4761 emit_label_after (label, found_barrier);
4764 return found_barrier;
4767 /* If the instruction INSN is implemented by a special function, and we can
4768 positively find the register that is used to call the sfunc, and this
4769 register is not used anywhere else in this instruction - except as the
4770 destination of a set, return this register; else, return 0. */
4772 sfunc_uses_reg (rtx insn)
4774 int i;
4775 rtx pattern, part, reg_part, reg;
4777 if (!NONJUMP_INSN_P (insn))
4778 return 0;
4779 pattern = PATTERN (insn);
4780 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4781 return 0;
4783 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4785 part = XVECEXP (pattern, 0, i);
4786 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4787 reg_part = part;
4789 if (! reg_part)
4790 return 0;
4791 reg = XEXP (reg_part, 0);
4792 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4794 part = XVECEXP (pattern, 0, i);
4795 if (part == reg_part || GET_CODE (part) == CLOBBER)
4796 continue;
4797 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4798 && REG_P (SET_DEST (part)))
4799 ? SET_SRC (part) : part)))
4800 return 0;
4802 return reg;
4805 /* See if the only way in which INSN uses REG is by calling it, or by
4806 setting it while calling it. Set *SET to a SET rtx if the register
4807 is set by INSN. */
4809 static int
4810 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4812 rtx pattern, reg2;
4814 *set = NULL_RTX;
4816 reg2 = sfunc_uses_reg (insn);
4817 if (reg2 && REGNO (reg2) == REGNO (reg))
4819 pattern = single_set (insn);
4820 if (pattern
4821 && REG_P (SET_DEST (pattern))
4822 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4823 *set = pattern;
4824 return 0;
4826 if (!CALL_P (insn))
4828 /* We don't use rtx_equal_p because we don't care if the mode is
4829 different. */
4830 pattern = single_set (insn);
4831 if (pattern
4832 && REG_P (SET_DEST (pattern))
4833 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4835 rtx par, part;
4836 int i;
4838 *set = pattern;
4839 par = PATTERN (insn);
4840 if (GET_CODE (par) == PARALLEL)
4841 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4843 part = XVECEXP (par, 0, i);
4844 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4845 return 1;
4847 return reg_mentioned_p (reg, SET_SRC (pattern));
4850 return 1;
4853 pattern = PATTERN (insn);
4855 if (GET_CODE (pattern) == PARALLEL)
4857 int i;
4859 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4860 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4861 return 1;
4862 pattern = XVECEXP (pattern, 0, 0);
4865 if (GET_CODE (pattern) == SET)
4867 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4869 /* We don't use rtx_equal_p, because we don't care if the
4870 mode is different. */
4871 if (!REG_P (SET_DEST (pattern))
4872 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4873 return 1;
4875 *set = pattern;
4878 pattern = SET_SRC (pattern);
4881 if (GET_CODE (pattern) != CALL
4882 || !MEM_P (XEXP (pattern, 0))
4883 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4884 return 1;
4886 return 0;
4889 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4890 general registers. Bits 0..15 mean that the respective registers
4891 are used as inputs in the instruction. Bits 16..31 mean that the
4892 registers 0..15, respectively, are used as outputs, or are clobbered.
4893 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4895 regs_used (rtx x, int is_dest)
4897 enum rtx_code code;
4898 const char *fmt;
4899 int i, used = 0;
4901 if (! x)
4902 return used;
4903 code = GET_CODE (x);
4904 switch (code)
4906 case REG:
4907 if (REGNO (x) < 16)
4908 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4909 << (REGNO (x) + is_dest));
4910 return 0;
4911 case SUBREG:
4913 rtx y = SUBREG_REG (x);
4915 if (!REG_P (y))
4916 break;
4917 if (REGNO (y) < 16)
4918 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4919 << (REGNO (y) +
4920 subreg_regno_offset (REGNO (y),
4921 GET_MODE (y),
4922 SUBREG_BYTE (x),
4923 GET_MODE (x)) + is_dest));
4924 return 0;
4926 case SET:
4927 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4928 case RETURN:
4929 /* If there was a return value, it must have been indicated with USE. */
4930 return 0x00ffff00;
4931 case CLOBBER:
4932 is_dest = 1;
4933 break;
4934 case MEM:
4935 is_dest = 0;
4936 break;
4937 case CALL:
4938 used |= 0x00ff00f0;
4939 break;
4940 default:
4941 break;
4944 fmt = GET_RTX_FORMAT (code);
4946 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4948 if (fmt[i] == 'E')
4950 register int j;
4951 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4952 used |= regs_used (XVECEXP (x, i, j), is_dest);
4954 else if (fmt[i] == 'e')
4955 used |= regs_used (XEXP (x, i), is_dest);
4957 return used;
4960 /* Create an instruction that prevents redirection of a conditional branch
4961 to the destination of the JUMP with address ADDR.
4962 If the branch needs to be implemented as an indirect jump, try to find
4963 a scratch register for it.
4964 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4965 If any preceding insn that doesn't fit into a delay slot is good enough,
4966 pass 1. Pass 2 if a definite blocking insn is needed.
4967 -1 is used internally to avoid deep recursion.
4968 If a blocking instruction is made or recognized, return it. */
4970 static rtx
4971 gen_block_redirect (rtx jump, int addr, int need_block)
4973 int dead = 0;
4974 rtx prev = prev_nonnote_insn (jump);
4975 rtx dest;
4977 /* First, check if we already have an instruction that satisfies our need. */
4978 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4980 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4981 return prev;
4982 if (GET_CODE (PATTERN (prev)) == USE
4983 || GET_CODE (PATTERN (prev)) == CLOBBER
4984 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4985 prev = jump;
4986 else if ((need_block &= ~1) < 0)
4987 return prev;
4988 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4989 need_block = 0;
4991 if (GET_CODE (PATTERN (jump)) == RETURN)
4993 if (! need_block)
4994 return prev;
4995 /* Reorg even does nasty things with return insns that cause branches
4996 to go out of range - see find_end_label and callers. */
4997 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4999 /* We can't use JUMP_LABEL here because it might be undefined
5000 when not optimizing. */
5001 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5002 /* If the branch is out of range, try to find a scratch register for it. */
5003 if (optimize
5004 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5005 > 4092 + 4098))
5007 rtx scan;
5008 /* Don't look for the stack pointer as a scratch register,
5009 it would cause trouble if an interrupt occurred. */
5010 unsigned attempt = 0x7fff, used;
5011 int jump_left = flag_expensive_optimizations + 1;
5013 /* It is likely that the most recent eligible instruction is wanted for
5014 the delay slot. Therefore, find out which registers it uses, and
5015 try to avoid using them. */
5017 for (scan = jump; (scan = PREV_INSN (scan)); )
5019 enum rtx_code code;
5021 if (INSN_DELETED_P (scan))
5022 continue;
5023 code = GET_CODE (scan);
5024 if (code == CODE_LABEL || code == JUMP_INSN)
5025 break;
5026 if (code == INSN
5027 && GET_CODE (PATTERN (scan)) != USE
5028 && GET_CODE (PATTERN (scan)) != CLOBBER
5029 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5031 attempt &= ~regs_used (PATTERN (scan), 0);
5032 break;
5035 for (used = dead = 0, scan = JUMP_LABEL (jump);
5036 (scan = NEXT_INSN (scan)); )
5038 enum rtx_code code;
5040 if (INSN_DELETED_P (scan))
5041 continue;
5042 code = GET_CODE (scan);
5043 if (INSN_P (scan))
5045 used |= regs_used (PATTERN (scan), 0);
5046 if (code == CALL_INSN)
5047 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5048 dead |= (used >> 16) & ~used;
5049 if (dead & attempt)
5051 dead &= attempt;
5052 break;
5054 if (code == JUMP_INSN)
5056 if (jump_left-- && simplejump_p (scan))
5057 scan = JUMP_LABEL (scan);
5058 else
5059 break;
5063 /* Mask out the stack pointer again, in case it was
5064 the only 'free' register we have found. */
5065 dead &= 0x7fff;
5067 /* If the immediate destination is still in range, check for possible
5068 threading with a jump beyond the delay slot insn.
5069 Don't check if we are called recursively; the jump has been or will be
5070 checked in a different invocation then. */
5072 else if (optimize && need_block >= 0)
5074 rtx next = next_active_insn (next_active_insn (dest));
5075 if (next && JUMP_P (next)
5076 && GET_CODE (PATTERN (next)) == SET
5077 && recog_memoized (next) == CODE_FOR_jump_compact)
5079 dest = JUMP_LABEL (next);
5080 if (dest
5081 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5082 > 4092 + 4098))
5083 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5087 if (dead)
5089 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5091 /* It would be nice if we could convert the jump into an indirect
5092 jump / far branch right now, and thus exposing all constituent
5093 instructions to further optimization. However, reorg uses
5094 simplejump_p to determine if there is an unconditional jump where
5095 it should try to schedule instructions from the target of the
5096 branch; simplejump_p fails for indirect jumps even if they have
5097 a JUMP_LABEL. */
5098 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5099 (reg, GEN_INT (unspec_bbr_uid++)),
5100 jump);
5101 /* ??? We would like this to have the scope of the jump, but that
5102 scope will change when a delay slot insn of an inner scope is added.
5103 Hence, after delay slot scheduling, we'll have to expect
5104 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5105 the jump. */
5107 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5108 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5109 return insn;
5111 else if (need_block)
5112 /* We can't use JUMP_LABEL here because it might be undefined
5113 when not optimizing. */
5114 return emit_insn_before (gen_block_branch_redirect
5115 (GEN_INT (unspec_bbr_uid++)),
5116 jump);
5117 return prev;
5120 #define CONDJUMP_MIN -252
5121 #define CONDJUMP_MAX 262
5122 struct far_branch
5124 /* A label (to be placed) in front of the jump
5125 that jumps to our ultimate destination. */
5126 rtx near_label;
5127 /* Where we are going to insert it if we cannot move the jump any farther,
5128 or the jump itself if we have picked up an existing jump. */
5129 rtx insert_place;
5130 /* The ultimate destination. */
5131 rtx far_label;
5132 struct far_branch *prev;
5133 /* If the branch has already been created, its address;
5134 else the address of its first prospective user. */
5135 int address;
5138 static void gen_far_branch (struct far_branch *);
5139 enum mdep_reorg_phase_e mdep_reorg_phase;
5140 static void
5141 gen_far_branch (struct far_branch *bp)
5143 rtx insn = bp->insert_place;
5144 rtx jump;
5145 rtx label = gen_label_rtx ();
5146 int ok;
5148 emit_label_after (label, insn);
5149 if (bp->far_label)
5151 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5152 LABEL_NUSES (bp->far_label)++;
5154 else
5155 jump = emit_jump_insn_after (gen_return (), insn);
5156 /* Emit a barrier so that reorg knows that any following instructions
5157 are not reachable via a fall-through path.
5158 But don't do this when not optimizing, since we wouldn't suppress the
5159 alignment for the barrier then, and could end up with out-of-range
5160 pc-relative loads. */
5161 if (optimize)
5162 emit_barrier_after (jump);
5163 emit_label_after (bp->near_label, insn);
5164 JUMP_LABEL (jump) = bp->far_label;
5165 ok = invert_jump (insn, label, 1);
5166 gcc_assert (ok);
5168 /* If we are branching around a jump (rather than a return), prevent
5169 reorg from using an insn from the jump target as the delay slot insn -
5170 when reorg did this, it pessimized code (we rather hide the delay slot)
5171 and it could cause branches to go out of range. */
5172 if (bp->far_label)
5173 (emit_insn_after
5174 (gen_stuff_delay_slot
5175 (GEN_INT (unspec_bbr_uid++),
5176 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5177 insn));
5178 /* Prevent reorg from undoing our splits. */
5179 gen_block_redirect (jump, bp->address += 2, 2);
5182 /* Fix up ADDR_DIFF_VECs. */
5183 void
5184 fixup_addr_diff_vecs (rtx first)
5186 rtx insn;
5188 for (insn = first; insn; insn = NEXT_INSN (insn))
5190 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5192 if (!JUMP_P (insn)
5193 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5194 continue;
5195 pat = PATTERN (insn);
5196 vec_lab = XEXP (XEXP (pat, 0), 0);
5198 /* Search the matching casesi_jump_2. */
5199 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5201 if (!JUMP_P (prev))
5202 continue;
5203 prevpat = PATTERN (prev);
5204 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5205 continue;
5206 x = XVECEXP (prevpat, 0, 1);
5207 if (GET_CODE (x) != USE)
5208 continue;
5209 x = XEXP (x, 0);
5210 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5211 break;
5213 /* FIXME: This is a bug in the optimizer, but it seems harmless
5214 to just avoid panicing. */
5215 if (!prev)
5216 continue;
5218 /* Emit the reference label of the braf where it belongs, right after
5219 the casesi_jump_2 (i.e. braf). */
5220 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5221 emit_label_after (braf_label, prev);
5223 /* Fix up the ADDR_DIF_VEC to be relative
5224 to the reference address of the braf. */
5225 XEXP (XEXP (pat, 0), 0) = braf_label;
5229 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5230 a barrier. Return the base 2 logarithm of the desired alignment. */
5232 barrier_align (rtx barrier_or_label)
5234 rtx next = next_real_insn (barrier_or_label), pat, prev;
5235 int slot, credit, jump_to_next = 0;
5237 if (! next)
5238 return 0;
5240 pat = PATTERN (next);
5242 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5243 return 2;
5245 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5246 /* This is a barrier in front of a constant table. */
5247 return 0;
5249 prev = prev_real_insn (barrier_or_label);
5250 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5252 pat = PATTERN (prev);
5253 /* If this is a very small table, we want to keep the alignment after
5254 the table to the minimum for proper code alignment. */
5255 return ((optimize_size
5256 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5257 <= (unsigned) 1 << (CACHE_LOG - 2)))
5258 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5261 if (optimize_size)
5262 return 0;
5264 if (! TARGET_SH2 || ! optimize)
5265 return align_jumps_log;
5267 /* When fixing up pcloads, a constant table might be inserted just before
5268 the basic block that ends with the barrier. Thus, we can't trust the
5269 instruction lengths before that. */
5270 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5272 /* Check if there is an immediately preceding branch to the insn beyond
5273 the barrier. We must weight the cost of discarding useful information
5274 from the current cache line when executing this branch and there is
5275 an alignment, against that of fetching unneeded insn in front of the
5276 branch target when there is no alignment. */
5278 /* There are two delay_slot cases to consider. One is the simple case
5279 where the preceding branch is to the insn beyond the barrier (simple
5280 delay slot filling), and the other is where the preceding branch has
5281 a delay slot that is a duplicate of the insn after the barrier
5282 (fill_eager_delay_slots) and the branch is to the insn after the insn
5283 after the barrier. */
5285 /* PREV is presumed to be the JUMP_INSN for the barrier under
5286 investigation. Skip to the insn before it. */
5287 prev = prev_real_insn (prev);
5289 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5290 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5291 prev = prev_real_insn (prev))
5293 jump_to_next = 0;
5294 if (GET_CODE (PATTERN (prev)) == USE
5295 || GET_CODE (PATTERN (prev)) == CLOBBER)
5296 continue;
5297 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5299 prev = XVECEXP (PATTERN (prev), 0, 1);
5300 if (INSN_UID (prev) == INSN_UID (next))
5302 /* Delay slot was filled with insn at jump target. */
5303 jump_to_next = 1;
5304 continue;
5308 if (slot &&
5309 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5310 slot = 0;
5311 credit -= get_attr_length (prev);
5313 if (prev
5314 && JUMP_P (prev)
5315 && JUMP_LABEL (prev))
5317 rtx x;
5318 if (jump_to_next
5319 || next_real_insn (JUMP_LABEL (prev)) == next
5320 /* If relax_delay_slots() decides NEXT was redundant
5321 with some previous instruction, it will have
5322 redirected PREV's jump to the following insn. */
5323 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5324 /* There is no upper bound on redundant instructions
5325 that might have been skipped, but we must not put an
5326 alignment where none had been before. */
5327 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5328 (INSN_P (x)
5329 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5330 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5331 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5333 rtx pat = PATTERN (prev);
5334 if (GET_CODE (pat) == PARALLEL)
5335 pat = XVECEXP (pat, 0, 0);
5336 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5337 return 0;
5342 return align_jumps_log;
5345 /* If we are inside a phony loop, almost any kind of label can turn up as the
5346 first one in the loop. Aligning a braf label causes incorrect switch
5347 destination addresses; we can detect braf labels because they are
5348 followed by a BARRIER.
5349 Applying loop alignment to small constant or switch tables is a waste
5350 of space, so we suppress this too. */
5352 sh_loop_align (rtx label)
5354 rtx next = label;
5357 next = next_nonnote_insn (next);
5358 while (next && LABEL_P (next));
5360 if (! next
5361 || ! INSN_P (next)
5362 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5363 || recog_memoized (next) == CODE_FOR_consttable_2)
5364 return 0;
5366 return align_loops_log;
5369 /* Do a final pass over the function, just before delayed branch
5370 scheduling. */
5372 static void
5373 sh_reorg (void)
5375 rtx first, insn, mova = NULL_RTX;
5376 int num_mova;
5377 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5378 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5380 first = get_insns ();
5381 max_labelno_before_reorg = max_label_num ();
5383 /* We must split call insns before introducing `mova's. If we're
5384 optimizing, they'll have already been split. Otherwise, make
5385 sure we don't split them too late. */
5386 if (! optimize)
5387 split_all_insns_noflow ();
5389 if (TARGET_SHMEDIA)
5390 return;
5392 /* If relaxing, generate pseudo-ops to associate function calls with
5393 the symbols they call. It does no harm to not generate these
5394 pseudo-ops. However, when we can generate them, it enables to
5395 linker to potentially relax the jsr to a bsr, and eliminate the
5396 register load and, possibly, the constant pool entry. */
5398 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5399 if (TARGET_RELAX)
5401 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5402 own purposes. This works because none of the remaining passes
5403 need to look at them.
5405 ??? But it may break in the future. We should use a machine
5406 dependent REG_NOTE, or some other approach entirely. */
5407 for (insn = first; insn; insn = NEXT_INSN (insn))
5409 if (INSN_P (insn))
5411 rtx note;
5413 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5414 NULL_RTX)) != 0)
5415 remove_note (insn, note);
5419 for (insn = first; insn; insn = NEXT_INSN (insn))
5421 rtx pattern, reg, link, set, scan, dies, label;
5422 int rescan = 0, foundinsn = 0;
5424 if (CALL_P (insn))
5426 pattern = PATTERN (insn);
5428 if (GET_CODE (pattern) == PARALLEL)
5429 pattern = XVECEXP (pattern, 0, 0);
5430 if (GET_CODE (pattern) == SET)
5431 pattern = SET_SRC (pattern);
5433 if (GET_CODE (pattern) != CALL
5434 || !MEM_P (XEXP (pattern, 0)))
5435 continue;
5437 reg = XEXP (XEXP (pattern, 0), 0);
5439 else
5441 reg = sfunc_uses_reg (insn);
5442 if (! reg)
5443 continue;
5446 if (!REG_P (reg))
5447 continue;
5449 /* Try scanning backward to find where the register is set. */
5450 link = NULL;
5451 for (scan = PREV_INSN (insn);
5452 scan && !LABEL_P (scan);
5453 scan = PREV_INSN (scan))
5455 if (! INSN_P (scan))
5456 continue;
5458 if (! reg_mentioned_p (reg, scan))
5459 continue;
5461 if (noncall_uses_reg (reg, scan, &set))
5462 break;
5464 if (set)
5466 link = scan;
5467 break;
5471 if (! link)
5472 continue;
5474 /* The register is set at LINK. */
5476 /* We can only optimize the function call if the register is
5477 being set to a symbol. In theory, we could sometimes
5478 optimize calls to a constant location, but the assembler
5479 and linker do not support that at present. */
5480 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5481 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5482 continue;
5484 /* Scan forward from LINK to the place where REG dies, and
5485 make sure that the only insns which use REG are
5486 themselves function calls. */
5488 /* ??? This doesn't work for call targets that were allocated
5489 by reload, since there may not be a REG_DEAD note for the
5490 register. */
5492 dies = NULL_RTX;
5493 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5495 rtx scanset;
5497 /* Don't try to trace forward past a CODE_LABEL if we haven't
5498 seen INSN yet. Ordinarily, we will only find the setting insn
5499 if it is in the same basic block. However,
5500 cross-jumping can insert code labels in between the load and
5501 the call, and can result in situations where a single call
5502 insn may have two targets depending on where we came from. */
5504 if (LABEL_P (scan) && ! foundinsn)
5505 break;
5507 if (! INSN_P (scan))
5508 continue;
5510 /* Don't try to trace forward past a JUMP. To optimize
5511 safely, we would have to check that all the
5512 instructions at the jump destination did not use REG. */
5514 if (JUMP_P (scan))
5515 break;
5517 if (! reg_mentioned_p (reg, scan))
5518 continue;
5520 if (noncall_uses_reg (reg, scan, &scanset))
5521 break;
5523 if (scan == insn)
5524 foundinsn = 1;
5526 if (scan != insn
5527 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5529 /* There is a function call to this register other
5530 than the one we are checking. If we optimize
5531 this call, we need to rescan again below. */
5532 rescan = 1;
5535 /* ??? We shouldn't have to worry about SCANSET here.
5536 We should just be able to check for a REG_DEAD note
5537 on a function call. However, the REG_DEAD notes are
5538 apparently not dependable around libcalls; c-torture
5539 execute/920501-2 is a test case. If SCANSET is set,
5540 then this insn sets the register, so it must have
5541 died earlier. Unfortunately, this will only handle
5542 the cases in which the register is, in fact, set in a
5543 later insn. */
5545 /* ??? We shouldn't have to use FOUNDINSN here.
5546 This dates back to when we used LOG_LINKS to find
5547 the most recent insn which sets the register. */
5549 if (foundinsn
5550 && (scanset
5551 || find_reg_note (scan, REG_DEAD, reg)))
5553 dies = scan;
5554 break;
5558 if (! dies)
5560 /* Either there was a branch, or some insn used REG
5561 other than as a function call address. */
5562 continue;
5565 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5566 on the insn which sets the register, and on each call insn
5567 which uses the register. In final_prescan_insn we look for
5568 the REG_LABEL_OPERAND notes, and output the appropriate label
5569 or pseudo-op. */
5571 label = gen_label_rtx ();
5572 add_reg_note (link, REG_LABEL_OPERAND, label);
5573 add_reg_note (insn, REG_LABEL_OPERAND, label);
5574 if (rescan)
5576 scan = link;
5579 rtx reg2;
5581 scan = NEXT_INSN (scan);
5582 if (scan != insn
5583 && ((CALL_P (scan)
5584 && reg_mentioned_p (reg, scan))
5585 || ((reg2 = sfunc_uses_reg (scan))
5586 && REGNO (reg2) == REGNO (reg))))
5587 add_reg_note (scan, REG_LABEL_OPERAND, label);
5589 while (scan != dies);
5594 if (TARGET_SH2)
5595 fixup_addr_diff_vecs (first);
5597 if (optimize)
5599 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5600 shorten_branches (first);
5603 /* Scan the function looking for move instructions which have to be
5604 changed to pc-relative loads and insert the literal tables. */
5605 label_ref_list_pool = create_alloc_pool ("label references list",
5606 sizeof (struct label_ref_list_d),
5607 30);
5608 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5609 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5611 if (mova_p (insn))
5613 /* ??? basic block reordering can move a switch table dispatch
5614 below the switch table. Check if that has happened.
5615 We only have the addresses available when optimizing; but then,
5616 this check shouldn't be needed when not optimizing. */
5617 if (!untangle_mova (&num_mova, &mova, insn))
5619 insn = mova;
5620 num_mova = 0;
5623 else if (JUMP_P (insn)
5624 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5625 && num_mova
5626 /* ??? loop invariant motion can also move a mova out of a
5627 loop. Since loop does this code motion anyway, maybe we
5628 should wrap UNSPEC_MOVA into a CONST, so that reload can
5629 move it back. */
5630 && ((num_mova > 1
5631 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5632 || (prev_nonnote_insn (insn)
5633 == XEXP (MOVA_LABELREF (mova), 0))))
5635 rtx scan;
5636 int total;
5638 num_mova--;
5640 /* Some code might have been inserted between the mova and
5641 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5642 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5643 total += get_attr_length (scan);
5645 /* range of mova is 1020, add 4 because pc counts from address of
5646 second instruction after this one, subtract 2 in case pc is 2
5647 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5648 cancels out with alignment effects of the mova itself. */
5649 if (total > 1022)
5651 /* Change the mova into a load, and restart scanning
5652 there. broken_move will then return true for mova. */
5653 fixup_mova (mova);
5654 insn = mova;
5657 if (broken_move (insn)
5658 || (NONJUMP_INSN_P (insn)
5659 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5661 rtx scan;
5662 /* Scan ahead looking for a barrier to stick the constant table
5663 behind. */
5664 rtx barrier = find_barrier (num_mova, mova, insn);
5665 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5666 int need_aligned_label = 0;
5668 if (num_mova && ! mova_p (mova))
5670 /* find_barrier had to change the first mova into a
5671 pcload; thus, we have to start with this new pcload. */
5672 insn = mova;
5673 num_mova = 0;
5675 /* Now find all the moves between the points and modify them. */
5676 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5678 if (LABEL_P (scan))
5679 last_float = 0;
5680 if (NONJUMP_INSN_P (scan)
5681 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5682 need_aligned_label = 1;
5683 if (broken_move (scan))
5685 rtx *patp = &PATTERN (scan), pat = *patp;
5686 rtx src, dst;
5687 rtx lab;
5688 rtx newsrc;
5689 enum machine_mode mode;
5691 if (GET_CODE (pat) == PARALLEL)
5692 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5693 src = SET_SRC (pat);
5694 dst = SET_DEST (pat);
5695 mode = GET_MODE (dst);
5697 if (mode == SImode && hi_const (src)
5698 && REGNO (dst) != FPUL_REG)
5700 int offset = 0;
5702 mode = HImode;
5703 while (GET_CODE (dst) == SUBREG)
5705 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5706 GET_MODE (SUBREG_REG (dst)),
5707 SUBREG_BYTE (dst),
5708 GET_MODE (dst));
5709 dst = SUBREG_REG (dst);
5711 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5713 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5715 /* This must be an insn that clobbers r0. */
5716 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5717 XVECLEN (PATTERN (scan), 0)
5718 - 1);
5719 rtx clobber = *clobberp;
5721 gcc_assert (GET_CODE (clobber) == CLOBBER
5722 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5724 if (last_float
5725 && reg_set_between_p (r0_rtx, last_float_move, scan))
5726 last_float = 0;
5727 if (last_float
5728 && TARGET_SHCOMPACT
5729 && GET_MODE_SIZE (mode) != 4
5730 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5731 last_float = 0;
5732 lab = add_constant (src, mode, last_float);
5733 if (lab)
5734 emit_insn_before (gen_mova (lab), scan);
5735 else
5737 /* There will be a REG_UNUSED note for r0 on
5738 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5739 lest reorg:mark_target_live_regs will not
5740 consider r0 to be used, and we end up with delay
5741 slot insn in front of SCAN that clobbers r0. */
5742 rtx note
5743 = find_regno_note (last_float_move, REG_UNUSED, 0);
5745 /* If we are not optimizing, then there may not be
5746 a note. */
5747 if (note)
5748 PUT_REG_NOTE_KIND (note, REG_INC);
5750 *last_float_addr = r0_inc_rtx;
5752 last_float_move = scan;
5753 last_float = src;
5754 newsrc = gen_const_mem (mode,
5755 (((TARGET_SH4 && ! TARGET_FMOVD)
5756 || REGNO (dst) == FPUL_REG)
5757 ? r0_inc_rtx
5758 : r0_rtx));
5759 last_float_addr = &XEXP (newsrc, 0);
5761 /* Remove the clobber of r0. */
5762 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5763 gen_rtx_SCRATCH (Pmode));
5765 /* This is a mova needing a label. Create it. */
5766 else if (GET_CODE (src) == UNSPEC
5767 && XINT (src, 1) == UNSPEC_MOVA
5768 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5770 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5771 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5772 newsrc = gen_rtx_UNSPEC (SImode,
5773 gen_rtvec (1, newsrc),
5774 UNSPEC_MOVA);
5776 else
5778 lab = add_constant (src, mode, 0);
5779 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5780 newsrc = gen_const_mem (mode, newsrc);
5782 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5783 INSN_CODE (scan) = -1;
5786 dump_table (need_aligned_label ? insn : 0, barrier);
5787 insn = barrier;
5790 free_alloc_pool (label_ref_list_pool);
5791 for (insn = first; insn; insn = NEXT_INSN (insn))
5792 PUT_MODE (insn, VOIDmode);
5794 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5795 INSN_ADDRESSES_FREE ();
5796 split_branches (first);
5798 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5799 also has an effect on the register that holds the address of the sfunc.
5800 Insert an extra dummy insn in front of each sfunc that pretends to
5801 use this register. */
5802 if (flag_delayed_branch)
5804 for (insn = first; insn; insn = NEXT_INSN (insn))
5806 rtx reg = sfunc_uses_reg (insn);
5808 if (! reg)
5809 continue;
5810 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5813 #if 0
5814 /* fpscr is not actually a user variable, but we pretend it is for the
5815 sake of the previous optimization passes, since we want it handled like
5816 one. However, we don't have any debugging information for it, so turn
5817 it into a non-user variable now. */
5818 if (TARGET_SH4)
5819 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5820 #endif
5821 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5825 get_dest_uid (rtx label, int max_uid)
5827 rtx dest = next_real_insn (label);
5828 int dest_uid;
5829 if (! dest)
5830 /* This can happen for an undefined label. */
5831 return 0;
5832 dest_uid = INSN_UID (dest);
5833 /* If this is a newly created branch redirection blocking instruction,
5834 we cannot index the branch_uid or insn_addresses arrays with its
5835 uid. But then, we won't need to, because the actual destination is
5836 the following branch. */
5837 while (dest_uid >= max_uid)
5839 dest = NEXT_INSN (dest);
5840 dest_uid = INSN_UID (dest);
5842 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5843 return 0;
5844 return dest_uid;
5847 /* Split condbranches that are out of range. Also add clobbers for
5848 scratch registers that are needed in far jumps.
5849 We do this before delay slot scheduling, so that it can take our
5850 newly created instructions into account. It also allows us to
5851 find branches with common targets more easily. */
5853 static void
5854 split_branches (rtx first)
5856 rtx insn;
5857 struct far_branch **uid_branch, *far_branch_list = 0;
5858 int max_uid = get_max_uid ();
5859 int ok;
5861 /* Find out which branches are out of range. */
5862 shorten_branches (first);
5864 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5865 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5867 for (insn = first; insn; insn = NEXT_INSN (insn))
5868 if (! INSN_P (insn))
5869 continue;
5870 else if (INSN_DELETED_P (insn))
5872 /* Shorten_branches would split this instruction again,
5873 so transform it into a note. */
5874 SET_INSN_DELETED (insn);
5876 else if (JUMP_P (insn)
5877 /* Don't mess with ADDR_DIFF_VEC */
5878 && (GET_CODE (PATTERN (insn)) == SET
5879 || GET_CODE (PATTERN (insn)) == RETURN))
5881 enum attr_type type = get_attr_type (insn);
5882 if (type == TYPE_CBRANCH)
5884 rtx next, beyond;
5886 if (get_attr_length (insn) > 4)
5888 rtx src = SET_SRC (PATTERN (insn));
5889 rtx olabel = XEXP (XEXP (src, 1), 0);
5890 int addr = INSN_ADDRESSES (INSN_UID (insn));
5891 rtx label = 0;
5892 int dest_uid = get_dest_uid (olabel, max_uid);
5893 struct far_branch *bp = uid_branch[dest_uid];
5895 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5896 the label if the LABEL_NUSES count drops to zero. There is
5897 always a jump_optimize pass that sets these values, but it
5898 proceeds to delete unreferenced code, and then if not
5899 optimizing, to un-delete the deleted instructions, thus
5900 leaving labels with too low uses counts. */
5901 if (! optimize)
5903 JUMP_LABEL (insn) = olabel;
5904 LABEL_NUSES (olabel)++;
5906 if (! bp)
5908 bp = (struct far_branch *) alloca (sizeof *bp);
5909 uid_branch[dest_uid] = bp;
5910 bp->prev = far_branch_list;
5911 far_branch_list = bp;
5912 bp->far_label
5913 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5914 LABEL_NUSES (bp->far_label)++;
5916 else
5918 label = bp->near_label;
5919 if (! label && bp->address - addr >= CONDJUMP_MIN)
5921 rtx block = bp->insert_place;
5923 if (GET_CODE (PATTERN (block)) == RETURN)
5924 block = PREV_INSN (block);
5925 else
5926 block = gen_block_redirect (block,
5927 bp->address, 2);
5928 label = emit_label_after (gen_label_rtx (),
5929 PREV_INSN (block));
5930 bp->near_label = label;
5932 else if (label && ! NEXT_INSN (label))
5934 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5935 bp->insert_place = insn;
5936 else
5937 gen_far_branch (bp);
5940 if (! label
5941 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5943 bp->near_label = label = gen_label_rtx ();
5944 bp->insert_place = insn;
5945 bp->address = addr;
5947 ok = redirect_jump (insn, label, 0);
5948 gcc_assert (ok);
5950 else
5952 /* get_attr_length (insn) == 2 */
5953 /* Check if we have a pattern where reorg wants to redirect
5954 the branch to a label from an unconditional branch that
5955 is too far away. */
5956 /* We can't use JUMP_LABEL here because it might be undefined
5957 when not optimizing. */
5958 /* A syntax error might cause beyond to be NULL_RTX. */
5959 beyond
5960 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5961 0));
5963 if (beyond
5964 && (JUMP_P (beyond)
5965 || ((beyond = next_active_insn (beyond))
5966 && JUMP_P (beyond)))
5967 && GET_CODE (PATTERN (beyond)) == SET
5968 && recog_memoized (beyond) == CODE_FOR_jump_compact
5969 && ((INSN_ADDRESSES
5970 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5971 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5972 > 252 + 258 + 2))
5973 gen_block_redirect (beyond,
5974 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5977 next = next_active_insn (insn);
5979 if (next
5980 && (JUMP_P (next)
5981 || ((next = next_active_insn (next))
5982 && JUMP_P (next)))
5983 && GET_CODE (PATTERN (next)) == SET
5984 && recog_memoized (next) == CODE_FOR_jump_compact
5985 && ((INSN_ADDRESSES
5986 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5987 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5988 > 252 + 258 + 2))
5989 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5991 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5993 int addr = INSN_ADDRESSES (INSN_UID (insn));
5994 rtx far_label = 0;
5995 int dest_uid = 0;
5996 struct far_branch *bp;
5998 if (type == TYPE_JUMP)
6000 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6001 dest_uid = get_dest_uid (far_label, max_uid);
6002 if (! dest_uid)
6004 /* Parse errors can lead to labels outside
6005 the insn stream. */
6006 if (! NEXT_INSN (far_label))
6007 continue;
6009 if (! optimize)
6011 JUMP_LABEL (insn) = far_label;
6012 LABEL_NUSES (far_label)++;
6014 redirect_jump (insn, NULL_RTX, 1);
6015 far_label = 0;
6018 bp = uid_branch[dest_uid];
6019 if (! bp)
6021 bp = (struct far_branch *) alloca (sizeof *bp);
6022 uid_branch[dest_uid] = bp;
6023 bp->prev = far_branch_list;
6024 far_branch_list = bp;
6025 bp->near_label = 0;
6026 bp->far_label = far_label;
6027 if (far_label)
6028 LABEL_NUSES (far_label)++;
6030 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6031 if (addr - bp->address <= CONDJUMP_MAX)
6032 emit_label_after (bp->near_label, PREV_INSN (insn));
6033 else
6035 gen_far_branch (bp);
6036 bp->near_label = 0;
6038 else
6039 bp->near_label = 0;
6040 bp->address = addr;
6041 bp->insert_place = insn;
6042 if (! far_label)
6043 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6044 else
6045 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6048 /* Generate all pending far branches,
6049 and free our references to the far labels. */
6050 while (far_branch_list)
6052 if (far_branch_list->near_label
6053 && ! NEXT_INSN (far_branch_list->near_label))
6054 gen_far_branch (far_branch_list);
6055 if (optimize
6056 && far_branch_list->far_label
6057 && ! --LABEL_NUSES (far_branch_list->far_label))
6058 delete_insn (far_branch_list->far_label);
6059 far_branch_list = far_branch_list->prev;
6062 /* Instruction length information is no longer valid due to the new
6063 instructions that have been generated. */
6064 init_insn_lengths ();
6067 /* Dump out instruction addresses, which is useful for debugging the
6068 constant pool table stuff.
6070 If relaxing, output the label and pseudo-ops used to link together
6071 calls and the instruction which set the registers. */
6073 /* ??? The addresses printed by this routine for insns are nonsense for
6074 insns which are inside of a sequence where none of the inner insns have
6075 variable length. This is because the second pass of shorten_branches
6076 does not bother to update them. */
6078 void
6079 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6080 int noperands ATTRIBUTE_UNUSED)
6082 if (TARGET_DUMPISIZE)
6083 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6085 if (TARGET_RELAX)
6087 rtx note;
6089 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6090 if (note)
6092 rtx pattern;
6094 pattern = PATTERN (insn);
6095 if (GET_CODE (pattern) == PARALLEL)
6096 pattern = XVECEXP (pattern, 0, 0);
6097 switch (GET_CODE (pattern))
6099 case SET:
6100 if (GET_CODE (SET_SRC (pattern)) != CALL
6101 && get_attr_type (insn) != TYPE_SFUNC)
6103 targetm.asm_out.internal_label
6104 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6105 break;
6107 /* else FALLTHROUGH */
6108 case CALL:
6109 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6110 CODE_LABEL_NUMBER (XEXP (note, 0)));
6111 break;
6113 default:
6114 gcc_unreachable ();
6120 /* Dump out any constants accumulated in the final pass. These will
6121 only be labels. */
6123 const char *
6124 output_jump_label_table (void)
6126 int i;
6128 if (pool_size)
6130 fprintf (asm_out_file, "\t.align 2\n");
6131 for (i = 0; i < pool_size; i++)
6133 pool_node *p = &pool_vector[i];
6135 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6136 CODE_LABEL_NUMBER (p->label));
6137 output_asm_insn (".long %O0", &p->value);
6139 pool_size = 0;
6142 return "";
6145 /* A full frame looks like:
6147 arg-5
6148 arg-4
6149 [ if current_function_anonymous_args
6150 arg-3
6151 arg-2
6152 arg-1
6153 arg-0 ]
6154 saved-fp
6155 saved-r10
6156 saved-r11
6157 saved-r12
6158 saved-pr
6159 local-n
6161 local-1
6162 local-0 <- fp points here. */
6164 /* Number of bytes pushed for anonymous args, used to pass information
6165 between expand_prologue and expand_epilogue. */
6167 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6168 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6169 for an epilogue and a negative value means that it's for a sibcall
6170 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6171 all the registers that are about to be restored, and hence dead. */
6173 static void
6174 output_stack_adjust (int size, rtx reg, int epilogue_p,
6175 HARD_REG_SET *live_regs_mask, bool frame_p)
6177 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6178 if (size)
6180 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6182 /* This test is bogus, as output_stack_adjust is used to re-align the
6183 stack. */
6184 #if 0
6185 gcc_assert (!(size % align));
6186 #endif
6188 if (CONST_OK_FOR_ADD (size))
6189 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6190 /* Try to do it with two partial adjustments; however, we must make
6191 sure that the stack is properly aligned at all times, in case
6192 an interrupt occurs between the two partial adjustments. */
6193 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6194 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6196 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6197 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6199 else
6201 rtx const_reg;
6202 rtx insn;
6203 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6204 int i;
6206 /* If TEMP is invalid, we could temporarily save a general
6207 register to MACL. However, there is currently no need
6208 to handle this case, so just die when we see it. */
6209 if (epilogue_p < 0
6210 || current_function_interrupt
6211 || ! call_really_used_regs[temp] || fixed_regs[temp])
6212 temp = -1;
6213 if (temp < 0 && ! current_function_interrupt
6214 && (TARGET_SHMEDIA || epilogue_p >= 0))
6216 HARD_REG_SET temps;
6217 COPY_HARD_REG_SET (temps, call_used_reg_set);
6218 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6219 if (epilogue_p > 0)
6221 int nreg = 0;
6222 if (crtl->return_rtx)
6224 enum machine_mode mode;
6225 mode = GET_MODE (crtl->return_rtx);
6226 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6227 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6229 for (i = 0; i < nreg; i++)
6230 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6231 if (crtl->calls_eh_return)
6233 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6234 for (i = 0; i <= 3; i++)
6235 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6238 if (TARGET_SHMEDIA && epilogue_p < 0)
6239 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6240 CLEAR_HARD_REG_BIT (temps, i);
6241 if (epilogue_p <= 0)
6243 for (i = FIRST_PARM_REG;
6244 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6245 CLEAR_HARD_REG_BIT (temps, i);
6246 if (cfun->static_chain_decl != NULL)
6247 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6249 temp = scavenge_reg (&temps);
6251 if (temp < 0 && live_regs_mask)
6253 HARD_REG_SET temps;
6255 COPY_HARD_REG_SET (temps, *live_regs_mask);
6256 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6257 temp = scavenge_reg (&temps);
6259 if (temp < 0)
6261 rtx adj_reg, tmp_reg, mem;
6263 /* If we reached here, the most likely case is the (sibcall)
6264 epilogue for non SHmedia. Put a special push/pop sequence
6265 for such case as the last resort. This looks lengthy but
6266 would not be problem because it seems to be very
6267 rare. */
6269 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6272 /* ??? There is still the slight possibility that r4 or
6273 r5 have been reserved as fixed registers or assigned
6274 as global registers, and they change during an
6275 interrupt. There are possible ways to handle this:
6277 - If we are adjusting the frame pointer (r14), we can do
6278 with a single temp register and an ordinary push / pop
6279 on the stack.
6280 - Grab any call-used or call-saved registers (i.e. not
6281 fixed or globals) for the temps we need. We might
6282 also grab r14 if we are adjusting the stack pointer.
6283 If we can't find enough available registers, issue
6284 a diagnostic and die - the user must have reserved
6285 way too many registers.
6286 But since all this is rather unlikely to happen and
6287 would require extra testing, we just die if r4 / r5
6288 are not available. */
6289 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6290 && !global_regs[4] && !global_regs[5]);
6292 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6293 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6294 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6295 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6296 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6297 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6298 emit_move_insn (mem, tmp_reg);
6299 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6300 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6301 emit_move_insn (mem, tmp_reg);
6302 emit_move_insn (reg, adj_reg);
6303 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6304 emit_move_insn (adj_reg, mem);
6305 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6306 emit_move_insn (tmp_reg, mem);
6307 /* Tell flow the insns that pop r4/r5 aren't dead. */
6308 emit_use (tmp_reg);
6309 emit_use (adj_reg);
6310 return;
6312 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6314 /* If SIZE is negative, subtract the positive value.
6315 This sometimes allows a constant pool entry to be shared
6316 between prologue and epilogue code. */
6317 if (size < 0)
6319 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6320 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6322 else
6324 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6325 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6327 if (! epilogue_p)
6328 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6329 gen_rtx_SET (VOIDmode, reg,
6330 gen_rtx_PLUS (SImode, reg,
6331 GEN_INT (size))));
6336 static rtx
6337 frame_insn (rtx x)
6339 x = emit_insn (x);
6340 RTX_FRAME_RELATED_P (x) = 1;
6341 return x;
6344 /* Output RTL to push register RN onto the stack. */
6346 static rtx
6347 push (int rn)
6349 rtx x;
6350 if (rn == FPUL_REG)
6351 x = gen_push_fpul ();
6352 else if (rn == FPSCR_REG)
6353 x = gen_push_fpscr ();
6354 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6355 && FP_OR_XD_REGISTER_P (rn))
6357 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6358 return NULL_RTX;
6359 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6361 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6362 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6363 else
6364 x = gen_push (gen_rtx_REG (SImode, rn));
6366 x = frame_insn (x);
6367 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6368 return x;
6371 /* Output RTL to pop register RN from the stack. */
6373 static void
6374 pop (int rn)
6376 rtx x;
6377 if (rn == FPUL_REG)
6378 x = gen_pop_fpul ();
6379 else if (rn == FPSCR_REG)
6380 x = gen_pop_fpscr ();
6381 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6382 && FP_OR_XD_REGISTER_P (rn))
6384 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6385 return;
6386 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6388 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6389 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6390 else
6391 x = gen_pop (gen_rtx_REG (SImode, rn));
6393 x = emit_insn (x);
6394 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6397 /* Generate code to push the regs specified in the mask. */
6399 static void
6400 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6402 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6403 int skip_fpscr = 0;
6405 /* Push PR last; this gives better latencies after the prologue, and
6406 candidates for the return delay slot when there are no general
6407 registers pushed. */
6408 for (; i < FIRST_PSEUDO_REGISTER; i++)
6410 /* If this is an interrupt handler, and the SZ bit varies,
6411 and we have to push any floating point register, we need
6412 to switch to the correct precision first. */
6413 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6414 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6416 HARD_REG_SET unsaved;
6418 push (FPSCR_REG);
6419 COMPL_HARD_REG_SET (unsaved, *mask);
6420 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6421 skip_fpscr = 1;
6423 if (i != PR_REG
6424 && (i != FPSCR_REG || ! skip_fpscr)
6425 && TEST_HARD_REG_BIT (*mask, i))
6427 /* If the ISR has RESBANK attribute assigned, don't push any of
6428 the following registers - R0-R14, MACH, MACL and GBR. */
6429 if (! (sh_cfun_resbank_handler_p ()
6430 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6431 || i == MACH_REG
6432 || i == MACL_REG
6433 || i == GBR_REG)))
6434 push (i);
6438 /* Push banked registers last to improve delay slot opportunities. */
6439 if (interrupt_handler)
6441 bool use_movml = false;
6443 if (TARGET_SH2A)
6445 unsigned int count = 0;
6447 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6448 if (TEST_HARD_REG_BIT (*mask, i))
6449 count++;
6450 else
6451 break;
6453 /* Use movml when all banked registers are pushed. */
6454 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6455 use_movml = true;
6458 if (use_movml)
6460 rtx x, mem, reg, set;
6461 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6463 /* We must avoid scheduling multiple store insn with another
6464 insns. */
6465 emit_insn (gen_blockage ());
6466 x = gen_movml_push_banked (sp_reg);
6467 x = frame_insn (x);
6468 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6470 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6471 reg = gen_rtx_REG (SImode, i);
6472 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6475 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6476 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6477 emit_insn (gen_blockage ());
6479 else
6480 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6481 if (TEST_HARD_REG_BIT (*mask, i))
6482 push (i);
6485 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6486 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6487 push (PR_REG);
6490 /* Calculate how much extra space is needed to save all callee-saved
6491 target registers.
6492 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6494 static int
6495 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6497 int reg;
6498 int stack_space = 0;
6499 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6501 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6502 if ((! call_really_used_regs[reg] || interrupt_handler)
6503 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6504 /* Leave space to save this target register on the stack,
6505 in case target register allocation wants to use it. */
6506 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6507 return stack_space;
6510 /* Decide whether we should reserve space for callee-save target registers,
6511 in case target register allocation wants to use them. REGS_SAVED is
6512 the space, in bytes, that is already required for register saves.
6513 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6515 static int
6516 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6517 HARD_REG_SET *live_regs_mask)
6519 if (optimize_size)
6520 return 0;
6521 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6524 /* Decide how much space to reserve for callee-save target registers
6525 in case target register allocation wants to use them.
6526 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6528 static int
6529 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6531 if (shmedia_space_reserved_for_target_registers)
6532 return shmedia_target_regs_stack_space (live_regs_mask);
6533 else
6534 return 0;
6537 /* Work out the registers which need to be saved, both as a mask and a
6538 count of saved words. Return the count.
6540 If doing a pragma interrupt function, then push all regs used by the
6541 function, and if we call another function (we can tell by looking at PR),
6542 make sure that all the regs it clobbers are safe too. */
6544 static int
6545 calc_live_regs (HARD_REG_SET *live_regs_mask)
6547 unsigned int reg;
6548 int count;
6549 tree attrs;
6550 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6551 bool nosave_low_regs;
6552 int pr_live, has_call;
6554 attrs = DECL_ATTRIBUTES (current_function_decl);
6555 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6556 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6557 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6558 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6560 CLEAR_HARD_REG_SET (*live_regs_mask);
6561 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6562 && df_regs_ever_live_p (FPSCR_REG))
6563 target_flags &= ~MASK_FPU_SINGLE;
6564 /* If we can save a lot of saves by switching to double mode, do that. */
6565 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6566 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6567 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6568 && (! call_really_used_regs[reg]
6569 || interrupt_handler)
6570 && ++count > 2)
6572 target_flags &= ~MASK_FPU_SINGLE;
6573 break;
6575 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6576 knows how to use it. That means the pseudo originally allocated for
6577 the initial value can become the PR_MEDIA_REG hard register, as seen for
6578 execute/20010122-1.c:test9. */
6579 if (TARGET_SHMEDIA)
6580 /* ??? this function is called from initial_elimination_offset, hence we
6581 can't use the result of sh_media_register_for_return here. */
6582 pr_live = sh_pr_n_sets ();
6583 else
6585 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6586 pr_live = (pr_initial
6587 ? (!REG_P (pr_initial)
6588 || REGNO (pr_initial) != (PR_REG))
6589 : df_regs_ever_live_p (PR_REG));
6590 /* For Shcompact, if not optimizing, we end up with a memory reference
6591 using the return address pointer for __builtin_return_address even
6592 though there is no actual need to put the PR register on the stack. */
6593 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6595 /* Force PR to be live if the prologue has to call the SHmedia
6596 argument decoder or register saver. */
6597 if (TARGET_SHCOMPACT
6598 && ((crtl->args.info.call_cookie
6599 & ~ CALL_COOKIE_RET_TRAMP (1))
6600 || crtl->saves_all_registers))
6601 pr_live = 1;
6602 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6603 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6605 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6606 ? pr_live
6607 : interrupt_handler
6608 ? (/* Need to save all the regs ever live. */
6609 (df_regs_ever_live_p (reg)
6610 || (call_really_used_regs[reg]
6611 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6612 || reg == PIC_OFFSET_TABLE_REGNUM)
6613 && has_call)
6614 || (TARGET_SHMEDIA && has_call
6615 && REGISTER_NATURAL_MODE (reg) == SImode
6616 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6617 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6618 && reg != RETURN_ADDRESS_POINTER_REGNUM
6619 && reg != T_REG && reg != GBR_REG
6620 /* Push fpscr only on targets which have FPU */
6621 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6622 : (/* Only push those regs which are used and need to be saved. */
6623 (TARGET_SHCOMPACT
6624 && flag_pic
6625 && crtl->args.info.call_cookie
6626 && reg == PIC_OFFSET_TABLE_REGNUM)
6627 || (df_regs_ever_live_p (reg)
6628 && ((!call_really_used_regs[reg]
6629 && !(reg != PIC_OFFSET_TABLE_REGNUM
6630 && fixed_regs[reg] && call_used_regs[reg]))
6631 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6632 || (crtl->calls_eh_return
6633 && (reg == EH_RETURN_DATA_REGNO (0)
6634 || reg == EH_RETURN_DATA_REGNO (1)
6635 || reg == EH_RETURN_DATA_REGNO (2)
6636 || reg == EH_RETURN_DATA_REGNO (3)))
6637 || ((reg == MACL_REG || reg == MACH_REG)
6638 && df_regs_ever_live_p (reg)
6639 && sh_cfun_attr_renesas_p ())
6642 SET_HARD_REG_BIT (*live_regs_mask, reg);
6643 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6645 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6646 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6648 if (FP_REGISTER_P (reg))
6650 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6652 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6653 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6656 else if (XD_REGISTER_P (reg))
6658 /* Must switch to double mode to access these registers. */
6659 target_flags &= ~MASK_FPU_SINGLE;
6663 if (nosave_low_regs && reg == R8_REG)
6664 break;
6666 /* If we have a target register optimization pass after prologue / epilogue
6667 threading, we need to assume all target registers will be live even if
6668 they aren't now. */
6669 if (flag_branch_target_load_optimize2
6670 && TARGET_SAVE_ALL_TARGET_REGS
6671 && shmedia_space_reserved_for_target_registers)
6672 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6673 if ((! call_really_used_regs[reg] || interrupt_handler)
6674 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6676 SET_HARD_REG_BIT (*live_regs_mask, reg);
6677 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6679 /* If this is an interrupt handler, we don't have any call-clobbered
6680 registers we can conveniently use for target register save/restore.
6681 Make sure we save at least one general purpose register when we need
6682 to save target registers. */
6683 if (interrupt_handler
6684 && hard_reg_set_intersect_p (*live_regs_mask,
6685 reg_class_contents[TARGET_REGS])
6686 && ! hard_reg_set_intersect_p (*live_regs_mask,
6687 reg_class_contents[GENERAL_REGS]))
6689 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6690 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6693 return count;
6696 /* Code to generate prologue and epilogue sequences */
6698 /* PUSHED is the number of bytes that are being pushed on the
6699 stack for register saves. Return the frame size, padded
6700 appropriately so that the stack stays properly aligned. */
6701 static HOST_WIDE_INT
6702 rounded_frame_size (int pushed)
6704 HOST_WIDE_INT size = get_frame_size ();
6705 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6707 if (ACCUMULATE_OUTGOING_ARGS)
6708 size += crtl->outgoing_args_size;
6710 return ((size + pushed + align - 1) & -align) - pushed;
6713 /* Choose a call-clobbered target-branch register that remains
6714 unchanged along the whole function. We set it up as the return
6715 value in the prologue. */
6717 sh_media_register_for_return (void)
6719 int regno;
6720 int tr0_used;
6722 if (! current_function_is_leaf)
6723 return -1;
6724 if (lookup_attribute ("interrupt_handler",
6725 DECL_ATTRIBUTES (current_function_decl)))
6726 return -1;
6727 if (sh_cfun_interrupt_handler_p ())
6728 return -1;
6730 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6732 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6733 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6734 return regno;
6736 return -1;
6739 /* The maximum registers we need to save are:
6740 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6741 - 32 floating point registers (for each pair, we save none,
6742 one single precision value, or a double precision value).
6743 - 8 target registers
6744 - add 1 entry for a delimiter. */
6745 #define MAX_SAVED_REGS (62+32+8)
6747 typedef struct save_entry_s
6749 unsigned char reg;
6750 unsigned char mode;
6751 short offset;
6752 } save_entry;
6754 #define MAX_TEMPS 4
6756 /* There will be a delimiter entry with VOIDmode both at the start and the
6757 end of a filled in schedule. The end delimiter has the offset of the
6758 save with the smallest (i.e. most negative) offset. */
6759 typedef struct save_schedule_s
6761 save_entry entries[MAX_SAVED_REGS + 2];
6762 int temps[MAX_TEMPS+1];
6763 } save_schedule;
6765 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6766 use reverse order. Returns the last entry written to (not counting
6767 the delimiter). OFFSET_BASE is a number to be added to all offset
6768 entries. */
6770 static save_entry *
6771 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6772 int offset_base)
6774 int align, i;
6775 save_entry *entry = schedule->entries;
6776 int tmpx = 0;
6777 int offset;
6779 if (! current_function_interrupt)
6780 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6781 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6782 && ! FUNCTION_ARG_REGNO_P (i)
6783 && i != FIRST_RET_REG
6784 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6785 && ! (crtl->calls_eh_return
6786 && (i == EH_RETURN_STACKADJ_REGNO
6787 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6788 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6789 schedule->temps[tmpx++] = i;
6790 entry->reg = -1;
6791 entry->mode = VOIDmode;
6792 entry->offset = offset_base;
6793 entry++;
6794 /* We loop twice: first, we save 8-byte aligned registers in the
6795 higher addresses, that are known to be aligned. Then, we
6796 proceed to saving 32-bit registers that don't need 8-byte
6797 alignment.
6798 If this is an interrupt function, all registers that need saving
6799 need to be saved in full. moreover, we need to postpone saving
6800 target registers till we have saved some general purpose registers
6801 we can then use as scratch registers. */
6802 offset = offset_base;
6803 for (align = 1; align >= 0; align--)
6805 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6806 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6808 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6809 int reg = i;
6811 if (current_function_interrupt)
6813 if (TARGET_REGISTER_P (i))
6814 continue;
6815 if (GENERAL_REGISTER_P (i))
6816 mode = DImode;
6818 if (mode == SFmode && (i % 2) == 1
6819 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6820 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6822 mode = DFmode;
6823 i--;
6824 reg--;
6827 /* If we're doing the aligned pass and this is not aligned,
6828 or we're doing the unaligned pass and this is aligned,
6829 skip it. */
6830 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6831 != align)
6832 continue;
6834 if (current_function_interrupt
6835 && GENERAL_REGISTER_P (i)
6836 && tmpx < MAX_TEMPS)
6837 schedule->temps[tmpx++] = i;
6839 offset -= GET_MODE_SIZE (mode);
6840 entry->reg = i;
6841 entry->mode = mode;
6842 entry->offset = offset;
6843 entry++;
6845 if (align && current_function_interrupt)
6846 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6847 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6849 offset -= GET_MODE_SIZE (DImode);
6850 entry->reg = i;
6851 entry->mode = DImode;
6852 entry->offset = offset;
6853 entry++;
6856 entry->reg = -1;
6857 entry->mode = VOIDmode;
6858 entry->offset = offset;
6859 schedule->temps[tmpx] = -1;
6860 return entry - 1;
6863 void
6864 sh_expand_prologue (void)
6866 HARD_REG_SET live_regs_mask;
6867 int d, i;
6868 int d_rounding = 0;
6869 int save_flags = target_flags;
6870 int pretend_args;
6871 int stack_usage;
6872 tree sp_switch_attr
6873 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6875 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6877 /* We have pretend args if we had an object sent partially in registers
6878 and partially on the stack, e.g. a large structure. */
6879 pretend_args = crtl->args.pretend_args_size;
6880 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6881 && (NPARM_REGS(SImode)
6882 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6883 pretend_args = 0;
6885 output_stack_adjust (-pretend_args
6886 - crtl->args.info.stack_regs * 8,
6887 stack_pointer_rtx, 0, NULL, true);
6888 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6890 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6891 /* We're going to use the PIC register to load the address of the
6892 incoming-argument decoder and/or of the return trampoline from
6893 the GOT, so make sure the PIC register is preserved and
6894 initialized. */
6895 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6897 if (TARGET_SHCOMPACT
6898 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6900 int reg;
6902 /* First, make all registers with incoming arguments that will
6903 be pushed onto the stack live, so that register renaming
6904 doesn't overwrite them. */
6905 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6906 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6907 >= NPARM_REGS (SImode) - reg)
6908 for (; reg < NPARM_REGS (SImode); reg++)
6909 emit_insn (gen_shcompact_preserve_incoming_args
6910 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6911 else if (CALL_COOKIE_INT_REG_GET
6912 (crtl->args.info.call_cookie, reg) == 1)
6913 emit_insn (gen_shcompact_preserve_incoming_args
6914 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6916 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6917 stack_pointer_rtx);
6918 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6919 GEN_INT (crtl->args.info.call_cookie));
6920 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6921 gen_rtx_REG (SImode, R0_REG));
6923 else if (TARGET_SHMEDIA)
6925 int tr = sh_media_register_for_return ();
6927 if (tr >= 0)
6928 emit_move_insn (gen_rtx_REG (DImode, tr),
6929 gen_rtx_REG (DImode, PR_MEDIA_REG));
6932 /* Emit the code for SETUP_VARARGS. */
6933 if (cfun->stdarg)
6935 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6937 /* Push arg regs as if they'd been provided by caller in stack. */
6938 for (i = 0; i < NPARM_REGS(SImode); i++)
6940 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6942 if (i >= (NPARM_REGS(SImode)
6943 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6945 break;
6946 push (rn);
6947 stack_usage += GET_MODE_SIZE (SImode);
6952 /* If we're supposed to switch stacks at function entry, do so now. */
6953 if (sp_switch_attr)
6955 rtx lab, newsrc;
6956 /* The argument specifies a variable holding the address of the
6957 stack the interrupt function should switch to/from at entry/exit. */
6958 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6959 const char *s
6960 = ggc_strdup (TREE_STRING_POINTER (arg));
6961 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6963 lab = add_constant (sp_switch, SImode, 0);
6964 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6965 newsrc = gen_const_mem (SImode, newsrc);
6967 emit_insn (gen_sp_switch_1 (newsrc));
6970 d = calc_live_regs (&live_regs_mask);
6971 /* ??? Maybe we could save some switching if we can move a mode switch
6972 that already happens to be at the function start into the prologue. */
6973 if (target_flags != save_flags && ! current_function_interrupt)
6974 emit_insn (gen_toggle_sz ());
6976 if (TARGET_SH5)
6978 int offset_base, offset;
6979 rtx r0 = NULL_RTX;
6980 int offset_in_r0 = -1;
6981 int sp_in_r0 = 0;
6982 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6983 int total_size, save_size;
6984 save_schedule schedule;
6985 save_entry *entry;
6986 int *tmp_pnt;
6988 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6989 && ! current_function_interrupt)
6990 r0 = gen_rtx_REG (Pmode, R0_REG);
6992 /* D is the actual number of bytes that we need for saving registers,
6993 however, in initial_elimination_offset we have committed to using
6994 an additional TREGS_SPACE amount of bytes - in order to keep both
6995 addresses to arguments supplied by the caller and local variables
6996 valid, we must keep this gap. Place it between the incoming
6997 arguments and the actually saved registers in a bid to optimize
6998 locality of reference. */
6999 total_size = d + tregs_space;
7000 total_size += rounded_frame_size (total_size);
7001 save_size = total_size - rounded_frame_size (d);
7002 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7003 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7004 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7006 /* If adjusting the stack in a single step costs nothing extra, do so.
7007 I.e. either if a single addi is enough, or we need a movi anyway,
7008 and we don't exceed the maximum offset range (the test for the
7009 latter is conservative for simplicity). */
7010 if (TARGET_SHMEDIA
7011 && (CONST_OK_FOR_I10 (-total_size)
7012 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7013 && total_size <= 2044)))
7014 d_rounding = total_size - save_size;
7016 offset_base = d + d_rounding;
7018 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7019 0, NULL, true);
7020 stack_usage += save_size + d_rounding;
7022 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7023 tmp_pnt = schedule.temps;
7024 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7026 enum machine_mode mode = (enum machine_mode) entry->mode;
7027 unsigned int reg = entry->reg;
7028 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7029 rtx orig_reg_rtx;
7031 offset = entry->offset;
7033 reg_rtx = gen_rtx_REG (mode, reg);
7035 mem_rtx = gen_frame_mem (mode,
7036 gen_rtx_PLUS (Pmode,
7037 stack_pointer_rtx,
7038 GEN_INT (offset)));
7040 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7042 gcc_assert (r0);
7043 mem_rtx = NULL_RTX;
7046 if (HAVE_PRE_DECREMENT
7047 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7048 || mem_rtx == NULL_RTX
7049 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7051 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7053 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7054 pre_dec = NULL_RTX;
7055 else
7057 mem_rtx = NULL_RTX;
7058 offset += GET_MODE_SIZE (mode);
7062 if (mem_rtx != NULL_RTX)
7063 goto addr_ok;
7065 if (offset_in_r0 == -1)
7067 emit_move_insn (r0, GEN_INT (offset));
7068 offset_in_r0 = offset;
7070 else if (offset != offset_in_r0)
7072 emit_move_insn (r0,
7073 gen_rtx_PLUS
7074 (Pmode, r0,
7075 GEN_INT (offset - offset_in_r0)));
7076 offset_in_r0 += offset - offset_in_r0;
7079 if (pre_dec != NULL_RTX)
7081 if (! sp_in_r0)
7083 emit_move_insn (r0,
7084 gen_rtx_PLUS
7085 (Pmode, r0, stack_pointer_rtx));
7086 sp_in_r0 = 1;
7089 offset -= GET_MODE_SIZE (mode);
7090 offset_in_r0 -= GET_MODE_SIZE (mode);
7092 mem_rtx = pre_dec;
7094 else if (sp_in_r0)
7095 mem_rtx = gen_frame_mem (mode, r0);
7096 else
7097 mem_rtx = gen_frame_mem (mode,
7098 gen_rtx_PLUS (Pmode,
7099 stack_pointer_rtx,
7100 r0));
7102 /* We must not use an r0-based address for target-branch
7103 registers or for special registers without pre-dec
7104 memory addresses, since we store their values in r0
7105 first. */
7106 gcc_assert (!TARGET_REGISTER_P (reg)
7107 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7108 || mem_rtx == pre_dec));
7110 addr_ok:
7111 orig_reg_rtx = reg_rtx;
7112 if (TARGET_REGISTER_P (reg)
7113 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7114 && mem_rtx != pre_dec))
7116 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7118 emit_move_insn (tmp_reg, reg_rtx);
7120 if (REGNO (tmp_reg) == R0_REG)
7122 offset_in_r0 = -1;
7123 sp_in_r0 = 0;
7124 gcc_assert (!refers_to_regno_p
7125 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7128 if (*++tmp_pnt <= 0)
7129 tmp_pnt = schedule.temps;
7131 reg_rtx = tmp_reg;
7134 rtx insn;
7136 /* Mark as interesting for dwarf cfi generator */
7137 insn = emit_move_insn (mem_rtx, reg_rtx);
7138 RTX_FRAME_RELATED_P (insn) = 1;
7139 /* If we use an intermediate register for the save, we can't
7140 describe this exactly in cfi as a copy of the to-be-saved
7141 register into the temporary register and then the temporary
7142 register on the stack, because the temporary register can
7143 have a different natural size than the to-be-saved register.
7144 Thus, we gloss over the intermediate copy and pretend we do
7145 a direct save from the to-be-saved register. */
7146 if (REGNO (reg_rtx) != reg)
7148 rtx set;
7150 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7151 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7154 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7156 rtx reg_rtx = gen_rtx_REG (mode, reg);
7157 rtx set;
7158 rtx mem_rtx = gen_frame_mem (mode,
7159 gen_rtx_PLUS (Pmode,
7160 stack_pointer_rtx,
7161 GEN_INT (offset)));
7163 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7164 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7169 gcc_assert (entry->offset == d_rounding);
7171 else
7173 push_regs (&live_regs_mask, current_function_interrupt);
7174 stack_usage += d;
7177 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7178 emit_insn (gen_GOTaddr2picreg ());
7180 if (SHMEDIA_REGS_STACK_ADJUST ())
7182 /* This must NOT go through the PLT, otherwise mach and macl
7183 may be clobbered. */
7184 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7185 (TARGET_FPU_ANY
7186 ? "__GCC_push_shmedia_regs"
7187 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7188 emit_insn (gen_shmedia_save_restore_regs_compact
7189 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7192 if (target_flags != save_flags && ! current_function_interrupt)
7193 emit_insn (gen_toggle_sz ());
7195 target_flags = save_flags;
7197 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7198 stack_pointer_rtx, 0, NULL, true);
7199 stack_usage += rounded_frame_size (d) - d_rounding;
7201 if (frame_pointer_needed)
7202 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7204 if (TARGET_SHCOMPACT
7205 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7207 /* This must NOT go through the PLT, otherwise mach and macl
7208 may be clobbered. */
7209 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7210 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7211 emit_insn (gen_shcompact_incoming_args ());
7214 if (flag_stack_usage)
7215 current_function_static_stack_size = stack_usage;
7218 void
7219 sh_expand_epilogue (bool sibcall_p)
7221 HARD_REG_SET live_regs_mask;
7222 int d, i;
7223 int d_rounding = 0;
7225 int save_flags = target_flags;
7226 int frame_size, save_size;
7227 int fpscr_deferred = 0;
7228 int e = sibcall_p ? -1 : 1;
7230 d = calc_live_regs (&live_regs_mask);
7232 save_size = d;
7233 frame_size = rounded_frame_size (d);
7235 if (TARGET_SH5)
7237 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7238 int total_size;
7239 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7240 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7241 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7243 total_size = d + tregs_space;
7244 total_size += rounded_frame_size (total_size);
7245 save_size = total_size - frame_size;
7247 /* If adjusting the stack in a single step costs nothing extra, do so.
7248 I.e. either if a single addi is enough, or we need a movi anyway,
7249 and we don't exceed the maximum offset range (the test for the
7250 latter is conservative for simplicity). */
7251 if (TARGET_SHMEDIA
7252 && ! frame_pointer_needed
7253 && (CONST_OK_FOR_I10 (total_size)
7254 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7255 && total_size <= 2044)))
7256 d_rounding = frame_size;
7258 frame_size -= d_rounding;
7261 if (frame_pointer_needed)
7263 /* We must avoid scheduling the epilogue with previous basic blocks.
7264 See PR/18032 and PR/40313. */
7265 emit_insn (gen_blockage ());
7266 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7267 &live_regs_mask, false);
7269 /* We must avoid moving the stack pointer adjustment past code
7270 which reads from the local frame, else an interrupt could
7271 occur after the SP adjustment and clobber data in the local
7272 frame. */
7273 emit_insn (gen_blockage ());
7274 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7276 else if (frame_size)
7278 /* We must avoid moving the stack pointer adjustment past code
7279 which reads from the local frame, else an interrupt could
7280 occur after the SP adjustment and clobber data in the local
7281 frame. */
7282 emit_insn (gen_blockage ());
7283 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7284 &live_regs_mask, false);
7287 if (SHMEDIA_REGS_STACK_ADJUST ())
7289 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7290 (TARGET_FPU_ANY
7291 ? "__GCC_pop_shmedia_regs"
7292 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7293 /* This must NOT go through the PLT, otherwise mach and macl
7294 may be clobbered. */
7295 emit_insn (gen_shmedia_save_restore_regs_compact
7296 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7299 /* Pop all the registers. */
7301 if (target_flags != save_flags && ! current_function_interrupt)
7302 emit_insn (gen_toggle_sz ());
7303 if (TARGET_SH5)
7305 int offset_base, offset;
7306 int offset_in_r0 = -1;
7307 int sp_in_r0 = 0;
7308 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7309 save_schedule schedule;
7310 save_entry *entry;
7311 int *tmp_pnt;
7313 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7314 offset_base = -entry[1].offset + d_rounding;
7315 tmp_pnt = schedule.temps;
7316 for (; entry->mode != VOIDmode; entry--)
7318 enum machine_mode mode = (enum machine_mode) entry->mode;
7319 int reg = entry->reg;
7320 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7322 offset = offset_base + entry->offset;
7323 reg_rtx = gen_rtx_REG (mode, reg);
7325 mem_rtx = gen_frame_mem (mode,
7326 gen_rtx_PLUS (Pmode,
7327 stack_pointer_rtx,
7328 GEN_INT (offset)));
7330 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7331 mem_rtx = NULL_RTX;
7333 if (HAVE_POST_INCREMENT
7334 && (offset == offset_in_r0
7335 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7336 && mem_rtx == NULL_RTX)
7337 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7339 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7341 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7342 post_inc = NULL_RTX;
7343 else
7344 mem_rtx = NULL_RTX;
7347 if (mem_rtx != NULL_RTX)
7348 goto addr_ok;
7350 if (offset_in_r0 == -1)
7352 emit_move_insn (r0, GEN_INT (offset));
7353 offset_in_r0 = offset;
7355 else if (offset != offset_in_r0)
7357 emit_move_insn (r0,
7358 gen_rtx_PLUS
7359 (Pmode, r0,
7360 GEN_INT (offset - offset_in_r0)));
7361 offset_in_r0 += offset - offset_in_r0;
7364 if (post_inc != NULL_RTX)
7366 if (! sp_in_r0)
7368 emit_move_insn (r0,
7369 gen_rtx_PLUS
7370 (Pmode, r0, stack_pointer_rtx));
7371 sp_in_r0 = 1;
7374 mem_rtx = post_inc;
7376 offset_in_r0 += GET_MODE_SIZE (mode);
7378 else if (sp_in_r0)
7379 mem_rtx = gen_frame_mem (mode, r0);
7380 else
7381 mem_rtx = gen_frame_mem (mode,
7382 gen_rtx_PLUS (Pmode,
7383 stack_pointer_rtx,
7384 r0));
7386 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7387 || mem_rtx == post_inc);
7389 addr_ok:
7390 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7391 && mem_rtx != post_inc)
7393 emit_move_insn (r0, mem_rtx);
7394 mem_rtx = r0;
7396 else if (TARGET_REGISTER_P (reg))
7398 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7400 /* Give the scheduler a bit of freedom by using up to
7401 MAX_TEMPS registers in a round-robin fashion. */
7402 emit_move_insn (tmp_reg, mem_rtx);
7403 mem_rtx = tmp_reg;
7404 if (*++tmp_pnt < 0)
7405 tmp_pnt = schedule.temps;
7408 emit_move_insn (reg_rtx, mem_rtx);
7411 gcc_assert (entry->offset + offset_base == d + d_rounding);
7413 else /* ! TARGET_SH5 */
7415 int last_reg;
7417 save_size = 0;
7418 /* For an ISR with RESBANK attribute assigned, don't pop PR
7419 register. */
7420 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7421 && !sh_cfun_resbank_handler_p ())
7423 if (!frame_pointer_needed)
7424 emit_insn (gen_blockage ());
7425 pop (PR_REG);
7428 /* Banked registers are popped first to avoid being scheduled in the
7429 delay slot. RTE switches banks before the ds instruction. */
7430 if (current_function_interrupt)
7432 bool use_movml = false;
7434 if (TARGET_SH2A)
7436 unsigned int count = 0;
7438 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7439 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7440 count++;
7441 else
7442 break;
7444 /* Use movml when all banked register are poped. */
7445 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7446 use_movml = true;
7449 if (use_movml)
7451 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7453 /* We must avoid scheduling multiple load insn with another
7454 insns. */
7455 emit_insn (gen_blockage ());
7456 emit_insn (gen_movml_pop_banked (sp_reg));
7457 emit_insn (gen_blockage ());
7459 else
7460 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7461 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7462 pop (i);
7464 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7466 else
7467 last_reg = FIRST_PSEUDO_REGISTER;
7469 for (i = 0; i < last_reg; i++)
7471 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7473 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7474 && hard_reg_set_intersect_p (live_regs_mask,
7475 reg_class_contents[DF_REGS]))
7476 fpscr_deferred = 1;
7477 /* For an ISR with RESBANK attribute assigned, don't pop
7478 following registers, R0-R14, MACH, MACL and GBR. */
7479 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7480 && ! (sh_cfun_resbank_handler_p ()
7481 && ((j >= FIRST_GENERAL_REG
7482 && j < LAST_GENERAL_REG)
7483 || j == MACH_REG
7484 || j == MACL_REG
7485 || j == GBR_REG)))
7486 pop (j);
7488 if (j == FIRST_FP_REG && fpscr_deferred)
7489 pop (FPSCR_REG);
7492 if (target_flags != save_flags && ! current_function_interrupt)
7493 emit_insn (gen_toggle_sz ());
7494 target_flags = save_flags;
7496 output_stack_adjust (crtl->args.pretend_args_size
7497 + save_size + d_rounding
7498 + crtl->args.info.stack_regs * 8,
7499 stack_pointer_rtx, e, NULL, false);
7501 if (crtl->calls_eh_return)
7502 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7503 EH_RETURN_STACKADJ_RTX));
7505 /* Switch back to the normal stack if necessary. */
7506 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7507 emit_insn (gen_sp_switch_2 ());
7509 /* Tell flow the insn that pops PR isn't dead. */
7510 /* PR_REG will never be live in SHmedia mode, and we don't need to
7511 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7512 by the return pattern. */
7513 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7514 emit_use (gen_rtx_REG (SImode, PR_REG));
7517 static int sh_need_epilogue_known = 0;
7520 sh_need_epilogue (void)
7522 if (! sh_need_epilogue_known)
7524 rtx epilogue;
7526 start_sequence ();
7527 sh_expand_epilogue (0);
7528 epilogue = get_insns ();
7529 end_sequence ();
7530 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7532 return sh_need_epilogue_known > 0;
7535 /* Emit code to change the current function's return address to RA.
7536 TEMP is available as a scratch register, if needed. */
7538 void
7539 sh_set_return_address (rtx ra, rtx tmp)
7541 HARD_REG_SET live_regs_mask;
7542 int d;
7543 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7544 int pr_offset;
7546 d = calc_live_regs (&live_regs_mask);
7548 /* If pr_reg isn't life, we can set it (or the register given in
7549 sh_media_register_for_return) directly. */
7550 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7552 rtx rr;
7554 if (TARGET_SHMEDIA)
7556 int rr_regno = sh_media_register_for_return ();
7558 if (rr_regno < 0)
7559 rr_regno = pr_reg;
7561 rr = gen_rtx_REG (DImode, rr_regno);
7563 else
7564 rr = gen_rtx_REG (SImode, pr_reg);
7566 emit_insn (GEN_MOV (rr, ra));
7567 /* Tell flow the register for return isn't dead. */
7568 emit_use (rr);
7569 return;
7572 if (TARGET_SH5)
7574 int offset;
7575 save_schedule schedule;
7576 save_entry *entry;
7578 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7579 offset = entry[1].offset;
7580 for (; entry->mode != VOIDmode; entry--)
7581 if (entry->reg == pr_reg)
7582 goto found;
7584 /* We can't find pr register. */
7585 gcc_unreachable ();
7587 found:
7588 offset = entry->offset - offset;
7589 pr_offset = (rounded_frame_size (d) + offset
7590 + SHMEDIA_REGS_STACK_ADJUST ());
7592 else
7593 pr_offset = rounded_frame_size (d);
7595 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7597 if (frame_pointer_needed)
7598 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7599 else
7600 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7602 tmp = gen_frame_mem (Pmode, tmp);
7603 emit_insn (GEN_MOV (tmp, ra));
7604 /* Tell this store isn't dead. */
7605 emit_use (tmp);
7608 /* Clear variables at function end. */
7610 static void
7611 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7612 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7614 sh_need_epilogue_known = 0;
7617 static rtx
7618 sh_builtin_saveregs (void)
7620 /* First unnamed integer register. */
7621 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7622 /* Number of integer registers we need to save. */
7623 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7624 /* First unnamed SFmode float reg */
7625 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7626 /* Number of SFmode float regs to save. */
7627 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7628 rtx regbuf, fpregs;
7629 int bufsize, regno;
7630 alias_set_type alias_set;
7632 if (TARGET_SH5)
7634 if (n_intregs)
7636 int pushregs = n_intregs;
7638 while (pushregs < NPARM_REGS (SImode) - 1
7639 && (CALL_COOKIE_INT_REG_GET
7640 (crtl->args.info.call_cookie,
7641 NPARM_REGS (SImode) - pushregs)
7642 == 1))
7644 crtl->args.info.call_cookie
7645 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7646 - pushregs, 1);
7647 pushregs++;
7650 if (pushregs == NPARM_REGS (SImode))
7651 crtl->args.info.call_cookie
7652 |= (CALL_COOKIE_INT_REG (0, 1)
7653 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7654 else
7655 crtl->args.info.call_cookie
7656 |= CALL_COOKIE_STACKSEQ (pushregs);
7658 crtl->args.pretend_args_size += 8 * n_intregs;
7660 if (TARGET_SHCOMPACT)
7661 return const0_rtx;
7664 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7666 error ("__builtin_saveregs not supported by this subtarget");
7667 return const0_rtx;
7670 if (TARGET_SHMEDIA)
7671 n_floatregs = 0;
7673 /* Allocate block of memory for the regs. */
7674 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7675 Or can assign_stack_local accept a 0 SIZE argument? */
7676 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7678 if (TARGET_SHMEDIA)
7679 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7680 else if (n_floatregs & 1)
7682 rtx addr;
7684 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7685 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7686 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7687 regbuf = change_address (regbuf, BLKmode, addr);
7689 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7691 rtx addr, mask;
7693 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7694 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7695 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7696 emit_insn (gen_andsi3 (addr, addr, mask));
7697 regbuf = change_address (regbuf, BLKmode, addr);
7699 else
7700 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7701 alias_set = get_varargs_alias_set ();
7702 set_mem_alias_set (regbuf, alias_set);
7704 /* Save int args.
7705 This is optimized to only save the regs that are necessary. Explicitly
7706 named args need not be saved. */
7707 if (n_intregs > 0)
7708 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7709 adjust_address (regbuf, BLKmode,
7710 n_floatregs * UNITS_PER_WORD),
7711 n_intregs);
7713 if (TARGET_SHMEDIA)
7714 /* Return the address of the regbuf. */
7715 return XEXP (regbuf, 0);
7717 /* Save float args.
7718 This is optimized to only save the regs that are necessary. Explicitly
7719 named args need not be saved.
7720 We explicitly build a pointer to the buffer because it halves the insn
7721 count when not optimizing (otherwise the pointer is built for each reg
7722 saved).
7723 We emit the moves in reverse order so that we can use predecrement. */
7725 fpregs = copy_to_mode_reg (Pmode,
7726 plus_constant (XEXP (regbuf, 0),
7727 n_floatregs * UNITS_PER_WORD));
7728 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7730 rtx mem;
7731 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7733 emit_insn (gen_addsi3 (fpregs, fpregs,
7734 GEN_INT (-2 * UNITS_PER_WORD)));
7735 mem = change_address (regbuf, DFmode, fpregs);
7736 emit_move_insn (mem,
7737 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7739 regno = first_floatreg;
7740 if (regno & 1)
7742 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7743 mem = change_address (regbuf, SFmode, fpregs);
7744 emit_move_insn (mem,
7745 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7746 - (TARGET_LITTLE_ENDIAN != 0)));
7749 else
7750 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7752 rtx mem;
7754 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7755 mem = change_address (regbuf, SFmode, fpregs);
7756 emit_move_insn (mem,
7757 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7760 /* Return the address of the regbuf. */
7761 return XEXP (regbuf, 0);
7764 /* Define the `__builtin_va_list' type for the ABI. */
7766 static tree
7767 sh_build_builtin_va_list (void)
7769 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7770 tree record, type_decl;
7772 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7773 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7774 return ptr_type_node;
7776 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7777 type_decl = build_decl (BUILTINS_LOCATION,
7778 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7780 f_next_o = build_decl (BUILTINS_LOCATION,
7781 FIELD_DECL, get_identifier ("__va_next_o"),
7782 ptr_type_node);
7783 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL,
7785 get_identifier ("__va_next_o_limit"),
7786 ptr_type_node);
7787 f_next_fp = build_decl (BUILTINS_LOCATION,
7788 FIELD_DECL, get_identifier ("__va_next_fp"),
7789 ptr_type_node);
7790 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7791 FIELD_DECL,
7792 get_identifier ("__va_next_fp_limit"),
7793 ptr_type_node);
7794 f_next_stack = build_decl (BUILTINS_LOCATION,
7795 FIELD_DECL, get_identifier ("__va_next_stack"),
7796 ptr_type_node);
7798 DECL_FIELD_CONTEXT (f_next_o) = record;
7799 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7800 DECL_FIELD_CONTEXT (f_next_fp) = record;
7801 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7802 DECL_FIELD_CONTEXT (f_next_stack) = record;
7804 TYPE_STUB_DECL (record) = type_decl;
7805 TYPE_NAME (record) = type_decl;
7806 TYPE_FIELDS (record) = f_next_o;
7807 DECL_CHAIN (f_next_o) = f_next_o_limit;
7808 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7809 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7810 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7812 layout_type (record);
7814 return record;
7817 /* Implement `va_start' for varargs and stdarg. */
7819 static void
7820 sh_va_start (tree valist, rtx nextarg)
7822 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7823 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7824 tree t, u;
7825 int nfp, nint;
7827 if (TARGET_SH5)
7829 expand_builtin_saveregs ();
7830 std_expand_builtin_va_start (valist, nextarg);
7831 return;
7834 if ((! TARGET_SH2E && ! TARGET_SH4)
7835 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7837 std_expand_builtin_va_start (valist, nextarg);
7838 return;
7841 f_next_o = TYPE_FIELDS (va_list_type_node);
7842 f_next_o_limit = DECL_CHAIN (f_next_o);
7843 f_next_fp = DECL_CHAIN (f_next_o_limit);
7844 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7845 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7847 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7848 NULL_TREE);
7849 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7850 valist, f_next_o_limit, NULL_TREE);
7851 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7852 NULL_TREE);
7853 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7854 valist, f_next_fp_limit, NULL_TREE);
7855 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7856 valist, f_next_stack, NULL_TREE);
7858 /* Call __builtin_saveregs. */
7859 u = make_tree (sizetype, expand_builtin_saveregs ());
7860 u = fold_convert (ptr_type_node, u);
7861 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7862 TREE_SIDE_EFFECTS (t) = 1;
7863 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7865 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7866 if (nfp < 8)
7867 nfp = 8 - nfp;
7868 else
7869 nfp = 0;
7870 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7871 size_int (UNITS_PER_WORD * nfp));
7872 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7873 TREE_SIDE_EFFECTS (t) = 1;
7874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7876 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7877 TREE_SIDE_EFFECTS (t) = 1;
7878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7880 nint = crtl->args.info.arg_count[SH_ARG_INT];
7881 if (nint < 4)
7882 nint = 4 - nint;
7883 else
7884 nint = 0;
7885 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7886 size_int (UNITS_PER_WORD * nint));
7887 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7888 TREE_SIDE_EFFECTS (t) = 1;
7889 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7891 u = make_tree (ptr_type_node, nextarg);
7892 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7893 TREE_SIDE_EFFECTS (t) = 1;
7894 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7897 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7898 member, return it. */
7899 static tree
7900 find_sole_member (tree type)
7902 tree field, member = NULL_TREE;
7904 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7906 if (TREE_CODE (field) != FIELD_DECL)
7907 continue;
7908 if (!DECL_SIZE (field))
7909 return NULL_TREE;
7910 if (integer_zerop (DECL_SIZE (field)))
7911 continue;
7912 if (member)
7913 return NULL_TREE;
7914 member = field;
7916 return member;
7918 /* Implement `va_arg'. */
7920 static tree
7921 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7922 gimple_seq *post_p ATTRIBUTE_UNUSED)
7924 HOST_WIDE_INT size, rsize;
7925 tree tmp, pptr_type_node;
7926 tree addr, lab_over = NULL, result = NULL;
7927 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7928 tree eff_type;
7930 if (pass_by_ref)
7931 type = build_pointer_type (type);
7933 size = int_size_in_bytes (type);
7934 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7935 pptr_type_node = build_pointer_type (ptr_type_node);
7937 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7938 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7940 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7941 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7942 int pass_as_float;
7943 tree lab_false;
7944 tree member;
7946 f_next_o = TYPE_FIELDS (va_list_type_node);
7947 f_next_o_limit = DECL_CHAIN (f_next_o);
7948 f_next_fp = DECL_CHAIN (f_next_o_limit);
7949 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7950 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7952 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7953 NULL_TREE);
7954 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7955 valist, f_next_o_limit, NULL_TREE);
7956 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7957 valist, f_next_fp, NULL_TREE);
7958 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7959 valist, f_next_fp_limit, NULL_TREE);
7960 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7961 valist, f_next_stack, NULL_TREE);
7963 /* Structures with a single member with a distinct mode are passed
7964 like their member. This is relevant if the latter has a REAL_TYPE
7965 or COMPLEX_TYPE type. */
7966 eff_type = type;
7967 while (TREE_CODE (eff_type) == RECORD_TYPE
7968 && (member = find_sole_member (eff_type))
7969 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7970 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7971 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7973 tree field_type = TREE_TYPE (member);
7975 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7976 eff_type = field_type;
7977 else
7979 gcc_assert ((TYPE_ALIGN (eff_type)
7980 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7981 || (TYPE_ALIGN (eff_type)
7982 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7983 break;
7987 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7989 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7990 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7991 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7992 && size <= 16));
7994 else
7996 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7999 addr = create_tmp_var (pptr_type_node, NULL);
8000 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8001 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8003 valist = build_simple_mem_ref (addr);
8005 if (pass_as_float)
8007 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8008 tree cmp;
8009 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8011 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8012 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8014 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8015 tmp = next_fp_limit;
8016 if (size > 4 && !is_double)
8017 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8018 unshare_expr (tmp), size_int (4 - size));
8019 tmp = build2 (GE_EXPR, boolean_type_node,
8020 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8021 cmp = build3 (COND_EXPR, void_type_node, tmp,
8022 build1 (GOTO_EXPR, void_type_node,
8023 unshare_expr (lab_false)), NULL_TREE);
8024 if (!is_double)
8025 gimplify_and_add (cmp, pre_p);
8027 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8028 || (is_double || size == 16))
8030 tmp = fold_convert (sizetype, next_fp_tmp);
8031 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8032 size_int (UNITS_PER_WORD));
8033 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8034 unshare_expr (next_fp_tmp), tmp);
8035 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8037 if (is_double)
8038 gimplify_and_add (cmp, pre_p);
8040 #ifdef FUNCTION_ARG_SCmode_WART
8041 if (TYPE_MODE (eff_type) == SCmode
8042 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8044 tree subtype = TREE_TYPE (eff_type);
8045 tree real, imag;
8047 imag
8048 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8049 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8051 real
8052 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8053 real = get_initialized_tmp_var (real, pre_p, NULL);
8055 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8056 if (type != eff_type)
8057 result = build1 (VIEW_CONVERT_EXPR, type, result);
8058 result = get_initialized_tmp_var (result, pre_p, NULL);
8060 #endif /* FUNCTION_ARG_SCmode_WART */
8062 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8063 gimplify_and_add (tmp, pre_p);
8065 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8066 gimplify_and_add (tmp, pre_p);
8068 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8069 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8070 gimplify_assign (unshare_expr (next_fp_tmp),
8071 unshare_expr (valist), pre_p);
8073 gimplify_assign (unshare_expr (valist),
8074 unshare_expr (next_fp_tmp), post_p);
8075 valist = next_fp_tmp;
8077 else
8079 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8080 unshare_expr (next_o), size_int (rsize));
8081 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8082 unshare_expr (next_o_limit));
8083 tmp = build3 (COND_EXPR, void_type_node, tmp,
8084 build1 (GOTO_EXPR, void_type_node,
8085 unshare_expr (lab_false)),
8086 NULL_TREE);
8087 gimplify_and_add (tmp, pre_p);
8089 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8090 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8092 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8093 gimplify_and_add (tmp, pre_p);
8095 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8096 gimplify_and_add (tmp, pre_p);
8098 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8099 gimplify_assign (unshare_expr (next_o),
8100 unshare_expr (next_o_limit), pre_p);
8102 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8103 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8106 if (!result)
8108 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8109 gimplify_and_add (tmp, pre_p);
8113 /* ??? In va-sh.h, there had been code to make values larger than
8114 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8116 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8117 if (result)
8119 gimplify_assign (result, tmp, pre_p);
8120 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8121 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8122 gimplify_and_add (tmp, pre_p);
8124 else
8125 result = tmp;
8127 if (pass_by_ref)
8128 result = build_va_arg_indirect_ref (result);
8130 return result;
8133 /* 64 bit floating points memory transfers are paired single precision loads
8134 or store. So DWARF information needs fixing in little endian (unless
8135 PR=SZ=1 in FPSCR). */
8137 sh_dwarf_register_span (rtx reg)
8139 unsigned regno = REGNO (reg);
8141 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8142 return NULL_RTX;
8144 return
8145 gen_rtx_PARALLEL (VOIDmode,
8146 gen_rtvec (2,
8147 gen_rtx_REG (SFmode,
8148 DBX_REGISTER_NUMBER (regno+1)),
8149 gen_rtx_REG (SFmode,
8150 DBX_REGISTER_NUMBER (regno))));
8153 static enum machine_mode
8154 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8155 int *punsignedp, const_tree funtype,
8156 int for_return)
8158 if (sh_promote_prototypes (funtype))
8159 return promote_mode (type, mode, punsignedp);
8160 else
8161 return default_promote_function_mode (type, mode, punsignedp, funtype,
8162 for_return);
8165 static bool
8166 sh_promote_prototypes (const_tree type)
8168 if (TARGET_HITACHI)
8169 return 0;
8170 if (! type)
8171 return 1;
8172 return ! sh_attr_renesas_p (type);
8175 /* Whether an argument must be passed by reference. On SHcompact, we
8176 pretend arguments wider than 32-bits that would have been passed in
8177 registers are passed by reference, so that an SHmedia trampoline
8178 loads them into the full 64-bits registers. */
8180 static int
8181 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8182 const_tree type, bool named)
8184 unsigned HOST_WIDE_INT size;
8186 if (type)
8187 size = int_size_in_bytes (type);
8188 else
8189 size = GET_MODE_SIZE (mode);
8191 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8192 && (!named
8193 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8194 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8195 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8196 && size > 4
8197 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8198 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8199 return size;
8200 else
8201 return 0;
8204 static bool
8205 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8206 const_tree type, bool named)
8208 if (targetm.calls.must_pass_in_stack (mode, type))
8209 return true;
8211 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8212 wants to know about pass-by-reference semantics for incoming
8213 arguments. */
8214 if (! cum)
8215 return false;
8217 if (TARGET_SHCOMPACT)
8219 cum->byref = shcompact_byref (cum, mode, type, named);
8220 return cum->byref != 0;
8223 return false;
8226 static bool
8227 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8228 const_tree type, bool named ATTRIBUTE_UNUSED)
8230 /* ??? How can it possibly be correct to return true only on the
8231 caller side of the equation? Is there someplace else in the
8232 sh backend that's magically producing the copies? */
8233 return (cum->outgoing
8234 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8235 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8238 static int
8239 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8240 tree type, bool named ATTRIBUTE_UNUSED)
8242 int words = 0;
8244 if (!TARGET_SH5
8245 && PASS_IN_REG_P (*cum, mode, type)
8246 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8247 && (ROUND_REG (*cum, mode)
8248 + (mode != BLKmode
8249 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8250 : ROUND_ADVANCE (int_size_in_bytes (type)))
8251 > NPARM_REGS (mode)))
8252 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8254 else if (!TARGET_SHCOMPACT
8255 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8256 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8258 return words * UNITS_PER_WORD;
8262 /* Define where to put the arguments to a function.
8263 Value is zero to push the argument on the stack,
8264 or a hard register in which to store the argument.
8266 MODE is the argument's machine mode.
8267 TYPE is the data type of the argument (as a tree).
8268 This is null for libcalls where that information may
8269 not be available.
8270 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8271 the preceding args and about the function being called.
8272 NAMED is nonzero if this argument is a named parameter
8273 (otherwise it is an extra parameter matching an ellipsis).
8275 On SH the first args are normally in registers
8276 and the rest are pushed. Any arg that starts within the first
8277 NPARM_REGS words is at least partially passed in a register unless
8278 its data type forbids. */
8280 static rtx
8281 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8282 const_tree type, bool named)
8284 if (! TARGET_SH5 && mode == VOIDmode)
8285 return GEN_INT (ca->renesas_abi ? 1 : 0);
8287 if (! TARGET_SH5
8288 && PASS_IN_REG_P (*ca, mode, type)
8289 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8291 int regno;
8293 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8294 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8296 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8297 gen_rtx_REG (SFmode,
8298 BASE_ARG_REG (mode)
8299 + (ROUND_REG (*ca, mode) ^ 1)),
8300 const0_rtx);
8301 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8302 gen_rtx_REG (SFmode,
8303 BASE_ARG_REG (mode)
8304 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8305 GEN_INT (4));
8306 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8309 /* If the alignment of a DF value causes an SF register to be
8310 skipped, we will use that skipped register for the next SF
8311 value. */
8312 if ((TARGET_HITACHI || ca->renesas_abi)
8313 && ca->free_single_fp_reg
8314 && mode == SFmode)
8315 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8317 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8318 ^ (mode == SFmode && TARGET_SH4
8319 && TARGET_LITTLE_ENDIAN != 0
8320 && ! TARGET_HITACHI && ! ca->renesas_abi);
8321 return gen_rtx_REG (mode, regno);
8325 if (TARGET_SH5)
8327 if (mode == VOIDmode && TARGET_SHCOMPACT)
8328 return GEN_INT (ca->call_cookie);
8330 /* The following test assumes unnamed arguments are promoted to
8331 DFmode. */
8332 if (mode == SFmode && ca->free_single_fp_reg)
8333 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8335 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8336 && (named || ! ca->prototype_p)
8337 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8339 if (! ca->prototype_p && TARGET_SHMEDIA)
8340 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8342 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8343 FIRST_FP_PARM_REG
8344 + ca->arg_count[(int) SH_ARG_FLOAT]);
8347 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8348 && (! TARGET_SHCOMPACT
8349 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8350 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8351 type, named))))
8353 return gen_rtx_REG (mode, (FIRST_PARM_REG
8354 + ca->arg_count[(int) SH_ARG_INT]));
8357 return 0;
8360 return 0;
8363 /* Update the data in CUM to advance over an argument
8364 of mode MODE and data type TYPE.
8365 (TYPE is null for libcalls where that information may not be
8366 available.) */
8368 static void
8369 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8370 const_tree type, bool named)
8372 if (ca->force_mem)
8373 ca->force_mem = 0;
8374 else if (TARGET_SH5)
8376 const_tree type2 = (ca->byref && type
8377 ? TREE_TYPE (type)
8378 : type);
8379 enum machine_mode mode2 = (ca->byref && type
8380 ? TYPE_MODE (type2)
8381 : mode);
8382 int dwords = ((ca->byref
8383 ? ca->byref
8384 : mode2 == BLKmode
8385 ? int_size_in_bytes (type2)
8386 : GET_MODE_SIZE (mode2)) + 7) / 8;
8387 int numregs = MIN (dwords, NPARM_REGS (SImode)
8388 - ca->arg_count[(int) SH_ARG_INT]);
8390 if (numregs)
8392 ca->arg_count[(int) SH_ARG_INT] += numregs;
8393 if (TARGET_SHCOMPACT
8394 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8396 ca->call_cookie
8397 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8398 - numregs, 1);
8399 /* N.B. We want this also for outgoing. */
8400 ca->stack_regs += numregs;
8402 else if (ca->byref)
8404 if (! ca->outgoing)
8405 ca->stack_regs += numregs;
8406 ca->byref_regs += numregs;
8407 ca->byref = 0;
8409 ca->call_cookie
8410 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8411 - numregs, 2);
8412 while (--numregs);
8413 ca->call_cookie
8414 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8415 - 1, 1);
8417 else if (dwords > numregs)
8419 int pushregs = numregs;
8421 if (TARGET_SHCOMPACT)
8422 ca->stack_regs += numregs;
8423 while (pushregs < NPARM_REGS (SImode) - 1
8424 && (CALL_COOKIE_INT_REG_GET
8425 (ca->call_cookie,
8426 NPARM_REGS (SImode) - pushregs)
8427 == 1))
8429 ca->call_cookie
8430 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8431 - pushregs, 1);
8432 pushregs++;
8434 if (numregs == NPARM_REGS (SImode))
8435 ca->call_cookie
8436 |= CALL_COOKIE_INT_REG (0, 1)
8437 | CALL_COOKIE_STACKSEQ (numregs - 1);
8438 else
8439 ca->call_cookie
8440 |= CALL_COOKIE_STACKSEQ (numregs);
8443 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8444 && (named || ! ca->prototype_p))
8446 if (mode2 == SFmode && ca->free_single_fp_reg)
8447 ca->free_single_fp_reg = 0;
8448 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8449 < NPARM_REGS (SFmode))
8451 int numfpregs
8452 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8453 NPARM_REGS (SFmode)
8454 - ca->arg_count[(int) SH_ARG_FLOAT]);
8456 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8458 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8460 if (ca->outgoing && numregs > 0)
8463 ca->call_cookie
8464 |= (CALL_COOKIE_INT_REG
8465 (ca->arg_count[(int) SH_ARG_INT]
8466 - numregs + ((numfpregs - 2) / 2),
8467 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8468 - numfpregs) / 2));
8470 while (numfpregs -= 2);
8472 else if (mode2 == SFmode && (named)
8473 && (ca->arg_count[(int) SH_ARG_FLOAT]
8474 < NPARM_REGS (SFmode)))
8475 ca->free_single_fp_reg
8476 = FIRST_FP_PARM_REG - numfpregs
8477 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8480 return;
8483 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8485 /* Note that we've used the skipped register. */
8486 if (mode == SFmode && ca->free_single_fp_reg)
8488 ca->free_single_fp_reg = 0;
8489 return;
8491 /* When we have a DF after an SF, there's an SF register that get
8492 skipped in order to align the DF value. We note this skipped
8493 register, because the next SF value will use it, and not the
8494 SF that follows the DF. */
8495 if (mode == DFmode
8496 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8498 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8499 + BASE_ARG_REG (mode));
8503 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8504 || PASS_IN_REG_P (*ca, mode, type))
8505 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8506 = (ROUND_REG (*ca, mode)
8507 + (mode == BLKmode
8508 ? ROUND_ADVANCE (int_size_in_bytes (type))
8509 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8512 /* The Renesas calling convention doesn't quite fit into this scheme since
8513 the address is passed like an invisible argument, but one that is always
8514 passed in memory. */
8515 static rtx
8516 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8518 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8519 return 0;
8520 return gen_rtx_REG (Pmode, 2);
8523 /* Worker function for TARGET_FUNCTION_VALUE.
8525 For the SH, this is like LIBCALL_VALUE, except that we must change the
8526 mode like PROMOTE_MODE does.
8527 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8528 tested here has to be kept in sync with the one in explow.c:promote_mode.
8531 static rtx
8532 sh_function_value (const_tree valtype,
8533 const_tree fn_decl_or_type,
8534 bool outgoing ATTRIBUTE_UNUSED)
8536 if (fn_decl_or_type
8537 && !DECL_P (fn_decl_or_type))
8538 fn_decl_or_type = NULL;
8540 return gen_rtx_REG (
8541 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8542 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8543 && (TREE_CODE (valtype) == INTEGER_TYPE
8544 || TREE_CODE (valtype) == ENUMERAL_TYPE
8545 || TREE_CODE (valtype) == BOOLEAN_TYPE
8546 || TREE_CODE (valtype) == REAL_TYPE
8547 || TREE_CODE (valtype) == OFFSET_TYPE))
8548 && sh_promote_prototypes (fn_decl_or_type)
8549 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8550 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8553 /* Worker function for TARGET_LIBCALL_VALUE. */
8555 static rtx
8556 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8558 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8561 /* Return true if N is a possible register number of function value. */
8563 static bool
8564 sh_function_value_regno_p (const unsigned int regno)
8566 return ((regno) == FIRST_RET_REG
8567 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8568 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8571 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8573 static bool
8574 sh_return_in_memory (const_tree type, const_tree fndecl)
8576 if (TARGET_SH5)
8578 if (TYPE_MODE (type) == BLKmode)
8579 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8580 else
8581 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8583 else
8585 return (TYPE_MODE (type) == BLKmode
8586 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8587 && TREE_CODE (type) == RECORD_TYPE));
8591 /* We actually emit the code in sh_expand_prologue. We used to use
8592 a static variable to flag that we need to emit this code, but that
8593 doesn't when inlining, when functions are deferred and then emitted
8594 later. Fortunately, we already have two flags that are part of struct
8595 function that tell if a function uses varargs or stdarg. */
8596 static void
8597 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8598 enum machine_mode mode,
8599 tree type,
8600 int *pretend_arg_size,
8601 int second_time ATTRIBUTE_UNUSED)
8603 gcc_assert (cfun->stdarg);
8604 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8606 int named_parm_regs, anon_parm_regs;
8608 named_parm_regs = (ROUND_REG (*ca, mode)
8609 + (mode == BLKmode
8610 ? ROUND_ADVANCE (int_size_in_bytes (type))
8611 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8612 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8613 if (anon_parm_regs > 0)
8614 *pretend_arg_size = anon_parm_regs * 4;
8618 static bool
8619 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8621 return TARGET_SH5;
8624 static bool
8625 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8627 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8631 /* Define the offset between two registers, one to be eliminated, and
8632 the other its replacement, at the start of a routine. */
8635 initial_elimination_offset (int from, int to)
8637 int regs_saved;
8638 int regs_saved_rounding = 0;
8639 int total_saved_regs_space;
8640 int total_auto_space;
8641 int save_flags = target_flags;
8642 int copy_flags;
8643 HARD_REG_SET live_regs_mask;
8645 shmedia_space_reserved_for_target_registers = false;
8646 regs_saved = calc_live_regs (&live_regs_mask);
8647 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8649 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8651 shmedia_space_reserved_for_target_registers = true;
8652 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8655 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8656 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8657 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8659 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8660 copy_flags = target_flags;
8661 target_flags = save_flags;
8663 total_saved_regs_space = regs_saved + regs_saved_rounding;
8665 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8666 return total_saved_regs_space + total_auto_space
8667 + crtl->args.info.byref_regs * 8;
8669 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8670 return total_saved_regs_space + total_auto_space
8671 + crtl->args.info.byref_regs * 8;
8673 /* Initial gap between fp and sp is 0. */
8674 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8675 return 0;
8677 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8678 return rounded_frame_size (0);
8680 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8681 return rounded_frame_size (0);
8683 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8684 && (to == HARD_FRAME_POINTER_REGNUM
8685 || to == STACK_POINTER_REGNUM));
8686 if (TARGET_SH5)
8688 int n = total_saved_regs_space;
8689 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8690 save_schedule schedule;
8691 save_entry *entry;
8693 n += total_auto_space;
8695 /* If it wasn't saved, there's not much we can do. */
8696 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8697 return n;
8699 target_flags = copy_flags;
8701 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8702 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8703 if (entry->reg == pr_reg)
8705 target_flags = save_flags;
8706 return entry->offset;
8708 gcc_unreachable ();
8710 else
8711 return total_auto_space;
8714 /* Parse the -mfixed-range= option string. */
8715 void
8716 sh_fix_range (const char *const_str)
8718 int i, first, last;
8719 char *str, *dash, *comma;
8721 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8722 REG2 are either register names or register numbers. The effect
8723 of this option is to mark the registers in the range from REG1 to
8724 REG2 as ``fixed'' so they won't be used by the compiler. */
8726 i = strlen (const_str);
8727 str = (char *) alloca (i + 1);
8728 memcpy (str, const_str, i + 1);
8730 while (1)
8732 dash = strchr (str, '-');
8733 if (!dash)
8735 warning (0, "value of -mfixed-range must have form REG1-REG2");
8736 return;
8738 *dash = '\0';
8739 comma = strchr (dash + 1, ',');
8740 if (comma)
8741 *comma = '\0';
8743 first = decode_reg_name (str);
8744 if (first < 0)
8746 warning (0, "unknown register name: %s", str);
8747 return;
8750 last = decode_reg_name (dash + 1);
8751 if (last < 0)
8753 warning (0, "unknown register name: %s", dash + 1);
8754 return;
8757 *dash = '-';
8759 if (first > last)
8761 warning (0, "%s-%s is an empty range", str, dash + 1);
8762 return;
8765 for (i = first; i <= last; ++i)
8766 fixed_regs[i] = call_used_regs[i] = 1;
8768 if (!comma)
8769 break;
8771 *comma = ',';
8772 str = comma + 1;
8776 /* Insert any deferred function attributes from earlier pragmas. */
8777 static void
8778 sh_insert_attributes (tree node, tree *attributes)
8780 tree attrs;
8782 if (TREE_CODE (node) != FUNCTION_DECL)
8783 return;
8785 /* We are only interested in fields. */
8786 if (!DECL_P (node))
8787 return;
8789 /* Append the attributes to the deferred attributes. */
8790 *sh_deferred_function_attributes_tail = *attributes;
8791 attrs = sh_deferred_function_attributes;
8792 if (!attrs)
8793 return;
8795 /* Some attributes imply or require the interrupt attribute. */
8796 if (!lookup_attribute ("interrupt_handler", attrs)
8797 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8799 /* If we have a trapa_handler, but no interrupt_handler attribute,
8800 insert an interrupt_handler attribute. */
8801 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8802 /* We can't use sh_pr_interrupt here because that's not in the
8803 java frontend. */
8804 attrs
8805 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8806 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8807 if the interrupt attribute is missing, we ignore the attribute
8808 and warn. */
8809 else if (lookup_attribute ("sp_switch", attrs)
8810 || lookup_attribute ("trap_exit", attrs)
8811 || lookup_attribute ("nosave_low_regs", attrs)
8812 || lookup_attribute ("resbank", attrs))
8814 tree *tail;
8816 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8818 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8819 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8820 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8821 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8822 warning (OPT_Wattributes,
8823 "%qE attribute only applies to interrupt functions",
8824 TREE_PURPOSE (attrs));
8825 else
8827 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8828 NULL_TREE);
8829 tail = &TREE_CHAIN (*tail);
8832 attrs = *attributes;
8836 /* Install the processed list. */
8837 *attributes = attrs;
8839 /* Clear deferred attributes. */
8840 sh_deferred_function_attributes = NULL_TREE;
8841 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8843 return;
8846 /* Supported attributes:
8848 interrupt_handler -- specifies this function is an interrupt handler.
8850 trapa_handler - like above, but don't save all registers.
8852 sp_switch -- specifies an alternate stack for an interrupt handler
8853 to run on.
8855 trap_exit -- use a trapa to exit an interrupt function instead of
8856 an rte instruction.
8858 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8859 This is useful on the SH3 and upwards,
8860 which has a separate set of low regs for User and Supervisor modes.
8861 This should only be used for the lowest level of interrupts. Higher levels
8862 of interrupts must save the registers in case they themselves are
8863 interrupted.
8865 renesas -- use Renesas calling/layout conventions (functions and
8866 structures).
8868 resbank -- In case of an ISR, use a register bank to save registers
8869 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8872 /* Handle a 'resbank' attribute. */
8873 static tree
8874 sh_handle_resbank_handler_attribute (tree * node, tree name,
8875 tree args ATTRIBUTE_UNUSED,
8876 int flags ATTRIBUTE_UNUSED,
8877 bool * no_add_attrs)
8879 if (!TARGET_SH2A)
8881 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8882 name);
8883 *no_add_attrs = true;
8885 if (TREE_CODE (*node) != FUNCTION_DECL)
8887 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8888 name);
8889 *no_add_attrs = true;
8892 return NULL_TREE;
8895 /* Handle an "interrupt_handler" attribute; arguments as in
8896 struct attribute_spec.handler. */
8897 static tree
8898 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8899 tree args ATTRIBUTE_UNUSED,
8900 int flags ATTRIBUTE_UNUSED,
8901 bool *no_add_attrs)
8903 if (TREE_CODE (*node) != FUNCTION_DECL)
8905 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8906 name);
8907 *no_add_attrs = true;
8909 else if (TARGET_SHCOMPACT)
8911 error ("attribute interrupt_handler is not compatible with -m5-compact");
8912 *no_add_attrs = true;
8915 return NULL_TREE;
8918 /* Handle an 'function_vector' attribute; arguments as in
8919 struct attribute_spec.handler. */
8920 static tree
8921 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8922 tree args ATTRIBUTE_UNUSED,
8923 int flags ATTRIBUTE_UNUSED,
8924 bool * no_add_attrs)
8926 if (!TARGET_SH2A)
8928 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8929 name);
8930 *no_add_attrs = true;
8932 else if (TREE_CODE (*node) != FUNCTION_DECL)
8934 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8935 name);
8936 *no_add_attrs = true;
8938 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8940 /* The argument must be a constant integer. */
8941 warning (OPT_Wattributes,
8942 "%qE attribute argument not an integer constant",
8943 name);
8944 *no_add_attrs = true;
8946 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8948 /* The argument value must be between 0 to 255. */
8949 warning (OPT_Wattributes,
8950 "%qE attribute argument should be between 0 to 255",
8951 name);
8952 *no_add_attrs = true;
8954 return NULL_TREE;
8957 /* Returns 1 if current function has been assigned the attribute
8958 'function_vector'. */
8960 sh2a_is_function_vector_call (rtx x)
8962 if (GET_CODE (x) == SYMBOL_REF
8963 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8965 tree tr = SYMBOL_REF_DECL (x);
8967 if (sh2a_function_vector_p (tr))
8968 return 1;
8971 return 0;
8974 /* Returns the function vector number, if the the attribute
8975 'function_vector' is assigned, otherwise returns zero. */
8977 sh2a_get_function_vector_number (rtx x)
8979 int num;
8980 tree list, t;
8982 if ((GET_CODE (x) == SYMBOL_REF)
8983 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8985 t = SYMBOL_REF_DECL (x);
8987 if (TREE_CODE (t) != FUNCTION_DECL)
8988 return 0;
8990 list = SH_ATTRIBUTES (t);
8991 while (list)
8993 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8995 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8996 return num;
8999 list = TREE_CHAIN (list);
9002 return 0;
9004 else
9005 return 0;
9008 /* Handle an "sp_switch" attribute; arguments as in
9009 struct attribute_spec.handler. */
9010 static tree
9011 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9012 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9014 if (TREE_CODE (*node) != FUNCTION_DECL)
9016 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9017 name);
9018 *no_add_attrs = true;
9020 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9022 /* The argument must be a constant string. */
9023 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9024 name);
9025 *no_add_attrs = true;
9028 return NULL_TREE;
9031 /* Handle an "trap_exit" attribute; arguments as in
9032 struct attribute_spec.handler. */
9033 static tree
9034 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9035 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9037 if (TREE_CODE (*node) != FUNCTION_DECL)
9039 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9040 name);
9041 *no_add_attrs = true;
9043 /* The argument specifies a trap number to be used in a trapa instruction
9044 at function exit (instead of an rte instruction). */
9045 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9047 /* The argument must be a constant integer. */
9048 warning (OPT_Wattributes, "%qE attribute argument not an "
9049 "integer constant", name);
9050 *no_add_attrs = true;
9053 return NULL_TREE;
9056 static tree
9057 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9058 tree name ATTRIBUTE_UNUSED,
9059 tree args ATTRIBUTE_UNUSED,
9060 int flags ATTRIBUTE_UNUSED,
9061 bool *no_add_attrs ATTRIBUTE_UNUSED)
9063 return NULL_TREE;
9066 /* True if __attribute__((renesas)) or -mrenesas. */
9068 sh_attr_renesas_p (const_tree td)
9070 if (TARGET_HITACHI)
9071 return 1;
9072 if (td == 0)
9073 return 0;
9074 if (DECL_P (td))
9075 td = TREE_TYPE (td);
9076 if (td == error_mark_node)
9077 return 0;
9078 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9079 != NULL_TREE);
9082 /* True if __attribute__((renesas)) or -mrenesas, for the current
9083 function. */
9085 sh_cfun_attr_renesas_p (void)
9087 return sh_attr_renesas_p (current_function_decl);
9091 sh_cfun_interrupt_handler_p (void)
9093 return (lookup_attribute ("interrupt_handler",
9094 DECL_ATTRIBUTES (current_function_decl))
9095 != NULL_TREE);
9098 /* Returns 1 if FUNC has been assigned the attribute
9099 "function_vector". */
9101 sh2a_function_vector_p (tree func)
9103 tree list;
9104 if (TREE_CODE (func) != FUNCTION_DECL)
9105 return 0;
9107 list = SH_ATTRIBUTES (func);
9108 while (list)
9110 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9111 return 1;
9113 list = TREE_CHAIN (list);
9115 return 0;
9118 /* Returns TRUE if given tree has the "resbank" attribute. */
9121 sh_cfun_resbank_handler_p (void)
9123 return ((lookup_attribute ("resbank",
9124 DECL_ATTRIBUTES (current_function_decl))
9125 != NULL_TREE)
9126 && (lookup_attribute ("interrupt_handler",
9127 DECL_ATTRIBUTES (current_function_decl))
9128 != NULL_TREE) && TARGET_SH2A);
9131 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9133 static const char *
9134 sh_check_pch_target_flags (int old_flags)
9136 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9137 | MASK_SH_E | MASK_HARD_SH4
9138 | MASK_FPU_SINGLE | MASK_SH4))
9139 return _("created and used with different architectures / ABIs");
9140 if ((old_flags ^ target_flags) & MASK_HITACHI)
9141 return _("created and used with different ABIs");
9142 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9143 return _("created and used with different endianness");
9144 return NULL;
9147 /* Predicates used by the templates. */
9149 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9150 Used only in general_movsrc_operand. */
9153 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9155 switch (REGNO (op))
9157 case PR_REG:
9158 case MACL_REG:
9159 case MACH_REG:
9160 return 1;
9162 return 0;
9165 /* Nonzero if OP is a floating point value with value 0.0. */
9168 fp_zero_operand (rtx op)
9170 REAL_VALUE_TYPE r;
9172 if (GET_MODE (op) != SFmode)
9173 return 0;
9175 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9176 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9179 /* Nonzero if OP is a floating point value with value 1.0. */
9182 fp_one_operand (rtx op)
9184 REAL_VALUE_TYPE r;
9186 if (GET_MODE (op) != SFmode)
9187 return 0;
9189 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9190 return REAL_VALUES_EQUAL (r, dconst1);
9193 /* In general mode switching is used. If we are
9194 compiling without -mfmovd, movsf_ie isn't taken into account for
9195 mode switching. We could check in machine_dependent_reorg for
9196 cases where we know we are in single precision mode, but there is
9197 interface to find that out during reload, so we must avoid
9198 choosing an fldi alternative during reload and thus failing to
9199 allocate a scratch register for the constant loading. */
9201 fldi_ok (void)
9203 return 1;
9207 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9209 enum rtx_code code = GET_CODE (op);
9210 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9213 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9214 enum tls_model
9215 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9217 if (GET_CODE (op) != SYMBOL_REF)
9218 return TLS_MODEL_NONE;
9219 return SYMBOL_REF_TLS_MODEL (op);
9222 /* Return the destination address of a branch. */
9224 static int
9225 branch_dest (rtx branch)
9227 rtx dest = SET_SRC (PATTERN (branch));
9228 int dest_uid;
9230 if (GET_CODE (dest) == IF_THEN_ELSE)
9231 dest = XEXP (dest, 1);
9232 dest = XEXP (dest, 0);
9233 dest_uid = INSN_UID (dest);
9234 return INSN_ADDRESSES (dest_uid);
9237 /* Return nonzero if REG is not used after INSN.
9238 We assume REG is a reload reg, and therefore does
9239 not live past labels. It may live past calls or jumps though. */
9241 reg_unused_after (rtx reg, rtx insn)
9243 enum rtx_code code;
9244 rtx set;
9246 /* If the reg is set by this instruction, then it is safe for our
9247 case. Disregard the case where this is a store to memory, since
9248 we are checking a register used in the store address. */
9249 set = single_set (insn);
9250 if (set && !MEM_P (SET_DEST (set))
9251 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9252 return 1;
9254 while ((insn = NEXT_INSN (insn)))
9256 rtx set;
9257 if (!INSN_P (insn))
9258 continue;
9260 code = GET_CODE (insn);
9262 #if 0
9263 /* If this is a label that existed before reload, then the register
9264 if dead here. However, if this is a label added by reorg, then
9265 the register may still be live here. We can't tell the difference,
9266 so we just ignore labels completely. */
9267 if (code == CODE_LABEL)
9268 return 1;
9269 /* else */
9270 #endif
9272 if (code == JUMP_INSN)
9273 return 0;
9275 /* If this is a sequence, we must handle them all at once.
9276 We could have for instance a call that sets the target register,
9277 and an insn in a delay slot that uses the register. In this case,
9278 we must return 0. */
9279 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9281 int i;
9282 int retval = 0;
9284 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9286 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9287 rtx set = single_set (this_insn);
9289 if (CALL_P (this_insn))
9290 code = CALL_INSN;
9291 else if (JUMP_P (this_insn))
9293 if (INSN_ANNULLED_BRANCH_P (this_insn))
9294 return 0;
9295 code = JUMP_INSN;
9298 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9299 return 0;
9300 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9302 if (!MEM_P (SET_DEST (set)))
9303 retval = 1;
9304 else
9305 return 0;
9307 if (set == 0
9308 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9309 return 0;
9311 if (retval == 1)
9312 return 1;
9313 else if (code == JUMP_INSN)
9314 return 0;
9317 set = single_set (insn);
9318 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9319 return 0;
9320 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9321 return !MEM_P (SET_DEST (set));
9322 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9323 return 0;
9325 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9326 return 1;
9328 return 1;
9331 #include "ggc.h"
9333 static GTY(()) rtx fpscr_rtx;
9335 get_fpscr_rtx (void)
9337 if (! fpscr_rtx)
9339 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9340 REG_USERVAR_P (fpscr_rtx) = 1;
9341 mark_user_reg (fpscr_rtx);
9343 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9344 mark_user_reg (fpscr_rtx);
9345 return fpscr_rtx;
9348 static GTY(()) tree fpscr_values;
9350 static void
9351 emit_fpu_switch (rtx scratch, int index)
9353 rtx dst, src;
9355 if (fpscr_values == NULL)
9357 tree t;
9359 t = build_index_type (integer_one_node);
9360 t = build_array_type (integer_type_node, t);
9361 t = build_decl (BUILTINS_LOCATION,
9362 VAR_DECL, get_identifier ("__fpscr_values"), t);
9363 DECL_ARTIFICIAL (t) = 1;
9364 DECL_IGNORED_P (t) = 1;
9365 DECL_EXTERNAL (t) = 1;
9366 TREE_STATIC (t) = 1;
9367 TREE_PUBLIC (t) = 1;
9368 TREE_USED (t) = 1;
9370 fpscr_values = t;
9373 src = DECL_RTL (fpscr_values);
9374 if (!can_create_pseudo_p ())
9376 emit_move_insn (scratch, XEXP (src, 0));
9377 if (index != 0)
9378 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9379 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9381 else
9382 src = adjust_address (src, PSImode, index * 4);
9384 dst = get_fpscr_rtx ();
9385 emit_move_insn (dst, src);
9388 void
9389 emit_sf_insn (rtx pat)
9391 emit_insn (pat);
9394 void
9395 emit_df_insn (rtx pat)
9397 emit_insn (pat);
9400 void
9401 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9403 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9406 void
9407 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9409 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9410 get_fpscr_rtx ()));
9413 void
9414 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9416 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9419 void
9420 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9422 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9423 get_fpscr_rtx ()));
9426 static rtx get_free_reg (HARD_REG_SET);
9428 /* This function returns a register to use to load the address to load
9429 the fpscr from. Currently it always returns r1 or r7, but when we are
9430 able to use pseudo registers after combine, or have a better mechanism
9431 for choosing a register, it should be done here. */
9432 /* REGS_LIVE is the liveness information for the point for which we
9433 need this allocation. In some bare-bones exit blocks, r1 is live at the
9434 start. We can even have all of r0..r3 being live:
9435 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9436 INSN before which new insns are placed with will clobber the register
9437 we return. If a basic block consists only of setting the return value
9438 register to a pseudo and using that register, the return value is not
9439 live before or after this block, yet we we'll insert our insns right in
9440 the middle. */
9442 static rtx
9443 get_free_reg (HARD_REG_SET regs_live)
9445 if (! TEST_HARD_REG_BIT (regs_live, 1))
9446 return gen_rtx_REG (Pmode, 1);
9448 /* Hard reg 1 is live; since this is a small register classes target,
9449 there shouldn't be anything but a jump before the function end. */
9450 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9451 return gen_rtx_REG (Pmode, 7);
9454 /* This function will set the fpscr from memory.
9455 MODE is the mode we are setting it to. */
9456 void
9457 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9459 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9460 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9461 rtx addr_reg;
9463 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9464 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9467 /* Is the given character a logical line separator for the assembler? */
9468 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9469 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9470 #endif
9473 sh_insn_length_adjustment (rtx insn)
9475 /* Instructions with unfilled delay slots take up an extra two bytes for
9476 the nop in the delay slot. */
9477 if (((NONJUMP_INSN_P (insn)
9478 && GET_CODE (PATTERN (insn)) != USE
9479 && GET_CODE (PATTERN (insn)) != CLOBBER)
9480 || CALL_P (insn)
9481 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9482 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9483 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9484 return 2;
9486 /* SH2e has a bug that prevents the use of annulled branches, so if
9487 the delay slot is not filled, we'll have to put a NOP in it. */
9488 if (sh_cpu_attr == CPU_SH2E
9489 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9490 && get_attr_type (insn) == TYPE_CBRANCH
9491 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9492 return 2;
9494 /* sh-dsp parallel processing insn take four bytes instead of two. */
9496 if (NONJUMP_INSN_P (insn))
9498 int sum = 0;
9499 rtx body = PATTERN (insn);
9500 const char *templ;
9501 char c;
9502 int maybe_label = 1;
9504 if (GET_CODE (body) == ASM_INPUT)
9505 templ = XSTR (body, 0);
9506 else if (asm_noperands (body) >= 0)
9507 templ
9508 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9509 else
9510 return 0;
9513 int ppi_adjust = 0;
9516 c = *templ++;
9517 while (c == ' ' || c == '\t');
9518 /* all sh-dsp parallel-processing insns start with p.
9519 The only non-ppi sh insn starting with p is pref.
9520 The only ppi starting with pr is prnd. */
9521 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9522 ppi_adjust = 2;
9523 /* The repeat pseudo-insn expands two three insns, a total of
9524 six bytes in size. */
9525 else if ((c == 'r' || c == 'R')
9526 && ! strncasecmp ("epeat", templ, 5))
9527 ppi_adjust = 4;
9528 while (c && c != '\n'
9529 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9531 /* If this is a label, it is obviously not a ppi insn. */
9532 if (c == ':' && maybe_label)
9534 ppi_adjust = 0;
9535 break;
9537 else if (c == '\'' || c == '"')
9538 maybe_label = 0;
9539 c = *templ++;
9541 sum += ppi_adjust;
9542 maybe_label = c != ':';
9544 while (c);
9545 return sum;
9547 return 0;
9550 /* Return TRUE for a valid displacement for the REG+disp addressing
9551 with MODE. */
9553 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9554 into the FRx registers. We implement this by setting the maximum offset
9555 to zero when the value is SFmode. This also restricts loading of SFmode
9556 values into the integer registers, but that can't be helped. */
9558 /* The SH allows a displacement in a QI or HI amode, but only when the
9559 other operand is R0. GCC doesn't handle this very well, so we forgot
9560 all of that.
9562 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9563 DI can be any number 0..60. */
9565 bool
9566 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9568 if (CONST_INT_P (op))
9570 if (TARGET_SHMEDIA)
9572 int size;
9574 /* Check if this the address of an unaligned load / store. */
9575 if (mode == VOIDmode)
9576 return CONST_OK_FOR_I06 (INTVAL (op));
9578 size = GET_MODE_SIZE (mode);
9579 return (!(INTVAL (op) & (size - 1))
9580 && INTVAL (op) >= -512 * size
9581 && INTVAL (op) < 512 * size);
9584 if (TARGET_SH2A)
9586 if (GET_MODE_SIZE (mode) == 1
9587 && (unsigned) INTVAL (op) < 4096)
9588 return true;
9591 if ((GET_MODE_SIZE (mode) == 4
9592 && (unsigned) INTVAL (op) < 64
9593 && !(INTVAL (op) & 3)
9594 && !(TARGET_SH2E && mode == SFmode))
9595 || (GET_MODE_SIZE (mode) == 4
9596 && (unsigned) INTVAL (op) < 16383
9597 && !(INTVAL (op) & 3) && TARGET_SH2A))
9598 return true;
9600 if ((GET_MODE_SIZE (mode) == 8
9601 && (unsigned) INTVAL (op) < 60
9602 && !(INTVAL (op) & 3)
9603 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9604 || ((GET_MODE_SIZE (mode)==8)
9605 && (unsigned) INTVAL (op) < 8192
9606 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9607 && (TARGET_SH2A && mode == DFmode)))
9608 return true;
9611 return false;
9614 /* Recognize an RTL expression that is a valid memory address for
9615 an instruction.
9616 The MODE argument is the machine mode for the MEM expression
9617 that wants to use this address.
9618 Allow REG
9619 REG+disp
9620 REG+r0
9621 REG++
9622 --REG */
9624 static bool
9625 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9627 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9628 return true;
9629 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9630 && ! TARGET_SHMEDIA
9631 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9632 return true;
9633 else if (GET_CODE (x) == PLUS
9634 && (mode != PSImode || reload_completed))
9636 rtx xop0 = XEXP (x, 0);
9637 rtx xop1 = XEXP (x, 1);
9639 if (GET_MODE_SIZE (mode) <= 8
9640 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9641 && sh_legitimate_index_p (mode, xop1))
9642 return true;
9644 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9645 || ((xop0 == stack_pointer_rtx
9646 || xop0 == hard_frame_pointer_rtx)
9647 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9648 || ((xop1 == stack_pointer_rtx
9649 || xop1 == hard_frame_pointer_rtx)
9650 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9651 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9652 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9653 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9654 && TARGET_FMOVD && mode == DFmode)))
9656 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9657 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9658 return true;
9659 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9660 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9661 return true;
9665 return false;
9668 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9669 isn't protected by a PIC unspec. */
9671 nonpic_symbol_mentioned_p (rtx x)
9673 register const char *fmt;
9674 register int i;
9676 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9677 || GET_CODE (x) == PC)
9678 return 1;
9680 /* We don't want to look into the possible MEM location of a
9681 CONST_DOUBLE, since we're not going to use it, in general. */
9682 if (GET_CODE (x) == CONST_DOUBLE)
9683 return 0;
9685 if (GET_CODE (x) == UNSPEC
9686 && (XINT (x, 1) == UNSPEC_PIC
9687 || XINT (x, 1) == UNSPEC_GOT
9688 || XINT (x, 1) == UNSPEC_GOTOFF
9689 || XINT (x, 1) == UNSPEC_GOTPLT
9690 || XINT (x, 1) == UNSPEC_GOTTPOFF
9691 || XINT (x, 1) == UNSPEC_DTPOFF
9692 || XINT (x, 1) == UNSPEC_TPOFF
9693 || XINT (x, 1) == UNSPEC_PLT
9694 || XINT (x, 1) == UNSPEC_SYMOFF
9695 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9696 return 0;
9698 fmt = GET_RTX_FORMAT (GET_CODE (x));
9699 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9701 if (fmt[i] == 'E')
9703 register int j;
9705 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9706 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9707 return 1;
9709 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9710 return 1;
9713 return 0;
9716 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9717 @GOTOFF in `reg'. */
9719 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9720 rtx reg)
9722 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9723 return orig;
9725 if (GET_CODE (orig) == LABEL_REF
9726 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9728 if (reg == 0)
9729 reg = gen_reg_rtx (Pmode);
9731 emit_insn (gen_symGOTOFF2reg (reg, orig));
9732 return reg;
9734 else if (GET_CODE (orig) == SYMBOL_REF)
9736 if (reg == 0)
9737 reg = gen_reg_rtx (Pmode);
9739 emit_insn (gen_symGOT2reg (reg, orig));
9740 return reg;
9742 return orig;
9745 /* Try machine-dependent ways of modifying an illegitimate address
9746 to be legitimate. If we find one, return the new, valid address.
9747 Otherwise, return X.
9749 For the SH, if X is almost suitable for indexing, but the offset is
9750 out of range, convert it into a normal form so that CSE has a chance
9751 of reducing the number of address registers used. */
9753 static rtx
9754 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9756 if (flag_pic)
9757 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9759 if (GET_CODE (x) == PLUS
9760 && (GET_MODE_SIZE (mode) == 4
9761 || GET_MODE_SIZE (mode) == 8)
9762 && CONST_INT_P (XEXP (x, 1))
9763 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9764 && ! TARGET_SHMEDIA
9765 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9766 && ! (TARGET_SH2E && mode == SFmode))
9768 rtx index_rtx = XEXP (x, 1);
9769 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9770 rtx sum;
9772 /* On rare occasions, we might get an unaligned pointer
9773 that is indexed in a way to give an aligned address.
9774 Therefore, keep the lower two bits in offset_base. */
9775 /* Instead of offset_base 128..131 use 124..127, so that
9776 simple add suffices. */
9777 if (offset > 127)
9778 offset_base = ((offset + 4) & ~60) - 4;
9779 else
9780 offset_base = offset & ~60;
9782 /* Sometimes the normal form does not suit DImode. We
9783 could avoid that by using smaller ranges, but that
9784 would give less optimized code when SImode is
9785 prevalent. */
9786 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9788 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9789 GEN_INT (offset_base), NULL_RTX, 0,
9790 OPTAB_LIB_WIDEN);
9792 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9796 return x;
9799 /* Attempt to replace *P, which is an address that needs reloading, with
9800 a valid memory address for an operand of mode MODE.
9801 Like for sh_legitimize_address, for the SH we try to get a normal form
9802 of the address. That will allow inheritance of the address reloads. */
9804 bool
9805 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9806 int itype)
9808 enum reload_type type = (enum reload_type) itype;
9810 if (GET_CODE (*p) == PLUS
9811 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9812 && CONST_INT_P (XEXP (*p, 1))
9813 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9814 && ! TARGET_SHMEDIA
9815 && ! (TARGET_SH4 && mode == DFmode)
9816 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9817 && (ALLOW_INDEXED_ADDRESS
9818 || XEXP (*p, 0) == stack_pointer_rtx
9819 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9821 rtx index_rtx = XEXP (*p, 1);
9822 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9823 rtx sum;
9825 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9827 push_reload (*p, NULL_RTX, p, NULL,
9828 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9829 goto win;
9831 if (TARGET_SH2E && mode == SFmode)
9833 *p = copy_rtx (*p);
9834 push_reload (*p, NULL_RTX, p, NULL,
9835 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9836 goto win;
9838 /* Instead of offset_base 128..131 use 124..127, so that
9839 simple add suffices. */
9840 if (offset > 127)
9841 offset_base = ((offset + 4) & ~60) - 4;
9842 else
9843 offset_base = offset & ~60;
9844 /* Sometimes the normal form does not suit DImode. We could avoid
9845 that by using smaller ranges, but that would give less optimized
9846 code when SImode is prevalent. */
9847 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9849 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9850 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9851 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9852 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9853 goto win;
9856 /* We must re-recognize what we created before. */
9857 else if (GET_CODE (*p) == PLUS
9858 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9859 && GET_CODE (XEXP (*p, 0)) == PLUS
9860 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9861 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9862 && CONST_INT_P (XEXP (*p, 1))
9863 && ! TARGET_SHMEDIA
9864 && ! (TARGET_SH2E && mode == SFmode))
9866 /* Because this address is so complex, we know it must have
9867 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9868 it is already unshared, and needs no further unsharing. */
9869 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9870 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9871 goto win;
9874 return false;
9876 win:
9877 return true;
9880 /* In the name of slightly smaller debug output, and to cater to
9881 general assembler lossage, recognize various UNSPEC sequences
9882 and turn them back into a direct symbol reference. */
9884 static rtx
9885 sh_delegitimize_address (rtx orig_x)
9887 rtx x, y;
9889 orig_x = delegitimize_mem_from_attrs (orig_x);
9891 x = orig_x;
9892 if (MEM_P (x))
9893 x = XEXP (x, 0);
9894 if (GET_CODE (x) == CONST)
9896 y = XEXP (x, 0);
9897 if (GET_CODE (y) == UNSPEC)
9899 if (XINT (y, 1) == UNSPEC_GOT
9900 || XINT (y, 1) == UNSPEC_GOTOFF)
9901 return XVECEXP (y, 0, 0);
9902 else if (TARGET_SHMEDIA
9903 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
9904 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
9906 rtx offset = XVECEXP (y, 0, 1);
9908 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
9909 if (MEM_P (orig_x))
9910 x = replace_equiv_address_nv (orig_x, x);
9911 return x;
9916 return orig_x;
9919 /* Mark the use of a constant in the literal table. If the constant
9920 has multiple labels, make it unique. */
9921 static rtx
9922 mark_constant_pool_use (rtx x)
9924 rtx insn, lab, pattern;
9926 if (x == NULL)
9927 return x;
9929 switch (GET_CODE (x))
9931 case LABEL_REF:
9932 x = XEXP (x, 0);
9933 case CODE_LABEL:
9934 break;
9935 default:
9936 return x;
9939 /* Get the first label in the list of labels for the same constant
9940 and delete another labels in the list. */
9941 lab = x;
9942 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9944 if (!LABEL_P (insn)
9945 || LABEL_REFS (insn) != NEXT_INSN (insn))
9946 break;
9947 lab = insn;
9950 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9951 INSN_DELETED_P (insn) = 1;
9953 /* Mark constants in a window. */
9954 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9956 if (!NONJUMP_INSN_P (insn))
9957 continue;
9959 pattern = PATTERN (insn);
9960 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9961 continue;
9963 switch (XINT (pattern, 1))
9965 case UNSPECV_CONST2:
9966 case UNSPECV_CONST4:
9967 case UNSPECV_CONST8:
9968 XVECEXP (pattern, 0, 1) = const1_rtx;
9969 break;
9970 case UNSPECV_WINDOW_END:
9971 if (XVECEXP (pattern, 0, 0) == x)
9972 return lab;
9973 break;
9974 case UNSPECV_CONST_END:
9975 return lab;
9976 default:
9977 break;
9981 return lab;
9984 /* Return true if it's possible to redirect BRANCH1 to the destination
9985 of an unconditional jump BRANCH2. We only want to do this if the
9986 resulting branch will have a short displacement. */
9988 sh_can_redirect_branch (rtx branch1, rtx branch2)
9990 if (flag_expensive_optimizations && simplejump_p (branch2))
9992 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9993 rtx insn;
9994 int distance;
9996 for (distance = 0, insn = NEXT_INSN (branch1);
9997 insn && distance < 256;
9998 insn = PREV_INSN (insn))
10000 if (insn == dest)
10001 return 1;
10002 else
10003 distance += get_attr_length (insn);
10005 for (distance = 0, insn = NEXT_INSN (branch1);
10006 insn && distance < 256;
10007 insn = NEXT_INSN (insn))
10009 if (insn == dest)
10010 return 1;
10011 else
10012 distance += get_attr_length (insn);
10015 return 0;
10018 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10020 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10021 unsigned int new_reg)
10023 /* Interrupt functions can only use registers that have already been
10024 saved by the prologue, even if they would normally be
10025 call-clobbered. */
10027 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10028 return 0;
10030 return 1;
10033 /* Function to update the integer COST
10034 based on the relationship between INSN that is dependent on
10035 DEP_INSN through the dependence LINK. The default is to make no
10036 adjustment to COST. This can be used for example to specify to
10037 the scheduler that an output- or anti-dependence does not incur
10038 the same cost as a data-dependence. The return value should be
10039 the new value for COST. */
10040 static int
10041 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10043 rtx reg, use_pat;
10045 if (TARGET_SHMEDIA)
10047 /* On SHmedia, if the dependence is an anti-dependence or
10048 output-dependence, there is no cost. */
10049 if (REG_NOTE_KIND (link) != 0)
10051 /* However, dependencies between target register loads and
10052 uses of the register in a subsequent block that are separated
10053 by a conditional branch are not modelled - we have to do with
10054 the anti-dependency between the target register load and the
10055 conditional branch that ends the current block. */
10056 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10057 && GET_CODE (PATTERN (dep_insn)) == SET
10058 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10059 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10060 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10062 int orig_cost = cost;
10063 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10064 rtx target = ((! note
10065 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10066 ? insn : JUMP_LABEL (insn));
10067 /* On the likely path, the branch costs 1, on the unlikely path,
10068 it costs 3. */
10069 cost--;
10071 target = next_active_insn (target);
10072 while (target && ! flow_dependent_p (target, dep_insn)
10073 && --cost > 0);
10074 /* If two branches are executed in immediate succession, with the
10075 first branch properly predicted, this causes a stall at the
10076 second branch, hence we won't need the target for the
10077 second branch for two cycles after the launch of the first
10078 branch. */
10079 if (cost > orig_cost - 2)
10080 cost = orig_cost - 2;
10082 else
10083 cost = 0;
10086 else if (get_attr_is_mac_media (insn)
10087 && get_attr_is_mac_media (dep_insn))
10088 cost = 1;
10090 else if (! reload_completed
10091 && GET_CODE (PATTERN (insn)) == SET
10092 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10093 && GET_CODE (PATTERN (dep_insn)) == SET
10094 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10095 && cost < 4)
10096 cost = 4;
10097 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10098 that is needed at the target. */
10099 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10100 && ! flow_dependent_p (insn, dep_insn))
10101 cost--;
10103 else if (REG_NOTE_KIND (link) == 0)
10105 enum attr_type type;
10106 rtx dep_set;
10108 if (recog_memoized (insn) < 0
10109 || recog_memoized (dep_insn) < 0)
10110 return cost;
10112 dep_set = single_set (dep_insn);
10114 /* The latency that we specify in the scheduling description refers
10115 to the actual output, not to an auto-increment register; for that,
10116 the latency is one. */
10117 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10119 rtx set = single_set (insn);
10121 if (set
10122 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10123 && (!MEM_P (SET_DEST (set))
10124 || !reg_mentioned_p (SET_DEST (dep_set),
10125 XEXP (SET_DEST (set), 0))))
10126 cost = 1;
10128 /* The only input for a call that is timing-critical is the
10129 function's address. */
10130 if (CALL_P (insn))
10132 rtx call = PATTERN (insn);
10134 if (GET_CODE (call) == PARALLEL)
10135 call = XVECEXP (call, 0 ,0);
10136 if (GET_CODE (call) == SET)
10137 call = SET_SRC (call);
10138 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10139 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10140 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10141 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10142 cost -= TARGET_SH4_300 ? 3 : 6;
10144 /* Likewise, the most timing critical input for an sfuncs call
10145 is the function address. However, sfuncs typically start
10146 using their arguments pretty quickly.
10147 Assume a four cycle delay for SH4 before they are needed.
10148 Cached ST40-300 calls are quicker, so assume only a one
10149 cycle delay there.
10150 ??? Maybe we should encode the delays till input registers
10151 are needed by sfuncs into the sfunc call insn. */
10152 /* All sfunc calls are parallels with at least four components.
10153 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10154 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10155 && XVECLEN (PATTERN (insn), 0) >= 4
10156 && (reg = sfunc_uses_reg (insn)))
10158 if (! reg_set_p (reg, dep_insn))
10159 cost -= TARGET_SH4_300 ? 1 : 4;
10161 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10163 enum attr_type dep_type = get_attr_type (dep_insn);
10165 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10166 cost--;
10167 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10168 && (type = get_attr_type (insn)) != TYPE_CALL
10169 && type != TYPE_SFUNC)
10170 cost--;
10171 /* When the preceding instruction loads the shift amount of
10172 the following SHAD/SHLD, the latency of the load is increased
10173 by 1 cycle. */
10174 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10175 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10176 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10177 XEXP (SET_SRC (single_set (insn)),
10178 1)))
10179 cost++;
10180 /* When an LS group instruction with a latency of less than
10181 3 cycles is followed by a double-precision floating-point
10182 instruction, FIPR, or FTRV, the latency of the first
10183 instruction is increased to 3 cycles. */
10184 else if (cost < 3
10185 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10186 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10187 cost = 3;
10188 /* The lsw register of a double-precision computation is ready one
10189 cycle earlier. */
10190 else if (reload_completed
10191 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10192 && (use_pat = single_set (insn))
10193 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10194 SET_SRC (use_pat)))
10195 cost -= 1;
10197 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10198 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10199 cost -= 1;
10201 else if (TARGET_SH4_300)
10203 /* Stores need their input register two cycles later. */
10204 if (dep_set && cost >= 1
10205 && ((type = get_attr_type (insn)) == TYPE_STORE
10206 || type == TYPE_PSTORE
10207 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10209 rtx set = single_set (insn);
10211 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10212 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10214 cost -= 2;
10215 /* But don't reduce the cost below 1 if the address depends
10216 on a side effect of dep_insn. */
10217 if (cost < 1
10218 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10219 cost = 1;
10224 /* An anti-dependence penalty of two applies if the first insn is a double
10225 precision fadd / fsub / fmul. */
10226 else if (!TARGET_SH4_300
10227 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10228 && recog_memoized (dep_insn) >= 0
10229 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10230 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10231 /* A lot of alleged anti-flow dependences are fake,
10232 so check this one is real. */
10233 && flow_dependent_p (dep_insn, insn))
10234 cost = 2;
10236 return cost;
10239 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10240 if DEP_INSN is anti-flow dependent on INSN. */
10241 static int
10242 flow_dependent_p (rtx insn, rtx dep_insn)
10244 rtx tmp = PATTERN (insn);
10246 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10247 return tmp == NULL_RTX;
10250 /* A helper function for flow_dependent_p called through note_stores. */
10251 static void
10252 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10254 rtx * pinsn = (rtx *) data;
10256 if (*pinsn && reg_referenced_p (x, *pinsn))
10257 *pinsn = NULL_RTX;
10260 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10261 'special function' patterns (type sfunc) that clobber pr, but that
10262 do not look like function calls to leaf_function_p. Hence we must
10263 do this extra check. */
10264 static int
10265 sh_pr_n_sets (void)
10267 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10270 /* Return where to allocate pseudo for a given hard register initial
10271 value. */
10272 static rtx
10273 sh_allocate_initial_value (rtx hard_reg)
10275 rtx x;
10277 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10279 if (current_function_is_leaf
10280 && ! sh_pr_n_sets ()
10281 && ! (TARGET_SHCOMPACT
10282 && ((crtl->args.info.call_cookie
10283 & ~ CALL_COOKIE_RET_TRAMP (1))
10284 || crtl->saves_all_registers)))
10285 x = hard_reg;
10286 else
10287 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10289 else
10290 x = NULL_RTX;
10292 return x;
10295 /* This function returns "2" to indicate dual issue for the SH4
10296 processor. To be used by the DFA pipeline description. */
10297 static int
10298 sh_issue_rate (void)
10300 if (TARGET_SUPERSCALAR)
10301 return 2;
10302 else
10303 return 1;
10306 /* Functions for ready queue reordering for sched1. */
10308 /* Get weight for mode for a set x. */
10309 static short
10310 find_set_regmode_weight (rtx x, enum machine_mode mode)
10312 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10313 return 1;
10314 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10316 if (REG_P (SET_DEST (x)))
10318 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10319 return 1;
10320 else
10321 return 0;
10323 return 1;
10325 return 0;
10328 /* Get regmode weight for insn. */
10329 static short
10330 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10332 short reg_weight = 0;
10333 rtx x;
10335 /* Increment weight for each register born here. */
10336 x = PATTERN (insn);
10337 reg_weight += find_set_regmode_weight (x, mode);
10338 if (GET_CODE (x) == PARALLEL)
10340 int j;
10341 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10343 x = XVECEXP (PATTERN (insn), 0, j);
10344 reg_weight += find_set_regmode_weight (x, mode);
10347 /* Decrement weight for each register that dies here. */
10348 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10350 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10352 rtx note = XEXP (x, 0);
10353 if (REG_P (note) && GET_MODE (note) == mode)
10354 reg_weight--;
10357 return reg_weight;
10360 /* Calculate regmode weights for all insns of a basic block. */
10361 static void
10362 find_regmode_weight (basic_block b, enum machine_mode mode)
10364 rtx insn, next_tail, head, tail;
10366 get_ebb_head_tail (b, b, &head, &tail);
10367 next_tail = NEXT_INSN (tail);
10369 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10371 /* Handle register life information. */
10372 if (!INSN_P (insn))
10373 continue;
10375 if (mode == SFmode)
10376 INSN_REGMODE_WEIGHT (insn, mode) =
10377 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10378 else if (mode == SImode)
10379 INSN_REGMODE_WEIGHT (insn, mode) =
10380 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10384 /* Comparison function for ready queue sorting. */
10385 static int
10386 rank_for_reorder (const void *x, const void *y)
10388 rtx tmp = *(const rtx *) y;
10389 rtx tmp2 = *(const rtx *) x;
10391 /* The insn in a schedule group should be issued the first. */
10392 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10393 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10395 /* If insns are equally good, sort by INSN_LUID (original insn order), This
10396 minimizes instruction movement, thus minimizing sched's effect on
10397 register pressure. */
10398 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10401 /* Resort the array A in which only element at index N may be out of order. */
10402 static void
10403 swap_reorder (rtx *a, int n)
10405 rtx insn = a[n - 1];
10406 int i = n - 2;
10408 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10410 a[i + 1] = a[i];
10411 i -= 1;
10413 a[i + 1] = insn;
10416 #define SCHED_REORDER(READY, N_READY) \
10417 do \
10419 if ((N_READY) == 2) \
10420 swap_reorder (READY, N_READY); \
10421 else if ((N_READY) > 2) \
10422 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10424 while (0)
10426 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10427 macro. */
10428 static void
10429 ready_reorder (rtx *ready, int nready)
10431 SCHED_REORDER (ready, nready);
10434 /* Count life regions of r0 for a block. */
10435 static int
10436 find_r0_life_regions (basic_block b)
10438 rtx end, insn;
10439 rtx pset;
10440 rtx r0_reg;
10441 int live;
10442 int set;
10443 int death = 0;
10445 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10447 set = 1;
10448 live = 1;
10450 else
10452 set = 0;
10453 live = 0;
10456 insn = BB_HEAD (b);
10457 end = BB_END (b);
10458 r0_reg = gen_rtx_REG (SImode, R0_REG);
10459 while (1)
10461 if (INSN_P (insn))
10463 if (find_regno_note (insn, REG_DEAD, R0_REG))
10465 death++;
10466 live = 0;
10468 if (!live
10469 && (pset = single_set (insn))
10470 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10471 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10473 set++;
10474 live = 1;
10477 if (insn == end)
10478 break;
10479 insn = NEXT_INSN (insn);
10481 return set - death;
10484 /* Calculate regmode weights for all insns of all basic block. */
10485 static void
10486 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10487 int verbose ATTRIBUTE_UNUSED,
10488 int old_max_uid)
10490 basic_block b;
10492 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10493 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10494 r0_life_regions = 0;
10496 FOR_EACH_BB_REVERSE (b)
10498 find_regmode_weight (b, SImode);
10499 find_regmode_weight (b, SFmode);
10500 if (!reload_completed)
10501 r0_life_regions += find_r0_life_regions (b);
10504 CURR_REGMODE_PRESSURE (SImode) = 0;
10505 CURR_REGMODE_PRESSURE (SFmode) = 0;
10509 /* Cleanup. */
10510 static void
10511 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10512 int verbose ATTRIBUTE_UNUSED)
10514 if (regmode_weight[0])
10516 free (regmode_weight[0]);
10517 regmode_weight[0] = NULL;
10519 if (regmode_weight[1])
10521 free (regmode_weight[1]);
10522 regmode_weight[1] = NULL;
10526 /* The scalar modes supported differs from the default version in TImode
10527 for 32-bit SHMEDIA. */
10528 static bool
10529 sh_scalar_mode_supported_p (enum machine_mode mode)
10531 if (TARGET_SHMEDIA32 && mode == TImode)
10532 return false;
10534 return default_scalar_mode_supported_p (mode);
10537 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10538 keep count of register pressures on SImode and SFmode. */
10539 static int
10540 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10541 int sched_verbose ATTRIBUTE_UNUSED,
10542 rtx insn,
10543 int can_issue_more)
10545 if (GET_CODE (PATTERN (insn)) != USE
10546 && GET_CODE (PATTERN (insn)) != CLOBBER)
10547 cached_can_issue_more = can_issue_more - 1;
10548 else
10549 cached_can_issue_more = can_issue_more;
10551 if (reload_completed)
10552 return cached_can_issue_more;
10554 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10555 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10557 return cached_can_issue_more;
10560 static void
10561 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10562 int verbose ATTRIBUTE_UNUSED,
10563 int veclen ATTRIBUTE_UNUSED)
10565 CURR_REGMODE_PRESSURE (SImode) = 0;
10566 CURR_REGMODE_PRESSURE (SFmode) = 0;
10569 /* Some magic numbers. */
10570 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10571 functions that already have high pressure on r0. */
10572 #define R0_MAX_LIFE_REGIONS 2
10573 /* Register Pressure thresholds for SImode and SFmode registers. */
10574 #define SIMODE_MAX_WEIGHT 5
10575 #define SFMODE_MAX_WEIGHT 10
10577 /* Return true if the pressure is high for MODE. */
10578 static short
10579 high_pressure (enum machine_mode mode)
10581 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10582 functions that already have high pressure on r0. */
10583 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10584 return 1;
10586 if (mode == SFmode)
10587 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10588 else
10589 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10592 /* Reorder ready queue if register pressure is high. */
10593 static int
10594 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10595 int sched_verbose ATTRIBUTE_UNUSED,
10596 rtx *ready,
10597 int *n_readyp,
10598 int clock_var ATTRIBUTE_UNUSED)
10600 if (reload_completed)
10601 return sh_issue_rate ();
10603 if (high_pressure (SFmode) || high_pressure (SImode))
10605 ready_reorder (ready, *n_readyp);
10608 return sh_issue_rate ();
10611 /* Skip cycles if the current register pressure is high. */
10612 static int
10613 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10614 int sched_verbose ATTRIBUTE_UNUSED,
10615 rtx *ready ATTRIBUTE_UNUSED,
10616 int *n_readyp ATTRIBUTE_UNUSED,
10617 int clock_var ATTRIBUTE_UNUSED)
10619 if (reload_completed)
10620 return cached_can_issue_more;
10622 if (high_pressure(SFmode) || high_pressure (SImode))
10623 skip_cycles = 1;
10625 return cached_can_issue_more;
10628 /* Skip cycles without sorting the ready queue. This will move insn from
10629 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10630 queue by sh_reorder. */
10632 /* Generally, skipping these many cycles are sufficient for all insns to move
10633 from Q -> R. */
10634 #define MAX_SKIPS 8
10636 static int
10637 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10638 int sched_verbose ATTRIBUTE_UNUSED,
10639 rtx insn ATTRIBUTE_UNUSED,
10640 int last_clock_var,
10641 int clock_var,
10642 int *sort_p)
10644 if (reload_completed)
10645 return 0;
10647 if (skip_cycles)
10649 if ((clock_var - last_clock_var) < MAX_SKIPS)
10651 *sort_p = 0;
10652 return 1;
10654 /* If this is the last cycle we are skipping, allow reordering of R. */
10655 if ((clock_var - last_clock_var) == MAX_SKIPS)
10657 *sort_p = 1;
10658 return 1;
10662 skip_cycles = 0;
10664 return 0;
10667 /* SHmedia requires registers for branches, so we can't generate new
10668 branches past reload. */
10669 static bool
10670 sh_cannot_modify_jumps_p (void)
10672 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10675 static reg_class_t
10676 sh_target_reg_class (void)
10678 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10681 static bool
10682 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10684 HARD_REG_SET dummy;
10685 #if 0
10686 rtx insn;
10687 #endif
10689 if (! shmedia_space_reserved_for_target_registers)
10690 return 0;
10691 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10692 return 0;
10693 if (calc_live_regs (&dummy) >= 6 * 8)
10694 return 1;
10695 return 0;
10698 static bool
10699 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10701 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10705 On the SH1..SH4, the trampoline looks like
10706 2 0002 D202 mov.l l2,r2
10707 1 0000 D301 mov.l l1,r3
10708 3 0004 422B jmp @r2
10709 4 0006 0009 nop
10710 5 0008 00000000 l1: .long area
10711 6 000c 00000000 l2: .long function
10713 SH5 (compact) uses r1 instead of r3 for the static chain. */
10716 /* Emit RTL insns to initialize the variable parts of a trampoline.
10717 FNADDR is an RTX for the address of the function's pure code.
10718 CXT is an RTX for the static chain value for the function. */
10720 static void
10721 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10723 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10724 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10726 if (TARGET_SHMEDIA64)
10728 rtx tramp_templ;
10729 int fixed_len;
10731 rtx movi1 = GEN_INT (0xcc000010);
10732 rtx shori1 = GEN_INT (0xc8000010);
10733 rtx src, dst;
10735 /* The following trampoline works within a +- 128 KB range for cxt:
10736 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10737 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10738 gettr tr1,r1; blink tr0,r63 */
10739 /* Address rounding makes it hard to compute the exact bounds of the
10740 offset for this trampoline, but we have a rather generous offset
10741 range, so frame_offset should do fine as an upper bound. */
10742 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10744 /* ??? could optimize this trampoline initialization
10745 by writing DImode words with two insns each. */
10746 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10747 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10748 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10749 insn = gen_rtx_AND (DImode, insn, mask);
10750 /* Or in ptb/u .,tr1 pattern */
10751 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10752 insn = force_operand (insn, NULL_RTX);
10753 insn = gen_lowpart (SImode, insn);
10754 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10755 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10756 insn = gen_rtx_AND (DImode, insn, mask);
10757 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10758 insn = gen_lowpart (SImode, insn);
10759 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10760 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10761 insn = gen_rtx_AND (DImode, insn, mask);
10762 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10763 insn = gen_lowpart (SImode, insn);
10764 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10765 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10766 insn = gen_rtx_AND (DImode, insn, mask);
10767 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10768 insn = gen_lowpart (SImode, insn);
10769 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10770 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10771 insn = gen_rtx_AND (DImode, insn, mask);
10772 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10773 insn = gen_lowpart (SImode, insn);
10774 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10775 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10776 GEN_INT (0x6bf10600));
10777 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10778 GEN_INT (0x4415fc10));
10779 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10780 GEN_INT (0x4401fff0));
10781 emit_insn (gen_ic_invalidate_line (tramp));
10782 return;
10784 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10785 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10787 tramp_templ = gen_datalabel_ref (tramp_templ);
10788 dst = tramp_mem;
10789 src = gen_const_mem (BLKmode, tramp_templ);
10790 set_mem_align (dst, 256);
10791 set_mem_align (src, 64);
10792 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10794 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10795 emit_move_insn (adjust_address (tramp_mem, Pmode,
10796 fixed_len + GET_MODE_SIZE (Pmode)),
10797 cxt);
10798 emit_insn (gen_ic_invalidate_line (tramp));
10799 return;
10801 else if (TARGET_SHMEDIA)
10803 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10804 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10805 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10806 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10807 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10808 rotated 10 right, and higher 16 bit of every 32 selected. */
10809 rtx movishori
10810 = force_reg (V2HImode, (simplify_gen_subreg
10811 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10812 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10813 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10815 fnaddr = force_reg (SImode, fnaddr);
10816 cxt = force_reg (SImode, cxt);
10817 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10818 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10819 movishori));
10820 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10821 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10822 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10823 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10824 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10825 gen_rtx_SUBREG (V2HImode, cxt, 0),
10826 movishori));
10827 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10828 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10829 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10830 if (TARGET_LITTLE_ENDIAN)
10832 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10833 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10835 else
10837 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10838 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10840 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10841 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10842 emit_insn (gen_ic_invalidate_line (tramp));
10843 return;
10845 else if (TARGET_SHCOMPACT)
10847 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10848 return;
10850 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10851 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10852 SImode));
10853 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10854 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10855 SImode));
10856 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10857 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10858 if (TARGET_HARVARD)
10860 if (!TARGET_INLINE_IC_INVALIDATE
10861 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10862 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10863 FUNCTION_ORDINARY),
10864 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10865 else
10866 emit_insn (gen_ic_invalidate_line (tramp));
10870 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10872 static rtx
10873 sh_trampoline_adjust_address (rtx tramp)
10875 if (TARGET_SHMEDIA)
10876 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10877 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10878 return tramp;
10881 /* FIXME: This is overly conservative. A SHcompact function that
10882 receives arguments ``by reference'' will have them stored in its
10883 own stack frame, so it must not pass pointers or references to
10884 these arguments to other functions by means of sibling calls. */
10885 /* If PIC, we cannot make sibling calls to global functions
10886 because the PLT requires r12 to be live. */
10887 static bool
10888 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10890 return (1
10891 && (! TARGET_SHCOMPACT
10892 || crtl->args.info.stack_regs == 0)
10893 && ! sh_cfun_interrupt_handler_p ()
10894 && (! flag_pic
10895 || (decl && ! TREE_PUBLIC (decl))
10896 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10899 /* Machine specific built-in functions. */
10901 struct builtin_description
10903 const enum insn_code icode;
10904 const char *const name;
10905 int signature;
10906 tree fndecl;
10909 /* describe number and signedness of arguments; arg[0] == result
10910 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10911 /* 9: 64-bit pointer, 10: 32-bit pointer */
10912 static const char signature_args[][4] =
10914 #define SH_BLTIN_V2SI2 0
10915 { 4, 4 },
10916 #define SH_BLTIN_V4HI2 1
10917 { 4, 4 },
10918 #define SH_BLTIN_V2SI3 2
10919 { 4, 4, 4 },
10920 #define SH_BLTIN_V4HI3 3
10921 { 4, 4, 4 },
10922 #define SH_BLTIN_V8QI3 4
10923 { 4, 4, 4 },
10924 #define SH_BLTIN_MAC_HISI 5
10925 { 1, 4, 4, 1 },
10926 #define SH_BLTIN_SH_HI 6
10927 { 4, 4, 1 },
10928 #define SH_BLTIN_SH_SI 7
10929 { 4, 4, 1 },
10930 #define SH_BLTIN_V4HI2V2SI 8
10931 { 4, 4, 4 },
10932 #define SH_BLTIN_V4HI2V8QI 9
10933 { 4, 4, 4 },
10934 #define SH_BLTIN_SISF 10
10935 { 4, 2 },
10936 #define SH_BLTIN_LDUA_L 11
10937 { 2, 10 },
10938 #define SH_BLTIN_LDUA_Q 12
10939 { 1, 10 },
10940 #define SH_BLTIN_STUA_L 13
10941 { 0, 10, 2 },
10942 #define SH_BLTIN_STUA_Q 14
10943 { 0, 10, 1 },
10944 #define SH_BLTIN_LDUA_L64 15
10945 { 2, 9 },
10946 #define SH_BLTIN_LDUA_Q64 16
10947 { 1, 9 },
10948 #define SH_BLTIN_STUA_L64 17
10949 { 0, 9, 2 },
10950 #define SH_BLTIN_STUA_Q64 18
10951 { 0, 9, 1 },
10952 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10953 #define SH_BLTIN_2 19
10954 #define SH_BLTIN_SU 19
10955 { 1, 2 },
10956 #define SH_BLTIN_3 20
10957 #define SH_BLTIN_SUS 20
10958 { 2, 2, 1 },
10959 #define SH_BLTIN_PSSV 21
10960 { 0, 8, 2, 2 },
10961 #define SH_BLTIN_XXUU 22
10962 #define SH_BLTIN_UUUU 22
10963 { 1, 1, 1, 1 },
10964 #define SH_BLTIN_PV 23
10965 { 0, 8 },
10967 /* mcmv: operands considered unsigned. */
10968 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10969 /* mperm: control value considered unsigned int. */
10970 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10971 /* mshards_q: returns signed short. */
10972 /* nsb: takes long long arg, returns unsigned char. */
10973 static struct builtin_description bdesc[] =
10975 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10976 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10977 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10978 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10979 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10980 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10981 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10982 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10983 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10984 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10985 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10986 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10987 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10988 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10990 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10991 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10992 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10993 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10994 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10995 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10996 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10997 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10998 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10999 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11000 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11001 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11002 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11003 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11004 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11005 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11006 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11007 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11008 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11009 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11010 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11011 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11012 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11013 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11014 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11015 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11016 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11017 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11018 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11019 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11020 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11021 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11022 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11023 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11024 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11025 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11026 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11027 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11028 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11029 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11030 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11031 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11032 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11033 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11034 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11035 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11036 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11037 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11038 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11039 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11040 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11041 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11042 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11043 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11044 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11045 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11046 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11047 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11048 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11049 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11050 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11051 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11052 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11053 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11054 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11055 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11056 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11057 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11058 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11061 static void
11062 sh_media_init_builtins (void)
11064 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11065 struct builtin_description *d;
11067 memset (shared, 0, sizeof shared);
11068 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11070 tree type, arg_type = 0;
11071 int signature = d->signature;
11072 int i;
11074 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11075 type = shared[signature];
11076 else
11078 int has_result = signature_args[signature][0] != 0;
11080 if ((signature_args[signature][1] & 8)
11081 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11082 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11083 continue;
11084 if (! TARGET_FPU_ANY
11085 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11086 continue;
11087 type = void_list_node;
11088 for (i = 3; ; i--)
11090 int arg = signature_args[signature][i];
11091 int opno = i - 1 + has_result;
11093 if (arg & 8)
11094 arg_type = ptr_type_node;
11095 else if (arg)
11096 arg_type = (*lang_hooks.types.type_for_mode)
11097 (insn_data[d->icode].operand[opno].mode,
11098 (arg & 1));
11099 else if (i)
11100 continue;
11101 else
11102 arg_type = void_type_node;
11103 if (i == 0)
11104 break;
11105 type = tree_cons (NULL_TREE, arg_type, type);
11107 type = build_function_type (arg_type, type);
11108 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11109 shared[signature] = type;
11111 d->fndecl =
11112 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11113 NULL, NULL_TREE);
11117 /* Returns the shmedia builtin decl for CODE. */
11119 static tree
11120 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11122 if (code >= ARRAY_SIZE (bdesc))
11123 return error_mark_node;
11125 return bdesc[code].fndecl;
11128 /* Implements target hook vector_mode_supported_p. */
11129 bool
11130 sh_vector_mode_supported_p (enum machine_mode mode)
11132 if (TARGET_FPU_ANY
11133 && ((mode == V2SFmode)
11134 || (mode == V4SFmode)
11135 || (mode == V16SFmode)))
11136 return true;
11138 else if (TARGET_SHMEDIA
11139 && ((mode == V8QImode)
11140 || (mode == V2HImode)
11141 || (mode == V4HImode)
11142 || (mode == V2SImode)))
11143 return true;
11145 return false;
11148 bool
11149 sh_frame_pointer_required (void)
11151 /* If needed override this in other tm.h files to cope with various OS
11152 lossage requiring a frame pointer. */
11153 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11154 return true;
11156 if (crtl->profile)
11157 return true;
11159 return false;
11162 /* Implements target hook dwarf_calling_convention. Return an enum
11163 of dwarf_calling_convention. */
11165 sh_dwarf_calling_convention (const_tree func)
11167 if (sh_attr_renesas_p (func))
11168 return DW_CC_GNU_renesas_sh;
11170 return DW_CC_normal;
11173 static void
11174 sh_init_builtins (void)
11176 if (TARGET_SHMEDIA)
11177 sh_media_init_builtins ();
11180 /* Returns the sh builtin decl for CODE. */
11182 static tree
11183 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11185 if (TARGET_SHMEDIA)
11186 return sh_media_builtin_decl (code, initialize_p);
11188 return error_mark_node;
11191 /* Expand an expression EXP that calls a built-in function,
11192 with result going to TARGET if that's convenient
11193 (and in mode MODE if that's convenient).
11194 SUBTARGET may be used as the target for computing one of EXP's operands.
11195 IGNORE is nonzero if the value is to be ignored. */
11197 static rtx
11198 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11199 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11201 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11202 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11203 const struct builtin_description *d = &bdesc[fcode];
11204 enum insn_code icode = d->icode;
11205 int signature = d->signature;
11206 enum machine_mode tmode = VOIDmode;
11207 int nop = 0, i;
11208 rtx op[4];
11209 rtx pat = 0;
11211 if (signature_args[signature][0])
11213 if (ignore)
11214 return 0;
11216 tmode = insn_data[icode].operand[0].mode;
11217 if (! target
11218 || GET_MODE (target) != tmode
11219 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11220 target = gen_reg_rtx (tmode);
11221 op[nop++] = target;
11223 else
11224 target = 0;
11226 for (i = 1; i <= 3; i++, nop++)
11228 tree arg;
11229 enum machine_mode opmode, argmode;
11230 tree optype;
11232 if (! signature_args[signature][i])
11233 break;
11234 arg = CALL_EXPR_ARG (exp, i - 1);
11235 if (arg == error_mark_node)
11236 return const0_rtx;
11237 if (signature_args[signature][i] & 8)
11239 opmode = ptr_mode;
11240 optype = ptr_type_node;
11242 else
11244 opmode = insn_data[icode].operand[nop].mode;
11245 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11247 argmode = TYPE_MODE (TREE_TYPE (arg));
11248 if (argmode != opmode)
11249 arg = build1 (NOP_EXPR, optype, arg);
11250 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11251 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11252 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11255 switch (nop)
11257 case 1:
11258 pat = (*insn_data[d->icode].genfun) (op[0]);
11259 break;
11260 case 2:
11261 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11262 break;
11263 case 3:
11264 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11265 break;
11266 case 4:
11267 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11268 break;
11269 default:
11270 gcc_unreachable ();
11272 if (! pat)
11273 return 0;
11274 emit_insn (pat);
11275 return target;
11278 void
11279 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11281 rtx sel0 = const0_rtx;
11282 rtx sel1 = const1_rtx;
11283 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11284 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11286 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11287 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11290 void
11291 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11293 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11295 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11296 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11299 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11300 We can allow any mode in any general register. The special registers
11301 only allow SImode. Don't allow any mode in the PR.
11303 We cannot hold DCmode values in the XD registers because alter_reg
11304 handles subregs of them incorrectly. We could work around this by
11305 spacing the XD registers like the DR registers, but this would require
11306 additional memory in every compilation to hold larger register vectors.
11307 We could hold SFmode / SCmode values in XD registers, but that
11308 would require a tertiary reload when reloading from / to memory,
11309 and a secondary reload to reload from / to general regs; that
11310 seems to be a loosing proposition.
11312 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11313 it won't be ferried through GP registers first. */
11315 bool
11316 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11318 if (SPECIAL_REGISTER_P (regno))
11319 return mode == SImode;
11321 if (regno == FPUL_REG)
11322 return (mode == SImode || mode == SFmode);
11324 if (FP_REGISTER_P (regno) && mode == SFmode)
11325 return true;
11327 if (mode == V2SFmode)
11329 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11330 || GENERAL_REGISTER_P (regno)))
11331 return true;
11332 else
11333 return false;
11336 if (mode == V4SFmode)
11338 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11339 || GENERAL_REGISTER_P (regno))
11340 return true;
11341 else
11342 return false;
11345 if (mode == V16SFmode)
11347 if (TARGET_SHMEDIA)
11349 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11350 return true;
11351 else
11352 return false;
11354 else
11355 return regno == FIRST_XD_REG;
11358 if (FP_REGISTER_P (regno))
11360 if (mode == SFmode
11361 || mode == SImode
11362 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11363 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11364 || mode == DCmode
11365 || (TARGET_SHMEDIA
11366 && (mode == DFmode || mode == DImode
11367 || mode == V2SFmode || mode == TImode)))
11368 && ((regno - FIRST_FP_REG) & 1) == 0)
11369 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11370 && ((regno - FIRST_FP_REG) & 3) == 0))
11371 return true;
11372 else
11373 return false;
11376 if (XD_REGISTER_P (regno))
11377 return mode == DFmode;
11379 if (TARGET_REGISTER_P (regno))
11380 return (mode == DImode || mode == SImode || mode == PDImode);
11382 if (regno == PR_REG)
11383 return mode == SImode;
11385 if (regno == FPSCR_REG)
11386 return mode == PSImode;
11388 /* FIXME. This works around PR target/37633 for -O0. */
11389 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11391 unsigned int n = GET_MODE_SIZE (mode) / 8;
11393 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11394 && regno <= FIRST_GENERAL_REG + 14)
11395 return false;
11398 return true;
11401 /* Return the class of registers for which a mode change from FROM to TO
11402 is invalid. */
11403 bool
11404 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11405 enum reg_class rclass)
11407 /* We want to enable the use of SUBREGs as a means to
11408 VEC_SELECT a single element of a vector. */
11409 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11410 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11412 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11414 if (TARGET_LITTLE_ENDIAN)
11416 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11417 return reg_classes_intersect_p (DF_REGS, rclass);
11419 else
11421 if (GET_MODE_SIZE (from) < 8)
11422 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11425 return 0;
11428 /* Return true if registers in machine mode MODE will likely be
11429 allocated to registers in small register classes. */
11431 bool
11432 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11434 return (! TARGET_SHMEDIA);
11437 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11438 that label is used. */
11440 void
11441 sh_mark_label (rtx address, int nuses)
11443 if (GOTOFF_P (address))
11445 /* Extract the label or symbol. */
11446 address = XEXP (address, 0);
11447 if (GET_CODE (address) == PLUS)
11448 address = XEXP (address, 0);
11449 address = XVECEXP (address, 0, 0);
11451 if (GET_CODE (address) == LABEL_REF
11452 && LABEL_P (XEXP (address, 0)))
11453 LABEL_NUSES (XEXP (address, 0)) += nuses;
11456 /* Compute extra cost of moving data between one register class
11457 and another. */
11459 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11460 uses this information. Hence, the general register <-> floating point
11461 register information here is not used for SFmode. */
11463 static int
11464 sh_register_move_cost (enum machine_mode mode,
11465 reg_class_t srcclass, reg_class_t dstclass)
11467 if (dstclass == T_REGS || dstclass == PR_REGS)
11468 return 10;
11470 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11471 return 4;
11473 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11474 && REGCLASS_HAS_FP_REG (srcclass)
11475 && REGCLASS_HAS_FP_REG (dstclass))
11476 return 4;
11478 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11479 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11481 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11482 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11483 return 9;
11485 if ((REGCLASS_HAS_FP_REG (dstclass)
11486 && REGCLASS_HAS_GENERAL_REG (srcclass))
11487 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11488 && REGCLASS_HAS_FP_REG (srcclass)))
11489 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11490 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11492 if ((dstclass == FPUL_REGS
11493 && REGCLASS_HAS_GENERAL_REG (srcclass))
11494 || (srcclass == FPUL_REGS
11495 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11496 return 5;
11498 if ((dstclass == FPUL_REGS
11499 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11500 || (srcclass == FPUL_REGS
11501 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11502 return 7;
11504 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11505 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11506 return 20;
11508 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11509 if (TARGET_SHMEDIA
11510 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11512 if (sh_gettrcost >= 0)
11513 return sh_gettrcost;
11514 else if (!TARGET_PT_FIXED)
11515 return 100;
11518 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11519 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11520 return 4;
11522 if (TARGET_SHMEDIA
11523 || (TARGET_FMOVD
11524 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11525 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11526 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11528 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11531 static rtx emit_load_ptr (rtx, rtx);
11533 static rtx
11534 emit_load_ptr (rtx reg, rtx addr)
11536 rtx mem = gen_const_mem (ptr_mode, addr);
11538 if (Pmode != ptr_mode)
11539 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11540 return emit_move_insn (reg, mem);
11543 static void
11544 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11545 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11546 tree function)
11548 CUMULATIVE_ARGS cum;
11549 int structure_value_byref = 0;
11550 rtx this_rtx, this_value, sibcall, insns, funexp;
11551 tree funtype = TREE_TYPE (function);
11552 int simple_add = CONST_OK_FOR_ADD (delta);
11553 int did_load = 0;
11554 rtx scratch0, scratch1, scratch2;
11555 unsigned i;
11557 reload_completed = 1;
11558 epilogue_completed = 1;
11559 current_function_uses_only_leaf_regs = 1;
11561 emit_note (NOTE_INSN_PROLOGUE_END);
11563 /* Find the "this" pointer. We have such a wide range of ABIs for the
11564 SH that it's best to do this completely machine independently.
11565 "this" is passed as first argument, unless a structure return pointer
11566 comes first, in which case "this" comes second. */
11567 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11568 #ifndef PCC_STATIC_STRUCT_RETURN
11569 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11570 structure_value_byref = 1;
11571 #endif /* not PCC_STATIC_STRUCT_RETURN */
11572 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11574 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11576 sh_function_arg_advance (&cum, Pmode, ptype, true);
11578 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11580 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11581 static chain pointer (even if you can't have nested virtual functions
11582 right now, someone might implement them sometime), and the rest of the
11583 registers are used for argument passing, are callee-saved, or reserved. */
11584 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11585 -ffixed-reg has been used. */
11586 if (! call_used_regs[0] || fixed_regs[0])
11587 error ("r0 needs to be available as a call-clobbered register");
11588 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11589 if (! TARGET_SH5)
11591 if (call_used_regs[1] && ! fixed_regs[1])
11592 scratch1 = gen_rtx_REG (ptr_mode, 1);
11593 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11594 pointing where to return struct values. */
11595 if (call_used_regs[3] && ! fixed_regs[3])
11596 scratch2 = gen_rtx_REG (Pmode, 3);
11598 else if (TARGET_SHMEDIA)
11600 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11601 if (i != REGNO (scratch0) &&
11602 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11604 scratch1 = gen_rtx_REG (ptr_mode, i);
11605 break;
11607 if (scratch1 == scratch0)
11608 error ("Need a second call-clobbered general purpose register");
11609 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11610 if (call_used_regs[i] && ! fixed_regs[i])
11612 scratch2 = gen_rtx_REG (Pmode, i);
11613 break;
11615 if (scratch2 == scratch0)
11616 error ("Need a call-clobbered target register");
11619 this_value = plus_constant (this_rtx, delta);
11620 if (vcall_offset
11621 && (simple_add || scratch0 != scratch1)
11622 && strict_memory_address_p (ptr_mode, this_value))
11624 emit_load_ptr (scratch0, this_value);
11625 did_load = 1;
11628 if (!delta)
11629 ; /* Do nothing. */
11630 else if (simple_add)
11631 emit_move_insn (this_rtx, this_value);
11632 else
11634 emit_move_insn (scratch1, GEN_INT (delta));
11635 emit_insn (gen_add2_insn (this_rtx, scratch1));
11638 if (vcall_offset)
11640 rtx offset_addr;
11642 if (!did_load)
11643 emit_load_ptr (scratch0, this_rtx);
11645 offset_addr = plus_constant (scratch0, vcall_offset);
11646 if (strict_memory_address_p (ptr_mode, offset_addr))
11647 ; /* Do nothing. */
11648 else if (! TARGET_SH5 && scratch0 != scratch1)
11650 /* scratch0 != scratch1, and we have indexed loads. Get better
11651 schedule by loading the offset into r1 and using an indexed
11652 load - then the load of r1 can issue before the load from
11653 (this_rtx + delta) finishes. */
11654 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11655 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11657 else if (CONST_OK_FOR_ADD (vcall_offset))
11659 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11660 offset_addr = scratch0;
11662 else if (scratch0 != scratch1)
11664 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11665 emit_insn (gen_add2_insn (scratch0, scratch1));
11666 offset_addr = scratch0;
11668 else
11669 gcc_unreachable (); /* FIXME */
11670 emit_load_ptr (scratch0, offset_addr);
11672 if (Pmode != ptr_mode)
11673 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11674 emit_insn (gen_add2_insn (this_rtx, scratch0));
11677 /* Generate a tail call to the target function. */
11678 if (! TREE_USED (function))
11680 assemble_external (function);
11681 TREE_USED (function) = 1;
11683 funexp = XEXP (DECL_RTL (function), 0);
11684 /* If the function is overridden, so is the thunk, hence we don't
11685 need GOT addressing even if this is a public symbol. */
11686 #if 0
11687 if (TARGET_SH1 && ! flag_weak)
11688 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11689 else
11690 #endif
11691 if (TARGET_SH2 && flag_pic)
11693 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11694 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11696 else
11698 if (TARGET_SHMEDIA && flag_pic)
11700 funexp = gen_sym2PIC (funexp);
11701 PUT_MODE (funexp, Pmode);
11703 emit_move_insn (scratch2, funexp);
11704 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11705 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11707 sibcall = emit_call_insn (sibcall);
11708 SIBLING_CALL_P (sibcall) = 1;
11709 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11710 emit_barrier ();
11712 /* Run just enough of rest_of_compilation to do scheduling and get
11713 the insns emitted. Note that use_thunk calls
11714 assemble_start_function and assemble_end_function. */
11716 insn_locators_alloc ();
11717 insns = get_insns ();
11719 if (optimize > 0)
11721 if (! cfun->cfg)
11722 init_flow (cfun);
11723 split_all_insns_noflow ();
11726 sh_reorg ();
11728 if (optimize > 0 && flag_delayed_branch)
11729 dbr_schedule (insns);
11731 shorten_branches (insns);
11732 final_start_function (insns, file, 1);
11733 final (insns, file, 1);
11734 final_end_function ();
11736 reload_completed = 0;
11737 epilogue_completed = 0;
11741 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11743 rtx sym;
11745 /* If this is not an ordinary function, the name usually comes from a
11746 string literal or an sprintf buffer. Make sure we use the same
11747 string consistently, so that cse will be able to unify address loads. */
11748 if (kind != FUNCTION_ORDINARY)
11749 name = IDENTIFIER_POINTER (get_identifier (name));
11750 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11751 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11752 if (flag_pic)
11753 switch (kind)
11755 case FUNCTION_ORDINARY:
11756 break;
11757 case SFUNC_GOT:
11759 rtx reg = target ? target : gen_reg_rtx (Pmode);
11761 emit_insn (gen_symGOT2reg (reg, sym));
11762 sym = reg;
11763 break;
11765 case SFUNC_STATIC:
11767 /* ??? To allow cse to work, we use GOTOFF relocations.
11768 we could add combiner patterns to transform this into
11769 straight pc-relative calls with sym2PIC / bsrf when
11770 label load and function call are still 1:1 and in the
11771 same basic block during combine. */
11772 rtx reg = target ? target : gen_reg_rtx (Pmode);
11774 emit_insn (gen_symGOTOFF2reg (reg, sym));
11775 sym = reg;
11776 break;
11779 if (target && sym != target)
11781 emit_move_insn (target, sym);
11782 return target;
11784 return sym;
11787 /* Find the number of a general purpose register in S. */
11788 static int
11789 scavenge_reg (HARD_REG_SET *s)
11791 int r;
11792 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11793 if (TEST_HARD_REG_BIT (*s, r))
11794 return r;
11795 return -1;
11799 sh_get_pr_initial_val (void)
11801 rtx val;
11803 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11804 PR register on SHcompact, because it might be clobbered by the prologue.
11805 We check first if that is known to be the case. */
11806 if (TARGET_SHCOMPACT
11807 && ((crtl->args.info.call_cookie
11808 & ~ CALL_COOKIE_RET_TRAMP (1))
11809 || crtl->saves_all_registers))
11810 return gen_frame_mem (SImode, return_address_pointer_rtx);
11812 /* If we haven't finished rtl generation, there might be a nonlocal label
11813 that we haven't seen yet.
11814 ??? get_hard_reg_initial_val fails if it is called after register
11815 allocation has started, unless it has been called before for the
11816 same register. And even then, we end in trouble if we didn't use
11817 the register in the same basic block before. So call
11818 get_hard_reg_initial_val now and wrap it in an unspec if we might
11819 need to replace it. */
11820 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11821 combine can put the pseudo returned by get_hard_reg_initial_val into
11822 instructions that need a general purpose registers, which will fail to
11823 be recognized when the pseudo becomes allocated to PR. */
11825 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11826 if (TARGET_SH1)
11827 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11828 return val;
11832 sh_expand_t_scc (rtx operands[])
11834 enum rtx_code code = GET_CODE (operands[1]);
11835 rtx target = operands[0];
11836 rtx op0 = operands[2];
11837 rtx op1 = operands[3];
11838 rtx result = target;
11839 HOST_WIDE_INT val;
11841 if (!REG_P (op0) || REGNO (op0) != T_REG
11842 || !CONST_INT_P (op1))
11843 return 0;
11844 if (!REG_P (result))
11845 result = gen_reg_rtx (SImode);
11846 val = INTVAL (op1);
11847 if ((code == EQ && val == 1) || (code == NE && val == 0))
11848 emit_insn (gen_movt (result));
11849 else if (TARGET_SH2A && ((code == EQ && val == 0)
11850 || (code == NE && val == 1)))
11851 emit_insn (gen_xorsi3_movrt (result));
11852 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11854 emit_clobber (result);
11855 emit_insn (gen_subc (result, result, result));
11856 emit_insn (gen_addsi3 (result, result, const1_rtx));
11858 else if (code == EQ || code == NE)
11859 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11860 else
11861 return 0;
11862 if (result != target)
11863 emit_move_insn (target, result);
11864 return 1;
11867 /* INSN is an sfunc; return the rtx that describes the address used. */
11868 static rtx
11869 extract_sfunc_addr (rtx insn)
11871 rtx pattern, part = NULL_RTX;
11872 int len, i;
11874 pattern = PATTERN (insn);
11875 len = XVECLEN (pattern, 0);
11876 for (i = 0; i < len; i++)
11878 part = XVECEXP (pattern, 0, i);
11879 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11880 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11881 return XEXP (part, 0);
11883 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11884 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11887 /* Verify that the register in use_sfunc_addr still agrees with the address
11888 used in the sfunc. This prevents fill_slots_from_thread from changing
11889 use_sfunc_addr.
11890 INSN is the use_sfunc_addr instruction, and REG is the register it
11891 guards. */
11893 check_use_sfunc_addr (rtx insn, rtx reg)
11895 /* Search for the sfunc. It should really come right after INSN. */
11896 while ((insn = NEXT_INSN (insn)))
11898 if (LABEL_P (insn) || JUMP_P (insn))
11899 break;
11900 if (! INSN_P (insn))
11901 continue;
11903 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11904 insn = XVECEXP (PATTERN (insn), 0, 0);
11905 if (GET_CODE (PATTERN (insn)) != PARALLEL
11906 || get_attr_type (insn) != TYPE_SFUNC)
11907 continue;
11908 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11910 gcc_unreachable ();
11913 /* This function returns a constant rtx that represents pi / 2**15 in
11914 SFmode. it's used to scale SFmode angles, in radians, to a
11915 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11916 maps to 0x10000). */
11918 static GTY(()) rtx sh_fsca_sf2int_rtx;
11921 sh_fsca_sf2int (void)
11923 if (! sh_fsca_sf2int_rtx)
11925 REAL_VALUE_TYPE rv;
11927 real_from_string (&rv, "10430.378350470453");
11928 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11931 return sh_fsca_sf2int_rtx;
11934 /* This function returns a constant rtx that represents pi / 2**15 in
11935 DFmode. it's used to scale DFmode angles, in radians, to a
11936 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11937 maps to 0x10000). */
11939 static GTY(()) rtx sh_fsca_df2int_rtx;
11942 sh_fsca_df2int (void)
11944 if (! sh_fsca_df2int_rtx)
11946 REAL_VALUE_TYPE rv;
11948 real_from_string (&rv, "10430.378350470453");
11949 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11952 return sh_fsca_df2int_rtx;
11955 /* This function returns a constant rtx that represents 2**15 / pi in
11956 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11957 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11958 2*pi). */
11960 static GTY(()) rtx sh_fsca_int2sf_rtx;
11963 sh_fsca_int2sf (void)
11965 if (! sh_fsca_int2sf_rtx)
11967 REAL_VALUE_TYPE rv;
11969 real_from_string (&rv, "9.587379924285257e-5");
11970 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11973 return sh_fsca_int2sf_rtx;
11976 /* Initialize the CUMULATIVE_ARGS structure. */
11978 void
11979 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11980 tree fntype,
11981 rtx libname ATTRIBUTE_UNUSED,
11982 tree fndecl,
11983 signed int n_named_args,
11984 enum machine_mode mode)
11986 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11987 pcum->free_single_fp_reg = 0;
11988 pcum->stack_regs = 0;
11989 pcum->byref_regs = 0;
11990 pcum->byref = 0;
11991 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11993 /* XXX - Should we check TARGET_HITACHI here ??? */
11994 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11996 if (fntype)
11998 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11999 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12000 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
12001 pcum->arg_count [(int) SH_ARG_INT]
12002 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12004 pcum->call_cookie
12005 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12006 && pcum->arg_count [(int) SH_ARG_INT] == 0
12007 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12008 ? int_size_in_bytes (TREE_TYPE (fntype))
12009 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12010 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12011 == FIRST_RET_REG));
12013 else
12015 pcum->arg_count [(int) SH_ARG_INT] = 0;
12016 pcum->prototype_p = FALSE;
12017 if (mode != VOIDmode)
12019 pcum->call_cookie =
12020 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12021 && GET_MODE_SIZE (mode) > 4
12022 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12024 /* If the default ABI is the Renesas ABI then all library
12025 calls must assume that the library will be using the
12026 Renesas ABI. So if the function would return its result
12027 in memory then we must force the address of this memory
12028 block onto the stack. Ideally we would like to call
12029 targetm.calls.return_in_memory() here but we do not have
12030 the TYPE or the FNDECL available so we synthesize the
12031 contents of that function as best we can. */
12032 pcum->force_mem =
12033 (TARGET_DEFAULT & MASK_HITACHI)
12034 && (mode == BLKmode
12035 || (GET_MODE_SIZE (mode) > 4
12036 && !(mode == DFmode
12037 && TARGET_FPU_DOUBLE)));
12039 else
12041 pcum->call_cookie = 0;
12042 pcum->force_mem = FALSE;
12047 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12048 not enter into CONST_DOUBLE for the replace.
12050 Note that copying is not done so X must not be shared unless all copies
12051 are to be modified.
12053 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12054 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12055 replacements[n*2+1] - and that we take mode changes into account.
12057 If a replacement is ambiguous, return NULL_RTX.
12059 If MODIFY is zero, don't modify any rtl in place,
12060 just return zero or nonzero for failure / success. */
12063 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12065 int i, j;
12066 const char *fmt;
12068 /* The following prevents loops occurrence when we change MEM in
12069 CONST_DOUBLE onto the same CONST_DOUBLE. */
12070 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12071 return x;
12073 for (i = n_replacements - 1; i >= 0 ; i--)
12074 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12075 return replacements[i*2+1];
12077 /* Allow this function to make replacements in EXPR_LISTs. */
12078 if (x == 0)
12079 return 0;
12081 if (GET_CODE (x) == SUBREG)
12083 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12084 n_replacements, modify);
12086 if (CONST_INT_P (new_rtx))
12088 x = simplify_subreg (GET_MODE (x), new_rtx,
12089 GET_MODE (SUBREG_REG (x)),
12090 SUBREG_BYTE (x));
12091 if (! x)
12092 abort ();
12094 else if (modify)
12095 SUBREG_REG (x) = new_rtx;
12097 return x;
12099 else if (REG_P (x))
12101 unsigned regno = REGNO (x);
12102 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12103 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12104 rtx result = NULL_RTX;
12106 for (i = n_replacements - 1; i >= 0; i--)
12108 rtx from = replacements[i*2];
12109 rtx to = replacements[i*2+1];
12110 unsigned from_regno, from_nregs, to_regno, new_regno;
12112 if (!REG_P (from))
12113 continue;
12114 from_regno = REGNO (from);
12115 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12116 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12117 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12119 if (regno < from_regno
12120 || regno + nregs > from_regno + nregs
12121 || !REG_P (to)
12122 || result)
12123 return NULL_RTX;
12124 to_regno = REGNO (to);
12125 if (to_regno < FIRST_PSEUDO_REGISTER)
12127 new_regno = regno + to_regno - from_regno;
12128 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12129 != nregs)
12130 return NULL_RTX;
12131 result = gen_rtx_REG (GET_MODE (x), new_regno);
12133 else if (GET_MODE (x) <= GET_MODE (to))
12134 result = gen_lowpart_common (GET_MODE (x), to);
12135 else
12136 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12139 return result ? result : x;
12141 else if (GET_CODE (x) == ZERO_EXTEND)
12143 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12144 n_replacements, modify);
12146 if (CONST_INT_P (new_rtx))
12148 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12149 new_rtx, GET_MODE (XEXP (x, 0)));
12150 if (! x)
12151 abort ();
12153 else if (modify)
12154 XEXP (x, 0) = new_rtx;
12156 return x;
12159 fmt = GET_RTX_FORMAT (GET_CODE (x));
12160 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12162 rtx new_rtx;
12164 if (fmt[i] == 'e')
12166 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12167 n_replacements, modify);
12168 if (!new_rtx)
12169 return NULL_RTX;
12170 if (modify)
12171 XEXP (x, i) = new_rtx;
12173 else if (fmt[i] == 'E')
12174 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12176 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12177 n_replacements, modify);
12178 if (!new_rtx)
12179 return NULL_RTX;
12180 if (modify)
12181 XVECEXP (x, i, j) = new_rtx;
12185 return x;
12189 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12191 enum rtx_code code = TRUNCATE;
12193 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12195 rtx inner = XEXP (x, 0);
12196 enum machine_mode inner_mode = GET_MODE (inner);
12198 if (inner_mode == mode)
12199 return inner;
12200 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12201 x = inner;
12202 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12203 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12205 code = GET_CODE (x);
12206 x = inner;
12209 return gen_rtx_fmt_e (code, mode, x);
12212 /* called via for_each_rtx after reload, to clean up truncates of
12213 registers that span multiple actual hard registers. */
12215 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12217 rtx x = *p, reg;
12219 if (GET_CODE (x) != TRUNCATE)
12220 return 0;
12221 reg = XEXP (x, 0);
12222 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12224 enum machine_mode reg_mode = GET_MODE (reg);
12225 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12226 subreg_lowpart_offset (DImode, reg_mode));
12227 *(int*) n_changes += 1;
12228 return -1;
12230 return 0;
12233 /* Load and store depend on the highpart of the address. However,
12234 set_attr_alternative does not give well-defined results before reload,
12235 so we must look at the rtl ourselves to see if any of the feeding
12236 registers is used in a memref. */
12238 /* Called by sh_contains_memref_p via for_each_rtx. */
12239 static int
12240 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12242 return (MEM_P (*loc));
12245 /* Return nonzero iff INSN contains a MEM. */
12247 sh_contains_memref_p (rtx insn)
12249 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12252 /* Return nonzero iff INSN loads a banked register. */
12254 sh_loads_bankedreg_p (rtx insn)
12256 if (GET_CODE (PATTERN (insn)) == SET)
12258 rtx op = SET_DEST (PATTERN(insn));
12259 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12260 return 1;
12263 return 0;
12266 /* FNADDR is the MEM expression from a call expander. Return an address
12267 to use in an SHmedia insn pattern. */
12269 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12271 int is_sym;
12273 fnaddr = XEXP (fnaddr, 0);
12274 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12275 if (flag_pic && is_sym)
12277 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12279 rtx reg = gen_reg_rtx (Pmode);
12281 /* We must not use GOTPLT for sibcalls, because PIC_REG
12282 must be restored before the PLT code gets to run. */
12283 if (is_sibcall)
12284 emit_insn (gen_symGOT2reg (reg, fnaddr));
12285 else
12286 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12287 fnaddr = reg;
12289 else
12291 fnaddr = gen_sym2PIC (fnaddr);
12292 PUT_MODE (fnaddr, Pmode);
12295 /* If ptabs might trap, make this visible to the rest of the compiler.
12296 We generally assume that symbols pertain to valid locations, but
12297 it is possible to generate invalid symbols with asm or linker tricks.
12298 In a list of functions where each returns its successor, an invalid
12299 symbol might denote an empty list. */
12300 if (!TARGET_PT_FIXED
12301 && (!is_sym || TARGET_INVALID_SYMBOLS)
12302 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12304 rtx tr = gen_reg_rtx (PDImode);
12306 emit_insn (gen_ptabs (tr, fnaddr));
12307 fnaddr = tr;
12309 else if (! target_reg_operand (fnaddr, Pmode))
12310 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12311 return fnaddr;
12314 reg_class_t
12315 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12316 enum machine_mode mode, secondary_reload_info *sri)
12318 enum reg_class rclass = (enum reg_class) rclass_i;
12320 if (in_p)
12322 if (REGCLASS_HAS_FP_REG (rclass)
12323 && ! TARGET_SHMEDIA
12324 && immediate_operand ((x), mode)
12325 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12326 && mode == SFmode && fldi_ok ()))
12327 switch (mode)
12329 case SFmode:
12330 sri->icode = CODE_FOR_reload_insf__frn;
12331 return NO_REGS;
12332 case DFmode:
12333 sri->icode = CODE_FOR_reload_indf__frn;
12334 return NO_REGS;
12335 case SImode:
12336 /* ??? If we knew that we are in the appropriate mode -
12337 single precision - we could use a reload pattern directly. */
12338 return FPUL_REGS;
12339 default:
12340 abort ();
12342 if (rclass == FPUL_REGS
12343 && ((REG_P (x)
12344 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12345 || REGNO (x) == T_REG))
12346 || GET_CODE (x) == PLUS))
12347 return GENERAL_REGS;
12348 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12350 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12351 return GENERAL_REGS;
12352 else if (mode == SFmode)
12353 return FP_REGS;
12354 sri->icode = CODE_FOR_reload_insi__i_fpul;
12355 return NO_REGS;
12357 if (rclass == FPSCR_REGS
12358 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12359 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12360 return GENERAL_REGS;
12361 if (REGCLASS_HAS_FP_REG (rclass)
12362 && TARGET_SHMEDIA
12363 && immediate_operand (x, mode)
12364 && x != CONST0_RTX (GET_MODE (x))
12365 && GET_MODE (x) != V4SFmode)
12366 return GENERAL_REGS;
12367 if ((mode == QImode || mode == HImode)
12368 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12370 sri->icode = ((mode == QImode)
12371 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12372 return NO_REGS;
12374 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12375 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12376 return TARGET_REGS;
12377 } /* end of input-only processing. */
12379 if (((REGCLASS_HAS_FP_REG (rclass)
12380 && (REG_P (x)
12381 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12382 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12383 && TARGET_FMOVD))))
12384 || (REGCLASS_HAS_GENERAL_REG (rclass)
12385 && REG_P (x)
12386 && FP_REGISTER_P (REGNO (x))))
12387 && ! TARGET_SHMEDIA
12388 && (mode == SFmode || mode == SImode))
12389 return FPUL_REGS;
12390 if ((rclass == FPUL_REGS
12391 || (REGCLASS_HAS_FP_REG (rclass)
12392 && ! TARGET_SHMEDIA && mode == SImode))
12393 && (MEM_P (x)
12394 || (REG_P (x)
12395 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12396 || REGNO (x) == T_REG
12397 || system_reg_operand (x, VOIDmode)))))
12399 if (rclass == FPUL_REGS)
12400 return GENERAL_REGS;
12401 return FPUL_REGS;
12403 if ((rclass == TARGET_REGS
12404 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12405 && !satisfies_constraint_Csy (x)
12406 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12407 return GENERAL_REGS;
12408 if ((rclass == MAC_REGS || rclass == PR_REGS)
12409 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12410 && rclass != REGNO_REG_CLASS (REGNO (x)))
12411 return GENERAL_REGS;
12412 if (rclass != GENERAL_REGS && REG_P (x)
12413 && TARGET_REGISTER_P (REGNO (x)))
12414 return GENERAL_REGS;
12415 return NO_REGS;
12418 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12420 #include "gt-sh.h"