* config/sh/sh.c (sh_delegitimize_address): Handle UNSPEC_SYMOFF
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "recog.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "gimple.h"
56 #include "cfgloop.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
61 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
64 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
66 /* These are some macros to abstract register modes. */
67 #define CONST_OK_FOR_ADD(size) \
68 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
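/* Illustration (assuming the usual sh.h definitions of a signed 8-bit
   immediate for I08 and a signed 10-bit immediate for SHmedia's ADDI):
   CONST_OK_FOR_ADD (300) holds only when TARGET_SHMEDIA, since 300 fits
   in -512..511 but not in -128..127.  */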
69 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
70 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
71 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
73 /* Used to simplify the logic below. Find the attributes wherever
74 they may be. */
75 #define SH_ATTRIBUTES(decl) \
76 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
77 : DECL_ATTRIBUTES (decl) \
78 ? (DECL_ATTRIBUTES (decl)) \
79 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
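/* Reading the macro above: for a type node it returns TYPE_ATTRIBUTES
   directly; for a decl it returns the decl's own attributes if any, and
   otherwise falls back to the attributes of the decl's type.  */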
81 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
82 int current_function_interrupt;
84 tree sh_deferred_function_attributes;
85 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87 /* Global variables for machine-dependent things. */
90 /* Which CPU we are scheduling for.  */
90 enum processor_type sh_cpu;
92 /* Definitions used in ready queue reordering for first scheduling pass. */
94 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
95 static short *regmode_weight[2];
97 /* Total SFmode and SImode weights of scheduled insns. */
98 static int curr_regmode_pressure[2];
100 /* Number of r0 life regions. */
101 static int r0_life_regions;
103 /* If true, skip cycles for Q -> R movement. */
104 static int skip_cycles = 0;
106 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
107 and returned from sh_reorder2. */
108 static short cached_can_issue_more;
110 /* Unique number for UNSPEC_BBR pattern. */
111 static unsigned int unspec_bbr_uid = 1;
113 /* Provides the class number of the smallest class containing
114 a given register number.  */
116 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
118 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
155 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
156 GENERAL_REGS, GENERAL_REGS,
159 char sh_register_names[FIRST_PSEUDO_REGISTER] \
160 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162 char sh_additional_register_names[ADDREGNAMES_SIZE] \
163 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
164 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166 int assembler_dialect;
168 static bool shmedia_space_reserved_for_target_registers;
170 static bool sh_handle_option (size_t, const char *, int);
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void sh_option_init_struct (struct gcc_options *);
186 static void sh_option_default_params (void);
187 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
188 static rtx frame_insn (rtx);
189 static rtx push (int);
190 static void pop (int);
191 static void push_regs (HARD_REG_SET *, int);
192 static int calc_live_regs (HARD_REG_SET *);
193 static HOST_WIDE_INT rounded_frame_size (int);
194 static bool sh_frame_pointer_required (void);
195 static rtx mark_constant_pool_use (rtx);
196 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
197 static tree sh_handle_resbank_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
200 tree, int, bool *);
201 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
204 static void sh_print_operand (FILE *, rtx, int);
205 static void sh_print_operand_address (FILE *, rtx);
206 static bool sh_print_operand_punct_valid_p (unsigned char code);
207 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
208 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
209 static void sh_insert_attributes (tree, tree *);
210 static const char *sh_check_pch_target_flags (int);
211 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
212 static int sh_adjust_cost (rtx, rtx, rtx, int);
213 static int sh_issue_rate (void);
214 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
215 static short find_set_regmode_weight (rtx, enum machine_mode);
216 static short find_insn_regmode_weight (rtx, enum machine_mode);
217 static void find_regmode_weight (basic_block, enum machine_mode);
218 static int find_r0_life_regions (basic_block);
219 static void sh_md_init_global (FILE *, int, int);
220 static void sh_md_finish_global (FILE *, int);
221 static int rank_for_reorder (const void *, const void *);
222 static void swap_reorder (rtx *, int);
223 static void ready_reorder (rtx *, int);
224 static short high_pressure (enum machine_mode);
225 static int sh_reorder (FILE *, int, rtx *, int *, int);
226 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
227 static void sh_md_init (FILE *, int, int);
228 static int sh_variable_issue (FILE *, int, rtx, int);
230 static bool sh_function_ok_for_sibcall (tree, tree);
232 static bool sh_cannot_modify_jumps_p (void);
233 static reg_class_t sh_target_reg_class (void);
234 static bool sh_optimize_target_register_callee_saved (bool);
235 static bool sh_ms_bitfield_layout_p (const_tree);
237 static void sh_init_builtins (void);
238 static tree sh_builtin_decl (unsigned, bool);
239 static void sh_media_init_builtins (void);
240 static tree sh_media_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
243 static void sh_file_start (void);
244 static int flow_dependent_p (rtx, rtx);
245 static void flow_dependent_p_1 (rtx, const_rtx, void *);
246 static int shiftcosts (rtx);
247 static int andcosts (rtx);
248 static int addsubcosts (rtx);
249 static int multcosts (rtx);
250 static bool unspec_caller_rtx_p (rtx);
251 static bool sh_cannot_copy_insn_p (rtx);
252 static bool sh_rtx_costs (rtx, int, int, int *, bool);
253 static int sh_address_cost (rtx, bool);
254 static int sh_pr_n_sets (void);
255 static rtx sh_allocate_initial_value (rtx);
256 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
257 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
258 enum machine_mode,
259 struct secondary_reload_info *);
260 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
261 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
262 static rtx sh_delegitimize_address (rtx);
263 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
264 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
265 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
266 static int scavenge_reg (HARD_REG_SET *s);
267 struct save_schedule_s;
268 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
269 struct save_schedule_s *, int);
271 static rtx sh_struct_value_rtx (tree, int);
272 static rtx sh_function_value (const_tree, const_tree, bool);
273 static bool sh_function_value_regno_p (const unsigned int);
274 static rtx sh_libcall_value (enum machine_mode, const_rtx);
275 static bool sh_return_in_memory (const_tree, const_tree);
276 static rtx sh_builtin_saveregs (void);
277 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
278 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
279 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
280 static tree sh_build_builtin_va_list (void);
281 static void sh_va_start (tree, rtx);
282 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
283 static bool sh_promote_prototypes (const_tree);
284 static enum machine_mode sh_promote_function_mode (const_tree type,
285 enum machine_mode,
286 int *punsignedp,
287 const_tree funtype,
288 int for_return);
289 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
290 const_tree, bool);
291 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
292 const_tree, bool);
293 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
294 tree, bool);
295 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
296 const_tree, bool);
297 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
298 const_tree, bool);
299 static bool sh_scalar_mode_supported_p (enum machine_mode);
300 static int sh_dwarf_calling_convention (const_tree);
301 static void sh_encode_section_info (tree, rtx, int);
302 static int sh2a_function_vector_p (tree);
303 static void sh_trampoline_init (rtx, tree, rtx);
304 static rtx sh_trampoline_adjust_address (rtx);
305 static void sh_conditional_register_usage (void);
307 static const struct attribute_spec sh_attribute_table[] =
309 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
310 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
311 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
312 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
313 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
314 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
315 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
316 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
317 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
318 #ifdef SYMBIAN
319 /* Symbian support adds three new attributes:
320 dllexport - for exporting a function/variable that will live in a dll
321 dllimport - for importing a function/variable from a dll
323 Microsoft allows multiple declspecs in one __declspec, separating
324 them with spaces. We do NOT support this. Instead, use __declspec
325 multiple times. */
326 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
327 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
328 #endif
329 { NULL, 0, 0, false, false, false, NULL }
332 /* Set default optimization options. */
333 static const struct default_options sh_option_optimization_table[] =
335 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
336 { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 },
337 { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 },
338 { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 },
339 { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 },
340 /* We can't meaningfully test TARGET_SHMEDIA here, because -m
341 options haven't been parsed yet, hence we'd read only the
342 default. sh_target_reg_class will return NO_REGS if this is
343 not SHMEDIA, so it's OK to always set
344 flag_branch_target_load_optimize. */
345 { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 },
346 { OPT_LEVELS_NONE, 0, NULL, 0 }
349 /* Initialize the GCC target structure. */
350 #undef TARGET_ATTRIBUTE_TABLE
351 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
353 /* The next two are used for debug info when compiling with -gdwarf. */
354 #undef TARGET_ASM_UNALIGNED_HI_OP
355 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
356 #undef TARGET_ASM_UNALIGNED_SI_OP
357 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
359 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
360 #undef TARGET_ASM_UNALIGNED_DI_OP
361 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
362 #undef TARGET_ASM_ALIGNED_DI_OP
363 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
365 #undef TARGET_OPTION_OVERRIDE
366 #define TARGET_OPTION_OVERRIDE sh_option_override
367 #undef TARGET_OPTION_OPTIMIZATION_TABLE
368 #define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table
369 #undef TARGET_OPTION_INIT_STRUCT
370 #define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
371 #undef TARGET_OPTION_DEFAULT_PARAMS
372 #define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND sh_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
380 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
381 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
383 #undef TARGET_ASM_FUNCTION_EPILOGUE
384 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
386 #undef TARGET_ASM_OUTPUT_MI_THUNK
387 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
389 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
390 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
392 #undef TARGET_ASM_FILE_START
393 #define TARGET_ASM_FILE_START sh_file_start
394 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
395 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
397 #undef TARGET_DEFAULT_TARGET_FLAGS
398 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
399 #undef TARGET_HANDLE_OPTION
400 #define TARGET_HANDLE_OPTION sh_handle_option
402 #undef TARGET_REGISTER_MOVE_COST
403 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
405 #undef TARGET_INSERT_ATTRIBUTES
406 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
411 #undef TARGET_SCHED_ISSUE_RATE
412 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
414 /* The next 5 hooks have been implemented for reenabling sched1. With the
415 help of these macros we are limiting the movement of insns in sched1 to
416 reduce the register pressure. The overall idea is to keep count of SImode
417 and SFmode regs required by already scheduled insns. When these counts
418 cross some threshold values, give priority to insns that free registers.
419 The insn that frees registers is most likely to be the insn with the lowest
420 LUID (original insn order), but such an insn might be in the stalled
421 queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
422 up to a max of 8 cycles so that such insns may move from Q -> R.
424 The descriptions of the hooks are given below:
426 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
427 scheduler; it is called inside the sched_init function just after
428 find_insn_reg_weights function call. It is used to calculate the SImode
429 and SFmode weights of insns of basic blocks, much like what
430 find_insn_reg_weights does.
431 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
433 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
434 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
435 (Q)->(R).
437 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
438 high, reorder the ready queue so that the insn with the lowest LUID will be
439 issued next.
441 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
442 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
444 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
445 can be returned from TARGET_SCHED_REORDER2.
447 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
449 #undef TARGET_SCHED_DFA_NEW_CYCLE
450 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
452 #undef TARGET_SCHED_INIT_GLOBAL
453 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
455 #undef TARGET_SCHED_FINISH_GLOBAL
456 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
458 #undef TARGET_SCHED_VARIABLE_ISSUE
459 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
461 #undef TARGET_SCHED_REORDER
462 #define TARGET_SCHED_REORDER sh_reorder
464 #undef TARGET_SCHED_REORDER2
465 #define TARGET_SCHED_REORDER2 sh_reorder2
467 #undef TARGET_SCHED_INIT
468 #define TARGET_SCHED_INIT sh_md_init
470 #undef TARGET_DELEGITIMIZE_ADDRESS
471 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
473 #undef TARGET_LEGITIMIZE_ADDRESS
474 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
476 #undef TARGET_CANNOT_MODIFY_JUMPS_P
477 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
478 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
479 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
480 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
481 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
482 sh_optimize_target_register_callee_saved
484 #undef TARGET_MS_BITFIELD_LAYOUT_P
485 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
487 #undef TARGET_INIT_BUILTINS
488 #define TARGET_INIT_BUILTINS sh_init_builtins
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL sh_builtin_decl
491 #undef TARGET_EXPAND_BUILTIN
492 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
495 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
497 #undef TARGET_CANNOT_COPY_INSN_P
498 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS sh_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST sh_address_cost
503 #undef TARGET_ALLOCATE_INITIAL_VALUE
504 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
506 #undef TARGET_MACHINE_DEPENDENT_REORG
507 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
509 #undef TARGET_DWARF_REGISTER_SPAN
510 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
512 #ifdef HAVE_AS_TLS
513 #undef TARGET_HAVE_TLS
514 #define TARGET_HAVE_TLS true
515 #endif
517 #undef TARGET_PROMOTE_PROTOTYPES
518 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
519 #undef TARGET_PROMOTE_FUNCTION_MODE
520 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
522 #undef TARGET_FUNCTION_VALUE
523 #define TARGET_FUNCTION_VALUE sh_function_value
524 #undef TARGET_FUNCTION_VALUE_REGNO_P
525 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
526 #undef TARGET_LIBCALL_VALUE
527 #define TARGET_LIBCALL_VALUE sh_libcall_value
528 #undef TARGET_STRUCT_VALUE_RTX
529 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
530 #undef TARGET_RETURN_IN_MEMORY
531 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
533 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
534 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
535 #undef TARGET_SETUP_INCOMING_VARARGS
536 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
537 #undef TARGET_STRICT_ARGUMENT_NAMING
538 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
539 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
540 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
541 #undef TARGET_MUST_PASS_IN_STACK
542 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
545 #undef TARGET_CALLEE_COPIES
546 #define TARGET_CALLEE_COPIES sh_callee_copies
547 #undef TARGET_ARG_PARTIAL_BYTES
548 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
549 #undef TARGET_FUNCTION_ARG
550 #define TARGET_FUNCTION_ARG sh_function_arg
551 #undef TARGET_FUNCTION_ARG_ADVANCE
552 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
554 #undef TARGET_BUILD_BUILTIN_VA_LIST
555 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
556 #undef TARGET_EXPAND_BUILTIN_VA_START
557 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
558 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
559 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
561 #undef TARGET_SCALAR_MODE_SUPPORTED_P
562 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
563 #undef TARGET_VECTOR_MODE_SUPPORTED_P
564 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
566 #undef TARGET_CHECK_PCH_TARGET_FLAGS
567 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
569 #undef TARGET_DWARF_CALLING_CONVENTION
570 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
572 #undef TARGET_FRAME_POINTER_REQUIRED
573 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
575 /* Return regmode weight for insn. */
576 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
578 /* Return current register pressure for regmode. */
579 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
581 #undef TARGET_ENCODE_SECTION_INFO
582 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
584 #ifdef SYMBIAN
586 #undef TARGET_ENCODE_SECTION_INFO
587 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
588 #undef TARGET_STRIP_NAME_ENCODING
589 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
590 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
591 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
593 #endif /* SYMBIAN */
595 #undef TARGET_SECONDARY_RELOAD
596 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
598 #undef TARGET_PREFERRED_RELOAD_CLASS
599 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
601 #undef TARGET_CONDITIONAL_REGISTER_USAGE
602 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
604 #undef TARGET_LEGITIMATE_ADDRESS_P
605 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
607 #undef TARGET_TRAMPOLINE_INIT
608 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
609 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
610 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
612 /* Machine-specific symbol_ref flags. */
613 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
615 struct gcc_target targetm = TARGET_INITIALIZER;
617 /* Implement TARGET_HANDLE_OPTION. */
619 static bool
620 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
621 int value ATTRIBUTE_UNUSED)
623 switch (code)
625 case OPT_m1:
626 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
627 return true;
629 case OPT_m2:
630 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
631 return true;
633 case OPT_m2a:
634 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
635 return true;
637 case OPT_m2a_nofpu:
638 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
639 return true;
641 case OPT_m2a_single:
642 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
643 return true;
645 case OPT_m2a_single_only:
646 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
647 return true;
649 case OPT_m2e:
650 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
651 return true;
653 case OPT_m3:
654 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
655 return true;
657 case OPT_m3e:
658 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
659 return true;
661 case OPT_m4:
662 case OPT_m4_100:
663 case OPT_m4_200:
664 case OPT_m4_300:
665 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
666 return true;
668 case OPT_m4_nofpu:
669 case OPT_m4_100_nofpu:
670 case OPT_m4_200_nofpu:
671 case OPT_m4_300_nofpu:
672 case OPT_m4_340:
673 case OPT_m4_400:
674 case OPT_m4_500:
675 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
676 return true;
678 case OPT_m4_single:
679 case OPT_m4_100_single:
680 case OPT_m4_200_single:
681 case OPT_m4_300_single:
682 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
683 return true;
685 case OPT_m4_single_only:
686 case OPT_m4_100_single_only:
687 case OPT_m4_200_single_only:
688 case OPT_m4_300_single_only:
689 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
690 return true;
692 case OPT_m4a:
693 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
694 return true;
696 case OPT_m4a_nofpu:
697 case OPT_m4al:
698 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
699 return true;
701 case OPT_m4a_single:
702 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
703 return true;
705 case OPT_m4a_single_only:
706 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
707 return true;
709 case OPT_m5_32media:
710 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
711 return true;
713 case OPT_m5_32media_nofpu:
714 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
715 return true;
717 case OPT_m5_64media:
718 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
719 return true;
721 case OPT_m5_64media_nofpu:
722 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
723 return true;
725 case OPT_m5_compact:
726 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
727 return true;
729 case OPT_m5_compact_nofpu:
730 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
731 return true;
733 default:
734 return true;
738 /* Implement TARGET_OPTION_INIT_STRUCT. */
739 static void
740 sh_option_init_struct (struct gcc_options *opts)
742 /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
743 here, so leave it to TARGET_OPTION_OVERRIDE to set
744 flag_finite_math_only. We set it to 2 here so we know if the user
745 explicitly requested this to be on or off. */
746 opts->x_flag_finite_math_only = 2;
749 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
750 static void
751 sh_option_default_params (void)
753 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
756 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
757 various options, and do some machine dependent initialization. */
758 static void
759 sh_option_override (void)
761 int regno;
763 SUBTARGET_OVERRIDE_OPTIONS;
764 if (optimize > 1 && !optimize_size)
765 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
766 if (flag_finite_math_only == 2)
767 flag_finite_math_only
768 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
769 if (TARGET_SH2E && !flag_finite_math_only)
770 target_flags |= MASK_IEEE;
771 sh_cpu = PROCESSOR_SH1;
772 assembler_dialect = 0;
773 if (TARGET_SH2)
774 sh_cpu = PROCESSOR_SH2;
775 if (TARGET_SH2E)
776 sh_cpu = PROCESSOR_SH2E;
777 if (TARGET_SH2A)
778 sh_cpu = PROCESSOR_SH2A;
779 if (TARGET_SH3)
780 sh_cpu = PROCESSOR_SH3;
781 if (TARGET_SH3E)
782 sh_cpu = PROCESSOR_SH3E;
783 if (TARGET_SH4)
785 assembler_dialect = 1;
786 sh_cpu = PROCESSOR_SH4;
788 if (TARGET_SH4A_ARCH)
790 assembler_dialect = 1;
791 sh_cpu = PROCESSOR_SH4A;
793 if (TARGET_SH5)
795 sh_cpu = PROCESSOR_SH5;
796 target_flags |= MASK_ALIGN_DOUBLE;
797 if (TARGET_SHMEDIA_FPU)
798 target_flags |= MASK_FMOVD;
799 if (TARGET_SHMEDIA)
801 /* There are no delay slots on SHmedia. */
802 flag_delayed_branch = 0;
803 /* Relaxation isn't yet supported for SHmedia.  */
804 target_flags &= ~MASK_RELAX;
805 /* After reload, if-conversion does little good but can cause
806 ICEs:
807 - find_if_block doesn't do anything for SH because we don't
808 have conditional execution patterns. (We use conditional
809 move patterns, which are handled differently, and only
810 before reload).
811 - find_cond_trap doesn't do anything for the SH because we
812 don't have conditional traps.
813 - find_if_case_1 uses redirect_edge_and_branch_force in
814 the only path that does an optimization, and this causes
815 an ICE when branch targets are in registers.
816 - find_if_case_2 doesn't do anything for the SHmedia after
817 reload except when it can redirect a tablejump - and
818 that's rather rare. */
819 flag_if_conversion2 = 0;
820 if (! strcmp (sh_div_str, "call"))
821 sh_div_strategy = SH_DIV_CALL;
822 else if (! strcmp (sh_div_str, "call2"))
823 sh_div_strategy = SH_DIV_CALL2;
824 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
825 sh_div_strategy = SH_DIV_FP;
826 else if (! strcmp (sh_div_str, "inv"))
827 sh_div_strategy = SH_DIV_INV;
828 else if (! strcmp (sh_div_str, "inv:minlat"))
829 sh_div_strategy = SH_DIV_INV_MINLAT;
830 else if (! strcmp (sh_div_str, "inv20u"))
831 sh_div_strategy = SH_DIV_INV20U;
832 else if (! strcmp (sh_div_str, "inv20l"))
833 sh_div_strategy = SH_DIV_INV20L;
834 else if (! strcmp (sh_div_str, "inv:call2"))
835 sh_div_strategy = SH_DIV_INV_CALL2;
836 else if (! strcmp (sh_div_str, "inv:call"))
837 sh_div_strategy = SH_DIV_INV_CALL;
838 else if (! strcmp (sh_div_str, "inv:fp"))
840 if (TARGET_FPU_ANY)
841 sh_div_strategy = SH_DIV_INV_FP;
842 else
843 sh_div_strategy = SH_DIV_INV;
845 TARGET_CBRANCHDI4 = 0;
846 /* Assembler CFI isn't yet fully supported for SHmedia. */
847 flag_dwarf2_cfi_asm = 0;
850 else
852 /* Only the sh64-elf assembler fully supports .quad properly. */
853 targetm.asm_out.aligned_op.di = NULL;
854 targetm.asm_out.unaligned_op.di = NULL;
856 if (TARGET_SH1)
858 if (! strcmp (sh_div_str, "call-div1"))
859 sh_div_strategy = SH_DIV_CALL_DIV1;
860 else if (! strcmp (sh_div_str, "call-fp")
861 && (TARGET_FPU_DOUBLE
862 || (TARGET_HARD_SH4 && TARGET_SH2E)
863 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
864 sh_div_strategy = SH_DIV_CALL_FP;
865 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
866 sh_div_strategy = SH_DIV_CALL_TABLE;
867 else
868 /* Pick one that makes most sense for the target in general.
869 It is not much good to use different functions depending
870 on -Os, since then we'll end up with two different functions
871 when some of the code is compiled for size, and some for
872 speed. */
874 /* SH4 tends to emphasize speed. */
875 if (TARGET_HARD_SH4)
876 sh_div_strategy = SH_DIV_CALL_TABLE;
877 /* These have their own way of doing things. */
878 else if (TARGET_SH2A)
879 sh_div_strategy = SH_DIV_INTRINSIC;
880 /* ??? Should we use the integer SHmedia function instead? */
881 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
882 sh_div_strategy = SH_DIV_CALL_FP;
883 /* SH1 .. SH3 cores often go into small-footprint systems, so
884 default to the smallest implementation available. */
885 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
886 sh_div_strategy = SH_DIV_CALL_TABLE;
887 else
888 sh_div_strategy = SH_DIV_CALL_DIV1;
890 if (!TARGET_SH1)
891 TARGET_PRETEND_CMOVE = 0;
892 if (sh_divsi3_libfunc[0])
893 ; /* User supplied - leave it alone. */
894 else if (TARGET_DIVIDE_CALL_FP)
895 sh_divsi3_libfunc = "__sdivsi3_i4";
896 else if (TARGET_DIVIDE_CALL_TABLE)
897 sh_divsi3_libfunc = "__sdivsi3_i4i";
898 else if (TARGET_SH5)
899 sh_divsi3_libfunc = "__sdivsi3_1";
900 else
901 sh_divsi3_libfunc = "__sdivsi3";
902 if (sh_branch_cost == -1)
903 sh_branch_cost
904 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
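/* Reading the conditional above: SH5 gets a branch cost of 1, SH1 (no SH2
   insns) and hardware SH4 get 2, and the remaining SH2/SH3 variants
   default to 1.  */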
906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
907 if (! VALID_REGISTER_P (regno))
908 sh_register_names[regno][0] = '\0';
910 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
911 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
912 sh_additional_register_names[regno][0] = '\0';
914 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
916 if ((flag_pic && ! TARGET_PREFERGOT)
917 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
918 flag_no_function_cse = 1;
920 if (targetm.small_register_classes_for_mode_p (VOIDmode))
922 /* Never run scheduling before reload, since that can
923 break global alloc, and generates slower code anyway due
924 to the pressure on R0. */
925 /* Enable sched1 for SH4 if the user explicitly requests.
926 When sched1 is enabled, the ready queue will be reordered by
927 the target hooks if pressure is high.  We cannot do this for
928 PIC, SH3 and lower, as they give spill failures for R0. */
929 if (!TARGET_HARD_SH4 || flag_pic)
930 flag_schedule_insns = 0;
931 /* ??? Current exception handling places basic block boundaries
932 after call_insns.  This causes high pressure on R0 and gives
933 spill failures for R0 in reload. See PR 22553 and the thread
934 on gcc-patches
935 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
936 else if (flag_exceptions)
938 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
939 warning (0, "ignoring -fschedule-insns because of exception handling bug");
940 flag_schedule_insns = 0;
942 else if (flag_schedule_insns
943 && !global_options_set.x_flag_schedule_insns)
944 flag_schedule_insns = 0;
947 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
948 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
950 /* Unwind info is not correct around the CFG unless either a frame
951 pointer is present or M_A_O_A is set. Fixing this requires rewriting
952 unwind info generation to be aware of the CFG and propagating states
953 around edges. */
954 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
955 || flag_exceptions || flag_non_call_exceptions)
956 && flag_omit_frame_pointer
957 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
959 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
960 warning (0, "unwind tables currently require either a frame pointer "
961 "or -maccumulate-outgoing-args for correctness");
962 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
965 /* Unwinding with -freorder-blocks-and-partition does not work on this
966 architecture, because it requires far jumps to labels crossing between
967 hot/cold sections, which are rejected on this architecture. */
968 if (flag_reorder_blocks_and_partition)
970 if (flag_exceptions)
972 inform (input_location,
973 "-freorder-blocks-and-partition does not work with "
974 "exceptions on this architecture");
975 flag_reorder_blocks_and_partition = 0;
976 flag_reorder_blocks = 1;
978 else if (flag_unwind_tables)
980 inform (input_location,
981 "-freorder-blocks-and-partition does not support unwind "
982 "info on this architecture");
983 flag_reorder_blocks_and_partition = 0;
984 flag_reorder_blocks = 1;
988 if (align_loops == 0)
989 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
990 if (align_jumps == 0)
991 align_jumps = 1 << CACHE_LOG;
992 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
993 align_jumps = TARGET_SHMEDIA ? 4 : 2;
995 /* Allocation boundary (in *bytes*) for the code of a function.
996 SH1: 32 bit alignment is faster, because instructions are always
997 fetched as a pair from a longword boundary.
998 SH2 .. SH5 : align to cache line start. */
999 if (align_functions == 0)
1000 align_functions
1001 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
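/* Worked example of the defaults above (CACHE_LOG comes from sh.h; a value
   of 5, i.e. 32-byte cache lines, is assumed here): align_loops becomes
   1 << 2 = 4 bytes (1 << 3 = 8 on SH5), align_jumps and align_functions
   become 1 << 5 = 32 bytes, and with -Os functions instead fall back to
   FUNCTION_BOUNDARY / 8 bytes.  */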
1002 /* The linker relaxation code breaks when a function contains
1003 alignments that are larger than that at the start of a
1004 compilation unit. */
1005 if (TARGET_RELAX)
1007 int min_align
1008 = align_loops > align_jumps ? align_loops : align_jumps;
1010 /* Also take possible .long constants / mova tables into account. */
1011 if (min_align < 4)
1012 min_align = 4;
1013 if (align_functions < min_align)
1014 align_functions = min_align;
1017 if (sh_fixed_range_str)
1018 sh_fix_range (sh_fixed_range_str);
1020 /* This target defaults to strict volatile bitfields. */
1021 if (flag_strict_volatile_bitfields < 0)
1022 flag_strict_volatile_bitfields = 1;
1025 /* Print the operand address in x to the stream. */
1027 static void
1028 sh_print_operand_address (FILE *stream, rtx x)
1030 switch (GET_CODE (x))
1032 case REG:
1033 case SUBREG:
1034 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1035 break;
1037 case PLUS:
1039 rtx base = XEXP (x, 0);
1040 rtx index = XEXP (x, 1);
1042 switch (GET_CODE (index))
1044 case CONST_INT:
1045 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1046 reg_names[true_regnum (base)]);
1047 break;
1049 case REG:
1050 case SUBREG:
1052 int base_num = true_regnum (base);
1053 int index_num = true_regnum (index);
1055 fprintf (stream, "@(r0,%s)",
1056 reg_names[MAX (base_num, index_num)]);
1057 break;
1060 default:
1061 gcc_unreachable ();
1064 break;
1066 case PRE_DEC:
1067 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1068 break;
1070 case POST_INC:
1071 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1072 break;
1074 default:
1075 x = mark_constant_pool_use (x);
1076 output_addr_const (stream, x);
1077 break;
1081 /* Print operand x (an rtx) in assembler syntax to file stream
1082 according to modifier code.
1084 '.' print a .s if insn needs delay slot
1085 ',' print LOCAL_LABEL_PREFIX
1086 '@' print trap, rte or rts depending upon pragma interruptness
1087 '#' output a nop if there is nothing to put in the delay slot
1088 ''' print likelihood suffix (/u for unlikely).
1089 '>' print branch target if -fverbose-asm
1090 'O' print a constant without the #
1091 'R' print the LSW of a dp value - changes if in little endian
1092 'S' print the MSW of a dp value - changes if in little endian
1093 'T' print the next word of a dp value - same as 'R' in big endian mode.
1094 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1095 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1096 'N' print 'r63' if the operand is (const_int 0).
1097 'd' print a V2SF reg as dN instead of fpN.
1098 'm' print a pair `base,offset' or `base,index', for LD and ST.
1099 'U' Likewise for {LD,ST}{HI,LO}.
1100 'V' print the position of a single bit set.
1101 'W' print the position of a single bit cleared.
1102 't' print a memory address which is a register.
1103 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1104 'o' output an operator. */
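/* A small illustration of the modifiers above: for a DImode value held in
   r0/r1 on a little-endian target, LSW is 0 and MSW is 1, so '%R' prints
   "r0" (the least significant word) and '%S' prints "r1"; '%T' addresses
   the next word, i.e. offset 4, of a memory operand.  */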
1106 static void
1107 sh_print_operand (FILE *stream, rtx x, int code)
1109 int regno;
1110 enum machine_mode mode;
1112 switch (code)
1114 tree trapa_attr;
1116 case '.':
1117 if (final_sequence
1118 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1119 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1120 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1121 break;
1122 case ',':
1123 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1124 break;
1125 case '@':
1126 trapa_attr = lookup_attribute ("trap_exit",
1127 DECL_ATTRIBUTES (current_function_decl));
1128 if (trapa_attr)
1129 fprintf (stream, "trapa #%ld",
1130 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1131 else if (sh_cfun_interrupt_handler_p ())
1133 if (sh_cfun_resbank_handler_p ())
1134 fprintf (stream, "resbank\n");
1135 fprintf (stream, "rte");
1137 else
1138 fprintf (stream, "rts");
1139 break;
1140 case '#':
1141 /* Output a nop if there's nothing in the delay slot. */
1142 if (dbr_sequence_length () == 0)
1143 fprintf (stream, "\n\tnop");
1144 break;
1145 case '\'':
1147 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1149 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1150 fputs ("/u", stream);
1151 break;
1153 case '>':
1154 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1156 fputs ("\t! target: ", stream);
1157 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1159 break;
1160 case 'O':
1161 x = mark_constant_pool_use (x);
1162 output_addr_const (stream, x);
1163 break;
1164 /* N.B.: %R / %S / %T adjust memory addresses by four.
1165 For SHMEDIA, that means they can be used to access the first and
1166 second 32 bit part of a 64 bit (or larger) value that
1167 might be held in floating point registers or memory.
1168 While they can be used to access 64 bit parts of a larger value
1169 held in general purpose registers, that won't work with memory, nor
1170 for fp registers, since the frxx names are used. */
1171 case 'R':
1172 if (REG_P (x) || GET_CODE (x) == SUBREG)
1174 regno = true_regnum (x);
1175 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1176 fputs (reg_names[regno], (stream));
1178 else if (MEM_P (x))
1180 x = adjust_address (x, SImode, 4 * LSW);
1181 sh_print_operand_address (stream, XEXP (x, 0));
1183 else
1185 rtx sub = NULL_RTX;
1187 mode = GET_MODE (x);
1188 if (mode == VOIDmode)
1189 mode = DImode;
1190 if (GET_MODE_SIZE (mode) >= 8)
1191 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1192 if (sub)
1193 sh_print_operand (stream, sub, 0);
1194 else
1195 output_operand_lossage ("invalid operand to %%R");
1197 break;
1198 case 'S':
1199 if (REG_P (x) || GET_CODE (x) == SUBREG)
1201 regno = true_regnum (x);
1202 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1203 fputs (reg_names[regno], (stream));
1205 else if (MEM_P (x))
1207 x = adjust_address (x, SImode, 4 * MSW);
1208 sh_print_operand_address (stream, XEXP (x, 0));
1210 else
1212 rtx sub = NULL_RTX;
1214 mode = GET_MODE (x);
1215 if (mode == VOIDmode)
1216 mode = DImode;
1217 if (GET_MODE_SIZE (mode) >= 8)
1218 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1219 if (sub)
1220 sh_print_operand (stream, sub, 0);
1221 else
1222 output_operand_lossage ("invalid operand to %%S");
1224 break;
1225 case 'T':
1226 /* Next word of a double. */
1227 switch (GET_CODE (x))
1229 case REG:
1230 fputs (reg_names[REGNO (x) + 1], (stream));
1231 break;
1232 case MEM:
1233 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1234 && GET_CODE (XEXP (x, 0)) != POST_INC)
1235 x = adjust_address (x, SImode, 4);
1236 sh_print_operand_address (stream, XEXP (x, 0));
1237 break;
1238 default:
1239 break;
1241 break;
1243 case 't':
1244 gcc_assert (MEM_P (x));
1245 x = XEXP (x, 0);
1246 switch (GET_CODE (x))
1248 case REG:
1249 case SUBREG:
1250 sh_print_operand (stream, x, 0);
1251 break;
1252 default:
1253 break;
1255 break;
1257 case 'o':
1258 switch (GET_CODE (x))
1260 case PLUS: fputs ("add", stream); break;
1261 case MINUS: fputs ("sub", stream); break;
1262 case MULT: fputs ("mul", stream); break;
1263 case DIV: fputs ("div", stream); break;
1264 case EQ: fputs ("eq", stream); break;
1265 case NE: fputs ("ne", stream); break;
1266 case GT: case LT: fputs ("gt", stream); break;
1267 case GE: case LE: fputs ("ge", stream); break;
1268 case GTU: case LTU: fputs ("gtu", stream); break;
1269 case GEU: case LEU: fputs ("geu", stream); break;
1270 default:
1271 break;
1273 break;
1274 case 'M':
1275 if (TARGET_SHMEDIA)
1277 if (MEM_P (x)
1278 && GET_CODE (XEXP (x, 0)) == PLUS
1279 && (REG_P (XEXP (XEXP (x, 0), 1))
1280 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1281 fputc ('x', stream);
1283 else
1285 if (MEM_P (x))
1287 switch (GET_MODE (x))
1289 case QImode: fputs (".b", stream); break;
1290 case HImode: fputs (".w", stream); break;
1291 case SImode: fputs (".l", stream); break;
1292 case SFmode: fputs (".s", stream); break;
1293 case DFmode: fputs (".d", stream); break;
1294 default: gcc_unreachable ();
1298 break;
1300 case 'm':
1301 gcc_assert (MEM_P (x));
1302 x = XEXP (x, 0);
1303 /* Fall through. */
1304 case 'U':
1305 switch (GET_CODE (x))
1307 case REG:
1308 case SUBREG:
1309 sh_print_operand (stream, x, 0);
1310 fputs (", 0", stream);
1311 break;
1313 case PLUS:
1314 sh_print_operand (stream, XEXP (x, 0), 0);
1315 fputs (", ", stream);
1316 sh_print_operand (stream, XEXP (x, 1), 0);
1317 break;
1319 default:
1320 gcc_unreachable ();
1322 break;
1324 case 'V':
1326 int num = exact_log2 (INTVAL (x));
1327 gcc_assert (num >= 0);
1328 fprintf (stream, "#%d", num);
1330 break;
1332 case 'W':
1334 int num = exact_log2 (~INTVAL (x));
1335 gcc_assert (num >= 0);
1336 fprintf (stream, "#%d", num);
1338 break;
1340 case 'd':
1341 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1343 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1344 break;
1346 case 'N':
1347 if (x == CONST0_RTX (GET_MODE (x)))
1349 fprintf ((stream), "r63");
1350 break;
1352 goto default_output;
1353 case 'u':
1354 if (CONST_INT_P (x))
1356 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1357 break;
1359 /* Fall through. */
1361 default_output:
1362 default:
1363 regno = 0;
1364 mode = GET_MODE (x);
1366 switch (GET_CODE (x))
1368 case TRUNCATE:
1370 rtx inner = XEXP (x, 0);
1371 int offset = 0;
1372 enum machine_mode inner_mode;
1374 /* We might see SUBREGs with vector mode registers inside. */
1375 if (GET_CODE (inner) == SUBREG
1376 && (GET_MODE_SIZE (GET_MODE (inner))
1377 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1378 && subreg_lowpart_p (inner))
1379 inner = SUBREG_REG (inner);
1380 if (CONST_INT_P (inner))
1382 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1383 goto default_output;
1385 inner_mode = GET_MODE (inner);
1386 if (GET_CODE (inner) == SUBREG
1387 && (GET_MODE_SIZE (GET_MODE (inner))
1388 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1389 && REG_P (SUBREG_REG (inner)))
1391 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1392 GET_MODE (SUBREG_REG (inner)),
1393 SUBREG_BYTE (inner),
1394 GET_MODE (inner));
1395 inner = SUBREG_REG (inner);
1397 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1398 abort ();
1399 /* Floating point register pairs are always big endian;
1400 general purpose registers are 64 bit wide. */
1401 regno = REGNO (inner);
1402 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1403 - HARD_REGNO_NREGS (regno, mode))
1404 + offset;
1405 x = inner;
1406 goto reg;
1408 case SIGN_EXTEND:
1409 x = XEXP (x, 0);
1410 goto reg;
1411 /* FIXME: We need this on SHmedia32 because reload generates
1412 some sign-extended HI or QI loads into DImode registers
1413 but, because Pmode is SImode, the address ends up with a
1414 subreg:SI of the DImode register. Maybe reload should be
1415 fixed so as to apply alter_subreg to such loads? */
1416 case IF_THEN_ELSE:
1417 gcc_assert (trapping_target_operand (x, VOIDmode));
1418 x = XEXP (XEXP (x, 2), 0);
1419 goto default_output;
1420 case SUBREG:
1421 gcc_assert (SUBREG_BYTE (x) == 0
1422 && REG_P (SUBREG_REG (x)));
1424 x = SUBREG_REG (x);
1425 /* Fall through. */
1427 reg:
1428 case REG:
1429 regno += REGNO (x);
1430 if (FP_REGISTER_P (regno)
1431 && mode == V16SFmode)
1432 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1433 else if (FP_REGISTER_P (REGNO (x))
1434 && mode == V4SFmode)
1435 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1436 else if (REG_P (x)
1437 && mode == V2SFmode)
1438 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1439 else if (FP_REGISTER_P (REGNO (x))
1440 && GET_MODE_SIZE (mode) > 4)
1441 fprintf ((stream), "d%s", reg_names[regno] + 1);
1442 else
1443 fputs (reg_names[regno], (stream));
1444 break;
1446 case MEM:
1447 output_address (XEXP (x, 0));
1448 break;
1450 default:
1451 if (TARGET_SH1)
1452 fputc ('#', stream);
1453 output_addr_const (stream, x);
1454 break;
1456 break;
1460 static bool
1461 sh_print_operand_punct_valid_p (unsigned char code)
1463 return (code == '.' || code == '#' || code == '@' || code == ','
1464 || code == '$' || code == '\'' || code == '>');
1467 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1469 static bool
1470 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1472 if (GET_CODE (x) == UNSPEC)
1474 switch (XINT (x, 1))
1476 case UNSPEC_DATALABEL:
1477 fputs ("datalabel ", file);
1478 output_addr_const (file, XVECEXP (x, 0, 0));
1479 break;
1480 case UNSPEC_PIC:
1481 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1482 output_addr_const (file, XVECEXP (x, 0, 0));
1483 break;
1484 case UNSPEC_GOT:
1485 output_addr_const (file, XVECEXP (x, 0, 0));
1486 fputs ("@GOT", file);
1487 break;
1488 case UNSPEC_GOTOFF:
1489 output_addr_const (file, XVECEXP (x, 0, 0));
1490 fputs ("@GOTOFF", file);
1491 break;
1492 case UNSPEC_PLT:
1493 output_addr_const (file, XVECEXP (x, 0, 0));
1494 fputs ("@PLT", file);
1495 break;
1496 case UNSPEC_GOTPLT:
1497 output_addr_const (file, XVECEXP (x, 0, 0));
1498 fputs ("@GOTPLT", file);
1499 break;
1500 case UNSPEC_DTPOFF:
1501 output_addr_const (file, XVECEXP (x, 0, 0));
1502 fputs ("@DTPOFF", file);
1503 break;
1504 case UNSPEC_GOTTPOFF:
1505 output_addr_const (file, XVECEXP (x, 0, 0));
1506 fputs ("@GOTTPOFF", file);
1507 break;
1508 case UNSPEC_TPOFF:
1509 output_addr_const (file, XVECEXP (x, 0, 0));
1510 fputs ("@TPOFF", file);
1511 break;
1512 case UNSPEC_CALLER:
1514 char name[32];
1515 /* LPCS stands for Label for PIC Call Site. */
1516 targetm.asm_out.generate_internal_label (name, "LPCS",
1517 INTVAL (XVECEXP (x, 0, 0)));
1518 assemble_name (file, name);
1520 break;
1521 case UNSPEC_EXTRACT_S16:
1522 case UNSPEC_EXTRACT_U16:
1524 rtx val, shift;
1526 val = XVECEXP (x, 0, 0);
1527 shift = XVECEXP (x, 0, 1);
1528 fputc ('(', file);
1529 if (shift != const0_rtx)
1530 fputc ('(', file);
1531 if (GET_CODE (val) == CONST
1532 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1534 fputc ('(', file);
1535 output_addr_const (file, val);
1536 fputc (')', file);
1538 else
1539 output_addr_const (file, val);
1540 if (shift != const0_rtx)
1542 fputs (" >> ", file);
1543 output_addr_const (file, shift);
1544 fputc (')', file);
1546 fputs (" & 65535)", file);
1548 break;
1549 case UNSPEC_SYMOFF:
1550 output_addr_const (file, XVECEXP (x, 0, 0));
1551 fputc ('-', file);
1552 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1554 fputc ('(', file);
1555 output_addr_const (file, XVECEXP (x, 0, 1));
1556 fputc (')', file);
1558 else
1559 output_addr_const (file, XVECEXP (x, 0, 1));
1560 break;
1561 case UNSPEC_PCREL_SYMOFF:
1562 output_addr_const (file, XVECEXP (x, 0, 0));
1563 fputs ("-(", file);
1564 output_addr_const (file, XVECEXP (x, 0, 1));
1565 fputs ("-.)", file);
1566 break;
1567 default:
1568 return false;
1570 return true;
1572 else
1573 return false;
1577 /* Encode symbol attributes of a SYMBOL_REF into its
1578 SYMBOL_REF_FLAGS. */
1579 static void
1580 sh_encode_section_info (tree decl, rtx rtl, int first)
1582 default_encode_section_info (decl, rtl, first);
1584 if (TREE_CODE (decl) == FUNCTION_DECL
1585 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1586 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1589 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1590 static void
1591 force_into (rtx value, rtx target)
1593 value = force_operand (value, target);
1594 if (! rtx_equal_p (value, target))
1595 emit_insn (gen_move_insn (target, value));
1598 /* Emit code to perform a block move. Choose the best method.
1600 OPERANDS[0] is the destination.
1601 OPERANDS[1] is the source.
1602 OPERANDS[2] is the size.
1603 OPERANDS[3] is the alignment safe to use. */
1606 expand_block_move (rtx *operands)
1608 int align = INTVAL (operands[3]);
1609 int constp = (CONST_INT_P (operands[2]));
1610 int bytes = (constp ? INTVAL (operands[2]) : 0);
1612 if (! constp)
1613 return 0;
1615 /* If we could use mov.l to move words and dest is word-aligned, we
1616 can use movua.l for loads and still generate a relatively short
1617 and efficient sequence. */
1618 if (TARGET_SH4A_ARCH && align < 4
1619 && MEM_ALIGN (operands[0]) >= 32
1620 && can_move_by_pieces (bytes, 32))
1622 rtx dest = copy_rtx (operands[0]);
1623 rtx src = copy_rtx (operands[1]);
1624 /* We could use different pseudos for each copied word, but
1625 since movua can only load into r0, it's kind of
1626 pointless. */
1627 rtx temp = gen_reg_rtx (SImode);
1628 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1629 int copied = 0;
1631 while (copied + 4 <= bytes)
1633 rtx to = adjust_address (dest, SImode, copied);
1634 rtx from = adjust_automodify_address (src, BLKmode,
1635 src_addr, copied);
1637 set_mem_size (from, GEN_INT (4));
1638 emit_insn (gen_movua (temp, from));
1639 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1640 emit_move_insn (to, temp);
1641 copied += 4;
1644 if (copied < bytes)
1645 move_by_pieces (adjust_address (dest, BLKmode, copied),
1646 adjust_automodify_address (src, BLKmode,
1647 src_addr, copied),
1648 bytes - copied, align, 0);
1650 return 1;
1653 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1654 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1655 if (align < 4 || (bytes % 4 != 0))
1656 return 0;
1658 if (TARGET_HARD_SH4)
1660 if (bytes < 12)
1661 return 0;
1662 else if (bytes == 12)
1664 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1665 rtx r4 = gen_rtx_REG (SImode, 4);
1666 rtx r5 = gen_rtx_REG (SImode, 5);
1668 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1669 force_into (XEXP (operands[0], 0), r4);
1670 force_into (XEXP (operands[1], 0), r5);
1671 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1672 return 1;
1674 else if (! optimize_size)
1676 const char *entry_name;
1677 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1678 int dwords;
1679 rtx r4 = gen_rtx_REG (SImode, 4);
1680 rtx r5 = gen_rtx_REG (SImode, 5);
1681 rtx r6 = gen_rtx_REG (SImode, 6);
1683 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1684 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1685 force_into (XEXP (operands[0], 0), r4);
1686 force_into (XEXP (operands[1], 0), r5);
1688 dwords = bytes >> 3;
1689 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1690 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1691 return 1;
1693 else
1694 return 0;
1696 if (bytes < 64)
1698 char entry[30];
1699 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1700 rtx r4 = gen_rtx_REG (SImode, 4);
1701 rtx r5 = gen_rtx_REG (SImode, 5);
1703 sprintf (entry, "__movmemSI%d", bytes);
1704 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1705 force_into (XEXP (operands[0], 0), r4);
1706 force_into (XEXP (operands[1], 0), r5);
1707 emit_insn (gen_block_move_real (func_addr_rtx));
1708 return 1;
1711 /* This is the same number of bytes as a memcpy call, but to a different,
1712 less common function name, so this will occasionally use more space. */
1713 if (! optimize_size)
1715 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1716 int final_switch, while_loop;
1717 rtx r4 = gen_rtx_REG (SImode, 4);
1718 rtx r5 = gen_rtx_REG (SImode, 5);
1719 rtx r6 = gen_rtx_REG (SImode, 6);
1721 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1722 force_into (XEXP (operands[0], 0), r4);
1723 force_into (XEXP (operands[1], 0), r5);
1725 /* r6 controls the size of the move. 16 is decremented from it
1726 for each 64 bytes moved. Then the negative bit left over is used
1727 as an index into a list of move instructions. e.g., a 72 byte move
1728 would be set up with size(r6) = 14, for one iteration through the
1729 big while loop, and a switch of -2 for the last part. */
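      /* Another worked example: a 272 byte move has bytes/4 = 68, so
	 final_switch = 16 - (68 % 16) = 12 and while_loop = (68/16 - 1) * 16
	 = 48; r6 starts at 60, four trips through the big loop move 256
	 bytes, and the final switch of -4 copies the remaining 16.  */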
1731 final_switch = 16 - ((bytes / 4) % 16);
1732 while_loop = ((bytes / 4) / 16 - 1) * 16;
1733 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1734 emit_insn (gen_block_lump_real (func_addr_rtx));
1735 return 1;
1738 return 0;
1741 /* Prepare operands for a move define_expand; specifically, one of the
1742 operands must be in a register. */
1745 prepare_move_operands (rtx operands[], enum machine_mode mode)
1747 if ((mode == SImode || mode == DImode)
1748 && flag_pic
1749 && ! ((mode == Pmode || mode == ptr_mode)
1750 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1752 rtx temp;
1753 if (SYMBOLIC_CONST_P (operands[1]))
1755 if (MEM_P (operands[0]))
1756 operands[1] = force_reg (Pmode, operands[1]);
1757 else if (TARGET_SHMEDIA
1758 && GET_CODE (operands[1]) == LABEL_REF
1759 && target_reg_operand (operands[0], mode))
1760 /* It's ok. */;
1761 else
1763 temp = (!can_create_pseudo_p ()
1764 ? operands[0]
1765 : gen_reg_rtx (Pmode));
1766 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1769 else if (GET_CODE (operands[1]) == CONST
1770 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1771 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1773 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1774 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1775 mode, temp);
1776 operands[1] = expand_binop (mode, add_optab, temp,
1777 XEXP (XEXP (operands[1], 0), 1),
1778 (!can_create_pseudo_p ()
1779 ? temp
1780 : gen_reg_rtx (Pmode)),
1781 0, OPTAB_LIB_WIDEN);
1785 if (! reload_in_progress && ! reload_completed)
1787 /* Copy the source to a register if both operands aren't registers. */
1788 if (! register_operand (operands[0], mode)
1789 && ! sh_register_operand (operands[1], mode))
1790 operands[1] = copy_to_mode_reg (mode, operands[1]);
1792 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1794 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1795 except that we can't use that function because it is static. */
1796 rtx new_rtx = change_address (operands[0], mode, 0);
1797 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1798 operands[0] = new_rtx;
1801 /* This case can happen while generating code to move the result
1802 of a library call to the target. Reject `st r0,@(rX,rY)' because
1803 reload will fail to find a spill register for rX, since r0 is already
1804 being used for the source. */
1805 else if (TARGET_SH1
1806 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1807 && MEM_P (operands[0])
1808 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1809 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1810 operands[1] = copy_to_mode_reg (mode, operands[1]);
1813 if (mode == Pmode || mode == ptr_mode)
1815 rtx op0, op1, opc;
1816 enum tls_model tls_kind;
1818 op0 = operands[0];
1819 op1 = operands[1];
1820 if (GET_CODE (op1) == CONST
1821 && GET_CODE (XEXP (op1, 0)) == PLUS
1822 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1823 != TLS_MODEL_NONE))
1825 opc = XEXP (XEXP (op1, 0), 1);
1826 op1 = XEXP (XEXP (op1, 0), 0);
1828 else
1829 opc = NULL_RTX;
1831 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1833 rtx tga_op1, tga_ret, tmp, tmp2;
1835 switch (tls_kind)
1837 case TLS_MODEL_GLOBAL_DYNAMIC:
1838 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1839 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1840 op1 = tga_ret;
1841 break;
1843 case TLS_MODEL_LOCAL_DYNAMIC:
1844 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1845 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1847 tmp = gen_reg_rtx (Pmode);
1848 emit_move_insn (tmp, tga_ret);
1850 if (register_operand (op0, Pmode))
1851 tmp2 = op0;
1852 else
1853 tmp2 = gen_reg_rtx (Pmode);
1855 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1856 op1 = tmp2;
1857 break;
1859 case TLS_MODEL_INITIAL_EXEC:
1860 if (! flag_pic)
1862 /* Don't schedule insns for getting GOT address when
1863 the first scheduling is enabled, to avoid spill
1864 failures for R0. */
1865 if (flag_schedule_insns)
1866 emit_insn (gen_blockage ());
1867 emit_insn (gen_GOTaddr2picreg ());
1868 emit_use (gen_rtx_REG (SImode, PIC_REG));
1869 if (flag_schedule_insns)
1870 emit_insn (gen_blockage ());
1872 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1873 tmp = gen_sym2GOTTPOFF (op1);
1874 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1875 op1 = tga_op1;
1876 break;
1878 case TLS_MODEL_LOCAL_EXEC:
1879 tmp2 = gen_reg_rtx (Pmode);
1880 emit_insn (gen_load_gbr (tmp2));
1881 tmp = gen_reg_rtx (Pmode);
1882 emit_insn (gen_symTPOFF2reg (tmp, op1));
1884 if (register_operand (op0, Pmode))
1885 op1 = op0;
1886 else
1887 op1 = gen_reg_rtx (Pmode);
1889 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1890 break;
1892 default:
1893 gcc_unreachable ();
1895 if (opc)
1896 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1897 operands[1] = op1;
1901 return 0;
1904 enum rtx_code
1905 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1906 enum rtx_code comparison)
1908 rtx op1;
1909 rtx scratch = NULL_RTX;
1911 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1912 comparison = GET_CODE (operands[0]);
1913 else
1914 scratch = operands[4];
1915 if (CONST_INT_P (operands[1])
1916 && !CONST_INT_P (operands[2]))
1918 rtx tmp = operands[1];
1920 operands[1] = operands[2];
1921 operands[2] = tmp;
1922 comparison = swap_condition (comparison);
1924 if (CONST_INT_P (operands[2]))
1926 HOST_WIDE_INT val = INTVAL (operands[2]);
1927 if ((val == -1 || val == -0x81)
1928 && (comparison == GT || comparison == LE))
1930 comparison = (comparison == GT) ? GE : LT;
1931 operands[2] = gen_int_mode (val + 1, mode);
1933 else if ((val == 1 || val == 0x80)
1934 && (comparison == GE || comparison == LT))
1936 comparison = (comparison == GE) ? GT : LE;
1937 operands[2] = gen_int_mode (val - 1, mode);
1939 else if (val == 1 && (comparison == GEU || comparison == LTU))
1941 comparison = (comparison == GEU) ? NE : EQ;
1942 operands[2] = CONST0_RTX (mode);
1944 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1946 comparison = (comparison == GEU) ? GTU : LEU;
1947 operands[2] = gen_int_mode (val - 1, mode);
1949 else if (val == 0 && (comparison == GTU || comparison == LEU))
1950 comparison = (comparison == GTU) ? NE : EQ;
1951 else if (mode == SImode
1952 && ((val == 0x7fffffff
1953 && (comparison == GTU || comparison == LEU))
1954 || ((unsigned HOST_WIDE_INT) val
1955 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1956 && (comparison == GEU || comparison == LTU))))
1958 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1959 operands[2] = CONST0_RTX (mode);
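      /* E.g. the rewrites above turn (x > -1) into (x >= 0) and the
	 unsigned (x < 1) into (x == 0); the constant collapses to zero,
	 which is the cheapest value to compare against here.  */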
1962 op1 = operands[1];
1963 if (can_create_pseudo_p ())
1964 operands[1] = force_reg (mode, op1);
1965 /* When we are handling DImode comparisons, we want to keep constants so
1966 that we can optimize the component comparisons; however, memory loads
1967 are better issued as a whole so that they can be scheduled well.
1968 SImode equality comparisons allow I08 constants, but only when they
1969 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1970 into a register, that register might as well be r0, and we allow the
1971 constant. If it is already in a register, this is likely to be
1972 allocated to a different hard register, thus we load the constant into
1973 a register unless it is zero. */
1974 if (!REG_P (operands[2])
1975 && (!CONST_INT_P (operands[2])
1976 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1977 && ((comparison != EQ && comparison != NE)
1978 || (REG_P (op1) && REGNO (op1) != R0_REG)
1979 || !satisfies_constraint_I08 (operands[2])))))
1981 if (scratch && GET_MODE (scratch) == mode)
1983 emit_move_insn (scratch, operands[2]);
1984 operands[2] = scratch;
1986 else if (can_create_pseudo_p ())
1987 operands[2] = force_reg (mode, operands[2]);
1989 return comparison;
1992 void
1993 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1995 rtx (*branch_expander) (rtx) = gen_branch_true;
1996 rtx jump;
1998 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1999 switch (comparison)
2001 case NE: case LT: case LE: case LTU: case LEU:
2002 comparison = reverse_condition (comparison);
2003 branch_expander = gen_branch_false;
2004 default: ;
2006 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
2007 gen_rtx_fmt_ee (comparison, SImode,
2008 operands[1], operands[2])));
2009 jump = emit_jump_insn (branch_expander (operands[3]));
2010 if (probability >= 0)
2011 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
2015 /* ??? How should we distribute probabilities when more than one branch
2016 is generated? So far we only have some ad-hoc observations:
2017 - If the operands are random, they are likely to differ in both parts.
2018 - If comparing items in a hash chain, the operands are random or equal;
2019 operation should be EQ or NE.
2020 - If items are searched in an ordered tree from the root, we can expect
2021 the highpart to be unequal about half of the time; operation should be
2022 an inequality comparison, operands non-constant, and overall probability
2023 about 50%. Likewise for quicksort.
2024 - Range checks will be often made against constants. Even if we assume for
2025 simplicity an even distribution of the non-constant operand over a
2026 sub-range here, the same probability could be generated with differently
2027 wide sub-ranges - as long as the ratio of the part of the subrange that
2028 is before the threshold to the part that comes after the threshold stays
2029 the same. Thus, we can't really tell anything here;
2030 assuming random distribution is at least simple. */
2033 bool
2034 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2036 enum rtx_code msw_taken, msw_skip, lsw_taken;
2037 rtx skip_label = NULL_RTX;
2038 rtx op1h, op1l, op2h, op2l;
2039 int num_branches;
2040 int prob, rev_prob;
2041 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2042 rtx scratch = operands[4];
2044 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2045 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2046 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2047 op1l = gen_lowpart (SImode, operands[1]);
2048 op2l = gen_lowpart (SImode, operands[2]);
2049 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2050 prob = split_branch_probability;
2051 rev_prob = REG_BR_PROB_BASE - prob;
2052 switch (comparison)
2054 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2055 That costs 1 cycle more when the first branch can be predicted taken,
2056 but saves us mispredicts because only one branch needs prediction.
2057 It also enables generating the cmpeqdi_t-1 pattern. */
2058 case EQ:
2059 if (TARGET_CMPEQDI_T)
2061 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2062 emit_jump_insn (gen_branch_true (operands[3]));
2063 return true;
2065 msw_skip = NE;
2066 lsw_taken = EQ;
2067 if (prob >= 0)
2069 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) . */
2071 msw_skip_prob = rev_prob;
2072 if (REG_BR_PROB_BASE <= 65535)
2073 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2074 else
2076 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2077 lsw_taken_prob
2078 = (prob
2079 ? (REG_BR_PROB_BASE
2080 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2081 / ((HOST_WIDEST_INT) prob << 32)))
2082 : 0);
2085 break;
2086 case NE:
2087 if (TARGET_CMPEQDI_T)
2089 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2090 emit_jump_insn (gen_branch_false (operands[3]));
2091 return true;
2093 msw_taken = NE;
2094 msw_taken_prob = prob;
2095 lsw_taken = NE;
2096 lsw_taken_prob = 0;
2097 break;
2098 case GTU: case GT:
2099 msw_taken = comparison;
2100 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2101 break;
2102 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2103 msw_skip = swap_condition (msw_taken);
2104 lsw_taken = GTU;
2105 break;
2106 case GEU: case GE:
2107 if (op2l == CONST0_RTX (SImode))
2108 msw_taken = comparison;
2109 else
2111 msw_taken = comparison == GE ? GT : GTU;
2112 msw_skip = swap_condition (msw_taken);
2113 lsw_taken = GEU;
2115 break;
2116 case LTU: case LT:
2117 msw_taken = comparison;
2118 if (op2l == CONST0_RTX (SImode))
2119 break;
2120 msw_skip = swap_condition (msw_taken);
2121 lsw_taken = LTU;
2122 break;
2123 case LEU: case LE:
2124 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2125 msw_taken = comparison;
2126 else
2128 lsw_taken = LEU;
2129 if (comparison == LE)
2130 msw_taken = LT;
2131 else if (op2h != CONST0_RTX (SImode))
2132 msw_taken = LTU;
2133 else
2134 break;
2135 msw_skip = swap_condition (msw_taken);
2137 break;
2138 default: return false;
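  /* A sketch of the control flow this sets up for the GTU case: branch to
     the target if msw(op1) >u msw(op2) (msw_taken), branch past the
     low-word test if msw(op1) <u msw(op2) (msw_skip), and otherwise, with
     equal high words, decide on lsw(op1) >u lsw(op2) (lsw_taken).  */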
2140 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2141 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2142 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2143 if (comparison != EQ && comparison != NE && num_branches > 1)
2145 if (!CONSTANT_P (operands[2])
2146 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2147 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2149 msw_taken_prob = prob / 2U;
2150 msw_skip_prob
2151 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2152 lsw_taken_prob = prob;
2154 else
2156 msw_taken_prob = prob;
2157 msw_skip_prob = REG_BR_PROB_BASE;
2158 /* ??? If we have a constant op2h, should we use that when
2159 calculating lsw_taken_prob? */
2160 lsw_taken_prob = prob;
2163 operands[1] = op1h;
2164 operands[2] = op2h;
2165 operands[4] = NULL_RTX;
2166 if (reload_completed
2167 && ! arith_reg_or_0_operand (op2h, SImode)
2168 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2169 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2170 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2172 emit_move_insn (scratch, operands[2]);
2173 operands[2] = scratch;
2175 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2176 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2177 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2179 rtx taken_label = operands[3];
2181 /* Operands were possibly modified, but msw_skip doesn't expect this.
2182 Always use the original ones. */
2183 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2185 operands[1] = op1h;
2186 operands[2] = op2h;
2189 operands[3] = skip_label = gen_label_rtx ();
2190 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2191 operands[3] = taken_label;
2193 operands[1] = op1l;
2194 operands[2] = op2l;
2195 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2197 if (reload_completed
2198 && ! arith_reg_or_0_operand (op2l, SImode)
2199 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2201 emit_move_insn (scratch, operands[2]);
2202 operands[2] = scratch;
2204 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2206 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2207 emit_label (skip_label);
2208 return true;
2211 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2213 static void
2214 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2216 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2218 insn = gen_rtx_PARALLEL (VOIDmode,
2219 gen_rtvec (2, insn,
2220 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2221 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2223 else
2224 emit_insn (insn);
2227 /* Prepare the operands for an scc instruction; make sure that the
2228 compare has been done and the result is in T_REG. */
2229 void
2230 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2232 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2233 enum rtx_code oldcode = code;
2234 enum machine_mode mode;
2236 /* First need a compare insn. */
2237 switch (code)
2239 case NE:
2240 /* It isn't possible to handle this case. */
2241 gcc_unreachable ();
2242 case LT:
2243 code = GT;
2244 break;
2245 case LE:
2246 code = GE;
2247 break;
2248 case LTU:
2249 code = GTU;
2250 break;
2251 case LEU:
2252 code = GEU;
2253 break;
2254 default:
2255 break;
2257 if (code != oldcode)
2259 rtx tmp = op0;
2260 op0 = op1;
2261 op1 = tmp;
2264 mode = GET_MODE (op0);
2265 if (mode == VOIDmode)
2266 mode = GET_MODE (op1);
2268 op0 = force_reg (mode, op0);
2269 if ((code != EQ && code != NE
2270 && (op1 != const0_rtx
2271 || code == GTU || code == GEU || code == LTU || code == LEU))
2272 || (mode == DImode && op1 != const0_rtx)
2273 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2274 op1 = force_reg (mode, op1);
2276 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2277 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2278 mode);
2282 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2283 rtx op0, rtx op1)
2285 rtx target = gen_reg_rtx (SImode);
2286 rtx tmp;
2288 gcc_assert (TARGET_SHMEDIA);
2289 switch (code)
2291 case EQ:
2292 case GT:
2293 case LT:
2294 case UNORDERED:
2295 case GTU:
2296 case LTU:
2297 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2298 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2299 code = NE;
2300 break;
2302 case NE:
2303 case GE:
2304 case LE:
2305 case ORDERED:
2306 case GEU:
2307 case LEU:
2308 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2309 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2310 code = EQ;
2311 break;
2313 case UNEQ:
2314 case UNGE:
2315 case UNGT:
2316 case UNLE:
2317 case UNLT:
2318 case LTGT:
2319 return NULL_RTX;
2321 default:
2322 gcc_unreachable ();
2325 if (mode == DImode)
2327 rtx t2 = gen_reg_rtx (DImode);
2328 emit_insn (gen_extendsidi2 (t2, target));
2329 target = t2;
2332 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2335 /* Called from the md file, set up the operands of a compare instruction. */
2337 void
2338 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2340 enum rtx_code code = GET_CODE (operands[0]);
2341 enum rtx_code branch_code;
2342 rtx op0 = operands[1];
2343 rtx op1 = operands[2];
2344 rtx insn, tem;
2345 bool need_ccmpeq = false;
2347 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2349 op0 = force_reg (mode, op0);
2350 op1 = force_reg (mode, op1);
2352 else
2354 if (code != EQ || mode == DImode)
2356 /* Force args into regs, since we can't use constants here. */
2357 op0 = force_reg (mode, op0);
2358 if (op1 != const0_rtx || code == GTU || code == GEU)
2359 op1 = force_reg (mode, op1);
2363 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2365 if (code == LT
2366 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2367 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2369 tem = op0, op0 = op1, op1 = tem;
2370 code = swap_condition (code);
2373 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2374 if (code == GE)
2376 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2377 need_ccmpeq = true;
2378 code = GT;
2381 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2382 to EQ/GT respectively. */
2383 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2386 switch (code)
2388 case EQ:
2389 case GT:
2390 case GE:
2391 case GTU:
2392 case GEU:
2393 branch_code = code;
2394 break;
2395 case NE:
2396 case LT:
2397 case LE:
2398 case LTU:
2399 case LEU:
2400 branch_code = reverse_condition (code);
2401 break;
2402 default:
2403 gcc_unreachable ();
2406 insn = gen_rtx_SET (VOIDmode,
2407 gen_rtx_REG (SImode, T_REG),
2408 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2410 sh_emit_set_t_insn (insn, mode);
2411 if (need_ccmpeq)
2412 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2414 if (branch_code == code)
2415 emit_jump_insn (gen_branch_true (operands[3]));
2416 else
2417 emit_jump_insn (gen_branch_false (operands[3]));
2420 void
2421 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2423 enum rtx_code code = GET_CODE (operands[1]);
2424 rtx op0 = operands[2];
2425 rtx op1 = operands[3];
2426 rtx lab = NULL_RTX;
2427 bool invert = false;
2428 rtx tem;
2430 op0 = force_reg (mode, op0);
2431 if ((code != EQ && code != NE
2432 && (op1 != const0_rtx
2433 || code == GTU || code == GEU || code == LTU || code == LEU))
2434 || (mode == DImode && op1 != const0_rtx)
2435 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2436 op1 = force_reg (mode, op1);
2438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2440 if (code == LT || code == LE)
2442 code = swap_condition (code);
2443 tem = op0, op0 = op1, op1 = tem;
2445 if (code == GE)
2447 if (TARGET_IEEE)
2449 lab = gen_label_rtx ();
2450 sh_emit_scc_to_t (EQ, op0, op1);
2451 emit_jump_insn (gen_branch_true (lab));
2452 code = GT;
2454 else
2456 code = LT;
2457 invert = true;
2462 if (code == NE)
2464 code = EQ;
2465 invert = true;
2468 sh_emit_scc_to_t (code, op0, op1);
2469 if (lab)
2470 emit_label (lab);
2471 if (invert)
2472 emit_insn (gen_movnegt (operands[0]));
2473 else
2474 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2477 /* Functions to output assembly code. */
2479 /* Return a sequence of instructions to perform DI or DF move.
2481 Since the SH cannot move a DI or DF in one instruction, we have
2482 to take care when we see overlapping source and dest registers. */
2484 const char *
2485 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2486 enum machine_mode mode)
2488 rtx dst = operands[0];
2489 rtx src = operands[1];
2491 if (MEM_P (dst)
2492 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2493 return "mov.l %T1,%0\n\tmov.l %1,%0";
2495 if (register_operand (dst, mode)
2496 && register_operand (src, mode))
2498 if (REGNO (src) == MACH_REG)
2499 return "sts mach,%S0\n\tsts macl,%R0";
2501 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2502 when mov.d r1,r0 do r1->r0 then r2->r1. */
2504 if (REGNO (src) + 1 == REGNO (dst))
2505 return "mov %T1,%T0\n\tmov %1,%0";
2506 else
2507 return "mov %1,%0\n\tmov %T1,%T0";
2509 else if (CONST_INT_P (src))
2511 if (INTVAL (src) < 0)
2512 output_asm_insn ("mov #-1,%S0", operands);
2513 else
2514 output_asm_insn ("mov #0,%S0", operands);
2516 return "mov %1,%R0";
2518 else if (MEM_P (src))
2520 int ptrreg = -1;
2521 int dreg = REGNO (dst);
2522 rtx inside = XEXP (src, 0);
2524 switch (GET_CODE (inside))
2526 case REG:
2527 ptrreg = REGNO (inside);
2528 break;
2530 case SUBREG:
2531 ptrreg = subreg_regno (inside);
2532 break;
2534 case PLUS:
2535 ptrreg = REGNO (XEXP (inside, 0));
2536 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2537 an offsettable address. Unfortunately, offsettable addresses use
2538 QImode to check the offset, and a QImode offsettable address
2539 requires r0 for the other operand, which is not currently
2540 supported, so we can't use the 'o' constraint.
2541 Thus we must check for and handle r0+REG addresses here.
2542 We punt for now, since this is likely very rare. */
2543 gcc_assert (!REG_P (XEXP (inside, 1)));
2544 break;
2546 case LABEL_REF:
2547 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2548 case POST_INC:
2549 return "mov.l %1,%0\n\tmov.l %1,%T0";
2550 default:
2551 gcc_unreachable ();
2554 /* Work out the safe way to copy. Copy into the second half first. */
2555 if (dreg == ptrreg)
2556 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2559 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2562 /* Print an instruction which would have gone into a delay slot after
2563 another instruction, but couldn't because the other instruction expanded
2564 into a sequence where putting the slot insn at the end wouldn't work. */
2566 static void
2567 print_slot (rtx insn)
2569 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2571 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2574 const char *
2575 output_far_jump (rtx insn, rtx op)
2577 struct { rtx lab, reg, op; } this_jmp;
2578 rtx braf_base_lab = NULL_RTX;
2579 const char *jump;
2580 int far;
2581 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2582 rtx prev;
2584 this_jmp.lab = gen_label_rtx ();
2586 if (TARGET_SH2
2587 && offset >= -32764
2588 && offset - get_attr_length (insn) <= 32766)
2590 far = 0;
2591 jump = "mov.w %O0,%1; braf %1";
2593 else
2595 far = 1;
2596 if (flag_pic)
2598 if (TARGET_SH2)
2599 jump = "mov.l %O0,%1; braf %1";
2600 else
2601 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2603 else
2604 jump = "mov.l %O0,%1; jmp @%1";
2606 /* If we have a scratch register available, use it. */
2607 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2608 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2610 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2611 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2612 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2613 output_asm_insn (jump, &this_jmp.lab);
2614 if (dbr_sequence_length ())
2615 print_slot (final_sequence);
2616 else
2617 output_asm_insn ("nop", 0);
2619 else
2621 /* Output the delay slot insn first if any. */
2622 if (dbr_sequence_length ())
2623 print_slot (final_sequence);
2625 this_jmp.reg = gen_rtx_REG (SImode, 13);
2626 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2627 Fortunately, MACL is fixed and call-clobbered, and we never
2628 need its value across jumps, so save r13 in it instead of in
2629 the stack. */
2630 if (TARGET_SH5)
2631 output_asm_insn ("lds r13, macl", 0);
2632 else
2633 output_asm_insn ("mov.l r13,@-r15", 0);
2634 output_asm_insn (jump, &this_jmp.lab);
2635 if (TARGET_SH5)
2636 output_asm_insn ("sts macl, r13", 0);
2637 else
2638 output_asm_insn ("mov.l @r15+,r13", 0);
2640 if (far && flag_pic && TARGET_SH2)
2642 braf_base_lab = gen_label_rtx ();
2643 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2644 CODE_LABEL_NUMBER (braf_base_lab));
2646 if (far)
2647 output_asm_insn (".align 2", 0);
2648 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2649 this_jmp.op = op;
2650 if (far && flag_pic)
2652 if (TARGET_SH2)
2653 this_jmp.lab = braf_base_lab;
2654 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2656 else
2657 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2658 return "";
2661 /* Local label counter, used for constants in the pool and inside
2662 pattern branches. */
2664 static int lf = 100;
2666 /* Output code for ordinary branches. */
2668 const char *
2669 output_branch (int logic, rtx insn, rtx *operands)
2671 switch (get_attr_length (insn))
2673 case 6:
2674 /* This can happen if filling the delay slot has caused a forward
2675 branch to exceed its range (we could reverse it, but only
2676 when we know we won't overextend other branches; this should
2677 best be handled by relaxation).
2678 It can also happen when other condbranches hoist delay slot insn
2679 from their destination, thus leading to code size increase.
2680 But the branch will still be in the range -4092..+4098 bytes. */
2682 if (! TARGET_RELAX)
2684 int label = lf++;
2685 /* The call to print_slot will clobber the operands. */
2686 rtx op0 = operands[0];
2688 /* If the instruction in the delay slot is annulled (true), then
2689 there is no delay slot where we can put it now. The only safe
2690 place for it is after the label. final will do that by default. */
2692 if (final_sequence
2693 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2694 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2696 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2697 ASSEMBLER_DIALECT ? "/" : ".", label);
2698 print_slot (final_sequence);
2700 else
2701 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2703 output_asm_insn ("bra\t%l0", &op0);
2704 fprintf (asm_out_file, "\tnop\n");
2705 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2707 return "";
2709 /* When relaxing, handle this like a short branch. The linker
2710 will fix it up if it still doesn't fit after relaxation. */
2711 case 2:
2712 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2714 /* These are for SH2e, in which we have to account for the
2715 extra nop because of the hardware bug in annulled branches. */
2716 case 8:
2717 if (! TARGET_RELAX)
2719 int label = lf++;
2721 gcc_assert (!final_sequence
2722 || !(INSN_ANNULLED_BRANCH_P
2723 (XVECEXP (final_sequence, 0, 0))));
2724 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2725 logic ? "f" : "t",
2726 ASSEMBLER_DIALECT ? "/" : ".", label);
2727 fprintf (asm_out_file, "\tnop\n");
2728 output_asm_insn ("bra\t%l0", operands);
2729 fprintf (asm_out_file, "\tnop\n");
2730 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2732 return "";
2734 /* When relaxing, fall through. */
2735 case 4:
2737 char buffer[10];
2739 sprintf (buffer, "b%s%ss\t%%l0",
2740 logic ? "t" : "f",
2741 ASSEMBLER_DIALECT ? "/" : ".");
2742 output_asm_insn (buffer, &operands[0]);
2743 return "nop";
2746 default:
2747 /* There should be no longer branches now - that would
2748 indicate that something has destroyed the branches set
2749 up in machine_dependent_reorg. */
2750 gcc_unreachable ();
2754 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2755 fill in operands[9] as a label to the successor insn.
2756 We try to use jump threading where possible.
2757 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2758 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2759 follow jmp and bt, if the address is in range. */
2760 const char *
2761 output_branchy_insn (enum rtx_code code, const char *templ,
2762 rtx insn, rtx *operands)
2764 rtx next_insn = NEXT_INSN (insn);
2766 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2768 rtx src = SET_SRC (PATTERN (next_insn));
2769 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2771 /* Following branch not taken */
2772 operands[9] = gen_label_rtx ();
2773 emit_label_after (operands[9], next_insn);
2774 INSN_ADDRESSES_NEW (operands[9],
2775 INSN_ADDRESSES (INSN_UID (next_insn))
2776 + get_attr_length (next_insn));
2777 return templ;
2779 else
2781 int offset = (branch_dest (next_insn)
2782 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2783 if (offset >= -252 && offset <= 258)
2785 if (GET_CODE (src) == IF_THEN_ELSE)
2786 /* branch_true */
2787 src = XEXP (src, 1);
2788 operands[9] = src;
2789 return templ;
2793 operands[9] = gen_label_rtx ();
2794 emit_label_after (operands[9], insn);
2795 INSN_ADDRESSES_NEW (operands[9],
2796 INSN_ADDRESSES (INSN_UID (insn))
2797 + get_attr_length (insn));
2798 return templ;
2801 const char *
2802 output_ieee_ccmpeq (rtx insn, rtx *operands)
2804 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2805 insn, operands);
2808 /* Output the start of the assembler file. */
2810 static void
2811 sh_file_start (void)
2813 default_file_start ();
2815 #ifdef SYMBIAN
2816 /* Declare the .directive section before it is used. */
2817 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2818 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2819 #endif
2821 if (TARGET_ELF)
2822 /* We need to show the text section with the proper
2823 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2824 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2825 will complain. We can teach GAS specifically about the
2826 default attributes for our choice of text section, but
2827 then we would have to change GAS again if/when we change
2828 the text section name. */
2829 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2830 else
2831 /* Switch to the data section so that the coffsem symbol
2832 isn't in the text section. */
2833 switch_to_section (data_section);
2835 if (TARGET_LITTLE_ENDIAN)
2836 fputs ("\t.little\n", asm_out_file);
2838 if (!TARGET_ELF)
2840 if (TARGET_SHCOMPACT)
2841 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2842 else if (TARGET_SHMEDIA)
2843 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2844 TARGET_SHMEDIA64 ? 64 : 32);
2848 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2850 static bool
2851 unspec_caller_rtx_p (rtx pat)
2853 rtx base, offset;
2854 int i;
2856 split_const (pat, &base, &offset);
2857 if (GET_CODE (base) == UNSPEC)
2859 if (XINT (base, 1) == UNSPEC_CALLER)
2860 return true;
2861 for (i = 0; i < XVECLEN (base, 0); i++)
2862 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2863 return true;
2865 return false;
2868 /* Indicate that INSN cannot be duplicated. This is true for insn
2869 that generates a unique label. */
2871 static bool
2872 sh_cannot_copy_insn_p (rtx insn)
2874 rtx pat;
2876 if (!reload_completed || !flag_pic)
2877 return false;
2879 if (!NONJUMP_INSN_P (insn))
2880 return false;
2881 if (asm_noperands (insn) >= 0)
2882 return false;
2884 pat = PATTERN (insn);
2885 if (GET_CODE (pat) != SET)
2886 return false;
2887 pat = SET_SRC (pat);
2889 if (unspec_caller_rtx_p (pat))
2890 return true;
2892 return false;
2895 /* Actual number of instructions used to make a shift by N. */
2896 static const char ashiftrt_insns[] =
2897 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2899 /* Left shift and logical right shift are the same. */
2900 static const char shift_insns[] =
2901 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2903 /* Individual shift amounts needed to get the above length sequences.
2904 One bit right shifts clobber the T bit, so when possible, put one bit
2905 shifts in the middle of the sequence, so the ends are eligible for
2906 branch delay slots. */
2907 static const short shift_amounts[32][5] = {
2908 {0}, {1}, {2}, {2, 1},
2909 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2910 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2911 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2912 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2913 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2914 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2915 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
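/* For instance, a shift by 13 uses {8, 2, 1, 2} (8 + 2 + 1 + 2 = 13), the
   four instructions recorded in shift_insns[13]; a shift by 30 uses
   {16, -2, 16}, i.e. left 16, right 2, left 16, matching
   shift_insns[30] == 3.  */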
2917 /* Likewise, but for shift amounts < 16, up to three highmost bits
2918 might be clobbered. This is typically used when combined with some
2919 kind of sign or zero extension. */
2921 static const char ext_shift_insns[] =
2922 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2924 static const short ext_shift_amounts[32][4] = {
2925 {0}, {1}, {2}, {2, 1},
2926 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2927 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2928 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2929 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2930 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2931 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2932 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2934 /* Assuming we have a value that has been sign-extended by at least one bit,
2935 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2936 to shift it by N without data loss, and quicker than by other means? */
2937 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
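/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two counts
   whose ext_shift_amounts entries ({8, -1} and {16, -1}) end in a single
   one-bit right shift that can be turned arithmetic.  */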
2939 /* This is used in length attributes in sh.md to help compute the length
2940 of arbitrary constant shift instructions. */
2943 shift_insns_rtx (rtx insn)
2945 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2946 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2947 enum rtx_code shift_code = GET_CODE (set_src);
2949 switch (shift_code)
2951 case ASHIFTRT:
2952 return ashiftrt_insns[shift_count];
2953 case LSHIFTRT:
2954 case ASHIFT:
2955 return shift_insns[shift_count];
2956 default:
2957 gcc_unreachable ();
2961 /* Return the cost of a shift. */
2963 static inline int
2964 shiftcosts (rtx x)
2966 int value;
2968 if (TARGET_SHMEDIA)
2969 return 1;
2971 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2973 if (GET_MODE (x) == DImode
2974 && CONST_INT_P (XEXP (x, 1))
2975 && INTVAL (XEXP (x, 1)) == 1)
2976 return 2;
2978 /* Everything else is invalid, because there is no pattern for it. */
2979 return MAX_COST;
2981 /* If shift by a non constant, then this will be expensive. */
2982 if (!CONST_INT_P (XEXP (x, 1)))
2983 return SH_DYNAMIC_SHIFT_COST;
2985 /* Otherwise, return the true cost in instructions. Cope with out of range
2986 shift counts more or less arbitrarily. */
2987 value = INTVAL (XEXP (x, 1)) & 31;
2989 if (GET_CODE (x) == ASHIFTRT)
2991 int cost = ashiftrt_insns[value];
2992 /* If SH3, then we put the constant in a reg and use shad. */
2993 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2994 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2995 return cost;
2997 else
2998 return shift_insns[value];
3001 /* Return the cost of an AND operation. */
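/* Worked examples of the cases below: (x & 0xff) costs 1 (a single extu.b),
   (x & 0x7f) costs 2 (an and-immediate that ties up r0), and (x & 0x1ff)
   costs 3 (a constant load plus an and).  */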
3003 static inline int
3004 andcosts (rtx x)
3006 int i;
3008 /* Anding with a register is a single cycle and instruction. */
3009 if (!CONST_INT_P (XEXP (x, 1)))
3010 return 1;
3012 i = INTVAL (XEXP (x, 1));
3014 if (TARGET_SHMEDIA)
3016 if (satisfies_constraint_I10 (XEXP (x, 1))
3017 || satisfies_constraint_J16 (XEXP (x, 1)))
3018 return 1;
3019 else
3020 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
3023 /* These constants are single cycle extu.[bw] instructions. */
3024 if (i == 0xff || i == 0xffff)
3025 return 1;
3026 /* Constants that can be used in an and immediate instruction in a single
3027 cycle, but this requires r0, so make it a little more expensive. */
3028 if (CONST_OK_FOR_K08 (i))
3029 return 2;
3030 /* Constants that can be loaded with a mov immediate and an and.
3031 This case is probably unnecessary. */
3032 if (CONST_OK_FOR_I08 (i))
3033 return 2;
3034 /* Any other constant requires a 2 cycle pc-relative load plus an and.
3035 This case is probably unnecessary. */
3036 return 3;
3039 /* Return the cost of an addition or a subtraction. */
3041 static inline int
3042 addsubcosts (rtx x)
3044 /* Adding a register is a single cycle insn. */
3045 if (REG_P (XEXP (x, 1))
3046 || GET_CODE (XEXP (x, 1)) == SUBREG)
3047 return 1;
3049 /* Likewise for small constants. */
3050 if (CONST_INT_P (XEXP (x, 1))
3051 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3052 return 1;
3054 if (TARGET_SHMEDIA)
3055 switch (GET_CODE (XEXP (x, 1)))
3057 case CONST:
3058 case LABEL_REF:
3059 case SYMBOL_REF:
3060 return TARGET_SHMEDIA64 ? 5 : 3;
3062 case CONST_INT:
3063 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3064 return 2;
3065 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3066 return 3;
3067 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3068 return 4;
3070 /* Fall through. */
3071 default:
3072 return 5;
3075 /* Any other constant requires a 2 cycle pc-relative load plus an
3076 addition. */
3077 return 3;
3080 /* Return the cost of a multiply. */
3081 static inline int
3082 multcosts (rtx x ATTRIBUTE_UNUSED)
3084 if (sh_multcost >= 0)
3085 return sh_multcost;
3086 if (TARGET_SHMEDIA)
3087 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3088 accept constants. Ideally, we would use a cost of one or two and
3089 add the cost of the operand, but disregard the latter when inside loops
3090 and loop invariant code motion is still to follow.
3091 Using a multiply first and splitting it later if it's a loss
3092 doesn't work because of different sign / zero extension semantics
3093 of multiplies vs. shifts. */
3094 return optimize_size ? 2 : 3;
3096 if (TARGET_SH2)
3098 /* We have a mul insn, so we can never take more than the mul and the
3099 read of the mac reg, but count more because of the latency and extra
3100 reg usage. */
3101 if (optimize_size)
3102 return 2;
3103 return 3;
3106 /* If we're aiming at small code, then just count the number of
3107 insns in a multiply call sequence. */
3108 if (optimize_size)
3109 return 5;
3111 /* Otherwise count all the insns in the routine we'd be calling too. */
3112 return 20;
3115 /* Compute a (partial) cost for rtx X. Return true if the complete
3116 cost has been computed, and false if subexpressions should be
3117 scanned. In either case, *TOTAL contains the cost result. */
3119 static bool
3120 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
3121 bool speed ATTRIBUTE_UNUSED)
3123 switch (code)
3125 case CONST_INT:
3126 if (TARGET_SHMEDIA)
3128 if (INTVAL (x) == 0)
3129 *total = 0;
3130 else if (outer_code == AND && and_operand ((x), DImode))
3131 *total = 0;
3132 else if ((outer_code == IOR || outer_code == XOR
3133 || outer_code == PLUS)
3134 && CONST_OK_FOR_I10 (INTVAL (x)))
3135 *total = 0;
3136 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3137 *total = COSTS_N_INSNS (outer_code != SET);
3138 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3139 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3140 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3141 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3142 else
3143 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3144 return true;
3146 if (CONST_OK_FOR_I08 (INTVAL (x)))
3147 *total = 0;
3148 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3149 && CONST_OK_FOR_K08 (INTVAL (x)))
3150 *total = 1;
3151 /* prepare_cmp_insn will force costly constants into registers before
3152 the cbranch[sd]i4 patterns can see them, so preserve potentially
3153 interesting ones not covered by I08 above. */
3154 else if (outer_code == COMPARE
3155 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3156 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3157 || INTVAL (x) == 0x7fffffff
3158 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3159 *total = 1;
3160 else
3161 *total = 8;
3162 return true;
3164 case CONST:
3165 case LABEL_REF:
3166 case SYMBOL_REF:
3167 if (TARGET_SHMEDIA64)
3168 *total = COSTS_N_INSNS (4);
3169 else if (TARGET_SHMEDIA32)
3170 *total = COSTS_N_INSNS (2);
3171 else
3172 *total = 5;
3173 return true;
3175 case CONST_DOUBLE:
3176 if (TARGET_SHMEDIA)
3177 *total = COSTS_N_INSNS (4);
3178 /* prepare_cmp_insn will force costly constants into registers before
3179 the cbranchdi4 pattern can see them, so preserve potentially
3180 interesting ones. */
3181 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3182 *total = 1;
3183 else
3184 *total = 10;
3185 return true;
3186 case CONST_VECTOR:
3187 if (x == CONST0_RTX (GET_MODE (x)))
3188 *total = 0;
3189 else if (sh_1el_vec (x, VOIDmode))
3190 *total = outer_code != SET;
3191 if (sh_rep_vec (x, VOIDmode))
3192 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3193 + (outer_code != SET));
3194 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3195 return true;
3197 case PLUS:
3198 case MINUS:
3199 *total = COSTS_N_INSNS (addsubcosts (x));
3200 return true;
3202 case AND:
3203 *total = COSTS_N_INSNS (andcosts (x));
3204 return true;
3206 case MULT:
3207 *total = COSTS_N_INSNS (multcosts (x));
3208 return true;
3210 case ASHIFT:
3211 case ASHIFTRT:
3212 case LSHIFTRT:
3213 *total = COSTS_N_INSNS (shiftcosts (x));
3214 return true;
3216 case DIV:
3217 case UDIV:
3218 case MOD:
3219 case UMOD:
3220 *total = COSTS_N_INSNS (20);
3221 return true;
3223 case PARALLEL:
3224 if (sh_1el_vec (x, VOIDmode))
3225 *total = outer_code != SET;
3226 if (sh_rep_vec (x, VOIDmode))
3227 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3228 + (outer_code != SET));
3229 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3230 return true;
3232 case FLOAT:
3233 case FIX:
3234 *total = 100;
3235 return true;
3237 default:
3238 return false;
3242 /* Compute the cost of an address. For the SH, all valid addresses are
3243 the same cost. Use a slightly higher cost for reg + reg addressing,
3244 since it increases pressure on r0. */
3246 static int
3247 sh_address_cost (rtx X,
3248 bool speed ATTRIBUTE_UNUSED)
3250 return (GET_CODE (X) == PLUS
3251 && ! CONSTANT_P (XEXP (X, 1))
3252 && ! TARGET_SHMEDIA ? 1 : 0);
3255 /* Code to expand a shift. */
3257 void
3258 gen_ashift (int type, int n, rtx reg)
3260 /* Negative values here come from the shift_amounts array. */
3261 if (n < 0)
3263 if (type == ASHIFT)
3264 type = LSHIFTRT;
3265 else
3266 type = ASHIFT;
3267 n = -n;
3270 switch (type)
3272 case ASHIFTRT:
3273 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3274 break;
3275 case LSHIFTRT:
3276 if (n == 1)
3277 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3278 else
3279 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3280 break;
3281 case ASHIFT:
3282 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3283 break;
3287 /* Same for HImode */
3289 void
3290 gen_ashift_hi (int type, int n, rtx reg)
3292 /* Negative values here come from the shift_amounts array. */
3293 if (n < 0)
3295 if (type == ASHIFT)
3296 type = LSHIFTRT;
3297 else
3298 type = ASHIFT;
3299 n = -n;
3302 switch (type)
3304 case ASHIFTRT:
3305 case LSHIFTRT:
3306 /* We don't have HImode right shift operations because using the
3307 ordinary 32 bit shift instructions for that doesn't generate proper
3308 zero/sign extension.
3309 gen_ashift_hi is only called in contexts where we know that the
3310 sign extension works out correctly. */
3312 int offset = 0;
3313 if (GET_CODE (reg) == SUBREG)
3315 offset = SUBREG_BYTE (reg);
3316 reg = SUBREG_REG (reg);
3318 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3319 break;
3321 case ASHIFT:
3322 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3323 break;
3327 /* Output RTL to split a constant shift into its component SH constant
3328 shift instructions. */
3330 void
3331 gen_shifty_op (int code, rtx *operands)
3333 int value = INTVAL (operands[2]);
3334 int max, i;
3336 /* Truncate the shift count in case it is out of bounds. */
3337 value = value & 31;
3339 if (value == 31)
3341 if (code == LSHIFTRT)
3343 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3344 emit_insn (gen_movt (operands[0]));
3345 return;
3347 else if (code == ASHIFT)
3349 /* There is a two instruction sequence for 31 bit left shifts,
3350 but it requires r0. */
3351 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3353 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3354 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3355 return;
3359 else if (value == 0)
3361 /* This can happen even when optimizing, if there were subregs before
3362 reload. Don't output a nop here, as this is never optimized away;
3363 use a no-op move instead. */
3364 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3365 return;
3368 max = shift_insns[value];
3369 for (i = 0; i < max; i++)
3370 gen_ashift (code, shift_amounts[value][i], operands[0]);
3373 /* Same as above, but optimized for values where the topmost bits don't
3374 matter. */
3376 void
3377 gen_shifty_hi_op (int code, rtx *operands)
3379 int value = INTVAL (operands[2]);
3380 int max, i;
3381 void (*gen_fun) (int, int, rtx);
3383 /* This operation is used by and_shl for SImode values with a few
3384 high bits known to be cleared. */
3385 value &= 31;
3386 if (value == 0)
3388 emit_insn (gen_nop ());
3389 return;
3392 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3393 if (code == ASHIFT)
3395 max = ext_shift_insns[value];
3396 for (i = 0; i < max; i++)
3397 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3399 else
3400 /* When shifting right, emit the shifts in reverse order, so that
3401 solitary negative values come first. */
3402 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3403 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3406 /* Output RTL for an arithmetic right shift. */
3408 /* ??? Rewrite to use super-optimizer sequences. */
3411 expand_ashiftrt (rtx *operands)
3413 rtx wrk;
3414 char func[18];
3415 int value;
3417 if (TARGET_SH3)
3419 if (!CONST_INT_P (operands[2]))
3421 rtx count = copy_to_mode_reg (SImode, operands[2]);
3422 emit_insn (gen_negsi2 (count, count));
3423 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3424 return 1;
3426 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3427 > 1 + SH_DYNAMIC_SHIFT_COST)
3429 rtx count
3430 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3431 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3432 return 1;
3435 if (!CONST_INT_P (operands[2]))
3436 return 0;
3438 value = INTVAL (operands[2]) & 31;
3440 if (value == 31)
3442 /* If we are called from abs expansion, arrange things so that we
3443 can use a single MT instruction that doesn't clobber the source,
3444 if LICM can hoist out the load of the constant zero. */
3445 if (currently_expanding_to_rtl)
3447 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3448 operands[1]));
3449 emit_insn (gen_mov_neg_si_t (operands[0]));
3450 return 1;
3452 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3453 return 1;
3455 else if (value >= 16 && value <= 19)
3457 wrk = gen_reg_rtx (SImode);
3458 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3459 value -= 16;
3460 while (value--)
3461 gen_ashift (ASHIFTRT, 1, wrk);
3462 emit_move_insn (operands[0], wrk);
3463 return 1;
3465 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3466 else if (value <= 5)
3468 wrk = gen_reg_rtx (SImode);
3469 emit_move_insn (wrk, operands[1]);
3470 while (value--)
3471 gen_ashift (ASHIFTRT, 1, wrk);
3472 emit_move_insn (operands[0], wrk);
3473 return 1;
3476 wrk = gen_reg_rtx (Pmode);
3478 /* Load the value into an arg reg and call a helper. */
3479 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3480 sprintf (func, "__ashiftrt_r4_%d", value);
3481 function_symbol (wrk, func, SFUNC_STATIC);
3482 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3483 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3484 return 1;
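/* True when an inline constant shift sequence would cost more than loading
   the count into a register and doing a single dynamic shift (which takes
   1 + SH_DYNAMIC_SHIFT_COST insns); e.g. assuming SH_DYNAMIC_SHIFT_COST is
   1, a shift by 5 (3 insns inline) is better done dynamically.  */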
3488 sh_dynamicalize_shift_p (rtx count)
3490 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3493 /* Try to find a good way to implement the combiner pattern
3494 [(set (match_operand:SI 0 "register_operand" "r")
3495 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3496 (match_operand:SI 2 "const_int_operand" "n"))
3497 (match_operand:SI 3 "const_int_operand" "n"))) .
3498 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3499 return 0 for simple right / left or left/right shift combination.
3500 return 1 for a combination of shifts with zero_extend.
3501 return 2 for a combination of shifts with an AND that needs r0.
3502 return 3 for a combination of shifts with an AND that needs an extra
3503 scratch register, when the three highmost bits of the AND mask are clear.
3504 return 4 for a combination of shifts with an AND that needs an extra
3505 scratch register, when any of the three highmost bits of the AND mask
3506 is set.
3507 If ATTRP is set, store an initial right shift width in ATTRP[0],
3508 and the instruction length in ATTRP[1] . These values are not valid
3509 when returning 0.
3510 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3511 shift_amounts for the last shift value that is to be used before the
3512 sign extend. */
3514 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3516 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3517 int left = INTVAL (left_rtx), right;
3518 int best = 0;
3519 int cost, best_cost = 10000;
3520 int best_right = 0, best_len = 0;
3521 int i;
3522 int can_ext;
3524 if (left < 0 || left > 31)
3525 return 0;
3526 if (CONST_INT_P (mask_rtx))
3527 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3528 else
3529 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3530 /* Can this be expressed as a right shift / left shift pair? */
3531 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3532 right = exact_log2 (lsb);
3533 mask2 = ~(mask + lsb - 1);
3534 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
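  /* A worked instance of the lowest-set-bit trick used here: for
     mask = 0x0ff0, mask ^ (mask - 1) = 0x001f, so lsb = 0x10 and right = 4
     (the number of trailing zeros); lsb2 is derived from mask2 the same
     way.  */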
3535 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3536 if (! mask2)
3537 best_cost = shift_insns[right] + shift_insns[right + left];
3538 /* mask has no trailing zeroes <==> ! right */
3539 else if (! right && mask2 == ~(lsb2 - 1))
3541 int late_right = exact_log2 (lsb2);
3542 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3544 /* Try to use zero extend. */
3545 if (mask2 == ~(lsb2 - 1))
3547 int width, first;
3549 for (width = 8; width <= 16; width += 8)
3551 /* Can we zero-extend right away? */
3552 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3554 cost
3555 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3556 if (cost < best_cost)
3558 best = 1;
3559 best_cost = cost;
3560 best_right = right;
3561 best_len = cost;
3562 if (attrp)
3563 attrp[2] = -1;
3565 continue;
3567 /* ??? Could try to put zero extend into initial right shift,
3568 or even shift a bit left before the right shift. */
3569 /* Determine value of first part of left shift, to get to the
3570 zero extend cut-off point. */
3571 first = width - exact_log2 (lsb2) + right;
3572 if (first >= 0 && right + left - first >= 0)
3574 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3575 + ext_shift_insns[right + left - first];
3576 if (cost < best_cost)
3578 best = 1;
3579 best_cost = cost;
3580 best_right = right;
3581 best_len = cost;
3582 if (attrp)
3583 attrp[2] = first;
3588 /* Try to use r0 AND pattern */
3589 for (i = 0; i <= 2; i++)
3591 if (i > right)
3592 break;
3593 if (! CONST_OK_FOR_K08 (mask >> i))
3594 continue;
3595 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3596 if (cost < best_cost)
3598 best = 2;
3599 best_cost = cost;
3600 best_right = i;
3601 best_len = cost - 1;
3604 /* Try to use a scratch register to hold the AND operand. */
3605 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3606 for (i = 0; i <= 2; i++)
3608 if (i > right)
3609 break;
3610 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3611 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3612 if (cost < best_cost)
3614 best = 4 - can_ext;
3615 best_cost = cost;
3616 best_right = i;
3617 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3621 if (attrp)
3623 attrp[0] = best_right;
3624 attrp[1] = best_len;
3626 return best;
3629 /* This is used in length attributes of the unnamed instructions
3630 corresponding to shl_and_kind return values of 1 and 2. */
3632 shl_and_length (rtx insn)
3634 rtx set_src, left_rtx, mask_rtx;
3635 int attributes[3];
3637 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3638 left_rtx = XEXP (XEXP (set_src, 0), 1);
3639 mask_rtx = XEXP (set_src, 1);
3640 shl_and_kind (left_rtx, mask_rtx, attributes);
3641 return attributes[1];
3644 /* This is used in length attribute of the and_shl_scratch instruction. */
3647 shl_and_scr_length (rtx insn)
3649 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3650 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3651 rtx op = XEXP (set_src, 0);
3652 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3653 op = XEXP (XEXP (op, 0), 0);
3654 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3657 /* Generate rtl for instructions for which shl_and_kind advised a particular
3658 method of generating them, i.e. returned zero. */
3661 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3663 int attributes[3];
3664 unsigned HOST_WIDE_INT mask;
3665 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3666 int right, total_shift;
3667 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3669 right = attributes[0];
3670 total_shift = INTVAL (left_rtx) + right;
3671 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3672 switch (kind)
3674 default:
3675 return -1;
3676 case 1:
3678 int first = attributes[2];
3679 rtx operands[3];
3681 if (first < 0)
3683 emit_insn ((mask << right) <= 0xff
3684 ? gen_zero_extendqisi2 (dest,
3685 gen_lowpart (QImode, source))
3686 : gen_zero_extendhisi2 (dest,
3687 gen_lowpart (HImode, source)));
3688 source = dest;
3690 if (source != dest)
3691 emit_insn (gen_movsi (dest, source));
3692 operands[0] = dest;
3693 if (right)
3695 operands[2] = GEN_INT (right);
3696 gen_shifty_hi_op (LSHIFTRT, operands);
3698 if (first > 0)
3700 operands[2] = GEN_INT (first);
3701 gen_shifty_hi_op (ASHIFT, operands);
3702 total_shift -= first;
3703 mask <<= first;
3705 if (first >= 0)
3706 emit_insn (mask <= 0xff
3707 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3708 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3709 if (total_shift > 0)
3711 operands[2] = GEN_INT (total_shift);
3712 gen_shifty_hi_op (ASHIFT, operands);
3714 break;
3716 case 4:
3717 shift_gen_fun = gen_shifty_op;
3718 case 3:
3719 /* If the topmost bit that matters is set, set the topmost bits
3720 that don't matter. This way, we might be able to get a shorter
3721 signed constant. */
3722 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3723 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3724 case 2:
3725 /* Don't expand fine-grained when combining, because that will
3726 make the pattern fail. */
3727 if (currently_expanding_to_rtl
3728 || reload_in_progress || reload_completed)
3730 rtx operands[3];
3732 /* Cases 3 and 4 should be handled by this split
3733 only while combining */
3734 gcc_assert (kind <= 2);
3735 if (right)
3737 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3738 source = dest;
3740 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3741 if (total_shift)
3743 operands[0] = dest;
3744 operands[1] = dest;
3745 operands[2] = GEN_INT (total_shift);
3746 shift_gen_fun (ASHIFT, operands);
3748 break;
3750 else
3752 int neg = 0;
3753 if (kind != 4 && total_shift < 16)
3755 neg = -ext_shift_amounts[total_shift][1];
3756 if (neg > 0)
3757 neg -= ext_shift_amounts[total_shift][2];
3758 else
3759 neg = 0;
3761 emit_insn (gen_and_shl_scratch (dest, source,
3762 GEN_INT (right),
3763 GEN_INT (mask),
3764 GEN_INT (total_shift + neg),
3765 GEN_INT (neg)));
3766 emit_insn (gen_movsi (dest, dest));
3767 break;
3770 return 0;
3773 /* Try to find a good way to implement the combiner pattern
3774 [(set (match_operand:SI 0 "register_operand" "=r")
3775 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3776 (match_operand:SI 2 "const_int_operand" "n")
3777 (match_operand:SI 3 "const_int_operand" "n")
3778 (const_int 0)))
3779 (clobber (reg:SI T_REG))]
3780 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3781 return 0 for simple left / right shift combination.
3782 return 1 for left shift / 8 bit sign extend / left shift.
3783 return 2 for left shift / 16 bit sign extend / left shift.
3784 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3785 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3786    return 5 for left shift / 16 bit sign extend / right shift.
3787 return 6 for < 8 bit sign extend / left shift.
3788 return 7 for < 8 bit sign extend / left shift / single right shift.
3789 If COSTP is nonzero, assign the calculated cost to *COSTP. */
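/* In C terms (a sketch, assuming 32-bit SImode), the pattern above computes

       (int) (X << (32 - insize)) >> (32 - size),   where insize = size - left,

   i.e. the low SIZE bits of (X << LEFT), sign extended.  Kind 0 is exactly
   that pair of shifts; the other kinds replace part of it with an
   exts.b / exts.w sign extension (or, for kinds 6 and 7, an and / xor / add
   sequence) when the cost tables say that is shorter.  */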
3792 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3794 int left, size, insize, ext;
3795 int cost = 0, best_cost;
3796 int kind;
3798 left = INTVAL (left_rtx);
3799 size = INTVAL (size_rtx);
3800 insize = size - left;
3801 gcc_assert (insize > 0);
3802 /* Default to left / right shift. */
3803 kind = 0;
3804 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3805 if (size <= 16)
3807 /* 16 bit shift / sign extend / 16 bit shift */
3808 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3809 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3810 below, by alternative 3 or something even better. */
3811 if (cost < best_cost)
3813 kind = 5;
3814 best_cost = cost;
3817 /* Try a plain sign extend between two shifts. */
3818 for (ext = 16; ext >= insize; ext -= 8)
3820 if (ext <= size)
3822 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3823 if (cost < best_cost)
3825 kind = ext / (unsigned) 8;
3826 best_cost = cost;
3829 /* Check if we can do a sloppy shift with a final signed shift
3830 restoring the sign. */
3831 if (EXT_SHIFT_SIGNED (size - ext))
3832 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3833 /* If not, maybe it's still cheaper to do the second shift sloppy,
3834 and do a final sign extend? */
3835 else if (size <= 16)
3836 cost = ext_shift_insns[ext - insize] + 1
3837 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3838 else
3839 continue;
3840 if (cost < best_cost)
3842 kind = ext / (unsigned) 8 + 2;
3843 best_cost = cost;
3846 /* Check if we can sign extend in r0 */
3847 if (insize < 8)
3849 cost = 3 + shift_insns[left];
3850 if (cost < best_cost)
3852 kind = 6;
3853 best_cost = cost;
3855 /* Try the same with a final signed shift. */
3856 if (left < 31)
3858 cost = 3 + ext_shift_insns[left + 1] + 1;
3859 if (cost < best_cost)
3861 kind = 7;
3862 best_cost = cost;
3866 if (TARGET_SH3)
3868 /* Try to use a dynamic shift. */
3869 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3870 if (cost < best_cost)
3872 kind = 0;
3873 best_cost = cost;
3876 if (costp)
3877 *costp = cost;
3878 return kind;
3881 /* Function to be used in the length attribute of the instructions
3882 implementing this pattern. */
3885 shl_sext_length (rtx insn)
3887 rtx set_src, left_rtx, size_rtx;
3888 int cost;
3890 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3891 left_rtx = XEXP (XEXP (set_src, 0), 1);
3892 size_rtx = XEXP (set_src, 1);
3893 shl_sext_kind (left_rtx, size_rtx, &cost);
3894 return cost;
3897 /* Generate rtl for this pattern */
3900 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3902 int kind;
3903 int left, size, insize, cost;
3904 rtx operands[3];
3906 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3907 left = INTVAL (left_rtx);
3908 size = INTVAL (size_rtx);
3909 insize = size - left;
3910 switch (kind)
3912 case 1:
3913 case 2:
3914 case 3:
3915 case 4:
3917 int ext = kind & 1 ? 8 : 16;
3918 int shift2 = size - ext;
3920 /* Don't expand fine-grained when combining, because that will
3921 make the pattern fail. */
3922 if (! currently_expanding_to_rtl
3923 && ! reload_in_progress && ! reload_completed)
3925 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3926 emit_insn (gen_movsi (dest, source));
3927 break;
3929 if (dest != source)
3930 emit_insn (gen_movsi (dest, source));
3931 operands[0] = dest;
3932 if (ext - insize)
3934 operands[2] = GEN_INT (ext - insize);
3935 gen_shifty_hi_op (ASHIFT, operands);
3937 emit_insn (kind & 1
3938 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3939 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3940 if (kind <= 2)
3942 if (shift2)
3944 operands[2] = GEN_INT (shift2);
3945 gen_shifty_op (ASHIFT, operands);
3948 else
3950 if (shift2 > 0)
3952 if (EXT_SHIFT_SIGNED (shift2))
3954 operands[2] = GEN_INT (shift2 + 1);
3955 gen_shifty_op (ASHIFT, operands);
3956 operands[2] = const1_rtx;
3957 gen_shifty_op (ASHIFTRT, operands);
3958 break;
3960 operands[2] = GEN_INT (shift2);
3961 gen_shifty_hi_op (ASHIFT, operands);
3963 else if (shift2)
3965 operands[2] = GEN_INT (-shift2);
3966 gen_shifty_hi_op (LSHIFTRT, operands);
3968 emit_insn (size <= 8
3969 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3970 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3972 break;
3974 case 5:
3976 int i = 16 - size;
3977 if (! currently_expanding_to_rtl
3978 && ! reload_in_progress && ! reload_completed)
3979 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3980 else
3982 operands[0] = dest;
3983 operands[2] = GEN_INT (16 - insize);
3984 gen_shifty_hi_op (ASHIFT, operands);
3985 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3987 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3988 while (--i >= 0)
3989 gen_ashift (ASHIFTRT, 1, dest);
3990 break;
3992 case 6:
3993 case 7:
3994 /* Don't expand fine-grained when combining, because that will
3995 make the pattern fail. */
3996 if (! currently_expanding_to_rtl
3997 && ! reload_in_progress && ! reload_completed)
3999 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4000 emit_insn (gen_movsi (dest, source));
4001 break;
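      /* Sign-extend the low INSIZE bits in place with the classic
	 mask / xor / subtract trick: ((x & ((1 << insize) - 1)) ^ s) - s,
	 with s = 1 << (insize - 1).  This is used because the hardware only
	 provides 8-bit and 16-bit sign extensions (exts.b / exts.w), and
	 insize is smaller than that here.  */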
4003 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4004 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4005 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4006 operands[0] = dest;
4007 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4008 gen_shifty_op (ASHIFT, operands);
4009 if (kind == 7)
4010 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4011 break;
4012 default:
4013 return -1;
4015 return 0;
4018 /* Prefix a symbol_ref name with "datalabel". */
4021 gen_datalabel_ref (rtx sym)
4023 const char *str;
4025 if (GET_CODE (sym) == LABEL_REF)
4026 return gen_rtx_CONST (GET_MODE (sym),
4027 gen_rtx_UNSPEC (GET_MODE (sym),
4028 gen_rtvec (1, sym),
4029 UNSPEC_DATALABEL));
4031 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4033 str = XSTR (sym, 0);
4034 /* Share all SYMBOL_REF strings with the same value - that is important
4035 for cse. */
4036 str = IDENTIFIER_POINTER (get_identifier (str));
4037 XSTR (sym, 0) = str;
4039 return sym;
4043 static alloc_pool label_ref_list_pool;
4045 typedef struct label_ref_list_d
4047 rtx label;
4048 struct label_ref_list_d *next;
4049 } *label_ref_list_t;
4051 /* The SH cannot load a large constant into a register, constants have to
4052 come from a pc relative load. The reference of a pc relative load
4053 instruction must be less than 1k in front of the instruction. This
4054 means that we often have to dump a constant inside a function, and
4055 generate code to branch around it.
4057 It is important to minimize this, since the branches will slow things
4058 down and make things bigger.
4060 Worst case code looks like:
4062 mov.l L1,rn
4063 bra L2
4065 align
4066 L1: .long value
4070 mov.l L3,rn
4071 bra L4
4073 align
4074 L3: .long value
4078 We fix this by performing a scan before scheduling, which notices which
4079 instructions need to have their operands fetched from the constant table
4080 and builds the table.
4082 The algorithm is:
4084 scan, find an instruction which needs a pcrel move. Look forward, find the
4085 last barrier which is within MAX_COUNT bytes of the requirement.
4086 If there isn't one, make one. Process all the instructions between
4087 the find and the barrier.
4089 In the above example, we can tell that L3 is within 1k of L1, so
4090 the first move can be shrunk from the 3 insn+constant sequence into
4091 just 1 insn, and the constant moved to L3 to make:
4093 mov.l L1,rn
4095 mov.l L3,rn
4096 bra L4
4098 align
4099 L3:.long value
4100 L4:.long value
4102 Then the second move becomes the target for the shortening process. */
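/* Roughly speaking, the machinery below implements this scheme as follows:
   sh_reorg scans the insn stream, broken_move identifies moves that must
   become pc-relative loads, find_barrier locates (or creates) a barrier
   within range of them, add_constant records each needed value in
   pool_vector, and dump_table finally emits the literal table after that
   barrier.  */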
4104 typedef struct
4106 rtx value; /* Value in table. */
4107 rtx label; /* Label of value. */
4108 label_ref_list_t wend; /* End of window. */
4109 enum machine_mode mode; /* Mode of value. */
4111 /* True if this constant is accessed as part of a post-increment
4112 sequence. Note that HImode constants are never accessed in this way. */
4113 bool part_of_sequence_p;
4114 } pool_node;
4116 /* The maximum number of constants that can fit into one pool, since
4117 constants in the range 0..510 are at least 2 bytes long, and in the
4118 range from there to 1018 at least 4 bytes. */
4120 #define MAX_POOL_SIZE 372
4121 static pool_node pool_vector[MAX_POOL_SIZE];
4122 static int pool_size;
4123 static rtx pool_window_label;
4124 static int pool_window_last;
4126 static int max_labelno_before_reorg;
4128 /* ??? If we need a constant in HImode which is the truncated value of a
4130    constant we need in SImode, we could combine the two entries, thus saving
4130 two bytes. Is this common enough to be worth the effort of implementing
4131 it? */
4133 /* ??? This stuff should be done at the same time that we shorten branches.
4134 As it is now, we must assume that all branches are the maximum size, and
4135 this causes us to almost always output constant pools sooner than
4136 necessary. */
4138 /* Add a constant to the pool and return its label. */
4140 static rtx
4141 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4143 int i;
4144 rtx lab, new_rtx;
4145 label_ref_list_t ref, newref;
4147 /* First see if we've already got it. */
4148 for (i = 0; i < pool_size; i++)
4150 if (x->code == pool_vector[i].value->code
4151 && mode == pool_vector[i].mode)
4153 if (x->code == CODE_LABEL)
4155 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4156 continue;
4158 if (rtx_equal_p (x, pool_vector[i].value))
4160 lab = new_rtx = 0;
4161 if (! last_value
4162 || ! i
4163 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4165 new_rtx = gen_label_rtx ();
4166 LABEL_REFS (new_rtx) = pool_vector[i].label;
4167 pool_vector[i].label = lab = new_rtx;
4169 if (lab && pool_window_label)
4171 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4172 newref->label = pool_window_label;
4173 ref = pool_vector[pool_window_last].wend;
4174 newref->next = ref;
4175 pool_vector[pool_window_last].wend = newref;
4177 if (new_rtx)
4178 pool_window_label = new_rtx;
4179 pool_window_last = i;
4180 return lab;
4185 /* Need a new one. */
4186 pool_vector[pool_size].value = x;
4187 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4189 lab = 0;
4190 pool_vector[pool_size - 1].part_of_sequence_p = true;
4192 else
4193 lab = gen_label_rtx ();
4194 pool_vector[pool_size].mode = mode;
4195 pool_vector[pool_size].label = lab;
4196 pool_vector[pool_size].wend = NULL;
4197 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4198 if (lab && pool_window_label)
4200 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4201 newref->label = pool_window_label;
4202 ref = pool_vector[pool_window_last].wend;
4203 newref->next = ref;
4204 pool_vector[pool_window_last].wend = newref;
4206 if (lab)
4207 pool_window_label = lab;
4208 pool_window_last = pool_size;
4209 pool_size++;
4210 return lab;
4213 /* Output the literal table. START, if nonzero, is the first instruction
4214 this table is needed for, and also indicates that there is at least one
4215    casesi_worker_2 instruction; we have to emit the operand3 labels from
4216 these insns at a 4-byte aligned position. BARRIER is the barrier
4217 after which we are to place the table. */
4219 static void
4220 dump_table (rtx start, rtx barrier)
4222 rtx scan = barrier;
4223 int i;
4224 int need_align = 1;
4225 rtx lab;
4226 label_ref_list_t ref;
4227 int have_df = 0;
4229 /* Do two passes, first time dump out the HI sized constants. */
4231 for (i = 0; i < pool_size; i++)
4233 pool_node *p = &pool_vector[i];
4235 if (p->mode == HImode)
4237 if (need_align)
4239 scan = emit_insn_after (gen_align_2 (), scan);
4240 need_align = 0;
4242 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4243 scan = emit_label_after (lab, scan);
4244 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4245 scan);
4246 for (ref = p->wend; ref; ref = ref->next)
4248 lab = ref->label;
4249 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4252 else if (p->mode == DFmode)
4253 have_df = 1;
4256 need_align = 1;
4258 if (start)
4260 scan = emit_insn_after (gen_align_4 (), scan);
4261 need_align = 0;
4262 for (; start != barrier; start = NEXT_INSN (start))
4263 if (NONJUMP_INSN_P (start)
4264 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4266 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4267 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4269 scan = emit_label_after (lab, scan);
4272 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4274 rtx align_insn = NULL_RTX;
4276 scan = emit_label_after (gen_label_rtx (), scan);
4277 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4278 need_align = 0;
4280 for (i = 0; i < pool_size; i++)
4282 pool_node *p = &pool_vector[i];
4284 switch (p->mode)
4286 case HImode:
4287 break;
4288 case SImode:
4289 case SFmode:
4290 if (align_insn && !p->part_of_sequence_p)
4292 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4293 emit_label_before (lab, align_insn);
4294 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4295 align_insn);
4296 for (ref = p->wend; ref; ref = ref->next)
4298 lab = ref->label;
4299 emit_insn_before (gen_consttable_window_end (lab),
4300 align_insn);
4302 delete_insn (align_insn);
4303 align_insn = NULL_RTX;
4304 continue;
4306 else
4308 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4309 scan = emit_label_after (lab, scan);
4310 scan = emit_insn_after (gen_consttable_4 (p->value,
4311 const0_rtx), scan);
4312 need_align = ! need_align;
4314 break;
4315 case DFmode:
4316 if (need_align)
4318 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4319 align_insn = scan;
4320 need_align = 0;
4322 case DImode:
4323 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4324 scan = emit_label_after (lab, scan);
4325 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4326 scan);
4327 break;
4328 default:
4329 gcc_unreachable ();
4332 if (p->mode != HImode)
4334 for (ref = p->wend; ref; ref = ref->next)
4336 lab = ref->label;
4337 scan = emit_insn_after (gen_consttable_window_end (lab),
4338 scan);
4343 pool_size = 0;
4346 for (i = 0; i < pool_size; i++)
4348 pool_node *p = &pool_vector[i];
4350 switch (p->mode)
4352 case HImode:
4353 break;
4354 case SImode:
4355 case SFmode:
4356 if (need_align)
4358 need_align = 0;
4359 scan = emit_label_after (gen_label_rtx (), scan);
4360 scan = emit_insn_after (gen_align_4 (), scan);
4362 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4363 scan = emit_label_after (lab, scan);
4364 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4365 scan);
4366 break;
4367 case DFmode:
4368 case DImode:
4369 if (need_align)
4371 need_align = 0;
4372 scan = emit_label_after (gen_label_rtx (), scan);
4373 scan = emit_insn_after (gen_align_4 (), scan);
4375 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4376 scan = emit_label_after (lab, scan);
4377 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4378 scan);
4379 break;
4380 default:
4381 gcc_unreachable ();
4384 if (p->mode != HImode)
4386 for (ref = p->wend; ref; ref = ref->next)
4388 lab = ref->label;
4389 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4394 scan = emit_insn_after (gen_consttable_end (), scan);
4395 scan = emit_barrier_after (scan);
4396 pool_size = 0;
4397 pool_window_label = NULL_RTX;
4398 pool_window_last = 0;
4401 /* Return nonzero if constant would be an ok source for a
4402 mov.w instead of a mov.l. */
4404 static int
4405 hi_const (rtx src)
4407 return (CONST_INT_P (src)
4408 && INTVAL (src) >= -32768
4409 && INTVAL (src) <= 32767);
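/* For example, hi_const (GEN_INT (1000)) is nonzero: the value fits in a
   signed 16-bit immediate, so a 2-byte table entry loaded with mov.w
   suffices.  hi_const (GEN_INT (0x12345)) is zero; that constant needs a
   4-byte entry loaded with mov.l.  */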
4412 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4414 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4416 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4417 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4418 need to fix it if the input value is CONST_OK_FOR_I08. */
4420 static int
4421 broken_move (rtx insn)
4423 if (NONJUMP_INSN_P (insn))
4425 rtx pat = PATTERN (insn);
4426 if (GET_CODE (pat) == PARALLEL)
4427 pat = XVECEXP (pat, 0, 0);
4428 if (GET_CODE (pat) == SET
4429 /* We can load any 8-bit value if we don't care what the high
4430 order bits end up as. */
4431 && GET_MODE (SET_DEST (pat)) != QImode
4432 && (CONSTANT_P (SET_SRC (pat))
4433 /* Match mova_const. */
4434 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4435 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4436 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4437 && ! (TARGET_SH2E
4438 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4439 && (fp_zero_operand (SET_SRC (pat))
4440 || fp_one_operand (SET_SRC (pat)))
4441 /* In general we don't know the current setting of fpscr, so disable fldi.
4442 There is an exception if this was a register-register move
4443 before reload - and hence it was ascertained that we have
4444 single precision setting - and in a post-reload optimization
4445 we changed this to do a constant load. In that case
4446 we don't have an r0 clobber, hence we must use fldi. */
4447 && (TARGET_FMOVD
4448 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4449 == SCRATCH))
4450 && REG_P (SET_DEST (pat))
4451 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4452 && ! (TARGET_SH2A
4453 && GET_MODE (SET_DEST (pat)) == SImode
4454 && (satisfies_constraint_I20 (SET_SRC (pat))
4455 || satisfies_constraint_I28 (SET_SRC (pat))))
4456 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4457 return 1;
4460 return 0;
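/* A sketch of what this catches: (set (reg:SI Rn) (const_int 42)) is not a
   broken move, because 42 satisfies the I08 constraint and can be loaded
   with a simple "mov #42,Rn"; (set (reg:SI Rn) (const_int 0x12345678)) is
   broken and has to be rewritten as a pc-relative load from the constant
   pool.  (On SH2A, constants satisfying I20 / I28 are also exempt, as the
   test above shows.)  */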
4463 static int
4464 mova_p (rtx insn)
4466 return (NONJUMP_INSN_P (insn)
4467 && GET_CODE (PATTERN (insn)) == SET
4468 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4469 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4470 /* Don't match mova_const. */
4471 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4474 /* Fix up a mova from a switch that went out of range. */
4475 static void
4476 fixup_mova (rtx mova)
4478 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4479 if (! flag_pic)
4481 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4482 INSN_CODE (mova) = -1;
4484 else
4486 rtx worker = mova;
4487 rtx lab = gen_label_rtx ();
4488 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4492 worker = NEXT_INSN (worker);
4493 gcc_assert (worker
4494 && !LABEL_P (worker)
4495 && !JUMP_P (worker));
4496 } while (NOTE_P (worker)
4497 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4498 wpat = PATTERN (worker);
4499 wpat0 = XVECEXP (wpat, 0, 0);
4500 wpat1 = XVECEXP (wpat, 0, 1);
4501 wsrc = SET_SRC (wpat0);
4502 PATTERN (worker) = (gen_casesi_worker_2
4503 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4504 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4505 XEXP (wpat1, 0)));
4506 INSN_CODE (worker) = -1;
4507 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4508 base = gen_rtx_LABEL_REF (Pmode, lab);
4509 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4510 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4511 INSN_CODE (mova) = -1;
4515 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4516 *num_mova, and check if the new mova is not nested within the first one.
4517    Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4518    2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4519 static int
4520 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4522 int n_addr = 0; /* Initialization to shut up spurious warning. */
4523 int f_target, n_target = 0; /* Likewise. */
4525 if (optimize)
4527 /* If NEW_MOVA has no address yet, it will be handled later. */
4528 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4529 return -1;
4531 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4532 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4533 if (n_addr > n_target || n_addr + 1022 < n_target)
4535 /* Change the mova into a load.
4536 broken_move will then return true for it. */
4537 fixup_mova (new_mova);
4538 return 1;
4541 if (!(*num_mova)++)
4543 *first_mova = new_mova;
4544 return 2;
4546 if (!optimize
4547 || ((f_target
4548 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4549 >= n_target))
4550 return -1;
4552 (*num_mova)--;
4553 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4554 > n_target - n_addr)
4556 fixup_mova (*first_mova);
4557 return 0;
4559 else
4561 fixup_mova (new_mova);
4562 return 1;
4566 /* Find the last barrier from insn FROM which is close enough to hold the
4567 constant pool. If we can't find one, then create one near the end of
4568 the range. */
4570 static rtx
4571 find_barrier (int num_mova, rtx mova, rtx from)
4573 int count_si = 0;
4574 int count_hi = 0;
4575 int found_hi = 0;
4576 int found_si = 0;
4577 int found_di = 0;
4578 int hi_align = 2;
4579 int si_align = 2;
4580 int leading_mova = num_mova;
4581 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4582 int si_limit;
4583 int hi_limit;
4584 rtx orig = from;
4585 rtx last_got = NULL_RTX;
4586 rtx last_symoff = NULL_RTX;
4588 /* For HImode: range is 510, add 4 because pc counts from address of
4589 second instruction after this one, subtract 2 for the jump instruction
4590 that we may need to emit before the table, subtract 2 for the instruction
4591 that fills the jump delay slot (in very rare cases, reorg will take an
4592 instruction from after the constant pool or will leave the delay slot
4593 empty). This gives 510.
4594 For SImode: range is 1020, add 4 because pc counts from address of
4595 second instruction after this one, subtract 2 in case pc is 2 byte
4596 aligned, subtract 2 for the jump instruction that we may need to emit
4597 before the table, subtract 2 for the instruction that fills the jump
4598 delay slot. This gives 1018. */
4600 /* The branch will always be shortened now that the reference address for
4601    forward branches is the successor address, so we no longer need to make
4602 adjustments to the [sh]i_limit for -O0. */
4604 si_limit = 1018;
4605 hi_limit = 510;
4607 while (from && count_si < si_limit && count_hi < hi_limit)
4609 int inc = get_attr_length (from);
4610 int new_align = 1;
4612 /* If this is a label that existed at the time of the compute_alignments
4613 call, determine the alignment. N.B. When find_barrier recurses for
4614 an out-of-reach mova, we might see labels at the start of previously
4615 inserted constant tables. */
4616 if (LABEL_P (from)
4617 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4619 if (optimize)
4620 new_align = 1 << label_to_alignment (from);
4621 else if (BARRIER_P (prev_nonnote_insn (from)))
4622 new_align = 1 << barrier_align (from);
4623 else
4624 new_align = 1;
4625 inc = 0;
4627 /* In case we are scanning a constant table because of recursion, check
4628 for explicit alignments. If the table is long, we might be forced
4629 to emit the new table in front of it; the length of the alignment
4630 might be the last straw. */
4631 else if (NONJUMP_INSN_P (from)
4632 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4633 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4634 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4635 /* When we find the end of a constant table, paste the new constant
4636 at the end. That is better than putting it in front because
4637 this way, we don't need extra alignment for adding a 4-byte-aligned
4638 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4639 else if (NONJUMP_INSN_P (from)
4640 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4641 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4642 return from;
4644 if (BARRIER_P (from))
4646 rtx next;
4648 found_barrier = from;
4650 /* If we are at the end of the function, or in front of an alignment
4651 instruction, we need not insert an extra alignment. We prefer
4652 this kind of barrier. */
4653 if (barrier_align (from) > 2)
4654 good_barrier = from;
4656 /* If we are at the end of a hot/cold block, dump the constants
4657 here. */
4658 next = NEXT_INSN (from);
4659 if (next
4660 && NOTE_P (next)
4661 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4662 break;
4665 if (broken_move (from))
4667 rtx pat, src, dst;
4668 enum machine_mode mode;
4670 pat = PATTERN (from);
4671 if (GET_CODE (pat) == PARALLEL)
4672 pat = XVECEXP (pat, 0, 0);
4673 src = SET_SRC (pat);
4674 dst = SET_DEST (pat);
4675 mode = GET_MODE (dst);
4677          /* A GOT pc-relative setting comes as a pair of
4678 mova .L8,r0
4679 mov.l .L8,r12
4680 instructions. (plus add r0,r12).
4681 Remember if we see one without the other. */
4682 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4683 last_got = last_got ? NULL_RTX : from;
4684 else if (PIC_ADDR_P (src))
4685 last_got = last_got ? NULL_RTX : from;
4687 /* We must explicitly check the mode, because sometimes the
4688 front end will generate code to load unsigned constants into
4689 HImode targets without properly sign extending them. */
4690 if (mode == HImode
4691 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4693 found_hi += 2;
4694 /* We put the short constants before the long constants, so
4695 we must count the length of short constants in the range
4696 for the long constants. */
4697 /* ??? This isn't optimal, but is easy to do. */
4698 si_limit -= 2;
4700 else
4702 /* We dump DF/DI constants before SF/SI ones, because
4703 the limit is the same, but the alignment requirements
4704 are higher. We may waste up to 4 additional bytes
4705 for alignment, and the DF/DI constant may have
4706 another SF/SI constant placed before it. */
4707 if (TARGET_SHCOMPACT
4708 && ! found_di
4709 && (mode == DFmode || mode == DImode))
4711 found_di = 1;
4712 si_limit -= 8;
4714 while (si_align > 2 && found_si + si_align - 2 > count_si)
4715 si_align >>= 1;
4716 if (found_si > count_si)
4717 count_si = found_si;
4718 found_si += GET_MODE_SIZE (mode);
4719 if (num_mova)
4720 si_limit -= GET_MODE_SIZE (mode);
4724 if (mova_p (from))
4726 switch (untangle_mova (&num_mova, &mova, from))
4728 case 1:
4729 if (flag_pic)
4731 rtx src = SET_SRC (PATTERN (from));
4732 if (GET_CODE (src) == CONST
4733 && GET_CODE (XEXP (src, 0)) == UNSPEC
4734 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4735 last_symoff = from;
4737 break;
4738 case 0: return find_barrier (0, 0, mova);
4739 case 2:
4741 leading_mova = 0;
4742 barrier_before_mova
4743 = good_barrier ? good_barrier : found_barrier;
4745 default: break;
4747 if (found_si > count_si)
4748 count_si = found_si;
4750 else if (JUMP_TABLE_DATA_P (from))
4752 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4753 || (num_mova
4754 && (prev_nonnote_insn (from)
4755 == XEXP (MOVA_LABELREF (mova), 0))))
4756 num_mova--;
4757 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4759 /* We have just passed the barrier in front of the
4760 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4761 the ADDR_DIFF_VEC is accessed as data, just like our pool
4762 constants, this is a good opportunity to accommodate what
4763 we have gathered so far.
4764 If we waited any longer, we could end up at a barrier in
4765 front of code, which gives worse cache usage for separated
4766 instruction / data caches. */
4767 good_barrier = found_barrier;
4768 break;
4770 else
4772 rtx body = PATTERN (from);
4773 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4776 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4777 else if (JUMP_P (from)
4778 && ! TARGET_SH2
4779 && ! optimize_size)
4780 new_align = 4;
4782 /* There is a possibility that a bf is transformed into a bf/s by the
4783 delay slot scheduler. */
4784 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4785 && get_attr_type (from) == TYPE_CBRANCH
4786 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4787 inc += 2;
4789 if (found_si)
4791 count_si += inc;
4792 if (new_align > si_align)
4794 si_limit -= (count_si - 1) & (new_align - si_align);
4795 si_align = new_align;
4797 count_si = (count_si + new_align - 1) & -new_align;
4799 if (found_hi)
4801 count_hi += inc;
4802 if (new_align > hi_align)
4804 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4805 hi_align = new_align;
4807 count_hi = (count_hi + new_align - 1) & -new_align;
4809 from = NEXT_INSN (from);
4812 if (num_mova)
4814 if (leading_mova)
4816 /* Try as we might, the leading mova is out of range. Change
4817 it into a load (which will become a pcload) and retry. */
4818 fixup_mova (mova);
4819 return find_barrier (0, 0, mova);
4821 else
4823 /* Insert the constant pool table before the mova instruction,
4824 to prevent the mova label reference from going out of range. */
4825 from = mova;
4826 good_barrier = found_barrier = barrier_before_mova;
4830 if (found_barrier)
4832 if (good_barrier && next_real_insn (found_barrier))
4833 found_barrier = good_barrier;
4835 else
4837 /* We didn't find a barrier in time to dump our stuff,
4838 so we'll make one. */
4839 rtx label = gen_label_rtx ();
4841 /* Don't emit a constant table in the middle of insns for
4842         casesi_worker_2.  This is a bit of overkill, but it is enough
4843         because casesi_worker_2 does not appear very frequently.  */
4844 if (last_symoff)
4845 from = last_symoff;
4847 /* If we exceeded the range, then we must back up over the last
4848 instruction we looked at. Otherwise, we just need to undo the
4849 NEXT_INSN at the end of the loop. */
4850 if (PREV_INSN (from) != orig
4851 && (count_hi > hi_limit || count_si > si_limit))
4852 from = PREV_INSN (PREV_INSN (from));
4853 else
4854 from = PREV_INSN (from);
4856      /* Don't emit a constant table in the middle of global pointer setting,
4857         since that would move the addressing base GOT into another table.
4858 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4859 in the pool anyway, so just move up the whole constant pool. */
4860 if (last_got)
4861 from = PREV_INSN (last_got);
4863 /* Don't insert the constant pool table at the position which
4864 may be the landing pad. */
4865 if (flag_exceptions
4866 && CALL_P (from)
4867 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4868 from = PREV_INSN (from);
4870 /* Walk back to be just before any jump or label.
4871 Putting it before a label reduces the number of times the branch
4872 around the constant pool table will be hit. Putting it before
4873 a jump makes it more likely that the bra delay slot will be
4874 filled. */
4875 while (NOTE_P (from) || JUMP_P (from)
4876 || LABEL_P (from))
4877 from = PREV_INSN (from);
4879 from = emit_jump_insn_after (gen_jump (label), from);
4880 JUMP_LABEL (from) = label;
4881 LABEL_NUSES (label) = 1;
4882 found_barrier = emit_barrier_after (from);
4883 emit_label_after (label, found_barrier);
4886 return found_barrier;
4889 /* If the instruction INSN is implemented by a special function, and we can
4890 positively find the register that is used to call the sfunc, and this
4891 register is not used anywhere else in this instruction - except as the
4892 destination of a set, return this register; else, return 0. */
4894 sfunc_uses_reg (rtx insn)
4896 int i;
4897 rtx pattern, part, reg_part, reg;
4899 if (!NONJUMP_INSN_P (insn))
4900 return 0;
4901 pattern = PATTERN (insn);
4902 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4903 return 0;
4905 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4907 part = XVECEXP (pattern, 0, i);
4908 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4909 reg_part = part;
4911 if (! reg_part)
4912 return 0;
4913 reg = XEXP (reg_part, 0);
4914 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4916 part = XVECEXP (pattern, 0, i);
4917 if (part == reg_part || GET_CODE (part) == CLOBBER)
4918 continue;
4919 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4920 && REG_P (SET_DEST (part)))
4921 ? SET_SRC (part) : part)))
4922 return 0;
4924 return reg;
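/* Illustrative shape of the insns this matches (a sketch): an sfunc call is
   a PARALLEL whose first element is the SET doing the work and which also
   carries a (use (reg:SI Rn)) naming the register that holds the sfunc's
   address.  The loops above pick out that USE and then verify that Rn is
   not referenced anywhere else in the pattern except as a SET destination.  */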
4927 /* See if the only way in which INSN uses REG is by calling it, or by
4928 setting it while calling it. Set *SET to a SET rtx if the register
4929 is set by INSN. */
4931 static int
4932 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4934 rtx pattern, reg2;
4936 *set = NULL_RTX;
4938 reg2 = sfunc_uses_reg (insn);
4939 if (reg2 && REGNO (reg2) == REGNO (reg))
4941 pattern = single_set (insn);
4942 if (pattern
4943 && REG_P (SET_DEST (pattern))
4944 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4945 *set = pattern;
4946 return 0;
4948 if (!CALL_P (insn))
4950 /* We don't use rtx_equal_p because we don't care if the mode is
4951 different. */
4952 pattern = single_set (insn);
4953 if (pattern
4954 && REG_P (SET_DEST (pattern))
4955 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4957 rtx par, part;
4958 int i;
4960 *set = pattern;
4961 par = PATTERN (insn);
4962 if (GET_CODE (par) == PARALLEL)
4963 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4965 part = XVECEXP (par, 0, i);
4966 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4967 return 1;
4969 return reg_mentioned_p (reg, SET_SRC (pattern));
4972 return 1;
4975 pattern = PATTERN (insn);
4977 if (GET_CODE (pattern) == PARALLEL)
4979 int i;
4981 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4982 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4983 return 1;
4984 pattern = XVECEXP (pattern, 0, 0);
4987 if (GET_CODE (pattern) == SET)
4989 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4991 /* We don't use rtx_equal_p, because we don't care if the
4992 mode is different. */
4993 if (!REG_P (SET_DEST (pattern))
4994 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4995 return 1;
4997 *set = pattern;
5000 pattern = SET_SRC (pattern);
5003 if (GET_CODE (pattern) != CALL
5004 || !MEM_P (XEXP (pattern, 0))
5005 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5006 return 1;
5008 return 0;
5011 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5012 general registers. Bits 0..15 mean that the respective registers
5013 are used as inputs in the instruction. Bits 16..31 mean that the
5014 registers 0..15, respectively, are used as outputs, or are clobbered.
5015 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5017 regs_used (rtx x, int is_dest)
5019 enum rtx_code code;
5020 const char *fmt;
5021 int i, used = 0;
5023 if (! x)
5024 return used;
5025 code = GET_CODE (x);
5026 switch (code)
5028 case REG:
5029 if (REGNO (x) < 16)
5030 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5031 << (REGNO (x) + is_dest));
5032 return 0;
5033 case SUBREG:
5035 rtx y = SUBREG_REG (x);
5037 if (!REG_P (y))
5038 break;
5039 if (REGNO (y) < 16)
5040 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5041 << (REGNO (y) +
5042 subreg_regno_offset (REGNO (y),
5043 GET_MODE (y),
5044 SUBREG_BYTE (x),
5045 GET_MODE (x)) + is_dest));
5046 return 0;
5048 case SET:
5049 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5050 case RETURN:
5051 /* If there was a return value, it must have been indicated with USE. */
5052 return 0x00ffff00;
5053 case CLOBBER:
5054 is_dest = 1;
5055 break;
5056 case MEM:
5057 is_dest = 0;
5058 break;
5059 case CALL:
5060 used |= 0x00ff00f0;
5061 break;
5062 default:
5063 break;
5066 fmt = GET_RTX_FORMAT (code);
5068 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5070 if (fmt[i] == 'E')
5072 register int j;
5073 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5074 used |= regs_used (XVECEXP (x, i, j), is_dest);
5076 else if (fmt[i] == 'e')
5077 used |= regs_used (XEXP (x, i), is_dest);
5079 return used;
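/* Worked example (a sketch): for the SImode insn pattern
       (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   regs_used returns (1 << 2) | (1 << 3) for the two input registers plus
   (1 << (1 + 16)) for the output, i.e. 0x2000c.  */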
5082 /* Create an instruction that prevents redirection of a conditional branch
5083 to the destination of the JUMP with address ADDR.
5084 If the branch needs to be implemented as an indirect jump, try to find
5085 a scratch register for it.
5086 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5087 If any preceding insn that doesn't fit into a delay slot is good enough,
5088 pass 1. Pass 2 if a definite blocking insn is needed.
5089 -1 is used internally to avoid deep recursion.
5090 If a blocking instruction is made or recognized, return it. */
5092 static rtx
5093 gen_block_redirect (rtx jump, int addr, int need_block)
5095 int dead = 0;
5096 rtx prev = prev_nonnote_insn (jump);
5097 rtx dest;
5099 /* First, check if we already have an instruction that satisfies our need. */
5100 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5102 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5103 return prev;
5104 if (GET_CODE (PATTERN (prev)) == USE
5105 || GET_CODE (PATTERN (prev)) == CLOBBER
5106 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5107 prev = jump;
5108 else if ((need_block &= ~1) < 0)
5109 return prev;
5110 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5111 need_block = 0;
5113 if (GET_CODE (PATTERN (jump)) == RETURN)
5115 if (! need_block)
5116 return prev;
5117 /* Reorg even does nasty things with return insns that cause branches
5118 to go out of range - see find_end_label and callers. */
5119 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5121 /* We can't use JUMP_LABEL here because it might be undefined
5122 when not optimizing. */
5123 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5124 /* If the branch is out of range, try to find a scratch register for it. */
5125 if (optimize
5126 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5127 > 4092 + 4098))
5129 rtx scan;
5130      /* Don't look for the stack pointer as a scratch register;
5131 it would cause trouble if an interrupt occurred. */
5132 unsigned attempt = 0x7fff, used;
5133 int jump_left = flag_expensive_optimizations + 1;
5135 /* It is likely that the most recent eligible instruction is wanted for
5136 the delay slot. Therefore, find out which registers it uses, and
5137 try to avoid using them. */
5139 for (scan = jump; (scan = PREV_INSN (scan)); )
5141 enum rtx_code code;
5143 if (INSN_DELETED_P (scan))
5144 continue;
5145 code = GET_CODE (scan);
5146 if (code == CODE_LABEL || code == JUMP_INSN)
5147 break;
5148 if (code == INSN
5149 && GET_CODE (PATTERN (scan)) != USE
5150 && GET_CODE (PATTERN (scan)) != CLOBBER
5151 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5153 attempt &= ~regs_used (PATTERN (scan), 0);
5154 break;
5157 for (used = dead = 0, scan = JUMP_LABEL (jump);
5158 (scan = NEXT_INSN (scan)); )
5160 enum rtx_code code;
5162 if (INSN_DELETED_P (scan))
5163 continue;
5164 code = GET_CODE (scan);
5165 if (INSN_P (scan))
5167 used |= regs_used (PATTERN (scan), 0);
5168 if (code == CALL_INSN)
5169 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5170 dead |= (used >> 16) & ~used;
5171 if (dead & attempt)
5173 dead &= attempt;
5174 break;
5176 if (code == JUMP_INSN)
5178 if (jump_left-- && simplejump_p (scan))
5179 scan = JUMP_LABEL (scan);
5180 else
5181 break;
5185 /* Mask out the stack pointer again, in case it was
5186 the only 'free' register we have found. */
5187 dead &= 0x7fff;
5189 /* If the immediate destination is still in range, check for possible
5190 threading with a jump beyond the delay slot insn.
5191 Don't check if we are called recursively; the jump has been or will be
5192 checked in a different invocation then. */
5194 else if (optimize && need_block >= 0)
5196 rtx next = next_active_insn (next_active_insn (dest));
5197 if (next && JUMP_P (next)
5198 && GET_CODE (PATTERN (next)) == SET
5199 && recog_memoized (next) == CODE_FOR_jump_compact)
5201 dest = JUMP_LABEL (next);
5202 if (dest
5203 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5204 > 4092 + 4098))
5205 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5209 if (dead)
5211 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5213 /* It would be nice if we could convert the jump into an indirect
5214         jump / far branch right now, thus exposing all constituent
5215 instructions to further optimization. However, reorg uses
5216 simplejump_p to determine if there is an unconditional jump where
5217 it should try to schedule instructions from the target of the
5218 branch; simplejump_p fails for indirect jumps even if they have
5219 a JUMP_LABEL. */
5220 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5221 (reg, GEN_INT (unspec_bbr_uid++)),
5222 jump);
5223 /* ??? We would like this to have the scope of the jump, but that
5224 scope will change when a delay slot insn of an inner scope is added.
5225 Hence, after delay slot scheduling, we'll have to expect
5226 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5227 the jump. */
5229 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5230 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5231 return insn;
5233 else if (need_block)
5234 /* We can't use JUMP_LABEL here because it might be undefined
5235 when not optimizing. */
5236 return emit_insn_before (gen_block_branch_redirect
5237 (GEN_INT (unspec_bbr_uid++)),
5238 jump);
5239 return prev;
5242 #define CONDJUMP_MIN -252
5243 #define CONDJUMP_MAX 262
5244 struct far_branch
5246 /* A label (to be placed) in front of the jump
5247 that jumps to our ultimate destination. */
5248 rtx near_label;
5249 /* Where we are going to insert it if we cannot move the jump any farther,
5250 or the jump itself if we have picked up an existing jump. */
5251 rtx insert_place;
5252 /* The ultimate destination. */
5253 rtx far_label;
5254 struct far_branch *prev;
5255 /* If the branch has already been created, its address;
5256 else the address of its first prospective user. */
5257 int address;
5260 static void gen_far_branch (struct far_branch *);
5261 enum mdep_reorg_phase_e mdep_reorg_phase;
5262 static void
5263 gen_far_branch (struct far_branch *bp)
5265 rtx insn = bp->insert_place;
5266 rtx jump;
5267 rtx label = gen_label_rtx ();
5268 int ok;
5270 emit_label_after (label, insn);
5271 if (bp->far_label)
5273 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5274 LABEL_NUSES (bp->far_label)++;
5276 else
5277 jump = emit_jump_insn_after (gen_return (), insn);
5278 /* Emit a barrier so that reorg knows that any following instructions
5279 are not reachable via a fall-through path.
5280 But don't do this when not optimizing, since we wouldn't suppress the
5281 alignment for the barrier then, and could end up with out-of-range
5282 pc-relative loads. */
5283 if (optimize)
5284 emit_barrier_after (jump);
5285 emit_label_after (bp->near_label, insn);
5286 JUMP_LABEL (jump) = bp->far_label;
5287 ok = invert_jump (insn, label, 1);
5288 gcc_assert (ok);
5290 /* If we are branching around a jump (rather than a return), prevent
5291 reorg from using an insn from the jump target as the delay slot insn -
5292     when reorg did this, it pessimized code (we would rather hide the delay slot)
5293 and it could cause branches to go out of range. */
5294 if (bp->far_label)
5295 (emit_insn_after
5296 (gen_stuff_delay_slot
5297 (GEN_INT (unspec_bbr_uid++),
5298 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5299 insn));
5300 /* Prevent reorg from undoing our splits. */
5301 gen_block_redirect (jump, bp->address += 2, 2);
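/* In sketch form, the transformation gen_far_branch performs on an
   out-of-range conditional branch is

	bt	.Lfar		! before: target out of range

   which becomes

	bf	.Lskip		! condition inverted, short range
   .Lnear:
	bra	.Lfar		! unconditional branch with a larger range
	nop
   .Lskip:

   where .Lnear can also serve as a redirection target for other branches to
   the same far destination.  (The delay-slot nop is only illustrative;
   reorg may fill it.)  */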
5304 /* Fix up ADDR_DIFF_VECs. */
5305 void
5306 fixup_addr_diff_vecs (rtx first)
5308 rtx insn;
5310 for (insn = first; insn; insn = NEXT_INSN (insn))
5312 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5314 if (!JUMP_P (insn)
5315 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5316 continue;
5317 pat = PATTERN (insn);
5318 vec_lab = XEXP (XEXP (pat, 0), 0);
5320 /* Search the matching casesi_jump_2. */
5321 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5323 if (!JUMP_P (prev))
5324 continue;
5325 prevpat = PATTERN (prev);
5326 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5327 continue;
5328 x = XVECEXP (prevpat, 0, 1);
5329 if (GET_CODE (x) != USE)
5330 continue;
5331 x = XEXP (x, 0);
5332 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5333 break;
5335 /* FIXME: This is a bug in the optimizer, but it seems harmless
5336         to just avoid panicking.  */
5337 if (!prev)
5338 continue;
5340 /* Emit the reference label of the braf where it belongs, right after
5341 the casesi_jump_2 (i.e. braf). */
5342 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5343 emit_label_after (braf_label, prev);
5345      /* Fix up the ADDR_DIFF_VEC to be relative
5346 to the reference address of the braf. */
5347 XEXP (XEXP (pat, 0), 0) = braf_label;
5351 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5352 a barrier. Return the base 2 logarithm of the desired alignment. */
5354 barrier_align (rtx barrier_or_label)
5356 rtx next = next_real_insn (barrier_or_label), pat, prev;
5357 int slot, credit, jump_to_next = 0;
5359 if (! next)
5360 return 0;
5362 pat = PATTERN (next);
5364 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5365 return 2;
5367 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5368 /* This is a barrier in front of a constant table. */
5369 return 0;
5371 prev = prev_real_insn (barrier_or_label);
5372 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5374 pat = PATTERN (prev);
5375 /* If this is a very small table, we want to keep the alignment after
5376 the table to the minimum for proper code alignment. */
5377 return ((optimize_size
5378 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5379 <= (unsigned) 1 << (CACHE_LOG - 2)))
5380 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5383 if (optimize_size)
5384 return 0;
5386 if (! TARGET_SH2 || ! optimize)
5387 return align_jumps_log;
5389 /* When fixing up pcloads, a constant table might be inserted just before
5390 the basic block that ends with the barrier. Thus, we can't trust the
5391 instruction lengths before that. */
5392 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5394 /* Check if there is an immediately preceding branch to the insn beyond
5395         the barrier.  We must weigh the cost of discarding useful information
5396         from the current cache line when executing this branch and there is
5397         an alignment, against that of fetching unneeded insns in front of the
5398 branch target when there is no alignment. */
5400 /* There are two delay_slot cases to consider. One is the simple case
5401 where the preceding branch is to the insn beyond the barrier (simple
5402 delay slot filling), and the other is where the preceding branch has
5403 a delay slot that is a duplicate of the insn after the barrier
5404 (fill_eager_delay_slots) and the branch is to the insn after the insn
5405 after the barrier. */
5407 /* PREV is presumed to be the JUMP_INSN for the barrier under
5408 investigation. Skip to the insn before it. */
5409 prev = prev_real_insn (prev);
5411 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5412 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5413 prev = prev_real_insn (prev))
5415 jump_to_next = 0;
5416 if (GET_CODE (PATTERN (prev)) == USE
5417 || GET_CODE (PATTERN (prev)) == CLOBBER)
5418 continue;
5419 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5421 prev = XVECEXP (PATTERN (prev), 0, 1);
5422 if (INSN_UID (prev) == INSN_UID (next))
5424 /* Delay slot was filled with insn at jump target. */
5425 jump_to_next = 1;
5426 continue;
5430 if (slot &&
5431 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5432 slot = 0;
5433 credit -= get_attr_length (prev);
5435 if (prev
5436 && JUMP_P (prev)
5437 && JUMP_LABEL (prev))
5439 rtx x;
5440 if (jump_to_next
5441 || next_real_insn (JUMP_LABEL (prev)) == next
5442 /* If relax_delay_slots() decides NEXT was redundant
5443 with some previous instruction, it will have
5444 redirected PREV's jump to the following insn. */
5445 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5446 /* There is no upper bound on redundant instructions
5447 that might have been skipped, but we must not put an
5448 alignment where none had been before. */
5449 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5450 (INSN_P (x)
5451 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5452 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5453 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5455 rtx pat = PATTERN (prev);
5456 if (GET_CODE (pat) == PARALLEL)
5457 pat = XVECEXP (pat, 0, 0);
5458 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5459 return 0;
5464 return align_jumps_log;
5467 /* If we are inside a phony loop, almost any kind of label can turn up as the
5468 first one in the loop. Aligning a braf label causes incorrect switch
5469 destination addresses; we can detect braf labels because they are
5470 followed by a BARRIER.
5471 Applying loop alignment to small constant or switch tables is a waste
5472 of space, so we suppress this too. */
5474 sh_loop_align (rtx label)
5476 rtx next = label;
5479 next = next_nonnote_insn (next);
5480 while (next && LABEL_P (next));
5482 if (! next
5483 || ! INSN_P (next)
5484 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5485 || recog_memoized (next) == CODE_FOR_consttable_2)
5486 return 0;
5488 return align_loops_log;
5491 /* Do a final pass over the function, just before delayed branch
5492 scheduling. */
5494 static void
5495 sh_reorg (void)
5497 rtx first, insn, mova = NULL_RTX;
5498 int num_mova;
5499 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5500 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5502 first = get_insns ();
5503 max_labelno_before_reorg = max_label_num ();
5505 /* We must split call insns before introducing `mova's. If we're
5506 optimizing, they'll have already been split. Otherwise, make
5507 sure we don't split them too late. */
5508 if (! optimize)
5509 split_all_insns_noflow ();
5511 if (TARGET_SHMEDIA)
5512 return;
5514 /* If relaxing, generate pseudo-ops to associate function calls with
5515 the symbols they call. It does no harm to not generate these
5516     pseudo-ops.  However, when we can generate them, it enables the
5517 linker to potentially relax the jsr to a bsr, and eliminate the
5518 register load and, possibly, the constant pool entry. */
5520 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5521 if (TARGET_RELAX)
5523 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5524 own purposes. This works because none of the remaining passes
5525 need to look at them.
5527 ??? But it may break in the future. We should use a machine
5528 dependent REG_NOTE, or some other approach entirely. */
5529 for (insn = first; insn; insn = NEXT_INSN (insn))
5531 if (INSN_P (insn))
5533 rtx note;
5535 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5536 NULL_RTX)) != 0)
5537 remove_note (insn, note);
5541 for (insn = first; insn; insn = NEXT_INSN (insn))
5543 rtx pattern, reg, link, set, scan, dies, label;
5544 int rescan = 0, foundinsn = 0;
5546 if (CALL_P (insn))
5548 pattern = PATTERN (insn);
5550 if (GET_CODE (pattern) == PARALLEL)
5551 pattern = XVECEXP (pattern, 0, 0);
5552 if (GET_CODE (pattern) == SET)
5553 pattern = SET_SRC (pattern);
5555 if (GET_CODE (pattern) != CALL
5556 || !MEM_P (XEXP (pattern, 0)))
5557 continue;
5559 reg = XEXP (XEXP (pattern, 0), 0);
5561 else
5563 reg = sfunc_uses_reg (insn);
5564 if (! reg)
5565 continue;
5568 if (!REG_P (reg))
5569 continue;
5571 /* Try scanning backward to find where the register is set. */
5572 link = NULL;
5573 for (scan = PREV_INSN (insn);
5574 scan && !LABEL_P (scan);
5575 scan = PREV_INSN (scan))
5577 if (! INSN_P (scan))
5578 continue;
5580 if (! reg_mentioned_p (reg, scan))
5581 continue;
5583 if (noncall_uses_reg (reg, scan, &set))
5584 break;
5586 if (set)
5588 link = scan;
5589 break;
5593 if (! link)
5594 continue;
5596 /* The register is set at LINK. */
5598 /* We can only optimize the function call if the register is
5599 being set to a symbol. In theory, we could sometimes
5600 optimize calls to a constant location, but the assembler
5601 and linker do not support that at present. */
5602 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5603 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5604 continue;
5606 /* Scan forward from LINK to the place where REG dies, and
5607 make sure that the only insns which use REG are
5608 themselves function calls. */
5610 /* ??? This doesn't work for call targets that were allocated
5611 by reload, since there may not be a REG_DEAD note for the
5612 register. */
5614 dies = NULL_RTX;
5615 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5617 rtx scanset;
5619 /* Don't try to trace forward past a CODE_LABEL if we haven't
5620 seen INSN yet. Ordinarily, we will only find the setting insn
5621 if it is in the same basic block. However,
5622 cross-jumping can insert code labels in between the load and
5623 the call, and can result in situations where a single call
5624 insn may have two targets depending on where we came from. */
5626 if (LABEL_P (scan) && ! foundinsn)
5627 break;
5629 if (! INSN_P (scan))
5630 continue;
5632 /* Don't try to trace forward past a JUMP. To optimize
5633 safely, we would have to check that all the
5634 instructions at the jump destination did not use REG. */
5636 if (JUMP_P (scan))
5637 break;
5639 if (! reg_mentioned_p (reg, scan))
5640 continue;
5642 if (noncall_uses_reg (reg, scan, &scanset))
5643 break;
5645 if (scan == insn)
5646 foundinsn = 1;
5648 if (scan != insn
5649 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5651 /* There is a function call to this register other
5652 than the one we are checking. If we optimize
5653 this call, we need to rescan again below. */
5654 rescan = 1;
5657 /* ??? We shouldn't have to worry about SCANSET here.
5658 We should just be able to check for a REG_DEAD note
5659 on a function call. However, the REG_DEAD notes are
5660 apparently not dependable around libcalls; c-torture
5661 execute/920501-2 is a test case. If SCANSET is set,
5662 then this insn sets the register, so it must have
5663 died earlier. Unfortunately, this will only handle
5664 the cases in which the register is, in fact, set in a
5665 later insn. */
5667 /* ??? We shouldn't have to use FOUNDINSN here.
5668 This dates back to when we used LOG_LINKS to find
5669 the most recent insn which sets the register. */
5671 if (foundinsn
5672 && (scanset
5673 || find_reg_note (scan, REG_DEAD, reg)))
5675 dies = scan;
5676 break;
5680 if (! dies)
5682 /* Either there was a branch, or some insn used REG
5683 other than as a function call address. */
5684 continue;
5687 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5688 on the insn which sets the register, and on each call insn
5689 which uses the register. In final_prescan_insn we look for
5690 the REG_LABEL_OPERAND notes, and output the appropriate label
5691 or pseudo-op. */
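/* An illustrative sketch (not taken from actual compiler output) of what
   this produces when relaxing: the insn that loads the call target is
   preceded by an internal label, and the call insn gets a ".uses"
   pseudo-op naming that label, roughly

	L42:
		mov.l	.L43,r1		! load the call target
		...
		.uses	L42
		jsr	@r1

   so the assembler/linker can relax the load/call pair.  Label names and
   the local-label prefix shown here are hypothetical.  */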
5693 label = gen_label_rtx ();
5694 add_reg_note (link, REG_LABEL_OPERAND, label);
5695 add_reg_note (insn, REG_LABEL_OPERAND, label);
5696 if (rescan)
5698 scan = link;
5701 rtx reg2;
5703 scan = NEXT_INSN (scan);
5704 if (scan != insn
5705 && ((CALL_P (scan)
5706 && reg_mentioned_p (reg, scan))
5707 || ((reg2 = sfunc_uses_reg (scan))
5708 && REGNO (reg2) == REGNO (reg))))
5709 add_reg_note (scan, REG_LABEL_OPERAND, label);
5711 while (scan != dies);
5716 if (TARGET_SH2)
5717 fixup_addr_diff_vecs (first);
5719 if (optimize)
5721 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5722 shorten_branches (first);
5725 /* Scan the function looking for move instructions which have to be
5726 changed to pc-relative loads and insert the literal tables. */
5727 label_ref_list_pool = create_alloc_pool ("label references list",
5728 sizeof (struct label_ref_list_d),
5729 30);
5730 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5731 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5733 if (mova_p (insn))
5735 /* ??? basic block reordering can move a switch table dispatch
5736 below the switch table. Check if that has happened.
5737 We only have the addresses available when optimizing; but then,
5738 this check shouldn't be needed when not optimizing. */
5739 if (!untangle_mova (&num_mova, &mova, insn))
5741 insn = mova;
5742 num_mova = 0;
5745 else if (JUMP_P (insn)
5746 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5747 && num_mova
5748 /* ??? loop invariant motion can also move a mova out of a
5749 loop. Since loop does this code motion anyway, maybe we
5750 should wrap UNSPEC_MOVA into a CONST, so that reload can
5751 move it back. */
5752 && ((num_mova > 1
5753 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5754 || (prev_nonnote_insn (insn)
5755 == XEXP (MOVA_LABELREF (mova), 0))))
5757 rtx scan;
5758 int total;
5760 num_mova--;
5762 /* Some code might have been inserted between the mova and
5763 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5764 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5765 total += get_attr_length (scan);
5767 /* The range of mova is 1020; add 4 because the pc counts from the address
5768 of the second instruction after this one, and subtract 2 in case the pc
5769 is 2 byte aligned, giving 1020 + 4 - 2 = 1022. Possible alignment needed
5770 for the ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
5771 if (total > 1022)
5773 /* Change the mova into a load, and restart scanning
5774 there. broken_move will then return true for mova. */
5775 fixup_mova (mova);
5776 insn = mova;
5779 if (broken_move (insn)
5780 || (NONJUMP_INSN_P (insn)
5781 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5783 rtx scan;
5784 /* Scan ahead looking for a barrier to stick the constant table
5785 behind. */
5786 rtx barrier = find_barrier (num_mova, mova, insn);
5787 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5788 int need_aligned_label = 0;
5790 if (num_mova && ! mova_p (mova))
5792 /* find_barrier had to change the first mova into a
5793 pcload; thus, we have to start with this new pcload. */
5794 insn = mova;
5795 num_mova = 0;
5797 /* Now find all the moves between the points and modify them. */
5798 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5800 if (LABEL_P (scan))
5801 last_float = 0;
5802 if (NONJUMP_INSN_P (scan)
5803 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5804 need_aligned_label = 1;
5805 if (broken_move (scan))
5807 rtx *patp = &PATTERN (scan), pat = *patp;
5808 rtx src, dst;
5809 rtx lab;
5810 rtx newsrc;
5811 enum machine_mode mode;
5813 if (GET_CODE (pat) == PARALLEL)
5814 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5815 src = SET_SRC (pat);
5816 dst = SET_DEST (pat);
5817 mode = GET_MODE (dst);
5819 if (mode == SImode && hi_const (src)
5820 && REGNO (dst) != FPUL_REG)
5822 int offset = 0;
5824 mode = HImode;
5825 while (GET_CODE (dst) == SUBREG)
5827 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5828 GET_MODE (SUBREG_REG (dst)),
5829 SUBREG_BYTE (dst),
5830 GET_MODE (dst));
5831 dst = SUBREG_REG (dst);
5833 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5835 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5837 /* This must be an insn that clobbers r0. */
5838 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5839 XVECLEN (PATTERN (scan), 0)
5840 - 1);
5841 rtx clobber = *clobberp;
5843 gcc_assert (GET_CODE (clobber) == CLOBBER
5844 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5846 if (last_float
5847 && reg_set_between_p (r0_rtx, last_float_move, scan))
5848 last_float = 0;
5849 if (last_float
5850 && TARGET_SHCOMPACT
5851 && GET_MODE_SIZE (mode) != 4
5852 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5853 last_float = 0;
5854 lab = add_constant (src, mode, last_float);
5855 if (lab)
5856 emit_insn_before (gen_mova (lab), scan);
5857 else
5859 /* There will be a REG_UNUSED note for r0 on
5860 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5861 otherwise reorg:mark_target_live_regs will not
5862 consider r0 to be used, and we could end up with a
5863 delay slot insn in front of SCAN that clobbers r0. */
5864 rtx note
5865 = find_regno_note (last_float_move, REG_UNUSED, 0);
5867 /* If we are not optimizing, then there may not be
5868 a note. */
5869 if (note)
5870 PUT_REG_NOTE_KIND (note, REG_INC);
5872 *last_float_addr = r0_inc_rtx;
5874 last_float_move = scan;
5875 last_float = src;
5876 newsrc = gen_const_mem (mode,
5877 (((TARGET_SH4 && ! TARGET_FMOVD)
5878 || REGNO (dst) == FPUL_REG)
5879 ? r0_inc_rtx
5880 : r0_rtx));
5881 last_float_addr = &XEXP (newsrc, 0);
5883 /* Remove the clobber of r0. */
5884 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5885 gen_rtx_SCRATCH (Pmode));
5887 /* This is a mova needing a label. Create it. */
5888 else if (GET_CODE (src) == UNSPEC
5889 && XINT (src, 1) == UNSPEC_MOVA
5890 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5892 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5893 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5894 newsrc = gen_rtx_UNSPEC (SImode,
5895 gen_rtvec (1, newsrc),
5896 UNSPEC_MOVA);
5898 else
5900 lab = add_constant (src, mode, 0);
5901 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5902 newsrc = gen_const_mem (mode, newsrc);
5904 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5905 INSN_CODE (scan) = -1;
5908 dump_table (need_aligned_label ? insn : 0, barrier);
5909 insn = barrier;
5912 free_alloc_pool (label_ref_list_pool);
5913 for (insn = first; insn; insn = NEXT_INSN (insn))
5914 PUT_MODE (insn, VOIDmode);
5916 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5917 INSN_ADDRESSES_FREE ();
5918 split_branches (first);
5920 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5921 also has an effect on the register that holds the address of the sfunc.
5922 Insert an extra dummy insn in front of each sfunc that pretends to
5923 use this register. */
5924 if (flag_delayed_branch)
5926 for (insn = first; insn; insn = NEXT_INSN (insn))
5928 rtx reg = sfunc_uses_reg (insn);
5930 if (! reg)
5931 continue;
5932 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5935 #if 0
5936 /* fpscr is not actually a user variable, but we pretend it is for the
5937 sake of the previous optimization passes, since we want it handled like
5938 one. However, we don't have any debugging information for it, so turn
5939 it into a non-user variable now. */
5940 if (TARGET_SH4)
5941 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5942 #endif
5943 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5947 get_dest_uid (rtx label, int max_uid)
5949 rtx dest = next_real_insn (label);
5950 int dest_uid;
5951 if (! dest)
5952 /* This can happen for an undefined label. */
5953 return 0;
5954 dest_uid = INSN_UID (dest);
5955 /* If this is a newly created branch redirection blocking instruction,
5956 we cannot index the branch_uid or insn_addresses arrays with its
5957 uid. But then, we won't need to, because the actual destination is
5958 the following branch. */
5959 while (dest_uid >= max_uid)
5961 dest = NEXT_INSN (dest);
5962 dest_uid = INSN_UID (dest);
5964 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5965 return 0;
5966 return dest_uid;
5969 /* Split condbranches that are out of range. Also add clobbers for
5970 scratch registers that are needed in far jumps.
5971 We do this before delay slot scheduling, so that it can take our
5972 newly created instructions into account. It also allows us to
5973 find branches with common targets more easily. */
5975 static void
5976 split_branches (rtx first)
5978 rtx insn;
5979 struct far_branch **uid_branch, *far_branch_list = 0;
5980 int max_uid = get_max_uid ();
5981 int ok;
5983 /* Find out which branches are out of range. */
5984 shorten_branches (first);
5986 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5987 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5989 for (insn = first; insn; insn = NEXT_INSN (insn))
5990 if (! INSN_P (insn))
5991 continue;
5992 else if (INSN_DELETED_P (insn))
5994 /* Shorten_branches would split this instruction again,
5995 so transform it into a note. */
5996 SET_INSN_DELETED (insn);
5998 else if (JUMP_P (insn)
5999 /* Don't mess with ADDR_DIFF_VEC */
6000 && (GET_CODE (PATTERN (insn)) == SET
6001 || GET_CODE (PATTERN (insn)) == RETURN))
6003 enum attr_type type = get_attr_type (insn);
6004 if (type == TYPE_CBRANCH)
6006 rtx next, beyond;
6008 if (get_attr_length (insn) > 4)
6010 rtx src = SET_SRC (PATTERN (insn));
6011 rtx olabel = XEXP (XEXP (src, 1), 0);
6012 int addr = INSN_ADDRESSES (INSN_UID (insn));
6013 rtx label = 0;
6014 int dest_uid = get_dest_uid (olabel, max_uid);
6015 struct far_branch *bp = uid_branch[dest_uid];
6017 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6018 the label if the LABEL_NUSES count drops to zero. There is
6019 always a jump_optimize pass that sets these values, but it
6020 proceeds to delete unreferenced code, and then if not
6021 optimizing, to un-delete the deleted instructions, thus
6022 leaving labels with too low uses counts. */
6023 if (! optimize)
6025 JUMP_LABEL (insn) = olabel;
6026 LABEL_NUSES (olabel)++;
6028 if (! bp)
6030 bp = (struct far_branch *) alloca (sizeof *bp);
6031 uid_branch[dest_uid] = bp;
6032 bp->prev = far_branch_list;
6033 far_branch_list = bp;
6034 bp->far_label
6035 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6036 LABEL_NUSES (bp->far_label)++;
6038 else
6040 label = bp->near_label;
6041 if (! label && bp->address - addr >= CONDJUMP_MIN)
6043 rtx block = bp->insert_place;
6045 if (GET_CODE (PATTERN (block)) == RETURN)
6046 block = PREV_INSN (block);
6047 else
6048 block = gen_block_redirect (block,
6049 bp->address, 2);
6050 label = emit_label_after (gen_label_rtx (),
6051 PREV_INSN (block));
6052 bp->near_label = label;
6054 else if (label && ! NEXT_INSN (label))
6056 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6057 bp->insert_place = insn;
6058 else
6059 gen_far_branch (bp);
6062 if (! label
6063 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6065 bp->near_label = label = gen_label_rtx ();
6066 bp->insert_place = insn;
6067 bp->address = addr;
6069 ok = redirect_jump (insn, label, 0);
6070 gcc_assert (ok);
6072 else
6074 /* get_attr_length (insn) == 2 */
6075 /* Check if we have a pattern where reorg wants to redirect
6076 the branch to a label from an unconditional branch that
6077 is too far away. */
6078 /* We can't use JUMP_LABEL here because it might be undefined
6079 when not optimizing. */
6080 /* A syntax error might cause beyond to be NULL_RTX. */
6081 beyond
6082 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6083 0));
6085 if (beyond
6086 && (JUMP_P (beyond)
6087 || ((beyond = next_active_insn (beyond))
6088 && JUMP_P (beyond)))
6089 && GET_CODE (PATTERN (beyond)) == SET
6090 && recog_memoized (beyond) == CODE_FOR_jump_compact
6091 && ((INSN_ADDRESSES
6092 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6093 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6094 > 252 + 258 + 2))
6095 gen_block_redirect (beyond,
6096 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6099 next = next_active_insn (insn);
6101 if (next
6102 && (JUMP_P (next)
6103 || ((next = next_active_insn (next))
6104 && JUMP_P (next)))
6105 && GET_CODE (PATTERN (next)) == SET
6106 && recog_memoized (next) == CODE_FOR_jump_compact
6107 && ((INSN_ADDRESSES
6108 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6109 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6110 > 252 + 258 + 2))
6111 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6113 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6115 int addr = INSN_ADDRESSES (INSN_UID (insn));
6116 rtx far_label = 0;
6117 int dest_uid = 0;
6118 struct far_branch *bp;
6120 if (type == TYPE_JUMP)
6122 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6123 dest_uid = get_dest_uid (far_label, max_uid);
6124 if (! dest_uid)
6126 /* Parse errors can lead to labels outside
6127 the insn stream. */
6128 if (! NEXT_INSN (far_label))
6129 continue;
6131 if (! optimize)
6133 JUMP_LABEL (insn) = far_label;
6134 LABEL_NUSES (far_label)++;
6136 redirect_jump (insn, NULL_RTX, 1);
6137 far_label = 0;
6140 bp = uid_branch[dest_uid];
6141 if (! bp)
6143 bp = (struct far_branch *) alloca (sizeof *bp);
6144 uid_branch[dest_uid] = bp;
6145 bp->prev = far_branch_list;
6146 far_branch_list = bp;
6147 bp->near_label = 0;
6148 bp->far_label = far_label;
6149 if (far_label)
6150 LABEL_NUSES (far_label)++;
6152 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6153 if (addr - bp->address <= CONDJUMP_MAX)
6154 emit_label_after (bp->near_label, PREV_INSN (insn));
6155 else
6157 gen_far_branch (bp);
6158 bp->near_label = 0;
6160 else
6161 bp->near_label = 0;
6162 bp->address = addr;
6163 bp->insert_place = insn;
6164 if (! far_label)
6165 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6166 else
6167 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6170 /* Generate all pending far branches,
6171 and free our references to the far labels. */
6172 while (far_branch_list)
6174 if (far_branch_list->near_label
6175 && ! NEXT_INSN (far_branch_list->near_label))
6176 gen_far_branch (far_branch_list);
6177 if (optimize
6178 && far_branch_list->far_label
6179 && ! --LABEL_NUSES (far_branch_list->far_label))
6180 delete_insn (far_branch_list->far_label);
6181 far_branch_list = far_branch_list->prev;
6184 /* Instruction length information is no longer valid due to the new
6185 instructions that have been generated. */
6186 init_insn_lengths ();
6189 /* Dump out instruction addresses, which is useful for debugging the
6190 constant pool table stuff.
6192 If relaxing, output the label and pseudo-ops used to link together
6193 calls and the instruction which set the registers. */
6195 /* ??? The addresses printed by this routine for insns are nonsense for
6196 insns which are inside of a sequence where none of the inner insns have
6197 variable length. This is because the second pass of shorten_branches
6198 does not bother to update them. */
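/* When TARGET_DUMPISIZE is in effect, each insn is preceded in the
   assembly output by a comment of the form "! at 00a4" giving its
   recorded address (see the fprintf below); the address shown here is
   illustrative only.  */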
6200 void
6201 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6202 int noperands ATTRIBUTE_UNUSED)
6204 if (TARGET_DUMPISIZE)
6205 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6207 if (TARGET_RELAX)
6209 rtx note;
6211 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6212 if (note)
6214 rtx pattern;
6216 pattern = PATTERN (insn);
6217 if (GET_CODE (pattern) == PARALLEL)
6218 pattern = XVECEXP (pattern, 0, 0);
6219 switch (GET_CODE (pattern))
6221 case SET:
6222 if (GET_CODE (SET_SRC (pattern)) != CALL
6223 && get_attr_type (insn) != TYPE_SFUNC)
6225 targetm.asm_out.internal_label
6226 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6227 break;
6229 /* else FALLTHROUGH */
6230 case CALL:
6231 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6232 CODE_LABEL_NUMBER (XEXP (note, 0)));
6233 break;
6235 default:
6236 gcc_unreachable ();
6242 /* Dump out any constants accumulated in the final pass. These will
6243 only be labels. */
6245 const char *
6246 output_jump_label_table (void)
6248 int i;
6250 if (pool_size)
6252 fprintf (asm_out_file, "\t.align 2\n");
6253 for (i = 0; i < pool_size; i++)
6255 pool_node *p = &pool_vector[i];
6257 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6258 CODE_LABEL_NUMBER (p->label));
6259 output_asm_insn (".long %O0", &p->value);
6261 pool_size = 0;
6264 return "";
6267 /* A full frame looks like:
6269 arg-5
6270 arg-4
6271 [ if current_function_anonymous_args
6272 arg-3
6273 arg-2
6274 arg-1
6275 arg-0 ]
6276 saved-fp
6277 saved-r10
6278 saved-r11
6279 saved-r12
6280 saved-pr
6281 local-n
6283 local-1
6284 local-0 <- fp points here. */
6286 /* Number of bytes pushed for anonymous args, used to pass information
6287 between expand_prologue and expand_epilogue. */
6289 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6290 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6291 for an epilogue and a negative value means that it's for a sibcall
6292 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6293 all the registers that are about to be restored, and hence dead. */
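/* For example, sh_expand_prologue below calls
     output_stack_adjust (-rounded_frame_size (d) + d_rounding,
			  stack_pointer_rtx, 0, NULL, true);
   to allocate the local frame, while sh_expand_epilogue passes a
   positive size and a nonzero EPILOGUE_P to release it again.  */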
6295 static void
6296 output_stack_adjust (int size, rtx reg, int epilogue_p,
6297 HARD_REG_SET *live_regs_mask, bool frame_p)
6299 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6300 if (size)
6302 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6304 /* This test is bogus, as output_stack_adjust is used to re-align the
6305 stack. */
6306 #if 0
6307 gcc_assert (!(size % align));
6308 #endif
6310 if (CONST_OK_FOR_ADD (size))
6311 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6312 /* Try to do it with two partial adjustments; however, we must make
6313 sure that the stack is properly aligned at all times, in case
6314 an interrupt occurs between the two partial adjustments. */
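/* Worked example (illustrative): with a 4-byte STACK_BOUNDARY and
   SIZE == -200, the two steps are (-200 / 2 & -4) == -100 and
   -200 - (-100) == -100; each step is a multiple of the alignment,
   so the stack stays aligned even if an interrupt hits in between.  */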
6315 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6316 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6318 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6319 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6321 else
6323 rtx const_reg;
6324 rtx insn;
6325 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6326 int i;
6328 /* If TEMP is invalid, we could temporarily save a general
6329 register to MACL. However, there is currently no need
6330 to handle this case, so just die when we see it. */
6331 if (epilogue_p < 0
6332 || current_function_interrupt
6333 || ! call_really_used_regs[temp] || fixed_regs[temp])
6334 temp = -1;
6335 if (temp < 0 && ! current_function_interrupt
6336 && (TARGET_SHMEDIA || epilogue_p >= 0))
6338 HARD_REG_SET temps;
6339 COPY_HARD_REG_SET (temps, call_used_reg_set);
6340 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6341 if (epilogue_p > 0)
6343 int nreg = 0;
6344 if (crtl->return_rtx)
6346 enum machine_mode mode;
6347 mode = GET_MODE (crtl->return_rtx);
6348 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6349 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6351 for (i = 0; i < nreg; i++)
6352 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6353 if (crtl->calls_eh_return)
6355 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6356 for (i = 0; i <= 3; i++)
6357 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6360 if (TARGET_SHMEDIA && epilogue_p < 0)
6361 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6362 CLEAR_HARD_REG_BIT (temps, i);
6363 if (epilogue_p <= 0)
6365 for (i = FIRST_PARM_REG;
6366 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6367 CLEAR_HARD_REG_BIT (temps, i);
6368 if (cfun->static_chain_decl != NULL)
6369 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6371 temp = scavenge_reg (&temps);
6373 if (temp < 0 && live_regs_mask)
6375 HARD_REG_SET temps;
6377 COPY_HARD_REG_SET (temps, *live_regs_mask);
6378 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6379 temp = scavenge_reg (&temps);
6381 if (temp < 0)
6383 rtx adj_reg, tmp_reg, mem;
6385 /* If we reached here, the most likely case is the (sibcall)
6386 epilogue for non-SHmedia. Put a special push/pop sequence
6387 for such cases as a last resort. This looks lengthy, but it
6388 should not be a problem because it seems to be very
6389 rare. */
6391 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6394 /* ??? There is still the slight possibility that r4 or
6395 r5 have been reserved as fixed registers or assigned
6396 as global registers, and they change during an
6397 interrupt. There are possible ways to handle this:
6399 - If we are adjusting the frame pointer (r14), we can do
6400 with a single temp register and an ordinary push / pop
6401 on the stack.
6402 - Grab any call-used or call-saved registers (i.e. not
6403 fixed or globals) for the temps we need. We might
6404 also grab r14 if we are adjusting the stack pointer.
6405 If we can't find enough available registers, issue
6406 a diagnostic and die - the user must have reserved
6407 way too many registers.
6408 But since all this is rather unlikely to happen and
6409 would require extra testing, we just die if r4 / r5
6410 are not available. */
6411 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6412 && !global_regs[4] && !global_regs[5]);
6414 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6415 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6416 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6417 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6418 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6419 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6420 emit_move_insn (mem, tmp_reg);
6421 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6422 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6423 emit_move_insn (mem, tmp_reg);
6424 emit_move_insn (reg, adj_reg);
6425 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6426 emit_move_insn (adj_reg, mem);
6427 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6428 emit_move_insn (tmp_reg, mem);
6429 /* Tell flow the insns that pop r4/r5 aren't dead. */
6430 emit_use (tmp_reg);
6431 emit_use (adj_reg);
6432 return;
6434 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6436 /* If SIZE is negative, subtract the positive value.
6437 This sometimes allows a constant pool entry to be shared
6438 between prologue and epilogue code. */
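/* E.g. (illustrative): a prologue that subtracts 256 from the stack
   pointer and an epilogue that adds 256 back can both load the literal
   256 into CONST_REG from the same pool entry; only the add/sub
   direction below differs.  */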
6439 if (size < 0)
6441 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6442 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6444 else
6446 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6447 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6449 if (! epilogue_p)
6450 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6451 gen_rtx_SET (VOIDmode, reg,
6452 gen_rtx_PLUS (SImode, reg,
6453 GEN_INT (size))));
6458 static rtx
6459 frame_insn (rtx x)
6461 x = emit_insn (x);
6462 RTX_FRAME_RELATED_P (x) = 1;
6463 return x;
6466 /* Output RTL to push register RN onto the stack. */
6468 static rtx
6469 push (int rn)
6471 rtx x;
6472 if (rn == FPUL_REG)
6473 x = gen_push_fpul ();
6474 else if (rn == FPSCR_REG)
6475 x = gen_push_fpscr ();
6476 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6477 && FP_OR_XD_REGISTER_P (rn))
6479 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6480 return NULL_RTX;
6481 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6483 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6484 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6485 else
6486 x = gen_push (gen_rtx_REG (SImode, rn));
6488 x = frame_insn (x);
6489 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6490 return x;
6493 /* Output RTL to pop register RN from the stack. */
6495 static void
6496 pop (int rn)
6498 rtx x;
6499 if (rn == FPUL_REG)
6500 x = gen_pop_fpul ();
6501 else if (rn == FPSCR_REG)
6502 x = gen_pop_fpscr ();
6503 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6504 && FP_OR_XD_REGISTER_P (rn))
6506 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6507 return;
6508 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6510 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6511 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6512 else
6513 x = gen_pop (gen_rtx_REG (SImode, rn));
6515 x = emit_insn (x);
6516 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6519 /* Generate code to push the regs specified in the mask. */
6521 static void
6522 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6524 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6525 int skip_fpscr = 0;
6527 /* Push PR last; this gives better latencies after the prologue, and
6528 candidates for the return delay slot when there are no general
6529 registers pushed. */
6530 for (; i < FIRST_PSEUDO_REGISTER; i++)
6532 /* If this is an interrupt handler, and the SZ bit varies,
6533 and we have to push any floating point register, we need
6534 to switch to the correct precision first. */
6535 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6536 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6538 HARD_REG_SET unsaved;
6540 push (FPSCR_REG);
6541 COMPL_HARD_REG_SET (unsaved, *mask);
6542 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6543 skip_fpscr = 1;
6545 if (i != PR_REG
6546 && (i != FPSCR_REG || ! skip_fpscr)
6547 && TEST_HARD_REG_BIT (*mask, i))
6549 /* If the ISR has RESBANK attribute assigned, don't push any of
6550 the following registers - R0-R14, MACH, MACL and GBR. */
6551 if (! (sh_cfun_resbank_handler_p ()
6552 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6553 || i == MACH_REG
6554 || i == MACL_REG
6555 || i == GBR_REG)))
6556 push (i);
6560 /* Push banked registers last to improve delay slot opportunities. */
6561 if (interrupt_handler)
6563 bool use_movml = false;
6565 if (TARGET_SH2A)
6567 unsigned int count = 0;
6569 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6570 if (TEST_HARD_REG_BIT (*mask, i))
6571 count++;
6572 else
6573 break;
6575 /* Use movml when all banked registers are pushed. */
6576 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6577 use_movml = true;
6580 if (use_movml)
6582 rtx x, mem, reg, set;
6583 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6585 /* We must avoid scheduling the multiple store insns together
6586 with other insns. */
6587 emit_insn (gen_blockage ());
6588 x = gen_movml_push_banked (sp_reg);
6589 x = frame_insn (x);
6590 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6592 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6593 reg = gen_rtx_REG (SImode, i);
6594 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6597 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6598 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6599 emit_insn (gen_blockage ());
6601 else
6602 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6603 if (TEST_HARD_REG_BIT (*mask, i))
6604 push (i);
6607 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6608 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6609 push (PR_REG);
6612 /* Calculate how much extra space is needed to save all callee-saved
6613 target registers.
6614 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6616 static int
6617 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6619 int reg;
6620 int stack_space = 0;
6621 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6623 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6624 if ((! call_really_used_regs[reg] || interrupt_handler)
6625 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6626 /* Leave space to save this target register on the stack,
6627 in case target register allocation wants to use it. */
6628 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6629 return stack_space;
6632 /* Decide whether we should reserve space for callee-save target registers,
6633 in case target register allocation wants to use them. REGS_SAVED is
6634 the space, in bytes, that is already required for register saves.
6635 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6637 static int
6638 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6639 HARD_REG_SET *live_regs_mask)
6641 if (optimize_size)
6642 return 0;
6643 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6646 /* Decide how much space to reserve for callee-save target registers
6647 in case target register allocation wants to use them.
6648 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6650 static int
6651 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6653 if (shmedia_space_reserved_for_target_registers)
6654 return shmedia_target_regs_stack_space (live_regs_mask);
6655 else
6656 return 0;
6659 /* Work out the registers which need to be saved, both as a mask and a
6660 count of saved words. Return the count.
6662 If doing a pragma interrupt function, then push all regs used by the
6663 function, and if we call another function (we can tell by looking at PR),
6664 make sure that all the regs it clobbers are safe too. */
6666 static int
6667 calc_live_regs (HARD_REG_SET *live_regs_mask)
6669 unsigned int reg;
6670 int count;
6671 tree attrs;
6672 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6673 bool nosave_low_regs;
6674 int pr_live, has_call;
6676 attrs = DECL_ATTRIBUTES (current_function_decl);
6677 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6678 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6679 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6680 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6682 CLEAR_HARD_REG_SET (*live_regs_mask);
6683 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6684 && df_regs_ever_live_p (FPSCR_REG))
6685 target_flags &= ~MASK_FPU_SINGLE;
6686 /* If we can save a lot of saves by switching to double mode, do that. */
6687 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6688 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6689 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6690 && (! call_really_used_regs[reg]
6691 || interrupt_handler)
6692 && ++count > 2)
6694 target_flags &= ~MASK_FPU_SINGLE;
6695 break;
6697 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6698 knows how to use it. That means the pseudo originally allocated for
6699 the initial value can become the PR_MEDIA_REG hard register, as seen for
6700 execute/20010122-1.c:test9. */
6701 if (TARGET_SHMEDIA)
6702 /* ??? this function is called from initial_elimination_offset, hence we
6703 can't use the result of sh_media_register_for_return here. */
6704 pr_live = sh_pr_n_sets ();
6705 else
6707 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6708 pr_live = (pr_initial
6709 ? (!REG_P (pr_initial)
6710 || REGNO (pr_initial) != (PR_REG))
6711 : df_regs_ever_live_p (PR_REG));
6712 /* For SHcompact, if not optimizing, we end up with a memory reference
6713 using the return address pointer for __builtin_return_address even
6714 though there is no actual need to put the PR register on the stack. */
6715 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6717 /* Force PR to be live if the prologue has to call the SHmedia
6718 argument decoder or register saver. */
6719 if (TARGET_SHCOMPACT
6720 && ((crtl->args.info.call_cookie
6721 & ~ CALL_COOKIE_RET_TRAMP (1))
6722 || crtl->saves_all_registers))
6723 pr_live = 1;
6724 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6725 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6727 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6728 ? pr_live
6729 : interrupt_handler
6730 ? (/* Need to save all the regs ever live. */
6731 (df_regs_ever_live_p (reg)
6732 || (call_really_used_regs[reg]
6733 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6734 || reg == PIC_OFFSET_TABLE_REGNUM)
6735 && has_call)
6736 || (TARGET_SHMEDIA && has_call
6737 && REGISTER_NATURAL_MODE (reg) == SImode
6738 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6739 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6740 && reg != RETURN_ADDRESS_POINTER_REGNUM
6741 && reg != T_REG && reg != GBR_REG
6742 /* Push fpscr only on targets which have FPU */
6743 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6744 : (/* Only push those regs which are used and need to be saved. */
6745 (TARGET_SHCOMPACT
6746 && flag_pic
6747 && crtl->args.info.call_cookie
6748 && reg == PIC_OFFSET_TABLE_REGNUM)
6749 || (df_regs_ever_live_p (reg)
6750 && ((!call_really_used_regs[reg]
6751 && !(reg != PIC_OFFSET_TABLE_REGNUM
6752 && fixed_regs[reg] && call_used_regs[reg]))
6753 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6754 || (crtl->calls_eh_return
6755 && (reg == EH_RETURN_DATA_REGNO (0)
6756 || reg == EH_RETURN_DATA_REGNO (1)
6757 || reg == EH_RETURN_DATA_REGNO (2)
6758 || reg == EH_RETURN_DATA_REGNO (3)))
6759 || ((reg == MACL_REG || reg == MACH_REG)
6760 && df_regs_ever_live_p (reg)
6761 && sh_cfun_attr_renesas_p ())
6764 SET_HARD_REG_BIT (*live_regs_mask, reg);
6765 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6767 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6768 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6770 if (FP_REGISTER_P (reg))
6772 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6774 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6775 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6778 else if (XD_REGISTER_P (reg))
6780 /* Must switch to double mode to access these registers. */
6781 target_flags &= ~MASK_FPU_SINGLE;
6785 if (nosave_low_regs && reg == R8_REG)
6786 break;
6788 /* If we have a target register optimization pass after prologue / epilogue
6789 threading, we need to assume all target registers will be live even if
6790 they aren't now. */
6791 if (flag_branch_target_load_optimize2
6792 && TARGET_SAVE_ALL_TARGET_REGS
6793 && shmedia_space_reserved_for_target_registers)
6794 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6795 if ((! call_really_used_regs[reg] || interrupt_handler)
6796 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6798 SET_HARD_REG_BIT (*live_regs_mask, reg);
6799 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6801 /* If this is an interrupt handler, we don't have any call-clobbered
6802 registers we can conveniently use for target register save/restore.
6803 Make sure we save at least one general purpose register when we need
6804 to save target registers. */
6805 if (interrupt_handler
6806 && hard_reg_set_intersect_p (*live_regs_mask,
6807 reg_class_contents[TARGET_REGS])
6808 && ! hard_reg_set_intersect_p (*live_regs_mask,
6809 reg_class_contents[GENERAL_REGS]))
6811 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6812 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6815 return count;
6818 /* Code to generate prologue and epilogue sequences */
6820 /* PUSHED is the number of bytes that are being pushed on the
6821 stack for register saves. Return the frame size, padded
6822 appropriately so that the stack stays properly aligned. */
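/* For instance (illustrative), with a 4-byte STACK_BOUNDARY,
   get_frame_size () == 10 and PUSHED == 4:
   ((10 + 4 + 4 - 1) & -4) - 4 == 12, so locals plus pushed registers
   together occupy a multiple of the stack alignment.  */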
6823 static HOST_WIDE_INT
6824 rounded_frame_size (int pushed)
6826 HOST_WIDE_INT size = get_frame_size ();
6827 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6829 if (ACCUMULATE_OUTGOING_ARGS)
6830 size += crtl->outgoing_args_size;
6832 return ((size + pushed + align - 1) & -align) - pushed;
6835 /* Choose a call-clobbered target-branch register that remains
6836 unchanged along the whole function. We set it up as the return
6837 value in the prologue. */
6839 sh_media_register_for_return (void)
6841 int regno;
6842 int tr0_used;
6844 if (! current_function_is_leaf)
6845 return -1;
6846 if (lookup_attribute ("interrupt_handler",
6847 DECL_ATTRIBUTES (current_function_decl)))
6848 return -1;
6849 if (sh_cfun_interrupt_handler_p ())
6850 return -1;
6852 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6854 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6855 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6856 return regno;
6858 return -1;
6861 /* The maximum registers we need to save are:
6862 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6863 - 32 floating point registers (for each pair, we save none,
6864 one single precision value, or a double precision value).
6865 - 8 target registers
6866 - add 1 entry for a delimiter. */
6867 #define MAX_SAVED_REGS (62+32+8)
6869 typedef struct save_entry_s
6871 unsigned char reg;
6872 unsigned char mode;
6873 short offset;
6874 } save_entry;
6876 #define MAX_TEMPS 4
6878 /* There will be a delimiter entry with VOIDmode both at the start and the
6879 end of a filled in schedule. The end delimiter has the offset of the
6880 save with the smallest (i.e. most negative) offset. */
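/* In a filled-in schedule, entries[0] is therefore the start delimiter,
   the following entries record one (reg, mode, offset) triple per save
   in the order sh5_schedule_saves emits them, and the final VOIDmode
   entry closes the list as described above.  */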
6881 typedef struct save_schedule_s
6883 save_entry entries[MAX_SAVED_REGS + 2];
6884 int temps[MAX_TEMPS+1];
6885 } save_schedule;
6887 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6888 use reverse order. Returns the last entry written to (not counting
6889 the delimiter). OFFSET_BASE is a number to be added to all offset
6890 entries. */
6892 static save_entry *
6893 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6894 int offset_base)
6896 int align, i;
6897 save_entry *entry = schedule->entries;
6898 int tmpx = 0;
6899 int offset;
6901 if (! current_function_interrupt)
6902 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6903 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6904 && ! FUNCTION_ARG_REGNO_P (i)
6905 && i != FIRST_RET_REG
6906 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6907 && ! (crtl->calls_eh_return
6908 && (i == EH_RETURN_STACKADJ_REGNO
6909 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6910 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6911 schedule->temps[tmpx++] = i;
6912 entry->reg = -1;
6913 entry->mode = VOIDmode;
6914 entry->offset = offset_base;
6915 entry++;
6916 /* We loop twice: first, we save 8-byte aligned registers in the
6917 higher addresses, that are known to be aligned. Then, we
6918 proceed to saving 32-bit registers that don't need 8-byte
6919 alignment.
6920 If this is an interrupt function, all registers that need saving
6921 need to be saved in full. Moreover, we need to postpone saving
6922 target registers till we have saved some general purpose registers
6923 we can then use as scratch registers. */
6924 offset = offset_base;
6925 for (align = 1; align >= 0; align--)
6927 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6928 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6930 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6931 int reg = i;
6933 if (current_function_interrupt)
6935 if (TARGET_REGISTER_P (i))
6936 continue;
6937 if (GENERAL_REGISTER_P (i))
6938 mode = DImode;
6940 if (mode == SFmode && (i % 2) == 1
6941 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6942 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6944 mode = DFmode;
6945 i--;
6946 reg--;
6949 /* If we're doing the aligned pass and this is not aligned,
6950 or we're doing the unaligned pass and this is aligned,
6951 skip it. */
6952 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6953 != align)
6954 continue;
6956 if (current_function_interrupt
6957 && GENERAL_REGISTER_P (i)
6958 && tmpx < MAX_TEMPS)
6959 schedule->temps[tmpx++] = i;
6961 offset -= GET_MODE_SIZE (mode);
6962 entry->reg = i;
6963 entry->mode = mode;
6964 entry->offset = offset;
6965 entry++;
6967 if (align && current_function_interrupt)
6968 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6969 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6971 offset -= GET_MODE_SIZE (DImode);
6972 entry->reg = i;
6973 entry->mode = DImode;
6974 entry->offset = offset;
6975 entry++;
6978 entry->reg = -1;
6979 entry->mode = VOIDmode;
6980 entry->offset = offset;
6981 schedule->temps[tmpx] = -1;
6982 return entry - 1;
6985 void
6986 sh_expand_prologue (void)
6988 HARD_REG_SET live_regs_mask;
6989 int d, i;
6990 int d_rounding = 0;
6991 int save_flags = target_flags;
6992 int pretend_args;
6993 int stack_usage;
6994 tree sp_switch_attr
6995 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6997 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6999 /* We have pretend args if we had an object sent partially in registers
7000 and partially on the stack, e.g. a large structure. */
7001 pretend_args = crtl->args.pretend_args_size;
7002 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7003 && (NPARM_REGS(SImode)
7004 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7005 pretend_args = 0;
7007 output_stack_adjust (-pretend_args
7008 - crtl->args.info.stack_regs * 8,
7009 stack_pointer_rtx, 0, NULL, true);
7010 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7012 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7013 /* We're going to use the PIC register to load the address of the
7014 incoming-argument decoder and/or of the return trampoline from
7015 the GOT, so make sure the PIC register is preserved and
7016 initialized. */
7017 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7019 if (TARGET_SHCOMPACT
7020 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7022 int reg;
7024 /* First, make all registers with incoming arguments that will
7025 be pushed onto the stack live, so that register renaming
7026 doesn't overwrite them. */
7027 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7028 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7029 >= NPARM_REGS (SImode) - reg)
7030 for (; reg < NPARM_REGS (SImode); reg++)
7031 emit_insn (gen_shcompact_preserve_incoming_args
7032 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7033 else if (CALL_COOKIE_INT_REG_GET
7034 (crtl->args.info.call_cookie, reg) == 1)
7035 emit_insn (gen_shcompact_preserve_incoming_args
7036 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7038 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7039 stack_pointer_rtx);
7040 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7041 GEN_INT (crtl->args.info.call_cookie));
7042 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7043 gen_rtx_REG (SImode, R0_REG));
7045 else if (TARGET_SHMEDIA)
7047 int tr = sh_media_register_for_return ();
7049 if (tr >= 0)
7050 emit_move_insn (gen_rtx_REG (DImode, tr),
7051 gen_rtx_REG (DImode, PR_MEDIA_REG));
7054 /* Emit the code for SETUP_VARARGS. */
7055 if (cfun->stdarg)
7057 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7059 /* Push arg regs as if they'd been provided by the caller on the stack. */
7060 for (i = 0; i < NPARM_REGS(SImode); i++)
7062 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7064 if (i >= (NPARM_REGS(SImode)
7065 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7067 break;
7068 push (rn);
7069 stack_usage += GET_MODE_SIZE (SImode);
7074 /* If we're supposed to switch stacks at function entry, do so now. */
7075 if (sp_switch_attr)
7077 rtx lab, newsrc;
7078 /* The argument specifies a variable holding the address of the
7079 stack the interrupt function should switch to/from at entry/exit. */
7080 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7081 const char *s
7082 = ggc_strdup (TREE_STRING_POINTER (arg));
7083 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7085 lab = add_constant (sp_switch, SImode, 0);
7086 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7087 newsrc = gen_const_mem (SImode, newsrc);
7089 emit_insn (gen_sp_switch_1 (newsrc));
7092 d = calc_live_regs (&live_regs_mask);
7093 /* ??? Maybe we could save some switching if we can move a mode switch
7094 that already happens to be at the function start into the prologue. */
7095 if (target_flags != save_flags && ! current_function_interrupt)
7096 emit_insn (gen_toggle_sz ());
7098 if (TARGET_SH5)
7100 int offset_base, offset;
7101 rtx r0 = NULL_RTX;
7102 int offset_in_r0 = -1;
7103 int sp_in_r0 = 0;
7104 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7105 int total_size, save_size;
7106 save_schedule schedule;
7107 save_entry *entry;
7108 int *tmp_pnt;
7110 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7111 && ! current_function_interrupt)
7112 r0 = gen_rtx_REG (Pmode, R0_REG);
7114 /* D is the actual number of bytes that we need for saving registers;
7115 however, in initial_elimination_offset we have committed to using
7116 an additional TREGS_SPACE amount of bytes - in order to keep both
7117 addresses to arguments supplied by the caller and local variables
7118 valid, we must keep this gap. Place it between the incoming
7119 arguments and the actually saved registers in a bid to optimize
7120 locality of reference. */
7121 total_size = d + tregs_space;
7122 total_size += rounded_frame_size (total_size);
7123 save_size = total_size - rounded_frame_size (d);
7124 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7125 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7126 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7128 /* If adjusting the stack in a single step costs nothing extra, do so.
7129 I.e. either if a single addi is enough, or we need a movi anyway,
7130 and we don't exceed the maximum offset range (the test for the
7131 latter is conservative for simplicity). */
7132 if (TARGET_SHMEDIA
7133 && (CONST_OK_FOR_I10 (-total_size)
7134 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7135 && total_size <= 2044)))
7136 d_rounding = total_size - save_size;
7138 offset_base = d + d_rounding;
7140 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7141 0, NULL, true);
7142 stack_usage += save_size + d_rounding;
7144 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7145 tmp_pnt = schedule.temps;
7146 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7148 enum machine_mode mode = (enum machine_mode) entry->mode;
7149 unsigned int reg = entry->reg;
7150 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7151 rtx orig_reg_rtx;
7153 offset = entry->offset;
7155 reg_rtx = gen_rtx_REG (mode, reg);
7157 mem_rtx = gen_frame_mem (mode,
7158 gen_rtx_PLUS (Pmode,
7159 stack_pointer_rtx,
7160 GEN_INT (offset)));
7162 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7164 gcc_assert (r0);
7165 mem_rtx = NULL_RTX;
7168 if (HAVE_PRE_DECREMENT
7169 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7170 || mem_rtx == NULL_RTX
7171 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7173 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7175 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7176 pre_dec = NULL_RTX;
7177 else
7179 mem_rtx = NULL_RTX;
7180 offset += GET_MODE_SIZE (mode);
7184 if (mem_rtx != NULL_RTX)
7185 goto addr_ok;
7187 if (offset_in_r0 == -1)
7189 emit_move_insn (r0, GEN_INT (offset));
7190 offset_in_r0 = offset;
7192 else if (offset != offset_in_r0)
7194 emit_move_insn (r0,
7195 gen_rtx_PLUS
7196 (Pmode, r0,
7197 GEN_INT (offset - offset_in_r0)));
7198 offset_in_r0 += offset - offset_in_r0;
7201 if (pre_dec != NULL_RTX)
7203 if (! sp_in_r0)
7205 emit_move_insn (r0,
7206 gen_rtx_PLUS
7207 (Pmode, r0, stack_pointer_rtx));
7208 sp_in_r0 = 1;
7211 offset -= GET_MODE_SIZE (mode);
7212 offset_in_r0 -= GET_MODE_SIZE (mode);
7214 mem_rtx = pre_dec;
7216 else if (sp_in_r0)
7217 mem_rtx = gen_frame_mem (mode, r0);
7218 else
7219 mem_rtx = gen_frame_mem (mode,
7220 gen_rtx_PLUS (Pmode,
7221 stack_pointer_rtx,
7222 r0));
7224 /* We must not use an r0-based address for target-branch
7225 registers or for special registers without pre-dec
7226 memory addresses, since we store their values in r0
7227 first. */
7228 gcc_assert (!TARGET_REGISTER_P (reg)
7229 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7230 || mem_rtx == pre_dec));
7232 addr_ok:
7233 orig_reg_rtx = reg_rtx;
7234 if (TARGET_REGISTER_P (reg)
7235 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7236 && mem_rtx != pre_dec))
7238 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7240 emit_move_insn (tmp_reg, reg_rtx);
7242 if (REGNO (tmp_reg) == R0_REG)
7244 offset_in_r0 = -1;
7245 sp_in_r0 = 0;
7246 gcc_assert (!refers_to_regno_p
7247 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7250 if (*++tmp_pnt <= 0)
7251 tmp_pnt = schedule.temps;
7253 reg_rtx = tmp_reg;
7256 rtx insn;
7258 /* Mark as interesting for dwarf cfi generator */
7259 insn = emit_move_insn (mem_rtx, reg_rtx);
7260 RTX_FRAME_RELATED_P (insn) = 1;
7261 /* If we use an intermediate register for the save, we can't
7262 describe this exactly in cfi as a copy of the to-be-saved
7263 register into the temporary register and then the temporary
7264 register on the stack, because the temporary register can
7265 have a different natural size than the to-be-saved register.
7266 Thus, we gloss over the intermediate copy and pretend we do
7267 a direct save from the to-be-saved register. */
7268 if (REGNO (reg_rtx) != reg)
7270 rtx set;
7272 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7276 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7278 rtx reg_rtx = gen_rtx_REG (mode, reg);
7279 rtx set;
7280 rtx mem_rtx = gen_frame_mem (mode,
7281 gen_rtx_PLUS (Pmode,
7282 stack_pointer_rtx,
7283 GEN_INT (offset)));
7285 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7286 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7291 gcc_assert (entry->offset == d_rounding);
7293 else
7295 push_regs (&live_regs_mask, current_function_interrupt);
7296 stack_usage += d;
7299 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7300 emit_insn (gen_GOTaddr2picreg ());
7302 if (SHMEDIA_REGS_STACK_ADJUST ())
7304 /* This must NOT go through the PLT, otherwise mach and macl
7305 may be clobbered. */
7306 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7307 (TARGET_FPU_ANY
7308 ? "__GCC_push_shmedia_regs"
7309 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7310 emit_insn (gen_shmedia_save_restore_regs_compact
7311 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7314 if (target_flags != save_flags && ! current_function_interrupt)
7315 emit_insn (gen_toggle_sz ());
7317 target_flags = save_flags;
7319 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7320 stack_pointer_rtx, 0, NULL, true);
7321 stack_usage += rounded_frame_size (d) - d_rounding;
7323 if (frame_pointer_needed)
7324 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7326 if (TARGET_SHCOMPACT
7327 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7329 /* This must NOT go through the PLT, otherwise mach and macl
7330 may be clobbered. */
7331 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7332 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7333 emit_insn (gen_shcompact_incoming_args ());
7336 if (flag_stack_usage)
7337 current_function_static_stack_size = stack_usage;
7340 void
7341 sh_expand_epilogue (bool sibcall_p)
7343 HARD_REG_SET live_regs_mask;
7344 int d, i;
7345 int d_rounding = 0;
7347 int save_flags = target_flags;
7348 int frame_size, save_size;
7349 int fpscr_deferred = 0;
7350 int e = sibcall_p ? -1 : 1;
7352 d = calc_live_regs (&live_regs_mask);
7354 save_size = d;
7355 frame_size = rounded_frame_size (d);
7357 if (TARGET_SH5)
7359 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7360 int total_size;
7361 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7362 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7363 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7365 total_size = d + tregs_space;
7366 total_size += rounded_frame_size (total_size);
7367 save_size = total_size - frame_size;
7369 /* If adjusting the stack in a single step costs nothing extra, do so.
7370 I.e. either if a single addi is enough, or we need a movi anyway,
7371 and we don't exceed the maximum offset range (the test for the
7372 latter is conservative for simplicity). */
7373 if (TARGET_SHMEDIA
7374 && ! frame_pointer_needed
7375 && (CONST_OK_FOR_I10 (total_size)
7376 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7377 && total_size <= 2044)))
7378 d_rounding = frame_size;
7380 frame_size -= d_rounding;
7383 if (frame_pointer_needed)
7385 /* We must avoid scheduling the epilogue with previous basic blocks.
7386 See PR/18032 and PR/40313. */
7387 emit_insn (gen_blockage ());
7388 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7389 &live_regs_mask, false);
7391 /* We must avoid moving the stack pointer adjustment past code
7392 which reads from the local frame, else an interrupt could
7393 occur after the SP adjustment and clobber data in the local
7394 frame. */
7395 emit_insn (gen_blockage ());
7396 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7398 else if (frame_size)
7400 /* We must avoid moving the stack pointer adjustment past code
7401 which reads from the local frame, else an interrupt could
7402 occur after the SP adjustment and clobber data in the local
7403 frame. */
7404 emit_insn (gen_blockage ());
7405 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7406 &live_regs_mask, false);
7409 if (SHMEDIA_REGS_STACK_ADJUST ())
7411 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7412 (TARGET_FPU_ANY
7413 ? "__GCC_pop_shmedia_regs"
7414 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7415 /* This must NOT go through the PLT, otherwise mach and macl
7416 may be clobbered. */
7417 emit_insn (gen_shmedia_save_restore_regs_compact
7418 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7421 /* Pop all the registers. */
7423 if (target_flags != save_flags && ! current_function_interrupt)
7424 emit_insn (gen_toggle_sz ());
7425 if (TARGET_SH5)
7427 int offset_base, offset;
7428 int offset_in_r0 = -1;
7429 int sp_in_r0 = 0;
7430 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7431 save_schedule schedule;
7432 save_entry *entry;
7433 int *tmp_pnt;
7435 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7436 offset_base = -entry[1].offset + d_rounding;
7437 tmp_pnt = schedule.temps;
7438 for (; entry->mode != VOIDmode; entry--)
7440 enum machine_mode mode = (enum machine_mode) entry->mode;
7441 int reg = entry->reg;
7442 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7444 offset = offset_base + entry->offset;
7445 reg_rtx = gen_rtx_REG (mode, reg);
7447 mem_rtx = gen_frame_mem (mode,
7448 gen_rtx_PLUS (Pmode,
7449 stack_pointer_rtx,
7450 GEN_INT (offset)));
7452 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7453 mem_rtx = NULL_RTX;
7455 if (HAVE_POST_INCREMENT
7456 && (offset == offset_in_r0
7457 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7458 && mem_rtx == NULL_RTX)
7459 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7461 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7463 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7464 post_inc = NULL_RTX;
7465 else
7466 mem_rtx = NULL_RTX;
7469 if (mem_rtx != NULL_RTX)
7470 goto addr_ok;
7472 if (offset_in_r0 == -1)
7474 emit_move_insn (r0, GEN_INT (offset));
7475 offset_in_r0 = offset;
7477 else if (offset != offset_in_r0)
7479 emit_move_insn (r0,
7480 gen_rtx_PLUS
7481 (Pmode, r0,
7482 GEN_INT (offset - offset_in_r0)));
7483 offset_in_r0 += offset - offset_in_r0;
7486 if (post_inc != NULL_RTX)
7488 if (! sp_in_r0)
7490 emit_move_insn (r0,
7491 gen_rtx_PLUS
7492 (Pmode, r0, stack_pointer_rtx));
7493 sp_in_r0 = 1;
7496 mem_rtx = post_inc;
7498 offset_in_r0 += GET_MODE_SIZE (mode);
7500 else if (sp_in_r0)
7501 mem_rtx = gen_frame_mem (mode, r0);
7502 else
7503 mem_rtx = gen_frame_mem (mode,
7504 gen_rtx_PLUS (Pmode,
7505 stack_pointer_rtx,
7506 r0));
7508 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7509 || mem_rtx == post_inc);
7511 addr_ok:
7512 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7513 && mem_rtx != post_inc)
7515 emit_move_insn (r0, mem_rtx);
7516 mem_rtx = r0;
7518 else if (TARGET_REGISTER_P (reg))
7520 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7522 /* Give the scheduler a bit of freedom by using up to
7523 MAX_TEMPS registers in a round-robin fashion. */
7524 emit_move_insn (tmp_reg, mem_rtx);
7525 mem_rtx = tmp_reg;
7526 if (*++tmp_pnt < 0)
7527 tmp_pnt = schedule.temps;
7530 emit_move_insn (reg_rtx, mem_rtx);
7533 gcc_assert (entry->offset + offset_base == d + d_rounding);
7535 else /* ! TARGET_SH5 */
7537 int last_reg;
7539 save_size = 0;
7540 /* For an ISR with RESBANK attribute assigned, don't pop PR
7541 register. */
7542 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7543 && !sh_cfun_resbank_handler_p ())
7545 if (!frame_pointer_needed)
7546 emit_insn (gen_blockage ());
7547 pop (PR_REG);
7550 /* Banked registers are popped first to avoid being scheduled in the
7551 delay slot. RTE switches banks before the ds instruction. */
7552 if (current_function_interrupt)
7554 bool use_movml = false;
7556 if (TARGET_SH2A)
7558 unsigned int count = 0;
7560 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7561 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7562 count++;
7563 else
7564 break;
7566 /* Use movml when all banked registers are popped. */
7567 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7568 use_movml = true;
7571 if (use_movml)
7573 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7575 /* We must avoid scheduling multiple load insns with other
7576 insns. */
7577 emit_insn (gen_blockage ());
7578 emit_insn (gen_movml_pop_banked (sp_reg));
7579 emit_insn (gen_blockage ());
7581 else
7582 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7583 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7584 pop (i);
7586 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7588 else
7589 last_reg = FIRST_PSEUDO_REGISTER;
7591 for (i = 0; i < last_reg; i++)
7593 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7595 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7596 && hard_reg_set_intersect_p (live_regs_mask,
7597 reg_class_contents[DF_REGS]))
7598 fpscr_deferred = 1;
7599 /* For an ISR with the RESBANK attribute assigned, don't pop the
7600 following registers: R0-R14, MACH, MACL and GBR. */
7601 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7602 && ! (sh_cfun_resbank_handler_p ()
7603 && ((j >= FIRST_GENERAL_REG
7604 && j < LAST_GENERAL_REG)
7605 || j == MACH_REG
7606 || j == MACL_REG
7607 || j == GBR_REG)))
7608 pop (j);
7610 if (j == FIRST_FP_REG && fpscr_deferred)
7611 pop (FPSCR_REG);
7614 if (target_flags != save_flags && ! current_function_interrupt)
7615 emit_insn (gen_toggle_sz ());
7616 target_flags = save_flags;
7618 output_stack_adjust (crtl->args.pretend_args_size
7619 + save_size + d_rounding
7620 + crtl->args.info.stack_regs * 8,
7621 stack_pointer_rtx, e, NULL, false);
7623 if (crtl->calls_eh_return)
7624 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7625 EH_RETURN_STACKADJ_RTX));
7627 /* Switch back to the normal stack if necessary. */
7628 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7629 emit_insn (gen_sp_switch_2 ());
7631 /* Tell flow the insn that pops PR isn't dead. */
7632 /* PR_REG will never be live in SHmedia mode, and we don't need to
7633 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7634 by the return pattern. */
7635 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7636 emit_use (gen_rtx_REG (SImode, PR_REG));
7639 static int sh_need_epilogue_known = 0;
7642 sh_need_epilogue (void)
7644 if (! sh_need_epilogue_known)
7646 rtx epilogue;
7648 start_sequence ();
7649 sh_expand_epilogue (0);
7650 epilogue = get_insns ();
7651 end_sequence ();
7652 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7654 return sh_need_epilogue_known > 0;
7657 /* Emit code to change the current function's return address to RA.
7658 TEMP is available as a scratch register, if needed. */
7660 void
7661 sh_set_return_address (rtx ra, rtx tmp)
7663 HARD_REG_SET live_regs_mask;
7664 int d;
7665 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7666 int pr_offset;
7668 d = calc_live_regs (&live_regs_mask);
7670 /* If pr_reg isn't live, we can set it (or the register given in
7671 sh_media_register_for_return) directly. */
7672 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7674 rtx rr;
7676 if (TARGET_SHMEDIA)
7678 int rr_regno = sh_media_register_for_return ();
7680 if (rr_regno < 0)
7681 rr_regno = pr_reg;
7683 rr = gen_rtx_REG (DImode, rr_regno);
7685 else
7686 rr = gen_rtx_REG (SImode, pr_reg);
7688 emit_insn (GEN_MOV (rr, ra));
7689 /* Tell flow the register for return isn't dead. */
7690 emit_use (rr);
7691 return;
7694 if (TARGET_SH5)
7696 int offset;
7697 save_schedule schedule;
7698 save_entry *entry;
7700 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7701 offset = entry[1].offset;
7702 for (; entry->mode != VOIDmode; entry--)
7703 if (entry->reg == pr_reg)
7704 goto found;
7706 /* We can't find the PR register. */
7707 gcc_unreachable ();
7709 found:
7710 offset = entry->offset - offset;
7711 pr_offset = (rounded_frame_size (d) + offset
7712 + SHMEDIA_REGS_STACK_ADJUST ());
7714 else
7715 pr_offset = rounded_frame_size (d);
7717 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7719 if (frame_pointer_needed)
7720 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7721 else
7722 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7724 tmp = gen_frame_mem (Pmode, tmp);
7725 emit_insn (GEN_MOV (tmp, ra));
7726 /* Tell flow this store isn't dead. */
7727 emit_use (tmp);
7730 /* Clear variables at function end. */
7732 static void
7733 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7734 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7736 sh_need_epilogue_known = 0;
7739 static rtx
7740 sh_builtin_saveregs (void)
7742 /* First unnamed integer register. */
7743 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7744 /* Number of integer registers we need to save. */
7745 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7746 /* First unnamed SFmode float reg. */
7747 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7748 /* Number of SFmode float regs to save. */
7749 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7750 rtx regbuf, fpregs;
7751 int bufsize, regno;
7752 alias_set_type alias_set;
7754 if (TARGET_SH5)
7756 if (n_intregs)
7758 int pushregs = n_intregs;
7760 while (pushregs < NPARM_REGS (SImode) - 1
7761 && (CALL_COOKIE_INT_REG_GET
7762 (crtl->args.info.call_cookie,
7763 NPARM_REGS (SImode) - pushregs)
7764 == 1))
7766 crtl->args.info.call_cookie
7767 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7768 - pushregs, 1);
7769 pushregs++;
7772 if (pushregs == NPARM_REGS (SImode))
7773 crtl->args.info.call_cookie
7774 |= (CALL_COOKIE_INT_REG (0, 1)
7775 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7776 else
7777 crtl->args.info.call_cookie
7778 |= CALL_COOKIE_STACKSEQ (pushregs);
7780 crtl->args.pretend_args_size += 8 * n_intregs;
7782 if (TARGET_SHCOMPACT)
7783 return const0_rtx;
7786 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7788 error ("__builtin_saveregs not supported by this subtarget");
7789 return const0_rtx;
7792 if (TARGET_SHMEDIA)
7793 n_floatregs = 0;
7795 /* Allocate block of memory for the regs. */
7796 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7797 Or can assign_stack_local accept a 0 SIZE argument? */
7798 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7800 if (TARGET_SHMEDIA)
7801 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7802 else if (n_floatregs & 1)
7804 rtx addr;
7806 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7807 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7808 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7809 regbuf = change_address (regbuf, BLKmode, addr);
7811 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7813 rtx addr, mask;
7815 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7816 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7817 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7818 emit_insn (gen_andsi3 (addr, addr, mask));
7819 regbuf = change_address (regbuf, BLKmode, addr);
7821 else
7822 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7823 alias_set = get_varargs_alias_set ();
7824 set_mem_alias_set (regbuf, alias_set);
7826 /* Save int args.
7827 This is optimized to only save the regs that are necessary. Explicitly
7828 named args need not be saved. */
7829 if (n_intregs > 0)
7830 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7831 adjust_address (regbuf, BLKmode,
7832 n_floatregs * UNITS_PER_WORD),
7833 n_intregs);
7835 if (TARGET_SHMEDIA)
7836 /* Return the address of the regbuf. */
7837 return XEXP (regbuf, 0);
7839 /* Save float args.
7840 This is optimized to only save the regs that are necessary. Explicitly
7841 named args need not be saved.
7842 We explicitly build a pointer to the buffer because it halves the insn
7843 count when not optimizing (otherwise the pointer is built for each reg
7844 saved).
7845 We emit the moves in reverse order so that we can use predecrement. */
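/* Illustrative pseudo-code for the single precision loop below (a sketch,
   not the actual implementation):

     p = regbuf + n_floatregs * UNITS_PER_WORD;
     for (r = NPARM_REGS (SFmode) - 1; r >= first_floatreg; r--)
       {
         p -= UNITS_PER_WORD;
         store FP argument register r at address p;
       }

   hence the pointer is decremented before each store.  */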
7847 fpregs = copy_to_mode_reg (Pmode,
7848 plus_constant (XEXP (regbuf, 0),
7849 n_floatregs * UNITS_PER_WORD));
7850 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7852 rtx mem;
7853 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7855 emit_insn (gen_addsi3 (fpregs, fpregs,
7856 GEN_INT (-2 * UNITS_PER_WORD)));
7857 mem = change_address (regbuf, DFmode, fpregs);
7858 emit_move_insn (mem,
7859 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7861 regno = first_floatreg;
7862 if (regno & 1)
7864 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7865 mem = change_address (regbuf, SFmode, fpregs);
7866 emit_move_insn (mem,
7867 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7868 - (TARGET_LITTLE_ENDIAN != 0)));
7871 else
7872 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7874 rtx mem;
7876 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7877 mem = change_address (regbuf, SFmode, fpregs);
7878 emit_move_insn (mem,
7879 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7882 /* Return the address of the regbuf. */
7883 return XEXP (regbuf, 0);
7886 /* Define the `__builtin_va_list' type for the ABI. */
7888 static tree
7889 sh_build_builtin_va_list (void)
7891 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7892 tree record, type_decl;
7894 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7895 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7896 return ptr_type_node;
7898 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7899 type_decl = build_decl (BUILTINS_LOCATION,
7900 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7902 f_next_o = build_decl (BUILTINS_LOCATION,
7903 FIELD_DECL, get_identifier ("__va_next_o"),
7904 ptr_type_node);
7905 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7906 FIELD_DECL,
7907 get_identifier ("__va_next_o_limit"),
7908 ptr_type_node);
7909 f_next_fp = build_decl (BUILTINS_LOCATION,
7910 FIELD_DECL, get_identifier ("__va_next_fp"),
7911 ptr_type_node);
7912 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7913 FIELD_DECL,
7914 get_identifier ("__va_next_fp_limit"),
7915 ptr_type_node);
7916 f_next_stack = build_decl (BUILTINS_LOCATION,
7917 FIELD_DECL, get_identifier ("__va_next_stack"),
7918 ptr_type_node);
7920 DECL_FIELD_CONTEXT (f_next_o) = record;
7921 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7922 DECL_FIELD_CONTEXT (f_next_fp) = record;
7923 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7924 DECL_FIELD_CONTEXT (f_next_stack) = record;
7926 TYPE_STUB_DECL (record) = type_decl;
7927 TYPE_NAME (record) = type_decl;
7928 TYPE_FIELDS (record) = f_next_o;
7929 DECL_CHAIN (f_next_o) = f_next_o_limit;
7930 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7931 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7932 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7934 layout_type (record);
7936 return record;
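/* Illustrative sketch only: the record built above corresponds roughly to
   the following C declaration (all five members are plain pointers; the
   precise meaning of each field is defined by sh_va_start and
   sh_gimplify_va_arg_expr below).

     typedef struct __va_list_tag
     {
       void *__va_next_o;         next unnamed int arg in the reg-save area
       void *__va_next_o_limit;   end of the int reg-save area
       void *__va_next_fp;        next unnamed FP arg in the reg-save area
       void *__va_next_fp_limit;  end of the FP reg-save area
       void *__va_next_stack;     next argument passed on the stack
     } __builtin_va_list;  */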
7939 /* Implement `va_start' for varargs and stdarg. */
7941 static void
7942 sh_va_start (tree valist, rtx nextarg)
7944 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7945 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7946 tree t, u;
7947 int nfp, nint;
7949 if (TARGET_SH5)
7951 expand_builtin_saveregs ();
7952 std_expand_builtin_va_start (valist, nextarg);
7953 return;
7956 if ((! TARGET_SH2E && ! TARGET_SH4)
7957 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7959 std_expand_builtin_va_start (valist, nextarg);
7960 return;
7963 f_next_o = TYPE_FIELDS (va_list_type_node);
7964 f_next_o_limit = DECL_CHAIN (f_next_o);
7965 f_next_fp = DECL_CHAIN (f_next_o_limit);
7966 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7967 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7969 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7970 NULL_TREE);
7971 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7972 valist, f_next_o_limit, NULL_TREE);
7973 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7974 NULL_TREE);
7975 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7976 valist, f_next_fp_limit, NULL_TREE);
7977 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7978 valist, f_next_stack, NULL_TREE);
7980 /* Call __builtin_saveregs. */
7981 u = make_tree (sizetype, expand_builtin_saveregs ());
7982 u = fold_convert (ptr_type_node, u);
7983 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7984 TREE_SIDE_EFFECTS (t) = 1;
7985 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7987 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7988 if (nfp < 8)
7989 nfp = 8 - nfp;
7990 else
7991 nfp = 0;
7992 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7993 size_int (UNITS_PER_WORD * nfp));
7994 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7995 TREE_SIDE_EFFECTS (t) = 1;
7996 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7998 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7999 TREE_SIDE_EFFECTS (t) = 1;
8000 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8002 nint = crtl->args.info.arg_count[SH_ARG_INT];
8003 if (nint < 4)
8004 nint = 4 - nint;
8005 else
8006 nint = 0;
8007 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
8008 size_int (UNITS_PER_WORD * nint));
8009 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8010 TREE_SIDE_EFFECTS (t) = 1;
8011 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8013 u = make_tree (ptr_type_node, nextarg);
8014 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8015 TREE_SIDE_EFFECTS (t) = 1;
8016 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8019 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8020 member, return it. */
8021 static tree
8022 find_sole_member (tree type)
8024 tree field, member = NULL_TREE;
8026 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8028 if (TREE_CODE (field) != FIELD_DECL)
8029 continue;
8030 if (!DECL_SIZE (field))
8031 return NULL_TREE;
8032 if (integer_zerop (DECL_SIZE (field)))
8033 continue;
8034 if (member)
8035 return NULL_TREE;
8036 member = field;
8038 return member;
8040 /* Implement `va_arg'. */
8042 static tree
8043 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8044 gimple_seq *post_p ATTRIBUTE_UNUSED)
8046 HOST_WIDE_INT size, rsize;
8047 tree tmp, pptr_type_node;
8048 tree addr, lab_over = NULL, result = NULL;
8049 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8050 tree eff_type;
8052 if (pass_by_ref)
8053 type = build_pointer_type (type);
8055 size = int_size_in_bytes (type);
8056 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8057 pptr_type_node = build_pointer_type (ptr_type_node);
8059 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8060 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8062 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8063 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8064 int pass_as_float;
8065 tree lab_false;
8066 tree member;
8068 f_next_o = TYPE_FIELDS (va_list_type_node);
8069 f_next_o_limit = DECL_CHAIN (f_next_o);
8070 f_next_fp = DECL_CHAIN (f_next_o_limit);
8071 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8072 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8074 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8075 NULL_TREE);
8076 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8077 valist, f_next_o_limit, NULL_TREE);
8078 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8079 valist, f_next_fp, NULL_TREE);
8080 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8081 valist, f_next_fp_limit, NULL_TREE);
8082 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8083 valist, f_next_stack, NULL_TREE);
8085 /* Structures with a single member with a distinct mode are passed
8086 like their member. This is relevant if the latter has a REAL_TYPE
8087 or COMPLEX_TYPE type. */
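      /* For example (illustrative only): a struct such as
           struct s { double d; };
         has DFmode just like its sole member, so a va_arg of type
         struct s is fetched the same way a plain double would be.  */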
8088 eff_type = type;
8089 while (TREE_CODE (eff_type) == RECORD_TYPE
8090 && (member = find_sole_member (eff_type))
8091 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8092 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8093 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8095 tree field_type = TREE_TYPE (member);
8097 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8098 eff_type = field_type;
8099 else
8101 gcc_assert ((TYPE_ALIGN (eff_type)
8102 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8103 || (TYPE_ALIGN (eff_type)
8104 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8105 break;
8109 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8111 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8112 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8113 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8114 && size <= 16));
8116 else
8118 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8121 addr = create_tmp_var (pptr_type_node, NULL);
8122 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8123 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8125 valist = build_simple_mem_ref (addr);
8127 if (pass_as_float)
8129 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8130 tree cmp;
8131 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8133 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8134 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8136 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8137 tmp = next_fp_limit;
8138 if (size > 4 && !is_double)
8139 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8140 unshare_expr (tmp), size_int (4 - size));
8141 tmp = build2 (GE_EXPR, boolean_type_node,
8142 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8143 cmp = build3 (COND_EXPR, void_type_node, tmp,
8144 build1 (GOTO_EXPR, void_type_node,
8145 unshare_expr (lab_false)), NULL_TREE);
8146 if (!is_double)
8147 gimplify_and_add (cmp, pre_p);
8149 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8150 || (is_double || size == 16))
8152 tmp = fold_convert (sizetype, next_fp_tmp);
8153 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8154 size_int (UNITS_PER_WORD));
8155 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8156 unshare_expr (next_fp_tmp), tmp);
8157 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8159 if (is_double)
8160 gimplify_and_add (cmp, pre_p);
8162 #ifdef FUNCTION_ARG_SCmode_WART
8163 if (TYPE_MODE (eff_type) == SCmode
8164 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8166 tree subtype = TREE_TYPE (eff_type);
8167 tree real, imag;
8169 imag
8170 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8171 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8173 real
8174 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8175 real = get_initialized_tmp_var (real, pre_p, NULL);
8177 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8178 if (type != eff_type)
8179 result = build1 (VIEW_CONVERT_EXPR, type, result);
8180 result = get_initialized_tmp_var (result, pre_p, NULL);
8182 #endif /* FUNCTION_ARG_SCmode_WART */
8184 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8185 gimplify_and_add (tmp, pre_p);
8187 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8188 gimplify_and_add (tmp, pre_p);
8190 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8191 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8192 gimplify_assign (unshare_expr (next_fp_tmp),
8193 unshare_expr (valist), pre_p);
8195 gimplify_assign (unshare_expr (valist),
8196 unshare_expr (next_fp_tmp), post_p);
8197 valist = next_fp_tmp;
8199 else
8201 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8202 unshare_expr (next_o), size_int (rsize));
8203 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8204 unshare_expr (next_o_limit));
8205 tmp = build3 (COND_EXPR, void_type_node, tmp,
8206 build1 (GOTO_EXPR, void_type_node,
8207 unshare_expr (lab_false)),
8208 NULL_TREE);
8209 gimplify_and_add (tmp, pre_p);
8211 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8212 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8214 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8215 gimplify_and_add (tmp, pre_p);
8217 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8218 gimplify_and_add (tmp, pre_p);
8220 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8221 gimplify_assign (unshare_expr (next_o),
8222 unshare_expr (next_o_limit), pre_p);
8224 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8225 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8228 if (!result)
8230 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8231 gimplify_and_add (tmp, pre_p);
8235 /* ??? In va-sh.h, there had been code to make values larger than
8236 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8238 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8239 if (result)
8241 gimplify_assign (result, tmp, pre_p);
8242 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8243 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8244 gimplify_and_add (tmp, pre_p);
8246 else
8247 result = tmp;
8249 if (pass_by_ref)
8250 result = build_va_arg_indirect_ref (result);
8252 return result;
8255 /* 64-bit floating point memory transfers are paired single precision loads
8256 or stores. So DWARF information needs fixing in little endian (unless
8257 PR=SZ=1 in FPSCR). */
8259 sh_dwarf_register_span (rtx reg)
8261 unsigned regno = REGNO (reg);
8263 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8264 return NULL_RTX;
8266 return
8267 gen_rtx_PARALLEL (VOIDmode,
8268 gen_rtvec (2,
8269 gen_rtx_REG (SFmode,
8270 DBX_REGISTER_NUMBER (regno+1)),
8271 gen_rtx_REG (SFmode,
8272 DBX_REGISTER_NUMBER (regno))));
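/* Illustrative example (little endian, assuming dr0 is the fr0/fr1 pair):
   a DFmode value living in dr0 is described to DWARF as the two SFmode
   halves in the order fr1, fr0, matching the paired single precision
   moves actually used for the 64-bit transfer.  */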
8275 static enum machine_mode
8276 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8277 int *punsignedp, const_tree funtype,
8278 int for_return)
8280 if (sh_promote_prototypes (funtype))
8281 return promote_mode (type, mode, punsignedp);
8282 else
8283 return default_promote_function_mode (type, mode, punsignedp, funtype,
8284 for_return);
8287 static bool
8288 sh_promote_prototypes (const_tree type)
8290 if (TARGET_HITACHI)
8291 return 0;
8292 if (! type)
8293 return 1;
8294 return ! sh_attr_renesas_p (type);
8297 /* Whether an argument must be passed by reference. On SHcompact, we
8298 pretend arguments wider than 32 bits that would have been passed in
8299 registers are passed by reference, so that an SHmedia trampoline
8300 loads them into the full 64-bit registers. */
8302 static int
8303 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8304 const_tree type, bool named)
8306 unsigned HOST_WIDE_INT size;
8308 if (type)
8309 size = int_size_in_bytes (type);
8310 else
8311 size = GET_MODE_SIZE (mode);
8313 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8314 && (!named
8315 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8316 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8317 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8318 && size > 4
8319 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8320 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8321 return size;
8322 else
8323 return 0;
8326 static bool
8327 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8328 const_tree type, bool named)
8330 if (targetm.calls.must_pass_in_stack (mode, type))
8331 return true;
8333 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8334 wants to know about pass-by-reference semantics for incoming
8335 arguments. */
8336 if (! cum)
8337 return false;
8339 if (TARGET_SHCOMPACT)
8341 cum->byref = shcompact_byref (cum, mode, type, named);
8342 return cum->byref != 0;
8345 return false;
8348 static bool
8349 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8350 const_tree type, bool named ATTRIBUTE_UNUSED)
8352 /* ??? How can it possibly be correct to return true only on the
8353 caller side of the equation? Is there someplace else in the
8354 sh backend that's magically producing the copies? */
8355 return (cum->outgoing
8356 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8357 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8360 static int
8361 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8362 tree type, bool named ATTRIBUTE_UNUSED)
8364 int words = 0;
8366 if (!TARGET_SH5
8367 && PASS_IN_REG_P (*cum, mode, type)
8368 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8369 && (ROUND_REG (*cum, mode)
8370 + (mode != BLKmode
8371 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8372 : ROUND_ADVANCE (int_size_in_bytes (type)))
8373 > NPARM_REGS (mode)))
8374 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8376 else if (!TARGET_SHCOMPACT
8377 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8378 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8380 return words * UNITS_PER_WORD;
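/* Illustrative example (non-SH5, non-SH4 case, assuming r4-r7 hold the
   first four SImode argument words): a DImode argument whose first word
   lands in r7 cannot fit entirely in registers, so this hook reports
   4 partial bytes (the r7 half) and the remaining word goes on the
   stack.  */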
8384 /* Define where to put the arguments to a function.
8385 Value is zero to push the argument on the stack,
8386 or a hard register in which to store the argument.
8388 MODE is the argument's machine mode.
8389 TYPE is the data type of the argument (as a tree).
8390 This is null for libcalls where that information may
8391 not be available.
8392 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8393 the preceding args and about the function being called.
8394 NAMED is nonzero if this argument is a named parameter
8395 (otherwise it is an extra parameter matching an ellipsis).
8397 On SH the first args are normally in registers
8398 and the rest are pushed. Any arg that starts within the first
8399 NPARM_REGS words is at least partially passed in a register unless
8400 its data type forbids. */
8402 static rtx
8403 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8404 const_tree type, bool named)
8406 if (! TARGET_SH5 && mode == VOIDmode)
8407 return GEN_INT (ca->renesas_abi ? 1 : 0);
8409 if (! TARGET_SH5
8410 && PASS_IN_REG_P (*ca, mode, type)
8411 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8413 int regno;
8415 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8416 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8418 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8419 gen_rtx_REG (SFmode,
8420 BASE_ARG_REG (mode)
8421 + (ROUND_REG (*ca, mode) ^ 1)),
8422 const0_rtx);
8423 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8424 gen_rtx_REG (SFmode,
8425 BASE_ARG_REG (mode)
8426 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8427 GEN_INT (4));
8428 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8431 /* If the alignment of a DF value causes an SF register to be
8432 skipped, we will use that skipped register for the next SF
8433 value. */
8434 if ((TARGET_HITACHI || ca->renesas_abi)
8435 && ca->free_single_fp_reg
8436 && mode == SFmode)
8437 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8439 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8440 ^ (mode == SFmode && TARGET_SH4
8441 && TARGET_LITTLE_ENDIAN != 0
8442 && ! TARGET_HITACHI && ! ca->renesas_abi);
8443 return gen_rtx_REG (mode, regno);
8447 if (TARGET_SH5)
8449 if (mode == VOIDmode && TARGET_SHCOMPACT)
8450 return GEN_INT (ca->call_cookie);
8452 /* The following test assumes unnamed arguments are promoted to
8453 DFmode. */
8454 if (mode == SFmode && ca->free_single_fp_reg)
8455 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8457 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8458 && (named || ! ca->prototype_p)
8459 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8461 if (! ca->prototype_p && TARGET_SHMEDIA)
8462 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8464 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8465 FIRST_FP_PARM_REG
8466 + ca->arg_count[(int) SH_ARG_FLOAT]);
8469 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8470 && (! TARGET_SHCOMPACT
8471 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8472 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8473 type, named))))
8475 return gen_rtx_REG (mode, (FIRST_PARM_REG
8476 + ca->arg_count[(int) SH_ARG_INT]));
8479 return 0;
8482 return 0;
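/* Illustrative example only (non-SH5, non-Renesas, assuming the usual
   r4-r7 integer and fr4-fr11 floating point argument registers):

     int f (int a, float b, double c, int d);

   would typically place A in r4, D in r5, B in a single SFmode register
   and C in a DFmode register pair; arguments that do not fit in the
   argument registers are pushed on the stack as described above.  */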
8485 /* Update the data in CUM to advance over an argument
8486 of mode MODE and data type TYPE.
8487 (TYPE is null for libcalls where that information may not be
8488 available.) */
8490 static void
8491 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8492 const_tree type, bool named)
8494 if (ca->force_mem)
8495 ca->force_mem = 0;
8496 else if (TARGET_SH5)
8498 const_tree type2 = (ca->byref && type
8499 ? TREE_TYPE (type)
8500 : type);
8501 enum machine_mode mode2 = (ca->byref && type
8502 ? TYPE_MODE (type2)
8503 : mode);
8504 int dwords = ((ca->byref
8505 ? ca->byref
8506 : mode2 == BLKmode
8507 ? int_size_in_bytes (type2)
8508 : GET_MODE_SIZE (mode2)) + 7) / 8;
8509 int numregs = MIN (dwords, NPARM_REGS (SImode)
8510 - ca->arg_count[(int) SH_ARG_INT]);
8512 if (numregs)
8514 ca->arg_count[(int) SH_ARG_INT] += numregs;
8515 if (TARGET_SHCOMPACT
8516 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8518 ca->call_cookie
8519 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8520 - numregs, 1);
8521 /* N.B. We want this also for outgoing. */
8522 ca->stack_regs += numregs;
8524 else if (ca->byref)
8526 if (! ca->outgoing)
8527 ca->stack_regs += numregs;
8528 ca->byref_regs += numregs;
8529 ca->byref = 0;
8531 ca->call_cookie
8532 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8533 - numregs, 2);
8534 while (--numregs);
8535 ca->call_cookie
8536 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8537 - 1, 1);
8539 else if (dwords > numregs)
8541 int pushregs = numregs;
8543 if (TARGET_SHCOMPACT)
8544 ca->stack_regs += numregs;
8545 while (pushregs < NPARM_REGS (SImode) - 1
8546 && (CALL_COOKIE_INT_REG_GET
8547 (ca->call_cookie,
8548 NPARM_REGS (SImode) - pushregs)
8549 == 1))
8551 ca->call_cookie
8552 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8553 - pushregs, 1);
8554 pushregs++;
8556 if (numregs == NPARM_REGS (SImode))
8557 ca->call_cookie
8558 |= CALL_COOKIE_INT_REG (0, 1)
8559 | CALL_COOKIE_STACKSEQ (numregs - 1);
8560 else
8561 ca->call_cookie
8562 |= CALL_COOKIE_STACKSEQ (numregs);
8565 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8566 && (named || ! ca->prototype_p))
8568 if (mode2 == SFmode && ca->free_single_fp_reg)
8569 ca->free_single_fp_reg = 0;
8570 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8571 < NPARM_REGS (SFmode))
8573 int numfpregs
8574 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8575 NPARM_REGS (SFmode)
8576 - ca->arg_count[(int) SH_ARG_FLOAT]);
8578 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8580 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8582 if (ca->outgoing && numregs > 0)
8585 ca->call_cookie
8586 |= (CALL_COOKIE_INT_REG
8587 (ca->arg_count[(int) SH_ARG_INT]
8588 - numregs + ((numfpregs - 2) / 2),
8589 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8590 - numfpregs) / 2));
8592 while (numfpregs -= 2);
8594 else if (mode2 == SFmode && (named)
8595 && (ca->arg_count[(int) SH_ARG_FLOAT]
8596 < NPARM_REGS (SFmode)))
8597 ca->free_single_fp_reg
8598 = FIRST_FP_PARM_REG - numfpregs
8599 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8602 return;
8605 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8607 /* Note that we've used the skipped register. */
8608 if (mode == SFmode && ca->free_single_fp_reg)
8610 ca->free_single_fp_reg = 0;
8611 return;
8613 /* When we have a DF after an SF, there's an SF register that gets
8614 skipped in order to align the DF value. We note this skipped
8615 register, because the next SF value will use it, and not the
8616 SF that follows the DF. */
8617 if (mode == DFmode
8618 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8620 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8621 + BASE_ARG_REG (mode));
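	  /* Illustrative example of the note above (Renesas/Hitachi
	     conventions with double precision FPU): for
	       void f (float a, double b, float c);
	     A takes the first SFmode slot, B is aligned to an even
	     register pair and thereby skips one SFmode register, and C
	     then reuses that skipped register rather than the slot
	     after B.  */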
8625 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8626 || PASS_IN_REG_P (*ca, mode, type))
8627 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8628 = (ROUND_REG (*ca, mode)
8629 + (mode == BLKmode
8630 ? ROUND_ADVANCE (int_size_in_bytes (type))
8631 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8634 /* The Renesas calling convention doesn't quite fit into this scheme since
8635 the address is passed like an invisible argument, but one that is always
8636 passed in memory. */
8637 static rtx
8638 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8640 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8641 return 0;
8642 return gen_rtx_REG (Pmode, 2);
8645 /* Worker function for TARGET_FUNCTION_VALUE.
8647 For the SH, this is like LIBCALL_VALUE, except that we must change the
8648 mode like PROMOTE_MODE does.
8649 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8650 tested here has to be kept in sync with the one in explow.c:promote_mode.
8653 static rtx
8654 sh_function_value (const_tree valtype,
8655 const_tree fn_decl_or_type,
8656 bool outgoing ATTRIBUTE_UNUSED)
8658 if (fn_decl_or_type
8659 && !DECL_P (fn_decl_or_type))
8660 fn_decl_or_type = NULL;
8662 return gen_rtx_REG (
8663 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8664 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8665 && (TREE_CODE (valtype) == INTEGER_TYPE
8666 || TREE_CODE (valtype) == ENUMERAL_TYPE
8667 || TREE_CODE (valtype) == BOOLEAN_TYPE
8668 || TREE_CODE (valtype) == REAL_TYPE
8669 || TREE_CODE (valtype) == OFFSET_TYPE))
8670 && sh_promote_prototypes (fn_decl_or_type)
8671 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8672 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
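/* Illustrative example (assuming a non-Renesas compilation, where
   sh_promote_prototypes is true): for

     short g (void);

   the return value is widened to SImode in the return register, whereas
   with TARGET_HITACHI or the renesas attribute it stays in the declared
   HImode.  */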
8675 /* Worker function for TARGET_LIBCALL_VALUE. */
8677 static rtx
8678 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8680 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8683 /* Return true if N is a possible register number of function value. */
8685 static bool
8686 sh_function_value_regno_p (const unsigned int regno)
8688 return ((regno) == FIRST_RET_REG
8689 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8690 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8693 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8695 static bool
8696 sh_return_in_memory (const_tree type, const_tree fndecl)
8698 if (TARGET_SH5)
8700 if (TYPE_MODE (type) == BLKmode)
8701 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8702 else
8703 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8705 else
8707 return (TYPE_MODE (type) == BLKmode
8708 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8709 && TREE_CODE (type) == RECORD_TYPE));
8713 /* We actually emit the code in sh_expand_prologue. We used to use
8714 a static variable to flag that we need to emit this code, but that
8715 doesn't work when inlining, when functions are deferred and then emitted
8716 later. Fortunately, we already have two flags that are part of struct
8717 function that tell if a function uses varargs or stdarg. */
8718 static void
8719 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8720 enum machine_mode mode,
8721 tree type,
8722 int *pretend_arg_size,
8723 int second_time ATTRIBUTE_UNUSED)
8725 gcc_assert (cfun->stdarg);
8726 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8728 int named_parm_regs, anon_parm_regs;
8730 named_parm_regs = (ROUND_REG (*ca, mode)
8731 + (mode == BLKmode
8732 ? ROUND_ADVANCE (int_size_in_bytes (type))
8733 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8734 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8735 if (anon_parm_regs > 0)
8736 *pretend_arg_size = anon_parm_regs * 4;
8740 static bool
8741 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8743 return TARGET_SH5;
8746 static bool
8747 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8749 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8753 /* Define the offset between two registers, one to be eliminated, and
8754 the other its replacement, at the start of a routine. */
8757 initial_elimination_offset (int from, int to)
8759 int regs_saved;
8760 int regs_saved_rounding = 0;
8761 int total_saved_regs_space;
8762 int total_auto_space;
8763 int save_flags = target_flags;
8764 int copy_flags;
8765 HARD_REG_SET live_regs_mask;
8767 shmedia_space_reserved_for_target_registers = false;
8768 regs_saved = calc_live_regs (&live_regs_mask);
8769 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8771 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8773 shmedia_space_reserved_for_target_registers = true;
8774 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8777 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8778 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8779 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8781 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8782 copy_flags = target_flags;
8783 target_flags = save_flags;
8785 total_saved_regs_space = regs_saved + regs_saved_rounding;
8787 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8788 return total_saved_regs_space + total_auto_space
8789 + crtl->args.info.byref_regs * 8;
8791 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8792 return total_saved_regs_space + total_auto_space
8793 + crtl->args.info.byref_regs * 8;
8795 /* Initial gap between fp and sp is 0. */
8796 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8797 return 0;
8799 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8800 return rounded_frame_size (0);
8802 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8803 return rounded_frame_size (0);
8805 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8806 && (to == HARD_FRAME_POINTER_REGNUM
8807 || to == STACK_POINTER_REGNUM));
8808 if (TARGET_SH5)
8810 int n = total_saved_regs_space;
8811 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8812 save_schedule schedule;
8813 save_entry *entry;
8815 n += total_auto_space;
8817 /* If it wasn't saved, there's not much we can do. */
8818 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8819 return n;
8821 target_flags = copy_flags;
8823 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8824 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8825 if (entry->reg == pr_reg)
8827 target_flags = save_flags;
8828 return entry->offset;
8830 gcc_unreachable ();
8832 else
8833 return total_auto_space;
8836 /* Parse the -mfixed-range= option string. */
8837 void
8838 sh_fix_range (const char *const_str)
8840 int i, first, last;
8841 char *str, *dash, *comma;
8843 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8844 REG2 are either register names or register numbers. The effect
8845 of this option is to mark the registers in the range from REG1 to
8846 REG2 as ``fixed'' so they won't be used by the compiler. */
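/* Illustrative example (the register names are placeholders and assume
   decode_reg_name accepts the usual r0..r15 spellings):

     -mfixed-range=r8-r9,r12-r13

   would mark r8, r9, r12 and r13 as fixed and call-used for the whole
   compilation.  */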
8848 i = strlen (const_str);
8849 str = (char *) alloca (i + 1);
8850 memcpy (str, const_str, i + 1);
8852 while (1)
8854 dash = strchr (str, '-');
8855 if (!dash)
8857 warning (0, "value of -mfixed-range must have form REG1-REG2");
8858 return;
8860 *dash = '\0';
8861 comma = strchr (dash + 1, ',');
8862 if (comma)
8863 *comma = '\0';
8865 first = decode_reg_name (str);
8866 if (first < 0)
8868 warning (0, "unknown register name: %s", str);
8869 return;
8872 last = decode_reg_name (dash + 1);
8873 if (last < 0)
8875 warning (0, "unknown register name: %s", dash + 1);
8876 return;
8879 *dash = '-';
8881 if (first > last)
8883 warning (0, "%s-%s is an empty range", str, dash + 1);
8884 return;
8887 for (i = first; i <= last; ++i)
8888 fixed_regs[i] = call_used_regs[i] = 1;
8890 if (!comma)
8891 break;
8893 *comma = ',';
8894 str = comma + 1;
8898 /* Insert any deferred function attributes from earlier pragmas. */
8899 static void
8900 sh_insert_attributes (tree node, tree *attributes)
8902 tree attrs;
8904 if (TREE_CODE (node) != FUNCTION_DECL)
8905 return;
8907 /* We are only interested in fields. */
8908 if (!DECL_P (node))
8909 return;
8911 /* Append the attributes to the deferred attributes. */
8912 *sh_deferred_function_attributes_tail = *attributes;
8913 attrs = sh_deferred_function_attributes;
8914 if (!attrs)
8915 return;
8917 /* Some attributes imply or require the interrupt attribute. */
8918 if (!lookup_attribute ("interrupt_handler", attrs)
8919 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8921 /* If we have a trapa_handler, but no interrupt_handler attribute,
8922 insert an interrupt_handler attribute. */
8923 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8924 /* We can't use sh_pr_interrupt here because that's not in the
8925 java frontend. */
8926 attrs
8927 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8928 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8929 if the interrupt attribute is missing, we ignore the attribute
8930 and warn. */
8931 else if (lookup_attribute ("sp_switch", attrs)
8932 || lookup_attribute ("trap_exit", attrs)
8933 || lookup_attribute ("nosave_low_regs", attrs)
8934 || lookup_attribute ("resbank", attrs))
8936 tree *tail;
8938 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8940 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8941 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8942 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8943 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8944 warning (OPT_Wattributes,
8945 "%qE attribute only applies to interrupt functions",
8946 TREE_PURPOSE (attrs));
8947 else
8949 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8950 NULL_TREE);
8951 tail = &TREE_CHAIN (*tail);
8954 attrs = *attributes;
8958 /* Install the processed list. */
8959 *attributes = attrs;
8961 /* Clear deferred attributes. */
8962 sh_deferred_function_attributes = NULL_TREE;
8963 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8965 return;
8968 /* Supported attributes:
8970 interrupt_handler -- specifies this function is an interrupt handler.
8972 trapa_handler - like above, but don't save all registers.
8974 sp_switch -- specifies an alternate stack for an interrupt handler
8975 to run on.
8977 trap_exit -- use a trapa to exit an interrupt function instead of
8978 an rte instruction.
8980 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8981 This is useful on the SH3 and upwards,
8982 which have a separate set of low regs for User and Supervisor modes.
8983 This should only be used for the lowest level of interrupts. Higher levels
8984 of interrupts must save the registers in case they themselves are
8985 interrupted.
8987 renesas -- use Renesas calling/layout conventions (functions and
8988 structures).
8990 resbank -- In case of an ISR, use a register bank to save registers
8991 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
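   As an illustrative sketch only (the symbol name alt_stack and the trap
   number 11 below are arbitrary placeholders, not values taken from this
   file), user code might combine these attributes as

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11)))
     low_level_isr (void);

     void __attribute__ ((interrupt_handler, resbank))    SH2A only
     banked_isr (void);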
8994 /* Handle a 'resbank' attribute. */
8995 static tree
8996 sh_handle_resbank_handler_attribute (tree * node, tree name,
8997 tree args ATTRIBUTE_UNUSED,
8998 int flags ATTRIBUTE_UNUSED,
8999 bool * no_add_attrs)
9001 if (!TARGET_SH2A)
9003 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9004 name);
9005 *no_add_attrs = true;
9007 if (TREE_CODE (*node) != FUNCTION_DECL)
9009 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9010 name);
9011 *no_add_attrs = true;
9014 return NULL_TREE;
9017 /* Handle an "interrupt_handler" attribute; arguments as in
9018 struct attribute_spec.handler. */
9019 static tree
9020 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9021 tree args ATTRIBUTE_UNUSED,
9022 int flags ATTRIBUTE_UNUSED,
9023 bool *no_add_attrs)
9025 if (TREE_CODE (*node) != FUNCTION_DECL)
9027 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9028 name);
9029 *no_add_attrs = true;
9031 else if (TARGET_SHCOMPACT)
9033 error ("attribute interrupt_handler is not compatible with -m5-compact");
9034 *no_add_attrs = true;
9037 return NULL_TREE;
9040 /* Handle a 'function_vector' attribute; arguments as in
9041 struct attribute_spec.handler. */
9042 static tree
9043 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9044 tree args ATTRIBUTE_UNUSED,
9045 int flags ATTRIBUTE_UNUSED,
9046 bool * no_add_attrs)
9048 if (!TARGET_SH2A)
9050 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9051 name);
9052 *no_add_attrs = true;
9054 else if (TREE_CODE (*node) != FUNCTION_DECL)
9056 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9057 name);
9058 *no_add_attrs = true;
9060 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9062 /* The argument must be a constant integer. */
9063 warning (OPT_Wattributes,
9064 "%qE attribute argument not an integer constant",
9065 name);
9066 *no_add_attrs = true;
9068 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9070 /* The argument value must be between 0 and 255. */
9071 warning (OPT_Wattributes,
9072 "%qE attribute argument should be between 0 to 255",
9073 name);
9074 *no_add_attrs = true;
9076 return NULL_TREE;
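/* Illustrative use of the attribute handled above (SH2A only; the vector
   number 37 is an arbitrary placeholder within the 0-255 range that the
   checks above accept):

     void __attribute__ ((function_vector (37))) tfunc (void);

   Calls to such a function can then be dispatched through its function
   vector table entry instead of its ordinary address.  */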
9079 /* Returns 1 if the function referenced by symbol X has been assigned
9080 the attribute 'function_vector'. */
9082 sh2a_is_function_vector_call (rtx x)
9084 if (GET_CODE (x) == SYMBOL_REF
9085 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9087 tree tr = SYMBOL_REF_DECL (x);
9089 if (sh2a_function_vector_p (tr))
9090 return 1;
9093 return 0;
9096 /* Returns the function vector number, if the attribute
9097 'function_vector' is assigned, otherwise returns zero. */
9099 sh2a_get_function_vector_number (rtx x)
9101 int num;
9102 tree list, t;
9104 if ((GET_CODE (x) == SYMBOL_REF)
9105 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9107 t = SYMBOL_REF_DECL (x);
9109 if (TREE_CODE (t) != FUNCTION_DECL)
9110 return 0;
9112 list = SH_ATTRIBUTES (t);
9113 while (list)
9115 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9117 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9118 return num;
9121 list = TREE_CHAIN (list);
9124 return 0;
9126 else
9127 return 0;
9130 /* Handle an "sp_switch" attribute; arguments as in
9131 struct attribute_spec.handler. */
9132 static tree
9133 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9134 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9136 if (TREE_CODE (*node) != FUNCTION_DECL)
9138 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9139 name);
9140 *no_add_attrs = true;
9142 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9144 /* The argument must be a constant string. */
9145 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9146 name);
9147 *no_add_attrs = true;
9150 return NULL_TREE;
9153 /* Handle an "trap_exit" attribute; arguments as in
9154 struct attribute_spec.handler. */
9155 static tree
9156 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9157 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9159 if (TREE_CODE (*node) != FUNCTION_DECL)
9161 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9162 name);
9163 *no_add_attrs = true;
9165 /* The argument specifies a trap number to be used in a trapa instruction
9166 at function exit (instead of an rte instruction). */
9167 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9169 /* The argument must be a constant integer. */
9170 warning (OPT_Wattributes, "%qE attribute argument not an "
9171 "integer constant", name);
9172 *no_add_attrs = true;
9175 return NULL_TREE;
9178 static tree
9179 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9180 tree name ATTRIBUTE_UNUSED,
9181 tree args ATTRIBUTE_UNUSED,
9182 int flags ATTRIBUTE_UNUSED,
9183 bool *no_add_attrs ATTRIBUTE_UNUSED)
9185 return NULL_TREE;
9188 /* True if __attribute__((renesas)) or -mrenesas. */
9190 sh_attr_renesas_p (const_tree td)
9192 if (TARGET_HITACHI)
9193 return 1;
9194 if (td == 0)
9195 return 0;
9196 if (DECL_P (td))
9197 td = TREE_TYPE (td);
9198 if (td == error_mark_node)
9199 return 0;
9200 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9201 != NULL_TREE);
9204 /* True if __attribute__((renesas)) or -mrenesas, for the current
9205 function. */
9207 sh_cfun_attr_renesas_p (void)
9209 return sh_attr_renesas_p (current_function_decl);
9213 sh_cfun_interrupt_handler_p (void)
9215 return (lookup_attribute ("interrupt_handler",
9216 DECL_ATTRIBUTES (current_function_decl))
9217 != NULL_TREE);
9220 /* Returns 1 if FUNC has been assigned the attribute
9221 "function_vector". */
9223 sh2a_function_vector_p (tree func)
9225 tree list;
9226 if (TREE_CODE (func) != FUNCTION_DECL)
9227 return 0;
9229 list = SH_ATTRIBUTES (func);
9230 while (list)
9232 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9233 return 1;
9235 list = TREE_CHAIN (list);
9237 return 0;
9240 /* Returns TRUE if the current function is an SH2A interrupt handler with the "resbank" attribute. */
9243 sh_cfun_resbank_handler_p (void)
9245 return ((lookup_attribute ("resbank",
9246 DECL_ATTRIBUTES (current_function_decl))
9247 != NULL_TREE)
9248 && (lookup_attribute ("interrupt_handler",
9249 DECL_ATTRIBUTES (current_function_decl))
9250 != NULL_TREE) && TARGET_SH2A);
9253 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9255 static const char *
9256 sh_check_pch_target_flags (int old_flags)
9258 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9259 | MASK_SH_E | MASK_HARD_SH4
9260 | MASK_FPU_SINGLE | MASK_SH4))
9261 return _("created and used with different architectures / ABIs");
9262 if ((old_flags ^ target_flags) & MASK_HITACHI)
9263 return _("created and used with different ABIs");
9264 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9265 return _("created and used with different endianness");
9266 return NULL;
9269 /* Predicates used by the templates. */
9271 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9272 Used only in general_movsrc_operand. */
9275 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9277 switch (REGNO (op))
9279 case PR_REG:
9280 case MACL_REG:
9281 case MACH_REG:
9282 return 1;
9284 return 0;
9287 /* Nonzero if OP is a floating point value with value 0.0. */
9290 fp_zero_operand (rtx op)
9292 REAL_VALUE_TYPE r;
9294 if (GET_MODE (op) != SFmode)
9295 return 0;
9297 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9298 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9301 /* Nonzero if OP is a floating point value with value 1.0. */
9304 fp_one_operand (rtx op)
9306 REAL_VALUE_TYPE r;
9308 if (GET_MODE (op) != SFmode)
9309 return 0;
9311 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9312 return REAL_VALUES_EQUAL (r, dconst1);
9315 /* In general mode switching is used. If we are
9316 compiling without -mfmovd, movsf_ie isn't taken into account for
9317 mode switching. We could check in machine_dependent_reorg for
9318 cases where we know we are in single precision mode, but there is no
9319 interface to find that out during reload, so we must avoid
9320 choosing an fldi alternative during reload and thus failing to
9321 allocate a scratch register for the constant loading. */
9323 fldi_ok (void)
9325 return 1;
9329 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9331 enum rtx_code code = GET_CODE (op);
9332 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9335 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9336 enum tls_model
9337 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9339 if (GET_CODE (op) != SYMBOL_REF)
9340 return TLS_MODEL_NONE;
9341 return SYMBOL_REF_TLS_MODEL (op);
9344 /* Return the destination address of a branch. */
9346 static int
9347 branch_dest (rtx branch)
9349 rtx dest = SET_SRC (PATTERN (branch));
9350 int dest_uid;
9352 if (GET_CODE (dest) == IF_THEN_ELSE)
9353 dest = XEXP (dest, 1);
9354 dest = XEXP (dest, 0);
9355 dest_uid = INSN_UID (dest);
9356 return INSN_ADDRESSES (dest_uid);
9359 /* Return nonzero if REG is not used after INSN.
9360 We assume REG is a reload reg, and therefore does
9361 not live past labels. It may live past calls or jumps though. */
9363 reg_unused_after (rtx reg, rtx insn)
9365 enum rtx_code code;
9366 rtx set;
9368 /* If the reg is set by this instruction, then it is safe for our
9369 case. Disregard the case where this is a store to memory, since
9370 we are checking a register used in the store address. */
9371 set = single_set (insn);
9372 if (set && !MEM_P (SET_DEST (set))
9373 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9374 return 1;
9376 while ((insn = NEXT_INSN (insn)))
9378 rtx set;
9379 if (!INSN_P (insn))
9380 continue;
9382 code = GET_CODE (insn);
9384 #if 0
9385 /* If this is a label that existed before reload, then the register
9386 is dead here. However, if this is a label added by reorg, then
9387 the register may still be live here. We can't tell the difference,
9388 so we just ignore labels completely. */
9389 if (code == CODE_LABEL)
9390 return 1;
9391 /* else */
9392 #endif
9394 if (code == JUMP_INSN)
9395 return 0;
9397 /* If this is a sequence, we must handle them all at once.
9398 We could have for instance a call that sets the target register,
9399 and an insn in a delay slot that uses the register. In this case,
9400 we must return 0. */
9401 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9403 int i;
9404 int retval = 0;
9406 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9408 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9409 rtx set = single_set (this_insn);
9411 if (CALL_P (this_insn))
9412 code = CALL_INSN;
9413 else if (JUMP_P (this_insn))
9415 if (INSN_ANNULLED_BRANCH_P (this_insn))
9416 return 0;
9417 code = JUMP_INSN;
9420 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9421 return 0;
9422 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9424 if (!MEM_P (SET_DEST (set)))
9425 retval = 1;
9426 else
9427 return 0;
9429 if (set == 0
9430 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9431 return 0;
9433 if (retval == 1)
9434 return 1;
9435 else if (code == JUMP_INSN)
9436 return 0;
9439 set = single_set (insn);
9440 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9441 return 0;
9442 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9443 return !MEM_P (SET_DEST (set));
9444 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9445 return 0;
9447 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9448 return 1;
9450 return 1;
9453 #include "ggc.h"
9455 static GTY(()) rtx fpscr_rtx;
9457 get_fpscr_rtx (void)
9459 if (! fpscr_rtx)
9461 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9462 REG_USERVAR_P (fpscr_rtx) = 1;
9463 mark_user_reg (fpscr_rtx);
9465 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9466 mark_user_reg (fpscr_rtx);
9467 return fpscr_rtx;
9470 static GTY(()) tree fpscr_values;
9472 static void
9473 emit_fpu_switch (rtx scratch, int index)
9475 rtx dst, src;
9477 if (fpscr_values == NULL)
9479 tree t;
9481 t = build_index_type (integer_one_node);
9482 t = build_array_type (integer_type_node, t);
9483 t = build_decl (BUILTINS_LOCATION,
9484 VAR_DECL, get_identifier ("__fpscr_values"), t);
9485 DECL_ARTIFICIAL (t) = 1;
9486 DECL_IGNORED_P (t) = 1;
9487 DECL_EXTERNAL (t) = 1;
9488 TREE_STATIC (t) = 1;
9489 TREE_PUBLIC (t) = 1;
9490 TREE_USED (t) = 1;
9492 fpscr_values = t;
9495 src = DECL_RTL (fpscr_values);
9496 if (!can_create_pseudo_p ())
9498 emit_move_insn (scratch, XEXP (src, 0));
9499 if (index != 0)
9500 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9501 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9503 else
9504 src = adjust_address (src, PSImode, index * 4);
9506 dst = get_fpscr_rtx ();
9507 emit_move_insn (dst, src);
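/* Illustrative sketch: the declaration built above corresponds roughly to

     extern int __fpscr_values[2];

   a two-entry table of FPSCR settings assumed to be provided by the
   runtime support code, from which element INDEX is loaded into FPSCR to
   switch the FP precision mode.  */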
9510 void
9511 emit_sf_insn (rtx pat)
9513 emit_insn (pat);
9516 void
9517 emit_df_insn (rtx pat)
9519 emit_insn (pat);
9522 void
9523 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9525 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9528 void
9529 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9531 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9532 get_fpscr_rtx ()));
9535 void
9536 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9538 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9541 void
9542 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9544 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9545 get_fpscr_rtx ()));
9548 static rtx get_free_reg (HARD_REG_SET);
9550 /* This function returns a register to use for loading the address from
9551 which the fpscr is loaded.  Currently it always returns r1 or r7, but when we are
9552 able to use pseudo registers after combine, or have a better mechanism
9553 for choosing a register, it should be done here. */
9554 /* REGS_LIVE is the liveness information for the point for which we
9555 need this allocation. In some bare-bones exit blocks, r1 is live at the
9556 start. We can even have all of r0..r3 being live:
9557 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9558 The INSN before which new insns are placed will clobber the register
9559 we return. If a basic block consists only of setting the return value
9560 register to a pseudo and using that register, the return value is not
9561 live before or after this block, yet we'll insert our insns right in
9562 the middle. */
9564 static rtx
9565 get_free_reg (HARD_REG_SET regs_live)
9567 if (! TEST_HARD_REG_BIT (regs_live, 1))
9568 return gen_rtx_REG (Pmode, 1);
9570 /* Hard reg 1 is live; since this is a small register classes target,
9571 there shouldn't be anything but a jump before the function end. */
9572 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9573 return gen_rtx_REG (Pmode, 7);
9576 /* This function will set the fpscr from memory.
9577 MODE is the mode we are setting it to. */
9578 void
9579 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9581 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9582 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9583 rtx addr_reg;
9585 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9586 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9589 /* Is the given character a logical line separator for the assembler? */
9590 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9591 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9592 #endif
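/* Return the number of bytes to add to the length of INSN: two for an
   unfilled delay slot, plus the extra size of any sh-dsp parallel
   processing insns found in an inline asm.  */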
9595 sh_insn_length_adjustment (rtx insn)
9597 /* Instructions with unfilled delay slots take up an extra two bytes for
9598 the nop in the delay slot. */
9599 if (((NONJUMP_INSN_P (insn)
9600 && GET_CODE (PATTERN (insn)) != USE
9601 && GET_CODE (PATTERN (insn)) != CLOBBER)
9602 || CALL_P (insn)
9603 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9604 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9605 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9606 return 2;
9608 /* SH2e has a bug that prevents the use of annulled branches, so if
9609 the delay slot is not filled, we'll have to put a NOP in it. */
9610 if (sh_cpu_attr == CPU_SH2E
9611 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9612 && get_attr_type (insn) == TYPE_CBRANCH
9613 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9614 return 2;
9616 /* sh-dsp parallel processing insns take four bytes instead of two. */
9618 if (NONJUMP_INSN_P (insn))
9620 int sum = 0;
9621 rtx body = PATTERN (insn);
9622 const char *templ;
9623 char c;
9624 int maybe_label = 1;
9626 if (GET_CODE (body) == ASM_INPUT)
9627 templ = XSTR (body, 0);
9628 else if (asm_noperands (body) >= 0)
9629 templ
9630 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9631 else
9632 return 0;
9635 int ppi_adjust = 0;
9638 c = *templ++;
9639 while (c == ' ' || c == '\t');
9640 /* all sh-dsp parallel-processing insns start with p.
9641 The only non-ppi sh insn starting with p is pref.
9642 The only ppi starting with pr is prnd. */
9643 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9644 ppi_adjust = 2;
9645 /* The repeat pseudo-insn expands to three insns, a total of
9646 six bytes in size. */
9647 else if ((c == 'r' || c == 'R')
9648 && ! strncasecmp ("epeat", templ, 5))
9649 ppi_adjust = 4;
9650 while (c && c != '\n'
9651 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9653 /* If this is a label, it is obviously not a ppi insn. */
9654 if (c == ':' && maybe_label)
9656 ppi_adjust = 0;
9657 break;
9659 else if (c == '\'' || c == '"')
9660 maybe_label = 0;
9661 c = *templ++;
9663 sum += ppi_adjust;
9664 maybe_label = c != ':';
9666 while (c);
9667 return sum;
9669 return 0;
9672 /* Return TRUE for a valid displacement for the REG+disp addressing
9673 with MODE. */
9675 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9676 into the FRx registers. We implement this by setting the maximum offset
9677 to zero when the value is SFmode. This also restricts loading of SFmode
9678 values into the integer registers, but that can't be helped. */
9680 /* The SH allows a displacement in a QI or HI mode address, but only when the
9681 other operand is R0. GCC doesn't handle this very well, so we forgo
9682 all of that.
9684 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9685 DI can be any number 0..60. */
9687 bool
9688 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9690 if (CONST_INT_P (op))
9692 if (TARGET_SHMEDIA)
9694 int size;
9696 /* Check if this is the address of an unaligned load / store. */
9697 if (mode == VOIDmode)
9698 return CONST_OK_FOR_I06 (INTVAL (op));
9700 size = GET_MODE_SIZE (mode);
9701 return (!(INTVAL (op) & (size - 1))
9702 && INTVAL (op) >= -512 * size
9703 && INTVAL (op) < 512 * size);
9706 if (TARGET_SH2A)
9708 if (GET_MODE_SIZE (mode) == 1
9709 && (unsigned) INTVAL (op) < 4096)
9710 return true;
9713 if ((GET_MODE_SIZE (mode) == 4
9714 && (unsigned) INTVAL (op) < 64
9715 && !(INTVAL (op) & 3)
9716 && !(TARGET_SH2E && mode == SFmode))
9717 || (GET_MODE_SIZE (mode) == 4
9718 && (unsigned) INTVAL (op) < 16383
9719 && !(INTVAL (op) & 3) && TARGET_SH2A))
9720 return true;
9722 if ((GET_MODE_SIZE (mode) == 8
9723 && (unsigned) INTVAL (op) < 60
9724 && !(INTVAL (op) & 3)
9725 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9726 || ((GET_MODE_SIZE (mode) == 8)
9727 && (unsigned) INTVAL (op) < 8192
9728 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9729 && (TARGET_SH2A && mode == DFmode)))
9730 return true;
9733 return false;
9736 /* Recognize an RTL expression that is a valid memory address for
9737 an instruction.
9738 The MODE argument is the machine mode for the MEM expression
9739 that wants to use this address.
9740 Allow REG
9741 REG+disp
9742 REG+r0
9743 REG++
9744 --REG */
9746 static bool
9747 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9749 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9750 return true;
9751 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9752 && ! TARGET_SHMEDIA
9753 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9754 return true;
9755 else if (GET_CODE (x) == PLUS
9756 && (mode != PSImode || reload_completed))
9758 rtx xop0 = XEXP (x, 0);
9759 rtx xop1 = XEXP (x, 1);
9761 if (GET_MODE_SIZE (mode) <= 8
9762 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9763 && sh_legitimate_index_p (mode, xop1))
9764 return true;
9766 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9767 || ((xop0 == stack_pointer_rtx
9768 || xop0 == hard_frame_pointer_rtx)
9769 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9770 || ((xop1 == stack_pointer_rtx
9771 || xop1 == hard_frame_pointer_rtx)
9772 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9773 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9774 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9775 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9776 && TARGET_FMOVD && mode == DFmode)))
9778 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9779 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9780 return true;
9781 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9782 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9783 return true;
9787 return false;
9790 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9791 isn't protected by a PIC unspec. */
9793 nonpic_symbol_mentioned_p (rtx x)
9795 register const char *fmt;
9796 register int i;
9798 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9799 || GET_CODE (x) == PC)
9800 return 1;
9802 /* We don't want to look into the possible MEM location of a
9803 CONST_DOUBLE, since we're not going to use it, in general. */
9804 if (GET_CODE (x) == CONST_DOUBLE)
9805 return 0;
9807 if (GET_CODE (x) == UNSPEC
9808 && (XINT (x, 1) == UNSPEC_PIC
9809 || XINT (x, 1) == UNSPEC_GOT
9810 || XINT (x, 1) == UNSPEC_GOTOFF
9811 || XINT (x, 1) == UNSPEC_GOTPLT
9812 || XINT (x, 1) == UNSPEC_GOTTPOFF
9813 || XINT (x, 1) == UNSPEC_DTPOFF
9814 || XINT (x, 1) == UNSPEC_TPOFF
9815 || XINT (x, 1) == UNSPEC_PLT
9816 || XINT (x, 1) == UNSPEC_SYMOFF
9817 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9818 return 0;
9820 fmt = GET_RTX_FORMAT (GET_CODE (x));
9821 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9823 if (fmt[i] == 'E')
9825 register int j;
9827 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9828 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9829 return 1;
9831 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9832 return 1;
9835 return 0;
9838 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9839 @GOTOFF in `reg'. */
9841 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9842 rtx reg)
9844 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9845 return orig;
9847 if (GET_CODE (orig) == LABEL_REF
9848 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9850 if (reg == 0)
9851 reg = gen_reg_rtx (Pmode);
9853 emit_insn (gen_symGOTOFF2reg (reg, orig));
9854 return reg;
9856 else if (GET_CODE (orig) == SYMBOL_REF)
9858 if (reg == 0)
9859 reg = gen_reg_rtx (Pmode);
9861 emit_insn (gen_symGOT2reg (reg, orig));
9862 return reg;
9864 return orig;
9867 /* Try machine-dependent ways of modifying an illegitimate address
9868 to be legitimate. If we find one, return the new, valid address.
9869 Otherwise, return X.
9871 For the SH, if X is almost suitable for indexing, but the offset is
9872 out of range, convert it into a normal form so that CSE has a chance
9873 of reducing the number of address registers used. */
9875 static rtx
9876 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9878 if (flag_pic)
9879 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9881 if (GET_CODE (x) == PLUS
9882 && (GET_MODE_SIZE (mode) == 4
9883 || GET_MODE_SIZE (mode) == 8)
9884 && CONST_INT_P (XEXP (x, 1))
9885 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9886 && ! TARGET_SHMEDIA
9887 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9888 && ! (TARGET_SH2E && mode == SFmode))
9890 rtx index_rtx = XEXP (x, 1);
9891 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9892 rtx sum;
9894 /* On rare occasions, we might get an unaligned pointer
9895 that is indexed in a way to give an aligned address.
9896 Therefore, keep the lower two bits in offset_base. */
9897 /* Instead of offset_base 128..131 use 124..127, so that
9898 simple add suffices. */
9899 if (offset > 127)
9900 offset_base = ((offset + 4) & ~60) - 4;
9901 else
9902 offset_base = offset & ~60;
9904 /* Sometimes the normal form does not suit DImode. We
9905 could avoid that by using smaller ranges, but that
9906 would give less optimized code when SImode is
9907 prevalent. */
9908 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9910 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9911 GEN_INT (offset_base), NULL_RTX, 0,
9912 OPTAB_LIB_WIDEN);
9914 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9918 return x;
9921 /* Attempt to replace *P, which is an address that needs reloading, with
9922 a valid memory address for an operand of mode MODE.
9923 Like for sh_legitimize_address, for the SH we try to get a normal form
9924 of the address. That will allow inheritance of the address reloads. */
9926 bool
9927 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9928 int itype)
9930 enum reload_type type = (enum reload_type) itype;
9932 if (GET_CODE (*p) == PLUS
9933 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9934 && CONST_INT_P (XEXP (*p, 1))
9935 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9936 && ! TARGET_SHMEDIA
9937 && ! (TARGET_SH4 && mode == DFmode)
9938 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9939 && (ALLOW_INDEXED_ADDRESS
9940 || XEXP (*p, 0) == stack_pointer_rtx
9941 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9943 rtx index_rtx = XEXP (*p, 1);
9944 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9945 rtx sum;
9947 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9949 push_reload (*p, NULL_RTX, p, NULL,
9950 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9951 goto win;
9953 if (TARGET_SH2E && mode == SFmode)
9955 *p = copy_rtx (*p);
9956 push_reload (*p, NULL_RTX, p, NULL,
9957 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9958 goto win;
9960 /* Instead of offset_base 128..131 use 124..127, so that
9961 simple add suffices. */
9962 if (offset > 127)
9963 offset_base = ((offset + 4) & ~60) - 4;
9964 else
9965 offset_base = offset & ~60;
9966 /* Sometimes the normal form does not suit DImode. We could avoid
9967 that by using smaller ranges, but that would give less optimized
9968 code when SImode is prevalent. */
9969 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9971 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9972 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9973 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9974 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9975 goto win;
9978 /* We must re-recognize what we created before. */
9979 else if (GET_CODE (*p) == PLUS
9980 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9981 && GET_CODE (XEXP (*p, 0)) == PLUS
9982 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9983 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9984 && CONST_INT_P (XEXP (*p, 1))
9985 && ! TARGET_SHMEDIA
9986 && ! (TARGET_SH2E && mode == SFmode))
9988 /* Because this address is so complex, we know it must have
9989 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9990 it is already unshared, and needs no further unsharing. */
9991 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9992 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9993 goto win;
9996 return false;
9998 win:
9999 return true;
10002 /* In the name of slightly smaller debug output, and to cater to
10003 general assembler lossage, recognize various UNSPEC sequences
10004 and turn them back into a direct symbol reference. */
10006 static rtx
10007 sh_delegitimize_address (rtx orig_x)
10009 rtx x, y;
10011 orig_x = delegitimize_mem_from_attrs (orig_x);
10013 x = orig_x;
10014 if (MEM_P (x))
10015 x = XEXP (x, 0);
10016 if (GET_CODE (x) == CONST)
10018 y = XEXP (x, 0);
10019 if (GET_CODE (y) == UNSPEC)
10021 if (XINT (y, 1) == UNSPEC_GOT
10022 || XINT (y, 1) == UNSPEC_GOTOFF
10023 || XINT (y, 1) == UNSPEC_SYMOFF)
10024 return XVECEXP (y, 0, 0);
10025 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10027 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10029 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10031 if (GET_CODE (symplt) == UNSPEC
10032 && XINT (symplt, 1) == UNSPEC_PLT)
10033 return XVECEXP (symplt, 0, 0);
10036 else if (TARGET_SHMEDIA
10037 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10038 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10040 rtx offset = XVECEXP (y, 0, 1);
10042 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10043 if (MEM_P (orig_x))
10044 x = replace_equiv_address_nv (orig_x, x);
10045 return x;
10050 return orig_x;
10053 /* Mark the use of a constant in the literal table. If the constant
10054 has multiple labels, make it unique. */
10055 static rtx
10056 mark_constant_pool_use (rtx x)
10058 rtx insn, lab, pattern;
10060 if (x == NULL)
10061 return x;
10063 switch (GET_CODE (x))
10065 case LABEL_REF:
10066 x = XEXP (x, 0);
10067 case CODE_LABEL:
10068 break;
10069 default:
10070 return x;
10073 /* Get the first label in the list of labels for the same constant
10074 and delete the other labels in the list. */
10075 lab = x;
10076 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10078 if (!LABEL_P (insn)
10079 || LABEL_REFS (insn) != NEXT_INSN (insn))
10080 break;
10081 lab = insn;
10084 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10085 INSN_DELETED_P (insn) = 1;
10087 /* Mark constants in a window. */
10088 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10090 if (!NONJUMP_INSN_P (insn))
10091 continue;
10093 pattern = PATTERN (insn);
10094 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10095 continue;
10097 switch (XINT (pattern, 1))
10099 case UNSPECV_CONST2:
10100 case UNSPECV_CONST4:
10101 case UNSPECV_CONST8:
10102 XVECEXP (pattern, 0, 1) = const1_rtx;
10103 break;
10104 case UNSPECV_WINDOW_END:
10105 if (XVECEXP (pattern, 0, 0) == x)
10106 return lab;
10107 break;
10108 case UNSPECV_CONST_END:
10109 return lab;
10110 default:
10111 break;
10115 return lab;
10118 /* Return true if it's possible to redirect BRANCH1 to the destination
10119 of an unconditional jump BRANCH2. We only want to do this if the
10120 resulting branch will have a short displacement. */
10122 sh_can_redirect_branch (rtx branch1, rtx branch2)
10124 if (flag_expensive_optimizations && simplejump_p (branch2))
10126 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10127 rtx insn;
10128 int distance;
10130 for (distance = 0, insn = NEXT_INSN (branch1);
10131 insn && distance < 256;
10132 insn = PREV_INSN (insn))
10134 if (insn == dest)
10135 return 1;
10136 else
10137 distance += get_attr_length (insn);
10139 for (distance = 0, insn = NEXT_INSN (branch1);
10140 insn && distance < 256;
10141 insn = NEXT_INSN (insn))
10143 if (insn == dest)
10144 return 1;
10145 else
10146 distance += get_attr_length (insn);
10149 return 0;
10152 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10154 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10155 unsigned int new_reg)
10157 /* Interrupt functions can only use registers that have already been
10158 saved by the prologue, even if they would normally be
10159 call-clobbered. */
10161 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10162 return 0;
10164 return 1;
10167 /* Function to update the integer COST
10168 based on the relationship between INSN that is dependent on
10169 DEP_INSN through the dependence LINK. The default is to make no
10170 adjustment to COST. This can be used for example to specify to
10171 the scheduler that an output- or anti-dependence does not incur
10172 the same cost as a data-dependence. The return value should be
10173 the new value for COST. */
10174 static int
10175 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10177 rtx reg, use_pat;
10179 if (TARGET_SHMEDIA)
10181 /* On SHmedia, if the dependence is an anti-dependence or
10182 output-dependence, there is no cost. */
10183 if (REG_NOTE_KIND (link) != 0)
10185 /* However, dependencies between target register loads and
10186 uses of the register in a subsequent block that are separated
10187 by a conditional branch are not modelled - we have to make do with
10188 the anti-dependency between the target register load and the
10189 conditional branch that ends the current block. */
10190 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10191 && GET_CODE (PATTERN (dep_insn)) == SET
10192 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10193 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10194 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10196 int orig_cost = cost;
10197 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10198 rtx target = ((! note
10199 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10200 ? insn : JUMP_LABEL (insn));
10201 /* On the likely path, the branch costs 1, on the unlikely path,
10202 it costs 3. */
10203 cost--;
10205 target = next_active_insn (target);
10206 while (target && ! flow_dependent_p (target, dep_insn)
10207 && --cost > 0);
10208 /* If two branches are executed in immediate succession, with the
10209 first branch properly predicted, this causes a stall at the
10210 second branch, hence we won't need the target for the
10211 second branch for two cycles after the launch of the first
10212 branch. */
10213 if (cost > orig_cost - 2)
10214 cost = orig_cost - 2;
10216 else
10217 cost = 0;
10220 else if (get_attr_is_mac_media (insn)
10221 && get_attr_is_mac_media (dep_insn))
10222 cost = 1;
10224 else if (! reload_completed
10225 && GET_CODE (PATTERN (insn)) == SET
10226 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10227 && GET_CODE (PATTERN (dep_insn)) == SET
10228 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10229 && cost < 4)
10230 cost = 4;
10231 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10232 that is needed at the target. */
10233 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10234 && ! flow_dependent_p (insn, dep_insn))
10235 cost--;
10237 else if (REG_NOTE_KIND (link) == 0)
10239 enum attr_type type;
10240 rtx dep_set;
10242 if (recog_memoized (insn) < 0
10243 || recog_memoized (dep_insn) < 0)
10244 return cost;
10246 dep_set = single_set (dep_insn);
10248 /* The latency that we specify in the scheduling description refers
10249 to the actual output, not to an auto-increment register; for that,
10250 the latency is one. */
10251 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10253 rtx set = single_set (insn);
10255 if (set
10256 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10257 && (!MEM_P (SET_DEST (set))
10258 || !reg_mentioned_p (SET_DEST (dep_set),
10259 XEXP (SET_DEST (set), 0))))
10260 cost = 1;
10262 /* The only input for a call that is timing-critical is the
10263 function's address. */
10264 if (CALL_P (insn))
10266 rtx call = PATTERN (insn);
10268 if (GET_CODE (call) == PARALLEL)
10269 call = XVECEXP (call, 0 ,0);
10270 if (GET_CODE (call) == SET)
10271 call = SET_SRC (call);
10272 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10273 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10274 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10275 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10276 cost -= TARGET_SH4_300 ? 3 : 6;
10278 /* Likewise, the most timing-critical input for an sfunc call
10279 is the function address. However, sfuncs typically start
10280 using their arguments pretty quickly.
10281 Assume a four cycle delay for SH4 before they are needed.
10282 Cached ST40-300 calls are quicker, so assume only a one
10283 cycle delay there.
10284 ??? Maybe we should encode the delays till input registers
10285 are needed by sfuncs into the sfunc call insn. */
10286 /* All sfunc calls are parallels with at least four components.
10287 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10288 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10289 && XVECLEN (PATTERN (insn), 0) >= 4
10290 && (reg = sfunc_uses_reg (insn)))
10292 if (! reg_set_p (reg, dep_insn))
10293 cost -= TARGET_SH4_300 ? 1 : 4;
10295 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10297 enum attr_type dep_type = get_attr_type (dep_insn);
10299 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10300 cost--;
10301 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10302 && (type = get_attr_type (insn)) != TYPE_CALL
10303 && type != TYPE_SFUNC)
10304 cost--;
10305 /* When the preceding instruction loads the shift amount of
10306 the following SHAD/SHLD, the latency of the load is increased
10307 by 1 cycle. */
10308 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10309 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10310 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10311 XEXP (SET_SRC (single_set (insn)),
10312 1)))
10313 cost++;
10314 /* When an LS group instruction with a latency of less than
10315 3 cycles is followed by a double-precision floating-point
10316 instruction, FIPR, or FTRV, the latency of the first
10317 instruction is increased to 3 cycles. */
10318 else if (cost < 3
10319 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10320 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10321 cost = 3;
10322 /* The lsw register of a double-precision computation is ready one
10323 cycle earlier. */
10324 else if (reload_completed
10325 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10326 && (use_pat = single_set (insn))
10327 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10328 SET_SRC (use_pat)))
10329 cost -= 1;
10331 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10332 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10333 cost -= 1;
10335 else if (TARGET_SH4_300)
10337 /* Stores need their input register two cycles later. */
10338 if (dep_set && cost >= 1
10339 && ((type = get_attr_type (insn)) == TYPE_STORE
10340 || type == TYPE_PSTORE
10341 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10343 rtx set = single_set (insn);
10345 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10346 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10348 cost -= 2;
10349 /* But don't reduce the cost below 1 if the address depends
10350 on a side effect of dep_insn. */
10351 if (cost < 1
10352 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10353 cost = 1;
10358 /* An anti-dependence penalty of two applies if the first insn is a double
10359 precision fadd / fsub / fmul. */
10360 else if (!TARGET_SH4_300
10361 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10362 && recog_memoized (dep_insn) >= 0
10363 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10364 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10365 /* A lot of alleged anti-flow dependences are fake,
10366 so check this one is real. */
10367 && flow_dependent_p (dep_insn, insn))
10368 cost = 2;
10370 return cost;
10373 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10374 if DEP_INSN is anti-flow dependent on INSN. */
10375 static int
10376 flow_dependent_p (rtx insn, rtx dep_insn)
10378 rtx tmp = PATTERN (insn);
10380 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10381 return tmp == NULL_RTX;
10384 /* A helper function for flow_dependent_p called through note_stores. */
10385 static void
10386 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10388 rtx * pinsn = (rtx *) data;
10390 if (*pinsn && reg_referenced_p (x, *pinsn))
10391 *pinsn = NULL_RTX;
10394 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10395 'special function' patterns (type sfunc) that clobber pr, but that
10396 do not look like function calls to leaf_function_p. Hence we must
10397 do this extra check. */
10398 static int
10399 sh_pr_n_sets (void)
10401 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10404 /* Return where to allocate pseudo for a given hard register initial
10405 value. */
10406 static rtx
10407 sh_allocate_initial_value (rtx hard_reg)
10409 rtx x;
10411 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10413 if (current_function_is_leaf
10414 && ! sh_pr_n_sets ()
10415 && ! (TARGET_SHCOMPACT
10416 && ((crtl->args.info.call_cookie
10417 & ~ CALL_COOKIE_RET_TRAMP (1))
10418 || crtl->saves_all_registers)))
10419 x = hard_reg;
10420 else
10421 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10423 else
10424 x = NULL_RTX;
10426 return x;
10429 /* This function returns "2" to indicate dual issue for the SH4
10430 processor. To be used by the DFA pipeline description. */
10431 static int
10432 sh_issue_rate (void)
10434 if (TARGET_SUPERSCALAR)
10435 return 2;
10436 else
10437 return 1;
10440 /* Functions for ready queue reordering for sched1. */
10442 /* Get weight for mode for a set x. */
10443 static short
10444 find_set_regmode_weight (rtx x, enum machine_mode mode)
10446 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10447 return 1;
10448 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10450 if (REG_P (SET_DEST (x)))
10452 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10453 return 1;
10454 else
10455 return 0;
10457 return 1;
10459 return 0;
10462 /* Get regmode weight for insn. */
10463 static short
10464 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10466 short reg_weight = 0;
10467 rtx x;
10469 /* Increment weight for each register born here. */
10470 x = PATTERN (insn);
10471 reg_weight += find_set_regmode_weight (x, mode);
10472 if (GET_CODE (x) == PARALLEL)
10474 int j;
10475 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10477 x = XVECEXP (PATTERN (insn), 0, j);
10478 reg_weight += find_set_regmode_weight (x, mode);
10481 /* Decrement weight for each register that dies here. */
10482 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10484 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10486 rtx note = XEXP (x, 0);
10487 if (REG_P (note) && GET_MODE (note) == mode)
10488 reg_weight--;
10491 return reg_weight;
10494 /* Calculate regmode weights for all insns of a basic block. */
10495 static void
10496 find_regmode_weight (basic_block b, enum machine_mode mode)
10498 rtx insn, next_tail, head, tail;
10500 get_ebb_head_tail (b, b, &head, &tail);
10501 next_tail = NEXT_INSN (tail);
10503 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10505 /* Handle register life information. */
10506 if (!INSN_P (insn))
10507 continue;
10509 if (mode == SFmode)
10510 INSN_REGMODE_WEIGHT (insn, mode) =
10511 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10512 else if (mode == SImode)
10513 INSN_REGMODE_WEIGHT (insn, mode) =
10514 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10518 /* Comparison function for ready queue sorting. */
10519 static int
10520 rank_for_reorder (const void *x, const void *y)
10522 rtx tmp = *(const rtx *) y;
10523 rtx tmp2 = *(const rtx *) x;
10525 /* The insn in a schedule group should be issued first. */
10526 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10527 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10529 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10530 minimizes instruction movement, thus minimizing sched's effect on
10531 register pressure. */
10532 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10535 /* Resort the array A in which only element at index N may be out of order. */
10536 static void
10537 swap_reorder (rtx *a, int n)
10539 rtx insn = a[n - 1];
10540 int i = n - 2;
10542 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10544 a[i + 1] = a[i];
10545 i -= 1;
10547 a[i + 1] = insn;
10550 #define SCHED_REORDER(READY, N_READY) \
10551 do \
10553 if ((N_READY) == 2) \
10554 swap_reorder (READY, N_READY); \
10555 else if ((N_READY) > 2) \
10556 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10558 while (0)
10560 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10561 macro. */
10562 static void
10563 ready_reorder (rtx *ready, int nready)
10565 SCHED_REORDER (ready, nready);
10568 /* Count life regions of r0 for a block. */
10569 static int
10570 find_r0_life_regions (basic_block b)
10572 rtx end, insn;
10573 rtx pset;
10574 rtx r0_reg;
10575 int live;
10576 int set;
10577 int death = 0;
10579 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10581 set = 1;
10582 live = 1;
10584 else
10586 set = 0;
10587 live = 0;
10590 insn = BB_HEAD (b);
10591 end = BB_END (b);
10592 r0_reg = gen_rtx_REG (SImode, R0_REG);
10593 while (1)
10595 if (INSN_P (insn))
10597 if (find_regno_note (insn, REG_DEAD, R0_REG))
10599 death++;
10600 live = 0;
10602 if (!live
10603 && (pset = single_set (insn))
10604 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10605 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10607 set++;
10608 live = 1;
10611 if (insn == end)
10612 break;
10613 insn = NEXT_INSN (insn);
10615 return set - death;
10618 /* Calculate regmode weights for all insns of all basic block. */
10619 static void
10620 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10621 int verbose ATTRIBUTE_UNUSED,
10622 int old_max_uid)
10624 basic_block b;
10626 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10627 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10628 r0_life_regions = 0;
10630 FOR_EACH_BB_REVERSE (b)
10632 find_regmode_weight (b, SImode);
10633 find_regmode_weight (b, SFmode);
10634 if (!reload_completed)
10635 r0_life_regions += find_r0_life_regions (b);
10638 CURR_REGMODE_PRESSURE (SImode) = 0;
10639 CURR_REGMODE_PRESSURE (SFmode) = 0;
10643 /* Cleanup. */
10644 static void
10645 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10646 int verbose ATTRIBUTE_UNUSED)
10648 if (regmode_weight[0])
10650 free (regmode_weight[0]);
10651 regmode_weight[0] = NULL;
10653 if (regmode_weight[1])
10655 free (regmode_weight[1]);
10656 regmode_weight[1] = NULL;
10660 /* The scalar modes supported differ from the default version in TImode
10661 for 32-bit SHMEDIA. */
10662 static bool
10663 sh_scalar_mode_supported_p (enum machine_mode mode)
10665 if (TARGET_SHMEDIA32 && mode == TImode)
10666 return false;
10668 return default_scalar_mode_supported_p (mode);
10671 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10672 keep count of register pressures on SImode and SFmode. */
10673 static int
10674 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10675 int sched_verbose ATTRIBUTE_UNUSED,
10676 rtx insn,
10677 int can_issue_more)
10679 if (GET_CODE (PATTERN (insn)) != USE
10680 && GET_CODE (PATTERN (insn)) != CLOBBER)
10681 cached_can_issue_more = can_issue_more - 1;
10682 else
10683 cached_can_issue_more = can_issue_more;
10685 if (reload_completed)
10686 return cached_can_issue_more;
10688 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10689 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10691 return cached_can_issue_more;
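/* Reset the SImode and SFmode register pressure counters at the start
   of scheduling.  */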
10694 static void
10695 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10696 int verbose ATTRIBUTE_UNUSED,
10697 int veclen ATTRIBUTE_UNUSED)
10699 CURR_REGMODE_PRESSURE (SImode) = 0;
10700 CURR_REGMODE_PRESSURE (SFmode) = 0;
10703 /* Some magic numbers. */
10704 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10705 functions that already have high pressure on r0. */
10706 #define R0_MAX_LIFE_REGIONS 2
10707 /* Register Pressure thresholds for SImode and SFmode registers. */
10708 #define SIMODE_MAX_WEIGHT 5
10709 #define SFMODE_MAX_WEIGHT 10
10711 /* Return true if the pressure is high for MODE. */
10712 static short
10713 high_pressure (enum machine_mode mode)
10715 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10716 functions that already have high pressure on r0. */
10717 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10718 return 1;
10720 if (mode == SFmode)
10721 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10722 else
10723 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10726 /* Reorder ready queue if register pressure is high. */
10727 static int
10728 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10729 int sched_verbose ATTRIBUTE_UNUSED,
10730 rtx *ready,
10731 int *n_readyp,
10732 int clock_var ATTRIBUTE_UNUSED)
10734 if (reload_completed)
10735 return sh_issue_rate ();
10737 if (high_pressure (SFmode) || high_pressure (SImode))
10739 ready_reorder (ready, *n_readyp);
10742 return sh_issue_rate ();
10745 /* Skip cycles if the current register pressure is high. */
10746 static int
10747 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10748 int sched_verbose ATTRIBUTE_UNUSED,
10749 rtx *ready ATTRIBUTE_UNUSED,
10750 int *n_readyp ATTRIBUTE_UNUSED,
10751 int clock_var ATTRIBUTE_UNUSED)
10753 if (reload_completed)
10754 return cached_can_issue_more;
10756 if (high_pressure(SFmode) || high_pressure (SImode))
10757 skip_cycles = 1;
10759 return cached_can_issue_more;
10762 /* Skip cycles without sorting the ready queue. This will move insns from
10763 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10764 queue by sh_reorder. */
10766 /* Generally, skipping this many cycles is sufficient for all insns to move
10767 from Q -> R. */
10768 #define MAX_SKIPS 8
10770 static int
10771 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10772 int sched_verbose ATTRIBUTE_UNUSED,
10773 rtx insn ATTRIBUTE_UNUSED,
10774 int last_clock_var,
10775 int clock_var,
10776 int *sort_p)
10778 if (reload_completed)
10779 return 0;
10781 if (skip_cycles)
10783 if ((clock_var - last_clock_var) < MAX_SKIPS)
10785 *sort_p = 0;
10786 return 1;
10788 /* If this is the last cycle we are skipping, allow reordering of R. */
10789 if ((clock_var - last_clock_var) == MAX_SKIPS)
10791 *sort_p = 1;
10792 return 1;
10796 skip_cycles = 0;
10798 return 0;
10801 /* SHmedia requires registers for branches, so we can't generate new
10802 branches past reload. */
10803 static bool
10804 sh_cannot_modify_jumps_p (void)
10806 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
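/* Return the register class used for branch target registers:
   TARGET_REGS on SHmedia, NO_REGS otherwise.  */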
10809 static reg_class_t
10810 sh_target_reg_class (void)
10812 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
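/* Return true if it is worthwhile to allocate branch target registers as
   callee-saved: space must have been reserved for them and enough
   registers must be live to justify the extra saves.  */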
10815 static bool
10816 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10818 HARD_REG_SET dummy;
10819 #if 0
10820 rtx insn;
10821 #endif
10823 if (! shmedia_space_reserved_for_target_registers)
10824 return 0;
10825 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10826 return 0;
10827 if (calc_live_regs (&dummy) >= 6 * 8)
10828 return 1;
10829 return 0;
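/* Use the Microsoft-compatible bit-field layout rules for SH5 and for
   Renesas / Hitachi ABI record types.  */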
10832 static bool
10833 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10835 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10839 /* On the SH1..SH4, the trampoline looks like
10840 2 0002 D202 mov.l l2,r2
10841 1 0000 D301 mov.l l1,r3
10842 3 0004 422B jmp @r2
10843 4 0006 0009 nop
10844 5 0008 00000000 l1: .long area
10845 6 000c 00000000 l2: .long function
10847 SH5 (compact) uses r1 instead of r3 for the static chain. */
10850 /* Emit RTL insns to initialize the variable parts of a trampoline.
10851 FNADDR is an RTX for the address of the function's pure code.
10852 CXT is an RTX for the static chain value for the function. */
10854 static void
10855 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10857 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10858 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10860 if (TARGET_SHMEDIA64)
10862 rtx tramp_templ;
10863 int fixed_len;
10865 rtx movi1 = GEN_INT (0xcc000010);
10866 rtx shori1 = GEN_INT (0xc8000010);
10867 rtx src, dst;
10869 /* The following trampoline works within a +- 128 KB range for cxt:
10870 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10871 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10872 gettr tr1,r1; blink tr0,r63 */
10873 /* Address rounding makes it hard to compute the exact bounds of the
10874 offset for this trampoline, but we have a rather generous offset
10875 range, so frame_offset should do fine as an upper bound. */
10876 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10878 /* ??? could optimize this trampoline initialization
10879 by writing DImode words with two insns each. */
10880 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10881 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10882 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10883 insn = gen_rtx_AND (DImode, insn, mask);
10884 /* Or in ptb/u .,tr1 pattern */
10885 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10886 insn = force_operand (insn, NULL_RTX);
10887 insn = gen_lowpart (SImode, insn);
10888 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10889 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10890 insn = gen_rtx_AND (DImode, insn, mask);
10891 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10892 insn = gen_lowpart (SImode, insn);
10893 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10894 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10895 insn = gen_rtx_AND (DImode, insn, mask);
10896 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10897 insn = gen_lowpart (SImode, insn);
10898 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10899 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10900 insn = gen_rtx_AND (DImode, insn, mask);
10901 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10902 insn = gen_lowpart (SImode, insn);
10903 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10904 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10905 insn = gen_rtx_AND (DImode, insn, mask);
10906 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10907 insn = gen_lowpart (SImode, insn);
10908 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10909 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10910 GEN_INT (0x6bf10600));
10911 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10912 GEN_INT (0x4415fc10));
10913 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10914 GEN_INT (0x4401fff0));
10915 emit_insn (gen_ic_invalidate_line (tramp));
10916 return;
10918 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10919 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10921 tramp_templ = gen_datalabel_ref (tramp_templ);
10922 dst = tramp_mem;
10923 src = gen_const_mem (BLKmode, tramp_templ);
10924 set_mem_align (dst, 256);
10925 set_mem_align (src, 64);
10926 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10928 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10929 emit_move_insn (adjust_address (tramp_mem, Pmode,
10930 fixed_len + GET_MODE_SIZE (Pmode)),
10931 cxt);
10932 emit_insn (gen_ic_invalidate_line (tramp));
10933 return;
10935 else if (TARGET_SHMEDIA)
10937 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10938 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10939 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10940 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10941 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10942 rotated 10 right, and the higher 16 bits of every 32 selected. */
10943 rtx movishori
10944 = force_reg (V2HImode, (simplify_gen_subreg
10945 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10946 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10947 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10949 fnaddr = force_reg (SImode, fnaddr);
10950 cxt = force_reg (SImode, cxt);
10951 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10952 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10953 movishori));
10954 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10955 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10956 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10957 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10958 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10959 gen_rtx_SUBREG (V2HImode, cxt, 0),
10960 movishori));
10961 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10962 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10963 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10964 if (TARGET_LITTLE_ENDIAN)
10966 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10967 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10969 else
10971 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10972 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10974 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10975 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10976 emit_insn (gen_ic_invalidate_line (tramp));
10977 return;
10979 else if (TARGET_SHCOMPACT)
10981 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10982 return;
10984 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10985 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10986 SImode));
10987 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10988 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10989 SImode));
10990 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10991 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10992 if (TARGET_HARVARD)
10994 if (!TARGET_INLINE_IC_INVALIDATE
10995 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10996 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10997 FUNCTION_ORDINARY),
10998 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10999 else
11000 emit_insn (gen_ic_invalidate_line (tramp));
11004 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11006 static rtx
11007 sh_trampoline_adjust_address (rtx tramp)
11009 if (TARGET_SHMEDIA)
11010 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11011 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11012 return tramp;
11015 /* FIXME: This is overly conservative. A SHcompact function that
11016 receives arguments ``by reference'' will have them stored in its
11017 own stack frame, so it must not pass pointers or references to
11018 these arguments to other functions by means of sibling calls. */
11019 /* If PIC, we cannot make sibling calls to global functions
11020 because the PLT requires r12 to be live. */
11021 static bool
11022 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11024 return (1
11025 && (! TARGET_SHCOMPACT
11026 || crtl->args.info.stack_regs == 0)
11027 && ! sh_cfun_interrupt_handler_p ()
11028 && (! flag_pic
11029 || (decl && ! TREE_PUBLIC (decl))
11030 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11033 /* Machine specific built-in functions. */
11035 struct builtin_description
11037 const enum insn_code icode;
11038 const char *const name;
11039 int signature;
11040 tree fndecl;
11043 /* describe number and signedness of arguments; arg[0] == result
11044 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
11045 /* 9: 64-bit pointer, 10: 32-bit pointer */
11046 static const char signature_args[][4] =
11048 #define SH_BLTIN_V2SI2 0
11049 { 4, 4 },
11050 #define SH_BLTIN_V4HI2 1
11051 { 4, 4 },
11052 #define SH_BLTIN_V2SI3 2
11053 { 4, 4, 4 },
11054 #define SH_BLTIN_V4HI3 3
11055 { 4, 4, 4 },
11056 #define SH_BLTIN_V8QI3 4
11057 { 4, 4, 4 },
11058 #define SH_BLTIN_MAC_HISI 5
11059 { 1, 4, 4, 1 },
11060 #define SH_BLTIN_SH_HI 6
11061 { 4, 4, 1 },
11062 #define SH_BLTIN_SH_SI 7
11063 { 4, 4, 1 },
11064 #define SH_BLTIN_V4HI2V2SI 8
11065 { 4, 4, 4 },
11066 #define SH_BLTIN_V4HI2V8QI 9
11067 { 4, 4, 4 },
11068 #define SH_BLTIN_SISF 10
11069 { 4, 2 },
11070 #define SH_BLTIN_LDUA_L 11
11071 { 2, 10 },
11072 #define SH_BLTIN_LDUA_Q 12
11073 { 1, 10 },
11074 #define SH_BLTIN_STUA_L 13
11075 { 0, 10, 2 },
11076 #define SH_BLTIN_STUA_Q 14
11077 { 0, 10, 1 },
11078 #define SH_BLTIN_LDUA_L64 15
11079 { 2, 9 },
11080 #define SH_BLTIN_LDUA_Q64 16
11081 { 1, 9 },
11082 #define SH_BLTIN_STUA_L64 17
11083 { 0, 9, 2 },
11084 #define SH_BLTIN_STUA_Q64 18
11085 { 0, 9, 1 },
11086 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11087 #define SH_BLTIN_2 19
11088 #define SH_BLTIN_SU 19
11089 { 1, 2 },
11090 #define SH_BLTIN_3 20
11091 #define SH_BLTIN_SUS 20
11092 { 2, 2, 1 },
11093 #define SH_BLTIN_PSSV 21
11094 { 0, 8, 2, 2 },
11095 #define SH_BLTIN_XXUU 22
11096 #define SH_BLTIN_UUUU 22
11097 { 1, 1, 1, 1 },
11098 #define SH_BLTIN_PV 23
11099 { 0, 8 },
11101 /* mcmv: operands considered unsigned. */
11102 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11103 /* mperm: control value considered unsigned int. */
11104 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11105 /* mshards_q: returns signed short. */
11106 /* nsb: takes long long arg, returns unsigned char. */
11107 static struct builtin_description bdesc[] =
11109 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11110 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11111 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11112 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11113 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11114 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11115 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11116 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11117 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11118 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11119 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11120 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11121 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11122 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11123 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11124 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11125 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11126 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11127 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11128 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11129 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11130 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11131 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11132 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11133 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11134 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11135 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11136 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11137 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11138 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11139 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11140 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11141 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11142 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11143 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11144 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11145 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11146 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11147 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11148 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11149 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11150 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11151 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11152 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11153 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11154 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11155 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11156 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11157 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11158 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11159 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11160 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11161 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11162 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11163 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11164 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11165 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11166 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11167 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11168 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11169 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11170 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11171 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11172 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11173 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11174 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11175 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11176 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11177 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11178 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11179 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11180 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11181 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11182 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11183 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11184 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11185 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11186 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11187 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11188 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11189 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11190 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11191 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11192 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
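/* Register the SHmedia builtin functions described in bdesc, building
   and sharing one function type per signature.  */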
11195 static void
11196 sh_media_init_builtins (void)
11198 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11199 struct builtin_description *d;
11201 memset (shared, 0, sizeof shared);
11202 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11204 tree type, arg_type = 0;
11205 int signature = d->signature;
11206 int i;
11208 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11209 type = shared[signature];
11210 else
11212 int has_result = signature_args[signature][0] != 0;
11214 if ((signature_args[signature][1] & 8)
11215 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11216 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11217 continue;
11218 if (! TARGET_FPU_ANY
11219 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11220 continue;
11221 type = void_list_node;
11222 for (i = 3; ; i--)
11224 int arg = signature_args[signature][i];
11225 int opno = i - 1 + has_result;
11227 if (arg & 8)
11228 arg_type = ptr_type_node;
11229 else if (arg)
11230 arg_type = (*lang_hooks.types.type_for_mode)
11231 (insn_data[d->icode].operand[opno].mode,
11232 (arg & 1));
11233 else if (i)
11234 continue;
11235 else
11236 arg_type = void_type_node;
11237 if (i == 0)
11238 break;
11239 type = tree_cons (NULL_TREE, arg_type, type);
11241 type = build_function_type (arg_type, type);
11242 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11243 shared[signature] = type;
11245 d->fndecl =
11246 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11247 NULL, NULL_TREE);
11251 /* Returns the shmedia builtin decl for CODE. */
11253 static tree
11254 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11256 if (code >= ARRAY_SIZE (bdesc))
11257 return error_mark_node;
11259 return bdesc[code].fndecl;
11262 /* Implements target hook vector_mode_supported_p. */
11263 bool
11264 sh_vector_mode_supported_p (enum machine_mode mode)
11266 if (TARGET_FPU_ANY
11267 && ((mode == V2SFmode)
11268 || (mode == V4SFmode)
11269 || (mode == V16SFmode)))
11270 return true;
11272 else if (TARGET_SHMEDIA
11273 && ((mode == V8QImode)
11274 || (mode == V2HImode)
11275 || (mode == V4HImode)
11276 || (mode == V2SImode)))
11277 return true;
11279 return false;
11282 bool
11283 sh_frame_pointer_required (void)
11285 /* If needed, override this in other tm.h files to cope with various OS
11286 lossage requiring a frame pointer. */
11287 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11288 return true;
11290 if (crtl->profile)
11291 return true;
11293 return false;
11296 /* Implements target hook dwarf_calling_convention. Return an enum
11297 of dwarf_calling_convention. */
11298 int
11299 sh_dwarf_calling_convention (const_tree func)
11301 if (sh_attr_renesas_p (func))
11302 return DW_CC_GNU_renesas_sh;
11304 return DW_CC_normal;
11307 static void
11308 sh_init_builtins (void)
11310 if (TARGET_SHMEDIA)
11311 sh_media_init_builtins ();
11314 /* Returns the sh builtin decl for CODE. */
11316 static tree
11317 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11319 if (TARGET_SHMEDIA)
11320 return sh_media_builtin_decl (code, initialize_p);
11322 return error_mark_node;
11325 /* Expand an expression EXP that calls a built-in function,
11326 with result going to TARGET if that's convenient
11327 (and in mode MODE if that's convenient).
11328 SUBTARGET may be used as the target for computing one of EXP's operands.
11329 IGNORE is nonzero if the value is to be ignored. */
11331 static rtx
11332 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11333 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11335 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11336 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11337 const struct builtin_description *d = &bdesc[fcode];
11338 enum insn_code icode = d->icode;
11339 int signature = d->signature;
11340 enum machine_mode tmode = VOIDmode;
11341 int nop = 0, i;
11342 rtx op[4];
11343 rtx pat = 0;
11345 if (signature_args[signature][0])
11347 if (ignore)
11348 return 0;
11350 tmode = insn_data[icode].operand[0].mode;
11351 if (! target
11352 || GET_MODE (target) != tmode
11353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11354 target = gen_reg_rtx (tmode);
11355 op[nop++] = target;
11357 else
11358 target = 0;
11360 for (i = 1; i <= 3; i++, nop++)
11362 tree arg;
11363 enum machine_mode opmode, argmode;
11364 tree optype;
11366 if (! signature_args[signature][i])
11367 break;
11368 arg = CALL_EXPR_ARG (exp, i - 1);
11369 if (arg == error_mark_node)
11370 return const0_rtx;
11371 if (signature_args[signature][i] & 8)
11373 opmode = ptr_mode;
11374 optype = ptr_type_node;
11376 else
11378 opmode = insn_data[icode].operand[nop].mode;
11379 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11381 argmode = TYPE_MODE (TREE_TYPE (arg));
11382 if (argmode != opmode)
11383 arg = build1 (NOP_EXPR, optype, arg);
11384 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11385 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11386 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11389 switch (nop)
11391 case 1:
11392 pat = (*insn_data[d->icode].genfun) (op[0]);
11393 break;
11394 case 2:
11395 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11396 break;
11397 case 3:
11398 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11399 break;
11400 case 4:
11401 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11402 break;
11403 default:
11404 gcc_unreachable ();
11406 if (! pat)
11407 return 0;
11408 emit_insn (pat);
11409 return target;
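/* Expand a V2SFmode unary operation CODE with source OP1 and destination
   OP0 by emitting the underlying SFmode operation once per vector lane;
   the two selector pairs below pick lane 0 and lane 1 respectively.  */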
11412 void
11413 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11415 rtx sel0 = const0_rtx;
11416 rtx sel1 = const1_rtx;
11417 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11418 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11420 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11421 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
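/* Likewise, expand a V2SFmode binary operation CODE on OP1 and OP2 into
   OP0 by emitting the binary_sf_op patterns once per vector lane.  */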
11424 void
11425 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11427 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11429 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11430 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11433 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11434 We can allow any mode in any general register. The special registers
11435 only allow SImode. Don't allow any mode in the PR.
11437 We cannot hold DCmode values in the XD registers because alter_reg
11438 handles subregs of them incorrectly. We could work around this by
11439 spacing the XD registers like the DR registers, but this would require
11440 additional memory in every compilation to hold larger register vectors.
11441 We could hold SFmode / SCmode values in XD registers, but that
11442 would require a tertiary reload when reloading from / to memory,
11443 and a secondary reload to reload from / to general regs; that
11444 seems to be a losing proposition.
11446 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11447 it won't be ferried through GP registers first. */
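/* For example, under the checks below a DFmode value may live in any
   general register pair, in an even-numbered FP register pair (fr0/fr1,
   fr2/fr3, ...) on SH4 or SH2A with double-precision FPU, or in an XD
   register, but not in FPUL or PR, which only admit narrower modes.  */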
11449 bool
11450 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11452 if (SPECIAL_REGISTER_P (regno))
11453 return mode == SImode;
11455 if (regno == FPUL_REG)
11456 return (mode == SImode || mode == SFmode);
11458 if (FP_REGISTER_P (regno) && mode == SFmode)
11459 return true;
11461 if (mode == V2SFmode)
11463 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11464 || GENERAL_REGISTER_P (regno)))
11465 return true;
11466 else
11467 return false;
11470 if (mode == V4SFmode)
11472 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11473 || GENERAL_REGISTER_P (regno))
11474 return true;
11475 else
11476 return false;
11479 if (mode == V16SFmode)
11481 if (TARGET_SHMEDIA)
11483 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11484 return true;
11485 else
11486 return false;
11488 else
11489 return regno == FIRST_XD_REG;
11492 if (FP_REGISTER_P (regno))
11494 if (mode == SFmode
11495 || mode == SImode
11496 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11497 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11498 || mode == DCmode
11499 || (TARGET_SHMEDIA
11500 && (mode == DFmode || mode == DImode
11501 || mode == V2SFmode || mode == TImode)))
11502 && ((regno - FIRST_FP_REG) & 1) == 0)
11503 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11504 && ((regno - FIRST_FP_REG) & 3) == 0))
11505 return true;
11506 else
11507 return false;
11510 if (XD_REGISTER_P (regno))
11511 return mode == DFmode;
11513 if (TARGET_REGISTER_P (regno))
11514 return (mode == DImode || mode == SImode || mode == PDImode);
11516 if (regno == PR_REG)
11517 return mode == SImode;
11519 if (regno == FPSCR_REG)
11520 return mode == PSImode;
11522 /* FIXME. This works around PR target/37633 for -O0. */
11523 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11525 unsigned int n = GET_MODE_SIZE (mode) / 8;
11527 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11528 && regno <= FIRST_GENERAL_REG + 14)
11529 return false;
11532 return true;
11535 /* Return true if a change from mode FROM to mode TO is invalid for
11536 registers in class RCLASS. */
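/* Two kinds of changes are rejected below: taking an SFmode subreg of an
   SFmode vector is invalid for GENERAL_REGS, so that such a subreg acts
   as a cheap lane select within the FP register file, and size-changing
   mode changes are invalid for the double-precision FP register classes
   (DF_REGS on little endian, DF_HI_REGS on big endian), apparently
   because the word order of a value held in an FP register pair does
   not match that of narrower accesses.  */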
11537 bool
11538 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11539 enum reg_class rclass)
11541 /* We want to enable the use of SUBREGs as a means to
11542 VEC_SELECT a single element of a vector. */
11543 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11544 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11546 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11548 if (TARGET_LITTLE_ENDIAN)
11550 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11551 return reg_classes_intersect_p (DF_REGS, rclass);
11553 else
11555 if (GET_MODE_SIZE (from) < 8)
11556 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11559 return 0;
11562 /* Return true if values in machine mode MODE will likely be
11563 allocated to registers in small register classes. */
11565 bool
11566 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11568 return (! TARGET_SHMEDIA);
11571 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11572 that label is used. */
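/* A GOTOFF address handled here typically has the shape
   (const (unspec [(label_ref L)] ...)) or
   (const (plus (unspec [(label_ref L)] ...) (const_int N)));
   the code below strips those wrappers to reach the label itself.  */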
11574 void
11575 sh_mark_label (rtx address, int nuses)
11577 if (GOTOFF_P (address))
11579 /* Extract the label or symbol. */
11580 address = XEXP (address, 0);
11581 if (GET_CODE (address) == PLUS)
11582 address = XEXP (address, 0);
11583 address = XVECEXP (address, 0, 0);
11585 if (GET_CODE (address) == LABEL_REF
11586 && LABEL_P (XEXP (address, 0)))
11587 LABEL_NUSES (XEXP (address, 0)) += nuses;
11590 /* Compute extra cost of moving data between one register class
11591 and another. */
11593 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11594 uses this information. Hence, the general register <-> floating point
11595 register information here is not used for SFmode. */
11597 static int
11598 sh_register_move_cost (enum machine_mode mode,
11599 reg_class_t srcclass, reg_class_t dstclass)
11601 if (dstclass == T_REGS || dstclass == PR_REGS)
11602 return 10;
11604 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11605 return 4;
11607 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11608 && REGCLASS_HAS_FP_REG (srcclass)
11609 && REGCLASS_HAS_FP_REG (dstclass))
11610 return 4;
11612 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11613 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11615 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11616 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11617 return 9;
11619 if ((REGCLASS_HAS_FP_REG (dstclass)
11620 && REGCLASS_HAS_GENERAL_REG (srcclass))
11621 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11622 && REGCLASS_HAS_FP_REG (srcclass)))
11623 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11624 * ((GET_MODE_SIZE (mode) + 7) / 8U));
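/* For example, by the formula above an 8-byte DFmode move between a
   general register pair and an FP register pair costs 12 without
   -mfmovd, 8 with -mfmovd, and 4 on SHmedia.  */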
11626 if ((dstclass == FPUL_REGS
11627 && REGCLASS_HAS_GENERAL_REG (srcclass))
11628 || (srcclass == FPUL_REGS
11629 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11630 return 5;
11632 if ((dstclass == FPUL_REGS
11633 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11634 || (srcclass == FPUL_REGS
11635 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11636 return 7;
11638 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11639 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11640 return 20;
11642 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11643 if (TARGET_SHMEDIA
11644 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11646 if (sh_gettrcost >= 0)
11647 return sh_gettrcost;
11648 else if (!TARGET_PT_FIXED)
11649 return 100;
11652 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11653 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11654 return 4;
11656 if (TARGET_SHMEDIA
11657 || (TARGET_FMOVD
11658 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11659 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11660 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11662 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11665 static rtx emit_load_ptr (rtx, rtx);
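/* Load a ptr_mode value from ADDR into REG, sign-extending it to Pmode
   when pointers are narrower than Pmode (as on SHmedia with 32-bit
   pointers, where Pmode is DImode).  */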
11667 static rtx
11668 emit_load_ptr (rtx reg, rtx addr)
11670 rtx mem = gen_const_mem (ptr_mode, addr);
11672 if (Pmode != ptr_mode)
11673 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11674 return emit_move_insn (reg, mem);
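/* Output the code for a thunk: roughly, the emitted insns perform

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;

   i.e. they adjust the incoming "this" pointer and then tail-call
   FUNCTION, using r0 (and r1/r3 or SHmedia call-clobbered registers,
   where available) as scratch registers.  */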
11677 static void
11678 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11679 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11680 tree function)
11682 CUMULATIVE_ARGS cum;
11683 int structure_value_byref = 0;
11684 rtx this_rtx, this_value, sibcall, insns, funexp;
11685 tree funtype = TREE_TYPE (function);
11686 int simple_add = CONST_OK_FOR_ADD (delta);
11687 int did_load = 0;
11688 rtx scratch0, scratch1, scratch2;
11689 unsigned i;
11691 reload_completed = 1;
11692 epilogue_completed = 1;
11693 current_function_uses_only_leaf_regs = 1;
11695 emit_note (NOTE_INSN_PROLOGUE_END);
11697 /* Find the "this" pointer. We have such a wide range of ABIs for the
11698 SH that it's best to do this completely machine independently.
11699 "this" is passed as first argument, unless a structure return pointer
11700 comes first, in which case "this" comes second. */
11701 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11702 #ifndef PCC_STATIC_STRUCT_RETURN
11703 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11704 structure_value_byref = 1;
11705 #endif /* not PCC_STATIC_STRUCT_RETURN */
11706 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11708 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11710 sh_function_arg_advance (&cum, Pmode, ptype, true);
11712 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11714 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11715 static chain pointer (even if you can't have nested virtual functions
11716 right now, someone might implement them sometime), and the rest of the
11717 registers are used for argument passing, are callee-saved, or reserved. */
11718 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11719 -ffixed-reg has been used. */
11720 if (! call_used_regs[0] || fixed_regs[0])
11721 error ("r0 needs to be available as a call-clobbered register");
11722 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11723 if (! TARGET_SH5)
11725 if (call_used_regs[1] && ! fixed_regs[1])
11726 scratch1 = gen_rtx_REG (ptr_mode, 1);
11727 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11728 pointing to where struct values are to be returned. */
11729 if (call_used_regs[3] && ! fixed_regs[3])
11730 scratch2 = gen_rtx_REG (Pmode, 3);
11732 else if (TARGET_SHMEDIA)
11734 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11735 if (i != REGNO (scratch0) &&
11736 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11738 scratch1 = gen_rtx_REG (ptr_mode, i);
11739 break;
11741 if (scratch1 == scratch0)
11742 error ("need a second call-clobbered general purpose register");
11743 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11744 if (call_used_regs[i] && ! fixed_regs[i])
11746 scratch2 = gen_rtx_REG (Pmode, i);
11747 break;
11749 if (scratch2 == scratch0)
11750 error ("need a call-clobbered target register");
11753 this_value = plus_constant (this_rtx, delta);
11754 if (vcall_offset
11755 && (simple_add || scratch0 != scratch1)
11756 && strict_memory_address_p (ptr_mode, this_value))
11758 emit_load_ptr (scratch0, this_value);
11759 did_load = 1;
11762 if (!delta)
11763 ; /* Do nothing. */
11764 else if (simple_add)
11765 emit_move_insn (this_rtx, this_value);
11766 else
11768 emit_move_insn (scratch1, GEN_INT (delta));
11769 emit_insn (gen_add2_insn (this_rtx, scratch1));
11772 if (vcall_offset)
11774 rtx offset_addr;
11776 if (!did_load)
11777 emit_load_ptr (scratch0, this_rtx);
11779 offset_addr = plus_constant (scratch0, vcall_offset);
11780 if (strict_memory_address_p (ptr_mode, offset_addr))
11781 ; /* Do nothing. */
11782 else if (! TARGET_SH5 && scratch0 != scratch1)
11784 /* scratch0 != scratch1, and we have indexed loads. Get better
11785 schedule by loading the offset into r1 and using an indexed
11786 load - then the load of r1 can issue before the load from
11787 (this_rtx + delta) finishes. */
11788 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11789 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11791 else if (CONST_OK_FOR_ADD (vcall_offset))
11793 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11794 offset_addr = scratch0;
11796 else if (scratch0 != scratch1)
11798 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11799 emit_insn (gen_add2_insn (scratch0, scratch1));
11800 offset_addr = scratch0;
11802 else
11803 gcc_unreachable (); /* FIXME */
11804 emit_load_ptr (scratch0, offset_addr);
11806 if (Pmode != ptr_mode)
11807 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11808 emit_insn (gen_add2_insn (this_rtx, scratch0));
11811 /* Generate a tail call to the target function. */
11812 if (! TREE_USED (function))
11814 assemble_external (function);
11815 TREE_USED (function) = 1;
11817 funexp = XEXP (DECL_RTL (function), 0);
11818 /* If the function is overridden, so is the thunk, hence we don't
11819 need GOT addressing even if this is a public symbol. */
11820 #if 0
11821 if (TARGET_SH1 && ! flag_weak)
11822 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11823 else
11824 #endif
11825 if (TARGET_SH2 && flag_pic)
11827 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11828 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11830 else
11832 if (TARGET_SHMEDIA && flag_pic)
11834 funexp = gen_sym2PIC (funexp);
11835 PUT_MODE (funexp, Pmode);
11837 emit_move_insn (scratch2, funexp);
11838 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11839 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11841 sibcall = emit_call_insn (sibcall);
11842 SIBLING_CALL_P (sibcall) = 1;
11843 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11844 emit_barrier ();
11846 /* Run just enough of rest_of_compilation to do scheduling and get
11847 the insns emitted. Note that use_thunk calls
11848 assemble_start_function and assemble_end_function. */
11850 insn_locators_alloc ();
11851 insns = get_insns ();
11853 if (optimize > 0)
11855 if (! cfun->cfg)
11856 init_flow (cfun);
11857 split_all_insns_noflow ();
11860 sh_reorg ();
11862 if (optimize > 0 && flag_delayed_branch)
11863 dbr_schedule (insns);
11865 shorten_branches (insns);
11866 final_start_function (insns, file, 1);
11867 final (insns, file, 1);
11868 final_end_function ();
11870 reload_completed = 0;
11871 epilogue_completed = 0;
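/* Return an rtx holding the address of the function NAME, materialized
   according to KIND: a plain SYMBOL_REF for ordinary functions, or, for
   the special function kinds under PIC, a GOT- or GOTOFF-based load into
   TARGET (or into a fresh pseudo when TARGET is NULL).  */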
11874 rtx
11875 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11877 rtx sym;
11879 /* If this is not an ordinary function, the name usually comes from a
11880 string literal or an sprintf buffer. Make sure we use the same
11881 string consistently, so that cse will be able to unify address loads. */
11882 if (kind != FUNCTION_ORDINARY)
11883 name = IDENTIFIER_POINTER (get_identifier (name));
11884 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11885 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11886 if (flag_pic)
11887 switch (kind)
11889 case FUNCTION_ORDINARY:
11890 break;
11891 case SFUNC_GOT:
11893 rtx reg = target ? target : gen_reg_rtx (Pmode);
11895 emit_insn (gen_symGOT2reg (reg, sym));
11896 sym = reg;
11897 break;
11899 case SFUNC_STATIC:
11901 /* ??? To allow cse to work, we use GOTOFF relocations.
11902 We could add combiner patterns to transform this into
11903 straight pc-relative calls with sym2PIC / bsrf when
11904 label load and function call are still 1:1 and in the
11905 same basic block during combine. */
11906 rtx reg = target ? target : gen_reg_rtx (Pmode);
11908 emit_insn (gen_symGOTOFF2reg (reg, sym));
11909 sym = reg;
11910 break;
11913 if (target && sym != target)
11915 emit_move_insn (target, sym);
11916 return target;
11918 return sym;
11921 /* Find the number of a general purpose register in S. */
11922 static int
11923 scavenge_reg (HARD_REG_SET *s)
11925 int r;
11926 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11927 if (TEST_HARD_REG_BIT (*s, r))
11928 return r;
11929 return -1;
11932 rtx
11933 sh_get_pr_initial_val (void)
11935 rtx val;
11937 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11938 PR register on SHcompact, because it might be clobbered by the prologue.
11939 We check first if that is known to be the case. */
11940 if (TARGET_SHCOMPACT
11941 && ((crtl->args.info.call_cookie
11942 & ~ CALL_COOKIE_RET_TRAMP (1))
11943 || crtl->saves_all_registers))
11944 return gen_frame_mem (SImode, return_address_pointer_rtx);
11946 /* If we haven't finished rtl generation, there might be a nonlocal label
11947 that we haven't seen yet.
11948 ??? get_hard_reg_initial_val fails if it is called after register
11949 allocation has started, unless it has been called before for the
11950 same register. And even then, we end up in trouble if we didn't use
11951 the register in the same basic block before. So call
11952 get_hard_reg_initial_val now and wrap it in an unspec if we might
11953 need to replace it. */
11954 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11955 combine can put the pseudo returned by get_hard_reg_initial_val into
11956 instructions that need a general purpose registers, which will fail to
11957 be recognized when the pseudo becomes allocated to PR. */
11958 val
11959 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11960 if (TARGET_SH1)
11961 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11962 return val;
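/* Try to expand an scc operation: OPERANDS[1] gives the comparison code,
   OPERANDS[2] and OPERANDS[3] the compared values, and OPERANDS[0] the
   SImode destination.  Only comparisons of the T register against a
   constant are handled; return nonzero if code was emitted, zero if the
   caller must fall back to the generic expansion.  */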
11965 int
11966 sh_expand_t_scc (rtx operands[])
11968 enum rtx_code code = GET_CODE (operands[1]);
11969 rtx target = operands[0];
11970 rtx op0 = operands[2];
11971 rtx op1 = operands[3];
11972 rtx result = target;
11973 HOST_WIDE_INT val;
11975 if (!REG_P (op0) || REGNO (op0) != T_REG
11976 || !CONST_INT_P (op1))
11977 return 0;
11978 if (!REG_P (result))
11979 result = gen_reg_rtx (SImode);
11980 val = INTVAL (op1);
11981 if ((code == EQ && val == 1) || (code == NE && val == 0))
11982 emit_insn (gen_movt (result));
11983 else if (TARGET_SH2A && ((code == EQ && val == 0)
11984 || (code == NE && val == 1)))
11985 emit_insn (gen_xorsi3_movrt (result));
11986 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11988 emit_clobber (result);
11989 emit_insn (gen_subc (result, result, result));
11990 emit_insn (gen_addsi3 (result, result, const1_rtx));
11992 else if (code == EQ || code == NE)
11993 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11994 else
11995 return 0;
11996 if (result != target)
11997 emit_move_insn (target, result);
11998 return 1;
12001 /* INSN is an sfunc; return the rtx that describes the address used. */
12002 static rtx
12003 extract_sfunc_addr (rtx insn)
12005 rtx pattern, part = NULL_RTX;
12006 int len, i;
12008 pattern = PATTERN (insn);
12009 len = XVECLEN (pattern, 0);
12010 for (i = 0; i < len; i++)
12012 part = XVECEXP (pattern, 0, i);
12013 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12014 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12015 return XEXP (part, 0);
12017 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12018 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12021 /* Verify that the register in use_sfunc_addr still agrees with the address
12022 used in the sfunc. This prevents fill_slots_from_thread from changing
12023 use_sfunc_addr.
12024 INSN is the use_sfunc_addr instruction, and REG is the register it
12025 guards. */
12026 int
12027 check_use_sfunc_addr (rtx insn, rtx reg)
12029 /* Search for the sfunc. It should really come right after INSN. */
12030 while ((insn = NEXT_INSN (insn)))
12032 if (LABEL_P (insn) || JUMP_P (insn))
12033 break;
12034 if (! INSN_P (insn))
12035 continue;
12037 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12038 insn = XVECEXP (PATTERN (insn), 0, 0);
12039 if (GET_CODE (PATTERN (insn)) != PARALLEL
12040 || get_attr_type (insn) != TYPE_SFUNC)
12041 continue;
12042 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12044 gcc_unreachable ();
12047 /* This function returns a constant rtx that represents 2**15 / pi in
12048 SFmode. It's used to scale SFmode angles, in radians, to a
12049 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
12050 maps to 0x10000). */
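/* As a cross-check on the literal used below: 0x10000 / (2 * pi)
   = 2**15 / pi = 32768 / 3.14159265..., which is approximately
   10430.378350470453.  */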
12052 static GTY(()) rtx sh_fsca_sf2int_rtx;
12054 rtx
12055 sh_fsca_sf2int (void)
12057 if (! sh_fsca_sf2int_rtx)
12059 REAL_VALUE_TYPE rv;
12061 real_from_string (&rv, "10430.378350470453");
12062 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12065 return sh_fsca_sf2int_rtx;
12068 /* This function returns a constant rtx that represents 2**15 / pi in
12069 DFmode. It's used to scale DFmode angles, in radians, to a
12070 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
12071 maps to 0x10000). */
12073 static GTY(()) rtx sh_fsca_df2int_rtx;
12075 rtx
12076 sh_fsca_df2int (void)
12078 if (! sh_fsca_df2int_rtx)
12080 REAL_VALUE_TYPE rv;
12082 real_from_string (&rv, "10430.378350470453");
12083 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
12086 return sh_fsca_df2int_rtx;
12089 /* This function returns a constant rtx that represents pi / 2**15 in
12090 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12091 of a full circle back to an SFmode value (i.e., 0x10000 maps to
12092 2*pi). */
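/* As a cross-check on the literal used below: pi / 2**15
   = 3.14159265... / 32768, approximately 9.587379924285257e-5,
   the reciprocal of the sh_fsca_sf2int scale factor.  */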
12094 static GTY(()) rtx sh_fsca_int2sf_rtx;
12096 rtx
12097 sh_fsca_int2sf (void)
12099 if (! sh_fsca_int2sf_rtx)
12101 REAL_VALUE_TYPE rv;
12103 real_from_string (&rv, "9.587379924285257e-5");
12104 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12107 return sh_fsca_int2sf_rtx;
12110 /* Initialize the CUMULATIVE_ARGS structure. */
12112 void
12113 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12114 tree fntype,
12115 rtx libname ATTRIBUTE_UNUSED,
12116 tree fndecl,
12117 signed int n_named_args,
12118 enum machine_mode mode)
12120 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12121 pcum->free_single_fp_reg = 0;
12122 pcum->stack_regs = 0;
12123 pcum->byref_regs = 0;
12124 pcum->byref = 0;
12125 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12127 /* XXX - Should we check TARGET_HITACHI here ??? */
12128 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12130 if (fntype)
12132 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12133 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12134 pcum->prototype_p = prototype_p (fntype);
12135 pcum->arg_count [(int) SH_ARG_INT]
12136 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12138 pcum->call_cookie
12139 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12140 && pcum->arg_count [(int) SH_ARG_INT] == 0
12141 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12142 ? int_size_in_bytes (TREE_TYPE (fntype))
12143 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12144 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12145 == FIRST_RET_REG));
12147 else
12149 pcum->arg_count [(int) SH_ARG_INT] = 0;
12150 pcum->prototype_p = FALSE;
12151 if (mode != VOIDmode)
12153 pcum->call_cookie =
12154 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12155 && GET_MODE_SIZE (mode) > 4
12156 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12158 /* If the default ABI is the Renesas ABI then all library
12159 calls must assume that the library will be using the
12160 Renesas ABI. So if the function would return its result
12161 in memory then we must force the address of this memory
12162 block onto the stack. Ideally we would like to call
12163 targetm.calls.return_in_memory() here but we do not have
12164 the TYPE or the FNDECL available so we synthesize the
12165 contents of that function as best we can. */
12166 pcum->force_mem =
12167 (TARGET_DEFAULT & MASK_HITACHI)
12168 && (mode == BLKmode
12169 || (GET_MODE_SIZE (mode) > 4
12170 && !(mode == DFmode
12171 && TARGET_FPU_DOUBLE)));
12173 else
12175 pcum->call_cookie = 0;
12176 pcum->force_mem = FALSE;
12181 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12182 not descend into CONST_DOUBLEs when doing the replacement.
12184 Note that copying is not done so X must not be shared unless all copies
12185 are to be modified.
12187 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12188 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12189 replacements[n*2+1] - and that we take mode changes into account.
12191 If a replacement is ambiguous, return NULL_RTX.
12193 If MODIFY is zero, don't modify any rtl in place,
12194 just return zero or nonzero for failure / success. */
12196 rtx
12197 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12199 int i, j;
12200 const char *fmt;
12202 /* The following prevents infinite loops when we change a MEM inside a
12203 CONST_DOUBLE into the same CONST_DOUBLE. */
12204 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12205 return x;
12207 for (i = n_replacements - 1; i >= 0 ; i--)
12208 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12209 return replacements[i*2+1];
12211 /* Allow this function to make replacements in EXPR_LISTs. */
12212 if (x == 0)
12213 return 0;
12215 if (GET_CODE (x) == SUBREG)
12217 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12218 n_replacements, modify);
12220 if (CONST_INT_P (new_rtx))
12222 x = simplify_subreg (GET_MODE (x), new_rtx,
12223 GET_MODE (SUBREG_REG (x)),
12224 SUBREG_BYTE (x));
12225 if (! x)
12226 abort ();
12228 else if (modify)
12229 SUBREG_REG (x) = new_rtx;
12231 return x;
12233 else if (REG_P (x))
12235 unsigned regno = REGNO (x);
12236 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12237 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12238 rtx result = NULL_RTX;
12240 for (i = n_replacements - 1; i >= 0; i--)
12242 rtx from = replacements[i*2];
12243 rtx to = replacements[i*2+1];
12244 unsigned from_regno, from_nregs, to_regno, new_regno;
12246 if (!REG_P (from))
12247 continue;
12248 from_regno = REGNO (from);
12249 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12250 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12251 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12253 if (regno < from_regno
12254 || regno + nregs > from_regno + nregs
12255 || !REG_P (to)
12256 || result)
12257 return NULL_RTX;
12258 to_regno = REGNO (to);
12259 if (to_regno < FIRST_PSEUDO_REGISTER)
12261 new_regno = regno + to_regno - from_regno;
12262 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12263 != nregs)
12264 return NULL_RTX;
12265 result = gen_rtx_REG (GET_MODE (x), new_regno);
12267 else if (GET_MODE (x) <= GET_MODE (to))
12268 result = gen_lowpart_common (GET_MODE (x), to);
12269 else
12270 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12273 return result ? result : x;
12275 else if (GET_CODE (x) == ZERO_EXTEND)
12277 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12278 n_replacements, modify);
12280 if (CONST_INT_P (new_rtx))
12282 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12283 new_rtx, GET_MODE (XEXP (x, 0)));
12284 if (! x)
12285 abort ();
12287 else if (modify)
12288 XEXP (x, 0) = new_rtx;
12290 return x;
12293 fmt = GET_RTX_FORMAT (GET_CODE (x));
12294 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12296 rtx new_rtx;
12298 if (fmt[i] == 'e')
12300 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12301 n_replacements, modify);
12302 if (!new_rtx)
12303 return NULL_RTX;
12304 if (modify)
12305 XEXP (x, i) = new_rtx;
12307 else if (fmt[i] == 'E')
12308 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12310 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12311 n_replacements, modify);
12312 if (!new_rtx)
12313 return NULL_RTX;
12314 if (modify)
12315 XVECEXP (x, i, j) = new_rtx;
12319 return x;
12322 rtx
12323 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12325 enum rtx_code code = TRUNCATE;
12327 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12329 rtx inner = XEXP (x, 0);
12330 enum machine_mode inner_mode = GET_MODE (inner);
12332 if (inner_mode == mode)
12333 return inner;
12334 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12335 x = inner;
12336 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12337 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12339 code = GET_CODE (x);
12340 x = inner;
12343 return gen_rtx_fmt_e (code, mode, x);
12346 /* Called via for_each_rtx after reload, to clean up truncates of
12347 registers that span multiple actual hard registers. */
12348 int
12349 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12351 rtx x = *p, reg;
12353 if (GET_CODE (x) != TRUNCATE)
12354 return 0;
12355 reg = XEXP (x, 0);
12356 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12358 enum machine_mode reg_mode = GET_MODE (reg);
12359 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12360 subreg_lowpart_offset (DImode, reg_mode));
12361 *(int*) n_changes += 1;
12362 return -1;
12364 return 0;
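/* A caller would typically walk an insn with something like

     int n_changes = 0;
     for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);

   and act on the insn only when n_changes is nonzero.  */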
12367 /* Load and store depend on the highpart of the address. However,
12368 set_attr_alternative does not give well-defined results before reload,
12369 so we must look at the rtl ourselves to see if any of the feeding
12370 registers is used in a memref. */
12372 /* Called by sh_contains_memref_p via for_each_rtx. */
12373 static int
12374 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12376 return (MEM_P (*loc));
12379 /* Return nonzero iff INSN contains a MEM. */
12380 int
12381 sh_contains_memref_p (rtx insn)
12383 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12386 /* Return nonzero iff INSN loads a banked register. */
12387 int
12388 sh_loads_bankedreg_p (rtx insn)
12390 if (GET_CODE (PATTERN (insn)) == SET)
12392 rtx op = SET_DEST (PATTERN(insn));
12393 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12394 return 1;
12397 return 0;
12400 /* FNADDR is the MEM expression from a call expander. Return an address
12401 to use in an SHmedia insn pattern. */
12402 rtx
12403 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12405 int is_sym;
12407 fnaddr = XEXP (fnaddr, 0);
12408 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12409 if (flag_pic && is_sym)
12411 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12413 rtx reg = gen_reg_rtx (Pmode);
12415 /* We must not use GOTPLT for sibcalls, because PIC_REG
12416 must be restored before the PLT code gets to run. */
12417 if (is_sibcall)
12418 emit_insn (gen_symGOT2reg (reg, fnaddr));
12419 else
12420 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12421 fnaddr = reg;
12423 else
12425 fnaddr = gen_sym2PIC (fnaddr);
12426 PUT_MODE (fnaddr, Pmode);
12429 /* If ptabs might trap, make this visible to the rest of the compiler.
12430 We generally assume that symbols pertain to valid locations, but
12431 it is possible to generate invalid symbols with asm or linker tricks.
12432 In a list of functions where each returns its successor, an invalid
12433 symbol might denote an empty list. */
12434 if (!TARGET_PT_FIXED
12435 && (!is_sym || TARGET_INVALID_SYMBOLS)
12436 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12438 rtx tr = gen_reg_rtx (PDImode);
12440 emit_insn (gen_ptabs (tr, fnaddr));
12441 fnaddr = tr;
12443 else if (! target_reg_operand (fnaddr, Pmode))
12444 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12445 return fnaddr;
12448 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12450 static reg_class_t
12451 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12453 if (rclass == NO_REGS
12454 && TARGET_SHMEDIA
12455 && (CONST_DOUBLE_P (x)
12456 || GET_CODE (x) == SYMBOL_REF
12457 || PIC_ADDR_P (x)))
12458 return GENERAL_REGS;
12460 return rclass;
12463 /* Implement TARGET_SECONDARY_RELOAD. */
12465 static reg_class_t
12466 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12467 enum machine_mode mode, secondary_reload_info *sri)
12469 enum reg_class rclass = (enum reg_class) rclass_i;
12471 if (in_p)
12473 if (REGCLASS_HAS_FP_REG (rclass)
12474 && ! TARGET_SHMEDIA
12475 && immediate_operand ((x), mode)
12476 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12477 && mode == SFmode && fldi_ok ()))
12478 switch (mode)
12480 case SFmode:
12481 sri->icode = CODE_FOR_reload_insf__frn;
12482 return NO_REGS;
12483 case DFmode:
12484 sri->icode = CODE_FOR_reload_indf__frn;
12485 return NO_REGS;
12486 case SImode:
12487 /* ??? If we knew that we are in the appropriate mode -
12488 single precision - we could use a reload pattern directly. */
12489 return FPUL_REGS;
12490 default:
12491 abort ();
12493 if (rclass == FPUL_REGS
12494 && ((REG_P (x)
12495 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12496 || REGNO (x) == T_REG))
12497 || GET_CODE (x) == PLUS))
12498 return GENERAL_REGS;
12499 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12501 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12502 return GENERAL_REGS;
12503 else if (mode == SFmode)
12504 return FP_REGS;
12505 sri->icode = CODE_FOR_reload_insi__i_fpul;
12506 return NO_REGS;
12508 if (rclass == FPSCR_REGS
12509 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12510 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12511 return GENERAL_REGS;
12512 if (REGCLASS_HAS_FP_REG (rclass)
12513 && TARGET_SHMEDIA
12514 && immediate_operand (x, mode)
12515 && x != CONST0_RTX (GET_MODE (x))
12516 && GET_MODE (x) != V4SFmode)
12517 return GENERAL_REGS;
12518 if ((mode == QImode || mode == HImode)
12519 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12521 sri->icode = ((mode == QImode)
12522 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12523 return NO_REGS;
12525 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12526 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12527 return TARGET_REGS;
12528 } /* end of input-only processing. */
12530 if (((REGCLASS_HAS_FP_REG (rclass)
12531 && (REG_P (x)
12532 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12533 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12534 && TARGET_FMOVD))))
12535 || (REGCLASS_HAS_GENERAL_REG (rclass)
12536 && REG_P (x)
12537 && FP_REGISTER_P (REGNO (x))))
12538 && ! TARGET_SHMEDIA
12539 && (mode == SFmode || mode == SImode))
12540 return FPUL_REGS;
12541 if ((rclass == FPUL_REGS
12542 || (REGCLASS_HAS_FP_REG (rclass)
12543 && ! TARGET_SHMEDIA && mode == SImode))
12544 && (MEM_P (x)
12545 || (REG_P (x)
12546 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12547 || REGNO (x) == T_REG
12548 || system_reg_operand (x, VOIDmode)))))
12550 if (rclass == FPUL_REGS)
12551 return GENERAL_REGS;
12552 return FPUL_REGS;
12554 if ((rclass == TARGET_REGS
12555 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12556 && !satisfies_constraint_Csy (x)
12557 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12558 return GENERAL_REGS;
12559 if ((rclass == MAC_REGS || rclass == PR_REGS)
12560 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12561 && rclass != REGNO_REG_CLASS (REGNO (x)))
12562 return GENERAL_REGS;
12563 if (rclass != GENERAL_REGS && REG_P (x)
12564 && TARGET_REGISTER_P (REGNO (x)))
12565 return GENERAL_REGS;
12566 return NO_REGS;
12569 static void
12570 sh_conditional_register_usage (void)
12572 int regno;
12573 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
12574 if (! VALID_REGISTER_P (regno))
12575 fixed_regs[regno] = call_used_regs[regno] = 1;
12576 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
12577 if (TARGET_SH5)
12579 call_used_regs[FIRST_GENERAL_REG + 8]
12580 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
12581 call_really_used_regs[FIRST_GENERAL_REG + 8]
12582 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
12584 if (TARGET_SHMEDIA)
12586 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
12587 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
12588 regno_reg_class[FIRST_FP_REG] = FP_REGS;
12590 if (flag_pic)
12592 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12593 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12595 /* Renesas saves and restores mac registers on call. */
12596 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
12598 call_really_used_regs[MACH_REG] = 0;
12599 call_really_used_regs[MACL_REG] = 0;
12601 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
12602 regno <= LAST_FP_REG; regno += 2)
12603 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
12604 if (TARGET_SHMEDIA)
12606 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
12607 if (! fixed_regs[regno] && call_really_used_regs[regno])
12608 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12610 else
12611 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
12612 if (! fixed_regs[regno] && call_really_used_regs[regno])
12613 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
12617 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12619 #include "gt-sh.h"