gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "reload.h"
35 #include "function.h"
36 #include "regs.h"
37 #include "hard-reg-set.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
42 #include "recog.h"
43 #include "integrate.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "ggc.h"
56 #include "gimple.h"
57 #include "cfgloop.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
75 they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
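/* Illustrative use of SH_ATTRIBUTES (a sketch, not code from this file):
   given either a decl or a type node, the same lookup works for both, e.g.

       tree attrs = SH_ATTRIBUTES (node);
       if (lookup_attribute ("renesas", attrs) != NULL_TREE)
         ...;

   where `node' is a placeholder name.  */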
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
115 reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void sh_option_override (void);
186 static void sh_option_init_struct (struct gcc_options *);
187 static void sh_option_default_params (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static rtx mark_constant_pool_use (rtx);
197 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
198 static tree sh_handle_resbank_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
201 tree, int, bool *);
202 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
204 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
205 static void sh_print_operand (FILE *, rtx, int);
206 static void sh_print_operand_address (FILE *, rtx);
207 static bool sh_print_operand_punct_valid_p (unsigned char code);
208 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
209 static void sh_insert_attributes (tree, tree *);
210 static const char *sh_check_pch_target_flags (int);
211 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
212 static int sh_adjust_cost (rtx, rtx, rtx, int);
213 static int sh_issue_rate (void);
214 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
215 static short find_set_regmode_weight (rtx, enum machine_mode);
216 static short find_insn_regmode_weight (rtx, enum machine_mode);
217 static void find_regmode_weight (basic_block, enum machine_mode);
218 static int find_r0_life_regions (basic_block);
219 static void sh_md_init_global (FILE *, int, int);
220 static void sh_md_finish_global (FILE *, int);
221 static int rank_for_reorder (const void *, const void *);
222 static void swap_reorder (rtx *, int);
223 static void ready_reorder (rtx *, int);
224 static short high_pressure (enum machine_mode);
225 static int sh_reorder (FILE *, int, rtx *, int *, int);
226 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
227 static void sh_md_init (FILE *, int, int);
228 static int sh_variable_issue (FILE *, int, rtx, int);
230 static bool sh_function_ok_for_sibcall (tree, tree);
232 static bool sh_cannot_modify_jumps_p (void);
233 static reg_class_t sh_target_reg_class (void);
234 static bool sh_optimize_target_register_callee_saved (bool);
235 static bool sh_ms_bitfield_layout_p (const_tree);
237 static void sh_init_builtins (void);
238 static tree sh_builtin_decl (unsigned, bool);
239 static void sh_media_init_builtins (void);
240 static tree sh_media_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
243 static void sh_file_start (void);
244 static int flow_dependent_p (rtx, rtx);
245 static void flow_dependent_p_1 (rtx, const_rtx, void *);
246 static int shiftcosts (rtx);
247 static int andcosts (rtx);
248 static int addsubcosts (rtx);
249 static int multcosts (rtx);
250 static bool unspec_caller_rtx_p (rtx);
251 static bool sh_cannot_copy_insn_p (rtx);
252 static bool sh_rtx_costs (rtx, int, int, int *, bool);
253 static int sh_address_cost (rtx, bool);
254 static int sh_pr_n_sets (void);
255 static rtx sh_allocate_initial_value (rtx);
256 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
257 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
258 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
259 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
260 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
261 static int scavenge_reg (HARD_REG_SET *s);
262 struct save_schedule_s;
263 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
264 struct save_schedule_s *, int);
266 static rtx sh_struct_value_rtx (tree, int);
267 static rtx sh_function_value (const_tree, const_tree, bool);
268 static bool sh_function_value_regno_p (const unsigned int);
269 static rtx sh_libcall_value (enum machine_mode, const_rtx);
270 static bool sh_return_in_memory (const_tree, const_tree);
271 static rtx sh_builtin_saveregs (void);
272 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
273 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
274 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
275 static tree sh_build_builtin_va_list (void);
276 static void sh_va_start (tree, rtx);
277 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
278 static bool sh_promote_prototypes (const_tree);
279 static enum machine_mode sh_promote_function_mode (const_tree type,
280 enum machine_mode,
281 int *punsignedp,
282 const_tree funtype,
283 int for_return);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 const_tree, bool);
286 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
287 const_tree, bool);
288 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
289 tree, bool);
290 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
291 const_tree, bool);
292 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
293 const_tree, bool);
294 static bool sh_scalar_mode_supported_p (enum machine_mode);
295 static int sh_dwarf_calling_convention (const_tree);
296 static void sh_encode_section_info (tree, rtx, int);
297 static int sh2a_function_vector_p (tree);
298 static void sh_trampoline_init (rtx, tree, rtx);
299 static rtx sh_trampoline_adjust_address (rtx);
301 static const struct attribute_spec sh_attribute_table[] =
303 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
304 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
305 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
306 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
307 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
308 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
309 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
310 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
311 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
312 #ifdef SYMBIAN
313 /* Symbian support adds two new attributes:
314 dllexport - for exporting a function/variable that will live in a dll
315 dllimport - for importing a function/variable from a dll
317 Microsoft allows multiple declspecs in one __declspec, separating
318 them with spaces. We do NOT support this. Instead, use __declspec
319 multiple times. */
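	/* Illustrative usage (a sketch; the declarations below are made-up
	   examples, not code from this file):

	       __declspec (dllexport) int exported_function (void);
	       __declspec (dllimport) extern int imported_variable;

	   i.e. one attribute per __declspec, repeated as needed, rather than
	   several space-separated attributes inside a single __declspec.  */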
320 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
321 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
322 #endif
323 { NULL, 0, 0, false, false, false, NULL }
326 /* Set default optimization options. */
327 static const struct default_options sh_option_optimization_table[] =
329 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
330 { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 },
331 { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 },
332 { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 },
333 { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 },
334 /* We can't meaningfully test TARGET_SHMEDIA here, because -m
335 options haven't been parsed yet, hence we'd read only the
336 default. sh_target_reg_class will return NO_REGS if this is
337 not SHMEDIA, so it's OK to always set
338 flag_branch_target_load_optimize. */
339 { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 },
340 { OPT_LEVELS_NONE, 0, NULL, 0 }
343 /* Initialize the GCC target structure. */
344 #undef TARGET_ATTRIBUTE_TABLE
345 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
347 /* The next two are used for debug info when compiling with -gdwarf. */
348 #undef TARGET_ASM_UNALIGNED_HI_OP
349 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
350 #undef TARGET_ASM_UNALIGNED_SI_OP
351 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
353 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
354 #undef TARGET_ASM_UNALIGNED_DI_OP
355 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
356 #undef TARGET_ASM_ALIGNED_DI_OP
357 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE sh_option_override
361 #undef TARGET_OPTION_OPTIMIZATION_TABLE
362 #define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table
363 #undef TARGET_OPTION_INIT_STRUCT
364 #define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
365 #undef TARGET_OPTION_DEFAULT_PARAMS
366 #define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
368 #undef TARGET_PRINT_OPERAND
369 #define TARGET_PRINT_OPERAND sh_print_operand
370 #undef TARGET_PRINT_OPERAND_ADDRESS
371 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
372 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
373 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
375 #undef TARGET_ASM_FUNCTION_EPILOGUE
376 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
378 #undef TARGET_ASM_OUTPUT_MI_THUNK
379 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
381 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
382 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
384 #undef TARGET_ASM_FILE_START
385 #define TARGET_ASM_FILE_START sh_file_start
386 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
387 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
389 #undef TARGET_DEFAULT_TARGET_FLAGS
390 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
391 #undef TARGET_HANDLE_OPTION
392 #define TARGET_HANDLE_OPTION sh_handle_option
394 #undef TARGET_REGISTER_MOVE_COST
395 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
397 #undef TARGET_INSERT_ATTRIBUTES
398 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
400 #undef TARGET_SCHED_ADJUST_COST
401 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
403 #undef TARGET_SCHED_ISSUE_RATE
404 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
406 /* The next 5 hooks have been implemented for re-enabling sched1.  With the
407 help of these hooks we limit the movement of insns in sched1 to reduce
408 the register pressure.  The overall idea is to keep count of the SImode
409 and SFmode regs required by already scheduled insns.  When these counts
410 cross certain threshold values, we give priority to insns that free
411 registers.  The insn that frees registers is most likely to be the insn
412 with the lowest LUID (original insn order); but such an insn might be in
413 the stalled queue (Q) instead of the ready queue (R).  To solve this, we
414 skip up to a maximum of 8 cycles so that such insns may move from Q -> R.
416 The hooks are described below:
418 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
419 it is called inside the sched_init function just after the
420 find_insn_reg_weights function call.  It is used to calculate the SImode
421 and SFmode weights of the insns of the basic blocks, much like what
422 find_insn_reg_weights does.
423 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
425 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
426 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
427 (Q)->(R).
429 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
430 high, reorder the ready queue so that the insn with the lowest LUID will
431 be issued next.
433 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
434 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
436 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
437 can be returned from TARGET_SCHED_REORDER2.
439 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
441 #undef TARGET_SCHED_DFA_NEW_CYCLE
442 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
444 #undef TARGET_SCHED_INIT_GLOBAL
445 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
447 #undef TARGET_SCHED_FINISH_GLOBAL
448 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
450 #undef TARGET_SCHED_VARIABLE_ISSUE
451 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
453 #undef TARGET_SCHED_REORDER
454 #define TARGET_SCHED_REORDER sh_reorder
456 #undef TARGET_SCHED_REORDER2
457 #define TARGET_SCHED_REORDER2 sh_reorder2
459 #undef TARGET_SCHED_INIT
460 #define TARGET_SCHED_INIT sh_md_init
462 #undef TARGET_LEGITIMIZE_ADDRESS
463 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
465 #undef TARGET_CANNOT_MODIFY_JUMPS_P
466 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
467 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
468 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
469 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
470 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
471 sh_optimize_target_register_callee_saved
473 #undef TARGET_MS_BITFIELD_LAYOUT_P
474 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
476 #undef TARGET_INIT_BUILTINS
477 #define TARGET_INIT_BUILTINS sh_init_builtins
478 #undef TARGET_BUILTIN_DECL
479 #define TARGET_BUILTIN_DECL sh_builtin_decl
480 #undef TARGET_EXPAND_BUILTIN
481 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
483 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
484 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
486 #undef TARGET_CANNOT_COPY_INSN_P
487 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
488 #undef TARGET_RTX_COSTS
489 #define TARGET_RTX_COSTS sh_rtx_costs
490 #undef TARGET_ADDRESS_COST
491 #define TARGET_ADDRESS_COST sh_address_cost
492 #undef TARGET_ALLOCATE_INITIAL_VALUE
493 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
495 #undef TARGET_MACHINE_DEPENDENT_REORG
496 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
498 #undef TARGET_DWARF_REGISTER_SPAN
499 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
501 #ifdef HAVE_AS_TLS
502 #undef TARGET_HAVE_TLS
503 #define TARGET_HAVE_TLS true
504 #endif
506 #undef TARGET_PROMOTE_PROTOTYPES
507 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
508 #undef TARGET_PROMOTE_FUNCTION_MODE
509 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
511 #undef TARGET_FUNCTION_VALUE
512 #define TARGET_FUNCTION_VALUE sh_function_value
513 #undef TARGET_FUNCTION_VALUE_REGNO_P
514 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
515 #undef TARGET_LIBCALL_VALUE
516 #define TARGET_LIBCALL_VALUE sh_libcall_value
517 #undef TARGET_STRUCT_VALUE_RTX
518 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
519 #undef TARGET_RETURN_IN_MEMORY
520 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
522 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
523 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
524 #undef TARGET_SETUP_INCOMING_VARARGS
525 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
526 #undef TARGET_STRICT_ARGUMENT_NAMING
527 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
528 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
529 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
530 #undef TARGET_MUST_PASS_IN_STACK
531 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
534 #undef TARGET_CALLEE_COPIES
535 #define TARGET_CALLEE_COPIES sh_callee_copies
536 #undef TARGET_ARG_PARTIAL_BYTES
537 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
538 #undef TARGET_FUNCTION_ARG
539 #define TARGET_FUNCTION_ARG sh_function_arg
540 #undef TARGET_FUNCTION_ARG_ADVANCE
541 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
543 #undef TARGET_BUILD_BUILTIN_VA_LIST
544 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
545 #undef TARGET_EXPAND_BUILTIN_VA_START
546 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
547 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
548 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
550 #undef TARGET_SCALAR_MODE_SUPPORTED_P
551 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
552 #undef TARGET_VECTOR_MODE_SUPPORTED_P
553 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
555 #undef TARGET_CHECK_PCH_TARGET_FLAGS
556 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
558 #undef TARGET_DWARF_CALLING_CONVENTION
559 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
561 #undef TARGET_FRAME_POINTER_REQUIRED
562 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
564 /* Return regmode weight for insn. */
565 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
567 /* Return current register pressure for regmode. */
568 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
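/* For illustration only (a hedged sketch; the real thresholds and the
   high_pressure function live with the scheduling hook implementations
   later in this file): the reordering hooks base their decision on a
   check of roughly this shape,

       if (CURR_REGMODE_PRESSURE (SImode) > simode_threshold
           || CURR_REGMODE_PRESSURE (SFmode) > sfmode_threshold)
         ... give priority to insns that free registers ...

   where simode_threshold and sfmode_threshold are placeholder names.  */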
570 #undef TARGET_ENCODE_SECTION_INFO
571 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
573 #ifdef SYMBIAN
575 #undef TARGET_ENCODE_SECTION_INFO
576 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
577 #undef TARGET_STRIP_NAME_ENCODING
578 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
579 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
580 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
582 #endif /* SYMBIAN */
584 #undef TARGET_SECONDARY_RELOAD
585 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
587 #undef TARGET_LEGITIMATE_ADDRESS_P
588 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
590 #undef TARGET_TRAMPOLINE_INIT
591 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
592 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
593 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
595 /* Machine-specific symbol_ref flags. */
596 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
598 struct gcc_target targetm = TARGET_INITIALIZER;
600 /* Implement TARGET_HANDLE_OPTION. */
602 static bool
603 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
604 int value ATTRIBUTE_UNUSED)
606 switch (code)
608 case OPT_m1:
609 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
610 return true;
612 case OPT_m2:
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
614 return true;
616 case OPT_m2a:
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
618 return true;
620 case OPT_m2a_nofpu:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
622 return true;
624 case OPT_m2a_single:
625 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
626 return true;
628 case OPT_m2a_single_only:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
630 return true;
632 case OPT_m2e:
633 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
634 return true;
636 case OPT_m3:
637 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
638 return true;
640 case OPT_m3e:
641 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
642 return true;
644 case OPT_m4:
645 case OPT_m4_100:
646 case OPT_m4_200:
647 case OPT_m4_300:
648 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
649 return true;
651 case OPT_m4_nofpu:
652 case OPT_m4_100_nofpu:
653 case OPT_m4_200_nofpu:
654 case OPT_m4_300_nofpu:
655 case OPT_m4_340:
656 case OPT_m4_400:
657 case OPT_m4_500:
658 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
659 return true;
661 case OPT_m4_single:
662 case OPT_m4_100_single:
663 case OPT_m4_200_single:
664 case OPT_m4_300_single:
665 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
666 return true;
668 case OPT_m4_single_only:
669 case OPT_m4_100_single_only:
670 case OPT_m4_200_single_only:
671 case OPT_m4_300_single_only:
672 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
673 return true;
675 case OPT_m4a:
676 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
677 return true;
679 case OPT_m4a_nofpu:
680 case OPT_m4al:
681 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
682 return true;
684 case OPT_m4a_single:
685 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
686 return true;
688 case OPT_m4a_single_only:
689 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
690 return true;
692 case OPT_m5_32media:
693 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
694 return true;
696 case OPT_m5_32media_nofpu:
697 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
698 return true;
700 case OPT_m5_64media:
701 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
702 return true;
704 case OPT_m5_64media_nofpu:
705 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
706 return true;
708 case OPT_m5_compact:
709 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
710 return true;
712 case OPT_m5_compact_nofpu:
713 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
714 return true;
716 default:
717 return true;
721 /* Implement TARGET_OPTION_INIT_STRUCT. */
722 static void
723 sh_option_init_struct (struct gcc_options *opts)
725 /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
726 here, so leave it to TARGET_OPTION_OVERRIDE to set
727 flag_finite_math_only. We set it to 2 here so we know if the user
728 explicitly requested this to be on or off. */
729 opts->x_flag_finite_math_only = 2;
732 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
733 static void
734 sh_option_default_params (void)
736 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
739 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
740 various options, and do some machine dependent initialization. */
741 static void
742 sh_option_override (void)
744 int regno;
746 SUBTARGET_OVERRIDE_OPTIONS;
747 if (optimize > 1 && !optimize_size)
748 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
749 if (flag_finite_math_only == 2)
750 flag_finite_math_only
751 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
752 if (TARGET_SH2E && !flag_finite_math_only)
753 target_flags |= MASK_IEEE;
754 sh_cpu = PROCESSOR_SH1;
755 assembler_dialect = 0;
756 if (TARGET_SH2)
757 sh_cpu = PROCESSOR_SH2;
758 if (TARGET_SH2E)
759 sh_cpu = PROCESSOR_SH2E;
760 if (TARGET_SH2A)
761 sh_cpu = PROCESSOR_SH2A;
762 if (TARGET_SH3)
763 sh_cpu = PROCESSOR_SH3;
764 if (TARGET_SH3E)
765 sh_cpu = PROCESSOR_SH3E;
766 if (TARGET_SH4)
768 assembler_dialect = 1;
769 sh_cpu = PROCESSOR_SH4;
771 if (TARGET_SH4A_ARCH)
773 assembler_dialect = 1;
774 sh_cpu = PROCESSOR_SH4A;
776 if (TARGET_SH5)
778 sh_cpu = PROCESSOR_SH5;
779 target_flags |= MASK_ALIGN_DOUBLE;
780 if (TARGET_SHMEDIA_FPU)
781 target_flags |= MASK_FMOVD;
782 if (TARGET_SHMEDIA)
784 /* There are no delay slots on SHmedia. */
785 flag_delayed_branch = 0;
786 /* Relaxation isn't yet supported for SHmedia.  */
787 target_flags &= ~MASK_RELAX;
788 /* After reload, if-conversion does little good but can cause
789 ICEs:
790 - find_if_block doesn't do anything for SH because we don't
791 have conditional execution patterns. (We use conditional
792 move patterns, which are handled differently, and only
793 before reload).
794 - find_cond_trap doesn't do anything for the SH because we
795 don't have conditional traps.
796 - find_if_case_1 uses redirect_edge_and_branch_force in
797 the only path that does an optimization, and this causes
798 an ICE when branch targets are in registers.
799 - find_if_case_2 doesn't do anything for the SHmedia after
800 reload except when it can redirect a tablejump - and
801 that's rather rare. */
802 flag_if_conversion2 = 0;
803 if (! strcmp (sh_div_str, "call"))
804 sh_div_strategy = SH_DIV_CALL;
805 else if (! strcmp (sh_div_str, "call2"))
806 sh_div_strategy = SH_DIV_CALL2;
807 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
808 sh_div_strategy = SH_DIV_FP;
809 else if (! strcmp (sh_div_str, "inv"))
810 sh_div_strategy = SH_DIV_INV;
811 else if (! strcmp (sh_div_str, "inv:minlat"))
812 sh_div_strategy = SH_DIV_INV_MINLAT;
813 else if (! strcmp (sh_div_str, "inv20u"))
814 sh_div_strategy = SH_DIV_INV20U;
815 else if (! strcmp (sh_div_str, "inv20l"))
816 sh_div_strategy = SH_DIV_INV20L;
817 else if (! strcmp (sh_div_str, "inv:call2"))
818 sh_div_strategy = SH_DIV_INV_CALL2;
819 else if (! strcmp (sh_div_str, "inv:call"))
820 sh_div_strategy = SH_DIV_INV_CALL;
821 else if (! strcmp (sh_div_str, "inv:fp"))
823 if (TARGET_FPU_ANY)
824 sh_div_strategy = SH_DIV_INV_FP;
825 else
826 sh_div_strategy = SH_DIV_INV;
828 TARGET_CBRANCHDI4 = 0;
829 /* Assembler CFI isn't yet fully supported for SHmedia. */
830 flag_dwarf2_cfi_asm = 0;
833 else
835 /* Only the sh64-elf assembler supports .quad properly.  */
836 targetm.asm_out.aligned_op.di = NULL;
837 targetm.asm_out.unaligned_op.di = NULL;
839 if (TARGET_SH1)
841 if (! strcmp (sh_div_str, "call-div1"))
842 sh_div_strategy = SH_DIV_CALL_DIV1;
843 else if (! strcmp (sh_div_str, "call-fp")
844 && (TARGET_FPU_DOUBLE
845 || (TARGET_HARD_SH4 && TARGET_SH2E)
846 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
847 sh_div_strategy = SH_DIV_CALL_FP;
848 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
849 sh_div_strategy = SH_DIV_CALL_TABLE;
850 else
851 /* Pick one that makes most sense for the target in general.
852 It is not much good to use different functions depending
853 on -Os, since then we'll end up with two different functions
854 when some of the code is compiled for size, and some for
855 speed. */
857 /* SH4 tends to emphasize speed. */
858 if (TARGET_HARD_SH4)
859 sh_div_strategy = SH_DIV_CALL_TABLE;
860 /* These have their own way of doing things. */
861 else if (TARGET_SH2A)
862 sh_div_strategy = SH_DIV_INTRINSIC;
863 /* ??? Should we use the integer SHmedia function instead? */
864 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
865 sh_div_strategy = SH_DIV_CALL_FP;
866 /* SH1 .. SH3 cores often go into small-footprint systems, so
867 default to the smallest implementation available. */
868 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
869 sh_div_strategy = SH_DIV_CALL_TABLE;
870 else
871 sh_div_strategy = SH_DIV_CALL_DIV1;
873 if (!TARGET_SH1)
874 TARGET_PRETEND_CMOVE = 0;
875 if (sh_divsi3_libfunc[0])
876 ; /* User supplied - leave it alone. */
877 else if (TARGET_DIVIDE_CALL_FP)
878 sh_divsi3_libfunc = "__sdivsi3_i4";
879 else if (TARGET_DIVIDE_CALL_TABLE)
880 sh_divsi3_libfunc = "__sdivsi3_i4i";
881 else if (TARGET_SH5)
882 sh_divsi3_libfunc = "__sdivsi3_1";
883 else
884 sh_divsi3_libfunc = "__sdivsi3";
885 if (sh_branch_cost == -1)
886 sh_branch_cost
887 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
889 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
890 if (! VALID_REGISTER_P (regno))
891 sh_register_names[regno][0] = '\0';
893 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
894 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
895 sh_additional_register_names[regno][0] = '\0';
897 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
899 if ((flag_pic && ! TARGET_PREFERGOT)
900 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
901 flag_no_function_cse = 1;
903 if (targetm.small_register_classes_for_mode_p (VOIDmode))
905 /* Never run scheduling before reload, since that can
906 break global alloc, and generates slower code anyway due
907 to the pressure on R0. */
908 /* Enable sched1 for SH4 if the user explicitly requests it.
909 When sched1 is enabled, the ready queue will be reordered by
910 the target hooks if pressure is high.  We cannot do this for
911 PIC, or for SH3 and lower, as they give spill failures for R0.  */
912 if (!TARGET_HARD_SH4 || flag_pic)
913 flag_schedule_insns = 0;
914 /* ??? Current exception handling places basic block boundaries
915 after call_insns.  This causes high pressure on R0 and gives
916 spill failures for R0 in reload. See PR 22553 and the thread
917 on gcc-patches
918 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
919 else if (flag_exceptions)
921 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
922 warning (0, "ignoring -fschedule-insns because of exception handling bug");
923 flag_schedule_insns = 0;
925 else if (flag_schedule_insns
926 && !global_options_set.x_flag_schedule_insns)
927 flag_schedule_insns = 0;
930 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
931 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
933 /* Unwind info is not correct around the CFG unless either a frame pointer
934 is present or MASK_ACCUMULATE_OUTGOING_ARGS is set.  Fixing this requires rewriting
935 unwind info generation to be aware of the CFG and propagating states
936 around edges. */
937 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
938 || flag_exceptions || flag_non_call_exceptions)
939 && flag_omit_frame_pointer
940 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
942 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
943 warning (0, "unwind tables currently require either a frame pointer "
944 "or -maccumulate-outgoing-args for correctness");
945 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
948 /* Unwinding with -freorder-blocks-and-partition does not work on this
949 architecture, because it requires far jumps to labels crossing between
950 hot/cold sections, which are rejected on this architecture.  */
951 if (flag_reorder_blocks_and_partition)
953 if (flag_exceptions)
955 inform (input_location,
956 "-freorder-blocks-and-partition does not work with "
957 "exceptions on this architecture");
958 flag_reorder_blocks_and_partition = 0;
959 flag_reorder_blocks = 1;
961 else if (flag_unwind_tables)
963 inform (input_location,
964 "-freorder-blocks-and-partition does not support unwind "
965 "info on this architecture");
966 flag_reorder_blocks_and_partition = 0;
967 flag_reorder_blocks = 1;
971 if (align_loops == 0)
972 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
973 if (align_jumps == 0)
974 align_jumps = 1 << CACHE_LOG;
975 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
976 align_jumps = TARGET_SHMEDIA ? 4 : 2;
978 /* Allocation boundary (in *bytes*) for the code of a function.
979 SH1: 32 bit alignment is faster, because instructions are always
980 fetched as a pair from a longword boundary.
981 SH2 .. SH5 : align to cache line start. */
982 if (align_functions == 0)
983 align_functions
984 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
985 /* The linker relaxation code breaks when a function contains
986 alignments that are larger than that at the start of a
987 compilation unit. */
988 if (TARGET_RELAX)
990 int min_align
991 = align_loops > align_jumps ? align_loops : align_jumps;
993 /* Also take possible .long constants / mova tables into account.  */
994 if (min_align < 4)
995 min_align = 4;
996 if (align_functions < min_align)
997 align_functions = min_align;
1000 if (sh_fixed_range_str)
1001 sh_fix_range (sh_fixed_range_str);
1003 /* This target defaults to strict volatile bitfields. */
1004 if (flag_strict_volatile_bitfields < 0)
1005 flag_strict_volatile_bitfields = 1;
1008 /* Print the operand address in x to the stream. */
1010 static void
1011 sh_print_operand_address (FILE *stream, rtx x)
1013 switch (GET_CODE (x))
1015 case REG:
1016 case SUBREG:
1017 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1018 break;
1020 case PLUS:
1022 rtx base = XEXP (x, 0);
1023 rtx index = XEXP (x, 1);
1025 switch (GET_CODE (index))
1027 case CONST_INT:
1028 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1029 reg_names[true_regnum (base)]);
1030 break;
1032 case REG:
1033 case SUBREG:
1035 int base_num = true_regnum (base);
1036 int index_num = true_regnum (index);
1038 fprintf (stream, "@(r0,%s)",
1039 reg_names[MAX (base_num, index_num)]);
1040 break;
1043 default:
1044 gcc_unreachable ();
1047 break;
1049 case PRE_DEC:
1050 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1051 break;
1053 case POST_INC:
1054 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1055 break;
1057 default:
1058 x = mark_constant_pool_use (x);
1059 output_addr_const (stream, x);
1060 break;
1064 /* Print operand x (an rtx) in assembler syntax to file stream
1065 according to modifier code.
1067 '.' print a .s if insn needs delay slot
1068 ',' print LOCAL_LABEL_PREFIX
1069 '@' print trap, rte or rts depending on whether the function is a trap or interrupt handler
1070 '#' output a nop if there is nothing to put in the delay slot
1071 ''' print likelihood suffix (/u for unlikely).
1072 '>' print branch target if -fverbose-asm
1073 'O' print a constant without the #
1074 'R' print the LSW of a dp value - changes if in little endian
1075 'S' print the MSW of a dp value - changes if in little endian
1076 'T' print the next word of a dp value - same as 'R' in big endian mode.
1077 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1078 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1079 'N' print 'r63' if the operand is (const_int 0).
1080 'd' print a V2SF reg as dN instead of fpN.
1081 'm' print a pair `base,offset' or `base,index', for LD and ST.
1082 'U' Likewise for {LD,ST}{HI,LO}.
1083 'V' print the position of a single bit set.
1084 'W' print the position of a single bit cleared.
1085 't' print a memory address which is a register.
1086 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1087 'o' output an operator. */
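/* For example (a sketch, not an actual template from sh.md): an output
   template such as

       "mov.l  %S1,%S0\n\tmov.l  %R1,%R0"

   would move the most significant word and then the least significant
   word of a double-word operand, with %S and %R selecting the correct
   word for the current endianness.  */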
1089 static void
1090 sh_print_operand (FILE *stream, rtx x, int code)
1092 int regno;
1093 enum machine_mode mode;
1095 switch (code)
1097 tree trapa_attr;
1099 case '.':
1100 if (final_sequence
1101 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1102 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1103 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1104 break;
1105 case ',':
1106 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1107 break;
1108 case '@':
1109 trapa_attr = lookup_attribute ("trap_exit",
1110 DECL_ATTRIBUTES (current_function_decl));
1111 if (trapa_attr)
1112 fprintf (stream, "trapa #%ld",
1113 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1114 else if (sh_cfun_interrupt_handler_p ())
1116 if (sh_cfun_resbank_handler_p ())
1117 fprintf (stream, "resbank\n");
1118 fprintf (stream, "rte");
1120 else
1121 fprintf (stream, "rts");
1122 break;
1123 case '#':
1124 /* Output a nop if there's nothing in the delay slot. */
1125 if (dbr_sequence_length () == 0)
1126 fprintf (stream, "\n\tnop");
1127 break;
1128 case '\'':
1130 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1132 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1133 fputs ("/u", stream);
1134 break;
1136 case '>':
1137 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1139 fputs ("\t! target: ", stream);
1140 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1142 break;
1143 case 'O':
1144 x = mark_constant_pool_use (x);
1145 output_addr_const (stream, x);
1146 break;
1147 /* N.B.: %R / %S / %T adjust memory addresses by four.
1148 For SHMEDIA, that means they can be used to access the first and
1149 second 32 bit part of a 64 bit (or larger) value that
1150 might be held in floating point registers or memory.
1151 While they can be used to access 64 bit parts of a larger value
1152 held in general purpose registers, that won't work with memory -
1153 neither for fp registers, since the frxx names are used. */
1154 case 'R':
1155 if (REG_P (x) || GET_CODE (x) == SUBREG)
1157 regno = true_regnum (x);
1158 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1159 fputs (reg_names[regno], (stream));
1161 else if (MEM_P (x))
1163 x = adjust_address (x, SImode, 4 * LSW);
1164 sh_print_operand_address (stream, XEXP (x, 0));
1166 else
1168 rtx sub = NULL_RTX;
1170 mode = GET_MODE (x);
1171 if (mode == VOIDmode)
1172 mode = DImode;
1173 if (GET_MODE_SIZE (mode) >= 8)
1174 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1175 if (sub)
1176 sh_print_operand (stream, sub, 0);
1177 else
1178 output_operand_lossage ("invalid operand to %%R");
1180 break;
1181 case 'S':
1182 if (REG_P (x) || GET_CODE (x) == SUBREG)
1184 regno = true_regnum (x);
1185 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1186 fputs (reg_names[regno], (stream));
1188 else if (MEM_P (x))
1190 x = adjust_address (x, SImode, 4 * MSW);
1191 sh_print_operand_address (stream, XEXP (x, 0));
1193 else
1195 rtx sub = NULL_RTX;
1197 mode = GET_MODE (x);
1198 if (mode == VOIDmode)
1199 mode = DImode;
1200 if (GET_MODE_SIZE (mode) >= 8)
1201 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1202 if (sub)
1203 sh_print_operand (stream, sub, 0);
1204 else
1205 output_operand_lossage ("invalid operand to %%S");
1207 break;
1208 case 'T':
1209 /* Next word of a double. */
1210 switch (GET_CODE (x))
1212 case REG:
1213 fputs (reg_names[REGNO (x) + 1], (stream));
1214 break;
1215 case MEM:
1216 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1217 && GET_CODE (XEXP (x, 0)) != POST_INC)
1218 x = adjust_address (x, SImode, 4);
1219 sh_print_operand_address (stream, XEXP (x, 0));
1220 break;
1221 default:
1222 break;
1224 break;
1226 case 't':
1227 gcc_assert (MEM_P (x));
1228 x = XEXP (x, 0);
1229 switch (GET_CODE (x))
1231 case REG:
1232 case SUBREG:
1233 sh_print_operand (stream, x, 0);
1234 break;
1235 default:
1236 break;
1238 break;
1240 case 'o':
1241 switch (GET_CODE (x))
1243 case PLUS: fputs ("add", stream); break;
1244 case MINUS: fputs ("sub", stream); break;
1245 case MULT: fputs ("mul", stream); break;
1246 case DIV: fputs ("div", stream); break;
1247 case EQ: fputs ("eq", stream); break;
1248 case NE: fputs ("ne", stream); break;
1249 case GT: case LT: fputs ("gt", stream); break;
1250 case GE: case LE: fputs ("ge", stream); break;
1251 case GTU: case LTU: fputs ("gtu", stream); break;
1252 case GEU: case LEU: fputs ("geu", stream); break;
1253 default:
1254 break;
1256 break;
1257 case 'M':
1258 if (TARGET_SHMEDIA)
1260 if (MEM_P (x)
1261 && GET_CODE (XEXP (x, 0)) == PLUS
1262 && (REG_P (XEXP (XEXP (x, 0), 1))
1263 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1264 fputc ('x', stream);
1266 else
1268 if (MEM_P (x))
1270 switch (GET_MODE (x))
1272 case QImode: fputs (".b", stream); break;
1273 case HImode: fputs (".w", stream); break;
1274 case SImode: fputs (".l", stream); break;
1275 case SFmode: fputs (".s", stream); break;
1276 case DFmode: fputs (".d", stream); break;
1277 default: gcc_unreachable ();
1281 break;
1283 case 'm':
1284 gcc_assert (MEM_P (x));
1285 x = XEXP (x, 0);
1286 /* Fall through. */
1287 case 'U':
1288 switch (GET_CODE (x))
1290 case REG:
1291 case SUBREG:
1292 sh_print_operand (stream, x, 0);
1293 fputs (", 0", stream);
1294 break;
1296 case PLUS:
1297 sh_print_operand (stream, XEXP (x, 0), 0);
1298 fputs (", ", stream);
1299 sh_print_operand (stream, XEXP (x, 1), 0);
1300 break;
1302 default:
1303 gcc_unreachable ();
1305 break;
1307 case 'V':
1309 int num = exact_log2 (INTVAL (x));
1310 gcc_assert (num >= 0);
1311 fprintf (stream, "#%d", num);
1313 break;
1315 case 'W':
1317 int num = exact_log2 (~INTVAL (x));
1318 gcc_assert (num >= 0);
1319 fprintf (stream, "#%d", num);
1321 break;
1323 case 'd':
1324 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1326 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1327 break;
1329 case 'N':
1330 if (x == CONST0_RTX (GET_MODE (x)))
1332 fprintf ((stream), "r63");
1333 break;
1335 goto default_output;
1336 case 'u':
1337 if (CONST_INT_P (x))
1339 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1340 break;
1342 /* Fall through. */
1344 default_output:
1345 default:
1346 regno = 0;
1347 mode = GET_MODE (x);
1349 switch (GET_CODE (x))
1351 case TRUNCATE:
1353 rtx inner = XEXP (x, 0);
1354 int offset = 0;
1355 enum machine_mode inner_mode;
1357 /* We might see SUBREGs with vector mode registers inside. */
1358 if (GET_CODE (inner) == SUBREG
1359 && (GET_MODE_SIZE (GET_MODE (inner))
1360 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1361 && subreg_lowpart_p (inner))
1362 inner = SUBREG_REG (inner);
1363 if (CONST_INT_P (inner))
1365 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1366 goto default_output;
1368 inner_mode = GET_MODE (inner);
1369 if (GET_CODE (inner) == SUBREG
1370 && (GET_MODE_SIZE (GET_MODE (inner))
1371 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1372 && REG_P (SUBREG_REG (inner)))
1374 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1375 GET_MODE (SUBREG_REG (inner)),
1376 SUBREG_BYTE (inner),
1377 GET_MODE (inner));
1378 inner = SUBREG_REG (inner);
1380 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1381 abort ();
1382 /* Floating point register pairs are always big endian;
1383 general purpose registers are 64 bit wide. */
1384 regno = REGNO (inner);
1385 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1386 - HARD_REGNO_NREGS (regno, mode))
1387 + offset;
1388 x = inner;
1389 goto reg;
1391 case SIGN_EXTEND:
1392 x = XEXP (x, 0);
1393 goto reg;
1394 /* FIXME: We need this on SHmedia32 because reload generates
1395 some sign-extended HI or QI loads into DImode registers
1396 but, because Pmode is SImode, the address ends up with a
1397 subreg:SI of the DImode register. Maybe reload should be
1398 fixed so as to apply alter_subreg to such loads? */
1399 case IF_THEN_ELSE:
1400 gcc_assert (trapping_target_operand (x, VOIDmode));
1401 x = XEXP (XEXP (x, 2), 0);
1402 goto default_output;
1403 case SUBREG:
1404 gcc_assert (SUBREG_BYTE (x) == 0
1405 && REG_P (SUBREG_REG (x)));
1407 x = SUBREG_REG (x);
1408 /* Fall through. */
1410 reg:
1411 case REG:
1412 regno += REGNO (x);
1413 if (FP_REGISTER_P (regno)
1414 && mode == V16SFmode)
1415 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1416 else if (FP_REGISTER_P (REGNO (x))
1417 && mode == V4SFmode)
1418 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1419 else if (REG_P (x)
1420 && mode == V2SFmode)
1421 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1422 else if (FP_REGISTER_P (REGNO (x))
1423 && GET_MODE_SIZE (mode) > 4)
1424 fprintf ((stream), "d%s", reg_names[regno] + 1);
1425 else
1426 fputs (reg_names[regno], (stream));
1427 break;
1429 case MEM:
1430 output_address (XEXP (x, 0));
1431 break;
1433 default:
1434 if (TARGET_SH1)
1435 fputc ('#', stream);
1436 output_addr_const (stream, x);
1437 break;
1439 break;
1443 static bool
1444 sh_print_operand_punct_valid_p (unsigned char code)
1446 return (code == '.' || code == '#' || code == '@' || code == ','
1447 || code == '$' || code == '\'' || code == '>');
1451 /* Encode symbol attributes of a SYMBOL_REF into its
1452 SYMBOL_REF_FLAGS. */
1453 static void
1454 sh_encode_section_info (tree decl, rtx rtl, int first)
1456 default_encode_section_info (decl, rtl, first);
1458 if (TREE_CODE (decl) == FUNCTION_DECL
1459 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1460 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1463 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1464 static void
1465 force_into (rtx value, rtx target)
1467 value = force_operand (value, target);
1468 if (! rtx_equal_p (value, target))
1469 emit_insn (gen_move_insn (target, value));
1472 /* Emit code to perform a block move. Choose the best method.
1474 OPERANDS[0] is the destination.
1475 OPERANDS[1] is the source.
1476 OPERANDS[2] is the size.
1477 OPERANDS[3] is the alignment safe to use. */
1479 int
1480 expand_block_move (rtx *operands)
1482 int align = INTVAL (operands[3]);
1483 int constp = (CONST_INT_P (operands[2]));
1484 int bytes = (constp ? INTVAL (operands[2]) : 0);
1486 if (! constp)
1487 return 0;
1489 /* If we could use mov.l to move words and dest is word-aligned, we
1490 can use movua.l for loads and still generate a relatively short
1491 and efficient sequence. */
1492 if (TARGET_SH4A_ARCH && align < 4
1493 && MEM_ALIGN (operands[0]) >= 32
1494 && can_move_by_pieces (bytes, 32))
1496 rtx dest = copy_rtx (operands[0]);
1497 rtx src = copy_rtx (operands[1]);
1498 /* We could use different pseudos for each copied word, but
1499 since movua can only load into r0, it's kind of
1500 pointless. */
1501 rtx temp = gen_reg_rtx (SImode);
1502 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1503 int copied = 0;
1505 while (copied + 4 <= bytes)
1507 rtx to = adjust_address (dest, SImode, copied);
1508 rtx from = adjust_automodify_address (src, BLKmode,
1509 src_addr, copied);
1511 set_mem_size (from, GEN_INT (4));
1512 emit_insn (gen_movua (temp, from));
1513 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1514 emit_move_insn (to, temp);
1515 copied += 4;
1518 if (copied < bytes)
1519 move_by_pieces (adjust_address (dest, BLKmode, copied),
1520 adjust_automodify_address (src, BLKmode,
1521 src_addr, copied),
1522 bytes - copied, align, 0);
1524 return 1;
1527 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1528 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1529 if (align < 4 || (bytes % 4 != 0))
1530 return 0;
1532 if (TARGET_HARD_SH4)
1534 if (bytes < 12)
1535 return 0;
1536 else if (bytes == 12)
1538 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1539 rtx r4 = gen_rtx_REG (SImode, 4);
1540 rtx r5 = gen_rtx_REG (SImode, 5);
1542 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1543 force_into (XEXP (operands[0], 0), r4);
1544 force_into (XEXP (operands[1], 0), r5);
1545 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1546 return 1;
1548 else if (! optimize_size)
1550 const char *entry_name;
1551 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1552 int dwords;
1553 rtx r4 = gen_rtx_REG (SImode, 4);
1554 rtx r5 = gen_rtx_REG (SImode, 5);
1555 rtx r6 = gen_rtx_REG (SImode, 6);
1557 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1558 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1559 force_into (XEXP (operands[0], 0), r4);
1560 force_into (XEXP (operands[1], 0), r5);
1562 dwords = bytes >> 3;
1563 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1564 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1565 return 1;
1567 else
1568 return 0;
1570 if (bytes < 64)
1572 char entry[30];
1573 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1574 rtx r4 = gen_rtx_REG (SImode, 4);
1575 rtx r5 = gen_rtx_REG (SImode, 5);
1577 sprintf (entry, "__movmemSI%d", bytes);
1578 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1579 force_into (XEXP (operands[0], 0), r4);
1580 force_into (XEXP (operands[1], 0), r5);
1581 emit_insn (gen_block_move_real (func_addr_rtx));
1582 return 1;
1585 /* This is the same number of bytes as a memcpy call, but to a different
1586 less common function name, so this will occasionally use more space. */
1587 if (! optimize_size)
1589 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1590 int final_switch, while_loop;
1591 rtx r4 = gen_rtx_REG (SImode, 4);
1592 rtx r5 = gen_rtx_REG (SImode, 5);
1593 rtx r6 = gen_rtx_REG (SImode, 6);
1595 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1596 force_into (XEXP (operands[0], 0), r4);
1597 force_into (XEXP (operands[1], 0), r5);
1599 /* r6 controls the size of the move. 16 is decremented from it
1600 for each 64 bytes moved. Then the negative bit left over is used
1601 as an index into a list of move instructions. e.g., a 72 byte move
1602 would be set up with size(r6) = 14, for one iteration through the
1603 big while loop, and a switch of -2 for the last part. */
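/* Working through the 72 byte example: 72 / 4 = 18 longwords, so
   while_loop = (18 / 16 - 1) * 16 = 0 and final_switch = 16 - 18 % 16
   = 14, giving r6 = 14; the single loop iteration subtracts 16,
   leaving the -2 switch index for the remaining 8 bytes.  */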
1605 final_switch = 16 - ((bytes / 4) % 16);
1606 while_loop = ((bytes / 4) / 16 - 1) * 16;
1607 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1608 emit_insn (gen_block_lump_real (func_addr_rtx));
1609 return 1;
1612 return 0;
1615 /* Prepare operands for a move define_expand; specifically, one of the
1616 operands must be in a register. */
1618 int
1619 prepare_move_operands (rtx operands[], enum machine_mode mode)
1621 if ((mode == SImode || mode == DImode)
1622 && flag_pic
1623 && ! ((mode == Pmode || mode == ptr_mode)
1624 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1626 rtx temp;
1627 if (SYMBOLIC_CONST_P (operands[1]))
1629 if (MEM_P (operands[0]))
1630 operands[1] = force_reg (Pmode, operands[1]);
1631 else if (TARGET_SHMEDIA
1632 && GET_CODE (operands[1]) == LABEL_REF
1633 && target_reg_operand (operands[0], mode))
1634 /* It's ok. */;
1635 else
1637 temp = (!can_create_pseudo_p ()
1638 ? operands[0]
1639 : gen_reg_rtx (Pmode));
1640 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1643 else if (GET_CODE (operands[1]) == CONST
1644 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1645 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1647 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1648 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1649 mode, temp);
1650 operands[1] = expand_binop (mode, add_optab, temp,
1651 XEXP (XEXP (operands[1], 0), 1),
1652 (!can_create_pseudo_p ()
1653 ? temp
1654 : gen_reg_rtx (Pmode)),
1655 0, OPTAB_LIB_WIDEN);
1659 if (! reload_in_progress && ! reload_completed)
1661 /* Copy the source to a register if neither operand is a register.  */
1662 if (! register_operand (operands[0], mode)
1663 && ! sh_register_operand (operands[1], mode))
1664 operands[1] = copy_to_mode_reg (mode, operands[1]);
1666 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1668 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1669 except that we can't use that function because it is static. */
1670 rtx new_rtx = change_address (operands[0], mode, 0);
1671 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1672 operands[0] = new_rtx;
1675 /* This case can happen while generating code to move the result
1676 of a library call to the target. Reject `st r0,@(rX,rY)' because
1677 reload will fail to find a spill register for rX, since r0 is already
1678 being used for the source. */
1679 else if (TARGET_SH1
1680 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1681 && MEM_P (operands[0])
1682 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1683 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1684 operands[1] = copy_to_mode_reg (mode, operands[1]);
1687 if (mode == Pmode || mode == ptr_mode)
1689 rtx op0, op1, opc;
1690 enum tls_model tls_kind;
1692 op0 = operands[0];
1693 op1 = operands[1];
1694 if (GET_CODE (op1) == CONST
1695 && GET_CODE (XEXP (op1, 0)) == PLUS
1696 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1697 != TLS_MODEL_NONE))
1699 opc = XEXP (XEXP (op1, 0), 1);
1700 op1 = XEXP (XEXP (op1, 0), 0);
1702 else
1703 opc = NULL_RTX;
1705 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1707 rtx tga_op1, tga_ret, tmp, tmp2;
1709 switch (tls_kind)
1711 case TLS_MODEL_GLOBAL_DYNAMIC:
1712 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1713 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1714 op1 = tga_ret;
1715 break;
1717 case TLS_MODEL_LOCAL_DYNAMIC:
1718 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1719 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1721 tmp = gen_reg_rtx (Pmode);
1722 emit_move_insn (tmp, tga_ret);
1724 if (register_operand (op0, Pmode))
1725 tmp2 = op0;
1726 else
1727 tmp2 = gen_reg_rtx (Pmode);
1729 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1730 op1 = tmp2;
1731 break;
1733 case TLS_MODEL_INITIAL_EXEC:
1734 if (! flag_pic)
1736 /* Don't schedule insns for getting the GOT address when
1737 the first scheduling pass is enabled, to avoid spill
1738 failures for R0. */
1739 if (flag_schedule_insns)
1740 emit_insn (gen_blockage ());
1741 emit_insn (gen_GOTaddr2picreg ());
1742 emit_use (gen_rtx_REG (SImode, PIC_REG));
1743 if (flag_schedule_insns)
1744 emit_insn (gen_blockage ());
1746 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1747 tmp = gen_sym2GOTTPOFF (op1);
1748 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1749 op1 = tga_op1;
1750 break;
1752 case TLS_MODEL_LOCAL_EXEC:
1753 tmp2 = gen_reg_rtx (Pmode);
1754 emit_insn (gen_load_gbr (tmp2));
1755 tmp = gen_reg_rtx (Pmode);
1756 emit_insn (gen_symTPOFF2reg (tmp, op1));
1758 if (register_operand (op0, Pmode))
1759 op1 = op0;
1760 else
1761 op1 = gen_reg_rtx (Pmode);
1763 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1764 break;
1766 default:
1767 gcc_unreachable ();
1769 if (opc)
1770 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1771 operands[1] = op1;
1775 return 0;
1778 enum rtx_code
1779 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1780 enum rtx_code comparison)
1782 rtx op1;
1783 rtx scratch = NULL_RTX;
1785 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1786 comparison = GET_CODE (operands[0]);
1787 else
1788 scratch = operands[4];
1789 if (CONST_INT_P (operands[1])
1790 && !CONST_INT_P (operands[2]))
1792 rtx tmp = operands[1];
1794 operands[1] = operands[2];
1795 operands[2] = tmp;
1796 comparison = swap_condition (comparison);
1798 if (CONST_INT_P (operands[2]))
1800 HOST_WIDE_INT val = INTVAL (operands[2]);
1801 if ((val == -1 || val == -0x81)
1802 && (comparison == GT || comparison == LE))
1804 comparison = (comparison == GT) ? GE : LT;
1805 operands[2] = gen_int_mode (val + 1, mode);
1807 else if ((val == 1 || val == 0x80)
1808 && (comparison == GE || comparison == LT))
1810 comparison = (comparison == GE) ? GT : LE;
1811 operands[2] = gen_int_mode (val - 1, mode);
1813 else if (val == 1 && (comparison == GEU || comparison == LTU))
1815 comparison = (comparison == GEU) ? NE : EQ;
1816 operands[2] = CONST0_RTX (mode);
1818 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1820 comparison = (comparison == GEU) ? GTU : LEU;
1821 operands[2] = gen_int_mode (val - 1, mode);
1823 else if (val == 0 && (comparison == GTU || comparison == LEU))
1824 comparison = (comparison == GTU) ? NE : EQ;
1825 else if (mode == SImode
1826 && ((val == 0x7fffffff
1827 && (comparison == GTU || comparison == LEU))
1828 || ((unsigned HOST_WIDE_INT) val
1829 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1830 && (comparison == GEU || comparison == LTU))))
1832 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1833 operands[2] = CONST0_RTX (mode);
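/* Illustrative examples of the canonicalizations above (sketches of the
resulting comparisons, not additional transformations):
x > -1 becomes x >= 0 (GT / -1 -> GE / 0)
x >= 1 becomes x > 0 (GE / 1 -> GT / 0)
x >=u 1 becomes x != 0 (GEU / 1 -> NE / 0)
x <u 0x80 becomes x <=u 0x7f (LTU / 0x80 -> LEU / 0x7f)
x >u 0x7fffffff (SImode) becomes x < 0
Comparisons against zero and against smaller immediates tend to be
cheaper on SH, e.g. via cmp/pz, cmp/pl and tst. */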
1836 op1 = operands[1];
1837 if (can_create_pseudo_p ())
1838 operands[1] = force_reg (mode, op1);
1839 /* When we are handling DImode comparisons, we want to keep constants so
1840 that we can optimize the component comparisons; however, memory loads
1841 are better issued as a whole so that they can be scheduled well.
1842 SImode equality comparisons allow I08 constants, but only when they
1843 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1844 into a register, that register might as well be r0, and we allow the
1845 constant. If it is already in a register, this is likely to be
1846 allocated to a different hard register, thus we load the constant into
1847 a register unless it is zero. */
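/* Hedged example for the rule above; the exact instruction selection is
left to the later patterns: an SImode test such as (x == 5) can keep
the constant when x ends up in r0, because SH has a cmp/eq #imm8,r0
form, whereas for other registers or for non-equality comparisons the
constant is forced into a register here. */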
1848 if (!REG_P (operands[2])
1849 && (!CONST_INT_P (operands[2])
1850 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1851 && ((comparison != EQ && comparison != NE)
1852 || (REG_P (op1) && REGNO (op1) != R0_REG)
1853 || !satisfies_constraint_I08 (operands[2])))))
1855 if (scratch && GET_MODE (scratch) == mode)
1857 emit_move_insn (scratch, operands[2]);
1858 operands[2] = scratch;
1860 else if (can_create_pseudo_p ())
1861 operands[2] = force_reg (mode, operands[2]);
1863 return comparison;
1866 void
1867 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1869 rtx (*branch_expander) (rtx) = gen_branch_true;
1870 rtx jump;
1872 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1873 switch (comparison)
1875 case NE: case LT: case LE: case LTU: case LEU:
1876 comparison = reverse_condition (comparison);
1877 branch_expander = gen_branch_false;
1878 default: ;
1880 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1881 gen_rtx_fmt_ee (comparison, SImode,
1882 operands[1], operands[2])));
1883 jump = emit_jump_insn (branch_expander (operands[3]));
1884 if (probability >= 0)
1885 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1889 /* ??? How should we distribute probabilities when more than one branch
1890 is generated? So far we only have some ad-hoc observations:
1891 - If the operands are random, they are likely to differ in both parts.
1892 - If comparing items in a hash chain, the operands are random or equal;
1893 operation should be EQ or NE.
1894 - If items are searched in an ordered tree from the root, we can expect
1895 the highpart to be unequal about half of the time; operation should be
1896 an inequality comparison, operands non-constant, and overall probability
1897 about 50%. Likewise for quicksort.
1898 - Range checks will often be made against constants. Even if we assume for
1899 simplicity an even distribution of the non-constant operand over a
1900 sub-range here, the same probability could be generated with differently
1901 wide sub-ranges - as long as the ratio of the part of the subrange that
1902 is before the threshold to the part that comes after the threshold stays
1903 the same. Thus, we can't really tell anything here;
1904 assuming random distribution is at least simple.
1907 bool
1908 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1910 enum rtx_code msw_taken, msw_skip, lsw_taken;
1911 rtx skip_label = NULL_RTX;
1912 rtx op1h, op1l, op2h, op2l;
1913 int num_branches;
1914 int prob, rev_prob;
1915 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1916 rtx scratch = operands[4];
1918 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1919 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1920 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1921 op1l = gen_lowpart (SImode, operands[1]);
1922 op2l = gen_lowpart (SImode, operands[2]);
1923 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1924 prob = split_branch_probability;
1925 rev_prob = REG_BR_PROB_BASE - prob;
1926 switch (comparison)
1928 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1929 That costs 1 cycle more when the first branch can be predicted taken,
1930 but saves us mispredicts because only one branch needs prediction.
1931 It also enables generating the cmpeqdi_t-1 pattern. */
1932 case EQ:
1933 if (TARGET_CMPEQDI_T)
1935 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1936 emit_jump_insn (gen_branch_true (operands[3]));
1937 return true;
1939 msw_skip = NE;
1940 lsw_taken = EQ;
1941 if (prob >= 0)
1943 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1945 msw_skip_prob = rev_prob;
1946 if (REG_BR_PROB_BASE <= 65535)
1947 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1948 else
1950 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1951 lsw_taken_prob
1952 = (prob
1953 ? (REG_BR_PROB_BASE
1954 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1955 / ((HOST_WIDEST_INT) prob << 32)))
1956 : 0);
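/* Rough sketch of why lsw_taken_prob is (almost) REG_BR_PROB_BASE:
with msw_skip_prob = rev_prob, the low word comparison is reached
only when the high words are equal, i.e. with probability
1 - rev_prob / REG_BR_PROB_BASE = prob / REG_BR_PROB_BASE. For the
combined probability of taking the EQ branch to remain prob, the low
word branch must then be taken essentially always, so the exact
formula above only subtracts a tiny correction term. */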
1959 break;
1960 case NE:
1961 if (TARGET_CMPEQDI_T)
1963 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1964 emit_jump_insn (gen_branch_false (operands[3]));
1965 return true;
1967 msw_taken = NE;
1968 msw_taken_prob = prob;
1969 lsw_taken = NE;
1970 lsw_taken_prob = 0;
1971 break;
1972 case GTU: case GT:
1973 msw_taken = comparison;
1974 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1975 break;
1976 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1977 msw_skip = swap_condition (msw_taken);
1978 lsw_taken = GTU;
1979 break;
1980 case GEU: case GE:
1981 if (op2l == CONST0_RTX (SImode))
1982 msw_taken = comparison;
1983 else
1985 msw_taken = comparison == GE ? GT : GTU;
1986 msw_skip = swap_condition (msw_taken);
1987 lsw_taken = GEU;
1989 break;
1990 case LTU: case LT:
1991 msw_taken = comparison;
1992 if (op2l == CONST0_RTX (SImode))
1993 break;
1994 msw_skip = swap_condition (msw_taken);
1995 lsw_taken = LTU;
1996 break;
1997 case LEU: case LE:
1998 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1999 msw_taken = comparison;
2000 else
2002 lsw_taken = LEU;
2003 if (comparison == LE)
2004 msw_taken = LT;
2005 else if (op2h != CONST0_RTX (SImode))
2006 msw_taken = LTU;
2007 else
2008 break;
2009 msw_skip = swap_condition (msw_taken);
2011 break;
2012 default: return false;
2014 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2015 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2016 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2017 if (comparison != EQ && comparison != NE && num_branches > 1)
2019 if (!CONSTANT_P (operands[2])
2020 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2021 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2023 msw_taken_prob = prob / 2U;
2024 msw_skip_prob
2025 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2026 lsw_taken_prob = prob;
2028 else
2030 msw_taken_prob = prob;
2031 msw_skip_prob = REG_BR_PROB_BASE;
2032 /* ??? If we have a constant op2h, should we use that when
2033 calculating lsw_taken_prob? */
2034 lsw_taken_prob = prob;
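/* Numeric illustration of the split above, taking REG_BR_PROB_BASE as
10000: for non-constant operands and prob = 5000 (a 50% overall taken
probability), the first case applies and yields msw_taken_prob = 2500,
msw_skip_prob = 10000 * 5000 / 15000 = 3333 and lsw_taken_prob = 5000;
otherwise the branch on the high part simply inherits prob and the
skip branch is treated as always taken. */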
2037 operands[1] = op1h;
2038 operands[2] = op2h;
2039 operands[4] = NULL_RTX;
2040 if (reload_completed
2041 && ! arith_reg_or_0_operand (op2h, SImode)
2042 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2043 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2044 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2046 emit_move_insn (scratch, operands[2]);
2047 operands[2] = scratch;
2049 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2050 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2051 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2053 rtx taken_label = operands[3];
2055 /* Operands were possibly modified, but msw_skip doesn't expect this.
2056 Always use the original ones. */
2057 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2059 operands[1] = op1h;
2060 operands[2] = op2h;
2063 operands[3] = skip_label = gen_label_rtx ();
2064 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2065 operands[3] = taken_label;
2067 operands[1] = op1l;
2068 operands[2] = op2l;
2069 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2071 if (reload_completed
2072 && ! arith_reg_or_0_operand (op2l, SImode)
2073 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2075 emit_move_insn (scratch, operands[2]);
2076 operands[2] = scratch;
2078 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2080 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2081 emit_label (skip_label);
2082 return true;
2085 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2087 static void
2088 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2090 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2092 insn = gen_rtx_PARALLEL (VOIDmode,
2093 gen_rtvec (2, insn,
2094 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2095 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2097 else
2098 emit_insn (insn);
2101 /* Prepare the operands for an scc instruction; make sure that the
2102 compare has been done and the result is in T_REG. */
2103 void
2104 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2106 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2107 enum rtx_code oldcode = code;
2108 enum machine_mode mode;
2110 /* First need a compare insn. */
2111 switch (code)
2113 case NE:
2114 /* It isn't possible to handle this case. */
2115 gcc_unreachable ();
2116 case LT:
2117 code = GT;
2118 break;
2119 case LE:
2120 code = GE;
2121 break;
2122 case LTU:
2123 code = GTU;
2124 break;
2125 case LEU:
2126 code = GEU;
2127 break;
2128 default:
2129 break;
2131 if (code != oldcode)
2133 rtx tmp = op0;
2134 op0 = op1;
2135 op1 = tmp;
2138 mode = GET_MODE (op0);
2139 if (mode == VOIDmode)
2140 mode = GET_MODE (op1);
2142 op0 = force_reg (mode, op0);
2143 if ((code != EQ && code != NE
2144 && (op1 != const0_rtx
2145 || code == GTU || code == GEU || code == LTU || code == LEU))
2146 || (mode == DImode && op1 != const0_rtx)
2147 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2148 op1 = force_reg (mode, op1);
2150 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2151 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2152 mode);
2156 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2157 rtx op0, rtx op1)
2159 rtx target = gen_reg_rtx (SImode);
2160 rtx tmp;
2162 gcc_assert (TARGET_SHMEDIA);
2163 switch (code)
2165 case EQ:
2166 case GT:
2167 case LT:
2168 case UNORDERED:
2169 case GTU:
2170 case LTU:
2171 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2172 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2173 code = NE;
2174 break;
2176 case NE:
2177 case GE:
2178 case LE:
2179 case ORDERED:
2180 case GEU:
2181 case LEU:
2182 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2183 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2184 code = EQ;
2185 break;
2187 case UNEQ:
2188 case UNGE:
2189 case UNGT:
2190 case UNLE:
2191 case UNLT:
2192 case LTGT:
2193 return NULL_RTX;
2195 default:
2196 gcc_unreachable ();
2199 if (mode == DImode)
2201 rtx t2 = gen_reg_rtx (DImode);
2202 emit_insn (gen_extendsidi2 (t2, target));
2203 target = t2;
2206 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
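/* For example (SHmedia only, sketching the reversal above): a GE request
is emitted as an LT cstore into TARGET via reverse_condition, and the
rtx handed back to the caller is (eq TARGET 0), so the original GE
holds exactly when the LT store produced zero. */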
2209 /* Called from the md file, set up the operands of a compare instruction. */
2211 void
2212 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2214 enum rtx_code code = GET_CODE (operands[0]);
2215 enum rtx_code branch_code;
2216 rtx op0 = operands[1];
2217 rtx op1 = operands[2];
2218 rtx insn, tem;
2219 bool need_ccmpeq = false;
2221 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2223 op0 = force_reg (mode, op0);
2224 op1 = force_reg (mode, op1);
2226 else
2228 if (code != EQ || mode == DImode)
2230 /* Force args into regs, since we can't use constants here. */
2231 op0 = force_reg (mode, op0);
2232 if (op1 != const0_rtx || code == GTU || code == GEU)
2233 op1 = force_reg (mode, op1);
2237 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2239 if (code == LT
2240 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2241 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2243 tem = op0, op0 = op1, op1 = tem;
2244 code = swap_condition (code);
2247 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2248 if (code == GE)
2250 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2251 need_ccmpeq = true;
2252 code = GT;
2255 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2256 to EQ/GT respectively. */
2257 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2260 switch (code)
2262 case EQ:
2263 case GT:
2264 case GE:
2265 case GTU:
2266 case GEU:
2267 branch_code = code;
2268 break;
2269 case NE:
2270 case LT:
2271 case LE:
2272 case LTU:
2273 case LEU:
2274 branch_code = reverse_condition (code);
2275 break;
2276 default:
2277 gcc_unreachable ();
2280 insn = gen_rtx_SET (VOIDmode,
2281 gen_rtx_REG (SImode, T_REG),
2282 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2284 sh_emit_set_t_insn (insn, mode);
2285 if (need_ccmpeq)
2286 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2288 if (branch_code == code)
2289 emit_jump_insn (gen_branch_true (operands[3]));
2290 else
2291 emit_jump_insn (gen_branch_false (operands[3]));
2294 void
2295 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2297 enum rtx_code code = GET_CODE (operands[1]);
2298 rtx op0 = operands[2];
2299 rtx op1 = operands[3];
2300 rtx lab = NULL_RTX;
2301 bool invert = false;
2302 rtx tem;
2304 op0 = force_reg (mode, op0);
2305 if ((code != EQ && code != NE
2306 && (op1 != const0_rtx
2307 || code == GTU || code == GEU || code == LTU || code == LEU))
2308 || (mode == DImode && op1 != const0_rtx)
2309 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2310 op1 = force_reg (mode, op1);
2312 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2314 if (code == LT || code == LE)
2316 code = swap_condition (code);
2317 tem = op0, op0 = op1, op1 = tem;
2319 if (code == GE)
2321 if (TARGET_IEEE)
2323 lab = gen_label_rtx ();
2324 sh_emit_scc_to_t (EQ, op0, op1);
2325 emit_jump_insn (gen_branch_true (lab));
2326 code = GT;
2328 else
2330 code = LT;
2331 invert = true;
2336 if (code == NE)
2338 code = EQ;
2339 invert = true;
2342 sh_emit_scc_to_t (code, op0, op1);
2343 if (lab)
2344 emit_label (lab);
2345 if (invert)
2346 emit_insn (gen_movnegt (operands[0]));
2347 else
2348 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2351 /* Functions to output assembly code. */
2353 /* Return a sequence of instructions to perform DI or DF move.
2355 Since the SH cannot move a DI or DF in one instruction, we have
2356 to take care when we see overlapping source and dest registers. */
2358 const char *
2359 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2360 enum machine_mode mode)
2362 rtx dst = operands[0];
2363 rtx src = operands[1];
2365 if (MEM_P (dst)
2366 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2367 return "mov.l %T1,%0\n\tmov.l %1,%0";
2369 if (register_operand (dst, mode)
2370 && register_operand (src, mode))
2372 if (REGNO (src) == MACH_REG)
2373 return "sts mach,%S0\n\tsts macl,%R0";
2375 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2376 when mov.d r1,r0 do r1->r0 then r2->r1. */
2378 if (REGNO (src) + 1 == REGNO (dst))
2379 return "mov %T1,%T0\n\tmov %1,%0";
2380 else
2381 return "mov %1,%0\n\tmov %T1,%T0";
2383 else if (CONST_INT_P (src))
2385 if (INTVAL (src) < 0)
2386 output_asm_insn ("mov #-1,%S0", operands);
2387 else
2388 output_asm_insn ("mov #0,%S0", operands);
2390 return "mov %1,%R0";
2392 else if (MEM_P (src))
2394 int ptrreg = -1;
2395 int dreg = REGNO (dst);
2396 rtx inside = XEXP (src, 0);
2398 switch (GET_CODE (inside))
2400 case REG:
2401 ptrreg = REGNO (inside);
2402 break;
2404 case SUBREG:
2405 ptrreg = subreg_regno (inside);
2406 break;
2408 case PLUS:
2409 ptrreg = REGNO (XEXP (inside, 0));
2410 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2411 an offsettable address. Unfortunately, offsettable addresses use
2412 QImode to check the offset, and a QImode offsettable address
2413 requires r0 for the other operand, which is not currently
2414 supported, so we can't use the 'o' constraint.
2415 Thus we must check for and handle r0+REG addresses here.
2416 We punt for now, since this is likely very rare. */
2417 gcc_assert (!REG_P (XEXP (inside, 1)));
2418 break;
2420 case LABEL_REF:
2421 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2422 case POST_INC:
2423 return "mov.l %1,%0\n\tmov.l %1,%T0";
2424 default:
2425 gcc_unreachable ();
2428 /* Work out the safe way to copy. Copy into the second half first. */
2429 if (dreg == ptrreg)
2430 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2433 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2436 /* Print an instruction which would have gone into a delay slot after
2437 another instruction, but couldn't because the other instruction expanded
2438 into a sequence where putting the slot insn at the end wouldn't work. */
2440 static void
2441 print_slot (rtx insn)
2443 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2445 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2448 const char *
2449 output_far_jump (rtx insn, rtx op)
2451 struct { rtx lab, reg, op; } this_jmp;
2452 rtx braf_base_lab = NULL_RTX;
2453 const char *jump;
2454 int far;
2455 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2456 rtx prev;
2458 this_jmp.lab = gen_label_rtx ();
2460 if (TARGET_SH2
2461 && offset >= -32764
2462 && offset - get_attr_length (insn) <= 32766)
2464 far = 0;
2465 jump = "mov.w %O0,%1; braf %1";
2467 else
2469 far = 1;
2470 if (flag_pic)
2472 if (TARGET_SH2)
2473 jump = "mov.l %O0,%1; braf %1";
2474 else
2475 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2477 else
2478 jump = "mov.l %O0,%1; jmp @%1";
2480 /* If we have a scratch register available, use it. */
2481 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2482 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2484 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2485 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2486 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2487 output_asm_insn (jump, &this_jmp.lab);
2488 if (dbr_sequence_length ())
2489 print_slot (final_sequence);
2490 else
2491 output_asm_insn ("nop", 0);
2493 else
2495 /* Output the delay slot insn first if any. */
2496 if (dbr_sequence_length ())
2497 print_slot (final_sequence);
2499 this_jmp.reg = gen_rtx_REG (SImode, 13);
2500 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2501 Fortunately, MACL is fixed and call-clobbered, and we never
2502 need its value across jumps, so save r13 in it instead of in
2503 the stack. */
2504 if (TARGET_SH5)
2505 output_asm_insn ("lds r13, macl", 0);
2506 else
2507 output_asm_insn ("mov.l r13,@-r15", 0);
2508 output_asm_insn (jump, &this_jmp.lab);
2509 if (TARGET_SH5)
2510 output_asm_insn ("sts macl, r13", 0);
2511 else
2512 output_asm_insn ("mov.l @r15+,r13", 0);
2514 if (far && flag_pic && TARGET_SH2)
2516 braf_base_lab = gen_label_rtx ();
2517 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2518 CODE_LABEL_NUMBER (braf_base_lab));
2520 if (far)
2521 output_asm_insn (".align 2", 0);
2522 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2523 this_jmp.op = op;
2524 if (far && flag_pic)
2526 if (TARGET_SH2)
2527 this_jmp.lab = braf_base_lab;
2528 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2530 else
2531 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2532 return "";
2535 /* Local label counter, used for constants in the pool and inside
2536 pattern branches. */
2538 static int lf = 100;
2540 /* Output code for ordinary branches. */
2542 const char *
2543 output_branch (int logic, rtx insn, rtx *operands)
2545 switch (get_attr_length (insn))
2547 case 6:
2548 /* This can happen if filling the delay slot has caused a forward
2549 branch to exceed its range (we could reverse it, but only
2550 when we know we won't overextend other branches; this should
2551 best be handled by relaxation).
2552 It can also happen when other condbranches hoist delay slot insn
2553 from their destination, thus leading to code size increase.
2554 But the branch will still be in the range -4092..+4098 bytes. */
2556 if (! TARGET_RELAX)
2558 int label = lf++;
2559 /* The call to print_slot will clobber the operands. */
2560 rtx op0 = operands[0];
2562 /* If the instruction in the delay slot is annulled (true), then
2563 there is no delay slot where we can put it now. The only safe
2564 place for it is after the label. final will do that by default. */
2566 if (final_sequence
2567 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2568 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2570 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2571 ASSEMBLER_DIALECT ? "/" : ".", label);
2572 print_slot (final_sequence);
2574 else
2575 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2577 output_asm_insn ("bra\t%l0", &op0);
2578 fprintf (asm_out_file, "\tnop\n");
2579 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2581 return "";
2583 /* When relaxing, handle this like a short branch. The linker
2584 will fix it up if it still doesn't fit after relaxation. */
2585 case 2:
2586 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2588 /* These are for SH2e, in which we have to account for the
2589 extra nop because of the hardware bug in annulled branches. */
2590 case 8:
2591 if (! TARGET_RELAX)
2593 int label = lf++;
2595 gcc_assert (!final_sequence
2596 || !(INSN_ANNULLED_BRANCH_P
2597 (XVECEXP (final_sequence, 0, 0))));
2598 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2599 logic ? "f" : "t",
2600 ASSEMBLER_DIALECT ? "/" : ".", label);
2601 fprintf (asm_out_file, "\tnop\n");
2602 output_asm_insn ("bra\t%l0", operands);
2603 fprintf (asm_out_file, "\tnop\n");
2604 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2606 return "";
2608 /* When relaxing, fall through. */
2609 case 4:
2611 char buffer[10];
2613 sprintf (buffer, "b%s%ss\t%%l0",
2614 logic ? "t" : "f",
2615 ASSEMBLER_DIALECT ? "/" : ".");
2616 output_asm_insn (buffer, &operands[0]);
2617 return "nop";
2620 default:
2621 /* There should be no longer branches now - that would
2622 indicate that something has destroyed the branches set
2623 up in machine_dependent_reorg. */
2624 gcc_unreachable ();
2628 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2629 fill in operand 9 as a label to the successor insn.
2630 We try to use jump threading where possible.
2631 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2632 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2633 follow jmp and bt, if the address is in range. */
2634 const char *
2635 output_branchy_insn (enum rtx_code code, const char *templ,
2636 rtx insn, rtx *operands)
2638 rtx next_insn = NEXT_INSN (insn);
2640 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2642 rtx src = SET_SRC (PATTERN (next_insn));
2643 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2645 /* Following branch not taken */
2646 operands[9] = gen_label_rtx ();
2647 emit_label_after (operands[9], next_insn);
2648 INSN_ADDRESSES_NEW (operands[9],
2649 INSN_ADDRESSES (INSN_UID (next_insn))
2650 + get_attr_length (next_insn));
2651 return templ;
2653 else
2655 int offset = (branch_dest (next_insn)
2656 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2657 if (offset >= -252 && offset <= 258)
2659 if (GET_CODE (src) == IF_THEN_ELSE)
2660 /* branch_true */
2661 src = XEXP (src, 1);
2662 operands[9] = src;
2663 return templ;
2667 operands[9] = gen_label_rtx ();
2668 emit_label_after (operands[9], insn);
2669 INSN_ADDRESSES_NEW (operands[9],
2670 INSN_ADDRESSES (INSN_UID (insn))
2671 + get_attr_length (insn));
2672 return templ;
2675 const char *
2676 output_ieee_ccmpeq (rtx insn, rtx *operands)
2678 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2679 insn, operands);
2682 /* Output the start of the assembler file. */
2684 static void
2685 sh_file_start (void)
2687 default_file_start ();
2689 #ifdef SYMBIAN
2690 /* Declare the .directive section before it is used. */
2691 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2692 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2693 #endif
2695 if (TARGET_ELF)
2696 /* We need to show the text section with the proper
2697 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2698 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2699 will complain. We can teach GAS specifically about the
2700 default attributes for our choice of text section, but
2701 then we would have to change GAS again if/when we change
2702 the text section name. */
2703 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2704 else
2705 /* Switch to the data section so that the coffsem symbol
2706 isn't in the text section. */
2707 switch_to_section (data_section);
2709 if (TARGET_LITTLE_ENDIAN)
2710 fputs ("\t.little\n", asm_out_file);
2712 if (!TARGET_ELF)
2714 if (TARGET_SHCOMPACT)
2715 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2716 else if (TARGET_SHMEDIA)
2717 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2718 TARGET_SHMEDIA64 ? 64 : 32);
2722 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2724 static bool
2725 unspec_caller_rtx_p (rtx pat)
2727 rtx base, offset;
2728 int i;
2730 split_const (pat, &base, &offset);
2731 if (GET_CODE (base) == UNSPEC)
2733 if (XINT (base, 1) == UNSPEC_CALLER)
2734 return true;
2735 for (i = 0; i < XVECLEN (base, 0); i++)
2736 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2737 return true;
2739 return false;
2742 /* Indicate that INSN cannot be duplicated. This is true for an insn
2743 that generates a unique label. */
2745 static bool
2746 sh_cannot_copy_insn_p (rtx insn)
2748 rtx pat;
2750 if (!reload_completed || !flag_pic)
2751 return false;
2753 if (!NONJUMP_INSN_P (insn))
2754 return false;
2755 if (asm_noperands (insn) >= 0)
2756 return false;
2758 pat = PATTERN (insn);
2759 if (GET_CODE (pat) != SET)
2760 return false;
2761 pat = SET_SRC (pat);
2763 if (unspec_caller_rtx_p (pat))
2764 return true;
2766 return false;
2769 /* Actual number of instructions used to make a shift by N. */
2770 static const char ashiftrt_insns[] =
2771 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2773 /* Left shift and logical right shift are the same. */
2774 static const char shift_insns[] =
2775 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2777 /* Individual shift amounts needed to get the above length sequences.
2778 One bit right shifts clobber the T bit, so when possible, put one bit
2779 shifts in the middle of the sequence, so the ends are eligible for
2780 branch delay slots. */
2781 static const short shift_amounts[32][5] = {
2782 {0}, {1}, {2}, {2, 1},
2783 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2784 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2785 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2786 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2787 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2788 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2789 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
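/* Two illustrative entries from the tables above: a shift by 5 uses
shift_amounts[5] = {2, 1, 2} (2 + 1 + 2 = 5), i.e. shift_insns[5] = 3
instructions, with the single-bit shift kept away from the ends as
described in the comment above; a shift by 30 uses {16, -2, 16},
i.e. shift left by 16, right by 2, then left by 16 again, for a net
shift of 30 in shift_insns[30] = 3 instructions. */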
2791 /* Likewise, but for shift amounts < 16, up to three highmost bits
2792 might be clobbered. This is typically used when combined with some
2793 kind of sign or zero extension. */
2795 static const char ext_shift_insns[] =
2796 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2798 static const short ext_shift_amounts[32][4] = {
2799 {0}, {1}, {2}, {2, 1},
2800 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2801 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2802 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2803 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2804 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2805 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2806 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2808 /* Assuming we have a value that has been sign-extended by at least one bit,
2809 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2810 to shift it by N without data loss, and quicker than by other means? */
2811 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
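/* EXT_SHIFT_SIGNED (n) holds exactly for n == 7 and n == 15. These are
the entries of ext_shift_amounts whose sequences end in a single-bit
right shift, {8, -1} and {16, -1} above, so for a value that is
already sign-extended by at least one bit the final shift can be made
arithmetic without losing information. */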
2813 /* This is used in length attributes in sh.md to help compute the length
2814 of arbitrary constant shift instructions. */
2817 shift_insns_rtx (rtx insn)
2819 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2820 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2821 enum rtx_code shift_code = GET_CODE (set_src);
2823 switch (shift_code)
2825 case ASHIFTRT:
2826 return ashiftrt_insns[shift_count];
2827 case LSHIFTRT:
2828 case ASHIFT:
2829 return shift_insns[shift_count];
2830 default:
2831 gcc_unreachable ();
2835 /* Return the cost of a shift. */
2837 static inline int
2838 shiftcosts (rtx x)
2840 int value;
2842 if (TARGET_SHMEDIA)
2843 return 1;
2845 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2847 if (GET_MODE (x) == DImode
2848 && CONST_INT_P (XEXP (x, 1))
2849 && INTVAL (XEXP (x, 1)) == 1)
2850 return 2;
2852 /* Everything else is invalid, because there is no pattern for it. */
2853 return MAX_COST;
2855 /* If shift by a non constant, then this will be expensive. */
2856 if (!CONST_INT_P (XEXP (x, 1)))
2857 return SH_DYNAMIC_SHIFT_COST;
2859 /* Otherwise, return the true cost in instructions. Cope with out of range
2860 shift counts more or less arbitrarily. */
2861 value = INTVAL (XEXP (x, 1)) & 31;
2863 if (GET_CODE (x) == ASHIFTRT)
2865 int cost = ashiftrt_insns[value];
2866 /* If SH3, then we put the constant in a reg and use shad. */
2867 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2868 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2869 return cost;
2871 else
2872 return shift_insns[value];
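/* Cost illustration for the ASHIFTRT clamp above (numbers read from
ashiftrt_insns): an arithmetic right shift by 6 is listed as 8
instructions, so its cost is clamped to 1 + SH_DYNAMIC_SHIFT_COST,
matching the "put the constant in a reg and use shad" strategy noted
in the comment. */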
2875 /* Return the cost of an AND operation. */
2877 static inline int
2878 andcosts (rtx x)
2880 int i;
2882 /* Anding with a register is a single cycle and instruction. */
2883 if (!CONST_INT_P (XEXP (x, 1)))
2884 return 1;
2886 i = INTVAL (XEXP (x, 1));
2888 if (TARGET_SHMEDIA)
2890 if (satisfies_constraint_I10 (XEXP (x, 1))
2891 || satisfies_constraint_J16 (XEXP (x, 1)))
2892 return 1;
2893 else
2894 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2897 /* These constants are single cycle extu.[bw] instructions. */
2898 if (i == 0xff || i == 0xffff)
2899 return 1;
2900 /* Constants that can be used in an and immediate instruction in a single
2901 cycle, but this requires r0, so make it a little more expensive. */
2902 if (CONST_OK_FOR_K08 (i))
2903 return 2;
2904 /* Constants that can be loaded with a mov immediate and an and.
2905 This case is probably unnecessary. */
2906 if (CONST_OK_FOR_I08 (i))
2907 return 2;
2908 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2909 This case is probably unnecessary. */
2910 return 3;
2913 /* Return the cost of an addition or a subtraction. */
2915 static inline int
2916 addsubcosts (rtx x)
2918 /* Adding a register is a single cycle insn. */
2919 if (REG_P (XEXP (x, 1))
2920 || GET_CODE (XEXP (x, 1)) == SUBREG)
2921 return 1;
2923 /* Likewise for small constants. */
2924 if (CONST_INT_P (XEXP (x, 1))
2925 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2926 return 1;
2928 if (TARGET_SHMEDIA)
2929 switch (GET_CODE (XEXP (x, 1)))
2931 case CONST:
2932 case LABEL_REF:
2933 case SYMBOL_REF:
2934 return TARGET_SHMEDIA64 ? 5 : 3;
2936 case CONST_INT:
2937 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2938 return 2;
2939 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2940 return 3;
2941 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2942 return 4;
2944 /* Fall through. */
2945 default:
2946 return 5;
2949 /* Any other constant requires a 2 cycle pc-relative load plus an
2950 addition. */
2951 return 3;
2954 /* Return the cost of a multiply. */
2955 static inline int
2956 multcosts (rtx x ATTRIBUTE_UNUSED)
2958 if (sh_multcost >= 0)
2959 return sh_multcost;
2960 if (TARGET_SHMEDIA)
2961 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2962 accept constants. Ideally, we would use a cost of one or two and
2963 add the cost of the operand, but disregard the latter when inside loops
2964 and loop invariant code motion is still to follow.
2965 Using a multiply first and splitting it later if it's a loss
2966 doesn't work because of different sign / zero extension semantics
2967 of multiplies vs. shifts. */
2968 return optimize_size ? 2 : 3;
2970 if (TARGET_SH2)
2972 /* We have a mul insn, so we can never take more than the mul and the
2973 read of the mac reg, but count more because of the latency and extra
2974 reg usage. */
2975 if (optimize_size)
2976 return 2;
2977 return 3;
2980 /* If we're aiming at small code, then just count the number of
2981 insns in a multiply call sequence. */
2982 if (optimize_size)
2983 return 5;
2985 /* Otherwise count all the insns in the routine we'd be calling too. */
2986 return 20;
2989 /* Compute a (partial) cost for rtx X. Return true if the complete
2990 cost has been computed, and false if subexpressions should be
2991 scanned. In either case, *TOTAL contains the cost result. */
2993 static bool
2994 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2995 bool speed ATTRIBUTE_UNUSED)
2997 switch (code)
2999 case CONST_INT:
3000 if (TARGET_SHMEDIA)
3002 if (INTVAL (x) == 0)
3003 *total = 0;
3004 else if (outer_code == AND && and_operand ((x), DImode))
3005 *total = 0;
3006 else if ((outer_code == IOR || outer_code == XOR
3007 || outer_code == PLUS)
3008 && CONST_OK_FOR_I10 (INTVAL (x)))
3009 *total = 0;
3010 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3011 *total = COSTS_N_INSNS (outer_code != SET);
3012 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3013 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3014 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3015 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3016 else
3017 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3018 return true;
3020 if (CONST_OK_FOR_I08 (INTVAL (x)))
3021 *total = 0;
3022 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3023 && CONST_OK_FOR_K08 (INTVAL (x)))
3024 *total = 1;
3025 /* prepare_cmp_insn will force costly constants into registers before
3026 the cbranch[sd]i4 patterns can see them, so preserve potentially
3027 interesting ones not covered by I08 above. */
3028 else if (outer_code == COMPARE
3029 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3030 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3031 || INTVAL (x) == 0x7fffffff
3032 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3033 *total = 1;
3034 else
3035 *total = 8;
3036 return true;
3038 case CONST:
3039 case LABEL_REF:
3040 case SYMBOL_REF:
3041 if (TARGET_SHMEDIA64)
3042 *total = COSTS_N_INSNS (4);
3043 else if (TARGET_SHMEDIA32)
3044 *total = COSTS_N_INSNS (2);
3045 else
3046 *total = 5;
3047 return true;
3049 case CONST_DOUBLE:
3050 if (TARGET_SHMEDIA)
3051 *total = COSTS_N_INSNS (4);
3052 /* prepare_cmp_insn will force costly constants into registers before
3053 the cbranchdi4 pattern can see them, so preserve potentially
3054 interesting ones. */
3055 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3056 *total = 1;
3057 else
3058 *total = 10;
3059 return true;
3060 case CONST_VECTOR:
3061 if (x == CONST0_RTX (GET_MODE (x)))
3062 *total = 0;
3063 else if (sh_1el_vec (x, VOIDmode))
3064 *total = outer_code != SET;
3065 if (sh_rep_vec (x, VOIDmode))
3066 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3067 + (outer_code != SET));
3068 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3069 return true;
3071 case PLUS:
3072 case MINUS:
3073 *total = COSTS_N_INSNS (addsubcosts (x));
3074 return true;
3076 case AND:
3077 *total = COSTS_N_INSNS (andcosts (x));
3078 return true;
3080 case MULT:
3081 *total = COSTS_N_INSNS (multcosts (x));
3082 return true;
3084 case ASHIFT:
3085 case ASHIFTRT:
3086 case LSHIFTRT:
3087 *total = COSTS_N_INSNS (shiftcosts (x));
3088 return true;
3090 case DIV:
3091 case UDIV:
3092 case MOD:
3093 case UMOD:
3094 *total = COSTS_N_INSNS (20);
3095 return true;
3097 case PARALLEL:
3098 if (sh_1el_vec (x, VOIDmode))
3099 *total = outer_code != SET;
3100 if (sh_rep_vec (x, VOIDmode))
3101 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3102 + (outer_code != SET));
3103 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3104 return true;
3106 case FLOAT:
3107 case FIX:
3108 *total = 100;
3109 return true;
3111 default:
3112 return false;
3116 /* Compute the cost of an address. For the SH, all valid addresses are
3117 the same cost. Use a slightly higher cost for reg + reg addressing,
3118 since it increases pressure on r0. */
3120 static int
3121 sh_address_cost (rtx X,
3122 bool speed ATTRIBUTE_UNUSED)
3124 return (GET_CODE (X) == PLUS
3125 && ! CONSTANT_P (XEXP (X, 1))
3126 && ! TARGET_SHMEDIA ? 1 : 0);
3129 /* Code to expand a shift. */
3131 void
3132 gen_ashift (int type, int n, rtx reg)
3134 /* Negative values here come from the shift_amounts array. */
3135 if (n < 0)
3137 if (type == ASHIFT)
3138 type = LSHIFTRT;
3139 else
3140 type = ASHIFT;
3141 n = -n;
3144 switch (type)
3146 case ASHIFTRT:
3147 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3148 break;
3149 case LSHIFTRT:
3150 if (n == 1)
3151 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3152 else
3153 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3154 break;
3155 case ASHIFT:
3156 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3157 break;
3161 /* Same for HImode */
3163 void
3164 gen_ashift_hi (int type, int n, rtx reg)
3166 /* Negative values here come from the shift_amounts array. */
3167 if (n < 0)
3169 if (type == ASHIFT)
3170 type = LSHIFTRT;
3171 else
3172 type = ASHIFT;
3173 n = -n;
3176 switch (type)
3178 case ASHIFTRT:
3179 case LSHIFTRT:
3180 /* We don't have HImode right shift operations because using the
3181 ordinary 32 bit shift instructions for that doesn't generate proper
3182 zero/sign extension.
3183 gen_ashift_hi is only called in contexts where we know that the
3184 sign extension works out correctly. */
3186 int offset = 0;
3187 if (GET_CODE (reg) == SUBREG)
3189 offset = SUBREG_BYTE (reg);
3190 reg = SUBREG_REG (reg);
3192 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3193 break;
3195 case ASHIFT:
3196 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3197 break;
3201 /* Output RTL to split a constant shift into its component SH constant
3202 shift instructions. */
3204 void
3205 gen_shifty_op (int code, rtx *operands)
3207 int value = INTVAL (operands[2]);
3208 int max, i;
3210 /* Truncate the shift count in case it is out of bounds. */
3211 value = value & 31;
3213 if (value == 31)
3215 if (code == LSHIFTRT)
3217 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3218 emit_insn (gen_movt (operands[0]));
3219 return;
3221 else if (code == ASHIFT)
3223 /* There is a two instruction sequence for 31 bit left shifts,
3224 but it requires r0. */
3225 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3227 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3228 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3229 return;
3233 else if (value == 0)
3235 /* This can happen even when optimizing, if there were subregs before
3236 reload. Don't output a nop here, as this is never optimized away;
3237 use a no-op move instead. */
3238 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3239 return;
3242 max = shift_insns[value];
3243 for (i = 0; i < max; i++)
3244 gen_ashift (code, shift_amounts[value][i], operands[0]);
3247 /* Same as above, but optimized for values where the topmost bits don't
3248 matter. */
3250 void
3251 gen_shifty_hi_op (int code, rtx *operands)
3253 int value = INTVAL (operands[2]);
3254 int max, i;
3255 void (*gen_fun) (int, int, rtx);
3257 /* This operation is used by and_shl for SImode values with a few
3258 high bits known to be cleared. */
3259 value &= 31;
3260 if (value == 0)
3262 emit_insn (gen_nop ());
3263 return;
3266 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3267 if (code == ASHIFT)
3269 max = ext_shift_insns[value];
3270 for (i = 0; i < max; i++)
3271 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3273 else
3274 /* When shifting right, emit the shifts in reverse order, so that
3275 solitary negative values come first. */
3276 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3277 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3280 /* Output RTL for an arithmetic right shift. */
3282 /* ??? Rewrite to use super-optimizer sequences. */
3285 expand_ashiftrt (rtx *operands)
3287 rtx wrk;
3288 char func[18];
3289 int value;
3291 if (TARGET_SH3)
3293 if (!CONST_INT_P (operands[2]))
3295 rtx count = copy_to_mode_reg (SImode, operands[2]);
3296 emit_insn (gen_negsi2 (count, count));
3297 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3298 return 1;
3300 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3301 > 1 + SH_DYNAMIC_SHIFT_COST)
3303 rtx count
3304 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3305 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3306 return 1;
3309 if (!CONST_INT_P (operands[2]))
3310 return 0;
3312 value = INTVAL (operands[2]) & 31;
3314 if (value == 31)
3316 /* If we are called from abs expansion, arrange things so that
3317 we can use a single MT instruction that doesn't clobber the source,
3318 if LICM can hoist out the load of the constant zero. */
3319 if (currently_expanding_to_rtl)
3321 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3322 operands[1]));
3323 emit_insn (gen_mov_neg_si_t (operands[0]));
3324 return 1;
3326 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3327 return 1;
3329 else if (value >= 16 && value <= 19)
3331 wrk = gen_reg_rtx (SImode);
3332 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3333 value -= 16;
3334 while (value--)
3335 gen_ashift (ASHIFTRT, 1, wrk);
3336 emit_move_insn (operands[0], wrk);
3337 return 1;
3339 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3340 else if (value <= 5)
3342 wrk = gen_reg_rtx (SImode);
3343 emit_move_insn (wrk, operands[1]);
3344 while (value--)
3345 gen_ashift (ASHIFTRT, 1, wrk);
3346 emit_move_insn (operands[0], wrk);
3347 return 1;
3350 wrk = gen_reg_rtx (Pmode);
3352 /* Load the value into an arg reg and call a helper. */
3353 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3354 sprintf (func, "__ashiftrt_r4_%d", value);
3355 function_symbol (wrk, func, SFUNC_STATIC);
3356 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3357 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3358 return 1;
3362 sh_dynamicalize_shift_p (rtx count)
3364 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3367 /* Try to find a good way to implement the combiner pattern
3368 [(set (match_operand:SI 0 "register_operand" "r")
3369 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3370 (match_operand:SI 2 "const_int_operand" "n"))
3371 (match_operand:SI 3 "const_int_operand" "n"))) .
3372 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3373 return 0 for simple right / left or left/right shift combination.
3374 return 1 for a combination of shifts with zero_extend.
3375 return 2 for a combination of shifts with an AND that needs r0.
3376 return 3 for a combination of shifts with an AND that needs an extra
3377 scratch register, when the three highmost bits of the AND mask are clear.
3378 return 4 for a combination of shifts with an AND that needs an extra
3379 scratch register, when any of the three highmost bits of the AND mask
3380 is set.
3381 If ATTRP is set, store an initial right shift width in ATTRP[0],
3382 and the instruction length in ATTRP[1] . These values are not valid
3383 when returning 0.
3384 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3385 shift_amounts for the last shift value that is to be used before the
3386 sign extend. */
3388 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3390 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3391 int left = INTVAL (left_rtx), right;
3392 int best = 0;
3393 int cost, best_cost = 10000;
3394 int best_right = 0, best_len = 0;
3395 int i;
3396 int can_ext;
3398 if (left < 0 || left > 31)
3399 return 0;
3400 if (CONST_INT_P (mask_rtx))
3401 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3402 else
3403 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3404 /* Can this be expressed as a right shift / left shift pair? */
3405 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3406 right = exact_log2 (lsb);
3407 mask2 = ~(mask + lsb - 1);
3408 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3409 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3410 if (! mask2)
3411 best_cost = shift_insns[right] + shift_insns[right + left];
3412 /* mask has no trailing zeroes <==> ! right */
3413 else if (! right && mask2 == ~(lsb2 - 1))
3415 int late_right = exact_log2 (lsb2);
3416 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
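/* Worked example of the bit twiddling above (illustrative values): for
left = 8 and a mask of 0xff00, mask = 0xff00 >> 8 = 0xff, so lsb = 1,
right = 0, mask2 = ~0xff and lsb2 = 0x100. Then mask2 == ~(lsb2 - 1)
holds, late_right = 8, and best_cost = shift_insns[16] + shift_insns[8]
= 1 + 1: ((x << 8) & 0xff00) can be done as a left shift by 16 followed
by a logical right shift by 8. */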
3418 /* Try to use zero extend. */
3419 if (mask2 == ~(lsb2 - 1))
3421 int width, first;
3423 for (width = 8; width <= 16; width += 8)
3425 /* Can we zero-extend right away? */
3426 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3428 cost
3429 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3430 if (cost < best_cost)
3432 best = 1;
3433 best_cost = cost;
3434 best_right = right;
3435 best_len = cost;
3436 if (attrp)
3437 attrp[2] = -1;
3439 continue;
3441 /* ??? Could try to put zero extend into initial right shift,
3442 or even shift a bit left before the right shift. */
3443 /* Determine value of first part of left shift, to get to the
3444 zero extend cut-off point. */
3445 first = width - exact_log2 (lsb2) + right;
3446 if (first >= 0 && right + left - first >= 0)
3448 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3449 + ext_shift_insns[right + left - first];
3450 if (cost < best_cost)
3452 best = 1;
3453 best_cost = cost;
3454 best_right = right;
3455 best_len = cost;
3456 if (attrp)
3457 attrp[2] = first;
3462 /* Try to use r0 AND pattern */
3463 for (i = 0; i <= 2; i++)
3465 if (i > right)
3466 break;
3467 if (! CONST_OK_FOR_K08 (mask >> i))
3468 continue;
3469 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3470 if (cost < best_cost)
3472 best = 2;
3473 best_cost = cost;
3474 best_right = i;
3475 best_len = cost - 1;
3478 /* Try to use a scratch register to hold the AND operand. */
3479 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3480 for (i = 0; i <= 2; i++)
3482 if (i > right)
3483 break;
3484 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3485 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3486 if (cost < best_cost)
3488 best = 4 - can_ext;
3489 best_cost = cost;
3490 best_right = i;
3491 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3495 if (attrp)
3497 attrp[0] = best_right;
3498 attrp[1] = best_len;
3500 return best;
3503 /* This is used in length attributes of the unnamed instructions
3504 corresponding to shl_and_kind return values of 1 and 2. */
3506 shl_and_length (rtx insn)
3508 rtx set_src, left_rtx, mask_rtx;
3509 int attributes[3];
3511 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3512 left_rtx = XEXP (XEXP (set_src, 0), 1);
3513 mask_rtx = XEXP (set_src, 1);
3514 shl_and_kind (left_rtx, mask_rtx, attributes);
3515 return attributes[1];
3518 /* This is used in length attribute of the and_shl_scratch instruction. */
3521 shl_and_scr_length (rtx insn)
3523 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3524 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3525 rtx op = XEXP (set_src, 0);
3526 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3527 op = XEXP (XEXP (op, 0), 0);
3528 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3531 /* Generate rtl for instructions for which shl_and_kind advised a particular
3532 method of generating them, i.e. returned nonzero. */
3535 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3537 int attributes[3];
3538 unsigned HOST_WIDE_INT mask;
3539 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3540 int right, total_shift;
3541 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3543 right = attributes[0];
3544 total_shift = INTVAL (left_rtx) + right;
3545 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3546 switch (kind)
3548 default:
3549 return -1;
3550 case 1:
3552 int first = attributes[2];
3553 rtx operands[3];
3555 if (first < 0)
3557 emit_insn ((mask << right) <= 0xff
3558 ? gen_zero_extendqisi2 (dest,
3559 gen_lowpart (QImode, source))
3560 : gen_zero_extendhisi2 (dest,
3561 gen_lowpart (HImode, source)));
3562 source = dest;
3564 if (source != dest)
3565 emit_insn (gen_movsi (dest, source));
3566 operands[0] = dest;
3567 if (right)
3569 operands[2] = GEN_INT (right);
3570 gen_shifty_hi_op (LSHIFTRT, operands);
3572 if (first > 0)
3574 operands[2] = GEN_INT (first);
3575 gen_shifty_hi_op (ASHIFT, operands);
3576 total_shift -= first;
3577 mask <<= first;
3579 if (first >= 0)
3580 emit_insn (mask <= 0xff
3581 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3582 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3583 if (total_shift > 0)
3585 operands[2] = GEN_INT (total_shift);
3586 gen_shifty_hi_op (ASHIFT, operands);
3588 break;
3590 case 4:
3591 shift_gen_fun = gen_shifty_op;
3592 case 3:
3593 /* If the topmost bit that matters is set, set the topmost bits
3594 that don't matter. This way, we might be able to get a shorter
3595 signed constant. */
3596 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3597 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3598 case 2:
3599 /* Don't expand fine-grained when combining, because that will
3600 make the pattern fail. */
3601 if (currently_expanding_to_rtl
3602 || reload_in_progress || reload_completed)
3604 rtx operands[3];
3606 /* Cases 3 and 4 should be handled by this split
3607 only while combining */
3608 gcc_assert (kind <= 2);
3609 if (right)
3611 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3612 source = dest;
3614 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3615 if (total_shift)
3617 operands[0] = dest;
3618 operands[1] = dest;
3619 operands[2] = GEN_INT (total_shift);
3620 shift_gen_fun (ASHIFT, operands);
3622 break;
3624 else
3626 int neg = 0;
3627 if (kind != 4 && total_shift < 16)
3629 neg = -ext_shift_amounts[total_shift][1];
3630 if (neg > 0)
3631 neg -= ext_shift_amounts[total_shift][2];
3632 else
3633 neg = 0;
3635 emit_insn (gen_and_shl_scratch (dest, source,
3636 GEN_INT (right),
3637 GEN_INT (mask),
3638 GEN_INT (total_shift + neg),
3639 GEN_INT (neg)));
3640 emit_insn (gen_movsi (dest, dest));
3641 break;
3644 return 0;
3647 /* Try to find a good way to implement the combiner pattern
3648 [(set (match_operand:SI 0 "register_operand" "=r")
3649 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3650 (match_operand:SI 2 "const_int_operand" "n")
3651 (match_operand:SI 3 "const_int_operand" "n")
3652 (const_int 0)))
3653 (clobber (reg:SI T_REG))]
3654 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3655 return 0 for simple left / right shift combination.
3656 return 1 for left shift / 8 bit sign extend / left shift.
3657 return 2 for left shift / 16 bit sign extend / left shift.
3658 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3659 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3660 return 5 for left shift / 16 bit sign extend / right shift
3661 return 6 for < 8 bit sign extend / left shift.
3662 return 7 for < 8 bit sign extend / left shift / single right shift.
3663 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3666 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3668 int left, size, insize, ext;
3669 int cost = 0, best_cost;
3670 int kind;
3672 left = INTVAL (left_rtx);
3673 size = INTVAL (size_rtx);
3674 insize = size - left;
3675 gcc_assert (insize > 0);
3676 /* Default to left / right shift. */
3677 kind = 0;
3678 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
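/* Cost example for the default estimate above (numbers taken from the
shift tables earlier in this file): for left = 2 and size = 16,
insize = 14, so the plain left / right pair costs shift_insns[18]
+ ashiftrt_insns[16] = 2 + 2 = 4 insns, while the 16 bit
shift / sign extend / shift alternative tried just below costs
shift_insns[2] + 1 + ashiftrt_insns[0] = 1 + 1 + 0 = 2, so kind 5
is chosen for that input. */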
3679 if (size <= 16)
3681 /* 16 bit shift / sign extend / 16 bit shift */
3682 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3683 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3684 below, by alternative 3 or something even better. */
3685 if (cost < best_cost)
3687 kind = 5;
3688 best_cost = cost;
3691 /* Try a plain sign extend between two shifts. */
3692 for (ext = 16; ext >= insize; ext -= 8)
3694 if (ext <= size)
3696 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3697 if (cost < best_cost)
3699 kind = ext / (unsigned) 8;
3700 best_cost = cost;
3703 /* Check if we can do a sloppy shift with a final signed shift
3704 restoring the sign. */
3705 if (EXT_SHIFT_SIGNED (size - ext))
3706 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3707 /* If not, maybe it's still cheaper to do the second shift sloppy,
3708 and do a final sign extend? */
3709 else if (size <= 16)
3710 cost = ext_shift_insns[ext - insize] + 1
3711 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3712 else
3713 continue;
3714 if (cost < best_cost)
3716 kind = ext / (unsigned) 8 + 2;
3717 best_cost = cost;
3720 /* Check if we can sign extend in r0 */
3721 if (insize < 8)
3723 cost = 3 + shift_insns[left];
3724 if (cost < best_cost)
3726 kind = 6;
3727 best_cost = cost;
3729 /* Try the same with a final signed shift. */
3730 if (left < 31)
3732 cost = 3 + ext_shift_insns[left + 1] + 1;
3733 if (cost < best_cost)
3735 kind = 7;
3736 best_cost = cost;
3740 if (TARGET_SH3)
3742 /* Try to use a dynamic shift. */
3743 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3744 if (cost < best_cost)
3746 kind = 0;
3747 best_cost = cost;
3750 if (costp)
3751 *costp = cost;
3752 return kind;
3755 /* Function to be used in the length attribute of the instructions
3756 implementing this pattern. */
3759 shl_sext_length (rtx insn)
3761 rtx set_src, left_rtx, size_rtx;
3762 int cost;
3764 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3765 left_rtx = XEXP (XEXP (set_src, 0), 1);
3766 size_rtx = XEXP (set_src, 1);
3767 shl_sext_kind (left_rtx, size_rtx, &cost);
3768 return cost;
3771 /* Generate rtl for this pattern */
3774 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3776 int kind;
3777 int left, size, insize, cost;
3778 rtx operands[3];
3780 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3781 left = INTVAL (left_rtx);
3782 size = INTVAL (size_rtx);
3783 insize = size - left;
3784 switch (kind)
3786 case 1:
3787 case 2:
3788 case 3:
3789 case 4:
3791 int ext = kind & 1 ? 8 : 16;
3792 int shift2 = size - ext;
3794 /* Don't expand fine-grained when combining, because that will
3795 make the pattern fail. */
3796 if (! currently_expanding_to_rtl
3797 && ! reload_in_progress && ! reload_completed)
3799 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3800 emit_insn (gen_movsi (dest, source));
3801 break;
3803 if (dest != source)
3804 emit_insn (gen_movsi (dest, source));
3805 operands[0] = dest;
3806 if (ext - insize)
3808 operands[2] = GEN_INT (ext - insize);
3809 gen_shifty_hi_op (ASHIFT, operands);
3811 emit_insn (kind & 1
3812 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3813 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3814 if (kind <= 2)
3816 if (shift2)
3818 operands[2] = GEN_INT (shift2);
3819 gen_shifty_op (ASHIFT, operands);
3822 else
3824 if (shift2 > 0)
3826 if (EXT_SHIFT_SIGNED (shift2))
3828 operands[2] = GEN_INT (shift2 + 1);
3829 gen_shifty_op (ASHIFT, operands);
3830 operands[2] = const1_rtx;
3831 gen_shifty_op (ASHIFTRT, operands);
3832 break;
3834 operands[2] = GEN_INT (shift2);
3835 gen_shifty_hi_op (ASHIFT, operands);
3837 else if (shift2)
3839 operands[2] = GEN_INT (-shift2);
3840 gen_shifty_hi_op (LSHIFTRT, operands);
3842 emit_insn (size <= 8
3843 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3844 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3846 break;
3848 case 5:
3850 int i = 16 - size;
3851 if (! currently_expanding_to_rtl
3852 && ! reload_in_progress && ! reload_completed)
3853 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3854 else
3856 operands[0] = dest;
3857 operands[2] = GEN_INT (16 - insize);
3858 gen_shifty_hi_op (ASHIFT, operands);
3859 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3861 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3862 while (--i >= 0)
3863 gen_ashift (ASHIFTRT, 1, dest);
3864 break;
3866 case 6:
3867 case 7:
3868 /* Don't expand fine-grained when combining, because that will
3869 make the pattern fail. */
3870 if (! currently_expanding_to_rtl
3871 && ! reload_in_progress && ! reload_completed)
3873 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3874 emit_insn (gen_movsi (dest, source));
3875 break;
3877 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3878 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3879 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3880 operands[0] = dest;
3881 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3882 gen_shifty_op (ASHIFT, operands);
3883 if (kind == 7)
3884 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3885 break;
3886 default:
3887 return -1;
3889 return 0;
3892 /* Prefix a symbol_ref name with "datalabel". */
3895 gen_datalabel_ref (rtx sym)
3897 const char *str;
3899 if (GET_CODE (sym) == LABEL_REF)
3900 return gen_rtx_CONST (GET_MODE (sym),
3901 gen_rtx_UNSPEC (GET_MODE (sym),
3902 gen_rtvec (1, sym),
3903 UNSPEC_DATALABEL));
3905 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3907 str = XSTR (sym, 0);
3908 /* Share all SYMBOL_REF strings with the same value - that is important
3909 for cse. */
3910 str = IDENTIFIER_POINTER (get_identifier (str));
3911 XSTR (sym, 0) = str;
3913 return sym;
3917 static alloc_pool label_ref_list_pool;
3919 typedef struct label_ref_list_d
3921 rtx label;
3922 struct label_ref_list_d *next;
3923 } *label_ref_list_t;
3925 /* The SH cannot load a large constant into a register, constants have to
3926 come from a pc relative load. The reference of a pc relative load
3927 instruction must be less than 1k in front of the instruction. This
3928 means that we often have to dump a constant inside a function, and
3929 generate code to branch around it.
3931 It is important to minimize this, since the branches will slow things
3932 down and make things bigger.
3934 Worst case code looks like:
3936 mov.l L1,rn
3937 bra L2
3938 nop
3939 align
3940 L1: .long value
3941 L2:
3942 ..
3944 mov.l L3,rn
3945 bra L4
3946 nop
3947 align
3948 L3: .long value
3949 L4:
3950 ..
3952 We fix this by performing a scan before scheduling, which notices which
3953 instructions need to have their operands fetched from the constant table
3954 and builds the table.
3956 The algorithm is:
3958 scan, find an instruction which needs a pcrel move. Look forward, find the
3959 last barrier which is within MAX_COUNT bytes of the requirement.
3960 If there isn't one, make one. Process all the instructions between
3961 the find and the barrier.
3963 In the above example, we can tell that L3 is within 1k of L1, so
3964 the first move can be shrunk from the 3 insn+constant sequence into
3965 just 1 insn, and the constant moved to L3 to make:
3967 mov.l L1,rn
3968 ..
3969 mov.l L3,rn
3970 bra L4
3971 nop
3972 align
3973 L3:.long value
3974 L4:.long value
3976 Then the second move becomes the target for the shortening process. */
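/* A rough sketch of how the pieces below fit together (illustrative only;
   the real loop is in sh_reorg, built on find_barrier, broken_move,
   add_constant and dump_table):

     for (insn = first; insn; insn = NEXT_INSN (insn))
       if (broken_move (insn))
         {
           rtx barrier = find_barrier (num_mova, mova, insn);
           for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
             if (broken_move (scan))
               ... rewrite SCAN into a pc-relative load from the label
                   returned by add_constant ...;
           dump_table (need_aligned_label ? insn : 0, barrier);
           insn = barrier;
         }  */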
3978 typedef struct
3980 rtx value; /* Value in table. */
3981 rtx label; /* Label of value. */
3982 label_ref_list_t wend; /* End of window. */
3983 enum machine_mode mode; /* Mode of value. */
3985 /* True if this constant is accessed as part of a post-increment
3986 sequence. Note that HImode constants are never accessed in this way. */
3987 bool part_of_sequence_p;
3988 } pool_node;
3990 /* The maximum number of constants that can fit into one pool, since
3991 constants in the range 0..510 are at least 2 bytes long, and in the
3992 range from there to 1018 at least 4 bytes. */
3994 #define MAX_POOL_SIZE 372
3995 static pool_node pool_vector[MAX_POOL_SIZE];
3996 static int pool_size;
3997 static rtx pool_window_label;
3998 static int pool_window_last;
4000 static int max_labelno_before_reorg;
4002 /* ??? If we need a constant in HImode which is the truncated value of a
4003 constant we need in SImode, we could combine the two entries thus saving
4004 two bytes. Is this common enough to be worth the effort of implementing
4005 it? */
4007 /* ??? This stuff should be done at the same time that we shorten branches.
4008 As it is now, we must assume that all branches are the maximum size, and
4009 this causes us to almost always output constant pools sooner than
4010 necessary. */
4012 /* Add a constant to the pool and return its label. */
4014 static rtx
4015 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4017 int i;
4018 rtx lab, new_rtx;
4019 label_ref_list_t ref, newref;
4021 /* First see if we've already got it. */
4022 for (i = 0; i < pool_size; i++)
4024 if (x->code == pool_vector[i].value->code
4025 && mode == pool_vector[i].mode)
4027 if (x->code == CODE_LABEL)
4029 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4030 continue;
4032 if (rtx_equal_p (x, pool_vector[i].value))
4034 lab = new_rtx = 0;
4035 if (! last_value
4036 || ! i
4037 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4039 new_rtx = gen_label_rtx ();
4040 LABEL_REFS (new_rtx) = pool_vector[i].label;
4041 pool_vector[i].label = lab = new_rtx;
4043 if (lab && pool_window_label)
4045 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4046 newref->label = pool_window_label;
4047 ref = pool_vector[pool_window_last].wend;
4048 newref->next = ref;
4049 pool_vector[pool_window_last].wend = newref;
4051 if (new_rtx)
4052 pool_window_label = new_rtx;
4053 pool_window_last = i;
4054 return lab;
4059 /* Need a new one. */
4060 pool_vector[pool_size].value = x;
4061 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4063 lab = 0;
4064 pool_vector[pool_size - 1].part_of_sequence_p = true;
4066 else
4067 lab = gen_label_rtx ();
4068 pool_vector[pool_size].mode = mode;
4069 pool_vector[pool_size].label = lab;
4070 pool_vector[pool_size].wend = NULL;
4071 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4072 if (lab && pool_window_label)
4074 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4075 newref->label = pool_window_label;
4076 ref = pool_vector[pool_window_last].wend;
4077 newref->next = ref;
4078 pool_vector[pool_window_last].wend = newref;
4080 if (lab)
4081 pool_window_label = lab;
4082 pool_window_last = pool_size;
4083 pool_size++;
4084 return lab;
4087 /* Output the literal table. START, if nonzero, is the first instruction
4088 this table is needed for, and also indicates that there is at least one
4089 casesi_worker_2 instruction; we have to emit the operand3 labels from
4090 these insns at a 4-byte aligned position. BARRIER is the barrier
4091 after which we are to place the table. */
4093 static void
4094 dump_table (rtx start, rtx barrier)
4096 rtx scan = barrier;
4097 int i;
4098 int need_align = 1;
4099 rtx lab;
4100 label_ref_list_t ref;
4101 int have_df = 0;
4103 /* Do two passes, first time dump out the HI sized constants. */
4105 for (i = 0; i < pool_size; i++)
4107 pool_node *p = &pool_vector[i];
4109 if (p->mode == HImode)
4111 if (need_align)
4113 scan = emit_insn_after (gen_align_2 (), scan);
4114 need_align = 0;
4116 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4117 scan = emit_label_after (lab, scan);
4118 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4119 scan);
4120 for (ref = p->wend; ref; ref = ref->next)
4122 lab = ref->label;
4123 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4126 else if (p->mode == DFmode)
4127 have_df = 1;
4130 need_align = 1;
4132 if (start)
4134 scan = emit_insn_after (gen_align_4 (), scan);
4135 need_align = 0;
4136 for (; start != barrier; start = NEXT_INSN (start))
4137 if (NONJUMP_INSN_P (start)
4138 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4140 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4141 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4143 scan = emit_label_after (lab, scan);
4146 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4148 rtx align_insn = NULL_RTX;
4150 scan = emit_label_after (gen_label_rtx (), scan);
4151 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4152 need_align = 0;
4154 for (i = 0; i < pool_size; i++)
4156 pool_node *p = &pool_vector[i];
4158 switch (p->mode)
4160 case HImode:
4161 break;
4162 case SImode:
4163 case SFmode:
4164 if (align_insn && !p->part_of_sequence_p)
4166 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4167 emit_label_before (lab, align_insn);
4168 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4169 align_insn);
4170 for (ref = p->wend; ref; ref = ref->next)
4172 lab = ref->label;
4173 emit_insn_before (gen_consttable_window_end (lab),
4174 align_insn);
4176 delete_insn (align_insn);
4177 align_insn = NULL_RTX;
4178 continue;
4180 else
4182 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4183 scan = emit_label_after (lab, scan);
4184 scan = emit_insn_after (gen_consttable_4 (p->value,
4185 const0_rtx), scan);
4186 need_align = ! need_align;
4188 break;
4189 case DFmode:
4190 if (need_align)
4192 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4193 align_insn = scan;
4194 need_align = 0;
4196 case DImode:
4197 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4198 scan = emit_label_after (lab, scan);
4199 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4200 scan);
4201 break;
4202 default:
4203 gcc_unreachable ();
4206 if (p->mode != HImode)
4208 for (ref = p->wend; ref; ref = ref->next)
4210 lab = ref->label;
4211 scan = emit_insn_after (gen_consttable_window_end (lab),
4212 scan);
4217 pool_size = 0;
4220 for (i = 0; i < pool_size; i++)
4222 pool_node *p = &pool_vector[i];
4224 switch (p->mode)
4226 case HImode:
4227 break;
4228 case SImode:
4229 case SFmode:
4230 if (need_align)
4232 need_align = 0;
4233 scan = emit_label_after (gen_label_rtx (), scan);
4234 scan = emit_insn_after (gen_align_4 (), scan);
4236 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4237 scan = emit_label_after (lab, scan);
4238 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4239 scan);
4240 break;
4241 case DFmode:
4242 case DImode:
4243 if (need_align)
4245 need_align = 0;
4246 scan = emit_label_after (gen_label_rtx (), scan);
4247 scan = emit_insn_after (gen_align_4 (), scan);
4249 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4250 scan = emit_label_after (lab, scan);
4251 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4252 scan);
4253 break;
4254 default:
4255 gcc_unreachable ();
4258 if (p->mode != HImode)
4260 for (ref = p->wend; ref; ref = ref->next)
4262 lab = ref->label;
4263 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4268 scan = emit_insn_after (gen_consttable_end (), scan);
4269 scan = emit_barrier_after (scan);
4270 pool_size = 0;
4271 pool_window_label = NULL_RTX;
4272 pool_window_last = 0;
4275 /* Return nonzero if constant would be an ok source for a
4276 mov.w instead of a mov.l. */
4278 static int
4279 hi_const (rtx src)
4281 return (CONST_INT_P (src)
4282 && INTVAL (src) >= -32768
4283 && INTVAL (src) <= 32767);
4286 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4288 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4290 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4291 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4292 need to fix it if the input value is CONST_OK_FOR_I08. */
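/* For example (an illustrative case, not from the original comment):
   (set (reg:SI r1) (const_int 0x12345678)) is "broken", since the constant
   is outside the signed 8-bit I08 immediate range and therefore has to be
   rewritten as a pc-relative load from the constant pool.  */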
4294 static int
4295 broken_move (rtx insn)
4297 if (NONJUMP_INSN_P (insn))
4299 rtx pat = PATTERN (insn);
4300 if (GET_CODE (pat) == PARALLEL)
4301 pat = XVECEXP (pat, 0, 0);
4302 if (GET_CODE (pat) == SET
4303 /* We can load any 8-bit value if we don't care what the high
4304 order bits end up as. */
4305 && GET_MODE (SET_DEST (pat)) != QImode
4306 && (CONSTANT_P (SET_SRC (pat))
4307 /* Match mova_const. */
4308 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4309 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4310 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4311 && ! (TARGET_SH2E
4312 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4313 && (fp_zero_operand (SET_SRC (pat))
4314 || fp_one_operand (SET_SRC (pat)))
4315 /* In general we don't know the current setting of fpscr, so disable fldi.
4316 There is an exception if this was a register-register move
4317 before reload - and hence it was ascertained that we have
4318 single precision setting - and in a post-reload optimization
4319 we changed this to do a constant load. In that case
4320 we don't have an r0 clobber, hence we must use fldi. */
4321 && (TARGET_FMOVD
4322 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4323 == SCRATCH))
4324 && REG_P (SET_DEST (pat))
4325 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4326 && ! (TARGET_SH2A
4327 && GET_MODE (SET_DEST (pat)) == SImode
4328 && (satisfies_constraint_I20 (SET_SRC (pat))
4329 || satisfies_constraint_I28 (SET_SRC (pat))))
4330 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4331 return 1;
4334 return 0;
4337 static int
4338 mova_p (rtx insn)
4340 return (NONJUMP_INSN_P (insn)
4341 && GET_CODE (PATTERN (insn)) == SET
4342 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4343 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4344 /* Don't match mova_const. */
4345 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4348 /* Fix up a mova from a switch that went out of range. */
4349 static void
4350 fixup_mova (rtx mova)
4352 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4353 if (! flag_pic)
4355 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4356 INSN_CODE (mova) = -1;
4358 else
4360 rtx worker = mova;
4361 rtx lab = gen_label_rtx ();
4362 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4366 worker = NEXT_INSN (worker);
4367 gcc_assert (worker
4368 && !LABEL_P (worker)
4369 && !JUMP_P (worker));
4370 } while (NOTE_P (worker)
4371 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4372 wpat = PATTERN (worker);
4373 wpat0 = XVECEXP (wpat, 0, 0);
4374 wpat1 = XVECEXP (wpat, 0, 1);
4375 wsrc = SET_SRC (wpat0);
4376 PATTERN (worker) = (gen_casesi_worker_2
4377 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4378 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4379 XEXP (wpat1, 0)));
4380 INSN_CODE (worker) = -1;
4381 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4382 base = gen_rtx_LABEL_REF (Pmode, lab);
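/* (unspec [TARGET BASE] UNSPEC_SYMOFF) stands for TARGET - BASE, so the
   mova now loads the original target expressed as an offset from the new
   label, which keeps the value position-independent.  */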
4383 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4384 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4385 INSN_CODE (mova) = -1;
4389 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4390 *num_mova, and check if the new mova is not nested within the first one.
4391 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4392 2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4393 static int
4394 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4396 int n_addr = 0; /* Initialization to shut up spurious warning. */
4397 int f_target, n_target = 0; /* Likewise. */
4399 if (optimize)
4401 /* If NEW_MOVA has no address yet, it will be handled later. */
4402 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4403 return -1;
4405 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4406 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4407 if (n_addr > n_target || n_addr + 1022 < n_target)
4409 /* Change the mova into a load.
4410 broken_move will then return true for it. */
4411 fixup_mova (new_mova);
4412 return 1;
4415 if (!(*num_mova)++)
4417 *first_mova = new_mova;
4418 return 2;
4420 if (!optimize
4421 || ((f_target
4422 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4423 >= n_target))
4424 return -1;
4426 (*num_mova)--;
4427 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4428 > n_target - n_addr)
4430 fixup_mova (*first_mova);
4431 return 0;
4433 else
4435 fixup_mova (new_mova);
4436 return 1;
4440 /* Find the last barrier from insn FROM which is close enough to hold the
4441 constant pool. If we can't find one, then create one near the end of
4442 the range. */
4444 static rtx
4445 find_barrier (int num_mova, rtx mova, rtx from)
4447 int count_si = 0;
4448 int count_hi = 0;
4449 int found_hi = 0;
4450 int found_si = 0;
4451 int found_di = 0;
4452 int hi_align = 2;
4453 int si_align = 2;
4454 int leading_mova = num_mova;
4455 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4456 int si_limit;
4457 int hi_limit;
4458 rtx orig = from;
4459 rtx last_got = NULL_RTX;
4460 rtx last_symoff = NULL_RTX;
4462 /* For HImode: range is 510, add 4 because pc counts from address of
4463 second instruction after this one, subtract 2 for the jump instruction
4464 that we may need to emit before the table, subtract 2 for the instruction
4465 that fills the jump delay slot (in very rare cases, reorg will take an
4466 instruction from after the constant pool or will leave the delay slot
4467 empty). This gives 510.
4468 For SImode: range is 1020, add 4 because pc counts from address of
4469 second instruction after this one, subtract 2 in case pc is 2 byte
4470 aligned, subtract 2 for the jump instruction that we may need to emit
4471 before the table, subtract 2 for the instruction that fills the jump
4472 delay slot. This gives 1018. */
4474 /* The branch will always be shortened now that the reference address for
4475 forward branches is the successor address, thus we need no longer make
4476 adjustments to the [sh]i_limit for -O0. */
4478 si_limit = 1018;
4479 hi_limit = 510;
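/* Spelled out, the arithmetic from the comment above: SImode is
   1020 + 4 - 2 - 2 - 2 == 1018 and HImode is 510 + 4 - 2 - 2 == 510.  */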
4481 while (from && count_si < si_limit && count_hi < hi_limit)
4483 int inc = get_attr_length (from);
4484 int new_align = 1;
4486 /* If this is a label that existed at the time of the compute_alignments
4487 call, determine the alignment. N.B. When find_barrier recurses for
4488 an out-of-reach mova, we might see labels at the start of previously
4489 inserted constant tables. */
4490 if (LABEL_P (from)
4491 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4493 if (optimize)
4494 new_align = 1 << label_to_alignment (from);
4495 else if (BARRIER_P (prev_nonnote_insn (from)))
4496 new_align = 1 << barrier_align (from);
4497 else
4498 new_align = 1;
4499 inc = 0;
4501 /* In case we are scanning a constant table because of recursion, check
4502 for explicit alignments. If the table is long, we might be forced
4503 to emit the new table in front of it; the length of the alignment
4504 might be the last straw. */
4505 else if (NONJUMP_INSN_P (from)
4506 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4507 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4508 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4509 /* When we find the end of a constant table, paste the new constant
4510 at the end. That is better than putting it in front because
4511 this way, we don't need extra alignment for adding a 4-byte-aligned
4512 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4513 else if (NONJUMP_INSN_P (from)
4514 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4515 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4516 return from;
4518 if (BARRIER_P (from))
4520 rtx next;
4522 found_barrier = from;
4524 /* If we are at the end of the function, or in front of an alignment
4525 instruction, we need not insert an extra alignment. We prefer
4526 this kind of barrier. */
4527 if (barrier_align (from) > 2)
4528 good_barrier = from;
4530 /* If we are at the end of a hot/cold block, dump the constants
4531 here. */
4532 next = NEXT_INSN (from);
4533 if (next
4534 && NOTE_P (next)
4535 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4536 break;
4539 if (broken_move (from))
4541 rtx pat, src, dst;
4542 enum machine_mode mode;
4544 pat = PATTERN (from);
4545 if (GET_CODE (pat) == PARALLEL)
4546 pat = XVECEXP (pat, 0, 0);
4547 src = SET_SRC (pat);
4548 dst = SET_DEST (pat);
4549 mode = GET_MODE (dst);
4551 /* GOT pc-relative setting comes in a pair of
4552 mova .L8,r0
4553 mov.l .L8,r12
4554 instructions (plus add r0,r12).
4555 Remember if we see one without the other. */
4556 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4557 last_got = last_got ? NULL_RTX : from;
4558 else if (PIC_ADDR_P (src))
4559 last_got = last_got ? NULL_RTX : from;
4561 /* We must explicitly check the mode, because sometimes the
4562 front end will generate code to load unsigned constants into
4563 HImode targets without properly sign extending them. */
4564 if (mode == HImode
4565 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4567 found_hi += 2;
4568 /* We put the short constants before the long constants, so
4569 we must count the length of short constants in the range
4570 for the long constants. */
4571 /* ??? This isn't optimal, but is easy to do. */
4572 si_limit -= 2;
4574 else
4576 /* We dump DF/DI constants before SF/SI ones, because
4577 the limit is the same, but the alignment requirements
4578 are higher. We may waste up to 4 additional bytes
4579 for alignment, and the DF/DI constant may have
4580 another SF/SI constant placed before it. */
4581 if (TARGET_SHCOMPACT
4582 && ! found_di
4583 && (mode == DFmode || mode == DImode))
4585 found_di = 1;
4586 si_limit -= 8;
4588 while (si_align > 2 && found_si + si_align - 2 > count_si)
4589 si_align >>= 1;
4590 if (found_si > count_si)
4591 count_si = found_si;
4592 found_si += GET_MODE_SIZE (mode);
4593 if (num_mova)
4594 si_limit -= GET_MODE_SIZE (mode);
4598 if (mova_p (from))
4600 switch (untangle_mova (&num_mova, &mova, from))
4602 case 1:
4603 if (flag_pic)
4605 rtx src = SET_SRC (PATTERN (from));
4606 if (GET_CODE (src) == CONST
4607 && GET_CODE (XEXP (src, 0)) == UNSPEC
4608 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4609 last_symoff = from;
4611 break;
4612 case 0: return find_barrier (0, 0, mova);
4613 case 2:
4615 leading_mova = 0;
4616 barrier_before_mova
4617 = good_barrier ? good_barrier : found_barrier;
4619 default: break;
4621 if (found_si > count_si)
4622 count_si = found_si;
4624 else if (JUMP_TABLE_DATA_P (from))
4626 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4627 || (num_mova
4628 && (prev_nonnote_insn (from)
4629 == XEXP (MOVA_LABELREF (mova), 0))))
4630 num_mova--;
4631 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4633 /* We have just passed the barrier in front of the
4634 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4635 the ADDR_DIFF_VEC is accessed as data, just like our pool
4636 constants, this is a good opportunity to accommodate what
4637 we have gathered so far.
4638 If we waited any longer, we could end up at a barrier in
4639 front of code, which gives worse cache usage for separated
4640 instruction / data caches. */
4641 good_barrier = found_barrier;
4642 break;
4644 else
4646 rtx body = PATTERN (from);
4647 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4650 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4651 else if (JUMP_P (from)
4652 && ! TARGET_SH2
4653 && ! optimize_size)
4654 new_align = 4;
4656 /* There is a possibility that a bf is transformed into a bf/s by the
4657 delay slot scheduler. */
4658 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4659 && get_attr_type (from) == TYPE_CBRANCH
4660 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4661 inc += 2;
4663 if (found_si)
4665 count_si += inc;
4666 if (new_align > si_align)
4668 si_limit -= (count_si - 1) & (new_align - si_align);
4669 si_align = new_align;
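/* Round the byte count up to the new alignment boundary.  */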
4671 count_si = (count_si + new_align - 1) & -new_align;
4673 if (found_hi)
4675 count_hi += inc;
4676 if (new_align > hi_align)
4678 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4679 hi_align = new_align;
4681 count_hi = (count_hi + new_align - 1) & -new_align;
4683 from = NEXT_INSN (from);
4686 if (num_mova)
4688 if (leading_mova)
4690 /* Try as we might, the leading mova is out of range. Change
4691 it into a load (which will become a pcload) and retry. */
4692 fixup_mova (mova);
4693 return find_barrier (0, 0, mova);
4695 else
4697 /* Insert the constant pool table before the mova instruction,
4698 to prevent the mova label reference from going out of range. */
4699 from = mova;
4700 good_barrier = found_barrier = barrier_before_mova;
4704 if (found_barrier)
4706 if (good_barrier && next_real_insn (found_barrier))
4707 found_barrier = good_barrier;
4709 else
4711 /* We didn't find a barrier in time to dump our stuff,
4712 so we'll make one. */
4713 rtx label = gen_label_rtx ();
4715 /* Don't emit a constant table in the middle of insns for
4716 casesi_worker_2. This is a bit overkill but is enough
4717 because casesi_worker_2 wouldn't appear so frequently. */
4718 if (last_symoff)
4719 from = last_symoff;
4721 /* If we exceeded the range, then we must back up over the last
4722 instruction we looked at. Otherwise, we just need to undo the
4723 NEXT_INSN at the end of the loop. */
4724 if (PREV_INSN (from) != orig
4725 && (count_hi > hi_limit || count_si > si_limit))
4726 from = PREV_INSN (PREV_INSN (from));
4727 else
4728 from = PREV_INSN (from);
4730 /* Don't emit a constant table in the middle of global pointer setting,
4731 since that would move the addressing base GOT into another table.
4732 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4733 in the pool anyway, so just move up the whole constant pool. */
4734 if (last_got)
4735 from = PREV_INSN (last_got);
4737 /* Don't insert the constant pool table at the position which
4738 may be the landing pad. */
4739 if (flag_exceptions
4740 && CALL_P (from)
4741 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4742 from = PREV_INSN (from);
4744 /* Walk back to be just before any jump or label.
4745 Putting it before a label reduces the number of times the branch
4746 around the constant pool table will be hit. Putting it before
4747 a jump makes it more likely that the bra delay slot will be
4748 filled. */
4749 while (NOTE_P (from) || JUMP_P (from)
4750 || LABEL_P (from))
4751 from = PREV_INSN (from);
4753 from = emit_jump_insn_after (gen_jump (label), from);
4754 JUMP_LABEL (from) = label;
4755 LABEL_NUSES (label) = 1;
4756 found_barrier = emit_barrier_after (from);
4757 emit_label_after (label, found_barrier);
4760 return found_barrier;
4763 /* If the instruction INSN is implemented by a special function, and we can
4764 positively find the register that is used to call the sfunc, and this
4765 register is not used anywhere else in this instruction - except as the
4766 destination of a set, return this register; else, return 0. */
4768 sfunc_uses_reg (rtx insn)
4770 int i;
4771 rtx pattern, part, reg_part, reg;
4773 if (!NONJUMP_INSN_P (insn))
4774 return 0;
4775 pattern = PATTERN (insn);
4776 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4777 return 0;
4779 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4781 part = XVECEXP (pattern, 0, i);
4782 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4783 reg_part = part;
4785 if (! reg_part)
4786 return 0;
4787 reg = XEXP (reg_part, 0);
4788 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4790 part = XVECEXP (pattern, 0, i);
4791 if (part == reg_part || GET_CODE (part) == CLOBBER)
4792 continue;
4793 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4794 && REG_P (SET_DEST (part)))
4795 ? SET_SRC (part) : part)))
4796 return 0;
4798 return reg;
4801 /* See if the only way in which INSN uses REG is by calling it, or by
4802 setting it while calling it. Set *SET to a SET rtx if the register
4803 is set by INSN. */
4805 static int
4806 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4808 rtx pattern, reg2;
4810 *set = NULL_RTX;
4812 reg2 = sfunc_uses_reg (insn);
4813 if (reg2 && REGNO (reg2) == REGNO (reg))
4815 pattern = single_set (insn);
4816 if (pattern
4817 && REG_P (SET_DEST (pattern))
4818 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4819 *set = pattern;
4820 return 0;
4822 if (!CALL_P (insn))
4824 /* We don't use rtx_equal_p because we don't care if the mode is
4825 different. */
4826 pattern = single_set (insn);
4827 if (pattern
4828 && REG_P (SET_DEST (pattern))
4829 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4831 rtx par, part;
4832 int i;
4834 *set = pattern;
4835 par = PATTERN (insn);
4836 if (GET_CODE (par) == PARALLEL)
4837 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4839 part = XVECEXP (par, 0, i);
4840 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4841 return 1;
4843 return reg_mentioned_p (reg, SET_SRC (pattern));
4846 return 1;
4849 pattern = PATTERN (insn);
4851 if (GET_CODE (pattern) == PARALLEL)
4853 int i;
4855 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4856 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4857 return 1;
4858 pattern = XVECEXP (pattern, 0, 0);
4861 if (GET_CODE (pattern) == SET)
4863 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4865 /* We don't use rtx_equal_p, because we don't care if the
4866 mode is different. */
4867 if (!REG_P (SET_DEST (pattern))
4868 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4869 return 1;
4871 *set = pattern;
4874 pattern = SET_SRC (pattern);
4877 if (GET_CODE (pattern) != CALL
4878 || !MEM_P (XEXP (pattern, 0))
4879 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4880 return 1;
4882 return 0;
4885 /* Given X, a pattern of an insn or a part of it, return a mask of used
4886 general registers. Bits 0..15 mean that the respective registers
4887 are used as inputs in the instruction. Bits 16..31 mean that the
4888 registers 0..15, respectively, are used as outputs, or are clobbered.
4889 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4891 regs_used (rtx x, int is_dest)
4893 enum rtx_code code;
4894 const char *fmt;
4895 int i, used = 0;
4897 if (! x)
4898 return used;
4899 code = GET_CODE (x);
4900 switch (code)
4902 case REG:
4903 if (REGNO (x) < 16)
4904 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4905 << (REGNO (x) + is_dest));
4906 return 0;
4907 case SUBREG:
4909 rtx y = SUBREG_REG (x);
4911 if (!REG_P (y))
4912 break;
4913 if (REGNO (y) < 16)
4914 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4915 << (REGNO (y) +
4916 subreg_regno_offset (REGNO (y),
4917 GET_MODE (y),
4918 SUBREG_BYTE (x),
4919 GET_MODE (x)) + is_dest));
4920 return 0;
4922 case SET:
4923 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4924 case RETURN:
4925 /* If there was a return value, it must have been indicated with USE. */
4926 return 0x00ffff00;
4927 case CLOBBER:
4928 is_dest = 1;
4929 break;
4930 case MEM:
4931 is_dest = 0;
4932 break;
4933 case CALL:
4934 used |= 0x00ff00f0;
4935 break;
4936 default:
4937 break;
4940 fmt = GET_RTX_FORMAT (code);
4942 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4944 if (fmt[i] == 'E')
4946 register int j;
4947 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4948 used |= regs_used (XVECEXP (x, i, j), is_dest);
4950 else if (fmt[i] == 'e')
4951 used |= regs_used (XEXP (x, i), is_dest);
4953 return used;
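/* Illustrative example of the mask convention above (not part of the
   original code): for (set (reg:SI r1) (plus:SI (reg:SI r2) (reg:SI r3))),
   regs_used returns (1 << (1 + 16)) | (1 << 2) | (1 << 3), i.e. r1 as an
   output, r2 and r3 as inputs.  */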
4956 /* Create an instruction that prevents redirection of a conditional branch
4957 to the destination of the JUMP with address ADDR.
4958 If the branch needs to be implemented as an indirect jump, try to find
4959 a scratch register for it.
4960 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4961 If any preceding insn that doesn't fit into a delay slot is good enough,
4962 pass 1. Pass 2 if a definite blocking insn is needed.
4963 -1 is used internally to avoid deep recursion.
4964 If a blocking instruction is made or recognized, return it. */
4966 static rtx
4967 gen_block_redirect (rtx jump, int addr, int need_block)
4969 int dead = 0;
4970 rtx prev = prev_nonnote_insn (jump);
4971 rtx dest;
4973 /* First, check if we already have an instruction that satisfies our need. */
4974 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4976 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4977 return prev;
4978 if (GET_CODE (PATTERN (prev)) == USE
4979 || GET_CODE (PATTERN (prev)) == CLOBBER
4980 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4981 prev = jump;
4982 else if ((need_block &= ~1) < 0)
4983 return prev;
4984 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4985 need_block = 0;
4987 if (GET_CODE (PATTERN (jump)) == RETURN)
4989 if (! need_block)
4990 return prev;
4991 /* Reorg even does nasty things with return insns that cause branches
4992 to go out of range - see find_end_label and callers. */
4993 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4995 /* We can't use JUMP_LABEL here because it might be undefined
4996 when not optimizing. */
4997 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4998 /* If the branch is out of range, try to find a scratch register for it. */
4999 if (optimize
5000 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5001 > 4092 + 4098))
5003 rtx scan;
5004 /* Don't look for the stack pointer as a scratch register,
5005 it would cause trouble if an interrupt occurred. */
5006 unsigned attempt = 0x7fff, used;
5007 int jump_left = flag_expensive_optimizations + 1;
5009 /* It is likely that the most recent eligible instruction is wanted for
5010 the delay slot. Therefore, find out which registers it uses, and
5011 try to avoid using them. */
5013 for (scan = jump; (scan = PREV_INSN (scan)); )
5015 enum rtx_code code;
5017 if (INSN_DELETED_P (scan))
5018 continue;
5019 code = GET_CODE (scan);
5020 if (code == CODE_LABEL || code == JUMP_INSN)
5021 break;
5022 if (code == INSN
5023 && GET_CODE (PATTERN (scan)) != USE
5024 && GET_CODE (PATTERN (scan)) != CLOBBER
5025 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5027 attempt &= ~regs_used (PATTERN (scan), 0);
5028 break;
5031 for (used = dead = 0, scan = JUMP_LABEL (jump);
5032 (scan = NEXT_INSN (scan)); )
5034 enum rtx_code code;
5036 if (INSN_DELETED_P (scan))
5037 continue;
5038 code = GET_CODE (scan);
5039 if (INSN_P (scan))
5041 used |= regs_used (PATTERN (scan), 0);
5042 if (code == CALL_INSN)
5043 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5044 dead |= (used >> 16) & ~used;
5045 if (dead & attempt)
5047 dead &= attempt;
5048 break;
5050 if (code == JUMP_INSN)
5052 if (jump_left-- && simplejump_p (scan))
5053 scan = JUMP_LABEL (scan);
5054 else
5055 break;
5059 /* Mask out the stack pointer again, in case it was
5060 the only 'free' register we have found. */
5061 dead &= 0x7fff;
5063 /* If the immediate destination is still in range, check for possible
5064 threading with a jump beyond the delay slot insn.
5065 Don't check if we are called recursively; the jump has been or will be
5066 checked in a different invocation in that case.
5068 else if (optimize && need_block >= 0)
5070 rtx next = next_active_insn (next_active_insn (dest));
5071 if (next && JUMP_P (next)
5072 && GET_CODE (PATTERN (next)) == SET
5073 && recog_memoized (next) == CODE_FOR_jump_compact)
5075 dest = JUMP_LABEL (next);
5076 if (dest
5077 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5078 > 4092 + 4098))
5079 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5083 if (dead)
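/* Pick the lowest-numbered dead register as the scratch: dead & -dead
   isolates the least significant set bit, and exact_log2 turns it back
   into a register number.  */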
5085 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5087 /* It would be nice if we could convert the jump into an indirect
5088 jump / far branch right now, and thus exposing all constituent
5089 instructions to further optimization. However, reorg uses
5090 simplejump_p to determine if there is an unconditional jump where
5091 it should try to schedule instructions from the target of the
5092 branch; simplejump_p fails for indirect jumps even if they have
5093 a JUMP_LABEL. */
5094 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5095 (reg, GEN_INT (unspec_bbr_uid++)),
5096 jump);
5097 /* ??? We would like this to have the scope of the jump, but that
5098 scope will change when a delay slot insn of an inner scope is added.
5099 Hence, after delay slot scheduling, we'll have to expect
5100 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5101 the jump. */
5103 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5104 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5105 return insn;
5107 else if (need_block)
5108 /* We can't use JUMP_LABEL here because it might be undefined
5109 when not optimizing. */
5110 return emit_insn_before (gen_block_branch_redirect
5111 (GEN_INT (unspec_bbr_uid++)),
5112 jump);
5113 return prev;
5116 #define CONDJUMP_MIN -252
5117 #define CONDJUMP_MAX 262
5118 struct far_branch
5120 /* A label (to be placed) in front of the jump
5121 that jumps to our ultimate destination. */
5122 rtx near_label;
5123 /* Where we are going to insert it if we cannot move the jump any farther,
5124 or the jump itself if we have picked up an existing jump. */
5125 rtx insert_place;
5126 /* The ultimate destination. */
5127 rtx far_label;
5128 struct far_branch *prev;
5129 /* If the branch has already been created, its address;
5130 else the address of its first prospective user. */
5131 int address;
5134 static void gen_far_branch (struct far_branch *);
5135 enum mdep_reorg_phase_e mdep_reorg_phase;
5136 static void
5137 gen_far_branch (struct far_branch *bp)
5139 rtx insn = bp->insert_place;
5140 rtx jump;
5141 rtx label = gen_label_rtx ();
5142 int ok;
5144 emit_label_after (label, insn);
5145 if (bp->far_label)
5147 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5148 LABEL_NUSES (bp->far_label)++;
5150 else
5151 jump = emit_jump_insn_after (gen_return (), insn);
5152 /* Emit a barrier so that reorg knows that any following instructions
5153 are not reachable via a fall-through path.
5154 But don't do this when not optimizing, since we wouldn't suppress the
5155 alignment for the barrier then, and could end up with out-of-range
5156 pc-relative loads. */
5157 if (optimize)
5158 emit_barrier_after (jump);
5159 emit_label_after (bp->near_label, insn);
5160 JUMP_LABEL (jump) = bp->far_label;
5161 ok = invert_jump (insn, label, 1);
5162 gcc_assert (ok);
5164 /* If we are branching around a jump (rather than a return), prevent
5165 reorg from using an insn from the jump target as the delay slot insn -
5166 when reorg did this, it pessimized code (we rather hide the delay slot)
5167 and it could cause branches to go out of range. */
5168 if (bp->far_label)
5169 (emit_insn_after
5170 (gen_stuff_delay_slot
5171 (GEN_INT (unspec_bbr_uid++),
5172 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5173 insn));
5174 /* Prevent reorg from undoing our splits. */
5175 gen_block_redirect (jump, bp->address += 2, 2);
5178 /* Fix up ADDR_DIFF_VECs. */
5179 void
5180 fixup_addr_diff_vecs (rtx first)
5182 rtx insn;
5184 for (insn = first; insn; insn = NEXT_INSN (insn))
5186 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5188 if (!JUMP_P (insn)
5189 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5190 continue;
5191 pat = PATTERN (insn);
5192 vec_lab = XEXP (XEXP (pat, 0), 0);
5194 /* Search the matching casesi_jump_2. */
5195 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5197 if (!JUMP_P (prev))
5198 continue;
5199 prevpat = PATTERN (prev);
5200 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5201 continue;
5202 x = XVECEXP (prevpat, 0, 1);
5203 if (GET_CODE (x) != USE)
5204 continue;
5205 x = XEXP (x, 0);
5206 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5207 break;
5209 /* FIXME: This is a bug in the optimizer, but it seems harmless
5210 to just avoid panicking. */
5211 if (!prev)
5212 continue;
5214 /* Emit the reference label of the braf where it belongs, right after
5215 the casesi_jump_2 (i.e. braf). */
5216 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5217 emit_label_after (braf_label, prev);
5219 /* Fix up the ADDR_DIFF_VEC to be relative
5220 to the reference address of the braf. */
5221 XEXP (XEXP (pat, 0), 0) = braf_label;
5225 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5226 a barrier. Return the base 2 logarithm of the desired alignment. */
5228 barrier_align (rtx barrier_or_label)
5230 rtx next = next_real_insn (barrier_or_label), pat, prev;
5231 int slot, credit, jump_to_next = 0;
5233 if (! next)
5234 return 0;
5236 pat = PATTERN (next);
5238 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5239 return 2;
5241 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5242 /* This is a barrier in front of a constant table. */
5243 return 0;
5245 prev = prev_real_insn (barrier_or_label);
5246 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5248 pat = PATTERN (prev);
5249 /* If this is a very small table, we want to keep the alignment after
5250 the table to the minimum for proper code alignment. */
5251 return ((optimize_size
5252 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5253 <= (unsigned) 1 << (CACHE_LOG - 2)))
5254 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5257 if (optimize_size)
5258 return 0;
5260 if (! TARGET_SH2 || ! optimize)
5261 return align_jumps_log;
5263 /* When fixing up pcloads, a constant table might be inserted just before
5264 the basic block that ends with the barrier. Thus, we can't trust the
5265 instruction lengths before that. */
5266 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5268 /* Check if there is an immediately preceding branch to the insn beyond
5269 the barrier. We must weigh the cost of discarding useful information
5270 from the current cache line when executing this branch and there is
5271 an alignment, against that of fetching unneeded insns in front of the
5272 branch target when there is no alignment. */
5274 /* There are two delay_slot cases to consider. One is the simple case
5275 where the preceding branch is to the insn beyond the barrier (simple
5276 delay slot filling), and the other is where the preceding branch has
5277 a delay slot that is a duplicate of the insn after the barrier
5278 (fill_eager_delay_slots) and the branch is to the insn after the insn
5279 after the barrier. */
5281 /* PREV is presumed to be the JUMP_INSN for the barrier under
5282 investigation. Skip to the insn before it. */
5283 prev = prev_real_insn (prev);
5285 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5286 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5287 prev = prev_real_insn (prev))
5289 jump_to_next = 0;
5290 if (GET_CODE (PATTERN (prev)) == USE
5291 || GET_CODE (PATTERN (prev)) == CLOBBER)
5292 continue;
5293 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5295 prev = XVECEXP (PATTERN (prev), 0, 1);
5296 if (INSN_UID (prev) == INSN_UID (next))
5298 /* Delay slot was filled with insn at jump target. */
5299 jump_to_next = 1;
5300 continue;
5304 if (slot &&
5305 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5306 slot = 0;
5307 credit -= get_attr_length (prev);
5309 if (prev
5310 && JUMP_P (prev)
5311 && JUMP_LABEL (prev))
5313 rtx x;
5314 if (jump_to_next
5315 || next_real_insn (JUMP_LABEL (prev)) == next
5316 /* If relax_delay_slots() decides NEXT was redundant
5317 with some previous instruction, it will have
5318 redirected PREV's jump to the following insn. */
5319 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5320 /* There is no upper bound on redundant instructions
5321 that might have been skipped, but we must not put an
5322 alignment where none had been before. */
5323 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5324 (INSN_P (x)
5325 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5326 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5327 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5329 rtx pat = PATTERN (prev);
5330 if (GET_CODE (pat) == PARALLEL)
5331 pat = XVECEXP (pat, 0, 0);
5332 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5333 return 0;
5338 return align_jumps_log;
5341 /* If we are inside a phony loop, almost any kind of label can turn up as the
5342 first one in the loop. Aligning a braf label causes incorrect switch
5343 destination addresses; we can detect braf labels because they are
5344 followed by a BARRIER.
5345 Applying loop alignment to small constant or switch tables is a waste
5346 of space, so we suppress this too. */
5348 sh_loop_align (rtx label)
5350 rtx next = label;
5353 next = next_nonnote_insn (next);
5354 while (next && LABEL_P (next));
5356 if (! next
5357 || ! INSN_P (next)
5358 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5359 || recog_memoized (next) == CODE_FOR_consttable_2)
5360 return 0;
5362 return align_loops_log;
5365 /* Do a final pass over the function, just before delayed branch
5366 scheduling. */
5368 static void
5369 sh_reorg (void)
5371 rtx first, insn, mova = NULL_RTX;
5372 int num_mova;
5373 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5374 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5376 first = get_insns ();
5377 max_labelno_before_reorg = max_label_num ();
5379 /* We must split call insns before introducing `mova's. If we're
5380 optimizing, they'll have already been split. Otherwise, make
5381 sure we don't split them too late. */
5382 if (! optimize)
5383 split_all_insns_noflow ();
5385 if (TARGET_SHMEDIA)
5386 return;
5388 /* If relaxing, generate pseudo-ops to associate function calls with
5389 the symbols they call. It does no harm to not generate these
5390 pseudo-ops. However, when we can generate them, it enables the
5391 linker to potentially relax the jsr to a bsr, and eliminate the
5392 register load and, possibly, the constant pool entry. */
5394 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5395 if (TARGET_RELAX)
5397 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5398 own purposes. This works because none of the remaining passes
5399 need to look at them.
5401 ??? But it may break in the future. We should use a machine
5402 dependent REG_NOTE, or some other approach entirely. */
5403 for (insn = first; insn; insn = NEXT_INSN (insn))
5405 if (INSN_P (insn))
5407 rtx note;
5409 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5410 NULL_RTX)) != 0)
5411 remove_note (insn, note);
5415 for (insn = first; insn; insn = NEXT_INSN (insn))
5417 rtx pattern, reg, link, set, scan, dies, label;
5418 int rescan = 0, foundinsn = 0;
5420 if (CALL_P (insn))
5422 pattern = PATTERN (insn);
5424 if (GET_CODE (pattern) == PARALLEL)
5425 pattern = XVECEXP (pattern, 0, 0);
5426 if (GET_CODE (pattern) == SET)
5427 pattern = SET_SRC (pattern);
5429 if (GET_CODE (pattern) != CALL
5430 || !MEM_P (XEXP (pattern, 0)))
5431 continue;
5433 reg = XEXP (XEXP (pattern, 0), 0);
5435 else
5437 reg = sfunc_uses_reg (insn);
5438 if (! reg)
5439 continue;
5442 if (!REG_P (reg))
5443 continue;
5445 /* Try scanning backward to find where the register is set. */
5446 link = NULL;
5447 for (scan = PREV_INSN (insn);
5448 scan && !LABEL_P (scan);
5449 scan = PREV_INSN (scan))
5451 if (! INSN_P (scan))
5452 continue;
5454 if (! reg_mentioned_p (reg, scan))
5455 continue;
5457 if (noncall_uses_reg (reg, scan, &set))
5458 break;
5460 if (set)
5462 link = scan;
5463 break;
5467 if (! link)
5468 continue;
5470 /* The register is set at LINK. */
5472 /* We can only optimize the function call if the register is
5473 being set to a symbol. In theory, we could sometimes
5474 optimize calls to a constant location, but the assembler
5475 and linker do not support that at present. */
5476 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5477 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5478 continue;
5480 /* Scan forward from LINK to the place where REG dies, and
5481 make sure that the only insns which use REG are
5482 themselves function calls. */
5484 /* ??? This doesn't work for call targets that were allocated
5485 by reload, since there may not be a REG_DEAD note for the
5486 register. */
5488 dies = NULL_RTX;
5489 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5491 rtx scanset;
5493 /* Don't try to trace forward past a CODE_LABEL if we haven't
5494 seen INSN yet. Ordinarily, we will only find the setting insn
5495 if it is in the same basic block. However,
5496 cross-jumping can insert code labels in between the load and
5497 the call, and can result in situations where a single call
5498 insn may have two targets depending on where we came from. */
5500 if (LABEL_P (scan) && ! foundinsn)
5501 break;
5503 if (! INSN_P (scan))
5504 continue;
5506 /* Don't try to trace forward past a JUMP. To optimize
5507 safely, we would have to check that all the
5508 instructions at the jump destination did not use REG. */
5510 if (JUMP_P (scan))
5511 break;
5513 if (! reg_mentioned_p (reg, scan))
5514 continue;
5516 if (noncall_uses_reg (reg, scan, &scanset))
5517 break;
5519 if (scan == insn)
5520 foundinsn = 1;
5522 if (scan != insn
5523 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5525 /* There is a function call to this register other
5526 than the one we are checking. If we optimize
5527 this call, we need to rescan again below. */
5528 rescan = 1;
5531 /* ??? We shouldn't have to worry about SCANSET here.
5532 We should just be able to check for a REG_DEAD note
5533 on a function call. However, the REG_DEAD notes are
5534 apparently not dependable around libcalls; c-torture
5535 execute/920501-2 is a test case. If SCANSET is set,
5536 then this insn sets the register, so it must have
5537 died earlier. Unfortunately, this will only handle
5538 the cases in which the register is, in fact, set in a
5539 later insn. */
5541 /* ??? We shouldn't have to use FOUNDINSN here.
5542 This dates back to when we used LOG_LINKS to find
5543 the most recent insn which sets the register. */
5545 if (foundinsn
5546 && (scanset
5547 || find_reg_note (scan, REG_DEAD, reg)))
5549 dies = scan;
5550 break;
5554 if (! dies)
5556 /* Either there was a branch, or some insn used REG
5557 other than as a function call address. */
5558 continue;
5561 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5562 on the insn which sets the register, and on each call insn
5563 which uses the register. In final_prescan_insn we look for
5564 the REG_LABEL_OPERAND notes, and output the appropriate label
5565 or pseudo-op. */
5567 label = gen_label_rtx ();
5568 add_reg_note (link, REG_LABEL_OPERAND, label);
5569 add_reg_note (insn, REG_LABEL_OPERAND, label);
5570 if (rescan)
5572 scan = link;
5575 rtx reg2;
5577 scan = NEXT_INSN (scan);
5578 if (scan != insn
5579 && ((CALL_P (scan)
5580 && reg_mentioned_p (reg, scan))
5581 || ((reg2 = sfunc_uses_reg (scan))
5582 && REGNO (reg2) == REGNO (reg))))
5583 add_reg_note (scan, REG_LABEL_OPERAND, label);
5585 while (scan != dies);
5590 if (TARGET_SH2)
5591 fixup_addr_diff_vecs (first);
5593 if (optimize)
5595 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5596 shorten_branches (first);
5599 /* Scan the function looking for move instructions which have to be
5600 changed to pc-relative loads and insert the literal tables. */
5601 label_ref_list_pool = create_alloc_pool ("label references list",
5602 sizeof (struct label_ref_list_d),
5603 30);
5604 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5605 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5607 if (mova_p (insn))
5609 /* ??? basic block reordering can move a switch table dispatch
5610 below the switch table. Check if that has happened.
5611 We only have the addresses available when optimizing; but then,
5612 this check shouldn't be needed when not optimizing. */
5613 if (!untangle_mova (&num_mova, &mova, insn))
5615 insn = mova;
5616 num_mova = 0;
5619 else if (JUMP_P (insn)
5620 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5621 && num_mova
5622 /* ??? loop invariant motion can also move a mova out of a
5623 loop. Since loop does this code motion anyway, maybe we
5624 should wrap UNSPEC_MOVA into a CONST, so that reload can
5625 move it back. */
5626 && ((num_mova > 1
5627 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5628 || (prev_nonnote_insn (insn)
5629 == XEXP (MOVA_LABELREF (mova), 0))))
5631 rtx scan;
5632 int total;
5634 num_mova--;
5636 /* Some code might have been inserted between the mova and
5637 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5638 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5639 total += get_attr_length (scan);
5641 /* range of mova is 1020, add 4 because pc counts from address of
5642 second instruction after this one, subtract 2 in case pc is 2
5643 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5644 cancels out with alignment effects of the mova itself. */
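/* I.e. 1020 + 4 - 2 == 1022, hence the check below.  */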
5645 if (total > 1022)
5647 /* Change the mova into a load, and restart scanning
5648 there. broken_move will then return true for mova. */
5649 fixup_mova (mova);
5650 insn = mova;
5653 if (broken_move (insn)
5654 || (NONJUMP_INSN_P (insn)
5655 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5657 rtx scan;
5658 /* Scan ahead looking for a barrier to stick the constant table
5659 behind. */
5660 rtx barrier = find_barrier (num_mova, mova, insn);
5661 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5662 int need_aligned_label = 0;
5664 if (num_mova && ! mova_p (mova))
5666 /* find_barrier had to change the first mova into a
5667 pcload; thus, we have to start with this new pcload. */
5668 insn = mova;
5669 num_mova = 0;
5671 /* Now find all the moves between the points and modify them. */
5672 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5674 if (LABEL_P (scan))
5675 last_float = 0;
5676 if (NONJUMP_INSN_P (scan)
5677 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5678 need_aligned_label = 1;
5679 if (broken_move (scan))
5681 rtx *patp = &PATTERN (scan), pat = *patp;
5682 rtx src, dst;
5683 rtx lab;
5684 rtx newsrc;
5685 enum machine_mode mode;
5687 if (GET_CODE (pat) == PARALLEL)
5688 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5689 src = SET_SRC (pat);
5690 dst = SET_DEST (pat);
5691 mode = GET_MODE (dst);
5693 if (mode == SImode && hi_const (src)
5694 && REGNO (dst) != FPUL_REG)
5696 int offset = 0;
5698 mode = HImode;
5699 while (GET_CODE (dst) == SUBREG)
5701 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5702 GET_MODE (SUBREG_REG (dst)),
5703 SUBREG_BYTE (dst),
5704 GET_MODE (dst));
5705 dst = SUBREG_REG (dst);
5707 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5709 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5711 /* This must be an insn that clobbers r0. */
5712 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5713 XVECLEN (PATTERN (scan), 0)
5714 - 1);
5715 rtx clobber = *clobberp;
5717 gcc_assert (GET_CODE (clobber) == CLOBBER
5718 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5720 if (last_float
5721 && reg_set_between_p (r0_rtx, last_float_move, scan))
5722 last_float = 0;
5723 if (last_float
5724 && TARGET_SHCOMPACT
5725 && GET_MODE_SIZE (mode) != 4
5726 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5727 last_float = 0;
5728 lab = add_constant (src, mode, last_float);
5729 if (lab)
5730 emit_insn_before (gen_mova (lab), scan);
5731 else
5733 /* There will be a REG_UNUSED note for r0 on
5734 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5735		     otherwise reorg:mark_target_live_regs would not
5736		     consider r0 to be used, and we would end up with a delay
5737 slot insn in front of SCAN that clobbers r0. */
5738 rtx note
5739 = find_regno_note (last_float_move, REG_UNUSED, 0);
5741 /* If we are not optimizing, then there may not be
5742 a note. */
5743 if (note)
5744 PUT_REG_NOTE_KIND (note, REG_INC);
5746 *last_float_addr = r0_inc_rtx;
5748 last_float_move = scan;
5749 last_float = src;
5750 newsrc = gen_const_mem (mode,
5751 (((TARGET_SH4 && ! TARGET_FMOVD)
5752 || REGNO (dst) == FPUL_REG)
5753 ? r0_inc_rtx
5754 : r0_rtx));
5755 last_float_addr = &XEXP (newsrc, 0);
5757 /* Remove the clobber of r0. */
5758 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5759 gen_rtx_SCRATCH (Pmode));
5761 /* This is a mova needing a label. Create it. */
5762 else if (GET_CODE (src) == UNSPEC
5763 && XINT (src, 1) == UNSPEC_MOVA
5764 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5766 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5767 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5768 newsrc = gen_rtx_UNSPEC (SImode,
5769 gen_rtvec (1, newsrc),
5770 UNSPEC_MOVA);
5772 else
5774 lab = add_constant (src, mode, 0);
5775 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5776 newsrc = gen_const_mem (mode, newsrc);
5778 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5779 INSN_CODE (scan) = -1;
5782 dump_table (need_aligned_label ? insn : 0, barrier);
5783 insn = barrier;
5786 free_alloc_pool (label_ref_list_pool);
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 PUT_MODE (insn, VOIDmode);
5790 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5791 INSN_ADDRESSES_FREE ();
5792 split_branches (first);
5794 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5795 also has an effect on the register that holds the address of the sfunc.
5796 Insert an extra dummy insn in front of each sfunc that pretends to
5797 use this register. */
5798 if (flag_delayed_branch)
5800 for (insn = first; insn; insn = NEXT_INSN (insn))
5802 rtx reg = sfunc_uses_reg (insn);
5804 if (! reg)
5805 continue;
5806 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5809 #if 0
5810 /* fpscr is not actually a user variable, but we pretend it is for the
5811 sake of the previous optimization passes, since we want it handled like
5812 one. However, we don't have any debugging information for it, so turn
5813 it into a non-user variable now. */
5814 if (TARGET_SH4)
5815 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5816 #endif
5817 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5820 int
5821 get_dest_uid (rtx label, int max_uid)
5823 rtx dest = next_real_insn (label);
5824 int dest_uid;
5825 if (! dest)
5826 /* This can happen for an undefined label. */
5827 return 0;
5828 dest_uid = INSN_UID (dest);
5829 /* If this is a newly created branch redirection blocking instruction,
5830 we cannot index the branch_uid or insn_addresses arrays with its
5831 uid. But then, we won't need to, because the actual destination is
5832 the following branch. */
5833 while (dest_uid >= max_uid)
5835 dest = NEXT_INSN (dest);
5836 dest_uid = INSN_UID (dest);
5838 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5839 return 0;
5840 return dest_uid;
5843 /* Split condbranches that are out of range. Also add clobbers for
5844 scratch registers that are needed in far jumps.
5845 We do this before delay slot scheduling, so that it can take our
5846 newly created instructions into account. It also allows us to
5847 find branches with common targets more easily. */
5849 static void
5850 split_branches (rtx first)
5852 rtx insn;
5853 struct far_branch **uid_branch, *far_branch_list = 0;
5854 int max_uid = get_max_uid ();
5855 int ok;
5857 /* Find out which branches are out of range. */
5858 shorten_branches (first);
5860 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5861 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5863 for (insn = first; insn; insn = NEXT_INSN (insn))
5864 if (! INSN_P (insn))
5865 continue;
5866 else if (INSN_DELETED_P (insn))
5868 /* Shorten_branches would split this instruction again,
5869 so transform it into a note. */
5870 SET_INSN_DELETED (insn);
5872 else if (JUMP_P (insn)
5873 /* Don't mess with ADDR_DIFF_VEC */
5874 && (GET_CODE (PATTERN (insn)) == SET
5875 || GET_CODE (PATTERN (insn)) == RETURN))
5877 enum attr_type type = get_attr_type (insn);
5878 if (type == TYPE_CBRANCH)
5880 rtx next, beyond;
5882 if (get_attr_length (insn) > 4)
5884 rtx src = SET_SRC (PATTERN (insn));
5885 rtx olabel = XEXP (XEXP (src, 1), 0);
5886 int addr = INSN_ADDRESSES (INSN_UID (insn));
5887 rtx label = 0;
5888 int dest_uid = get_dest_uid (olabel, max_uid);
5889 struct far_branch *bp = uid_branch[dest_uid];
5891 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5892 the label if the LABEL_NUSES count drops to zero. There is
5893 always a jump_optimize pass that sets these values, but it
5894 proceeds to delete unreferenced code, and then if not
5895 optimizing, to un-delete the deleted instructions, thus
5896	     leaving labels with use counts that are too low.  */
5897 if (! optimize)
5899 JUMP_LABEL (insn) = olabel;
5900 LABEL_NUSES (olabel)++;
5902 if (! bp)
5904 bp = (struct far_branch *) alloca (sizeof *bp);
5905 uid_branch[dest_uid] = bp;
5906 bp->prev = far_branch_list;
5907 far_branch_list = bp;
5908 bp->far_label
5909 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5910 LABEL_NUSES (bp->far_label)++;
5912 else
5914 label = bp->near_label;
5915 if (! label && bp->address - addr >= CONDJUMP_MIN)
5917 rtx block = bp->insert_place;
5919 if (GET_CODE (PATTERN (block)) == RETURN)
5920 block = PREV_INSN (block);
5921 else
5922 block = gen_block_redirect (block,
5923 bp->address, 2);
5924 label = emit_label_after (gen_label_rtx (),
5925 PREV_INSN (block));
5926 bp->near_label = label;
5928 else if (label && ! NEXT_INSN (label))
5930 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5931 bp->insert_place = insn;
5932 else
5933 gen_far_branch (bp);
5936 if (! label
5937 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5939 bp->near_label = label = gen_label_rtx ();
5940 bp->insert_place = insn;
5941 bp->address = addr;
5943 ok = redirect_jump (insn, label, 0);
5944 gcc_assert (ok);
5946 else
5948 /* get_attr_length (insn) == 2 */
5949 /* Check if we have a pattern where reorg wants to redirect
5950 the branch to a label from an unconditional branch that
5951 is too far away. */
5952 /* We can't use JUMP_LABEL here because it might be undefined
5953 when not optimizing. */
5954 /* A syntax error might cause beyond to be NULL_RTX. */
5955 beyond
5956 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5957 0));
5959 if (beyond
5960 && (JUMP_P (beyond)
5961 || ((beyond = next_active_insn (beyond))
5962 && JUMP_P (beyond)))
5963 && GET_CODE (PATTERN (beyond)) == SET
5964 && recog_memoized (beyond) == CODE_FOR_jump_compact
5965 && ((INSN_ADDRESSES
5966 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5967 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5968 > 252 + 258 + 2))
5969 gen_block_redirect (beyond,
5970 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5973 next = next_active_insn (insn);
5975 if (next
5976 && (JUMP_P (next)
5977 || ((next = next_active_insn (next))
5978 && JUMP_P (next)))
5979 && GET_CODE (PATTERN (next)) == SET
5980 && recog_memoized (next) == CODE_FOR_jump_compact
5981 && ((INSN_ADDRESSES
5982 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5983 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5984 > 252 + 258 + 2))
5985 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5987 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5989 int addr = INSN_ADDRESSES (INSN_UID (insn));
5990 rtx far_label = 0;
5991 int dest_uid = 0;
5992 struct far_branch *bp;
5994 if (type == TYPE_JUMP)
5996 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5997 dest_uid = get_dest_uid (far_label, max_uid);
5998 if (! dest_uid)
6000 /* Parse errors can lead to labels outside
6001 the insn stream. */
6002 if (! NEXT_INSN (far_label))
6003 continue;
6005 if (! optimize)
6007 JUMP_LABEL (insn) = far_label;
6008 LABEL_NUSES (far_label)++;
6010 redirect_jump (insn, NULL_RTX, 1);
6011 far_label = 0;
6014 bp = uid_branch[dest_uid];
6015 if (! bp)
6017 bp = (struct far_branch *) alloca (sizeof *bp);
6018 uid_branch[dest_uid] = bp;
6019 bp->prev = far_branch_list;
6020 far_branch_list = bp;
6021 bp->near_label = 0;
6022 bp->far_label = far_label;
6023 if (far_label)
6024 LABEL_NUSES (far_label)++;
6026 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6027 if (addr - bp->address <= CONDJUMP_MAX)
6028 emit_label_after (bp->near_label, PREV_INSN (insn));
6029 else
6031 gen_far_branch (bp);
6032 bp->near_label = 0;
6034 else
6035 bp->near_label = 0;
6036 bp->address = addr;
6037 bp->insert_place = insn;
6038 if (! far_label)
6039 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6040 else
6041 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6044 /* Generate all pending far branches,
6045 and free our references to the far labels. */
6046 while (far_branch_list)
6048 if (far_branch_list->near_label
6049 && ! NEXT_INSN (far_branch_list->near_label))
6050 gen_far_branch (far_branch_list);
6051 if (optimize
6052 && far_branch_list->far_label
6053 && ! --LABEL_NUSES (far_branch_list->far_label))
6054 delete_insn (far_branch_list->far_label);
6055 far_branch_list = far_branch_list->prev;
6058 /* Instruction length information is no longer valid due to the new
6059 instructions that have been generated. */
6060 init_insn_lengths ();
6063 /* Dump out instruction addresses, which is useful for debugging the
6064 constant pool table stuff.
6066 If relaxing, output the label and pseudo-ops used to link together
6067   calls and the instructions which set the registers.  */
6069 /* ??? The addresses printed by this routine for insns are nonsense for
6070 insns which are inside of a sequence where none of the inner insns have
6071 variable length. This is because the second pass of shorten_branches
6072 does not bother to update them. */
6074 void
6075 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6076 int noperands ATTRIBUTE_UNUSED)
6078 if (TARGET_DUMPISIZE)
6079 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6081 if (TARGET_RELAX)
6083 rtx note;
6085 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6086 if (note)
6088 rtx pattern;
6090 pattern = PATTERN (insn);
6091 if (GET_CODE (pattern) == PARALLEL)
6092 pattern = XVECEXP (pattern, 0, 0);
6093 switch (GET_CODE (pattern))
6095 case SET:
6096 if (GET_CODE (SET_SRC (pattern)) != CALL
6097 && get_attr_type (insn) != TYPE_SFUNC)
6099 targetm.asm_out.internal_label
6100 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6101 break;
6103 /* else FALLTHROUGH */
6104 case CALL:
6105 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6106 CODE_LABEL_NUMBER (XEXP (note, 0)));
6107 break;
6109 default:
6110 gcc_unreachable ();
6116 /* Dump out any constants accumulated in the final pass. These will
6117 only be labels. */
6119 const char *
6120 output_jump_label_table (void)
6122 int i;
6124 if (pool_size)
6126 fprintf (asm_out_file, "\t.align 2\n");
6127 for (i = 0; i < pool_size; i++)
6129 pool_node *p = &pool_vector[i];
6131 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6132 CODE_LABEL_NUMBER (p->label));
6133 output_asm_insn (".long %O0", &p->value);
6135 pool_size = 0;
6138 return "";
6141 /* A full frame looks like:
6143 arg-5
6144 arg-4
6145 [ if current_function_anonymous_args
6146 arg-3
6147 arg-2
6148 arg-1
6149 arg-0 ]
6150 saved-fp
6151 saved-r10
6152 saved-r11
6153 saved-r12
6154 saved-pr
6155 local-n
6157 local-1
6158 local-0 <- fp points here. */
6160 /* Number of bytes pushed for anonymous args, used to pass information
6161 between expand_prologue and expand_epilogue. */
6163 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6164 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6165 for an epilogue and a negative value means that it's for a sibcall
6166 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6167 all the registers that are about to be restored, and hence dead. */
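/* Added usage sketch (it mirrors the calls made further below; the argument
   values are only examples): the prologue allocates frame space with
   something like
     output_stack_adjust (-rounded_frame_size (d), stack_pointer_rtx,
			  0, NULL, true);
   i.e. a negative SIZE grows the frame, while the epilogue passes the
   positive size back together with a nonzero EPILOGUE_P.  */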
6169 static void
6170 output_stack_adjust (int size, rtx reg, int epilogue_p,
6171 HARD_REG_SET *live_regs_mask, bool frame_p)
6173 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6174 if (size)
6176 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6178 /* This test is bogus, as output_stack_adjust is used to re-align the
6179 stack. */
6180 #if 0
6181 gcc_assert (!(size % align));
6182 #endif
6184 if (CONST_OK_FOR_ADD (size))
6185 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6186 /* Try to do it with two partial adjustments; however, we must make
6187 sure that the stack is properly aligned at all times, in case
6188 an interrupt occurs between the two partial adjustments. */
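      /* Added worked example (hypothetical values): with size = 200 and
	 align = 4 on a target whose add-immediate range is +/-127, a single
	 add of 200 is out of range, so we add 200/2 & -4 = 100 and then
	 200 - 100 = 100; both partial adjustments are multiples of the
	 alignment, so the stack stays aligned even if an interrupt arrives
	 between them.  */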
6189 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6190 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6192 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6193 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6195 else
6197 rtx const_reg;
6198 rtx insn;
6199 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6200 int i;
6202 /* If TEMP is invalid, we could temporarily save a general
6203 register to MACL. However, there is currently no need
6204 to handle this case, so just die when we see it. */
6205 if (epilogue_p < 0
6206 || current_function_interrupt
6207 || ! call_really_used_regs[temp] || fixed_regs[temp])
6208 temp = -1;
6209 if (temp < 0 && ! current_function_interrupt
6210 && (TARGET_SHMEDIA || epilogue_p >= 0))
6212 HARD_REG_SET temps;
6213 COPY_HARD_REG_SET (temps, call_used_reg_set);
6214 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6215 if (epilogue_p > 0)
6217 int nreg = 0;
6218 if (crtl->return_rtx)
6220 enum machine_mode mode;
6221 mode = GET_MODE (crtl->return_rtx);
6222 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6223 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6225 for (i = 0; i < nreg; i++)
6226 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6227 if (crtl->calls_eh_return)
6229 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6230 for (i = 0; i <= 3; i++)
6231 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6234 if (TARGET_SHMEDIA && epilogue_p < 0)
6235 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6236 CLEAR_HARD_REG_BIT (temps, i);
6237 if (epilogue_p <= 0)
6239 for (i = FIRST_PARM_REG;
6240 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6241 CLEAR_HARD_REG_BIT (temps, i);
6242 if (cfun->static_chain_decl != NULL)
6243 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6245 temp = scavenge_reg (&temps);
6247 if (temp < 0 && live_regs_mask)
6249 HARD_REG_SET temps;
6251 COPY_HARD_REG_SET (temps, *live_regs_mask);
6252 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6253 temp = scavenge_reg (&temps);
6255 if (temp < 0)
6257 rtx adj_reg, tmp_reg, mem;
6259 /* If we reached here, the most likely case is the (sibcall)
6260	     epilogue for non-SHmedia.  Put a special push/pop sequence
6261	     for such a case as a last resort.  This looks lengthy, but
6262	     it would not be a problem because it seems to be very
6263 rare. */
6265 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6268 /* ??? There is still the slight possibility that r4 or
6269 r5 have been reserved as fixed registers or assigned
6270 as global registers, and they change during an
6271 interrupt. There are possible ways to handle this:
6273 - If we are adjusting the frame pointer (r14), we can do
6274 with a single temp register and an ordinary push / pop
6275 on the stack.
6276 - Grab any call-used or call-saved registers (i.e. not
6277 fixed or globals) for the temps we need. We might
6278 also grab r14 if we are adjusting the stack pointer.
6279 If we can't find enough available registers, issue
6280 a diagnostic and die - the user must have reserved
6281 way too many registers.
6282 But since all this is rather unlikely to happen and
6283 would require extra testing, we just die if r4 / r5
6284 are not available. */
6285 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6286 && !global_regs[4] && !global_regs[5]);
6288 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6289 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6290 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6291 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6292 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6293 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6294 emit_move_insn (mem, tmp_reg);
6295 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6296 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6297 emit_move_insn (mem, tmp_reg);
6298 emit_move_insn (reg, adj_reg);
6299 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6300 emit_move_insn (adj_reg, mem);
6301 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6302 emit_move_insn (tmp_reg, mem);
6303 /* Tell flow the insns that pop r4/r5 aren't dead. */
6304 emit_use (tmp_reg);
6305 emit_use (adj_reg);
6306 return;
6308 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6310 /* If SIZE is negative, subtract the positive value.
6311 This sometimes allows a constant pool entry to be shared
6312 between prologue and epilogue code. */
6313 if (size < 0)
6315 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6316 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6318 else
6320 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6321 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6323 if (! epilogue_p)
6324 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6325 gen_rtx_SET (VOIDmode, reg,
6326 gen_rtx_PLUS (SImode, reg,
6327 GEN_INT (size))));
6332 static rtx
6333 frame_insn (rtx x)
6335 x = emit_insn (x);
6336 RTX_FRAME_RELATED_P (x) = 1;
6337 return x;
6340 /* Output RTL to push register RN onto the stack. */
6342 static rtx
6343 push (int rn)
6345 rtx x;
6346 if (rn == FPUL_REG)
6347 x = gen_push_fpul ();
6348 else if (rn == FPSCR_REG)
6349 x = gen_push_fpscr ();
6350 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6351 && FP_OR_XD_REGISTER_P (rn))
6353 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6354 return NULL_RTX;
6355 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6357 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6358 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6359 else
6360 x = gen_push (gen_rtx_REG (SImode, rn));
6362 x = frame_insn (x);
6363 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6364 return x;
6367 /* Output RTL to pop register RN from the stack. */
6369 static void
6370 pop (int rn)
6372 rtx x;
6373 if (rn == FPUL_REG)
6374 x = gen_pop_fpul ();
6375 else if (rn == FPSCR_REG)
6376 x = gen_pop_fpscr ();
6377 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6378 && FP_OR_XD_REGISTER_P (rn))
6380 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6381 return;
6382 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6384 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6385 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6386 else
6387 x = gen_pop (gen_rtx_REG (SImode, rn));
6389 x = emit_insn (x);
6390 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6393 /* Generate code to push the regs specified in the mask. */
6395 static void
6396 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6398 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6399 int skip_fpscr = 0;
6401 /* Push PR last; this gives better latencies after the prologue, and
6402     provides candidates for the return delay slot when no general
6403     registers are pushed.  */
6404 for (; i < FIRST_PSEUDO_REGISTER; i++)
6406 /* If this is an interrupt handler, and the SZ bit varies,
6407 and we have to push any floating point register, we need
6408 to switch to the correct precision first. */
6409 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6410 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6412 HARD_REG_SET unsaved;
6414 push (FPSCR_REG);
6415 COMPL_HARD_REG_SET (unsaved, *mask);
6416 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6417 skip_fpscr = 1;
6419 if (i != PR_REG
6420 && (i != FPSCR_REG || ! skip_fpscr)
6421 && TEST_HARD_REG_BIT (*mask, i))
6423 /* If the ISR has RESBANK attribute assigned, don't push any of
6424 the following registers - R0-R14, MACH, MACL and GBR. */
6425 if (! (sh_cfun_resbank_handler_p ()
6426 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6427 || i == MACH_REG
6428 || i == MACL_REG
6429 || i == GBR_REG)))
6430 push (i);
6434 /* Push banked registers last to improve delay slot opportunities. */
6435 if (interrupt_handler)
6437 bool use_movml = false;
6439 if (TARGET_SH2A)
6441 unsigned int count = 0;
6443 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6444 if (TEST_HARD_REG_BIT (*mask, i))
6445 count++;
6446 else
6447 break;
6449 /* Use movml when all banked registers are pushed. */
6450 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6451 use_movml = true;
6454 if (use_movml)
6456 rtx x, mem, reg, set;
6457 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6459	  /* We must avoid scheduling the multiple store insn with other
6460	     insns.  */
6461 emit_insn (gen_blockage ());
6462 x = gen_movml_push_banked (sp_reg);
6463 x = frame_insn (x);
6464 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6466 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6467 reg = gen_rtx_REG (SImode, i);
6468 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6471 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6472 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6473 emit_insn (gen_blockage ());
6475 else
6476 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6477 if (TEST_HARD_REG_BIT (*mask, i))
6478 push (i);
6481 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6482 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6483 push (PR_REG);
6486 /* Calculate how much extra space is needed to save all callee-saved
6487 target registers.
6488 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6490 static int
6491 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6493 int reg;
6494 int stack_space = 0;
6495 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6497 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6498 if ((! call_really_used_regs[reg] || interrupt_handler)
6499 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6500 /* Leave space to save this target register on the stack,
6501 in case target register allocation wants to use it. */
6502 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6503 return stack_space;
6506 /* Decide whether we should reserve space for callee-save target registers,
6507 in case target register allocation wants to use them. REGS_SAVED is
6508 the space, in bytes, that is already required for register saves.
6509 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6511 static int
6512 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6513 HARD_REG_SET *live_regs_mask)
6515 if (optimize_size)
6516 return 0;
6517 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6520 /* Decide how much space to reserve for callee-save target registers
6521 in case target register allocation wants to use them.
6522 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6524 static int
6525 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6527 if (shmedia_space_reserved_for_target_registers)
6528 return shmedia_target_regs_stack_space (live_regs_mask);
6529 else
6530 return 0;
6533 /* Work out the registers which need to be saved, both as a mask and a
6534 count of saved words. Return the count.
6536 If doing a pragma interrupt function, then push all regs used by the
6537 function, and if we call another function (we can tell by looking at PR),
6538 make sure that all the regs it clobbers are safe too. */
6540 static int
6541 calc_live_regs (HARD_REG_SET *live_regs_mask)
6543 unsigned int reg;
6544 int count;
6545 tree attrs;
6546 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6547 bool nosave_low_regs;
6548 int pr_live, has_call;
6550 attrs = DECL_ATTRIBUTES (current_function_decl);
6551 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6552 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6553 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6554 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6556 CLEAR_HARD_REG_SET (*live_regs_mask);
6557 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6558 && df_regs_ever_live_p (FPSCR_REG))
6559 target_flags &= ~MASK_FPU_SINGLE;
6560   /* If we can avoid a lot of saves by switching to double mode, do that.  */
6561 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6562 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6563 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6564 && (! call_really_used_regs[reg]
6565 || interrupt_handler)
6566 && ++count > 2)
6568 target_flags &= ~MASK_FPU_SINGLE;
6569 break;
6571 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6572 knows how to use it. That means the pseudo originally allocated for
6573 the initial value can become the PR_MEDIA_REG hard register, as seen for
6574 execute/20010122-1.c:test9. */
6575 if (TARGET_SHMEDIA)
6576 /* ??? this function is called from initial_elimination_offset, hence we
6577 can't use the result of sh_media_register_for_return here. */
6578 pr_live = sh_pr_n_sets ();
6579 else
6581 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6582 pr_live = (pr_initial
6583 ? (!REG_P (pr_initial)
6584 || REGNO (pr_initial) != (PR_REG))
6585 : df_regs_ever_live_p (PR_REG));
6586      /* For SHcompact, if not optimizing, we end up with a memory reference
6587 using the return address pointer for __builtin_return_address even
6588 though there is no actual need to put the PR register on the stack. */
6589 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6591 /* Force PR to be live if the prologue has to call the SHmedia
6592 argument decoder or register saver. */
6593 if (TARGET_SHCOMPACT
6594 && ((crtl->args.info.call_cookie
6595 & ~ CALL_COOKIE_RET_TRAMP (1))
6596 || crtl->saves_all_registers))
6597 pr_live = 1;
6598 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6599 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6601 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6602 ? pr_live
6603 : interrupt_handler
6604 ? (/* Need to save all the regs ever live. */
6605 (df_regs_ever_live_p (reg)
6606 || (call_really_used_regs[reg]
6607 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6608 || reg == PIC_OFFSET_TABLE_REGNUM)
6609 && has_call)
6610 || (TARGET_SHMEDIA && has_call
6611 && REGISTER_NATURAL_MODE (reg) == SImode
6612 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6613 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6614 && reg != RETURN_ADDRESS_POINTER_REGNUM
6615 && reg != T_REG && reg != GBR_REG
6616	     /* Push fpscr only on targets which have an FPU.  */
6617 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6618 : (/* Only push those regs which are used and need to be saved. */
6619 (TARGET_SHCOMPACT
6620 && flag_pic
6621 && crtl->args.info.call_cookie
6622 && reg == PIC_OFFSET_TABLE_REGNUM)
6623 || (df_regs_ever_live_p (reg)
6624 && ((!call_really_used_regs[reg]
6625 && !(reg != PIC_OFFSET_TABLE_REGNUM
6626 && fixed_regs[reg] && call_used_regs[reg]))
6627 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6628 || (crtl->calls_eh_return
6629 && (reg == EH_RETURN_DATA_REGNO (0)
6630 || reg == EH_RETURN_DATA_REGNO (1)
6631 || reg == EH_RETURN_DATA_REGNO (2)
6632 || reg == EH_RETURN_DATA_REGNO (3)))
6633 || ((reg == MACL_REG || reg == MACH_REG)
6634 && df_regs_ever_live_p (reg)
6635 && sh_cfun_attr_renesas_p ())
6638 SET_HARD_REG_BIT (*live_regs_mask, reg);
6639 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6641 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6642 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6644 if (FP_REGISTER_P (reg))
6646 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6648 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6649 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6652 else if (XD_REGISTER_P (reg))
6654 /* Must switch to double mode to access these registers. */
6655 target_flags &= ~MASK_FPU_SINGLE;
6659 if (nosave_low_regs && reg == R8_REG)
6660 break;
6662 /* If we have a target register optimization pass after prologue / epilogue
6663 threading, we need to assume all target registers will be live even if
6664 they aren't now. */
6665 if (flag_branch_target_load_optimize2
6666 && TARGET_SAVE_ALL_TARGET_REGS
6667 && shmedia_space_reserved_for_target_registers)
6668 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6669 if ((! call_really_used_regs[reg] || interrupt_handler)
6670 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6672 SET_HARD_REG_BIT (*live_regs_mask, reg);
6673 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6675 /* If this is an interrupt handler, we don't have any call-clobbered
6676 registers we can conveniently use for target register save/restore.
6677 Make sure we save at least one general purpose register when we need
6678 to save target registers. */
6679 if (interrupt_handler
6680 && hard_reg_set_intersect_p (*live_regs_mask,
6681 reg_class_contents[TARGET_REGS])
6682 && ! hard_reg_set_intersect_p (*live_regs_mask,
6683 reg_class_contents[GENERAL_REGS]))
6685 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6686 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6689 return count;
6692 /* Code to generate prologue and epilogue sequences */
6694 /* PUSHED is the number of bytes that are being pushed on the
6695 stack for register saves. Return the frame size, padded
6696 appropriately so that the stack stays properly aligned. */
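/* Added worked example (illustrative values): with get_frame_size () == 20,
   pushed == 12 and an 8-byte alignment (STACK_BOUNDARY / BITS_PER_UNIT), the
   result is ((20 + 12 + 7) & -8) - 12 = 32 - 12 = 20, so pushed plus the
   returned frame size is padded up to a multiple of the alignment.  */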
6697 static HOST_WIDE_INT
6698 rounded_frame_size (int pushed)
6700 HOST_WIDE_INT size = get_frame_size ();
6701 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6703 if (ACCUMULATE_OUTGOING_ARGS)
6704 size += crtl->outgoing_args_size;
6706 return ((size + pushed + align - 1) & -align) - pushed;
6709 /* Choose a call-clobbered target-branch register that remains
6710 unchanged along the whole function. We set it up as the return
6711 value in the prologue. */
6712 int
6713 sh_media_register_for_return (void)
6715 int regno;
6716 int tr0_used;
6718 if (! current_function_is_leaf)
6719 return -1;
6720 if (lookup_attribute ("interrupt_handler",
6721 DECL_ATTRIBUTES (current_function_decl)))
6722 return -1;
6723 if (sh_cfun_interrupt_handler_p ())
6724 return -1;
6726 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6728 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6729 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6730 return regno;
6732 return -1;
6735 /* The maximum number of registers we need to save is:
6736 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6737 - 32 floating point registers (for each pair, we save none,
6738 one single precision value, or a double precision value).
6739 - 8 target registers
6740 - add 1 entry for a delimiter. */
6741 #define MAX_SAVED_REGS (62+32+8)
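/* Added tally note: 62 + 32 + 8 = 102 register slots; the save_schedule
   array below reserves MAX_SAVED_REGS + 2 entries so that delimiter entries
   fit at both ends of a filled-in schedule.  */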
6743 typedef struct save_entry_s
6745 unsigned char reg;
6746 unsigned char mode;
6747 short offset;
6748 } save_entry;
6750 #define MAX_TEMPS 4
6752 /* There will be a delimiter entry with VOIDmode both at the start and the
6753 end of a filled in schedule. The end delimiter has the offset of the
6754 save with the smallest (i.e. most negative) offset. */
6755 typedef struct save_schedule_s
6757 save_entry entries[MAX_SAVED_REGS + 2];
6758 int temps[MAX_TEMPS+1];
6759 } save_schedule;
6761 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6762 use reverse order. Returns the last entry written to (not counting
6763 the delimiter). OFFSET_BASE is a number to be added to all offset
6764 entries. */
6766 static save_entry *
6767 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6768 int offset_base)
6770 int align, i;
6771 save_entry *entry = schedule->entries;
6772 int tmpx = 0;
6773 int offset;
6775 if (! current_function_interrupt)
6776 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6777 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6778 && ! FUNCTION_ARG_REGNO_P (i)
6779 && i != FIRST_RET_REG
6780 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6781 && ! (crtl->calls_eh_return
6782 && (i == EH_RETURN_STACKADJ_REGNO
6783 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6784 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6785 schedule->temps[tmpx++] = i;
6786 entry->reg = -1;
6787 entry->mode = VOIDmode;
6788 entry->offset = offset_base;
6789 entry++;
6790   /* We loop twice: first, we save 8-byte aligned registers at the
6791      higher addresses, which are known to be aligned.  Then, we
6792      proceed to saving 32-bit registers that don't need 8-byte
6793      alignment.
6794      If this is an interrupt function, all registers that need saving
6795      need to be saved in full.  Moreover, we need to postpone saving
6796      target registers until we have saved some general purpose registers
6797      that we can then use as scratch registers.  */
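  /* Added illustrative layout (hypothetical offsets): starting from
     OFFSET_BASE == 0, an 8-byte save from the first (aligned) pass is
     placed at offset -8, and a following 4-byte save from the second pass
     lands at -12, so every 8-byte slot keeps 8-byte alignment.  */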
6798 offset = offset_base;
6799 for (align = 1; align >= 0; align--)
6801 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6802 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6804 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6805 int reg = i;
6807 if (current_function_interrupt)
6809 if (TARGET_REGISTER_P (i))
6810 continue;
6811 if (GENERAL_REGISTER_P (i))
6812 mode = DImode;
6814 if (mode == SFmode && (i % 2) == 1
6815 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6816 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6818 mode = DFmode;
6819 i--;
6820 reg--;
6823 /* If we're doing the aligned pass and this is not aligned,
6824 or we're doing the unaligned pass and this is aligned,
6825 skip it. */
6826 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6827 != align)
6828 continue;
6830 if (current_function_interrupt
6831 && GENERAL_REGISTER_P (i)
6832 && tmpx < MAX_TEMPS)
6833 schedule->temps[tmpx++] = i;
6835 offset -= GET_MODE_SIZE (mode);
6836 entry->reg = i;
6837 entry->mode = mode;
6838 entry->offset = offset;
6839 entry++;
6841 if (align && current_function_interrupt)
6842 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6843 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6845 offset -= GET_MODE_SIZE (DImode);
6846 entry->reg = i;
6847 entry->mode = DImode;
6848 entry->offset = offset;
6849 entry++;
6852 entry->reg = -1;
6853 entry->mode = VOIDmode;
6854 entry->offset = offset;
6855 schedule->temps[tmpx] = -1;
6856 return entry - 1;
6859 void
6860 sh_expand_prologue (void)
6862 HARD_REG_SET live_regs_mask;
6863 int d, i;
6864 int d_rounding = 0;
6865 int save_flags = target_flags;
6866 int pretend_args;
6867 int stack_usage;
6868 tree sp_switch_attr
6869 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6871 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6873 /* We have pretend args if we had an object sent partially in registers
6874 and partially on the stack, e.g. a large structure. */
6875 pretend_args = crtl->args.pretend_args_size;
6876 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6877 && (NPARM_REGS(SImode)
6878 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6879 pretend_args = 0;
6880   /* The dwarf2 module doesn't expect frame-related insns here.  */
6881 output_stack_adjust (-pretend_args
6882 - crtl->args.info.stack_regs * 8,
6883 stack_pointer_rtx, 0, NULL, false);
6884 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6886 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6887 /* We're going to use the PIC register to load the address of the
6888 incoming-argument decoder and/or of the return trampoline from
6889 the GOT, so make sure the PIC register is preserved and
6890 initialized. */
6891 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6893 if (TARGET_SHCOMPACT
6894 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6896 int reg;
6898 /* First, make all registers with incoming arguments that will
6899 be pushed onto the stack live, so that register renaming
6900 doesn't overwrite them. */
6901 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6902 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6903 >= NPARM_REGS (SImode) - reg)
6904 for (; reg < NPARM_REGS (SImode); reg++)
6905 emit_insn (gen_shcompact_preserve_incoming_args
6906 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6907 else if (CALL_COOKIE_INT_REG_GET
6908 (crtl->args.info.call_cookie, reg) == 1)
6909 emit_insn (gen_shcompact_preserve_incoming_args
6910 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6912 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6913 stack_pointer_rtx);
6914 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6915 GEN_INT (crtl->args.info.call_cookie));
6916 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6917 gen_rtx_REG (SImode, R0_REG));
6919 else if (TARGET_SHMEDIA)
6921 int tr = sh_media_register_for_return ();
6923 if (tr >= 0)
6924 emit_move_insn (gen_rtx_REG (DImode, tr),
6925 gen_rtx_REG (DImode, PR_MEDIA_REG));
6928 /* Emit the code for SETUP_VARARGS. */
6929 if (cfun->stdarg)
6931 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6933	  /* Push arg regs as if they'd been provided by the caller on the stack.  */
6934 for (i = 0; i < NPARM_REGS(SImode); i++)
6936 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6938 if (i >= (NPARM_REGS(SImode)
6939 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6941 break;
6942 push (rn);
6943 stack_usage += GET_MODE_SIZE (SImode);
6948 /* If we're supposed to switch stacks at function entry, do so now. */
6949 if (sp_switch_attr)
6951 rtx lab, newsrc;
6952 /* The argument specifies a variable holding the address of the
6953 stack the interrupt function should switch to/from at entry/exit. */
6954 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6955 const char *s
6956 = ggc_strdup (TREE_STRING_POINTER (arg));
6957 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6959 lab = add_constant (sp_switch, SImode, 0);
6960 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6961 newsrc = gen_const_mem (SImode, newsrc);
6963 emit_insn (gen_sp_switch_1 (newsrc));
6966 d = calc_live_regs (&live_regs_mask);
6967 /* ??? Maybe we could save some switching if we can move a mode switch
6968 that already happens to be at the function start into the prologue. */
6969 if (target_flags != save_flags && ! current_function_interrupt)
6970 emit_insn (gen_toggle_sz ());
6972 if (TARGET_SH5)
6974 int offset_base, offset;
6975 rtx r0 = NULL_RTX;
6976 int offset_in_r0 = -1;
6977 int sp_in_r0 = 0;
6978 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6979 int total_size, save_size;
6980 save_schedule schedule;
6981 save_entry *entry;
6982 int *tmp_pnt;
6984 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6985 && ! current_function_interrupt)
6986 r0 = gen_rtx_REG (Pmode, R0_REG);
6988      /* D is the actual number of bytes that we need for saving registers;
6989	 however, in initial_elimination_offset we have committed to using
6990	 an additional TREGS_SPACE amount of bytes.  In order to keep both
6991 addresses to arguments supplied by the caller and local variables
6992 valid, we must keep this gap. Place it between the incoming
6993 arguments and the actually saved registers in a bid to optimize
6994 locality of reference. */
6995 total_size = d + tregs_space;
6996 total_size += rounded_frame_size (total_size);
6997 save_size = total_size - rounded_frame_size (d);
6998 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6999 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7000 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7002 /* If adjusting the stack in a single step costs nothing extra, do so.
7003 I.e. either if a single addi is enough, or we need a movi anyway,
7004 and we don't exceed the maximum offset range (the test for the
7005 latter is conservative for simplicity). */
7006 if (TARGET_SHMEDIA
7007 && (CONST_OK_FOR_I10 (-total_size)
7008 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7009 && total_size <= 2044)))
7010 d_rounding = total_size - save_size;
7012 offset_base = d + d_rounding;
7014 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7015 0, NULL, true);
7016 stack_usage += save_size + d_rounding;
7018 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7019 tmp_pnt = schedule.temps;
7020 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7022 enum machine_mode mode = (enum machine_mode) entry->mode;
7023 unsigned int reg = entry->reg;
7024 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7025 rtx orig_reg_rtx;
7027 offset = entry->offset;
7029 reg_rtx = gen_rtx_REG (mode, reg);
7031 mem_rtx = gen_frame_mem (mode,
7032 gen_rtx_PLUS (Pmode,
7033 stack_pointer_rtx,
7034 GEN_INT (offset)));
7036 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7038 gcc_assert (r0);
7039 mem_rtx = NULL_RTX;
7042 if (HAVE_PRE_DECREMENT
7043 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7044 || mem_rtx == NULL_RTX
7045 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7047 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7049 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7050 pre_dec = NULL_RTX;
7051 else
7053 mem_rtx = NULL_RTX;
7054 offset += GET_MODE_SIZE (mode);
7058 if (mem_rtx != NULL_RTX)
7059 goto addr_ok;
7061 if (offset_in_r0 == -1)
7063 emit_move_insn (r0, GEN_INT (offset));
7064 offset_in_r0 = offset;
7066 else if (offset != offset_in_r0)
7068 emit_move_insn (r0,
7069 gen_rtx_PLUS
7070 (Pmode, r0,
7071 GEN_INT (offset - offset_in_r0)));
7072 offset_in_r0 += offset - offset_in_r0;
7075 if (pre_dec != NULL_RTX)
7077 if (! sp_in_r0)
7079 emit_move_insn (r0,
7080 gen_rtx_PLUS
7081 (Pmode, r0, stack_pointer_rtx));
7082 sp_in_r0 = 1;
7085 offset -= GET_MODE_SIZE (mode);
7086 offset_in_r0 -= GET_MODE_SIZE (mode);
7088 mem_rtx = pre_dec;
7090 else if (sp_in_r0)
7091 mem_rtx = gen_frame_mem (mode, r0);
7092 else
7093 mem_rtx = gen_frame_mem (mode,
7094 gen_rtx_PLUS (Pmode,
7095 stack_pointer_rtx,
7096 r0));
7098 /* We must not use an r0-based address for target-branch
7099 registers or for special registers without pre-dec
7100 memory addresses, since we store their values in r0
7101 first. */
7102 gcc_assert (!TARGET_REGISTER_P (reg)
7103 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7104 || mem_rtx == pre_dec));
7106 addr_ok:
7107 orig_reg_rtx = reg_rtx;
7108 if (TARGET_REGISTER_P (reg)
7109 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7110 && mem_rtx != pre_dec))
7112 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7114 emit_move_insn (tmp_reg, reg_rtx);
7116 if (REGNO (tmp_reg) == R0_REG)
7118 offset_in_r0 = -1;
7119 sp_in_r0 = 0;
7120 gcc_assert (!refers_to_regno_p
7121 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7124 if (*++tmp_pnt <= 0)
7125 tmp_pnt = schedule.temps;
7127 reg_rtx = tmp_reg;
7130 rtx insn;
7132	    /* Mark as interesting for the dwarf cfi generator.  */
7133 insn = emit_move_insn (mem_rtx, reg_rtx);
7134 RTX_FRAME_RELATED_P (insn) = 1;
7135 /* If we use an intermediate register for the save, we can't
7136 describe this exactly in cfi as a copy of the to-be-saved
7137	       register into the temporary register followed by a store of the
7138	       temporary register onto the stack, because the temporary register
7139	       can have a different natural size than the to-be-saved register.
7140 Thus, we gloss over the intermediate copy and pretend we do
7141 a direct save from the to-be-saved register. */
7142 if (REGNO (reg_rtx) != reg)
7144 rtx set;
7146 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7147 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7150 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7152 rtx reg_rtx = gen_rtx_REG (mode, reg);
7153 rtx set;
7154 rtx mem_rtx = gen_frame_mem (mode,
7155 gen_rtx_PLUS (Pmode,
7156 stack_pointer_rtx,
7157 GEN_INT (offset)));
7159 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7160 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7165 gcc_assert (entry->offset == d_rounding);
7167 else
7169 push_regs (&live_regs_mask, current_function_interrupt);
7170 stack_usage += d;
7173 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7174 emit_insn (gen_GOTaddr2picreg ());
7176 if (SHMEDIA_REGS_STACK_ADJUST ())
7178 /* This must NOT go through the PLT, otherwise mach and macl
7179 may be clobbered. */
7180 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7181 (TARGET_FPU_ANY
7182 ? "__GCC_push_shmedia_regs"
7183 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7184 emit_insn (gen_shmedia_save_restore_regs_compact
7185 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7188 if (target_flags != save_flags && ! current_function_interrupt)
7189 emit_insn (gen_toggle_sz ());
7191 target_flags = save_flags;
7193 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7194 stack_pointer_rtx, 0, NULL, true);
7195 stack_usage += rounded_frame_size (d) - d_rounding;
7197 if (frame_pointer_needed)
7198 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7200 if (TARGET_SHCOMPACT
7201 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7203 /* This must NOT go through the PLT, otherwise mach and macl
7204 may be clobbered. */
7205 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7206 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7207 emit_insn (gen_shcompact_incoming_args ());
7210 if (flag_stack_usage)
7211 current_function_static_stack_size = stack_usage;
7214 void
7215 sh_expand_epilogue (bool sibcall_p)
7217 HARD_REG_SET live_regs_mask;
7218 int d, i;
7219 int d_rounding = 0;
7221 int save_flags = target_flags;
7222 int frame_size, save_size;
7223 int fpscr_deferred = 0;
7224 int e = sibcall_p ? -1 : 1;
7226 d = calc_live_regs (&live_regs_mask);
7228 save_size = d;
7229 frame_size = rounded_frame_size (d);
7231 if (TARGET_SH5)
7233 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7234 int total_size;
7235 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7236 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7237 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7239 total_size = d + tregs_space;
7240 total_size += rounded_frame_size (total_size);
7241 save_size = total_size - frame_size;
7243 /* If adjusting the stack in a single step costs nothing extra, do so.
7244 I.e. either if a single addi is enough, or we need a movi anyway,
7245 and we don't exceed the maximum offset range (the test for the
7246 latter is conservative for simplicity). */
7247 if (TARGET_SHMEDIA
7248 && ! frame_pointer_needed
7249 && (CONST_OK_FOR_I10 (total_size)
7250 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7251 && total_size <= 2044)))
7252 d_rounding = frame_size;
7254 frame_size -= d_rounding;
7257 if (frame_pointer_needed)
7259 /* We must avoid scheduling the epilogue with previous basic blocks.
7260 See PR/18032 and PR/40313. */
7261 emit_insn (gen_blockage ());
7262 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7263 &live_regs_mask, false);
7265 /* We must avoid moving the stack pointer adjustment past code
7266 which reads from the local frame, else an interrupt could
7267 occur after the SP adjustment and clobber data in the local
7268 frame. */
7269 emit_insn (gen_blockage ());
7270 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7272 else if (frame_size)
7274 /* We must avoid moving the stack pointer adjustment past code
7275 which reads from the local frame, else an interrupt could
7276 occur after the SP adjustment and clobber data in the local
7277 frame. */
7278 emit_insn (gen_blockage ());
7279 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7280 &live_regs_mask, false);
7283 if (SHMEDIA_REGS_STACK_ADJUST ())
7285 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7286 (TARGET_FPU_ANY
7287 ? "__GCC_pop_shmedia_regs"
7288 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7289 /* This must NOT go through the PLT, otherwise mach and macl
7290 may be clobbered. */
7291 emit_insn (gen_shmedia_save_restore_regs_compact
7292 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7295 /* Pop all the registers. */
7297 if (target_flags != save_flags && ! current_function_interrupt)
7298 emit_insn (gen_toggle_sz ());
7299 if (TARGET_SH5)
7301 int offset_base, offset;
7302 int offset_in_r0 = -1;
7303 int sp_in_r0 = 0;
7304 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7305 save_schedule schedule;
7306 save_entry *entry;
7307 int *tmp_pnt;
7309 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7310 offset_base = -entry[1].offset + d_rounding;
7311 tmp_pnt = schedule.temps;
7312 for (; entry->mode != VOIDmode; entry--)
7314 enum machine_mode mode = (enum machine_mode) entry->mode;
7315 int reg = entry->reg;
7316 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7318 offset = offset_base + entry->offset;
7319 reg_rtx = gen_rtx_REG (mode, reg);
7321 mem_rtx = gen_frame_mem (mode,
7322 gen_rtx_PLUS (Pmode,
7323 stack_pointer_rtx,
7324 GEN_INT (offset)));
7326 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7327 mem_rtx = NULL_RTX;
7329 if (HAVE_POST_INCREMENT
7330 && (offset == offset_in_r0
7331 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7332 && mem_rtx == NULL_RTX)
7333 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7335 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7337 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7338 post_inc = NULL_RTX;
7339 else
7340 mem_rtx = NULL_RTX;
7343 if (mem_rtx != NULL_RTX)
7344 goto addr_ok;
7346 if (offset_in_r0 == -1)
7348 emit_move_insn (r0, GEN_INT (offset));
7349 offset_in_r0 = offset;
7351 else if (offset != offset_in_r0)
7353 emit_move_insn (r0,
7354 gen_rtx_PLUS
7355 (Pmode, r0,
7356 GEN_INT (offset - offset_in_r0)));
7357 offset_in_r0 += offset - offset_in_r0;
7360 if (post_inc != NULL_RTX)
7362 if (! sp_in_r0)
7364 emit_move_insn (r0,
7365 gen_rtx_PLUS
7366 (Pmode, r0, stack_pointer_rtx));
7367 sp_in_r0 = 1;
7370 mem_rtx = post_inc;
7372 offset_in_r0 += GET_MODE_SIZE (mode);
7374 else if (sp_in_r0)
7375 mem_rtx = gen_frame_mem (mode, r0);
7376 else
7377 mem_rtx = gen_frame_mem (mode,
7378 gen_rtx_PLUS (Pmode,
7379 stack_pointer_rtx,
7380 r0));
7382 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7383 || mem_rtx == post_inc);
7385 addr_ok:
7386 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7387 && mem_rtx != post_inc)
7389 emit_move_insn (r0, mem_rtx);
7390 mem_rtx = r0;
7392 else if (TARGET_REGISTER_P (reg))
7394 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7396 /* Give the scheduler a bit of freedom by using up to
7397 MAX_TEMPS registers in a round-robin fashion. */
7398 emit_move_insn (tmp_reg, mem_rtx);
7399 mem_rtx = tmp_reg;
7400 if (*++tmp_pnt < 0)
7401 tmp_pnt = schedule.temps;
7404 emit_move_insn (reg_rtx, mem_rtx);
7407 gcc_assert (entry->offset + offset_base == d + d_rounding);
7409 else /* ! TARGET_SH5 */
7411 int last_reg;
7413 save_size = 0;
7414 /* For an ISR with RESBANK attribute assigned, don't pop PR
7415 register. */
7416 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7417 && !sh_cfun_resbank_handler_p ())
7419 if (!frame_pointer_needed)
7420 emit_insn (gen_blockage ());
7421 pop (PR_REG);
7424 /* Banked registers are popped first to avoid being scheduled in the
7425 delay slot. RTE switches banks before the ds instruction. */
7426 if (current_function_interrupt)
7428 bool use_movml = false;
7430 if (TARGET_SH2A)
7432 unsigned int count = 0;
7434 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7435 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7436 count++;
7437 else
7438 break;
7440	      /* Use movml when all banked registers are popped.  */
7441 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7442 use_movml = true;
7445 if (use_movml)
7447 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7449	      /* We must avoid scheduling the multiple load insn with other
7450		 insns.  */
7451 emit_insn (gen_blockage ());
7452 emit_insn (gen_movml_pop_banked (sp_reg));
7453 emit_insn (gen_blockage ());
7455 else
7456 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7457 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7458 pop (i);
7460 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7462 else
7463 last_reg = FIRST_PSEUDO_REGISTER;
7465 for (i = 0; i < last_reg; i++)
7467 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7469 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7470 && hard_reg_set_intersect_p (live_regs_mask,
7471 reg_class_contents[DF_REGS]))
7472 fpscr_deferred = 1;
7473	  /* For an ISR with the RESBANK attribute assigned, don't pop the
7474	     following registers: R0-R14, MACH, MACL and GBR.  */
7475 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7476 && ! (sh_cfun_resbank_handler_p ()
7477 && ((j >= FIRST_GENERAL_REG
7478 && j < LAST_GENERAL_REG)
7479 || j == MACH_REG
7480 || j == MACL_REG
7481 || j == GBR_REG)))
7482 pop (j);
7484 if (j == FIRST_FP_REG && fpscr_deferred)
7485 pop (FPSCR_REG);
7488 if (target_flags != save_flags && ! current_function_interrupt)
7489 emit_insn (gen_toggle_sz ());
7490 target_flags = save_flags;
7492 output_stack_adjust (crtl->args.pretend_args_size
7493 + save_size + d_rounding
7494 + crtl->args.info.stack_regs * 8,
7495 stack_pointer_rtx, e, NULL, false);
7497 if (crtl->calls_eh_return)
7498 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7499 EH_RETURN_STACKADJ_RTX));
7501 /* Switch back to the normal stack if necessary. */
7502 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7503 emit_insn (gen_sp_switch_2 ());
7505 /* Tell flow the insn that pops PR isn't dead. */
7506 /* PR_REG will never be live in SHmedia mode, and we don't need to
7507 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7508 by the return pattern. */
7509 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7510 emit_use (gen_rtx_REG (SImode, PR_REG));
7513 static int sh_need_epilogue_known = 0;
7515 int
7516 sh_need_epilogue (void)
7518 if (! sh_need_epilogue_known)
7520 rtx epilogue;
7522 start_sequence ();
7523 sh_expand_epilogue (0);
7524 epilogue = get_insns ();
7525 end_sequence ();
7526 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7528 return sh_need_epilogue_known > 0;
7531 /* Emit code to change the current function's return address to RA.
7532 TEMP is available as a scratch register, if needed. */
7534 void
7535 sh_set_return_address (rtx ra, rtx tmp)
7537 HARD_REG_SET live_regs_mask;
7538 int d;
7539 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7540 int pr_offset;
7542 d = calc_live_regs (&live_regs_mask);
7544 /* If pr_reg isn't live, we can set it (or the register given in
7545 sh_media_register_for_return) directly. */
7546 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7548 rtx rr;
7550 if (TARGET_SHMEDIA)
7552 int rr_regno = sh_media_register_for_return ();
7554 if (rr_regno < 0)
7555 rr_regno = pr_reg;
7557 rr = gen_rtx_REG (DImode, rr_regno);
7559 else
7560 rr = gen_rtx_REG (SImode, pr_reg);
7562 emit_insn (GEN_MOV (rr, ra));
7563 /* Tell flow the register for return isn't dead. */
7564 emit_use (rr);
7565 return;
7568 if (TARGET_SH5)
7570 int offset;
7571 save_schedule schedule;
7572 save_entry *entry;
7574 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7575 offset = entry[1].offset;
7576 for (; entry->mode != VOIDmode; entry--)
7577 if (entry->reg == pr_reg)
7578 goto found;
7580 /* We can't find the pr register. */
7581 gcc_unreachable ();
7583 found:
7584 offset = entry->offset - offset;
7585 pr_offset = (rounded_frame_size (d) + offset
7586 + SHMEDIA_REGS_STACK_ADJUST ());
7588 else
7589 pr_offset = rounded_frame_size (d);
7591 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7593 if (frame_pointer_needed)
7594 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7595 else
7596 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7598 tmp = gen_frame_mem (Pmode, tmp);
7599 emit_insn (GEN_MOV (tmp, ra));
7600 /* Tell flow this store isn't dead. */
7601 emit_use (tmp);
7604 /* Clear variables at function end. */
7606 static void
7607 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7608 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7610 sh_need_epilogue_known = 0;
7613 static rtx
7614 sh_builtin_saveregs (void)
7616 /* First unnamed integer register. */
7617 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7618 /* Number of integer registers we need to save. */
7619 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7620 /* First unnamed SFmode float reg */
7621 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7622 /* Number of SFmode float regs to save. */
7623 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7624 rtx regbuf, fpregs;
7625 int bufsize, regno;
7626 alias_set_type alias_set;
7628 if (TARGET_SH5)
7630 if (n_intregs)
7632 int pushregs = n_intregs;
7634 while (pushregs < NPARM_REGS (SImode) - 1
7635 && (CALL_COOKIE_INT_REG_GET
7636 (crtl->args.info.call_cookie,
7637 NPARM_REGS (SImode) - pushregs)
7638 == 1))
7640 crtl->args.info.call_cookie
7641 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7642 - pushregs, 1);
7643 pushregs++;
7646 if (pushregs == NPARM_REGS (SImode))
7647 crtl->args.info.call_cookie
7648 |= (CALL_COOKIE_INT_REG (0, 1)
7649 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7650 else
7651 crtl->args.info.call_cookie
7652 |= CALL_COOKIE_STACKSEQ (pushregs);
7654 crtl->args.pretend_args_size += 8 * n_intregs;
7656 if (TARGET_SHCOMPACT)
7657 return const0_rtx;
7660 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7662 error ("__builtin_saveregs not supported by this subtarget");
7663 return const0_rtx;
7666 if (TARGET_SHMEDIA)
7667 n_floatregs = 0;
7669 /* Allocate block of memory for the regs. */
7670 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7671 Or can assign_stack_local accept a 0 SIZE argument? */
7672 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7674 if (TARGET_SHMEDIA)
7675 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7676 else if (n_floatregs & 1)
7678 rtx addr;
7680 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7681 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7682 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7683 regbuf = change_address (regbuf, BLKmode, addr);
7685 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7687 rtx addr, mask;
7689 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7690 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7691 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7692 emit_insn (gen_andsi3 (addr, addr, mask));
7693 regbuf = change_address (regbuf, BLKmode, addr);
7695 else
7696 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7697 alias_set = get_varargs_alias_set ();
7698 set_mem_alias_set (regbuf, alias_set);
7700 /* Save int args.
7701 This is optimized to only save the regs that are necessary. Explicitly
7702 named args need not be saved. */
7703 if (n_intregs > 0)
7704 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7705 adjust_address (regbuf, BLKmode,
7706 n_floatregs * UNITS_PER_WORD),
7707 n_intregs);
7709 if (TARGET_SHMEDIA)
7710 /* Return the address of the regbuf. */
7711 return XEXP (regbuf, 0);
7713 /* Save float args.
7714 This is optimized to only save the regs that are necessary. Explicitly
7715 named args need not be saved.
7716 We explicitly build a pointer to the buffer because it halves the insn
7717 count when not optimizing (otherwise the pointer is built for each reg
7718 saved).
7719 We emit the moves in reverse order so that we can use predecrement. */
7721 fpregs = copy_to_mode_reg (Pmode,
7722 plus_constant (XEXP (regbuf, 0),
7723 n_floatregs * UNITS_PER_WORD));
7724 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7726 rtx mem;
7727 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7729 emit_insn (gen_addsi3 (fpregs, fpregs,
7730 GEN_INT (-2 * UNITS_PER_WORD)));
7731 mem = change_address (regbuf, DFmode, fpregs);
7732 emit_move_insn (mem,
7733 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7735 regno = first_floatreg;
7736 if (regno & 1)
7738 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7739 mem = change_address (regbuf, SFmode, fpregs);
7740 emit_move_insn (mem,
7741 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7742 - (TARGET_LITTLE_ENDIAN != 0)));
7745 else
7746 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7748 rtx mem;
7750 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7751 mem = change_address (regbuf, SFmode, fpregs);
7752 emit_move_insn (mem,
7753 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7756 /* Return the address of the regbuf. */
7757 return XEXP (regbuf, 0);
7760 /* Define the `__builtin_va_list' type for the ABI. */
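/* Illustrative sketch only (not part of the original source): on non-SH5
   targets with an FPU and the default GCC calling convention, the record
   built below corresponds roughly to

       struct __va_list_tag
       {
         void *__va_next_o;         -- next saved integer arg (r4..r7)
         void *__va_next_o_limit;   -- end of the integer save area
         void *__va_next_fp;        -- next saved float arg (fr4..fr11)
         void *__va_next_fp_limit;  -- end of the float save area
         void *__va_next_stack;     -- next argument passed on the stack
       };

   For SH5, the Hitachi/Renesas ABIs and FPU-less targets a plain pointer
   is used instead, as the code below shows.  */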
7762 static tree
7763 sh_build_builtin_va_list (void)
7765 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7766 tree record, type_decl;
7768 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7769 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7770 return ptr_type_node;
7772 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7773 type_decl = build_decl (BUILTINS_LOCATION,
7774 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7776 f_next_o = build_decl (BUILTINS_LOCATION,
7777 FIELD_DECL, get_identifier ("__va_next_o"),
7778 ptr_type_node);
7779 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7780 FIELD_DECL,
7781 get_identifier ("__va_next_o_limit"),
7782 ptr_type_node);
7783 f_next_fp = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL, get_identifier ("__va_next_fp"),
7785 ptr_type_node);
7786 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7787 FIELD_DECL,
7788 get_identifier ("__va_next_fp_limit"),
7789 ptr_type_node);
7790 f_next_stack = build_decl (BUILTINS_LOCATION,
7791 FIELD_DECL, get_identifier ("__va_next_stack"),
7792 ptr_type_node);
7794 DECL_FIELD_CONTEXT (f_next_o) = record;
7795 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7796 DECL_FIELD_CONTEXT (f_next_fp) = record;
7797 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7798 DECL_FIELD_CONTEXT (f_next_stack) = record;
7800 TYPE_STUB_DECL (record) = type_decl;
7801 TYPE_NAME (record) = type_decl;
7802 TYPE_FIELDS (record) = f_next_o;
7803 DECL_CHAIN (f_next_o) = f_next_o_limit;
7804 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7805 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7806 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7808 layout_type (record);
7810 return record;
7813 /* Implement `va_start' for varargs and stdarg. */
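/* Rough sketch of the initialization performed below, assuming the
   five-field va_list described above is in use (illustration only):

       next_fp       = __builtin_saveregs ();
       next_fp_limit = next_fp + 4 * (8 - <named float regs used>);
       next_o        = next_fp_limit;
       next_o_limit  = next_o + 4 * (4 - <named int regs used>);
       next_stack    = <address of the first stacked argument>;  */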
7815 static void
7816 sh_va_start (tree valist, rtx nextarg)
7818 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7819 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7820 tree t, u;
7821 int nfp, nint;
7823 if (TARGET_SH5)
7825 expand_builtin_saveregs ();
7826 std_expand_builtin_va_start (valist, nextarg);
7827 return;
7830 if ((! TARGET_SH2E && ! TARGET_SH4)
7831 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7833 std_expand_builtin_va_start (valist, nextarg);
7834 return;
7837 f_next_o = TYPE_FIELDS (va_list_type_node);
7838 f_next_o_limit = DECL_CHAIN (f_next_o);
7839 f_next_fp = DECL_CHAIN (f_next_o_limit);
7840 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7841 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7843 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7844 NULL_TREE);
7845 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7846 valist, f_next_o_limit, NULL_TREE);
7847 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7848 NULL_TREE);
7849 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7850 valist, f_next_fp_limit, NULL_TREE);
7851 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7852 valist, f_next_stack, NULL_TREE);
7854 /* Call __builtin_saveregs. */
7855 u = make_tree (sizetype, expand_builtin_saveregs ());
7856 u = fold_convert (ptr_type_node, u);
7857 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7858 TREE_SIDE_EFFECTS (t) = 1;
7859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7861 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7862 if (nfp < 8)
7863 nfp = 8 - nfp;
7864 else
7865 nfp = 0;
7866 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7867 size_int (UNITS_PER_WORD * nfp));
7868 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7869 TREE_SIDE_EFFECTS (t) = 1;
7870 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7872 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7873 TREE_SIDE_EFFECTS (t) = 1;
7874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7876 nint = crtl->args.info.arg_count[SH_ARG_INT];
7877 if (nint < 4)
7878 nint = 4 - nint;
7879 else
7880 nint = 0;
7881 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7882 size_int (UNITS_PER_WORD * nint));
7883 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7884 TREE_SIDE_EFFECTS (t) = 1;
7885 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7887 u = make_tree (ptr_type_node, nextarg);
7888 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7889 TREE_SIDE_EFFECTS (t) = 1;
7890 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7893 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7894 member, return it. */
7895 static tree
7896 find_sole_member (tree type)
7898 tree field, member = NULL_TREE;
7900 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7902 if (TREE_CODE (field) != FIELD_DECL)
7903 continue;
7904 if (!DECL_SIZE (field))
7905 return NULL_TREE;
7906 if (integer_zerop (DECL_SIZE (field)))
7907 continue;
7908 if (member)
7909 return NULL_TREE;
7910 member = field;
7912 return member;
7914 /* Implement `va_arg'. */
7916 static tree
7917 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7918 gimple_seq *post_p ATTRIBUTE_UNUSED)
7920 HOST_WIDE_INT size, rsize;
7921 tree tmp, pptr_type_node;
7922 tree addr, lab_over = NULL, result = NULL;
7923 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7924 tree eff_type;
7926 if (pass_by_ref)
7927 type = build_pointer_type (type);
7929 size = int_size_in_bytes (type);
7930 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7931 pptr_type_node = build_pointer_type (ptr_type_node);
7933 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7934 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7936 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7937 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7938 int pass_as_float;
7939 tree lab_false;
7940 tree member;
7942 f_next_o = TYPE_FIELDS (va_list_type_node);
7943 f_next_o_limit = DECL_CHAIN (f_next_o);
7944 f_next_fp = DECL_CHAIN (f_next_o_limit);
7945 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7946 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7948 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7949 NULL_TREE);
7950 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7951 valist, f_next_o_limit, NULL_TREE);
7952 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7953 valist, f_next_fp, NULL_TREE);
7954 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7955 valist, f_next_fp_limit, NULL_TREE);
7956 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7957 valist, f_next_stack, NULL_TREE);
7959 /* Structures with a single member with a distinct mode are passed
7960 like their member. This is relevant if the latter has a REAL_TYPE
7961 or COMPLEX_TYPE type. */
7962 eff_type = type;
7963 while (TREE_CODE (eff_type) == RECORD_TYPE
7964 && (member = find_sole_member (eff_type))
7965 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7966 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7967 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7969 tree field_type = TREE_TYPE (member);
7971 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7972 eff_type = field_type;
7973 else
7975 gcc_assert ((TYPE_ALIGN (eff_type)
7976 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7977 || (TYPE_ALIGN (eff_type)
7978 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7979 break;
7983 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7985 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7986 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7987 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7988 && size <= 16));
7990 else
7992 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7995 addr = create_tmp_var (pptr_type_node, NULL);
7996 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7997 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7999 valist = build_simple_mem_ref (addr);
8001 if (pass_as_float)
8003 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8004 tree cmp;
8005 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8007 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8008 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8010 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8011 tmp = next_fp_limit;
8012 if (size > 4 && !is_double)
8013 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8014 unshare_expr (tmp), size_int (4 - size));
8015 tmp = build2 (GE_EXPR, boolean_type_node,
8016 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8017 cmp = build3 (COND_EXPR, void_type_node, tmp,
8018 build1 (GOTO_EXPR, void_type_node,
8019 unshare_expr (lab_false)), NULL_TREE);
8020 if (!is_double)
8021 gimplify_and_add (cmp, pre_p);
8023 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8024 || (is_double || size == 16))
8026 tmp = fold_convert (sizetype, next_fp_tmp);
8027 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8028 size_int (UNITS_PER_WORD));
8029 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8030 unshare_expr (next_fp_tmp), tmp);
8031 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8033 if (is_double)
8034 gimplify_and_add (cmp, pre_p);
8036 #ifdef FUNCTION_ARG_SCmode_WART
8037 if (TYPE_MODE (eff_type) == SCmode
8038 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8040 tree subtype = TREE_TYPE (eff_type);
8041 tree real, imag;
8043 imag
8044 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8045 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8047 real
8048 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8049 real = get_initialized_tmp_var (real, pre_p, NULL);
8051 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8052 if (type != eff_type)
8053 result = build1 (VIEW_CONVERT_EXPR, type, result);
8054 result = get_initialized_tmp_var (result, pre_p, NULL);
8056 #endif /* FUNCTION_ARG_SCmode_WART */
8058 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8059 gimplify_and_add (tmp, pre_p);
8061 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8062 gimplify_and_add (tmp, pre_p);
8064 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8065 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8066 gimplify_assign (unshare_expr (next_fp_tmp),
8067 unshare_expr (valist), pre_p);
8069 gimplify_assign (unshare_expr (valist),
8070 unshare_expr (next_fp_tmp), post_p);
8071 valist = next_fp_tmp;
8073 else
8075 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8076 unshare_expr (next_o), size_int (rsize));
8077 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8078 unshare_expr (next_o_limit));
8079 tmp = build3 (COND_EXPR, void_type_node, tmp,
8080 build1 (GOTO_EXPR, void_type_node,
8081 unshare_expr (lab_false)),
8082 NULL_TREE);
8083 gimplify_and_add (tmp, pre_p);
8085 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8086 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8088 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8089 gimplify_and_add (tmp, pre_p);
8091 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8092 gimplify_and_add (tmp, pre_p);
8094 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8095 gimplify_assign (unshare_expr (next_o),
8096 unshare_expr (next_o_limit), pre_p);
8098 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8099 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8102 if (!result)
8104 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8105 gimplify_and_add (tmp, pre_p);
8109 /* ??? In va-sh.h, there had been code to make values larger than
8110 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8112 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8113 if (result)
8115 gimplify_assign (result, tmp, pre_p);
8116 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8117 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8118 gimplify_and_add (tmp, pre_p);
8120 else
8121 result = tmp;
8123 if (pass_by_ref)
8124 result = build_va_arg_indirect_ref (result);
8126 return result;
8129 /* 64-bit floating point memory transfers are paired single precision
8130 loads or stores, so the DWARF information needs fixing in little endian
8131 mode (unless PR=SZ=1 in FPSCR). */
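/* For example (sketch only): a DFmode value held in dr0 is described to
   the DWARF consumer as the register pair (fr1, fr0) on little endian
   targets, so that each 32-bit half is found where the paired single
   precision accesses actually put it.  */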
8133 sh_dwarf_register_span (rtx reg)
8135 unsigned regno = REGNO (reg);
8137 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8138 return NULL_RTX;
8140 return
8141 gen_rtx_PARALLEL (VOIDmode,
8142 gen_rtvec (2,
8143 gen_rtx_REG (SFmode,
8144 DBX_REGISTER_NUMBER (regno+1)),
8145 gen_rtx_REG (SFmode,
8146 DBX_REGISTER_NUMBER (regno))));
8149 static enum machine_mode
8150 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8151 int *punsignedp, const_tree funtype,
8152 int for_return ATTRIBUTE_UNUSED)
8154 if (sh_promote_prototypes (funtype))
8155 return promote_mode (type, mode, punsignedp);
8156 else
8157 return mode;
8160 static bool
8161 sh_promote_prototypes (const_tree type)
8163 if (TARGET_HITACHI)
8164 return 0;
8165 if (! type)
8166 return 1;
8167 return ! sh_attr_renesas_p (type);
8170 /* Whether an argument must be passed by reference. On SHcompact, we
8171 pretend arguments wider than 32 bits that would have been passed in
8172 registers are passed by reference, so that an SHmedia trampoline
8173 loads them into the full 64-bit registers. */
8175 static int
8176 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8177 const_tree type, bool named)
8179 unsigned HOST_WIDE_INT size;
8181 if (type)
8182 size = int_size_in_bytes (type);
8183 else
8184 size = GET_MODE_SIZE (mode);
8186 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8187 && (!named
8188 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8189 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8190 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8191 && size > 4
8192 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8193 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8194 return size;
8195 else
8196 return 0;
8199 static bool
8200 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8201 const_tree type, bool named)
8203 if (targetm.calls.must_pass_in_stack (mode, type))
8204 return true;
8206 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8207 wants to know about pass-by-reference semantics for incoming
8208 arguments. */
8209 if (! cum)
8210 return false;
8212 if (TARGET_SHCOMPACT)
8214 cum->byref = shcompact_byref (cum, mode, type, named);
8215 return cum->byref != 0;
8218 return false;
8221 static bool
8222 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8223 const_tree type, bool named ATTRIBUTE_UNUSED)
8225 /* ??? How can it possibly be correct to return true only on the
8226 caller side of the equation? Is there someplace else in the
8227 sh backend that's magically producing the copies? */
8228 return (cum->outgoing
8229 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8230 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8233 static int
8234 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8235 tree type, bool named ATTRIBUTE_UNUSED)
8237 int words = 0;
8239 if (!TARGET_SH5
8240 && PASS_IN_REG_P (*cum, mode, type)
8241 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8242 && (ROUND_REG (*cum, mode)
8243 + (mode != BLKmode
8244 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8245 : ROUND_ADVANCE (int_size_in_bytes (type)))
8246 > NPARM_REGS (mode)))
8247 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8249 else if (!TARGET_SHCOMPACT
8250 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8251 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8253 return words * UNITS_PER_WORD;
8257 /* Define where to put the arguments to a function.
8258 Value is zero to push the argument on the stack,
8259 or a hard register in which to store the argument.
8261 MODE is the argument's machine mode.
8262 TYPE is the data type of the argument (as a tree).
8263 This is null for libcalls where that information may
8264 not be available.
8265 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8266 the preceding args and about the function being called.
8267 NAMED is nonzero if this argument is a named parameter
8268 (otherwise it is an extra parameter matching an ellipsis).
8270 On SH the first args are normally in registers
8271 and the rest are pushed. Any arg that starts within the first
8272 NPARM_REGS words is at least partially passed in a register unless
8273 its data type forbids. */
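/* As an illustration only (not normative): on plain SH targets the first
   four integer arguments go in r4..r7, and on SH-2E/SH-4 the first eight
   SFmode arguments go in fr4..fr11; anything that does not fit is pushed
   on the stack.  */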
8275 static rtx
8276 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8277 const_tree type, bool named)
8279 if (! TARGET_SH5 && mode == VOIDmode)
8280 return GEN_INT (ca->renesas_abi ? 1 : 0);
8282 if (! TARGET_SH5
8283 && PASS_IN_REG_P (*ca, mode, type)
8284 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8286 int regno;
8288 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8289 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8291 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8292 gen_rtx_REG (SFmode,
8293 BASE_ARG_REG (mode)
8294 + (ROUND_REG (*ca, mode) ^ 1)),
8295 const0_rtx);
8296 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8297 gen_rtx_REG (SFmode,
8298 BASE_ARG_REG (mode)
8299 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8300 GEN_INT (4));
8301 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
8304 /* If the alignment of a DF value causes an SF register to be
8305 skipped, we will use that skipped register for the next SF
8306 value. */
8307 if ((TARGET_HITACHI || ca->renesas_abi)
8308 && ca->free_single_fp_reg
8309 && mode == SFmode)
8310 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8312 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8313 ^ (mode == SFmode && TARGET_SH4
8314 && TARGET_LITTLE_ENDIAN != 0
8315 && ! TARGET_HITACHI && ! ca->renesas_abi);
8316 return gen_rtx_REG (mode, regno);
8320 if (TARGET_SH5)
8322 if (mode == VOIDmode && TARGET_SHCOMPACT)
8323 return GEN_INT (ca->call_cookie);
8325 /* The following test assumes unnamed arguments are promoted to
8326 DFmode. */
8327 if (mode == SFmode && ca->free_single_fp_reg)
8328 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8330 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8331 && (named || ! ca->prototype_p)
8332 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8334 if (! ca->prototype_p && TARGET_SHMEDIA)
8335 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8337 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8338 FIRST_FP_PARM_REG
8339 + ca->arg_count[(int) SH_ARG_FLOAT]);
8342 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8343 && (! TARGET_SHCOMPACT
8344 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8345 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8346 type, named))))
8348 return gen_rtx_REG (mode, (FIRST_PARM_REG
8349 + ca->arg_count[(int) SH_ARG_INT]));
8352 return 0;
8355 return 0;
8358 /* Update the data in CUM to advance over an argument
8359 of mode MODE and data type TYPE.
8360 (TYPE is null for libcalls where that information may not be
8361 available.) */
8363 static void
8364 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8365 const_tree type, bool named)
8367 if (ca->force_mem)
8368 ca->force_mem = 0;
8369 else if (TARGET_SH5)
8371 const_tree type2 = (ca->byref && type
8372 ? TREE_TYPE (type)
8373 : type);
8374 enum machine_mode mode2 = (ca->byref && type
8375 ? TYPE_MODE (type2)
8376 : mode);
8377 int dwords = ((ca->byref
8378 ? ca->byref
8379 : mode2 == BLKmode
8380 ? int_size_in_bytes (type2)
8381 : GET_MODE_SIZE (mode2)) + 7) / 8;
8382 int numregs = MIN (dwords, NPARM_REGS (SImode)
8383 - ca->arg_count[(int) SH_ARG_INT]);
8385 if (numregs)
8387 ca->arg_count[(int) SH_ARG_INT] += numregs;
8388 if (TARGET_SHCOMPACT
8389 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8391 ca->call_cookie
8392 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8393 - numregs, 1);
8394 /* N.B. We want this also for outgoing. */
8395 ca->stack_regs += numregs;
8397 else if (ca->byref)
8399 if (! ca->outgoing)
8400 ca->stack_regs += numregs;
8401 ca->byref_regs += numregs;
8402 ca->byref = 0;
8404 ca->call_cookie
8405 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8406 - numregs, 2);
8407 while (--numregs);
8408 ca->call_cookie
8409 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8410 - 1, 1);
8412 else if (dwords > numregs)
8414 int pushregs = numregs;
8416 if (TARGET_SHCOMPACT)
8417 ca->stack_regs += numregs;
8418 while (pushregs < NPARM_REGS (SImode) - 1
8419 && (CALL_COOKIE_INT_REG_GET
8420 (ca->call_cookie,
8421 NPARM_REGS (SImode) - pushregs)
8422 == 1))
8424 ca->call_cookie
8425 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8426 - pushregs, 1);
8427 pushregs++;
8429 if (numregs == NPARM_REGS (SImode))
8430 ca->call_cookie
8431 |= CALL_COOKIE_INT_REG (0, 1)
8432 | CALL_COOKIE_STACKSEQ (numregs - 1);
8433 else
8434 ca->call_cookie
8435 |= CALL_COOKIE_STACKSEQ (numregs);
8438 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8439 && (named || ! ca->prototype_p))
8441 if (mode2 == SFmode && ca->free_single_fp_reg)
8442 ca->free_single_fp_reg = 0;
8443 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8444 < NPARM_REGS (SFmode))
8446 int numfpregs
8447 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8448 NPARM_REGS (SFmode)
8449 - ca->arg_count[(int) SH_ARG_FLOAT]);
8451 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8453 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8455 if (ca->outgoing && numregs > 0)
8458 ca->call_cookie
8459 |= (CALL_COOKIE_INT_REG
8460 (ca->arg_count[(int) SH_ARG_INT]
8461 - numregs + ((numfpregs - 2) / 2),
8462 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8463 - numfpregs) / 2));
8465 while (numfpregs -= 2);
8467 else if (mode2 == SFmode && (named)
8468 && (ca->arg_count[(int) SH_ARG_FLOAT]
8469 < NPARM_REGS (SFmode)))
8470 ca->free_single_fp_reg
8471 = FIRST_FP_PARM_REG - numfpregs
8472 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8475 return;
8478 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8480 /* Note that we've used the skipped register. */
8481 if (mode == SFmode && ca->free_single_fp_reg)
8483 ca->free_single_fp_reg = 0;
8484 return;
8486 /* When we have a DF after an SF, there's an SF register that gets
8487 skipped in order to align the DF value. We note this skipped
8488 register, because the next SF value will use it, and not the
8489 SF that follows the DF. */
8490 if (mode == DFmode
8491 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8493 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8494 + BASE_ARG_REG (mode));
8498 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8499 || PASS_IN_REG_P (*ca, mode, type))
8500 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8501 = (ROUND_REG (*ca, mode)
8502 + (mode == BLKmode
8503 ? ROUND_ADVANCE (int_size_in_bytes (type))
8504 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8507 /* The Renesas calling convention doesn't quite fit into this scheme since
8508 the address is passed like an invisible argument, but one that is always
8509 passed in memory. */
8510 static rtx
8511 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8513 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8514 return 0;
8515 return gen_rtx_REG (Pmode, 2);
8518 /* Worker function for TARGET_FUNCTION_VALUE.
8520 For the SH, this is like LIBCALL_VALUE, except that we must change the
8521 mode like PROMOTE_MODE does.
8522 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8523 tested here has to be kept in sync with the one in explow.c:promote_mode.
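   E.g. (illustration only): a 'short' return value is promoted to SImode
   and comes back in r0, while an SFmode result is returned in fr0 on FPU
   targets.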
8526 static rtx
8527 sh_function_value (const_tree valtype,
8528 const_tree fn_decl_or_type,
8529 bool outgoing ATTRIBUTE_UNUSED)
8531 if (fn_decl_or_type
8532 && !DECL_P (fn_decl_or_type))
8533 fn_decl_or_type = NULL;
8535 return gen_rtx_REG (
8536 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8537 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8538 && (TREE_CODE (valtype) == INTEGER_TYPE
8539 || TREE_CODE (valtype) == ENUMERAL_TYPE
8540 || TREE_CODE (valtype) == BOOLEAN_TYPE
8541 || TREE_CODE (valtype) == REAL_TYPE
8542 || TREE_CODE (valtype) == OFFSET_TYPE))
8543 && sh_promote_prototypes (fn_decl_or_type)
8544 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8545 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8548 /* Worker function for TARGET_LIBCALL_VALUE. */
8550 static rtx
8551 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8553 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8556 /* Return true if N is a possible register number of function value. */
8558 static bool
8559 sh_function_value_regno_p (const unsigned int regno)
8561 return ((regno) == FIRST_RET_REG
8562 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8563 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8566 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8568 static bool
8569 sh_return_in_memory (const_tree type, const_tree fndecl)
8571 if (TARGET_SH5)
8573 if (TYPE_MODE (type) == BLKmode)
8574 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8575 else
8576 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8578 else
8580 return (TYPE_MODE (type) == BLKmode
8581 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8582 && TREE_CODE (type) == RECORD_TYPE));
8586 /* We actually emit the code in sh_expand_prologue. We used to use
8587 a static variable to flag that we need to emit this code, but that
8588 doesn't work when inlining, when functions are deferred and then emitted
8589 later. Fortunately, we already have two flags that are part of struct
8590 function that tell if a function uses varargs or stdarg. */
8591 static void
8592 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8593 enum machine_mode mode,
8594 tree type,
8595 int *pretend_arg_size,
8596 int second_time ATTRIBUTE_UNUSED)
8598 gcc_assert (cfun->stdarg);
8599 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8601 int named_parm_regs, anon_parm_regs;
8603 named_parm_regs = (ROUND_REG (*ca, mode)
8604 + (mode == BLKmode
8605 ? ROUND_ADVANCE (int_size_in_bytes (type))
8606 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8607 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8608 if (anon_parm_regs > 0)
8609 *pretend_arg_size = anon_parm_regs * 4;
8613 static bool
8614 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8616 return TARGET_SH5;
8619 static bool
8620 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8622 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8626 /* Define the offset between two registers, one to be eliminated, and
8627 the other its replacement, at the start of a routine. */
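/* Sketch of the offsets computed below for the non-SH5 case (illustration
   only):

       arg pointer   = stack pointer + saved registers + local frame
       frame pointer = stack pointer + rounded_frame_size (0)
       hard frame pointer and stack pointer start out at the same place.  */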
8630 initial_elimination_offset (int from, int to)
8632 int regs_saved;
8633 int regs_saved_rounding = 0;
8634 int total_saved_regs_space;
8635 int total_auto_space;
8636 int save_flags = target_flags;
8637 int copy_flags;
8638 HARD_REG_SET live_regs_mask;
8640 shmedia_space_reserved_for_target_registers = false;
8641 regs_saved = calc_live_regs (&live_regs_mask);
8642 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8644 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8646 shmedia_space_reserved_for_target_registers = true;
8647 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8650 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8651 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8652 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8654 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8655 copy_flags = target_flags;
8656 target_flags = save_flags;
8658 total_saved_regs_space = regs_saved + regs_saved_rounding;
8660 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8661 return total_saved_regs_space + total_auto_space
8662 + crtl->args.info.byref_regs * 8;
8664 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8665 return total_saved_regs_space + total_auto_space
8666 + crtl->args.info.byref_regs * 8;
8668 /* Initial gap between fp and sp is 0. */
8669 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8670 return 0;
8672 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8673 return rounded_frame_size (0);
8675 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8676 return rounded_frame_size (0);
8678 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8679 && (to == HARD_FRAME_POINTER_REGNUM
8680 || to == STACK_POINTER_REGNUM));
8681 if (TARGET_SH5)
8683 int n = total_saved_regs_space;
8684 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8685 save_schedule schedule;
8686 save_entry *entry;
8688 n += total_auto_space;
8690 /* If it wasn't saved, there's not much we can do. */
8691 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8692 return n;
8694 target_flags = copy_flags;
8696 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8697 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8698 if (entry->reg == pr_reg)
8700 target_flags = save_flags;
8701 return entry->offset;
8703 gcc_unreachable ();
8705 else
8706 return total_auto_space;
8709 /* Parse the -mfixed-range= option string. */
8710 void
8711 sh_fix_range (const char *const_str)
8713 int i, first, last;
8714 char *str, *dash, *comma;
8716 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8717 REG2 are either register names or register numbers. The effect
8718 of this option is to mark the registers in the range from REG1 to
8719 REG2 as ``fixed'' so they won't be used by the compiler. */
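/* Example (illustrative only): -mfixed-range=r10-r13 marks r10, r11, r12
   and r13 as fixed, so the register allocator never uses them.  */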
8721 i = strlen (const_str);
8722 str = (char *) alloca (i + 1);
8723 memcpy (str, const_str, i + 1);
8725 while (1)
8727 dash = strchr (str, '-');
8728 if (!dash)
8730 warning (0, "value of -mfixed-range must have form REG1-REG2");
8731 return;
8733 *dash = '\0';
8734 comma = strchr (dash + 1, ',');
8735 if (comma)
8736 *comma = '\0';
8738 first = decode_reg_name (str);
8739 if (first < 0)
8741 warning (0, "unknown register name: %s", str);
8742 return;
8745 last = decode_reg_name (dash + 1);
8746 if (last < 0)
8748 warning (0, "unknown register name: %s", dash + 1);
8749 return;
8752 *dash = '-';
8754 if (first > last)
8756 warning (0, "%s-%s is an empty range", str, dash + 1);
8757 return;
8760 for (i = first; i <= last; ++i)
8761 fixed_regs[i] = call_used_regs[i] = 1;
8763 if (!comma)
8764 break;
8766 *comma = ',';
8767 str = comma + 1;
8771 /* Insert any deferred function attributes from earlier pragmas. */
8772 static void
8773 sh_insert_attributes (tree node, tree *attributes)
8775 tree attrs;
8777 if (TREE_CODE (node) != FUNCTION_DECL)
8778 return;
8780 /* We are only interested in declarations. */
8781 if (!DECL_P (node))
8782 return;
8784 /* Append the attributes to the deferred attributes. */
8785 *sh_deferred_function_attributes_tail = *attributes;
8786 attrs = sh_deferred_function_attributes;
8787 if (!attrs)
8788 return;
8790 /* Some attributes imply or require the interrupt attribute. */
8791 if (!lookup_attribute ("interrupt_handler", attrs)
8792 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8794 /* If we have a trapa_handler, but no interrupt_handler attribute,
8795 insert an interrupt_handler attribute. */
8796 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8797 /* We can't use sh_pr_interrupt here because that's not in the
8798 java frontend. */
8799 attrs
8800 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8801 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8802 if the interrupt attribute is missing, we ignore the attribute
8803 and warn. */
8804 else if (lookup_attribute ("sp_switch", attrs)
8805 || lookup_attribute ("trap_exit", attrs)
8806 || lookup_attribute ("nosave_low_regs", attrs)
8807 || lookup_attribute ("resbank", attrs))
8809 tree *tail;
8811 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8813 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8814 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8815 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8816 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8817 warning (OPT_Wattributes,
8818 "%qE attribute only applies to interrupt functions",
8819 TREE_PURPOSE (attrs));
8820 else
8822 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8823 NULL_TREE);
8824 tail = &TREE_CHAIN (*tail);
8827 attrs = *attributes;
8831 /* Install the processed list. */
8832 *attributes = attrs;
8834 /* Clear deferred attributes. */
8835 sh_deferred_function_attributes = NULL_TREE;
8836 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8838 return;
8841 /* Supported attributes:
8843 interrupt_handler -- specifies this function is an interrupt handler.
8845 trapa_handler - like above, but don't save all registers.
8847 sp_switch -- specifies an alternate stack for an interrupt handler
8848 to run on.
8850 trap_exit -- use a trapa to exit an interrupt function instead of
8851 an rte instruction.
8853 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8854 This is useful on the SH3 and upwards,
8855 which have a separate set of low regs for User and Supervisor modes.
8856 This should only be used for the lowest level of interrupts. Higher levels
8857 of interrupts must save the registers in case they themselves are
8858 interrupted.
8860 renesas -- use Renesas calling/layout conventions (functions and
8861 structures).
8863 resbank -- In case of an ISR, use a register bank to save registers
8864 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
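   Usage sketch (illustrative only, not from the original source):

       void __attribute__ ((interrupt_handler)) isr1 (void);

       void *alt_stack;
       void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                            trap_exit (11))) isr2 (void);

       void __attribute__ ((interrupt_handler, resbank)) isr3 (void);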
8867 /* Handle a 'resbank' attribute. */
8868 static tree
8869 sh_handle_resbank_handler_attribute (tree * node, tree name,
8870 tree args ATTRIBUTE_UNUSED,
8871 int flags ATTRIBUTE_UNUSED,
8872 bool * no_add_attrs)
8874 if (!TARGET_SH2A)
8876 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8877 name);
8878 *no_add_attrs = true;
8880 if (TREE_CODE (*node) != FUNCTION_DECL)
8882 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8883 name);
8884 *no_add_attrs = true;
8887 return NULL_TREE;
8890 /* Handle an "interrupt_handler" attribute; arguments as in
8891 struct attribute_spec.handler. */
8892 static tree
8893 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8894 tree args ATTRIBUTE_UNUSED,
8895 int flags ATTRIBUTE_UNUSED,
8896 bool *no_add_attrs)
8898 if (TREE_CODE (*node) != FUNCTION_DECL)
8900 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8901 name);
8902 *no_add_attrs = true;
8904 else if (TARGET_SHCOMPACT)
8906 error ("attribute interrupt_handler is not compatible with -m5-compact");
8907 *no_add_attrs = true;
8910 return NULL_TREE;
8913 /* Handle a 'function_vector' attribute; arguments as in
8914 struct attribute_spec.handler. */
8915 static tree
8916 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8917 tree args ATTRIBUTE_UNUSED,
8918 int flags ATTRIBUTE_UNUSED,
8919 bool * no_add_attrs)
8921 if (!TARGET_SH2A)
8923 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8924 name);
8925 *no_add_attrs = true;
8927 else if (TREE_CODE (*node) != FUNCTION_DECL)
8929 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8930 name);
8931 *no_add_attrs = true;
8933 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8935 /* The argument must be a constant integer. */
8936 warning (OPT_Wattributes,
8937 "%qE attribute argument not an integer constant",
8938 name);
8939 *no_add_attrs = true;
8941 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8943 /* The argument value must be between 0 and 255. */
8944 warning (OPT_Wattributes,
8945 "%qE attribute argument should be between 0 and 255",
8946 name);
8947 *no_add_attrs = true;
8949 return NULL_TREE;
8952 /* Returns 1 if X refers to a function that has been assigned the
8953 attribute 'function_vector'. */
8955 sh2a_is_function_vector_call (rtx x)
8957 if (GET_CODE (x) == SYMBOL_REF
8958 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8960 tree tr = SYMBOL_REF_DECL (x);
8962 if (sh2a_function_vector_p (tr))
8963 return 1;
8966 return 0;
8969 /* Returns the function vector number, if the attribute
8970 'function_vector' is assigned, otherwise returns zero. */
8972 sh2a_get_function_vector_number (rtx x)
8974 int num;
8975 tree list, t;
8977 if ((GET_CODE (x) == SYMBOL_REF)
8978 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8980 t = SYMBOL_REF_DECL (x);
8982 if (TREE_CODE (t) != FUNCTION_DECL)
8983 return 0;
8985 list = SH_ATTRIBUTES (t);
8986 while (list)
8988 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8990 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8991 return num;
8994 list = TREE_CHAIN (list);
8997 return 0;
8999 else
9000 return 0;
9003 /* Handle an "sp_switch" attribute; arguments as in
9004 struct attribute_spec.handler. */
9005 static tree
9006 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9007 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9009 if (TREE_CODE (*node) != FUNCTION_DECL)
9011 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9012 name);
9013 *no_add_attrs = true;
9015 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9017 /* The argument must be a constant string. */
9018 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9019 name);
9020 *no_add_attrs = true;
9023 return NULL_TREE;
9026 /* Handle a "trap_exit" attribute; arguments as in
9027 struct attribute_spec.handler. */
9028 static tree
9029 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9030 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9032 if (TREE_CODE (*node) != FUNCTION_DECL)
9034 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9035 name);
9036 *no_add_attrs = true;
9038 /* The argument specifies a trap number to be used in a trapa instruction
9039 at function exit (instead of an rte instruction). */
9040 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9042 /* The argument must be a constant integer. */
9043 warning (OPT_Wattributes, "%qE attribute argument not an "
9044 "integer constant", name);
9045 *no_add_attrs = true;
9048 return NULL_TREE;
9051 static tree
9052 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9053 tree name ATTRIBUTE_UNUSED,
9054 tree args ATTRIBUTE_UNUSED,
9055 int flags ATTRIBUTE_UNUSED,
9056 bool *no_add_attrs ATTRIBUTE_UNUSED)
9058 return NULL_TREE;
9061 /* True if __attribute__((renesas)) or -mrenesas. */
9063 sh_attr_renesas_p (const_tree td)
9065 if (TARGET_HITACHI)
9066 return 1;
9067 if (td == 0)
9068 return 0;
9069 if (DECL_P (td))
9070 td = TREE_TYPE (td);
9071 if (td == error_mark_node)
9072 return 0;
9073 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9074 != NULL_TREE);
9077 /* True if __attribute__((renesas)) or -mrenesas, for the current
9078 function. */
9080 sh_cfun_attr_renesas_p (void)
9082 return sh_attr_renesas_p (current_function_decl);
9086 sh_cfun_interrupt_handler_p (void)
9088 return (lookup_attribute ("interrupt_handler",
9089 DECL_ATTRIBUTES (current_function_decl))
9090 != NULL_TREE);
9093 /* Returns 1 if FUNC has been assigned the attribute
9094 "function_vector". */
9096 sh2a_function_vector_p (tree func)
9098 tree list;
9099 if (TREE_CODE (func) != FUNCTION_DECL)
9100 return 0;
9102 list = SH_ATTRIBUTES (func);
9103 while (list)
9105 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9106 return 1;
9108 list = TREE_CHAIN (list);
9110 return 0;
9113 /* Returns TRUE if the current function is an interrupt handler with the "resbank" attribute (SH2A only). */
9116 sh_cfun_resbank_handler_p (void)
9118 return ((lookup_attribute ("resbank",
9119 DECL_ATTRIBUTES (current_function_decl))
9120 != NULL_TREE)
9121 && (lookup_attribute ("interrupt_handler",
9122 DECL_ATTRIBUTES (current_function_decl))
9123 != NULL_TREE) && TARGET_SH2A);
9126 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9128 static const char *
9129 sh_check_pch_target_flags (int old_flags)
9131 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9132 | MASK_SH_E | MASK_HARD_SH4
9133 | MASK_FPU_SINGLE | MASK_SH4))
9134 return _("created and used with different architectures / ABIs");
9135 if ((old_flags ^ target_flags) & MASK_HITACHI)
9136 return _("created and used with different ABIs");
9137 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9138 return _("created and used with different endianness");
9139 return NULL;
9142 /* Predicates used by the templates. */
9144 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9145 Used only in general_movsrc_operand. */
9148 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9150 switch (REGNO (op))
9152 case PR_REG:
9153 case MACL_REG:
9154 case MACH_REG:
9155 return 1;
9157 return 0;
9160 /* Nonzero if OP is a floating point value with value 0.0. */
9163 fp_zero_operand (rtx op)
9165 REAL_VALUE_TYPE r;
9167 if (GET_MODE (op) != SFmode)
9168 return 0;
9170 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9171 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9174 /* Nonzero if OP is a floating point value with value 1.0. */
9177 fp_one_operand (rtx op)
9179 REAL_VALUE_TYPE r;
9181 if (GET_MODE (op) != SFmode)
9182 return 0;
9184 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9185 return REAL_VALUES_EQUAL (r, dconst1);
9188 /* In general, mode switching is used. If we are
9189 compiling without -mfmovd, movsf_ie isn't taken into account for
9190 mode switching. We could check in machine_dependent_reorg for
9191 cases where we know we are in single precision mode, but there is
9192 no interface to find that out during reload, so we must avoid
9193 choosing an fldi alternative during reload and thus failing to
9194 allocate a scratch register for the constant loading. */
9196 fldi_ok (void)
9198 return 1;
9202 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9204 enum rtx_code code = GET_CODE (op);
9205 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9208 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9209 enum tls_model
9210 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9212 if (GET_CODE (op) != SYMBOL_REF)
9213 return TLS_MODEL_NONE;
9214 return SYMBOL_REF_TLS_MODEL (op);
9217 /* Return the destination address of a branch. */
9219 static int
9220 branch_dest (rtx branch)
9222 rtx dest = SET_SRC (PATTERN (branch));
9223 int dest_uid;
9225 if (GET_CODE (dest) == IF_THEN_ELSE)
9226 dest = XEXP (dest, 1);
9227 dest = XEXP (dest, 0);
9228 dest_uid = INSN_UID (dest);
9229 return INSN_ADDRESSES (dest_uid);
9232 /* Return nonzero if REG is not used after INSN.
9233 We assume REG is a reload reg, and therefore does
9234 not live past labels. It may live past calls or jumps though. */
9236 reg_unused_after (rtx reg, rtx insn)
9238 enum rtx_code code;
9239 rtx set;
9241 /* If the reg is set by this instruction, then it is safe for our
9242 case. Disregard the case where this is a store to memory, since
9243 we are checking a register used in the store address. */
9244 set = single_set (insn);
9245 if (set && !MEM_P (SET_DEST (set))
9246 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9247 return 1;
9249 while ((insn = NEXT_INSN (insn)))
9251 rtx set;
9252 if (!INSN_P (insn))
9253 continue;
9255 code = GET_CODE (insn);
9257 #if 0
9258 /* If this is a label that existed before reload, then the register
9259 is dead here. However, if this is a label added by reorg, then
9260 the register may still be live here. We can't tell the difference,
9261 so we just ignore labels completely. */
9262 if (code == CODE_LABEL)
9263 return 1;
9264 /* else */
9265 #endif
9267 if (code == JUMP_INSN)
9268 return 0;
9270 /* If this is a sequence, we must handle them all at once.
9271 We could have for instance a call that sets the target register,
9272 and an insn in a delay slot that uses the register. In this case,
9273 we must return 0. */
9274 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9276 int i;
9277 int retval = 0;
9279 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9281 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9282 rtx set = single_set (this_insn);
9284 if (CALL_P (this_insn))
9285 code = CALL_INSN;
9286 else if (JUMP_P (this_insn))
9288 if (INSN_ANNULLED_BRANCH_P (this_insn))
9289 return 0;
9290 code = JUMP_INSN;
9293 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9294 return 0;
9295 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9297 if (!MEM_P (SET_DEST (set)))
9298 retval = 1;
9299 else
9300 return 0;
9302 if (set == 0
9303 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9304 return 0;
9306 if (retval == 1)
9307 return 1;
9308 else if (code == JUMP_INSN)
9309 return 0;
9312 set = single_set (insn);
9313 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9314 return 0;
9315 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9316 return !MEM_P (SET_DEST (set));
9317 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9318 return 0;
9320 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9321 return 1;
9323 return 1;
9326 #include "ggc.h"
9328 static GTY(()) rtx fpscr_rtx;
9330 get_fpscr_rtx (void)
9332 if (! fpscr_rtx)
9334 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9335 REG_USERVAR_P (fpscr_rtx) = 1;
9336 mark_user_reg (fpscr_rtx);
9338 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9339 mark_user_reg (fpscr_rtx);
9340 return fpscr_rtx;
9343 static GTY(()) tree fpscr_values;
9345 static void
9346 emit_fpu_switch (rtx scratch, int index)
9348 rtx dst, src;
9350 if (fpscr_values == NULL)
9352 tree t;
9354 t = build_index_type (integer_one_node);
9355 t = build_array_type (integer_type_node, t);
9356 t = build_decl (BUILTINS_LOCATION,
9357 VAR_DECL, get_identifier ("__fpscr_values"), t);
9358 DECL_ARTIFICIAL (t) = 1;
9359 DECL_IGNORED_P (t) = 1;
9360 DECL_EXTERNAL (t) = 1;
9361 TREE_STATIC (t) = 1;
9362 TREE_PUBLIC (t) = 1;
9363 TREE_USED (t) = 1;
9365 fpscr_values = t;
9368 src = DECL_RTL (fpscr_values);
9369 if (!can_create_pseudo_p ())
9371 emit_move_insn (scratch, XEXP (src, 0));
9372 if (index != 0)
9373 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9374 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9376 else
9377 src = adjust_address (src, PSImode, index * 4);
9379 dst = get_fpscr_rtx ();
9380 emit_move_insn (dst, src);
9383 void
9384 emit_sf_insn (rtx pat)
9386 emit_insn (pat);
9389 void
9390 emit_df_insn (rtx pat)
9392 emit_insn (pat);
9395 void
9396 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9398 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9401 void
9402 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9404 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9405 get_fpscr_rtx ()));
9408 void
9409 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9411 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9414 void
9415 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9417 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9418 get_fpscr_rtx ()));
9421 static rtx get_free_reg (HARD_REG_SET);
9423 /* This function returns a register to hold the address from which the
9424 fpscr is loaded. Currently it always returns r1 or r7, but when we are
9425 able to use pseudo registers after combine, or have a better mechanism
9426 for choosing a register, it should be done here. */
9427 /* REGS_LIVE is the liveness information for the point for which we
9428 need this allocation. In some bare-bones exit blocks, r1 is live at the
9429 start. We can even have all of r0..r3 being live:
9430 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9431 The insn before which the new insns are placed will clobber the register
9432 we return. If a basic block consists only of setting the return value
9433 register to a pseudo and using that register, the return value is not
9434 live before or after this block, yet we'll insert our insns right in
9435 the middle. */
9437 static rtx
9438 get_free_reg (HARD_REG_SET regs_live)
9440 if (! TEST_HARD_REG_BIT (regs_live, 1))
9441 return gen_rtx_REG (Pmode, 1);
9443 /* Hard reg 1 is live; since this is a small register classes target,
9444 there shouldn't be anything but a jump before the function end. */
9445 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9446 return gen_rtx_REG (Pmode, 7);
9449 /* This function will set the fpscr from memory.
9450 MODE is the mode we are setting it to. */
9451 void
9452 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9454 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9455 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9456 rtx addr_reg;
9458 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9459 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9462 /* Is the given character a logical line separator for the assembler? */
9463 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9464 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9465 #endif
9468 sh_insn_length_adjustment (rtx insn)
9470 /* Instructions with unfilled delay slots take up an extra two bytes for
9471 the nop in the delay slot. */
9472 if (((NONJUMP_INSN_P (insn)
9473 && GET_CODE (PATTERN (insn)) != USE
9474 && GET_CODE (PATTERN (insn)) != CLOBBER)
9475 || CALL_P (insn)
9476 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9477 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9478 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9479 return 2;
9481 /* SH2e has a bug that prevents the use of annulled branches, so if
9482 the delay slot is not filled, we'll have to put a NOP in it. */
9483 if (sh_cpu_attr == CPU_SH2E
9484 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9485 && get_attr_type (insn) == TYPE_CBRANCH
9486 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9487 return 2;
9489 /* sh-dsp parallel processing insns take four bytes instead of two. */
9491 if (NONJUMP_INSN_P (insn))
9493 int sum = 0;
9494 rtx body = PATTERN (insn);
9495 const char *templ;
9496 char c;
9497 int maybe_label = 1;
9499 if (GET_CODE (body) == ASM_INPUT)
9500 templ = XSTR (body, 0);
9501 else if (asm_noperands (body) >= 0)
9502 templ
9503 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9504 else
9505 return 0;
9508 int ppi_adjust = 0;
9511 c = *templ++;
9512 while (c == ' ' || c == '\t');
9513 /* all sh-dsp parallel-processing insns start with p.
9514 The only non-ppi sh insn starting with p is pref.
9515 The only ppi starting with pr is prnd. */
9516 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9517 ppi_adjust = 2;
9518 /* The repeat pseudo-insn expands to three insns, a total of
9519 six bytes in size. */
9520 else if ((c == 'r' || c == 'R')
9521 && ! strncasecmp ("epeat", templ, 5))
9522 ppi_adjust = 4;
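/* For instance, an asm line that starts with "padd" (a 32-bit DSP
   parallel-processing insn) gets ppi_adjust = 2 on top of the two bytes
   assumed per insn, and a "repeat" line gets ppi_adjust = 4 because it
   expands to three two-byte insns. */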
9523 while (c && c != '\n'
9524 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9526 /* If this is a label, it is obviously not a ppi insn. */
9527 if (c == ':' && maybe_label)
9529 ppi_adjust = 0;
9530 break;
9532 else if (c == '\'' || c == '"')
9533 maybe_label = 0;
9534 c = *templ++;
9536 sum += ppi_adjust;
9537 maybe_label = c != ':';
9539 while (c);
9540 return sum;
9542 return 0;
9545 /* Return TRUE for a valid displacement for the REG+disp addressing
9546 with MODE. */
9548 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9549 into the FRx registers. We implement this by setting the maximum offset
9550 to zero when the value is SFmode. This also restricts loading of SFmode
9551 values into the integer registers, but that can't be helped. */
9553 /* The SH allows a displacement in a QI or HI addressing mode, but only
9554 when the other operand is R0. GCC doesn't handle this very well, so we
9555 forgo all of that.
9557 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9558 DI can be any number 0..60. */
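/* For illustration: the 4-bit displacement of mov.l @(disp,Rn) is scaled
   by 4, so an SImode displacement must be a multiple of 4 below 64,
   i.e. 0, 4, ..., 60. A DImode value is accessed as two SImode pieces,
   so its displacement must also leave room for the word at +4. On
   SHmedia the displacement is a signed 10-bit immediate scaled by the
   access size, giving multiples of the size in [-512 * size, 512 * size). */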
9560 bool
9561 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9563 if (CONST_INT_P (op))
9565 if (TARGET_SHMEDIA)
9567 int size;
9569 /* Check if this is the address of an unaligned load / store. */
9570 if (mode == VOIDmode)
9571 return CONST_OK_FOR_I06 (INTVAL (op));
9573 size = GET_MODE_SIZE (mode);
9574 return (!(INTVAL (op) & (size - 1))
9575 && INTVAL (op) >= -512 * size
9576 && INTVAL (op) < 512 * size);
9579 if (TARGET_SH2A)
9581 if (GET_MODE_SIZE (mode) == 1
9582 && (unsigned) INTVAL (op) < 4096)
9583 return true;
9586 if ((GET_MODE_SIZE (mode) == 4
9587 && (unsigned) INTVAL (op) < 64
9588 && !(INTVAL (op) & 3)
9589 && !(TARGET_SH2E && mode == SFmode))
9590 || (GET_MODE_SIZE (mode) == 4
9591 && (unsigned) INTVAL (op) < 16383
9592 && !(INTVAL (op) & 3) && TARGET_SH2A))
9593 return true;
9595 if ((GET_MODE_SIZE (mode) == 8
9596 && (unsigned) INTVAL (op) < 60
9597 && !(INTVAL (op) & 3)
9598 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9599 || ((GET_MODE_SIZE (mode) == 8)
9600 && (unsigned) INTVAL (op) < 8192
9601 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9602 && (TARGET_SH2A && mode == DFmode)))
9603 return true;
9606 return false;
9609 /* Recognize an RTL expression that is a valid memory address for
9610 an instruction.
9611 The MODE argument is the machine mode for the MEM expression
9612 that wants to use this address.
9613 Allow REG
9614 REG+disp
9615 REG+r0
9616 REG++
9617 --REG */
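/* In SH assembly terms these are roughly @Rn, @(disp,Rn), @(R0,Rn),
   @Rn+ and @-Rn respectively. */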
9619 static bool
9620 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9622 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9623 return true;
9624 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9625 && ! TARGET_SHMEDIA
9626 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9627 return true;
9628 else if (GET_CODE (x) == PLUS
9629 && (mode != PSImode || reload_completed))
9631 rtx xop0 = XEXP (x, 0);
9632 rtx xop1 = XEXP (x, 1);
9634 if (GET_MODE_SIZE (mode) <= 8
9635 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9636 && sh_legitimate_index_p (mode, xop1))
9637 return true;
9639 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9640 || ((xop0 == stack_pointer_rtx
9641 || xop0 == hard_frame_pointer_rtx)
9642 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9643 || ((xop1 == stack_pointer_rtx
9644 || xop1 == hard_frame_pointer_rtx)
9645 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9646 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9647 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9648 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9649 && TARGET_FMOVD && mode == DFmode)))
9651 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9652 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9653 return true;
9654 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9655 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9656 return true;
9660 return false;
9663 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9664 isn't protected by a PIC unspec. */
9665 int
9666 nonpic_symbol_mentioned_p (rtx x)
9668 register const char *fmt;
9669 register int i;
9671 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9672 || GET_CODE (x) == PC)
9673 return 1;
9675 /* We don't want to look into the possible MEM location of a
9676 CONST_DOUBLE, since we're not going to use it, in general. */
9677 if (GET_CODE (x) == CONST_DOUBLE)
9678 return 0;
9680 if (GET_CODE (x) == UNSPEC
9681 && (XINT (x, 1) == UNSPEC_PIC
9682 || XINT (x, 1) == UNSPEC_GOT
9683 || XINT (x, 1) == UNSPEC_GOTOFF
9684 || XINT (x, 1) == UNSPEC_GOTPLT
9685 || XINT (x, 1) == UNSPEC_GOTTPOFF
9686 || XINT (x, 1) == UNSPEC_DTPOFF
9687 || XINT (x, 1) == UNSPEC_TPOFF
9688 || XINT (x, 1) == UNSPEC_PLT
9689 || XINT (x, 1) == UNSPEC_SYMOFF
9690 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9691 return 0;
9693 fmt = GET_RTX_FORMAT (GET_CODE (x));
9694 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9696 if (fmt[i] == 'E')
9698 register int j;
9700 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9701 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9702 return 1;
9704 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9705 return 1;
9708 return 0;
9711 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9712 @GOTOFF in `reg'. */
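/* Roughly: labels and local symbols are reached via sym@GOTOFF relative
   to the PIC register (r12 on SH), while other symbols have their
   address loaded from their sym@GOT slot. */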
9713 rtx
9714 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9715 rtx reg)
9717 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9718 return orig;
9720 if (GET_CODE (orig) == LABEL_REF
9721 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9723 if (reg == 0)
9724 reg = gen_reg_rtx (Pmode);
9726 emit_insn (gen_symGOTOFF2reg (reg, orig));
9727 return reg;
9729 else if (GET_CODE (orig) == SYMBOL_REF)
9731 if (reg == 0)
9732 reg = gen_reg_rtx (Pmode);
9734 emit_insn (gen_symGOT2reg (reg, orig));
9735 return reg;
9737 return orig;
9740 /* Try machine-dependent ways of modifying an illegitimate address
9741 to be legitimate. If we find one, return the new, valid address.
9742 Otherwise, return X.
9744 For the SH, if X is almost suitable for indexing, but the offset is
9745 out of range, convert it into a normal form so that CSE has a chance
9746 of reducing the number of address registers used. */
9748 static rtx
9749 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9751 if (flag_pic)
9752 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9754 if (GET_CODE (x) == PLUS
9755 && (GET_MODE_SIZE (mode) == 4
9756 || GET_MODE_SIZE (mode) == 8)
9757 && CONST_INT_P (XEXP (x, 1))
9758 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9759 && ! TARGET_SHMEDIA
9760 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9761 && ! (TARGET_SH2E && mode == SFmode))
9763 rtx index_rtx = XEXP (x, 1);
9764 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9765 rtx sum;
9767 /* On rare occasions, we might get an unaligned pointer
9768 that is indexed in a way to give an aligned address.
9769 Therefore, keep the lower two bits in offset_base. */
9770 /* Instead of offset_base 128..131 use 124..127, so that
9771 simple add suffices. */
9772 if (offset > 127)
9773 offset_base = ((offset + 4) & ~60) - 4;
9774 else
9775 offset_base = offset & ~60;
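/* Worked example: the immediate of a simple add is limited to -128..127,
   so an SImode access at offset 128 is rewritten with
   offset_base = ((128 + 4) & ~60) - 4 = 124, leaving the remaining
   displacement 4 for the @(disp,Rn) form. */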
9777 /* Sometimes the normal form does not suit DImode. We
9778 could avoid that by using smaller ranges, but that
9779 would give less optimized code when SImode is
9780 prevalent. */
9781 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9783 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9784 GEN_INT (offset_base), NULL_RTX, 0,
9785 OPTAB_LIB_WIDEN);
9787 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9791 return x;
9794 /* Attempt to replace *P, which is an address that needs reloading, with
9795 a valid memory address for an operand of mode MODE.
9796 Like for sh_legitimize_address, for the SH we try to get a normal form
9797 of the address. That will allow inheritance of the address reloads. */
9799 bool
9800 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9801 int itype)
9803 enum reload_type type = (enum reload_type) itype;
9805 if (GET_CODE (*p) == PLUS
9806 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9807 && CONST_INT_P (XEXP (*p, 1))
9808 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9809 && ! TARGET_SHMEDIA
9810 && ! (TARGET_SH4 && mode == DFmode)
9811 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9812 && (ALLOW_INDEXED_ADDRESS
9813 || XEXP (*p, 0) == stack_pointer_rtx
9814 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9816 rtx index_rtx = XEXP (*p, 1);
9817 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9818 rtx sum;
9820 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9822 push_reload (*p, NULL_RTX, p, NULL,
9823 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9824 goto win;
9826 if (TARGET_SH2E && mode == SFmode)
9828 *p = copy_rtx (*p);
9829 push_reload (*p, NULL_RTX, p, NULL,
9830 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9831 goto win;
9833 /* Instead of offset_base 128..131 use 124..127, so that
9834 simple add suffices. */
9835 if (offset > 127)
9836 offset_base = ((offset + 4) & ~60) - 4;
9837 else
9838 offset_base = offset & ~60;
9839 /* Sometimes the normal form does not suit DImode. We could avoid
9840 that by using smaller ranges, but that would give less optimized
9841 code when SImode is prevalent. */
9842 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9844 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9845 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9846 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9847 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9848 goto win;
9851 /* We must re-recognize what we created before. */
9852 else if (GET_CODE (*p) == PLUS
9853 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9854 && GET_CODE (XEXP (*p, 0)) == PLUS
9855 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9856 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9857 && CONST_INT_P (XEXP (*p, 1))
9858 && ! TARGET_SHMEDIA
9859 && ! (TARGET_SH2E && mode == SFmode))
9861 /* Because this address is so complex, we know it must have
9862 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9863 it is already unshared, and needs no further unsharing. */
9864 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9865 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9866 goto win;
9869 return false;
9871 win:
9872 return true;
9875 /* Mark the use of a constant in the literal table. If the constant
9876 has multiple labels, make it unique. */
9877 static rtx
9878 mark_constant_pool_use (rtx x)
9880 rtx insn, lab, pattern;
9882 if (x == NULL)
9883 return x;
9885 switch (GET_CODE (x))
9887 case LABEL_REF:
9888 x = XEXP (x, 0);
9889 case CODE_LABEL:
9890 break;
9891 default:
9892 return x;
9895 /* Get the first label in the list of labels for the same constant
9896 and delete the other labels in the list. */
9897 lab = x;
9898 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9900 if (!LABEL_P (insn)
9901 || LABEL_REFS (insn) != NEXT_INSN (insn))
9902 break;
9903 lab = insn;
9906 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9907 INSN_DELETED_P (insn) = 1;
9909 /* Mark constants in a window. */
9910 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9912 if (!NONJUMP_INSN_P (insn))
9913 continue;
9915 pattern = PATTERN (insn);
9916 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9917 continue;
9919 switch (XINT (pattern, 1))
9921 case UNSPECV_CONST2:
9922 case UNSPECV_CONST4:
9923 case UNSPECV_CONST8:
9924 XVECEXP (pattern, 0, 1) = const1_rtx;
9925 break;
9926 case UNSPECV_WINDOW_END:
9927 if (XVECEXP (pattern, 0, 0) == x)
9928 return lab;
9929 break;
9930 case UNSPECV_CONST_END:
9931 return lab;
9932 default:
9933 break;
9937 return lab;
9940 /* Return true if it's possible to redirect BRANCH1 to the destination
9941 of an unconditional jump BRANCH2. We only want to do this if the
9942 resulting branch will have a short displacement. */
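/* The 256 byte distance checked below roughly matches the reach of the
   conditional branch insns bt/bf, whose 8-bit displacement is scaled
   by 2. */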
9943 int
9944 sh_can_redirect_branch (rtx branch1, rtx branch2)
9946 if (flag_expensive_optimizations && simplejump_p (branch2))
9948 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9949 rtx insn;
9950 int distance;
9952 for (distance = 0, insn = NEXT_INSN (branch1);
9953 insn && distance < 256;
9954 insn = PREV_INSN (insn))
9956 if (insn == dest)
9957 return 1;
9958 else
9959 distance += get_attr_length (insn);
9961 for (distance = 0, insn = NEXT_INSN (branch1);
9962 insn && distance < 256;
9963 insn = NEXT_INSN (insn))
9965 if (insn == dest)
9966 return 1;
9967 else
9968 distance += get_attr_length (insn);
9971 return 0;
9974 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9975 int
9976 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9977 unsigned int new_reg)
9979 /* Interrupt functions can only use registers that have already been
9980 saved by the prologue, even if they would normally be
9981 call-clobbered. */
9983 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9984 return 0;
9986 return 1;
9989 /* Function to update the integer COST
9990 based on the relationship between INSN that is dependent on
9991 DEP_INSN through the dependence LINK. The default is to make no
9992 adjustment to COST. This can be used for example to specify to
9993 the scheduler that an output- or anti-dependence does not incur
9994 the same cost as a data-dependence. The return value should be
9995 the new value for COST. */
9996 static int
9997 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9999 rtx reg, use_pat;
10001 if (TARGET_SHMEDIA)
10003 /* On SHmedia, if the dependence is an anti-dependence or
10004 output-dependence, there is no cost. */
10005 if (REG_NOTE_KIND (link) != 0)
10007 /* However, dependencies between target register loads and
10008 uses of the register in a subsequent block that are separated
10009 by a conditional branch are not modelled - we have to make do with
10010 the anti-dependency between the target register load and the
10011 conditional branch that ends the current block. */
10012 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10013 && GET_CODE (PATTERN (dep_insn)) == SET
10014 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10015 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10016 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10018 int orig_cost = cost;
10019 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10020 rtx target = ((! note
10021 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10022 ? insn : JUMP_LABEL (insn));
10023 /* On the likely path, the branch costs 1, on the unlikely path,
10024 it costs 3. */
10025 cost--;
10027 target = next_active_insn (target);
10028 while (target && ! flow_dependent_p (target, dep_insn)
10029 && --cost > 0);
10030 /* If two branches are executed in immediate succession, with the
10031 first branch properly predicted, this causes a stall at the
10032 second branch, hence we won't need the target for the
10033 second branch for two cycles after the launch of the first
10034 branch. */
10035 if (cost > orig_cost - 2)
10036 cost = orig_cost - 2;
10038 else
10039 cost = 0;
10042 else if (get_attr_is_mac_media (insn)
10043 && get_attr_is_mac_media (dep_insn))
10044 cost = 1;
10046 else if (! reload_completed
10047 && GET_CODE (PATTERN (insn)) == SET
10048 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10049 && GET_CODE (PATTERN (dep_insn)) == SET
10050 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10051 && cost < 4)
10052 cost = 4;
10053 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10054 that is needed at the target. */
10055 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10056 && ! flow_dependent_p (insn, dep_insn))
10057 cost--;
10059 else if (REG_NOTE_KIND (link) == 0)
10061 enum attr_type type;
10062 rtx dep_set;
10064 if (recog_memoized (insn) < 0
10065 || recog_memoized (dep_insn) < 0)
10066 return cost;
10068 dep_set = single_set (dep_insn);
10070 /* The latency that we specify in the scheduling description refers
10071 to the actual output, not to an auto-increment register; for that,
10072 the latency is one. */
10073 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10075 rtx set = single_set (insn);
10077 if (set
10078 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10079 && (!MEM_P (SET_DEST (set))
10080 || !reg_mentioned_p (SET_DEST (dep_set),
10081 XEXP (SET_DEST (set), 0))))
10082 cost = 1;
10084 /* The only input for a call that is timing-critical is the
10085 function's address. */
10086 if (CALL_P (insn))
10088 rtx call = PATTERN (insn);
10090 if (GET_CODE (call) == PARALLEL)
10091 call = XVECEXP (call, 0, 0);
10092 if (GET_CODE (call) == SET)
10093 call = SET_SRC (call);
10094 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10095 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10096 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10097 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10098 cost -= TARGET_SH4_300 ? 3 : 6;
10100 /* Likewise, the most timing critical input for an sfuncs call
10101 is the function address. However, sfuncs typically start
10102 using their arguments pretty quickly.
10103 Assume a four cycle delay for SH4 before they are needed.
10104 Cached ST40-300 calls are quicker, so assume only a one
10105 cycle delay there.
10106 ??? Maybe we should encode the delays till input registers
10107 are needed by sfuncs into the sfunc call insn. */
10108 /* All sfunc calls are parallels with at least four components.
10109 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10110 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10111 && XVECLEN (PATTERN (insn), 0) >= 4
10112 && (reg = sfunc_uses_reg (insn)))
10114 if (! reg_set_p (reg, dep_insn))
10115 cost -= TARGET_SH4_300 ? 1 : 4;
10117 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10119 enum attr_type dep_type = get_attr_type (dep_insn);
10121 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10122 cost--;
10123 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10124 && (type = get_attr_type (insn)) != TYPE_CALL
10125 && type != TYPE_SFUNC)
10126 cost--;
10127 /* When the preceding instruction loads the shift amount of
10128 the following SHAD/SHLD, the latency of the load is increased
10129 by 1 cycle. */
10130 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10131 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10132 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10133 XEXP (SET_SRC (single_set (insn)),
10134 1)))
10135 cost++;
10136 /* When an LS group instruction with a latency of less than
10137 3 cycles is followed by a double-precision floating-point
10138 instruction, FIPR, or FTRV, the latency of the first
10139 instruction is increased to 3 cycles. */
10140 else if (cost < 3
10141 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10142 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10143 cost = 3;
10144 /* The lsw register of a double-precision computation is ready one
10145 cycle earlier. */
10146 else if (reload_completed
10147 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10148 && (use_pat = single_set (insn))
10149 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10150 SET_SRC (use_pat)))
10151 cost -= 1;
10153 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10154 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10155 cost -= 1;
10157 else if (TARGET_SH4_300)
10159 /* Stores need their input register two cycles later. */
10160 if (dep_set && cost >= 1
10161 && ((type = get_attr_type (insn)) == TYPE_STORE
10162 || type == TYPE_PSTORE
10163 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10165 rtx set = single_set (insn);
10167 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10168 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10170 cost -= 2;
10171 /* But don't reduce the cost below 1 if the address depends
10172 on a side effect of dep_insn. */
10173 if (cost < 1
10174 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10175 cost = 1;
10180 /* An anti-dependence penalty of two applies if the first insn is a double
10181 precision fadd / fsub / fmul. */
10182 else if (!TARGET_SH4_300
10183 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10184 && recog_memoized (dep_insn) >= 0
10185 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10186 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10187 /* A lot of alleged anti-flow dependences are fake,
10188 so check this one is real. */
10189 && flow_dependent_p (dep_insn, insn))
10190 cost = 2;
10192 return cost;
10195 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10196 if DEP_INSN is anti-flow dependent on INSN. */
10197 static int
10198 flow_dependent_p (rtx insn, rtx dep_insn)
10200 rtx tmp = PATTERN (insn);
10202 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10203 return tmp == NULL_RTX;
10206 /* A helper function for flow_dependent_p called through note_stores. */
10207 static void
10208 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10210 rtx * pinsn = (rtx *) data;
10212 if (*pinsn && reg_referenced_p (x, *pinsn))
10213 *pinsn = NULL_RTX;
10216 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10217 'special function' patterns (type sfunc) that clobber pr, but that
10218 do not look like function calls to leaf_function_p. Hence we must
10219 do this extra check. */
10220 static int
10221 sh_pr_n_sets (void)
10223 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10226 /* Return where to allocate pseudo for a given hard register initial
10227 value. */
10228 static rtx
10229 sh_allocate_initial_value (rtx hard_reg)
10231 rtx x;
10233 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10235 if (current_function_is_leaf
10236 && ! sh_pr_n_sets ()
10237 && ! (TARGET_SHCOMPACT
10238 && ((crtl->args.info.call_cookie
10239 & ~ CALL_COOKIE_RET_TRAMP (1))
10240 || crtl->saves_all_registers)))
10241 x = hard_reg;
10242 else
10243 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10245 else
10246 x = NULL_RTX;
10248 return x;
10251 /* This function returns "2" to indicate dual issue for the SH4
10252 processor. To be used by the DFA pipeline description. */
10253 static int
10254 sh_issue_rate (void)
10256 if (TARGET_SUPERSCALAR)
10257 return 2;
10258 else
10259 return 1;
10262 /* Functions for ready queue reordering for sched1. */
10264 /* Get weight for mode for a set x. */
10265 static short
10266 find_set_regmode_weight (rtx x, enum machine_mode mode)
10268 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10269 return 1;
10270 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10272 if (REG_P (SET_DEST (x)))
10274 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10275 return 1;
10276 else
10277 return 0;
10279 return 1;
10281 return 0;
10284 /* Get regmode weight for insn. */
10285 static short
10286 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10288 short reg_weight = 0;
10289 rtx x;
10291 /* Increment weight for each register born here. */
10292 x = PATTERN (insn);
10293 reg_weight += find_set_regmode_weight (x, mode);
10294 if (GET_CODE (x) == PARALLEL)
10296 int j;
10297 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10299 x = XVECEXP (PATTERN (insn), 0, j);
10300 reg_weight += find_set_regmode_weight (x, mode);
10303 /* Decrement weight for each register that dies here. */
10304 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10306 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10308 rtx note = XEXP (x, 0);
10309 if (REG_P (note) && GET_MODE (note) == mode)
10310 reg_weight--;
10313 return reg_weight;
10316 /* Calculate regmode weights for all insns of a basic block. */
10317 static void
10318 find_regmode_weight (basic_block b, enum machine_mode mode)
10320 rtx insn, next_tail, head, tail;
10322 get_ebb_head_tail (b, b, &head, &tail);
10323 next_tail = NEXT_INSN (tail);
10325 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10327 /* Handle register life information. */
10328 if (!INSN_P (insn))
10329 continue;
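/* A DFmode value occupies two SFmode registers and a DImode value two
   SImode registers, which is why their weights are counted twice
   below. */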
10331 if (mode == SFmode)
10332 INSN_REGMODE_WEIGHT (insn, mode) =
10333 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10334 else if (mode == SImode)
10335 INSN_REGMODE_WEIGHT (insn, mode) =
10336 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10340 /* Comparison function for ready queue sorting. */
10341 static int
10342 rank_for_reorder (const void *x, const void *y)
10344 rtx tmp = *(const rtx *) y;
10345 rtx tmp2 = *(const rtx *) x;
10347 /* The insn in a schedule group should be issued first. */
10348 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10349 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10351 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10352 minimizes instruction movement, thus minimizing sched's effect on
10353 register pressure. */
10354 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10357 /* Resort the array A in which only the element at index N may be out of order. */
10358 static void
10359 swap_reorder (rtx *a, int n)
10361 rtx insn = a[n - 1];
10362 int i = n - 2;
10364 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10366 a[i + 1] = a[i];
10367 i -= 1;
10369 a[i + 1] = insn;
10372 #define SCHED_REORDER(READY, N_READY) \
10373 do \
10375 if ((N_READY) == 2) \
10376 swap_reorder (READY, N_READY); \
10377 else if ((N_READY) > 2) \
10378 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10380 while (0)
10382 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10383 macro. */
10384 static void
10385 ready_reorder (rtx *ready, int nready)
10387 SCHED_REORDER (ready, nready);
10390 /* Count life regions of r0 for a block. */
10391 static int
10392 find_r0_life_regions (basic_block b)
10394 rtx end, insn;
10395 rtx pset;
10396 rtx r0_reg;
10397 int live;
10398 int set;
10399 int death = 0;
10401 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10403 set = 1;
10404 live = 1;
10406 else
10408 set = 0;
10409 live = 0;
10412 insn = BB_HEAD (b);
10413 end = BB_END (b);
10414 r0_reg = gen_rtx_REG (SImode, R0_REG);
10415 while (1)
10417 if (INSN_P (insn))
10419 if (find_regno_note (insn, REG_DEAD, R0_REG))
10421 death++;
10422 live = 0;
10424 if (!live
10425 && (pset = single_set (insn))
10426 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10427 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10429 set++;
10430 live = 1;
10433 if (insn == end)
10434 break;
10435 insn = NEXT_INSN (insn);
10437 return set - death;
10440 /* Calculate regmode weights for all insns of all basic blocks. */
10441 static void
10442 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10443 int verbose ATTRIBUTE_UNUSED,
10444 int old_max_uid)
10446 basic_block b;
10448 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10449 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10450 r0_life_regions = 0;
10452 FOR_EACH_BB_REVERSE (b)
10454 find_regmode_weight (b, SImode);
10455 find_regmode_weight (b, SFmode);
10456 if (!reload_completed)
10457 r0_life_regions += find_r0_life_regions (b);
10460 CURR_REGMODE_PRESSURE (SImode) = 0;
10461 CURR_REGMODE_PRESSURE (SFmode) = 0;
10465 /* Cleanup. */
10466 static void
10467 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10468 int verbose ATTRIBUTE_UNUSED)
10470 if (regmode_weight[0])
10472 free (regmode_weight[0]);
10473 regmode_weight[0] = NULL;
10475 if (regmode_weight[1])
10477 free (regmode_weight[1]);
10478 regmode_weight[1] = NULL;
10482 /* The set of scalar modes supported differs from the default only in TImode
10483 for 32-bit SHMEDIA. */
10484 static bool
10485 sh_scalar_mode_supported_p (enum machine_mode mode)
10487 if (TARGET_SHMEDIA32 && mode == TImode)
10488 return false;
10490 return default_scalar_mode_supported_p (mode);
10493 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10494 keep count of register pressures on SImode and SFmode. */
10495 static int
10496 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10497 int sched_verbose ATTRIBUTE_UNUSED,
10498 rtx insn,
10499 int can_issue_more)
10501 if (GET_CODE (PATTERN (insn)) != USE
10502 && GET_CODE (PATTERN (insn)) != CLOBBER)
10503 cached_can_issue_more = can_issue_more - 1;
10504 else
10505 cached_can_issue_more = can_issue_more;
10507 if (reload_completed)
10508 return cached_can_issue_more;
10510 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10511 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10513 return cached_can_issue_more;
10516 static void
10517 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10518 int verbose ATTRIBUTE_UNUSED,
10519 int veclen ATTRIBUTE_UNUSED)
10521 CURR_REGMODE_PRESSURE (SImode) = 0;
10522 CURR_REGMODE_PRESSURE (SFmode) = 0;
10525 /* Some magic numbers. */
10526 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10527 functions that already have high pressure on r0. */
10528 #define R0_MAX_LIFE_REGIONS 2
10529 /* Register Pressure thresholds for SImode and SFmode registers. */
10530 #define SIMODE_MAX_WEIGHT 5
10531 #define SFMODE_MAX_WEIGHT 10
10533 /* Return true if the pressure is high for MODE. */
10534 static short
10535 high_pressure (enum machine_mode mode)
10537 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10538 functions that already have high pressure on r0. */
10539 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10540 return 1;
10542 if (mode == SFmode)
10543 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10544 else
10545 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10548 /* Reorder ready queue if register pressure is high. */
10549 static int
10550 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10551 int sched_verbose ATTRIBUTE_UNUSED,
10552 rtx *ready,
10553 int *n_readyp,
10554 int clock_var ATTRIBUTE_UNUSED)
10556 if (reload_completed)
10557 return sh_issue_rate ();
10559 if (high_pressure (SFmode) || high_pressure (SImode))
10561 ready_reorder (ready, *n_readyp);
10564 return sh_issue_rate ();
10567 /* Skip cycles if the current register pressure is high. */
10568 static int
10569 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10570 int sched_verbose ATTRIBUTE_UNUSED,
10571 rtx *ready ATTRIBUTE_UNUSED,
10572 int *n_readyp ATTRIBUTE_UNUSED,
10573 int clock_var ATTRIBUTE_UNUSED)
10575 if (reload_completed)
10576 return cached_can_issue_more;
10578 if (high_pressure(SFmode) || high_pressure (SImode))
10579 skip_cycles = 1;
10581 return cached_can_issue_more;
10584 /* Skip cycles without sorting the ready queue. This will move insns from
10585 Q -> R. If this is the last cycle we are skipping, allow sorting of the
10586 ready queue by sh_reorder. */
10588 /* Generally, skipping this many cycles is sufficient for all insns to move
10589 from Q -> R. */
10590 #define MAX_SKIPS 8
10592 static int
10593 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10594 int sched_verbose ATTRIBUTE_UNUSED,
10595 rtx insn ATTRIBUTE_UNUSED,
10596 int last_clock_var,
10597 int clock_var,
10598 int *sort_p)
10600 if (reload_completed)
10601 return 0;
10603 if (skip_cycles)
10605 if ((clock_var - last_clock_var) < MAX_SKIPS)
10607 *sort_p = 0;
10608 return 1;
10610 /* If this is the last cycle we are skipping, allow reordering of R. */
10611 if ((clock_var - last_clock_var) == MAX_SKIPS)
10613 *sort_p = 1;
10614 return 1;
10618 skip_cycles = 0;
10620 return 0;
10623 /* SHmedia requires registers for branches, so we can't generate new
10624 branches past reload. */
10625 static bool
10626 sh_cannot_modify_jumps_p (void)
10628 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10631 static reg_class_t
10632 sh_target_reg_class (void)
10634 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10637 static bool
10638 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10640 HARD_REG_SET dummy;
10641 #if 0
10642 rtx insn;
10643 #endif
10645 if (! shmedia_space_reserved_for_target_registers)
10646 return 0;
10647 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10648 return 0;
10649 if (calc_live_regs (&dummy) >= 6 * 8)
10650 return 1;
10651 return 0;
10654 static bool
10655 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10657 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10660 /*
10661 On the SH1..SH4, the trampoline looks like
10662 2 0002 D202 mov.l l2,r2
10663 1 0000 D301 mov.l l1,r3
10664 3 0004 422B jmp @r2
10665 4 0006 0009 nop
10666 5 0008 00000000 l1: .long area
10667 6 000c 00000000 l2: .long function
10669 SH5 (compact) uses r1 instead of r3 for the static chain. */
10672 /* Emit RTL insns to initialize the variable parts of a trampoline.
10673 FNADDR is an RTX for the address of the function's pure code.
10674 CXT is an RTX for the static chain value for the function. */
10676 static void
10677 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10679 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10680 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10682 if (TARGET_SHMEDIA64)
10684 rtx tramp_templ;
10685 int fixed_len;
10687 rtx movi1 = GEN_INT (0xcc000010);
10688 rtx shori1 = GEN_INT (0xc8000010);
10689 rtx src, dst;
10691 /* The following trampoline works within a +- 128 KB range for cxt:
10692 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10693 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10694 gettr tr1,r1; blink tr0,r63 */
10695 /* Address rounding makes it hard to compute the exact bounds of the
10696 offset for this trampoline, but we have a rather generous offset
10697 range, so frame_offset should do fine as an upper bound. */
10698 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10700 /* ??? could optimize this trampoline initialization
10701 by writing DImode words with two insns each. */
10702 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10703 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10704 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10705 insn = gen_rtx_AND (DImode, insn, mask);
10706 /* Or in ptb/u .,tr1 pattern */
10707 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10708 insn = force_operand (insn, NULL_RTX);
10709 insn = gen_lowpart (SImode, insn);
10710 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10711 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10712 insn = gen_rtx_AND (DImode, insn, mask);
10713 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10714 insn = gen_lowpart (SImode, insn);
10715 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10716 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10717 insn = gen_rtx_AND (DImode, insn, mask);
10718 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10719 insn = gen_lowpart (SImode, insn);
10720 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10721 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10722 insn = gen_rtx_AND (DImode, insn, mask);
10723 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10724 insn = gen_lowpart (SImode, insn);
10725 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10726 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10727 insn = gen_rtx_AND (DImode, insn, mask);
10728 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10729 insn = gen_lowpart (SImode, insn);
10730 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10731 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10732 GEN_INT (0x6bf10600));
10733 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10734 GEN_INT (0x4415fc10));
10735 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10736 GEN_INT (0x4401fff0));
10737 emit_insn (gen_ic_invalidate_line (tramp));
10738 return;
10740 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10741 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10743 tramp_templ = gen_datalabel_ref (tramp_templ);
10744 dst = tramp_mem;
10745 src = gen_const_mem (BLKmode, tramp_templ);
10746 set_mem_align (dst, 256);
10747 set_mem_align (src, 64);
10748 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10750 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10751 emit_move_insn (adjust_address (tramp_mem, Pmode,
10752 fixed_len + GET_MODE_SIZE (Pmode)),
10753 cxt);
10754 emit_insn (gen_ic_invalidate_line (tramp));
10755 return;
10757 else if (TARGET_SHMEDIA)
10759 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10760 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10761 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10762 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10763 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10764 rotated right by 10 bits, with the higher 16 bits of every 32 selected. */
10765 rtx movishori
10766 = force_reg (V2HImode, (simplify_gen_subreg
10767 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10768 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10769 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10771 fnaddr = force_reg (SImode, fnaddr);
10772 cxt = force_reg (SImode, cxt);
10773 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10774 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10775 movishori));
10776 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10777 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10778 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10779 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10780 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10781 gen_rtx_SUBREG (V2HImode, cxt, 0),
10782 movishori));
10783 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10784 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10785 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10786 if (TARGET_LITTLE_ENDIAN)
10788 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10789 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10791 else
10793 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10794 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10796 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10797 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10798 emit_insn (gen_ic_invalidate_line (tramp));
10799 return;
10801 else if (TARGET_SHCOMPACT)
10803 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10804 return;
10806 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10807 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10808 SImode));
10809 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10810 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10811 SImode));
10812 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10813 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
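/* The two constants stored above pack the 16-bit opcodes listed in the
   comment before this function (mov.l l2,r2 = 0xd202, mov.l l1,r3 =
   0xd301, jmp @r2 = 0x422b, nop = 0x0009), with the halves swapped so
   that the first insn lands at the lower address on either endianness. */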
10814 if (TARGET_HARVARD)
10816 if (!TARGET_INLINE_IC_INVALIDATE
10817 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10818 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10819 FUNCTION_ORDINARY),
10820 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10821 else
10822 emit_insn (gen_ic_invalidate_line (tramp));
10826 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
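/* Setting the least significant bit of the address is what selects the
   SHmedia instruction set when the trampoline is branched to. */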
10828 static rtx
10829 sh_trampoline_adjust_address (rtx tramp)
10831 if (TARGET_SHMEDIA)
10832 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10833 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10834 return tramp;
10837 /* FIXME: This is overly conservative. A SHcompact function that
10838 receives arguments ``by reference'' will have them stored in its
10839 own stack frame, so it must not pass pointers or references to
10840 these arguments to other functions by means of sibling calls. */
10841 /* If PIC, we cannot make sibling calls to global functions
10842 because the PLT requires r12 to be live. */
10843 static bool
10844 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10846 return (1
10847 && (! TARGET_SHCOMPACT
10848 || crtl->args.info.stack_regs == 0)
10849 && ! sh_cfun_interrupt_handler_p ()
10850 && (! flag_pic
10851 || (decl && ! TREE_PUBLIC (decl))
10852 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10855 /* Machine specific built-in functions. */
10857 struct builtin_description
10859 const enum insn_code icode;
10860 const char *const name;
10861 int signature;
10862 tree fndecl;
10865 /* describe number and signedness of arguments; arg[0] == result
10866 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10867 /* 9: 64-bit pointer, 10: 32-bit pointer */
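/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: a sign-agnostic
   result and vector operand plus an unsigned int shift / control count,
   as used by the MPERM_W and V4HI shift builtins. */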
10868 static const char signature_args[][4] =
10870 #define SH_BLTIN_V2SI2 0
10871 { 4, 4 },
10872 #define SH_BLTIN_V4HI2 1
10873 { 4, 4 },
10874 #define SH_BLTIN_V2SI3 2
10875 { 4, 4, 4 },
10876 #define SH_BLTIN_V4HI3 3
10877 { 4, 4, 4 },
10878 #define SH_BLTIN_V8QI3 4
10879 { 4, 4, 4 },
10880 #define SH_BLTIN_MAC_HISI 5
10881 { 1, 4, 4, 1 },
10882 #define SH_BLTIN_SH_HI 6
10883 { 4, 4, 1 },
10884 #define SH_BLTIN_SH_SI 7
10885 { 4, 4, 1 },
10886 #define SH_BLTIN_V4HI2V2SI 8
10887 { 4, 4, 4 },
10888 #define SH_BLTIN_V4HI2V8QI 9
10889 { 4, 4, 4 },
10890 #define SH_BLTIN_SISF 10
10891 { 4, 2 },
10892 #define SH_BLTIN_LDUA_L 11
10893 { 2, 10 },
10894 #define SH_BLTIN_LDUA_Q 12
10895 { 1, 10 },
10896 #define SH_BLTIN_STUA_L 13
10897 { 0, 10, 2 },
10898 #define SH_BLTIN_STUA_Q 14
10899 { 0, 10, 1 },
10900 #define SH_BLTIN_LDUA_L64 15
10901 { 2, 9 },
10902 #define SH_BLTIN_LDUA_Q64 16
10903 { 1, 9 },
10904 #define SH_BLTIN_STUA_L64 17
10905 { 0, 9, 2 },
10906 #define SH_BLTIN_STUA_Q64 18
10907 { 0, 9, 1 },
10908 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10909 #define SH_BLTIN_2 19
10910 #define SH_BLTIN_SU 19
10911 { 1, 2 },
10912 #define SH_BLTIN_3 20
10913 #define SH_BLTIN_SUS 20
10914 { 2, 2, 1 },
10915 #define SH_BLTIN_PSSV 21
10916 { 0, 8, 2, 2 },
10917 #define SH_BLTIN_XXUU 22
10918 #define SH_BLTIN_UUUU 22
10919 { 1, 1, 1, 1 },
10920 #define SH_BLTIN_PV 23
10921 { 0, 8 },
10923 /* mcmv: operands considered unsigned. */
10924 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10925 /* mperm: control value considered unsigned int. */
10926 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10927 /* mshards_q: returns signed short. */
10928 /* nsb: takes long long arg, returns unsigned char. */
10929 static struct builtin_description bdesc[] =
10931 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10932 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10933 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10934 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10935 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10936 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10937 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10938 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10939 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10940 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10941 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10942 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10943 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10944 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10945 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10946 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10947 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10948 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10949 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10950 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10951 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10952 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10953 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10954 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10955 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10956 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10957 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10958 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10959 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10960 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10961 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10962 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10963 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10964 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10965 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10966 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10967 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10968 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10969 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10970 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10971 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10972 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10973 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10974 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10975 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10976 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10977 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10978 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10979 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10980 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10981 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10982 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10983 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10984 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10985 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10986 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10987 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10988 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10989 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10990 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10991 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10992 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10993 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10994 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10995 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10996 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10997 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10998 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10999 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11000 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11001 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11002 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11003 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11004 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11005 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11006 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11007 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11008 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11009 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11010 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11011 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11012 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11013 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11014 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11017 static void
11018 sh_media_init_builtins (void)
11020 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11021 struct builtin_description *d;
11023 memset (shared, 0, sizeof shared);
11024 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11026 tree type, arg_type = 0;
11027 int signature = d->signature;
11028 int i;
11030 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11031 type = shared[signature];
11032 else
11034 int has_result = signature_args[signature][0] != 0;
11036 if ((signature_args[signature][1] & 8)
11037 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11038 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11039 continue;
11040 if (! TARGET_FPU_ANY
11041 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11042 continue;
11043 type = void_list_node;
11044 for (i = 3; ; i--)
11046 int arg = signature_args[signature][i];
11047 int opno = i - 1 + has_result;
11049 if (arg & 8)
11050 arg_type = ptr_type_node;
11051 else if (arg)
11052 arg_type = (*lang_hooks.types.type_for_mode)
11053 (insn_data[d->icode].operand[opno].mode,
11054 (arg & 1));
11055 else if (i)
11056 continue;
11057 else
11058 arg_type = void_type_node;
11059 if (i == 0)
11060 break;
11061 type = tree_cons (NULL_TREE, arg_type, type);
11063 type = build_function_type (arg_type, type);
11064 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11065 shared[signature] = type;
11067 d->fndecl =
11068 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11069 NULL, NULL_TREE);
11073 /* Returns the shmedia builtin decl for CODE. */
11075 static tree
11076 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11078 if (code >= ARRAY_SIZE (bdesc))
11079 return error_mark_node;
11081 return bdesc[code].fndecl;
11084 /* Implements target hook vector_mode_supported_p. */
11085 bool
11086 sh_vector_mode_supported_p (enum machine_mode mode)
11088 if (TARGET_FPU_ANY
11089 && ((mode == V2SFmode)
11090 || (mode == V4SFmode)
11091 || (mode == V16SFmode)))
11092 return true;
11094 else if (TARGET_SHMEDIA
11095 && ((mode == V8QImode)
11096 || (mode == V2HImode)
11097 || (mode == V4HImode)
11098 || (mode == V2SImode)))
11099 return true;
11101 return false;
11104 bool
11105 sh_frame_pointer_required (void)
11107 /* If needed override this in other tm.h files to cope with various OS
11108 lossage requiring a frame pointer. */
11109 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11110 return true;
11112 if (crtl->profile)
11113 return true;
11115 return false;
11118 /* Implements target hook dwarf_calling_convention. Return an enum
11119 of dwarf_calling_convention. */
11120 int
11121 sh_dwarf_calling_convention (const_tree func)
11123 if (sh_attr_renesas_p (func))
11124 return DW_CC_GNU_renesas_sh;
11126 return DW_CC_normal;
11129 static void
11130 sh_init_builtins (void)
11132 if (TARGET_SHMEDIA)
11133 sh_media_init_builtins ();
11136 /* Returns the sh builtin decl for CODE. */
11138 static tree
11139 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11141 if (TARGET_SHMEDIA)
11142 return sh_media_builtin_decl (code, initialize_p);
11144 return error_mark_node;
11147 /* Expand an expression EXP that calls a built-in function,
11148 with result going to TARGET if that's convenient
11149 (and in mode MODE if that's convenient).
11150 SUBTARGET may be used as the target for computing one of EXP's operands.
11151 IGNORE is nonzero if the value is to be ignored. */
11153 static rtx
11154 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11155 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11157 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11158 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11159 const struct builtin_description *d = &bdesc[fcode];
11160 enum insn_code icode = d->icode;
11161 int signature = d->signature;
11162 enum machine_mode tmode = VOIDmode;
11163 int nop = 0, i;
11164 rtx op[4];
11165 rtx pat = 0;
11167 if (signature_args[signature][0])
11169 if (ignore)
11170 return 0;
11172 tmode = insn_data[icode].operand[0].mode;
11173 if (! target
11174 || GET_MODE (target) != tmode
11175 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11176 target = gen_reg_rtx (tmode);
11177 op[nop++] = target;
11179 else
11180 target = 0;
11182 for (i = 1; i <= 3; i++, nop++)
11184 tree arg;
11185 enum machine_mode opmode, argmode;
11186 tree optype;
11188 if (! signature_args[signature][i])
11189 break;
11190 arg = CALL_EXPR_ARG (exp, i - 1);
11191 if (arg == error_mark_node)
11192 return const0_rtx;
11193 if (signature_args[signature][i] & 8)
11195 opmode = ptr_mode;
11196 optype = ptr_type_node;
11198 else
11200 opmode = insn_data[icode].operand[nop].mode;
11201 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11203 argmode = TYPE_MODE (TREE_TYPE (arg));
11204 if (argmode != opmode)
11205 arg = build1 (NOP_EXPR, optype, arg);
11206 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11207 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11208 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11211 switch (nop)
11213 case 1:
11214 pat = (*insn_data[d->icode].genfun) (op[0]);
11215 break;
11216 case 2:
11217 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11218 break;
11219 case 3:
11220 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11221 break;
11222 case 4:
11223 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11224 break;
11225 default:
11226 gcc_unreachable ();
11228 if (! pat)
11229 return 0;
11230 emit_insn (pat);
11231 return target;
11234 void
11235 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11237 rtx sel0 = const0_rtx;
11238 rtx sel1 = const1_rtx;
11239 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11240 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11242 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11243 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11246 void
11247 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11249 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11251 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11252 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11255 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11256 We can allow any mode in any general register. The special registers
11257 only allow SImode. Don't allow any mode in the PR.
11259 We cannot hold DCmode values in the XD registers because alter_reg
11260 handles subregs of them incorrectly. We could work around this by
11261 spacing the XD registers like the DR registers, but this would require
11262 additional memory in every compilation to hold larger register vectors.
11263 We could hold SFmode / SCmode values in XD registers, but that
11264 would require a tertiary reload when reloading from / to memory,
11265 and a secondary reload to reload from / to general regs; that
11266 seems to be a losing proposition.
11268 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11269 it won't be ferried through GP registers first. */
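/* Note that the vector cases below also encode an alignment requirement:
   V2SFmode needs an even-numbered FP register (a DR pair), V4SFmode a
   multiple-of-four register (an FV vector register), and, on
   non-SHmedia targets, V16SFmode only the bank starting at
   FIRST_XD_REG. */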
11271 bool
11272 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11274 if (SPECIAL_REGISTER_P (regno))
11275 return mode == SImode;
11277 if (regno == FPUL_REG)
11278 return (mode == SImode || mode == SFmode);
11280 if (FP_REGISTER_P (regno) && mode == SFmode)
11281 return true;
11283 if (mode == V2SFmode)
11285 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11286 || GENERAL_REGISTER_P (regno)))
11287 return true;
11288 else
11289 return false;
11292 if (mode == V4SFmode)
11294 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11295 || GENERAL_REGISTER_P (regno))
11296 return true;
11297 else
11298 return false;
11301 if (mode == V16SFmode)
11303 if (TARGET_SHMEDIA)
11305 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11306 return true;
11307 else
11308 return false;
11310 else
11311 return regno == FIRST_XD_REG;
11314 if (FP_REGISTER_P (regno))
11316 if (mode == SFmode
11317 || mode == SImode
11318 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11319 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11320 || mode == DCmode
11321 || (TARGET_SHMEDIA
11322 && (mode == DFmode || mode == DImode
11323 || mode == V2SFmode || mode == TImode)))
11324 && ((regno - FIRST_FP_REG) & 1) == 0)
11325 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11326 && ((regno - FIRST_FP_REG) & 3) == 0))
11327 return true;
11328 else
11329 return false;
11332 if (XD_REGISTER_P (regno))
11333 return mode == DFmode;
11335 if (TARGET_REGISTER_P (regno))
11336 return (mode == DImode || mode == SImode || mode == PDImode);
11338 if (regno == PR_REG)
11339 return mode == SImode;
11341 if (regno == FPSCR_REG)
11342 return mode == PSImode;
11344 /* FIXME. This works around PR target/37633 for -O0. */
11345 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11347 unsigned int n = GET_MODE_SIZE (mode) / 8;
11349 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11350 && regno <= FIRST_GENERAL_REG + 14)
11351 return false;
11354 return true;
11357 /* Return true if a mode change from FROM to TO is invalid for registers
11358 in class RCLASS. */
11359 bool
11360 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11361 enum reg_class rclass)
11363 /* We want to enable the use of SUBREGs as a means to
11364 VEC_SELECT a single element of a vector. */
11365 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11366 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11368 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11370 if (TARGET_LITTLE_ENDIAN)
11372 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11373 return reg_classes_intersect_p (DF_REGS, rclass);
11375 else
11377 if (GET_MODE_SIZE (from) < 8)
11378 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11381 return 0;
11384 /* Return true if values in machine mode MODE will likely be
11385 allocated to registers in small register classes. */
11387 bool
11388 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11390 return (! TARGET_SHMEDIA);
11393 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11394 that label is used. */
11396 void
11397 sh_mark_label (rtx address, int nuses)
11399 if (GOTOFF_P (address))
11401 /* Extract the label or symbol. */
11402 address = XEXP (address, 0);
11403 if (GET_CODE (address) == PLUS)
11404 address = XEXP (address, 0);
11405 address = XVECEXP (address, 0, 0);
11407 if (GET_CODE (address) == LABEL_REF
11408 && LABEL_P (XEXP (address, 0)))
11409 LABEL_NUSES (XEXP (address, 0)) += nuses;
11412 /* Compute extra cost of moving data between one register class
11413 and another. */
11415 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11416 uses this information. Hence, the general register <-> floating point
11417 register information here is not used for SFmode. */
11419 static int
11420 sh_register_move_cost (enum machine_mode mode,
11421 reg_class_t srcclass, reg_class_t dstclass)
11423 if (dstclass == T_REGS || dstclass == PR_REGS)
11424 return 10;
11426 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11427 return 4;
11429 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11430 && REGCLASS_HAS_FP_REG (srcclass)
11431 && REGCLASS_HAS_FP_REG (dstclass))
11432 return 4;
11434 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11435 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11437 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11438 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11439 return 9;
11441 if ((REGCLASS_HAS_FP_REG (dstclass)
11442 && REGCLASS_HAS_GENERAL_REG (srcclass))
11443 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11444 && REGCLASS_HAS_FP_REG (srcclass)))
11445 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11446 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11448 if ((dstclass == FPUL_REGS
11449 && REGCLASS_HAS_GENERAL_REG (srcclass))
11450 || (srcclass == FPUL_REGS
11451 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11452 return 5;
11454 if ((dstclass == FPUL_REGS
11455 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11456 || (srcclass == FPUL_REGS
11457 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11458 return 7;
11460 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11461 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11462 return 20;
11464 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11465 if (TARGET_SHMEDIA
11466 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11468 if (sh_gettrcost >= 0)
11469 return sh_gettrcost;
11470 else if (!TARGET_PT_FIXED)
11471 return 100;
11474 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11475 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11476 return 4;
11478 if (TARGET_SHMEDIA
11479 || (TARGET_FMOVD
11480 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11481 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11482 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11484 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
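/* Illustrative examples, derived from the cost formulas in this function
   (not from measurements): on SH4 without -mfmovd, a DFmode copy between
   a general register and an FP register costs 12 * ((8 + 7) / 8) = 12;
   a DFmode copy between two FP registers with -mfmovd costs
   2 * ((8 + 7) / 8) = 2; an SImode copy between general registers falls
   through to the default 2 * ((4 + 3) / 4) = 2.  */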
11487 static rtx emit_load_ptr (rtx, rtx);
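/* Load the pointer-sized value at address ADDR into REG, sign-extending
   it from ptr_mode to Pmode when the two modes differ.  */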
11489 static rtx
11490 emit_load_ptr (rtx reg, rtx addr)
11492 rtx mem = gen_const_mem (ptr_mode, addr);
11494 if (Pmode != ptr_mode)
11495 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11496 return emit_move_insn (reg, mem);
11499 static void
11500 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11501 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11502 tree function)
11504 CUMULATIVE_ARGS cum;
11505 int structure_value_byref = 0;
11506 rtx this_rtx, this_value, sibcall, insns, funexp;
11507 tree funtype = TREE_TYPE (function);
11508 int simple_add = CONST_OK_FOR_ADD (delta);
11509 int did_load = 0;
11510 rtx scratch0, scratch1, scratch2;
11511 unsigned i;
11513 reload_completed = 1;
11514 epilogue_completed = 1;
11515 current_function_uses_only_leaf_regs = 1;
11517 emit_note (NOTE_INSN_PROLOGUE_END);
11519 /* Find the "this" pointer. We have such a wide range of ABIs for the
11520 SH that it's best to do this completely machine independently.
11521 "this" is passed as first argument, unless a structure return pointer
11522 comes first, in which case "this" comes second. */
11523 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11524 #ifndef PCC_STATIC_STRUCT_RETURN
11525 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11526 structure_value_byref = 1;
11527 #endif /* not PCC_STATIC_STRUCT_RETURN */
11528 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11530 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11532 sh_function_arg_advance (&cum, Pmode, ptype, true);
11534 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11536 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11537 static chain pointer (even if you can't have nested virtual functions
11538 right now, someone might implement them sometime), and the rest of the
11539 registers are used for argument passing, are callee-saved, or reserved. */
11540 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11541 -ffixed-reg has been used. */
11542 if (! call_used_regs[0] || fixed_regs[0])
11543 error ("r0 needs to be available as a call-clobbered register");
11544 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11545 if (! TARGET_SH5)
11547 if (call_used_regs[1] && ! fixed_regs[1])
11548 scratch1 = gen_rtx_REG (ptr_mode, 1);
11549 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11550 to the location where struct return values are to be stored. */
11551 if (call_used_regs[3] && ! fixed_regs[3])
11552 scratch2 = gen_rtx_REG (Pmode, 3);
11554 else if (TARGET_SHMEDIA)
11556 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11557 if (i != REGNO (scratch0) &&
11558 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11560 scratch1 = gen_rtx_REG (ptr_mode, i);
11561 break;
11563 if (scratch1 == scratch0)
11564 error ("need a second call-clobbered general purpose register");
11565 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11566 if (call_used_regs[i] && ! fixed_regs[i])
11568 scratch2 = gen_rtx_REG (Pmode, i);
11569 break;
11571 if (scratch2 == scratch0)
11572 error ("need a call-clobbered target register");
11575 this_value = plus_constant (this_rtx, delta);
11576 if (vcall_offset
11577 && (simple_add || scratch0 != scratch1)
11578 && strict_memory_address_p (ptr_mode, this_value))
11580 emit_load_ptr (scratch0, this_value);
11581 did_load = 1;
11584 if (!delta)
11585 ; /* Do nothing. */
11586 else if (simple_add)
11587 emit_move_insn (this_rtx, this_value);
11588 else
11590 emit_move_insn (scratch1, GEN_INT (delta));
11591 emit_insn (gen_add2_insn (this_rtx, scratch1));
11594 if (vcall_offset)
11596 rtx offset_addr;
11598 if (!did_load)
11599 emit_load_ptr (scratch0, this_rtx);
11601 offset_addr = plus_constant (scratch0, vcall_offset);
11602 if (strict_memory_address_p (ptr_mode, offset_addr))
11603 ; /* Do nothing. */
11604 else if (! TARGET_SH5 && scratch0 != scratch1)
11606 /* scratch0 != scratch1, and we have indexed loads. Get a better
11607 schedule by loading the offset into r1 and using an indexed
11608 load - then the load of r1 can issue before the load from
11609 (this_rtx + delta) finishes. */
11610 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11611 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11613 else if (CONST_OK_FOR_ADD (vcall_offset))
11615 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11616 offset_addr = scratch0;
11618 else if (scratch0 != scratch1)
11620 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11621 emit_insn (gen_add2_insn (scratch0, scratch1));
11622 offset_addr = scratch0;
11624 else
11625 gcc_unreachable (); /* FIXME */
11626 emit_load_ptr (scratch0, offset_addr);
11628 if (Pmode != ptr_mode)
11629 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11630 emit_insn (gen_add2_insn (this_rtx, scratch0));
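/* A rough summary of the adjustment emitted above, in C-like pseudocode
   (byte offsets):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);

   i.e. a constant displacement, optionally followed by a further
   displacement loaded through the adjusted object's vtable pointer.  */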
11633 /* Generate a tail call to the target function. */
11634 if (! TREE_USED (function))
11636 assemble_external (function);
11637 TREE_USED (function) = 1;
11639 funexp = XEXP (DECL_RTL (function), 0);
11640 /* If the function is overridden, so is the thunk, hence we don't
11641 need GOT addressing even if this is a public symbol. */
11642 #if 0
11643 if (TARGET_SH1 && ! flag_weak)
11644 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11645 else
11646 #endif
11647 if (TARGET_SH2 && flag_pic)
11649 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11650 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11652 else
11654 if (TARGET_SHMEDIA && flag_pic)
11656 funexp = gen_sym2PIC (funexp);
11657 PUT_MODE (funexp, Pmode);
11659 emit_move_insn (scratch2, funexp);
11660 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11661 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11663 sibcall = emit_call_insn (sibcall);
11664 SIBLING_CALL_P (sibcall) = 1;
11665 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11666 emit_barrier ();
11668 /* Run just enough of rest_of_compilation to do scheduling and get
11669 the insns emitted. Note that use_thunk calls
11670 assemble_start_function and assemble_end_function. */
11672 insn_locators_alloc ();
11673 insns = get_insns ();
11675 if (optimize > 0)
11677 if (! cfun->cfg)
11678 init_flow (cfun);
11679 split_all_insns_noflow ();
11682 sh_reorg ();
11684 if (optimize > 0 && flag_delayed_branch)
11685 dbr_schedule (insns);
11687 shorten_branches (insns);
11688 final_start_function (insns, file, 1);
11689 final (insns, file, 1);
11690 final_end_function ();
11692 reload_completed = 0;
11693 epilogue_completed = 0;
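/* Return an rtx holding the address of the function called NAME, of kind
   KIND.  Under PIC, SFUNC_GOT addresses are loaded through the GOT and
   SFUNC_STATIC addresses via a GOTOFF relocation; if TARGET is nonnull,
   the result is moved into it.  */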
11697 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11699 rtx sym;
11701 /* If this is not an ordinary function, the name usually comes from a
11702 string literal or an sprintf buffer. Make sure we use the same
11703 string consistently, so that cse will be able to unify address loads. */
11704 if (kind != FUNCTION_ORDINARY)
11705 name = IDENTIFIER_POINTER (get_identifier (name));
11706 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11707 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11708 if (flag_pic)
11709 switch (kind)
11711 case FUNCTION_ORDINARY:
11712 break;
11713 case SFUNC_GOT:
11715 rtx reg = target ? target : gen_reg_rtx (Pmode);
11717 emit_insn (gen_symGOT2reg (reg, sym));
11718 sym = reg;
11719 break;
11721 case SFUNC_STATIC:
11723 /* ??? To allow cse to work, we use GOTOFF relocations.
11724 We could add combiner patterns to transform this into
11725 straight pc-relative calls with sym2PIC / bsrf when
11726 label load and function call are still 1:1 and in the
11727 same basic block during combine. */
11728 rtx reg = target ? target : gen_reg_rtx (Pmode);
11730 emit_insn (gen_symGOTOFF2reg (reg, sym));
11731 sym = reg;
11732 break;
11735 if (target && sym != target)
11737 emit_move_insn (target, sym);
11738 return target;
11740 return sym;
11743 /* Find the number of a general purpose register in S. */
11744 static int
11745 scavenge_reg (HARD_REG_SET *s)
11747 int r;
11748 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11749 if (TEST_HARD_REG_BIT (*s, r))
11750 return r;
11751 return -1;
11755 sh_get_pr_initial_val (void)
11757 rtx val;
11759 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11760 PR register on SHcompact, because it might be clobbered by the prologue.
11761 We check first if that is known to be the case. */
11762 if (TARGET_SHCOMPACT
11763 && ((crtl->args.info.call_cookie
11764 & ~ CALL_COOKIE_RET_TRAMP (1))
11765 || crtl->saves_all_registers))
11766 return gen_frame_mem (SImode, return_address_pointer_rtx);
11768 /* If we haven't finished rtl generation, there might be a nonlocal label
11769 that we haven't seen yet.
11770 ??? get_hard_reg_initial_val fails if it is called after register
11771 allocation has started, unless it has been called before for the
11772 same register. And even then, we end up in trouble if we didn't use
11773 the register in the same basic block before. So call
11774 get_hard_reg_initial_val now and wrap it in an unspec if we might
11775 need to replace it. */
11776 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11777 combine can put the pseudo returned by get_hard_reg_initial_val into
11778 instructions that need a general purpose register, which will fail to
11779 be recognized when the pseudo becomes allocated to PR. */
11780 val
11781 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11782 if (TARGET_SH1)
11783 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11784 return val;
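/* Expand an scc operation whose comparison operand is the T register.
   OPERANDS[0] is the target, OPERANDS[1] the comparison, and OPERANDS[2]
   / OPERANDS[3] the values being compared; the expansion only handles
   the case where OPERANDS[2] is the T register and OPERANDS[3] a
   constant.  Return nonzero iff an insn sequence was emitted.  */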
11788 sh_expand_t_scc (rtx operands[])
11790 enum rtx_code code = GET_CODE (operands[1]);
11791 rtx target = operands[0];
11792 rtx op0 = operands[2];
11793 rtx op1 = operands[3];
11794 rtx result = target;
11795 HOST_WIDE_INT val;
11797 if (!REG_P (op0) || REGNO (op0) != T_REG
11798 || !CONST_INT_P (op1))
11799 return 0;
11800 if (!REG_P (result))
11801 result = gen_reg_rtx (SImode);
11802 val = INTVAL (op1);
11803 if ((code == EQ && val == 1) || (code == NE && val == 0))
11804 emit_insn (gen_movt (result));
11805 else if (TARGET_SH2A && ((code == EQ && val == 0)
11806 || (code == NE && val == 1)))
11807 emit_insn (gen_xorsi3_movrt (result));
11808 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11810 emit_clobber (result);
11811 emit_insn (gen_subc (result, result, result));
11812 emit_insn (gen_addsi3 (result, result, const1_rtx));
11814 else if (code == EQ || code == NE)
11815 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11816 else
11817 return 0;
11818 if (result != target)
11819 emit_move_insn (target, result);
11820 return 1;
11823 /* INSN is an sfunc; return the rtx that describes the address used. */
11824 static rtx
11825 extract_sfunc_addr (rtx insn)
11827 rtx pattern, part = NULL_RTX;
11828 int len, i;
11830 pattern = PATTERN (insn);
11831 len = XVECLEN (pattern, 0);
11832 for (i = 0; i < len; i++)
11834 part = XVECEXP (pattern, 0, i);
11835 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11836 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11837 return XEXP (part, 0);
11839 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11840 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11843 /* Verify that the register in use_sfunc_addr still agrees with the address
11844 used in the sfunc. This prevents fill_slots_from_thread from changing
11845 use_sfunc_addr.
11846 INSN is the use_sfunc_addr instruction, and REG is the register it
11847 guards. */
11849 check_use_sfunc_addr (rtx insn, rtx reg)
11851 /* Search for the sfunc. It should really come right after INSN. */
11852 while ((insn = NEXT_INSN (insn)))
11854 if (LABEL_P (insn) || JUMP_P (insn))
11855 break;
11856 if (! INSN_P (insn))
11857 continue;
11859 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11860 insn = XVECEXP (PATTERN (insn), 0, 0);
11861 if (GET_CODE (PATTERN (insn)) != PARALLEL
11862 || get_attr_type (insn) != TYPE_SFUNC)
11863 continue;
11864 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11866 gcc_unreachable ();
11869 /* This function returns a constant rtx that represents 2**15 / pi in
11870 SFmode. It's used to scale SFmode angles, in radians, to a
11871 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11872 maps to 0x10000). */
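/* For reference, the scale factor follows from the mapping above:
   0x10000 / (2*pi) = 2**15 / pi ~= 10430.378350470453, which is the
   constant loaded below; its reciprocal, pi / 2**15
   ~= 9.587379924285257e-5, is used by sh_fsca_int2sf to convert back.  */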
11874 static GTY(()) rtx sh_fsca_sf2int_rtx;
11877 sh_fsca_sf2int (void)
11879 if (! sh_fsca_sf2int_rtx)
11881 REAL_VALUE_TYPE rv;
11883 real_from_string (&rv, "10430.378350470453");
11884 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11887 return sh_fsca_sf2int_rtx;
11890 /* This function returns a constant rtx that represents 2**15 / pi in
11891 DFmode. It's used to scale DFmode angles, in radians, to a
11892 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11893 maps to 0x10000). */
11895 static GTY(()) rtx sh_fsca_df2int_rtx;
11898 sh_fsca_df2int (void)
11900 if (! sh_fsca_df2int_rtx)
11902 REAL_VALUE_TYPE rv;
11904 real_from_string (&rv, "10430.378350470453");
11905 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11908 return sh_fsca_df2int_rtx;
11911 /* This function returns a constant rtx that represents pi / 2**15 in
11912 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11913 of a full circle back to a SFmode value (i.e., 0x10000 maps to
11914 2*pi). */
11916 static GTY(()) rtx sh_fsca_int2sf_rtx;
11919 sh_fsca_int2sf (void)
11921 if (! sh_fsca_int2sf_rtx)
11923 REAL_VALUE_TYPE rv;
11925 real_from_string (&rv, "9.587379924285257e-5");
11926 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11929 return sh_fsca_int2sf_rtx;
11932 /* Initialize the CUMULATIVE_ARGS structure. */
11934 void
11935 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11936 tree fntype,
11937 rtx libname ATTRIBUTE_UNUSED,
11938 tree fndecl,
11939 signed int n_named_args,
11940 enum machine_mode mode)
11942 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11943 pcum->free_single_fp_reg = 0;
11944 pcum->stack_regs = 0;
11945 pcum->byref_regs = 0;
11946 pcum->byref = 0;
11947 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11949 /* XXX - Should we check TARGET_HITACHI here ??? */
11950 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11952 if (fntype)
11954 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11955 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11956 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11957 pcum->arg_count [(int) SH_ARG_INT]
11958 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11960 pcum->call_cookie
11961 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11962 && pcum->arg_count [(int) SH_ARG_INT] == 0
11963 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11964 ? int_size_in_bytes (TREE_TYPE (fntype))
11965 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11966 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11967 == FIRST_RET_REG));
11969 else
11971 pcum->arg_count [(int) SH_ARG_INT] = 0;
11972 pcum->prototype_p = FALSE;
11973 if (mode != VOIDmode)
11975 pcum->call_cookie =
11976 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11977 && GET_MODE_SIZE (mode) > 4
11978 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11980 /* If the default ABI is the Renesas ABI then all library
11981 calls must assume that the library will be using the
11982 Renesas ABI. So if the function would return its result
11983 in memory then we must force the address of this memory
11984 block onto the stack. Ideally we would like to call
11985 targetm.calls.return_in_memory() here but we do not have
11986 the TYPE or the FNDECL available so we synthesize the
11987 contents of that function as best we can. */
11988 pcum->force_mem =
11989 (TARGET_DEFAULT & MASK_HITACHI)
11990 && (mode == BLKmode
11991 || (GET_MODE_SIZE (mode) > 4
11992 && !(mode == DFmode
11993 && TARGET_FPU_DOUBLE)));
11995 else
11997 pcum->call_cookie = 0;
11998 pcum->force_mem = FALSE;
12003 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12004 not enter into CONST_DOUBLE for the replace.
12006 Note that copying is not done so X must not be shared unless all copies
12007 are to be modified.
12009 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12010 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12011 replacements[n*2+1] - and that we take mode changes into account.
12013 If a replacement is ambiguous, return NULL_RTX.
12015 If MODIFY is zero, don't modify any rtl in place,
12016 just return zero or nonzero for failure / success. */
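/* For instance (purely illustrative), with N_REPLACEMENTS == 1 and
   replacements[0] == (reg:SI 4 r4), replacements[1] == (reg:SI 5 r5),
   every occurrence of r4 in X is rewritten to r5; occurrences that only
   partially overlap a FROM hard register make the call fail with
   NULL_RTX.  */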
12019 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12021 int i, j;
12022 const char *fmt;
12024 /* The following prevents a loop from occurring when we change a MEM in
12025 a CONST_DOUBLE into the same CONST_DOUBLE. */
12026 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12027 return x;
12029 for (i = n_replacements - 1; i >= 0 ; i--)
12030 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12031 return replacements[i*2+1];
12033 /* Allow this function to make replacements in EXPR_LISTs. */
12034 if (x == 0)
12035 return 0;
12037 if (GET_CODE (x) == SUBREG)
12039 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12040 n_replacements, modify);
12042 if (CONST_INT_P (new_rtx))
12044 x = simplify_subreg (GET_MODE (x), new_rtx,
12045 GET_MODE (SUBREG_REG (x)),
12046 SUBREG_BYTE (x));
12047 if (! x)
12048 abort ();
12050 else if (modify)
12051 SUBREG_REG (x) = new_rtx;
12053 return x;
12055 else if (REG_P (x))
12057 unsigned regno = REGNO (x);
12058 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12059 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12060 rtx result = NULL_RTX;
12062 for (i = n_replacements - 1; i >= 0; i--)
12064 rtx from = replacements[i*2];
12065 rtx to = replacements[i*2+1];
12066 unsigned from_regno, from_nregs, to_regno, new_regno;
12068 if (!REG_P (from))
12069 continue;
12070 from_regno = REGNO (from);
12071 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12072 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12073 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12075 if (regno < from_regno
12076 || regno + nregs > from_regno + from_nregs
12077 || !REG_P (to)
12078 || result)
12079 return NULL_RTX;
12080 to_regno = REGNO (to);
12081 if (to_regno < FIRST_PSEUDO_REGISTER)
12083 new_regno = regno + to_regno - from_regno;
12084 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12085 != nregs)
12086 return NULL_RTX;
12087 result = gen_rtx_REG (GET_MODE (x), new_regno);
12089 else if (GET_MODE (x) <= GET_MODE (to))
12090 result = gen_lowpart_common (GET_MODE (x), to);
12091 else
12092 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12095 return result ? result : x;
12097 else if (GET_CODE (x) == ZERO_EXTEND)
12099 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12100 n_replacements, modify);
12102 if (CONST_INT_P (new_rtx))
12104 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12105 new_rtx, GET_MODE (XEXP (x, 0)));
12106 if (! x)
12107 abort ();
12109 else if (modify)
12110 XEXP (x, 0) = new_rtx;
12112 return x;
12115 fmt = GET_RTX_FORMAT (GET_CODE (x));
12116 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12118 rtx new_rtx;
12120 if (fmt[i] == 'e')
12122 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12123 n_replacements, modify);
12124 if (!new_rtx)
12125 return NULL_RTX;
12126 if (modify)
12127 XEXP (x, i) = new_rtx;
12129 else if (fmt[i] == 'E')
12130 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12132 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12133 n_replacements, modify);
12134 if (!new_rtx)
12135 return NULL_RTX;
12136 if (modify)
12137 XVECEXP (x, i, j) = new_rtx;
12141 return x;
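/* Return an rtx that truncates X to MODE.  When X is itself a ZERO_EXTEND
   or SIGN_EXTEND, look through it: drop the extension if its operand is at
   least as wide as MODE, or redo it in MODE if the operand is narrower --
   unless NEED_SIGN_EXT is set and the extension was a zero extension, in
   which case the extension is kept and truncated as a whole.  */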
12145 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12147 enum rtx_code code = TRUNCATE;
12149 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12151 rtx inner = XEXP (x, 0);
12152 enum machine_mode inner_mode = GET_MODE (inner);
12154 if (inner_mode == mode)
12155 return inner;
12156 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12157 x = inner;
12158 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12159 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12161 code = GET_CODE (x);
12162 x = inner;
12165 return gen_rtx_fmt_e (code, mode, x);
12168 /* Called via for_each_rtx after reload, to clean up truncates of
12169 registers that span multiple actual hard registers. */
12171 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12173 rtx x = *p, reg;
12175 if (GET_CODE (x) != TRUNCATE)
12176 return 0;
12177 reg = XEXP (x, 0);
12178 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12180 enum machine_mode reg_mode = GET_MODE (reg);
12181 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12182 subreg_lowpart_offset (DImode, reg_mode));
12183 *(int*) n_changes += 1;
12184 return -1;
12186 return 0;
12189 /* Load and store depend on the highpart of the address. However,
12190 set_attr_alternative does not give well-defined results before reload,
12191 so we must look at the rtl ourselves to see if any of the feeding
12192 registers is used in a memref. */
12194 /* Called by sh_contains_memref_p via for_each_rtx. */
12195 static int
12196 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12198 return (MEM_P (*loc));
12201 /* Return nonzero iff INSN contains a MEM. */
12203 sh_contains_memref_p (rtx insn)
12205 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12208 /* Return nonzero iff INSN loads a banked register. */
12210 sh_loads_bankedreg_p (rtx insn)
12212 if (GET_CODE (PATTERN (insn)) == SET)
12214 rtx op = SET_DEST (PATTERN(insn));
12215 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12216 return 1;
12219 return 0;
12222 /* FNADDR is the MEM expression from a call expander. Return an address
12223 to use in an SHmedia insn pattern. */
12225 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12227 int is_sym;
12229 fnaddr = XEXP (fnaddr, 0);
12230 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12231 if (flag_pic && is_sym)
12233 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12235 rtx reg = gen_reg_rtx (Pmode);
12237 /* We must not use GOTPLT for sibcalls, because PIC_REG
12238 must be restored before the PLT code gets to run. */
12239 if (is_sibcall)
12240 emit_insn (gen_symGOT2reg (reg, fnaddr));
12241 else
12242 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12243 fnaddr = reg;
12245 else
12247 fnaddr = gen_sym2PIC (fnaddr);
12248 PUT_MODE (fnaddr, Pmode);
12251 /* If ptabs might trap, make this visible to the rest of the compiler.
12252 We generally assume that symbols pertain to valid locations, but
12253 it is possible to generate invalid symbols with asm or linker tricks.
12254 In a list of functions where each returns its successor, an invalid
12255 symbol might denote an empty list. */
12256 if (!TARGET_PT_FIXED
12257 && (!is_sym || TARGET_INVALID_SYMBOLS)
12258 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12260 rtx tr = gen_reg_rtx (PDImode);
12262 emit_insn (gen_ptabs (tr, fnaddr));
12263 fnaddr = tr;
12265 else if (! target_reg_operand (fnaddr, Pmode))
12266 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12267 return fnaddr;
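/* Implement TARGET_SECONDARY_RELOAD.  Return the class of an intermediate
   register required to copy X of mode MODE into (IN_P true) or out of a
   register of class RCLASS_I, or NO_REGS when none is needed; for some
   cases SRI->icode is set to a dedicated reload pattern instead.  */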
12270 reg_class_t
12271 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12272 enum machine_mode mode, secondary_reload_info *sri)
12274 enum reg_class rclass = (enum reg_class) rclass_i;
12276 if (in_p)
12278 if (REGCLASS_HAS_FP_REG (rclass)
12279 && ! TARGET_SHMEDIA
12280 && immediate_operand ((x), mode)
12281 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12282 && mode == SFmode && fldi_ok ()))
12283 switch (mode)
12285 case SFmode:
12286 sri->icode = CODE_FOR_reload_insf__frn;
12287 return NO_REGS;
12288 case DFmode:
12289 sri->icode = CODE_FOR_reload_indf__frn;
12290 return NO_REGS;
12291 case SImode:
12292 /* ??? If we knew that we were in the appropriate mode -
12293 single precision - we could use a reload pattern directly. */
12294 return FPUL_REGS;
12295 default:
12296 abort ();
12298 if (rclass == FPUL_REGS
12299 && ((REG_P (x)
12300 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12301 || REGNO (x) == T_REG))
12302 || GET_CODE (x) == PLUS))
12303 return GENERAL_REGS;
12304 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12306 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12307 return GENERAL_REGS;
12308 else if (mode == SFmode)
12309 return FP_REGS;
12310 sri->icode = CODE_FOR_reload_insi__i_fpul;
12311 return NO_REGS;
12313 if (rclass == FPSCR_REGS
12314 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12315 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12316 return GENERAL_REGS;
12317 if (REGCLASS_HAS_FP_REG (rclass)
12318 && TARGET_SHMEDIA
12319 && immediate_operand (x, mode)
12320 && x != CONST0_RTX (GET_MODE (x))
12321 && GET_MODE (x) != V4SFmode)
12322 return GENERAL_REGS;
12323 if ((mode == QImode || mode == HImode)
12324 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12326 sri->icode = ((mode == QImode)
12327 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12328 return NO_REGS;
12330 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12331 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12332 return TARGET_REGS;
12333 } /* end of input-only processing. */
12335 if (((REGCLASS_HAS_FP_REG (rclass)
12336 && (REG_P (x)
12337 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12338 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12339 && TARGET_FMOVD))))
12340 || (REGCLASS_HAS_GENERAL_REG (rclass)
12341 && REG_P (x)
12342 && FP_REGISTER_P (REGNO (x))))
12343 && ! TARGET_SHMEDIA
12344 && (mode == SFmode || mode == SImode))
12345 return FPUL_REGS;
12346 if ((rclass == FPUL_REGS
12347 || (REGCLASS_HAS_FP_REG (rclass)
12348 && ! TARGET_SHMEDIA && mode == SImode))
12349 && (MEM_P (x)
12350 || (REG_P (x)
12351 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12352 || REGNO (x) == T_REG
12353 || system_reg_operand (x, VOIDmode)))))
12355 if (rclass == FPUL_REGS)
12356 return GENERAL_REGS;
12357 return FPUL_REGS;
12359 if ((rclass == TARGET_REGS
12360 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12361 && !satisfies_constraint_Csy (x)
12362 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12363 return GENERAL_REGS;
12364 if ((rclass == MAC_REGS || rclass == PR_REGS)
12365 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12366 && rclass != REGNO_REG_CLASS (REGNO (x)))
12367 return GENERAL_REGS;
12368 if (rclass != GENERAL_REGS && REG_P (x)
12369 && TARGET_REGISTER_P (REGNO (x)))
12370 return GENERAL_REGS;
12371 return NO_REGS;
12374 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
12376 #include "gt-sh.h"