/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
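/* Illustrative note (an addition, not from the original sources): for a
   DImode value held in the general register pair r4/r5 on a little-endian
   target, word 0 is the least significant word, so LSW == 0 selects it and
   MSW == 1 selects the high word; on a big-endian target the selections are
   reversed.  E.g. operand_subword (x, LSW, 0, DImode) always yields the
   least significant 32-bit word, whichever endianness is in effect.  */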
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS,  /* c */ FPSCR_REGS,   /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,      /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,      /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS,     /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS,     /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,      /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static int sh_dwarf_calling_convention (tree);

static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
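/* Illustrative sketch (an addition, not from the original sources): for one
   sched1 pass over a basic block the hooks above are consulted roughly in
   this order by the generic scheduler:

     TARGET_SCHED_INIT_GLOBAL   (sh_md_init_global)    - compute regmode weights
     TARGET_SCHED_INIT          (sh_md_init)           - reset pressure counters
     per scheduling cycle:
       TARGET_SCHED_REORDER        (sh_reorder)        - sort ready queue R
       TARGET_SCHED_VARIABLE_ISSUE (sh_variable_issue) - cache can_issue_more
       TARGET_SCHED_REORDER2       (sh_reorder2)       - flag high pressure
       TARGET_SCHED_DFA_NEW_CYCLE  (sh_dfa_new_cycle)  - maybe skip cycles
     TARGET_SCHED_FINISH_GLOBAL (sh_md_finish_global)  - free weight arrays  */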
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE)  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) 	curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
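/* Usage sketch (an addition, not from the original sources): when an insn
   producing an SImode value is scheduled, the pressure-tracking code below
   effectively performs

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and high_pressure () compares the running totals against its thresholds
   to decide whether the ready queue should be reordered or cycles skipped.  */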
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
587 /* Print the operand address in x to the stream. */
590 print_operand_address (FILE *stream
, rtx x
)
592 switch (GET_CODE (x
))
596 fprintf (stream
, "@%s", reg_names
[true_regnum (x
)]);
601 rtx base
= XEXP (x
, 0);
602 rtx index
= XEXP (x
, 1);
604 switch (GET_CODE (index
))
607 fprintf (stream
, "@(%d,%s)", (int) INTVAL (index
),
608 reg_names
[true_regnum (base
)]);
614 int base_num
= true_regnum (base
);
615 int index_num
= true_regnum (index
);
617 fprintf (stream
, "@(r0,%s)",
618 reg_names
[MAX (base_num
, index_num
)]);
629 fprintf (stream
, "@-%s", reg_names
[true_regnum (XEXP (x
, 0))]);
633 fprintf (stream
, "@%s+", reg_names
[true_regnum (XEXP (x
, 0))]);
637 x
= mark_constant_pool_use (x
);
638 output_addr_const (stream
, x
);
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
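/* Illustrative examples (an addition, not from the original comment),
   assuming a DImode value held in the general register pair r4/r5 on a
   little-endian target: '%R' prints the least significant word ("r4") and
   '%S' the most significant word ("r5"); on a big-endian target the two are
   swapped.  Likewise, for a conditional branch whose delay slot has been
   filled, "bt%." expands to "bt.s" (or "bt/s" with the alternate assembler
   dialect).  */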
665 print_operand (FILE *stream
, rtx x
, int code
)
668 enum machine_mode mode
;
676 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
677 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
678 fprintf (stream
, ASSEMBLER_DIALECT
? "/s" : ".s");
681 fprintf (stream
, "%s", LOCAL_LABEL_PREFIX
);
684 trapa_attr
= lookup_attribute ("trap_exit",
685 DECL_ATTRIBUTES (current_function_decl
));
687 fprintf (stream
, "trapa #%ld",
688 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr
))));
689 else if (sh_cfun_interrupt_handler_p ())
690 fprintf (stream
, "rte");
692 fprintf (stream
, "rts");
695 /* Output a nop if there's nothing in the delay slot. */
696 if (dbr_sequence_length () == 0)
697 fprintf (stream
, "\n\tnop");
701 rtx note
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
703 if (note
&& INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
704 fputs ("/u", stream
);
708 if (flag_verbose_asm
&& JUMP_LABEL (current_output_insn
))
710 fputs ("\t! target: ", stream
);
711 output_addr_const (stream
, JUMP_LABEL (current_output_insn
));
715 x
= mark_constant_pool_use (x
);
716 output_addr_const (stream
, x
);
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
726 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
728 regno
= true_regnum (x
);
729 regno
+= FP_REGISTER_P (regno
) ? 1 : LSW
;
730 fputs (reg_names
[regno
], (stream
));
734 x
= adjust_address (x
, SImode
, 4 * LSW
);
735 print_operand_address (stream
, XEXP (x
, 0));
742 if (mode
== VOIDmode
)
744 if (GET_MODE_SIZE (mode
) >= 8)
745 sub
= simplify_subreg (SImode
, x
, mode
, 4 * LSW
);
747 print_operand (stream
, sub
, 0);
749 output_operand_lossage ("invalid operand to %%R");
753 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
755 regno
= true_regnum (x
);
756 regno
+= FP_REGISTER_P (regno
) ? 0 : MSW
;
757 fputs (reg_names
[regno
], (stream
));
761 x
= adjust_address (x
, SImode
, 4 * MSW
);
762 print_operand_address (stream
, XEXP (x
, 0));
769 if (mode
== VOIDmode
)
771 if (GET_MODE_SIZE (mode
) >= 8)
772 sub
= simplify_subreg (SImode
, x
, mode
, 4 * MSW
);
774 print_operand (stream
, sub
, 0);
776 output_operand_lossage ("invalid operand to %%S");
780 /* Next word of a double. */
781 switch (GET_CODE (x
))
784 fputs (reg_names
[REGNO (x
) + 1], (stream
));
787 if (GET_CODE (XEXP (x
, 0)) != PRE_DEC
788 && GET_CODE (XEXP (x
, 0)) != POST_INC
)
789 x
= adjust_address (x
, SImode
, 4);
790 print_operand_address (stream
, XEXP (x
, 0));
797 switch (GET_CODE (x
))
799 case PLUS
: fputs ("add", stream
); break;
800 case MINUS
: fputs ("sub", stream
); break;
801 case MULT
: fputs ("mul", stream
); break;
802 case DIV
: fputs ("div", stream
); break;
803 case EQ
: fputs ("eq", stream
); break;
804 case NE
: fputs ("ne", stream
); break;
805 case GT
: case LT
: fputs ("gt", stream
); break;
806 case GE
: case LE
: fputs ("ge", stream
); break;
807 case GTU
: case LTU
: fputs ("gtu", stream
); break;
808 case GEU
: case LEU
: fputs ("geu", stream
); break;
814 if (GET_CODE (x
) == MEM
815 && GET_CODE (XEXP (x
, 0)) == PLUS
816 && (GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
817 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == SUBREG
))
822 gcc_assert (GET_CODE (x
) == MEM
);
826 switch (GET_CODE (x
))
830 print_operand (stream
, x
, 0);
831 fputs (", 0", stream
);
835 print_operand (stream
, XEXP (x
, 0), 0);
836 fputs (", ", stream
);
837 print_operand (stream
, XEXP (x
, 1), 0);
846 gcc_assert (GET_CODE (x
) == REG
&& GET_MODE (x
) == V2SFmode
);
848 fprintf ((stream
), "d%s", reg_names
[REGNO (x
)] + 1);
852 if (x
== CONST0_RTX (GET_MODE (x
)))
854 fprintf ((stream
), "r63");
859 if (GET_CODE (x
) == CONST_INT
)
861 fprintf ((stream
), "%u", (unsigned) INTVAL (x
) & (0x10000 - 1));
871 switch (GET_CODE (x
))
875 rtx inner
= XEXP (x
, 0);
877 enum machine_mode inner_mode
;
879 /* We might see SUBREGs with vector mode registers inside. */
880 if (GET_CODE (inner
) == SUBREG
881 && (GET_MODE_SIZE (GET_MODE (inner
))
882 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
883 && subreg_lowpart_p (inner
))
884 inner
= SUBREG_REG (inner
);
885 if (GET_CODE (inner
) == CONST_INT
)
887 x
= GEN_INT (trunc_int_for_mode (INTVAL (inner
), GET_MODE (x
)));
890 inner_mode
= GET_MODE (inner
);
891 if (GET_CODE (inner
) == SUBREG
892 && (GET_MODE_SIZE (GET_MODE (inner
))
893 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
894 && GET_CODE (SUBREG_REG (inner
)) == REG
)
896 offset
= subreg_regno_offset (REGNO (SUBREG_REG (inner
)),
897 GET_MODE (SUBREG_REG (inner
)),
900 inner
= SUBREG_REG (inner
);
902 if (GET_CODE (inner
) != REG
|| GET_MODE_SIZE (inner_mode
) > 8)
904 /* Floating point register pairs are always big endian;
905 general purpose registers are 64 bit wide. */
906 regno
= REGNO (inner
);
907 regno
= (HARD_REGNO_NREGS (regno
, inner_mode
)
908 - HARD_REGNO_NREGS (regno
, mode
))
916 /* FIXME: We need this on SHmedia32 because reload generates
917 some sign-extended HI or QI loads into DImode registers
918 but, because Pmode is SImode, the address ends up with a
919 subreg:SI of the DImode register. Maybe reload should be
920 fixed so as to apply alter_subreg to such loads? */
922 gcc_assert (trapping_target_operand (x
, VOIDmode
));
923 x
= XEXP (XEXP (x
, 2), 0);
926 gcc_assert (SUBREG_BYTE (x
) == 0
927 && GET_CODE (SUBREG_REG (x
)) == REG
);
935 if (FP_REGISTER_P (regno
)
936 && mode
== V16SFmode
)
937 fprintf ((stream
), "mtrx%s", reg_names
[regno
] + 2);
938 else if (FP_REGISTER_P (REGNO (x
))
940 fprintf ((stream
), "fv%s", reg_names
[regno
] + 2);
941 else if (GET_CODE (x
) == REG
943 fprintf ((stream
), "fp%s", reg_names
[regno
] + 2);
944 else if (FP_REGISTER_P (REGNO (x
))
945 && GET_MODE_SIZE (mode
) > 4)
946 fprintf ((stream
), "d%s", reg_names
[regno
] + 1);
948 fputs (reg_names
[regno
], (stream
));
952 output_address (XEXP (x
, 0));
957 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
958 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
959 && (GET_MODE (XEXP (x
, 0)) == DImode
960 || GET_MODE (XEXP (x
, 0)) == SImode
)
961 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == TRUNCATE
962 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == HImode
)
964 rtx val
= XEXP (XEXP (XEXP (x
, 0), 0), 0);
966 bool nested_expr
= false;
969 if (GET_CODE (val
) == ASHIFTRT
)
972 val2
= XEXP (val
, 0);
974 if (GET_CODE (val2
) == CONST
975 || GET_RTX_CLASS (GET_CODE (val2
)) != RTX_OBJ
)
980 output_addr_const (stream
, val2
);
983 if (GET_CODE (val
) == ASHIFTRT
)
985 fputs (" >> ", stream
);
986 output_addr_const (stream
, XEXP (val
, 1));
989 fputs (" & 65535)", stream
);
997 output_addr_const (stream
, x
);
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
1021 expand_block_move (rtx
*operands
)
1023 int align
= INTVAL (operands
[3]);
1024 int constp
= (GET_CODE (operands
[2]) == CONST_INT
);
1025 int bytes
= (constp
? INTVAL (operands
[2]) : 0);
1030 /* If we could use mov.l to move words and dest is word-aligned, we
1031 can use movua.l for loads and still generate a relatively short
1032 and efficient sequence. */
1033 if (TARGET_SH4A_ARCH
&& align
< 4
1034 && MEM_ALIGN (operands
[0]) >= 32
1035 && can_move_by_pieces (bytes
, 32))
1037 rtx dest
= copy_rtx (operands
[0]);
1038 rtx src
= copy_rtx (operands
[1]);
1039 /* We could use different pseudos for each copied word, but
1040 since movua can only load into r0, it's kind of
1042 rtx temp
= gen_reg_rtx (SImode
);
1043 rtx src_addr
= copy_addr_to_reg (XEXP (src
, 0));
1046 while (copied
+ 4 <= bytes
)
1048 rtx to
= adjust_address (dest
, SImode
, copied
);
1049 rtx from
= adjust_automodify_address (src
, SImode
, src_addr
, copied
);
1051 emit_insn (gen_movua (temp
, from
));
1052 emit_move_insn (src_addr
, plus_constant (src_addr
, 4));
1053 emit_move_insn (to
, temp
);
1058 move_by_pieces (adjust_address (dest
, BLKmode
, copied
),
1059 adjust_automodify_address (src
, BLKmode
,
1061 bytes
- copied
, align
, 0);
1066 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1067 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1068 if (align
< 4 || (bytes
% 4 != 0))
1071 if (TARGET_HARD_SH4
)
1075 else if (bytes
== 12)
1077 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1078 rtx r4
= gen_rtx_REG (SImode
, 4);
1079 rtx r5
= gen_rtx_REG (SImode
, 5);
1081 function_symbol (func_addr_rtx
, "__movmemSI12_i4", SFUNC_STATIC
);
1082 force_into (XEXP (operands
[0], 0), r4
);
1083 force_into (XEXP (operands
[1], 0), r5
);
1084 emit_insn (gen_block_move_real_i4 (func_addr_rtx
));
1087 else if (! TARGET_SMALLCODE
)
1089 const char *entry_name
;
1090 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1092 rtx r4
= gen_rtx_REG (SImode
, 4);
1093 rtx r5
= gen_rtx_REG (SImode
, 5);
1094 rtx r6
= gen_rtx_REG (SImode
, 6);
1096 entry_name
= (bytes
& 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1097 function_symbol (func_addr_rtx
, entry_name
, SFUNC_STATIC
);
1098 force_into (XEXP (operands
[0], 0), r4
);
1099 force_into (XEXP (operands
[1], 0), r5
);
1101 dwords
= bytes
>> 3;
1102 emit_insn (gen_move_insn (r6
, GEN_INT (dwords
- 1)));
1103 emit_insn (gen_block_lump_real_i4 (func_addr_rtx
));
1112 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1113 rtx r4
= gen_rtx_REG (SImode
, 4);
1114 rtx r5
= gen_rtx_REG (SImode
, 5);
1116 sprintf (entry
, "__movmemSI%d", bytes
);
1117 function_symbol (func_addr_rtx
, entry
, SFUNC_STATIC
);
1118 force_into (XEXP (operands
[0], 0), r4
);
1119 force_into (XEXP (operands
[1], 0), r5
);
1120 emit_insn (gen_block_move_real (func_addr_rtx
));
1124 /* This is the same number of bytes as a memcpy call, but to a different
1125 less common function name, so this will occasionally use more space. */
1126 if (! TARGET_SMALLCODE
)
1128 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1129 int final_switch
, while_loop
;
1130 rtx r4
= gen_rtx_REG (SImode
, 4);
1131 rtx r5
= gen_rtx_REG (SImode
, 5);
1132 rtx r6
= gen_rtx_REG (SImode
, 6);
1134 function_symbol (func_addr_rtx
, "__movmem", SFUNC_STATIC
);
1135 force_into (XEXP (operands
[0], 0), r4
);
1136 force_into (XEXP (operands
[1], 0), r5
);
      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
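      /* Worked example (an addition, not from the original comment): for
	 bytes == 72 we have 72/4 == 18 longwords, so final_switch
	 == 16 - (18 % 16) == 14 and while_loop == (18/16 - 1) * 16 == 0,
	 giving r6 == 14; the 64-byte loop decrements r6 by 16 per pass,
	 leaving -2 to index the trailing switch, which copies the
	 remaining 8 bytes.  */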
1144 final_switch
= 16 - ((bytes
/ 4) % 16);
1145 while_loop
= ((bytes
/ 4) / 16 - 1) * 16;
1146 emit_insn (gen_move_insn (r6
, GEN_INT (while_loop
+ final_switch
)));
1147 emit_insn (gen_block_lump_real (func_addr_rtx
));
1154 /* Prepare operands for a move define_expand; specifically, one of the
1155 operands must be in a register. */
1158 prepare_move_operands (rtx operands
[], enum machine_mode mode
)
1160 if ((mode
== SImode
|| mode
== DImode
)
1162 && ! ((mode
== Pmode
|| mode
== ptr_mode
)
1163 && tls_symbolic_operand (operands
[1], Pmode
) != 0))
1166 if (SYMBOLIC_CONST_P (operands
[1]))
1168 if (GET_CODE (operands
[0]) == MEM
)
1169 operands
[1] = force_reg (Pmode
, operands
[1]);
1170 else if (TARGET_SHMEDIA
1171 && GET_CODE (operands
[1]) == LABEL_REF
1172 && target_reg_operand (operands
[0], mode
))
1176 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1177 operands
[1] = legitimize_pic_address (operands
[1], mode
, temp
);
1180 else if (GET_CODE (operands
[1]) == CONST
1181 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
1182 && SYMBOLIC_CONST_P (XEXP (XEXP (operands
[1], 0), 0)))
1184 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1185 temp
= legitimize_pic_address (XEXP (XEXP (operands
[1], 0), 0),
1187 operands
[1] = expand_binop (mode
, add_optab
, temp
,
1188 XEXP (XEXP (operands
[1], 0), 1),
1189 no_new_pseudos
? temp
1190 : gen_reg_rtx (Pmode
),
1191 0, OPTAB_LIB_WIDEN
);
1195 if (! reload_in_progress
&& ! reload_completed
)
1197 /* Copy the source to a register if both operands aren't registers. */
1198 if (! register_operand (operands
[0], mode
)
1199 && ! sh_register_operand (operands
[1], mode
))
1200 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1202 if (GET_CODE (operands
[0]) == MEM
&& ! memory_operand (operands
[0], mode
))
1204 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1205 except that we can't use that function because it is static. */
1206 rtx
new = change_address (operands
[0], mode
, 0);
1207 MEM_COPY_ATTRIBUTES (new, operands
[0]);
1211 /* This case can happen while generating code to move the result
1212 of a library call to the target. Reject `st r0,@(rX,rY)' because
1213 reload will fail to find a spill register for rX, since r0 is already
1214 being used for the source. */
1216 && refers_to_regno_p (R0_REG
, R0_REG
+ 1, operands
[1], (rtx
*)0)
1217 && GET_CODE (operands
[0]) == MEM
1218 && GET_CODE (XEXP (operands
[0], 0)) == PLUS
1219 && GET_CODE (XEXP (XEXP (operands
[0], 0), 1)) == REG
)
1220 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1223 if (mode
== Pmode
|| mode
== ptr_mode
)
1226 enum tls_model tls_kind
;
1230 if (GET_CODE (op1
) == CONST
1231 && GET_CODE (XEXP (op1
, 0)) == PLUS
1232 && tls_symbolic_operand (XEXP (XEXP (op1
, 0), 0), Pmode
))
1234 opc
= XEXP (XEXP (op1
, 0), 1);
1235 op1
= XEXP (XEXP (op1
, 0), 0);
1240 if ((tls_kind
= tls_symbolic_operand (op1
, Pmode
)))
1242 rtx tga_op1
, tga_ret
, tmp
, tmp2
;
1246 case TLS_MODEL_GLOBAL_DYNAMIC
:
1247 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1248 emit_call_insn (gen_tls_global_dynamic (tga_ret
, op1
));
1252 case TLS_MODEL_LOCAL_DYNAMIC
:
1253 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1254 emit_call_insn (gen_tls_local_dynamic (tga_ret
, op1
));
1256 tmp
= gen_reg_rtx (Pmode
);
1257 emit_move_insn (tmp
, tga_ret
);
1259 if (register_operand (op0
, Pmode
))
1262 tmp2
= gen_reg_rtx (Pmode
);
1264 emit_insn (gen_symDTPOFF2reg (tmp2
, op1
, tmp
));
1268 case TLS_MODEL_INITIAL_EXEC
:
1271 /* Don't schedule insns for getting GOT address when
1272 the first scheduling is enabled, to avoid spill
1274 if (flag_schedule_insns
)
1275 emit_insn (gen_blockage ());
1276 emit_insn (gen_GOTaddr2picreg ());
1277 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
,
1279 if (flag_schedule_insns
)
1280 emit_insn (gen_blockage ());
1282 tga_op1
= no_new_pseudos
? op0
: gen_reg_rtx (Pmode
);
1283 tmp
= gen_sym2GOTTPOFF (op1
);
1284 emit_insn (gen_tls_initial_exec (tga_op1
, tmp
));
1288 case TLS_MODEL_LOCAL_EXEC
:
1289 tmp2
= gen_reg_rtx (Pmode
);
1290 emit_insn (gen_load_gbr (tmp2
));
1291 tmp
= gen_reg_rtx (Pmode
);
1292 emit_insn (gen_symTPOFF2reg (tmp
, op1
));
1294 if (register_operand (op0
, Pmode
))
1297 op1
= gen_reg_rtx (Pmode
);
1299 emit_insn (gen_addsi3 (op1
, tmp
, tmp2
));
1306 emit_insn (gen_addsi3 (op1
, op1
, force_reg (SImode
, opc
)));
1314 /* Prepare the operands for an scc instruction; make sure that the
1315 compare has been done. */
1317 prepare_scc_operands (enum rtx_code code
)
1319 rtx t_reg
= gen_rtx_REG (SImode
, T_REG
);
1320 enum rtx_code oldcode
= code
;
1321 enum machine_mode mode
;
1323 /* First need a compare insn. */
1327 /* It isn't possible to handle this case. */
1344 if (code
!= oldcode
)
1346 rtx tmp
= sh_compare_op0
;
1347 sh_compare_op0
= sh_compare_op1
;
1348 sh_compare_op1
= tmp
;
1351 mode
= GET_MODE (sh_compare_op0
);
1352 if (mode
== VOIDmode
)
1353 mode
= GET_MODE (sh_compare_op1
);
1355 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1356 if ((code
!= EQ
&& code
!= NE
1357 && (sh_compare_op1
!= const0_rtx
1358 || code
== GTU
|| code
== GEU
|| code
== LTU
|| code
== LEU
))
1359 || (mode
== DImode
&& sh_compare_op1
!= const0_rtx
)
1360 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1361 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1363 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1364 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
)
1365 (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2,
1366 gen_rtx_SET (VOIDmode
, t_reg
,
1367 gen_rtx_fmt_ee (code
, SImode
,
1368 sh_compare_op0
, sh_compare_op1
)),
1369 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ()))));
1371 emit_insn (gen_rtx_SET (VOIDmode
, t_reg
,
1372 gen_rtx_fmt_ee (code
, SImode
,
1373 sh_compare_op0
, sh_compare_op1
)));
1378 /* Called from the md file, set up the operands of a compare instruction. */
1381 from_compare (rtx
*operands
, int code
)
1383 enum machine_mode mode
= GET_MODE (sh_compare_op0
);
1385 if (mode
== VOIDmode
)
1386 mode
= GET_MODE (sh_compare_op1
);
1389 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1391 /* Force args into regs, since we can't use constants here. */
1392 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1393 if (sh_compare_op1
!= const0_rtx
1394 || code
== GTU
|| code
== GEU
1395 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1396 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1398 if (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
&& code
== GE
)
1400 from_compare (operands
, GT
);
1401 insn
= gen_ieee_ccmpeqsf_t (sh_compare_op0
, sh_compare_op1
);
1404 insn
= gen_rtx_SET (VOIDmode
,
1405 gen_rtx_REG (SImode
, T_REG
),
1406 gen_rtx_fmt_ee (code
, SImode
,
1407 sh_compare_op0
, sh_compare_op1
));
1408 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1410 insn
= gen_rtx_PARALLEL (VOIDmode
,
1412 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ())));
1413 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
) (insn
);
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
1427 output_movedouble (rtx insn ATTRIBUTE_UNUSED
, rtx operands
[],
1428 enum machine_mode mode
)
1430 rtx dst
= operands
[0];
1431 rtx src
= operands
[1];
1433 if (GET_CODE (dst
) == MEM
1434 && GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
1435 return "mov.l %T1,%0\n\tmov.l %1,%0";
1437 if (register_operand (dst
, mode
)
1438 && register_operand (src
, mode
))
1440 if (REGNO (src
) == MACH_REG
)
1441 return "sts mach,%S0\n\tsts macl,%R0";
1443 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1444 when mov.d r1,r0 do r1->r0 then r2->r1. */
1446 if (REGNO (src
) + 1 == REGNO (dst
))
1447 return "mov %T1,%T0\n\tmov %1,%0";
1449 return "mov %1,%0\n\tmov %T1,%T0";
1451 else if (GET_CODE (src
) == CONST_INT
)
1453 if (INTVAL (src
) < 0)
1454 output_asm_insn ("mov #-1,%S0", operands
);
1456 output_asm_insn ("mov #0,%S0", operands
);
1458 return "mov %1,%R0";
1460 else if (GET_CODE (src
) == MEM
)
1463 int dreg
= REGNO (dst
);
1464 rtx inside
= XEXP (src
, 0);
1466 switch (GET_CODE (inside
))
1469 ptrreg
= REGNO (inside
);
1473 ptrreg
= subreg_regno (inside
);
1477 ptrreg
= REGNO (XEXP (inside
, 0));
1478 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1479 an offsettable address. Unfortunately, offsettable addresses use
1480 QImode to check the offset, and a QImode offsettable address
1481 requires r0 for the other operand, which is not currently
1482 supported, so we can't use the 'o' constraint.
1483 Thus we must check for and handle r0+REG addresses here.
1484 We punt for now, since this is likely very rare. */
1485 gcc_assert (GET_CODE (XEXP (inside
, 1)) != REG
);
1489 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1491 return "mov.l %1,%0\n\tmov.l %1,%T0";
1496 /* Work out the safe way to copy. Copy into the second half first. */
1498 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1501 return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
1517 output_far_jump (rtx insn
, rtx op
)
1519 struct { rtx lab
, reg
, op
; } this;
1520 rtx braf_base_lab
= NULL_RTX
;
1523 int offset
= branch_dest (insn
) - INSN_ADDRESSES (INSN_UID (insn
));
1526 this.lab
= gen_label_rtx ();
1530 && offset
- get_attr_length (insn
) <= 32766)
1533 jump
= "mov.w %O0,%1; braf %1";
1541 jump
= "mov.l %O0,%1; braf %1";
1543 jump
= "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1546 jump
= "mov.l %O0,%1; jmp @%1";
1548 /* If we have a scratch register available, use it. */
1549 if (GET_CODE ((prev
= prev_nonnote_insn (insn
))) == INSN
1550 && INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
1552 this.reg
= SET_DEST (XVECEXP (PATTERN (prev
), 0, 0));
1553 if (REGNO (this.reg
) == R0_REG
&& flag_pic
&& ! TARGET_SH2
)
1554 jump
= "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1555 output_asm_insn (jump
, &this.lab
);
1556 if (dbr_sequence_length ())
1557 print_slot (final_sequence
);
1559 output_asm_insn ("nop", 0);
1563 /* Output the delay slot insn first if any. */
1564 if (dbr_sequence_length ())
1565 print_slot (final_sequence
);
1567 this.reg
= gen_rtx_REG (SImode
, 13);
1568 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1569 Fortunately, MACL is fixed and call-clobbered, and we never
1570 need its value across jumps, so save r13 in it instead of in
1573 output_asm_insn ("lds r13, macl", 0);
1575 output_asm_insn ("mov.l r13,@-r15", 0);
1576 output_asm_insn (jump
, &this.lab
);
1578 output_asm_insn ("sts macl, r13", 0);
1580 output_asm_insn ("mov.l @r15+,r13", 0);
1582 if (far
&& flag_pic
&& TARGET_SH2
)
1584 braf_base_lab
= gen_label_rtx ();
1585 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
1586 CODE_LABEL_NUMBER (braf_base_lab
));
1589 output_asm_insn (".align 2", 0);
1590 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L", CODE_LABEL_NUMBER (this.lab
));
1592 if (far
&& flag_pic
)
1595 this.lab
= braf_base_lab
;
1596 output_asm_insn (".long %O2-%O0", &this.lab
);
1599 output_asm_insn (far
? ".long %O2" : ".word %O2-%O0", &this.lab
);
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
1627 /* The call to print_slot will clobber the operands. */
1628 rtx op0
= operands
[0];
1630 /* If the instruction in the delay slot is annulled (true), then
1631 there is no delay slot where we can put it now. The only safe
1632 place for it is after the label. final will do that by default. */
1635 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
1636 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
1638 asm_fprintf (asm_out_file
, "\tb%s%ss\t%LLF%d\n", logic
? "f" : "t",
1639 ASSEMBLER_DIALECT
? "/" : ".", label
);
1640 print_slot (final_sequence
);
1643 asm_fprintf (asm_out_file
, "\tb%s\t%LLF%d\n", logic
? "f" : "t", label
);
1645 output_asm_insn ("bra\t%l0", &op0
);
1646 fprintf (asm_out_file
, "\tnop\n");
1647 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1651 /* When relaxing, handle this like a short branch. The linker
1652 will fix it up if it still doesn't fit after relaxation. */
1654 return logic
? "bt%.\t%l0" : "bf%.\t%l0";
1656 /* These are for SH2e, in which we have to account for the
1657 extra nop because of the hardware bug in annulled branches. */
1663 gcc_assert (!final_sequence
1664 || !(INSN_ANNULLED_BRANCH_P
1665 (XVECEXP (final_sequence
, 0, 0))));
1666 asm_fprintf (asm_out_file
, "b%s%ss\t%LLF%d\n",
1668 ASSEMBLER_DIALECT
? "/" : ".", label
);
1669 fprintf (asm_out_file
, "\tnop\n");
1670 output_asm_insn ("bra\t%l0", operands
);
1671 fprintf (asm_out_file
, "\tnop\n");
1672 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1676 /* When relaxing, fall through. */
1681 sprintf (buffer
, "b%s%ss\t%%l0",
1683 ASSEMBLER_DIALECT
? "/" : ".");
1684 output_asm_insn (buffer
, &operands
[0]);
1689 /* There should be no longer branches now - that would
1690 indicate that something has destroyed the branches set
1691 up in machine_dependent_reorg. */
1697 output_branchy_insn (enum rtx_code code
, const char *template,
1698 rtx insn
, rtx
*operands
)
1700 rtx next_insn
= NEXT_INSN (insn
);
1702 if (next_insn
&& GET_CODE (next_insn
) == JUMP_INSN
&& condjump_p (next_insn
))
1704 rtx src
= SET_SRC (PATTERN (next_insn
));
1705 if (GET_CODE (src
) == IF_THEN_ELSE
&& GET_CODE (XEXP (src
, 0)) != code
)
1707 /* Following branch not taken */
1708 operands
[9] = gen_label_rtx ();
1709 emit_label_after (operands
[9], next_insn
);
1710 INSN_ADDRESSES_NEW (operands
[9],
1711 INSN_ADDRESSES (INSN_UID (next_insn
))
1712 + get_attr_length (next_insn
));
1717 int offset
= (branch_dest (next_insn
)
1718 - INSN_ADDRESSES (INSN_UID (next_insn
)) + 4);
1719 if (offset
>= -252 && offset
<= 258)
1721 if (GET_CODE (src
) == IF_THEN_ELSE
)
1723 src
= XEXP (src
, 1);
1729 operands
[9] = gen_label_rtx ();
1730 emit_label_after (operands
[9], insn
);
1731 INSN_ADDRESSES_NEW (operands
[9],
1732 INSN_ADDRESSES (INSN_UID (insn
))
1733 + get_attr_length (insn
));
1738 output_ieee_ccmpeq (rtx insn
, rtx
*operands
)
1740 return output_branchy_insn (NE
, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1744 /* Output the start of the assembler file. */
1747 sh_file_start (void)
1749 default_file_start ();
1752 /* Declare the .directive section before it is used. */
1753 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file
);
1754 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file
);
1758 /* We need to show the text section with the proper
1759 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1760 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1761 will complain. We can teach GAS specifically about the
1762 default attributes for our choice of text section, but
1763 then we would have to change GAS again if/when we change
1764 the text section name. */
1765 fprintf (asm_out_file
, "%s\n", TEXT_SECTION_ASM_OP
);
1767 /* Switch to the data section so that the coffsem symbol
1768 isn't in the text section. */
1769 switch_to_section (data_section
);
1771 if (TARGET_LITTLE_ENDIAN
)
1772 fputs ("\t.little\n", asm_out_file
);
1776 if (TARGET_SHCOMPACT
)
1777 fputs ("\t.mode\tSHcompact\n", asm_out_file
);
1778 else if (TARGET_SHMEDIA
)
1779 fprintf (asm_out_file
, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1780 TARGET_SHMEDIA64
? 64 : 32);
1784 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1787 unspec_caller_rtx_p (rtx pat
)
1789 switch (GET_CODE (pat
))
1792 return unspec_caller_rtx_p (XEXP (pat
, 0));
1795 if (unspec_caller_rtx_p (XEXP (pat
, 0)))
1797 return unspec_caller_rtx_p (XEXP (pat
, 1));
1799 if (XINT (pat
, 1) == UNSPEC_CALLER
)
1808 /* Indicate that INSN cannot be duplicated. This is true for insn
1809 that generates a unique label. */
1812 sh_cannot_copy_insn_p (rtx insn
)
1816 if (!reload_completed
|| !flag_pic
)
1819 if (GET_CODE (insn
) != INSN
)
1821 if (asm_noperands (insn
) >= 0)
1824 pat
= PATTERN (insn
);
1825 if (GET_CODE (pat
) != SET
)
1827 pat
= SET_SRC (pat
);
1829 if (unspec_caller_rtx_p (pat
))
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
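/* Worked example (an addition, not from the original comment): a constant
   shift by 13 uses shift_amounts[13] = {8, 2, 1, 2}; gen_shifty_op below
   emits four single shifts of 8, 2, 1 and 2 bits (8 + 2 + 1 + 2 == 13),
   which is why shift_insns[13] == 4.  A negative entry such as the -2 in
   shift_amounts[14] = {8, -2, 8} denotes a right shift that corrects an
   overshoot (8 - 2 + 8 == 14).  */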
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than
   by other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
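/* Illustrative note (an addition): ((n) | 8) == 15 holds exactly for n == 7
   and n == 15, the two shift counts whose ext_shift_amounts sequences end in
   a single one-bit right shift ({8, -1} and {16, -1}); only there can the
   final step be replaced by an arithmetic right shift without losing data on
   a sign-extended value.  */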
1879 /* This is used in length attributes in sh.md to help compute the length
1880 of arbitrary constant shift instructions. */
1883 shift_insns_rtx (rtx insn
)
1885 rtx set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
1886 int shift_count
= INTVAL (XEXP (set_src
, 1));
1887 enum rtx_code shift_code
= GET_CODE (set_src
);
1892 return ashiftrt_insns
[shift_count
];
1895 return shift_insns
[shift_count
];
1901 /* Return the cost of a shift. */
1911 if (GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
1913 if (GET_MODE (x
) == DImode
1914 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1915 && INTVAL (XEXP (x
, 1)) == 1)
1918 /* Everything else is invalid, because there is no pattern for it. */
1921 /* If shift by a non constant, then this will be expensive. */
1922 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1923 return SH_DYNAMIC_SHIFT_COST
;
1925 value
= INTVAL (XEXP (x
, 1));
1927 /* Otherwise, return the true cost in instructions. */
1928 if (GET_CODE (x
) == ASHIFTRT
)
1930 int cost
= ashiftrt_insns
[value
];
1931 /* If SH3, then we put the constant in a reg and use shad. */
1932 if (cost
> 1 + SH_DYNAMIC_SHIFT_COST
)
1933 cost
= 1 + SH_DYNAMIC_SHIFT_COST
;
1937 return shift_insns
[value
];
1940 /* Return the cost of an AND operation. */
1947 /* Anding with a register is a single cycle and instruction. */
1948 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1951 i
= INTVAL (XEXP (x
, 1));
1955 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
1956 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x
, 1)))
1957 || CONST_OK_FOR_J16 (INTVAL (XEXP (x
, 1)))))
1960 return 1 + rtx_cost (XEXP (x
, 1), AND
);
1963 /* These constants are single cycle extu.[bw] instructions. */
1964 if (i
== 0xff || i
== 0xffff)
1966 /* Constants that can be used in an and immediate instruction in a single
1967 cycle, but this requires r0, so make it a little more expensive. */
1968 if (CONST_OK_FOR_K08 (i
))
1970 /* Constants that can be loaded with a mov immediate and an and.
1971 This case is probably unnecessary. */
1972 if (CONST_OK_FOR_I08 (i
))
1974 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1975 This case is probably unnecessary. */
1979 /* Return the cost of an addition or a subtraction. */
1984 /* Adding a register is a single cycle insn. */
1985 if (GET_CODE (XEXP (x
, 1)) == REG
1986 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1989 /* Likewise for small constants. */
1990 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
1991 && CONST_OK_FOR_ADD (INTVAL (XEXP (x
, 1))))
1995 switch (GET_CODE (XEXP (x
, 1)))
2000 return TARGET_SHMEDIA64
? 5 : 3;
2003 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1))))
2005 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1)) >> 16))
2007 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x
, 1)) >> 16) >> 16))
2015 /* Any other constant requires a 2 cycle pc-relative load plus an
2020 /* Return the cost of a multiply. */
2022 multcosts (rtx x ATTRIBUTE_UNUSED
)
2024 if (sh_multcost
>= 0)
2027 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2028 accept constants. Ideally, we would use a cost of one or two and
2029 add the cost of the operand, but disregard the latter when inside loops
2030 and loop invariant code motion is still to follow.
2031 Using a multiply first and splitting it later if it's a loss
2032 doesn't work because of different sign / zero extension semantics
2033 of multiplies vs. shifts. */
2034 return TARGET_SMALLCODE
? 2 : 3;
2038 /* We have a mul insn, so we can never take more than the mul and the
2039 read of the mac reg, but count more because of the latency and extra
2041 if (TARGET_SMALLCODE
)
2046 /* If we're aiming at small code, then just count the number of
2047 insns in a multiply call sequence. */
2048 if (TARGET_SMALLCODE
)
2051 /* Otherwise count all the insns in the routine we'd be calling too. */
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

sh_rtx_costs (rtx x, int code, int outer_code, int *total)

      if (INTVAL (x) == 0)

      else if (outer_code == AND && and_operand ((x), DImode))

      else if ((outer_code == IOR || outer_code == XOR
                || outer_code == PLUS)
               && CONST_OK_FOR_I10 (INTVAL (x)))

      else if (CONST_OK_FOR_I16 (INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code != SET);
      else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
        *total = COSTS_N_INSNS ((outer_code != SET) + 1);
      else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
        *total = COSTS_N_INSNS ((outer_code != SET) + 2);

        *total = COSTS_N_INSNS ((outer_code != SET) + 3);

      if (CONST_OK_FOR_I08 (INTVAL (x)))

      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
               && CONST_OK_FOR_K08 (INTVAL (x)))

      if (TARGET_SHMEDIA64)
        *total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
        *total = COSTS_N_INSNS (2);

        *total = COSTS_N_INSNS (4);

      if (x == CONST0_RTX (GET_MODE (x)))

      else if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));

        *total = COSTS_N_INSNS (3) + (outer_code != SET);

      *total = COSTS_N_INSNS (addsubcosts (x));

      *total = COSTS_N_INSNS (andcosts (x));

      *total = COSTS_N_INSNS (multcosts (x));

      *total = COSTS_N_INSNS (shiftcosts (x));

      *total = COSTS_N_INSNS (20);

      if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));

        *total = COSTS_N_INSNS (3) + (outer_code != SET);
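
/* Illustrative sketch (not part of the cost hook above): the CONST_INT
   cascade charges roughly one extra instruction for each additional
   16-bit chunk a SHmedia constant needs, since each chunk beyond the
   first immediate-move costs another instruction.  The helper below is a
   rough model of that count for a 32-bit value; its name and the use of
   plain long are assumptions made only for this example.  */

static int
example_shmedia_const_insns (long value)
{
  if (value >= -32768 && value <= 32767)
    return 1;			/* Fits a single 16-bit immediate.  */
  if ((value >> 16) >= -32768 && (value >> 16) <= 32767)
    return 2;			/* Needs one extra 16-bit chunk.  */
  return 3;			/* Needs two extra 16-bit chunks.  */
}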
/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */

sh_address_cost (rtx X)

  return (GET_CODE (X) == PLUS
          && ! CONSTANT_P (XEXP (X, 1))
          && ! TARGET_SHMEDIA ? 1 : 0);
/* Code to expand a shift.  */

gen_ashift (int type, int n, rtx reg)

  /* Negative values here come from the shift_amounts array.  */

      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));

        emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));

        emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));

      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
/* Same for HImode */

gen_ashift_hi (int type, int n, rtx reg)

  /* Negative values here come from the shift_amounts array.  */

      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */

        if (GET_CODE (reg) == SUBREG)

            offset = SUBREG_BYTE (reg);
            reg = SUBREG_REG (reg);

        gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));

      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

gen_shifty_op (int code, rtx *operands)

  int value = INTVAL (operands[2]);

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

      if (code == LSHIFTRT)

          emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
          emit_insn (gen_movt (operands[0]));

      else if (code == ASHIFT)

          /* There is a two instruction sequence for 31 bit left shifts,
             but it requires r0.  */
          if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)

              emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
              emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));

  else if (value == 0)

      /* This can happen even when optimizing, if there were subregs before
         reload.  Don't output a nop here, as this is never optimized away;
         use a no-op move instead.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

gen_shifty_hi_op (int code, rtx *operands)

  int value = INTVAL (operands[2]);

  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */

      emit_insn (gen_nop ());

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;

      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
        gen_fun (code, ext_shift_amounts[value][i], operands[0]);

    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

expand_ashiftrt (rtx *operands)

      if (GET_CODE (operands[2]) != CONST_INT)

          rtx count = copy_to_mode_reg (SImode, operands[2]);
          emit_insn (gen_negsi2 (count, count));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));

      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
               > 1 + SH_DYNAMIC_SHIFT_COST)

            = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));

  if (GET_CODE (operands[2]) != CONST_INT)

  value = INTVAL (operands[2]) & 31;

      /* If we are called from abs expansion, arrange things so that we
         can use a single MT instruction that doesn't clobber the source,
         if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)

          emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),

          emit_insn (gen_mov_neg_si_t (operands[0]));

        emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));

  else if (value >= 16 && value <= 19)

      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));

        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);

  /* Expand a short sequence inline; for longer ones, call a magic
     routine.  */
  else if (value <= 5)

      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);

        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  function_symbol (wrk, func, SFUNC_STATIC);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
sh_dynamicalize_shift_p (rtx count)

  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is already set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1].  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */

shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)

  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;

  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;

  if (left < 0 || left > 31)

  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;

    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */

    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))

      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];

  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))

      for (width = 8; width <= 16; width += 8)

          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)

                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)

          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)

              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                     + ext_shift_insns[right + left - first];
              if (cost < best_cost)

  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)

      if (! CONST_OK_FOR_K08 (mask >> i))

      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)

          best_len = cost - 1;

  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)

      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
             + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)

          best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);

      attrp[0] = best_right;
      attrp[1] = best_len;
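
/* Illustrative sketch: the mask analysis above relies on the identity
   that ((mask ^ (mask - 1)) >> 1) + 1 isolates the lowest set bit of a
   nonzero MASK, so exact_log2 of that value gives the number of trailing
   zero bits, i.e. the width of an initial right shift.  For example,
   mask = 0x0ff0 gives 0x10, hence a right shift of 4.  The function below
   is not used anywhere in the compiler; it merely restates the trick on a
   plain unsigned int for clarity.  */

static unsigned int
example_lowest_set_bit (unsigned int mask)
{
  /* mask ^ (mask - 1) sets every bit up to and including the lowest set
     bit of MASK; shifting right once and adding one keeps just that bit.
     MASK is assumed to be nonzero.  */
  return ((mask ^ (mask - 1)) >> 1) + 1;
}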
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */

shl_and_length (rtx insn)

  rtx set_src, left_rtx, mask_rtx;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
/* This is used in length attribute of the and_shl_scratch instruction.  */

shl_and_scr_length (rtx insn)

  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */

gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)

  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;

        int first = attributes[2];

            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));

          emit_insn (gen_movsi (dest, source));

            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);

            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;

        emit_insn (mask <= 0xff
                   ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (total_shift > 0)

            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);

      shift_gen_fun = gen_shifty_op;

        /* If the topmost bit that matters is set, set the topmost bits
           that don't matter.  This way, we might be able to get a shorter
           signed constant.  */
        if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
          mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (currently_expanding_to_rtl
            || reload_in_progress || reload_completed)

            /* Cases 3 and 4 should be handled by this split
               only while combining  */
            gcc_assert (kind <= 2);

                emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));

            emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));

            operands[2] = GEN_INT (total_shift);
            shift_gen_fun (ASHIFT, operands);

        if (kind != 4 && total_shift < 16)

            neg = -ext_shift_amounts[total_shift][1];

              neg -= ext_shift_amounts[total_shift][2];

        emit_insn (gen_and_shl_scratch (dest, source,

                                        GEN_INT (total_shift + neg),

        emit_insn (gen_movsi (dest, dest));
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                                    (match_operand:SI 2 "const_int_operand" "n")
                         (match_operand:SI 3 "const_int_operand" "n")
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */

shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)

  int left, size, insize, ext;
  int cost = 0, best_cost;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  gcc_assert (insize > 0);
  /* Default to left / right shift.  */

  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];

      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
         below, by alternative 3 or something even better.  */
      if (cost < best_cost)

  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)

          cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
          if (cost < best_cost)

              kind = ext / (unsigned) 8;

      /* Check if we can do a sloppy shift with a final signed shift
         restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
        cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
         and do a final sign extend?  */
      else if (size <= 16)
        cost = ext_shift_insns[ext - insize] + 1
               + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;

      if (cost < best_cost)

          kind = ext / (unsigned) 8 + 2;

  /* Check if we can sign extend in r0 */

      cost = 3 + shift_insns[left];
      if (cost < best_cost)

      /* Try the same with a final signed shift.  */

          cost = 3 + ext_shift_insns[left + 1] + 1;
          if (cost < best_cost)

      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */

shl_sext_length (rtx insn)

  rtx set_src, left_rtx, size_rtx;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
/* Generate rtl for this pattern */

gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)

  int left, size, insize, cost;

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;

        int ext = kind & 1 ? 8 : 16;
        int shift2 = size - ext;

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)

            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));

          emit_insn (gen_movsi (dest, source));

            operands[2] = GEN_INT (ext - insize);
            gen_shifty_hi_op (ASHIFT, operands);

                   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));

            operands[2] = GEN_INT (shift2);
            gen_shifty_op (ASHIFT, operands);

            if (EXT_SHIFT_SIGNED (shift2))

                operands[2] = GEN_INT (shift2 + 1);
                gen_shifty_op (ASHIFT, operands);
                operands[2] = const1_rtx;
                gen_shifty_op (ASHIFTRT, operands);

                operands[2] = GEN_INT (shift2);
                gen_shifty_hi_op (ASHIFT, operands);

                operands[2] = GEN_INT (-shift2);
                gen_shifty_hi_op (LSHIFTRT, operands);

            emit_insn (size <= 8
                       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));

        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));

            operands[2] = GEN_INT (16 - insize);
            gen_shifty_hi_op (ASHIFT, operands);
            emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));

          /* Don't use gen_ashrsi3 because it generates new pseudos.  */

            gen_ashift (ASHIFTRT, 1, dest);

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)

            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));

        emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
        emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
        emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));

        operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
        gen_shifty_op (ASHIFT, operands);

          emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
/* Prefix a symbol_ref name with "datalabel".  */

gen_datalabel_ref (rtx sym)

  if (GET_CODE (sym) == LABEL_REF)
    return gen_rtx_CONST (GET_MODE (sym),
                          gen_rtx_UNSPEC (GET_MODE (sym),

  gcc_assert (GET_CODE (sym) == SYMBOL_REF);

  str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse.  */
  str = IDENTIFIER_POINTER (get_identifier (str));
  XSTR (sym, 0) = str;
/* The SH cannot load a large constant into a register; constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make things bigger.

   Worst case code looks like:

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   scan, find an instruction which needs a pcrel move.  Look forward, find
   the last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   Then the second move becomes the target for the shortening process.  */
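
/* Illustrative sketch of the scan described above, on a simplified insn
   list (this is not the real pass; the actual work is done on RTL by the
   reorg code later in this file).  Each insn that needs a pooled constant
   charges its length against a running byte count, and the pool is dumped
   at the last barrier still within range of the oldest reference.  The
   struct, field names and the return convention here are assumptions made
   only for the sake of the example.  */

struct example_insn
{
  int length;			/* Size of this insn in bytes.  */
  int is_barrier;		/* Nonzero after an unconditional jump.  */
};

static int
example_find_barrier (const struct example_insn *insns, int n, int start,
                      int max_count)
{
  int count = 0;
  int last_barrier = -1;
  int i;

  /* Look forward from the insn that needs the constant, remembering the
     last barrier that is still within MAX_COUNT bytes.  */
  for (i = start; i < n && count < max_count; i++)
    {
      if (insns[i].is_barrier)
        last_barrier = i;
      count += insns[i].length;
    }
  /* -1 means no barrier was in range; the real code then creates one by
     emitting a jump around a new constant table.  */
  return last_barrier;
}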
  rtx value;			/* Value in table.  */
  rtx label;			/* Label of value.  */
  rtx wend;			/* End of window.  */
  enum machine_mode mode;	/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;

/* The maximum number of constants that can fit into one pool, since
   constants in the range 0..510 are at least 2 bytes long, and in the
   range from there to 1018 at least 4 bytes.  */

#define MAX_POOL_SIZE 372
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
static rtx pool_window_label;
static int pool_window_last;

/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   this?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */
/* Add a constant to the pool and return its label.  */

add_constant (rtx x, enum machine_mode mode, rtx last_value)

  rtx lab, new, ref, newref;

  /* First see if we've already got it.  */
  for (i = 0; i < pool_size; i++)

      if (x->code == pool_vector[i].value->code
          && mode == pool_vector[i].mode)

          if (x->code == CODE_LABEL)

              if (XINT (x, 3) != XINT (pool_vector[i].value, 3))

          if (rtx_equal_p (x, pool_vector[i].value))

                  || ! rtx_equal_p (last_value, pool_vector[i-1].value))

                  new = gen_label_rtx ();
                  LABEL_REFS (new) = pool_vector[i].label;
                  pool_vector[i].label = lab = new;

              if (lab && pool_window_label)

                  newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
                  ref = pool_vector[pool_window_last].wend;
                  LABEL_NEXTREF (newref) = ref;
                  pool_vector[pool_window_last].wend = newref;

              pool_window_label = new;
              pool_window_last = i;

  /* Need a new one.  */
  pool_vector[pool_size].value = x;

  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))

      pool_vector[pool_size - 1].part_of_sequence_p = true;

    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL_RTX;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)

      newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
      ref = pool_vector[pool_window_last].wend;
      LABEL_NEXTREF (newref) = ref;
      pool_vector[pool_window_last].wend = newref;

  pool_window_label = lab;
  pool_window_last = pool_size;
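
/* Illustrative sketch of the sharing logic in add_constant: a new request
   first searches the pool for an equal value of the same mode and, when
   one is found, simply attaches another label to the existing entry
   instead of growing the pool.  The miniature integer-only version below
   shows the shape of that lookup; its types and name are invented for the
   example and nothing in the compiler uses them.  */

static int
example_add_constant (long *pool, int *pool_size_p, long value)
{
  int i;

  /* Reuse an existing entry when the value is already pooled.  */
  for (i = 0; i < *pool_size_p; i++)
    if (pool[i] == value)
      return i;

  /* Otherwise append a new entry, as the code above does when the linear
     scan finds no match.  */
  pool[*pool_size_p] = value;
  return (*pool_size_p)++;
}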
3124 /* Output the literal table. START, if nonzero, is the first instruction
3125 this table is needed for, and also indicates that there is at least one
3126 casesi_worker_2 instruction; We have to emit the operand3 labels from
3127 these insns at a 4-byte aligned position. BARRIER is the barrier
3128 after which we are to place the table. */
3131 dump_table (rtx start
, rtx barrier
)
3139 /* Do two passes, first time dump out the HI sized constants. */
3141 for (i
= 0; i
< pool_size
; i
++)
3143 pool_node
*p
= &pool_vector
[i
];
3145 if (p
->mode
== HImode
)
3149 scan
= emit_insn_after (gen_align_2 (), scan
);
3152 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3153 scan
= emit_label_after (lab
, scan
);
3154 scan
= emit_insn_after (gen_consttable_2 (p
->value
, const0_rtx
),
3156 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3158 lab
= XEXP (ref
, 0);
3159 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3162 else if (p
->mode
== DFmode
)
3170 scan
= emit_insn_after (gen_align_4 (), scan
);
3172 for (; start
!= barrier
; start
= NEXT_INSN (start
))
3173 if (GET_CODE (start
) == INSN
3174 && recog_memoized (start
) == CODE_FOR_casesi_worker_2
)
3176 rtx src
= SET_SRC (XVECEXP (PATTERN (start
), 0, 0));
3177 rtx lab
= XEXP (XVECEXP (src
, 0, 3), 0);
3179 scan
= emit_label_after (lab
, scan
);
3182 if (TARGET_FMOVD
&& TARGET_ALIGN_DOUBLE
&& have_df
)
3184 rtx align_insn
= NULL_RTX
;
3186 scan
= emit_label_after (gen_label_rtx (), scan
);
3187 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3190 for (i
= 0; i
< pool_size
; i
++)
3192 pool_node
*p
= &pool_vector
[i
];
3200 if (align_insn
&& !p
->part_of_sequence_p
)
3202 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3203 emit_label_before (lab
, align_insn
);
3204 emit_insn_before (gen_consttable_4 (p
->value
, const0_rtx
),
3206 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3208 lab
= XEXP (ref
, 0);
3209 emit_insn_before (gen_consttable_window_end (lab
),
3212 delete_insn (align_insn
);
3213 align_insn
= NULL_RTX
;
3218 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3219 scan
= emit_label_after (lab
, scan
);
3220 scan
= emit_insn_after (gen_consttable_4 (p
->value
,
3222 need_align
= ! need_align
;
3228 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3233 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3234 scan
= emit_label_after (lab
, scan
);
3235 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3242 if (p
->mode
!= HImode
)
3244 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3246 lab
= XEXP (ref
, 0);
3247 scan
= emit_insn_after (gen_consttable_window_end (lab
),
3256 for (i
= 0; i
< pool_size
; i
++)
3258 pool_node
*p
= &pool_vector
[i
];
3269 scan
= emit_label_after (gen_label_rtx (), scan
);
3270 scan
= emit_insn_after (gen_align_4 (), scan
);
3272 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3273 scan
= emit_label_after (lab
, scan
);
3274 scan
= emit_insn_after (gen_consttable_4 (p
->value
, const0_rtx
),
3282 scan
= emit_label_after (gen_label_rtx (), scan
);
3283 scan
= emit_insn_after (gen_align_4 (), scan
);
3285 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3286 scan
= emit_label_after (lab
, scan
);
3287 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3294 if (p
->mode
!= HImode
)
3296 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3298 lab
= XEXP (ref
, 0);
3299 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3304 scan
= emit_insn_after (gen_consttable_end (), scan
);
3305 scan
= emit_barrier_after (scan
);
3307 pool_window_label
= NULL_RTX
;
3308 pool_window_last
= 0;
/* Return nonzero if constant would be an ok source for a
   mov.w instead of a mov.l.  */

  return (GET_CODE (src) == CONST_INT
          && INTVAL (src) >= -32768
          && INTVAL (src) <= 32767);
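
/* Illustrative sketch: the test above accepts exactly the constants that
   fit in a signed 16-bit value, because a mov.w pool entry is loaded with
   sign extension and occupies only 2 bytes, while mov.l needs a full
   4-byte entry.  A plain-C restatement of that range check (example only,
   not used by the compiler):  */

static int
example_fits_mov_w (long value)
{
  return value >= -32768 && value <= 32767;
}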
3322 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3324 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3325 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3326 need to fix it if the input value is CONST_OK_FOR_I08. */
3329 broken_move (rtx insn
)
3331 if (GET_CODE (insn
) == INSN
)
3333 rtx pat
= PATTERN (insn
);
3334 if (GET_CODE (pat
) == PARALLEL
)
3335 pat
= XVECEXP (pat
, 0, 0);
3336 if (GET_CODE (pat
) == SET
3337 /* We can load any 8 bit value if we don't care what the high
3338 order bits end up as. */
3339 && GET_MODE (SET_DEST (pat
)) != QImode
3340 && (CONSTANT_P (SET_SRC (pat
))
3341 /* Match mova_const. */
3342 || (GET_CODE (SET_SRC (pat
)) == UNSPEC
3343 && XINT (SET_SRC (pat
), 1) == UNSPEC_MOVA
3344 && GET_CODE (XVECEXP (SET_SRC (pat
), 0, 0)) == CONST
))
3346 && GET_CODE (SET_SRC (pat
)) == CONST_DOUBLE
3347 && (fp_zero_operand (SET_SRC (pat
))
3348 || fp_one_operand (SET_SRC (pat
)))
3349 /* ??? If this is a -m4 or -m4-single compilation, in general
3350 we don't know the current setting of fpscr, so disable fldi.
3351 There is an exception if this was a register-register move
3352 before reload - and hence it was ascertained that we have
3353 single precision setting - and in a post-reload optimization
3354 we changed this to do a constant load. In that case
3355 we don't have an r0 clobber, hence we must use fldi. */
3356 && (! TARGET_SH4
|| TARGET_FMOVD
3357 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn
), 0, 2), 0))
3359 && GET_CODE (SET_DEST (pat
)) == REG
3360 && FP_REGISTER_P (REGNO (SET_DEST (pat
))))
3362 && GET_MODE (SET_DEST (pat
)) == SImode
3363 && GET_CODE (SET_SRC (pat
)) == CONST_INT
3364 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat
))))
3365 && (GET_CODE (SET_SRC (pat
)) != CONST_INT
3366 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat
)))))
3376 return (GET_CODE (insn
) == INSN
3377 && GET_CODE (PATTERN (insn
)) == SET
3378 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC
3379 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPEC_MOVA
3380 /* Don't match mova_const. */
3381 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn
)), 0, 0)) == LABEL_REF
);
3384 /* Fix up a mova from a switch that went out of range. */
3386 fixup_mova (rtx mova
)
3390 SET_SRC (PATTERN (mova
)) = XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0);
3391 INSN_CODE (mova
) = -1;
3396 rtx lab
= gen_label_rtx ();
3397 rtx wpat
, wpat0
, wpat1
, wsrc
, diff
;
3401 worker
= NEXT_INSN (worker
);
3403 && GET_CODE (worker
) != CODE_LABEL
3404 && GET_CODE (worker
) != JUMP_INSN
);
3405 } while (GET_CODE (worker
) == NOTE
3406 || recog_memoized (worker
) != CODE_FOR_casesi_worker_1
);
3407 wpat
= PATTERN (worker
);
3408 wpat0
= XVECEXP (wpat
, 0, 0);
3409 wpat1
= XVECEXP (wpat
, 0, 1);
3410 wsrc
= SET_SRC (wpat0
);
3411 PATTERN (worker
) = (gen_casesi_worker_2
3412 (SET_DEST (wpat0
), XVECEXP (wsrc
, 0, 1),
3413 XEXP (XVECEXP (wsrc
, 0, 2), 0), lab
,
3415 INSN_CODE (worker
) = -1;
3416 diff
= gen_rtx_MINUS (Pmode
, XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0),
3417 gen_rtx_LABEL_REF (Pmode
, lab
));
3418 diff
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, diff
), UNSPEC_PIC
);
3419 SET_SRC (PATTERN (mova
)) = gen_rtx_CONST (Pmode
, diff
);
3420 INSN_CODE (mova
) = -1;
3424 /* Find the last barrier from insn FROM which is close enough to hold the
3425 constant pool. If we can't find one, then create one near the end of
3429 find_barrier (int num_mova
, rtx mova
, rtx from
)
3438 int leading_mova
= num_mova
;
3439 rtx barrier_before_mova
= 0, found_barrier
= 0, good_barrier
= 0;
3443 /* For HImode: range is 510, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 for the jump instruction
3445 that we may need to emit before the table, subtract 2 for the instruction
3446 that fills the jump delay slot (in very rare cases, reorg will take an
3447 instruction from after the constant pool or will leave the delay slot
3448 empty). This gives 510.
3449 For SImode: range is 1020, add 4 because pc counts from address of
3450 second instruction after this one, subtract 2 in case pc is 2 byte
3451 aligned, subtract 2 for the jump instruction that we may need to emit
3452 before the table, subtract 2 for the instruction that fills the jump
3453 delay slot. This gives 1018. */
3455 /* The branch will always be shortened now that the reference address for
3456 forward branches is the successor address, thus we need no longer make
3457 adjustments to the [sh]i_limit for -O0. */
3462 while (from
&& count_si
< si_limit
&& count_hi
< hi_limit
)
3464 int inc
= get_attr_length (from
);
3467 if (GET_CODE (from
) == CODE_LABEL
)
3470 new_align
= 1 << label_to_alignment (from
);
3471 else if (GET_CODE (prev_nonnote_insn (from
)) == BARRIER
)
3472 new_align
= 1 << barrier_align (from
);
3478 if (GET_CODE (from
) == BARRIER
)
3481 found_barrier
= from
;
3483 /* If we are at the end of the function, or in front of an alignment
3484 instruction, we need not insert an extra alignment. We prefer
3485 this kind of barrier. */
3486 if (barrier_align (from
) > 2)
3487 good_barrier
= from
;
3490 if (broken_move (from
))
3493 enum machine_mode mode
;
3495 pat
= PATTERN (from
);
3496 if (GET_CODE (pat
) == PARALLEL
)
3497 pat
= XVECEXP (pat
, 0, 0);
3498 src
= SET_SRC (pat
);
3499 dst
= SET_DEST (pat
);
3500 mode
= GET_MODE (dst
);
3502 /* We must explicitly check the mode, because sometimes the
3503 front end will generate code to load unsigned constants into
3504 HImode targets without properly sign extending them. */
3506 || (mode
== SImode
&& hi_const (src
) && REGNO (dst
) != FPUL_REG
))
3509 /* We put the short constants before the long constants, so
3510 we must count the length of short constants in the range
3511 for the long constants. */
3512 /* ??? This isn't optimal, but is easy to do. */
3517 /* We dump DF/DI constants before SF/SI ones, because
3518 the limit is the same, but the alignment requirements
3519 are higher. We may waste up to 4 additional bytes
3520 for alignment, and the DF/DI constant may have
3521 another SF/SI constant placed before it. */
3522 if (TARGET_SHCOMPACT
3524 && (mode
== DFmode
|| mode
== DImode
))
3529 while (si_align
> 2 && found_si
+ si_align
- 2 > count_si
)
3531 if (found_si
> count_si
)
3532 count_si
= found_si
;
3533 found_si
+= GET_MODE_SIZE (mode
);
3535 si_limit
-= GET_MODE_SIZE (mode
);
3545 barrier_before_mova
= good_barrier
? good_barrier
: found_barrier
;
3547 if (found_si
> count_si
)
3548 count_si
= found_si
;
3550 else if (GET_CODE (from
) == JUMP_INSN
3551 && (GET_CODE (PATTERN (from
)) == ADDR_VEC
3552 || GET_CODE (PATTERN (from
)) == ADDR_DIFF_VEC
))
3556 if (barrier_align (next_real_insn (from
)) == align_jumps_log
)
3558 /* We have just passed the barrier in front of the
3559 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3560 the ADDR_DIFF_VEC is accessed as data, just like our pool
3561 constants, this is a good opportunity to accommodate what
3562 we have gathered so far.
3563 If we waited any longer, we could end up at a barrier in
3564 front of code, which gives worse cache usage for separated
3565 instruction / data caches. */
3566 good_barrier
= found_barrier
;
3571 rtx body
= PATTERN (from
);
3572 inc
= XVECLEN (body
, 1) * GET_MODE_SIZE (GET_MODE (body
));
3575 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3576 else if (GET_CODE (from
) == JUMP_INSN
3578 && ! TARGET_SMALLCODE
)
3584 if (new_align
> si_align
)
3586 si_limit
-= (count_si
- 1) & (new_align
- si_align
);
3587 si_align
= new_align
;
3589 count_si
= (count_si
+ new_align
- 1) & -new_align
;
3594 if (new_align
> hi_align
)
3596 hi_limit
-= (count_hi
- 1) & (new_align
- hi_align
);
3597 hi_align
= new_align
;
3599 count_hi
= (count_hi
+ new_align
- 1) & -new_align
;
3601 from
= NEXT_INSN (from
);
3608 /* Try as we might, the leading mova is out of range. Change
3609 it into a load (which will become a pcload) and retry. */
3611 return find_barrier (0, 0, mova
);
3615 /* Insert the constant pool table before the mova instruction,
3616 to prevent the mova label reference from going out of range. */
3618 good_barrier
= found_barrier
= barrier_before_mova
;
3624 if (good_barrier
&& next_real_insn (found_barrier
))
3625 found_barrier
= good_barrier
;
3629 /* We didn't find a barrier in time to dump our stuff,
3630 so we'll make one. */
3631 rtx label
= gen_label_rtx ();
3633 /* If we exceeded the range, then we must back up over the last
3634 instruction we looked at. Otherwise, we just need to undo the
3635 NEXT_INSN at the end of the loop. */
3636 if (count_hi
> hi_limit
|| count_si
> si_limit
)
3637 from
= PREV_INSN (PREV_INSN (from
));
3639 from
= PREV_INSN (from
);
3641 /* Walk back to be just before any jump or label.
3642 Putting it before a label reduces the number of times the branch
3643 around the constant pool table will be hit. Putting it before
3644 a jump makes it more likely that the bra delay slot will be
3646 while (GET_CODE (from
) == JUMP_INSN
|| GET_CODE (from
) == NOTE
3647 || GET_CODE (from
) == CODE_LABEL
)
3648 from
= PREV_INSN (from
);
3650 from
= emit_jump_insn_after (gen_jump (label
), from
);
3651 JUMP_LABEL (from
) = label
;
3652 LABEL_NUSES (label
) = 1;
3653 found_barrier
= emit_barrier_after (from
);
3654 emit_label_after (label
, found_barrier
);
3657 return found_barrier
;
3660 /* If the instruction INSN is implemented by a special function, and we can
3661 positively find the register that is used to call the sfunc, and this
3662 register is not used anywhere else in this instruction - except as the
3663 destination of a set, return this register; else, return 0. */
3665 sfunc_uses_reg (rtx insn
)
3668 rtx pattern
, part
, reg_part
, reg
;
3670 if (GET_CODE (insn
) != INSN
)
3672 pattern
= PATTERN (insn
);
3673 if (GET_CODE (pattern
) != PARALLEL
|| get_attr_type (insn
) != TYPE_SFUNC
)
3676 for (reg_part
= 0, i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3678 part
= XVECEXP (pattern
, 0, i
);
3679 if (GET_CODE (part
) == USE
&& GET_MODE (XEXP (part
, 0)) == SImode
)
3684 reg
= XEXP (reg_part
, 0);
3685 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 0; i
--)
3687 part
= XVECEXP (pattern
, 0, i
);
3688 if (part
== reg_part
|| GET_CODE (part
) == CLOBBER
)
3690 if (reg_mentioned_p (reg
, ((GET_CODE (part
) == SET
3691 && GET_CODE (SET_DEST (part
)) == REG
)
3692 ? SET_SRC (part
) : part
)))
3698 /* See if the only way in which INSN uses REG is by calling it, or by
3699 setting it while calling it. Set *SET to a SET rtx if the register
3703 noncall_uses_reg (rtx reg
, rtx insn
, rtx
*set
)
3709 reg2
= sfunc_uses_reg (insn
);
3710 if (reg2
&& REGNO (reg2
) == REGNO (reg
))
3712 pattern
= single_set (insn
);
3714 && GET_CODE (SET_DEST (pattern
)) == REG
3715 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3719 if (GET_CODE (insn
) != CALL_INSN
)
3721 /* We don't use rtx_equal_p because we don't care if the mode is
3723 pattern
= single_set (insn
);
3725 && GET_CODE (SET_DEST (pattern
)) == REG
3726 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3732 par
= PATTERN (insn
);
3733 if (GET_CODE (par
) == PARALLEL
)
3734 for (i
= XVECLEN (par
, 0) - 1; i
>= 0; i
--)
3736 part
= XVECEXP (par
, 0, i
);
3737 if (GET_CODE (part
) != SET
&& reg_mentioned_p (reg
, part
))
3740 return reg_mentioned_p (reg
, SET_SRC (pattern
));
3746 pattern
= PATTERN (insn
);
3748 if (GET_CODE (pattern
) == PARALLEL
)
3752 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3753 if (reg_mentioned_p (reg
, XVECEXP (pattern
, 0, i
)))
3755 pattern
= XVECEXP (pattern
, 0, 0);
3758 if (GET_CODE (pattern
) == SET
)
3760 if (reg_mentioned_p (reg
, SET_DEST (pattern
)))
3762 /* We don't use rtx_equal_p, because we don't care if the
3763 mode is different. */
3764 if (GET_CODE (SET_DEST (pattern
)) != REG
3765 || REGNO (reg
) != REGNO (SET_DEST (pattern
)))
3771 pattern
= SET_SRC (pattern
);
3774 if (GET_CODE (pattern
) != CALL
3775 || GET_CODE (XEXP (pattern
, 0)) != MEM
3776 || ! rtx_equal_p (reg
, XEXP (XEXP (pattern
, 0), 0)))
3782 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3783 general registers. Bits 0..15 mean that the respective registers
3784 are used as inputs in the instruction. Bits 16..31 mean that the
3785 registers 0..15, respectively, are used as outputs, or are clobbered.
3786 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3788 regs_used (rtx x
, int is_dest
)
3796 code
= GET_CODE (x
);
3801 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3802 << (REGNO (x
) + is_dest
));
3806 rtx y
= SUBREG_REG (x
);
3808 if (GET_CODE (y
) != REG
)
3811 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3813 subreg_regno_offset (REGNO (y
),
3816 GET_MODE (x
)) + is_dest
));
3820 return regs_used (SET_SRC (x
), 0) | regs_used (SET_DEST (x
), 16);
3822 /* If there was a return value, it must have been indicated with USE. */
3837 fmt
= GET_RTX_FORMAT (code
);
3839 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
3844 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3845 used
|= regs_used (XVECEXP (x
, i
, j
), is_dest
);
3847 else if (fmt
[i
] == 'e')
3848 used
|= regs_used (XEXP (x
, i
), is_dest
);
3853 /* Create an instruction that prevents redirection of a conditional branch
3854 to the destination of the JUMP with address ADDR.
3855 If the branch needs to be implemented as an indirect jump, try to find
3856 a scratch register for it.
3857 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3858 If any preceding insn that doesn't fit into a delay slot is good enough,
3859 pass 1. Pass 2 if a definite blocking insn is needed.
3860 -1 is used internally to avoid deep recursion.
3861 If a blocking instruction is made or recognized, return it. */
3864 gen_block_redirect (rtx jump
, int addr
, int need_block
)
3867 rtx prev
= prev_nonnote_insn (jump
);
3870 /* First, check if we already have an instruction that satisfies our need. */
3871 if (prev
&& GET_CODE (prev
) == INSN
&& ! INSN_DELETED_P (prev
))
3873 if (INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
3875 if (GET_CODE (PATTERN (prev
)) == USE
3876 || GET_CODE (PATTERN (prev
)) == CLOBBER
3877 || get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
3879 else if ((need_block
&= ~1) < 0)
3881 else if (recog_memoized (prev
) == CODE_FOR_block_branch_redirect
)
3884 if (GET_CODE (PATTERN (jump
)) == RETURN
)
3888 /* Reorg even does nasty things with return insns that cause branches
3889 to go out of range - see find_end_label and callers. */
3890 return emit_insn_before (gen_block_branch_redirect (const0_rtx
) , jump
);
3892 /* We can't use JUMP_LABEL here because it might be undefined
3893 when not optimizing. */
3894 dest
= XEXP (SET_SRC (PATTERN (jump
)), 0);
3895 /* If the branch is out of range, try to find a scratch register for it. */
3897 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3901 /* Don't look for the stack pointer as a scratch register,
3902 it would cause trouble if an interrupt occurred. */
3903 unsigned try = 0x7fff, used
;
3904 int jump_left
= flag_expensive_optimizations
+ 1;
3906 /* It is likely that the most recent eligible instruction is wanted for
3907 the delay slot. Therefore, find out which registers it uses, and
3908 try to avoid using them. */
3910 for (scan
= jump
; (scan
= PREV_INSN (scan
)); )
3914 if (INSN_DELETED_P (scan
))
3916 code
= GET_CODE (scan
);
3917 if (code
== CODE_LABEL
|| code
== JUMP_INSN
)
3920 && GET_CODE (PATTERN (scan
)) != USE
3921 && GET_CODE (PATTERN (scan
)) != CLOBBER
3922 && get_attr_in_delay_slot (scan
) == IN_DELAY_SLOT_YES
)
3924 try &= ~regs_used (PATTERN (scan
), 0);
3928 for (used
= dead
= 0, scan
= JUMP_LABEL (jump
);
3929 (scan
= NEXT_INSN (scan
)); )
3933 if (INSN_DELETED_P (scan
))
3935 code
= GET_CODE (scan
);
3938 used
|= regs_used (PATTERN (scan
), 0);
3939 if (code
== CALL_INSN
)
3940 used
|= regs_used (CALL_INSN_FUNCTION_USAGE (scan
), 0);
3941 dead
|= (used
>> 16) & ~used
;
3947 if (code
== JUMP_INSN
)
3949 if (jump_left
-- && simplejump_p (scan
))
3950 scan
= JUMP_LABEL (scan
);
3956 /* Mask out the stack pointer again, in case it was
3957 the only 'free' register we have found. */
3960 /* If the immediate destination is still in range, check for possible
3961 threading with a jump beyond the delay slot insn.
3962 Don't check if we are called recursively; the jump has been or will be
3963 checked in a different invocation then. */
3965 else if (optimize
&& need_block
>= 0)
3967 rtx next
= next_active_insn (next_active_insn (dest
));
3968 if (next
&& GET_CODE (next
) == JUMP_INSN
3969 && GET_CODE (PATTERN (next
)) == SET
3970 && recog_memoized (next
) == CODE_FOR_jump_compact
)
3972 dest
= JUMP_LABEL (next
);
3974 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3976 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), -1);
3982 rtx reg
= gen_rtx_REG (SImode
, exact_log2 (dead
& -dead
));
3984 /* It would be nice if we could convert the jump into an indirect
3985 jump / far branch right now, and thus exposing all constituent
3986 instructions to further optimization. However, reorg uses
3987 simplejump_p to determine if there is an unconditional jump where
3988 it should try to schedule instructions from the target of the
3989 branch; simplejump_p fails for indirect jumps even if they have
3991 rtx insn
= emit_insn_before (gen_indirect_jump_scratch
3992 (reg
, GEN_INT (INSN_UID (JUMP_LABEL (jump
))))
3994 /* ??? We would like this to have the scope of the jump, but that
3995 scope will change when a delay slot insn of an inner scope is added.
3996 Hence, after delay slot scheduling, we'll have to expect
3997 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4000 INSN_LOCATOR (insn
) = INSN_LOCATOR (jump
);
4001 INSN_CODE (insn
) = CODE_FOR_indirect_jump_scratch
;
4004 else if (need_block
)
4005 /* We can't use JUMP_LABEL here because it might be undefined
4006 when not optimizing. */
4007 return emit_insn_before (gen_block_branch_redirect
4008 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))))
4013 #define CONDJUMP_MIN -252
4014 #define CONDJUMP_MAX 262
4017 /* A label (to be placed) in front of the jump
4018 that jumps to our ultimate destination. */
4020 /* Where we are going to insert it if we cannot move the jump any farther,
4021 or the jump itself if we have picked up an existing jump. */
4023 /* The ultimate destination. */
4025 struct far_branch
*prev
;
4026 /* If the branch has already been created, its address;
4027 else the address of its first prospective user. */
4031 static void gen_far_branch (struct far_branch
*);
4032 enum mdep_reorg_phase_e mdep_reorg_phase
;
4034 gen_far_branch (struct far_branch
*bp
)
4036 rtx insn
= bp
->insert_place
;
4038 rtx label
= gen_label_rtx ();
4041 emit_label_after (label
, insn
);
4044 jump
= emit_jump_insn_after (gen_jump (bp
->far_label
), insn
);
4045 LABEL_NUSES (bp
->far_label
)++;
4048 jump
= emit_jump_insn_after (gen_return (), insn
);
4049 /* Emit a barrier so that reorg knows that any following instructions
4050 are not reachable via a fall-through path.
4051 But don't do this when not optimizing, since we wouldn't suppress the
4052 alignment for the barrier then, and could end up with out-of-range
4053 pc-relative loads. */
4055 emit_barrier_after (jump
);
4056 emit_label_after (bp
->near_label
, insn
);
4057 JUMP_LABEL (jump
) = bp
->far_label
;
4058 ok
= invert_jump (insn
, label
, 1);
4061 /* If we are branching around a jump (rather than a return), prevent
4062 reorg from using an insn from the jump target as the delay slot insn -
4063 when reorg did this, it pessimized code (we rather hide the delay slot)
4064 and it could cause branches to go out of range. */
4067 (gen_stuff_delay_slot
4068 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))),
4069 GEN_INT (recog_memoized (insn
) == CODE_FOR_branch_false
)),
4071 /* Prevent reorg from undoing our splits. */
4072 gen_block_redirect (jump
, bp
->address
+= 2, 2);
4075 /* Fix up ADDR_DIFF_VECs. */
4077 fixup_addr_diff_vecs (rtx first
)
4081 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4083 rtx vec_lab
, pat
, prev
, prevpat
, x
, braf_label
;
4085 if (GET_CODE (insn
) != JUMP_INSN
4086 || GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
4088 pat
= PATTERN (insn
);
4089 vec_lab
= XEXP (XEXP (pat
, 0), 0);
4091 /* Search the matching casesi_jump_2. */
4092 for (prev
= vec_lab
; ; prev
= PREV_INSN (prev
))
4094 if (GET_CODE (prev
) != JUMP_INSN
)
4096 prevpat
= PATTERN (prev
);
4097 if (GET_CODE (prevpat
) != PARALLEL
|| XVECLEN (prevpat
, 0) != 2)
4099 x
= XVECEXP (prevpat
, 0, 1);
4100 if (GET_CODE (x
) != USE
)
4103 if (GET_CODE (x
) == LABEL_REF
&& XEXP (x
, 0) == vec_lab
)
      /* FIXME: This is a bug in the optimizer, but it seems harmless
         to just avoid panicking.  */
4111 /* Emit the reference label of the braf where it belongs, right after
4112 the casesi_jump_2 (i.e. braf). */
4113 braf_label
= XEXP (XEXP (SET_SRC (XVECEXP (prevpat
, 0, 0)), 1), 0);
4114 emit_label_after (braf_label
, prev
);
4116 /* Fix up the ADDR_DIF_VEC to be relative
4117 to the reference address of the braf. */
4118 XEXP (XEXP (pat
, 0), 0) = braf_label
;
4122 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4123 a barrier. Return the base 2 logarithm of the desired alignment. */
4125 barrier_align (rtx barrier_or_label
)
4127 rtx next
= next_real_insn (barrier_or_label
), pat
, prev
;
4128 int slot
, credit
, jump_to_next
= 0;
4133 pat
= PATTERN (next
);
4135 if (GET_CODE (pat
) == ADDR_DIFF_VEC
)
4138 if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == UNSPECV_ALIGN
)
4139 /* This is a barrier in front of a constant table. */
4142 prev
= prev_real_insn (barrier_or_label
);
4143 if (GET_CODE (PATTERN (prev
)) == ADDR_DIFF_VEC
)
4145 pat
= PATTERN (prev
);
4146 /* If this is a very small table, we want to keep the alignment after
4147 the table to the minimum for proper code alignment. */
4148 return ((TARGET_SMALLCODE
4149 || ((unsigned) XVECLEN (pat
, 1) * GET_MODE_SIZE (GET_MODE (pat
))
4150 <= (unsigned) 1 << (CACHE_LOG
- 2)))
4151 ? 1 << TARGET_SHMEDIA
: align_jumps_log
);
4154 if (TARGET_SMALLCODE
)
4157 if (! TARGET_SH2
|| ! optimize
)
4158 return align_jumps_log
;
4160 /* When fixing up pcloads, a constant table might be inserted just before
4161 the basic block that ends with the barrier. Thus, we can't trust the
4162 instruction lengths before that. */
4163 if (mdep_reorg_phase
> SH_FIXUP_PCLOAD
)
4165 /* Check if there is an immediately preceding branch to the insn beyond
4166 the barrier. We must weight the cost of discarding useful information
4167 from the current cache line when executing this branch and there is
4168 an alignment, against that of fetching unneeded insn in front of the
4169 branch target when there is no alignment. */
4171 /* There are two delay_slot cases to consider. One is the simple case
4172 where the preceding branch is to the insn beyond the barrier (simple
4173 delay slot filling), and the other is where the preceding branch has
4174 a delay slot that is a duplicate of the insn after the barrier
4175 (fill_eager_delay_slots) and the branch is to the insn after the insn
4176 after the barrier. */
4178 /* PREV is presumed to be the JUMP_INSN for the barrier under
4179 investigation. Skip to the insn before it. */
4180 prev
= prev_real_insn (prev
);
4182 for (slot
= 2, credit
= (1 << (CACHE_LOG
- 2)) + 2;
4183 credit
>= 0 && prev
&& GET_CODE (prev
) == INSN
;
4184 prev
= prev_real_insn (prev
))
4187 if (GET_CODE (PATTERN (prev
)) == USE
4188 || GET_CODE (PATTERN (prev
)) == CLOBBER
)
4190 if (GET_CODE (PATTERN (prev
)) == SEQUENCE
)
4192 prev
= XVECEXP (PATTERN (prev
), 0, 1);
4193 if (INSN_UID (prev
) == INSN_UID (next
))
4195 /* Delay slot was filled with insn at jump target. */
4202 get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
4204 credit
-= get_attr_length (prev
);
4207 && GET_CODE (prev
) == JUMP_INSN
4208 && JUMP_LABEL (prev
))
4212 || next_real_insn (JUMP_LABEL (prev
)) == next
4213 /* If relax_delay_slots() decides NEXT was redundant
4214 with some previous instruction, it will have
4215 redirected PREV's jump to the following insn. */
4216 || JUMP_LABEL (prev
) == next_nonnote_insn (next
)
4217 /* There is no upper bound on redundant instructions
4218 that might have been skipped, but we must not put an
4219 alignment where none had been before. */
4220 || (x
= (NEXT_INSN (NEXT_INSN (PREV_INSN (prev
)))),
4222 && (INSN_CODE (x
) == CODE_FOR_block_branch_redirect
4223 || INSN_CODE (x
) == CODE_FOR_indirect_jump_scratch
4224 || INSN_CODE (x
) == CODE_FOR_stuff_delay_slot
))))
4226 rtx pat
= PATTERN (prev
);
4227 if (GET_CODE (pat
) == PARALLEL
)
4228 pat
= XVECEXP (pat
, 0, 0);
4229 if (credit
- slot
>= (GET_CODE (SET_SRC (pat
)) == PC
? 2 : 0))
4235 return align_jumps_log
;
4238 /* If we are inside a phony loop, almost any kind of label can turn up as the
4239 first one in the loop. Aligning a braf label causes incorrect switch
4240 destination addresses; we can detect braf labels because they are
4241 followed by a BARRIER.
4242 Applying loop alignment to small constant or switch tables is a waste
4243 of space, so we suppress this too. */
4245 sh_loop_align (rtx label
)
4250 next
= next_nonnote_insn (next
);
4251 while (next
&& GET_CODE (next
) == CODE_LABEL
);
4255 || GET_CODE (PATTERN (next
)) == ADDR_DIFF_VEC
4256 || recog_memoized (next
) == CODE_FOR_consttable_2
)
4259 return align_loops_log
;
4262 /* Do a final pass over the function, just before delayed branch
4268 rtx first
, insn
, mova
= NULL_RTX
;
4270 rtx r0_rtx
= gen_rtx_REG (Pmode
, 0);
4271 rtx r0_inc_rtx
= gen_rtx_POST_INC (Pmode
, r0_rtx
);
4273 first
= get_insns ();
4275 /* We must split call insns before introducing `mova's. If we're
4276 optimizing, they'll have already been split. Otherwise, make
4277 sure we don't split them too late. */
4279 split_all_insns_noflow ();
  /* If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call.  It does no harm to not generate these
     pseudo-ops.  However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry.  */
4290 mdep_reorg_phase
= SH_INSERT_USES_LABELS
;
4293 /* Remove all REG_LABEL notes. We want to use them for our own
4294 purposes. This works because none of the remaining passes
4295 need to look at them.
4297 ??? But it may break in the future. We should use a machine
4298 dependent REG_NOTE, or some other approach entirely. */
4299 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4305 while ((note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
)) != 0)
4306 remove_note (insn
, note
);
4310 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4312 rtx pattern
, reg
, link
, set
, scan
, dies
, label
;
4313 int rescan
= 0, foundinsn
= 0;
4315 if (GET_CODE (insn
) == CALL_INSN
)
4317 pattern
= PATTERN (insn
);
4319 if (GET_CODE (pattern
) == PARALLEL
)
4320 pattern
= XVECEXP (pattern
, 0, 0);
4321 if (GET_CODE (pattern
) == SET
)
4322 pattern
= SET_SRC (pattern
);
4324 if (GET_CODE (pattern
) != CALL
4325 || GET_CODE (XEXP (pattern
, 0)) != MEM
)
4328 reg
= XEXP (XEXP (pattern
, 0), 0);
4332 reg
= sfunc_uses_reg (insn
);
4337 if (GET_CODE (reg
) != REG
)
4340 /* This is a function call via REG. If the only uses of REG
4341 between the time that it is set and the time that it dies
4342 are in function calls, then we can associate all the
4343 function calls with the setting of REG. */
4345 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
4347 if (REG_NOTE_KIND (link
) != 0)
4349 set
= single_set (XEXP (link
, 0));
4350 if (set
&& rtx_equal_p (reg
, SET_DEST (set
)))
4352 link
= XEXP (link
, 0);
4359 /* ??? Sometimes global register allocation will have
4360 deleted the insn pointed to by LOG_LINKS. Try
4361 scanning backward to find where the register is set. */
4362 for (scan
= PREV_INSN (insn
);
4363 scan
&& GET_CODE (scan
) != CODE_LABEL
;
4364 scan
= PREV_INSN (scan
))
4366 if (! INSN_P (scan
))
4369 if (! reg_mentioned_p (reg
, scan
))
4372 if (noncall_uses_reg (reg
, scan
, &set
))
4386 /* The register is set at LINK. */
4388 /* We can only optimize the function call if the register is
4389 being set to a symbol. In theory, we could sometimes
4390 optimize calls to a constant location, but the assembler
4391 and linker do not support that at present. */
4392 if (GET_CODE (SET_SRC (set
)) != SYMBOL_REF
4393 && GET_CODE (SET_SRC (set
)) != LABEL_REF
)
4396 /* Scan forward from LINK to the place where REG dies, and
4397 make sure that the only insns which use REG are
4398 themselves function calls. */
4400 /* ??? This doesn't work for call targets that were allocated
4401 by reload, since there may not be a REG_DEAD note for the
4405 for (scan
= NEXT_INSN (link
); scan
; scan
= NEXT_INSN (scan
))
4409 /* Don't try to trace forward past a CODE_LABEL if we haven't
4410 seen INSN yet. Ordinarily, we will only find the setting insn
4411 in LOG_LINKS if it is in the same basic block. However,
4412 cross-jumping can insert code labels in between the load and
4413 the call, and can result in situations where a single call
4414 insn may have two targets depending on where we came from. */
4416 if (GET_CODE (scan
) == CODE_LABEL
&& ! foundinsn
)
4419 if (! INSN_P (scan
))
4422 /* Don't try to trace forward past a JUMP. To optimize
4423 safely, we would have to check that all the
4424 instructions at the jump destination did not use REG. */
4426 if (GET_CODE (scan
) == JUMP_INSN
)
4429 if (! reg_mentioned_p (reg
, scan
))
4432 if (noncall_uses_reg (reg
, scan
, &scanset
))
4439 && (GET_CODE (scan
) == CALL_INSN
|| sfunc_uses_reg (scan
)))
4441 /* There is a function call to this register other
4442 than the one we are checking. If we optimize
4443 this call, we need to rescan again below. */
4447 /* ??? We shouldn't have to worry about SCANSET here.
4448 We should just be able to check for a REG_DEAD note
4449 on a function call. However, the REG_DEAD notes are
4450 apparently not dependable around libcalls; c-torture
4451 execute/920501-2 is a test case. If SCANSET is set,
4452 then this insn sets the register, so it must have
4453 died earlier. Unfortunately, this will only handle
4454 the cases in which the register is, in fact, set in a
4457 /* ??? We shouldn't have to use FOUNDINSN here.
4458 However, the LOG_LINKS fields are apparently not
4459 entirely reliable around libcalls;
4460 newlib/libm/math/e_pow.c is a test case. Sometimes
4461 an insn will appear in LOG_LINKS even though it is
4462 not the most recent insn which sets the register. */
4466 || find_reg_note (scan
, REG_DEAD
, reg
)))
4475 /* Either there was a branch, or some insn used REG
4476 other than as a function call address. */
4480 /* Create a code label, and put it in a REG_LABEL note on
4481 the insn which sets the register, and on each call insn
4482 which uses the register. In final_prescan_insn we look
4483 for the REG_LABEL notes, and output the appropriate label
4486 label
= gen_label_rtx ();
4487 REG_NOTES (link
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4489 REG_NOTES (insn
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4498 scan
= NEXT_INSN (scan
);
4500 && ((GET_CODE (scan
) == CALL_INSN
4501 && reg_mentioned_p (reg
, scan
))
4502 || ((reg2
= sfunc_uses_reg (scan
))
4503 && REGNO (reg2
) == REGNO (reg
))))
4505 = gen_rtx_INSN_LIST (REG_LABEL
, label
, REG_NOTES (scan
));
4507 while (scan
!= dies
);
  fixup_addr_diff_vecs (first);

  mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
  shorten_branches (first);

  /* Scan the function looking for move instructions which have to be
     changed to pc-relative loads and insert the literal tables.  */

  mdep_reorg_phase = SH_FIXUP_PCLOAD;
  for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
    {
      if (mova_p (insn))
	{
	  /* ??? basic block reordering can move a switch table dispatch
	     below the switch table.  Check if that has happened.
	     We only have the addresses available when optimizing; but then,
	     this check shouldn't be needed when not optimizing.  */
	  rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);

	  if (optimize
	      && (INSN_ADDRESSES (INSN_UID (insn))
		  > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
	    {
	      /* Change the mova into a load.
		 broken_move will then return true for it.  */
	      fixup_mova (insn);
	    }
	  else if (! num_mova++)
	    mova = insn;
	}
      else if (GET_CODE (insn) == JUMP_INSN
	       && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
	       && num_mova)
	{
	  rtx scan;
	  int total;

	  num_mova--;

	  /* Some code might have been inserted between the mova and
	     its ADDR_DIFF_VEC.  Check if the mova is still in range.  */
	  for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
	    total += get_attr_length (scan);

	  /* range of mova is 1020, add 4 because pc counts from address of
	     second instruction after this one, subtract 2 in case pc is 2
	     byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
	     cancels out with alignment effects of the mova itself.  */
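	  /* Worked example (illustrative, not from the original source):
	     with the mova at address A, the PC value used by the load is
	     A + 4, and up to 2 bytes can be lost when the PC is only
	     2-byte aligned, so the displacement budget works out to
	     1020 + 4 - 2 = 1022 bytes; TOTAL accumulated above is what
	     gets compared against that budget.  */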
	  if (total > 1022)
	    {
	      /* Change the mova into a load, and restart scanning
		 there.  broken_move will then return true for mova.  */
	      fixup_mova (mova);
	      insn = mova;
	      num_mova = 0;
	    }
	}
      if (broken_move (insn)
	  || (GET_CODE (insn) == INSN
	      && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
	{
	  /* Scan ahead looking for a barrier to stick the constant table
	     behind.  */
	  rtx barrier = find_barrier (num_mova, mova, insn);
	  rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
	  int need_aligned_label = 0;

	  if (num_mova && ! mova_p (mova))
	    {
	      /* find_barrier had to change the first mova into a
		 pcload; thus, we have to start with this new pcload.  */
	      insn = mova;
	      num_mova = 0;
	    }
4588 /* Now find all the moves between the points and modify them. */
4589 for (scan
= insn
; scan
!= barrier
; scan
= NEXT_INSN (scan
))
4591 if (GET_CODE (scan
) == CODE_LABEL
)
4593 if (GET_CODE (scan
) == INSN
4594 && recog_memoized (scan
) == CODE_FOR_casesi_worker_2
)
4595 need_aligned_label
= 1;
4596 if (broken_move (scan
))
4598 rtx
*patp
= &PATTERN (scan
), pat
= *patp
;
4602 enum machine_mode mode
;
4604 if (GET_CODE (pat
) == PARALLEL
)
4605 patp
= &XVECEXP (pat
, 0, 0), pat
= *patp
;
4606 src
= SET_SRC (pat
);
4607 dst
= SET_DEST (pat
);
4608 mode
= GET_MODE (dst
);
4610 if (mode
== SImode
&& hi_const (src
)
4611 && REGNO (dst
) != FPUL_REG
)
4616 while (GET_CODE (dst
) == SUBREG
)
4618 offset
+= subreg_regno_offset (REGNO (SUBREG_REG (dst
)),
4619 GET_MODE (SUBREG_REG (dst
)),
4622 dst
= SUBREG_REG (dst
);
4624 dst
= gen_rtx_REG (HImode
, REGNO (dst
) + offset
);
4626 if (GET_CODE (dst
) == REG
&& FP_ANY_REGISTER_P (REGNO (dst
)))
4628 /* This must be an insn that clobbers r0. */
4629 rtx
*clobberp
= &XVECEXP (PATTERN (scan
), 0,
4630 XVECLEN (PATTERN (scan
), 0)
4632 rtx clobber
= *clobberp
;
4634 gcc_assert (GET_CODE (clobber
) == CLOBBER
4635 && rtx_equal_p (XEXP (clobber
, 0), r0_rtx
));
4638 && reg_set_between_p (r0_rtx
, last_float_move
, scan
))
4642 && GET_MODE_SIZE (mode
) != 4
4643 && GET_MODE_SIZE (GET_MODE (last_float
)) == 4)
4645 lab
= add_constant (src
, mode
, last_float
);
4647 emit_insn_before (gen_mova (lab
), scan
);
4650 /* There will be a REG_UNUSED note for r0 on
4651 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4652 lest reorg:mark_target_live_regs will not
4653 consider r0 to be used, and we end up with delay
4654 slot insn in front of SCAN that clobbers r0. */
4656 = find_regno_note (last_float_move
, REG_UNUSED
, 0);
4658 /* If we are not optimizing, then there may not be
4661 PUT_MODE (note
, REG_INC
);
4663 *last_float_addr
= r0_inc_rtx
;
4665 last_float_move
= scan
;
4667 newsrc
= gen_const_mem (mode
,
4668 (((TARGET_SH4
&& ! TARGET_FMOVD
)
4669 || REGNO (dst
) == FPUL_REG
)
4672 last_float_addr
= &XEXP (newsrc
, 0);
4674 /* Remove the clobber of r0. */
4675 *clobberp
= gen_rtx_CLOBBER (GET_MODE (clobber
),
4676 gen_rtx_SCRATCH (Pmode
));
4678 /* This is a mova needing a label. Create it. */
4679 else if (GET_CODE (src
) == UNSPEC
4680 && XINT (src
, 1) == UNSPEC_MOVA
4681 && GET_CODE (XVECEXP (src
, 0, 0)) == CONST
)
4683 lab
= add_constant (XVECEXP (src
, 0, 0), mode
, 0);
4684 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4685 newsrc
= gen_rtx_UNSPEC (SImode
,
4686 gen_rtvec (1, newsrc
),
4691 lab
= add_constant (src
, mode
, 0);
4692 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4693 newsrc
= gen_const_mem (mode
, newsrc
);
4695 *patp
= gen_rtx_SET (VOIDmode
, dst
, newsrc
);
4696 INSN_CODE (scan
) = -1;
4699 dump_table (need_aligned_label
? insn
: 0, barrier
);
  mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
  INSN_ADDRESSES_FREE ();
  split_branches (first);

  /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
     also has an effect on the register that holds the address of the sfunc.
     Insert an extra dummy insn in front of each sfunc that pretends to
     use this register.  */
  if (flag_delayed_branch)
    {
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  rtx reg = sfunc_uses_reg (insn);

	  if (! reg)
	    continue;
	  emit_insn_before (gen_use_sfunc_addr (reg), insn);
	}
    }

  /* fpscr is not actually a user variable, but we pretend it is for the
     sake of the previous optimization passes, since we want it handled like
     one.  However, we don't have any debugging information for it, so turn
     it into a non-user variable now.  */
  if (TARGET_SH4)
    REG_USERVAR_P (get_fpscr_rtx ()) = 0;

  mdep_reorg_phase = SH_AFTER_MDEP_REORG;
}
static int
get_dest_uid (rtx label, int max_uid)
{
  rtx dest = next_real_insn (label);
  int dest_uid;

  if (! dest)
    /* This can happen for an undefined label.  */
    return 0;
  dest_uid = INSN_UID (dest);
  /* If this is a newly created branch redirection blocking instruction,
     we cannot index the branch_uid or insn_addresses arrays with its
     uid.  But then, we won't need to, because the actual destination is
     the following branch.  */
  while (dest_uid >= max_uid)
    {
      dest = NEXT_INSN (dest);
      dest_uid = INSN_UID (dest);
    }
  if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
    return 0;
  return dest_uid;
}
/* Split condbranches that are out of range.  Also add clobbers for
   scratch registers that are needed in far jumps.
   We do this before delay slot scheduling, so that it can take our
   newly created instructions into account.  It also allows us to
   find branches with common targets more easily.  */

static void
split_branches (rtx first)
{
  rtx insn;
  struct far_branch **uid_branch, *far_branch_list = 0;
  int max_uid = get_max_uid ();
  int ok;

  /* Find out which branches are out of range.  */
  shorten_branches (first);

  uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
  memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);

  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (! INSN_P (insn))
      continue;
    else if (INSN_DELETED_P (insn))
      {
	/* Shorten_branches would split this instruction again,
	   so transform it into a note.  */
	PUT_CODE (insn, NOTE);
	NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
	NOTE_SOURCE_FILE (insn) = 0;
      }
4788 else if (GET_CODE (insn
) == JUMP_INSN
4789 /* Don't mess with ADDR_DIFF_VEC */
4790 && (GET_CODE (PATTERN (insn
)) == SET
4791 || GET_CODE (PATTERN (insn
)) == RETURN
))
4793 enum attr_type type
= get_attr_type (insn
);
4794 if (type
== TYPE_CBRANCH
)
4798 if (get_attr_length (insn
) > 4)
4800 rtx src
= SET_SRC (PATTERN (insn
));
4801 rtx olabel
= XEXP (XEXP (src
, 1), 0);
4802 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4804 int dest_uid
= get_dest_uid (olabel
, max_uid
);
4805 struct far_branch
*bp
= uid_branch
[dest_uid
];
4807 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4808 the label if the LABEL_NUSES count drops to zero. There is
4809 always a jump_optimize pass that sets these values, but it
4810 proceeds to delete unreferenced code, and then if not
4811 optimizing, to un-delete the deleted instructions, thus
4812 leaving labels with too low uses counts. */
4815 JUMP_LABEL (insn
) = olabel
;
4816 LABEL_NUSES (olabel
)++;
4820 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4821 uid_branch
[dest_uid
] = bp
;
4822 bp
->prev
= far_branch_list
;
4823 far_branch_list
= bp
;
4825 = XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1), 0);
4826 LABEL_NUSES (bp
->far_label
)++;
4830 label
= bp
->near_label
;
4831 if (! label
&& bp
->address
- addr
>= CONDJUMP_MIN
)
4833 rtx block
= bp
->insert_place
;
4835 if (GET_CODE (PATTERN (block
)) == RETURN
)
4836 block
= PREV_INSN (block
);
4838 block
= gen_block_redirect (block
,
4840 label
= emit_label_after (gen_label_rtx (),
4842 bp
->near_label
= label
;
4844 else if (label
&& ! NEXT_INSN (label
))
4846 if (addr
+ 2 - bp
->address
<= CONDJUMP_MAX
)
4847 bp
->insert_place
= insn
;
4849 gen_far_branch (bp
);
4853 || (NEXT_INSN (label
) && bp
->address
- addr
< CONDJUMP_MIN
))
4855 bp
->near_label
= label
= gen_label_rtx ();
4856 bp
->insert_place
= insn
;
4859 ok
= redirect_jump (insn
, label
, 1);
4864 /* get_attr_length (insn) == 2 */
4865 /* Check if we have a pattern where reorg wants to redirect
4866 the branch to a label from an unconditional branch that
4868 /* We can't use JUMP_LABEL here because it might be undefined
4869 when not optimizing. */
4870 /* A syntax error might cause beyond to be NULL_RTX. */
4872 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1),
4876 && (GET_CODE (beyond
) == JUMP_INSN
4877 || ((beyond
= next_active_insn (beyond
))
4878 && GET_CODE (beyond
) == JUMP_INSN
))
4879 && GET_CODE (PATTERN (beyond
)) == SET
4880 && recog_memoized (beyond
) == CODE_FOR_jump_compact
4882 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond
)), 0)))
4883 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4885 gen_block_redirect (beyond
,
4886 INSN_ADDRESSES (INSN_UID (beyond
)), 1);
4889 next
= next_active_insn (insn
);
4891 if ((GET_CODE (next
) == JUMP_INSN
4892 || ((next
= next_active_insn (next
))
4893 && GET_CODE (next
) == JUMP_INSN
))
4894 && GET_CODE (PATTERN (next
)) == SET
4895 && recog_memoized (next
) == CODE_FOR_jump_compact
4897 (INSN_UID (XEXP (SET_SRC (PATTERN (next
)), 0)))
4898 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4900 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), 1);
4902 else if (type
== TYPE_JUMP
|| type
== TYPE_RETURN
)
4904 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4907 struct far_branch
*bp
;
4909 if (type
== TYPE_JUMP
)
4911 far_label
= XEXP (SET_SRC (PATTERN (insn
)), 0);
4912 dest_uid
= get_dest_uid (far_label
, max_uid
);
4915 /* Parse errors can lead to labels outside
4917 if (! NEXT_INSN (far_label
))
4922 JUMP_LABEL (insn
) = far_label
;
4923 LABEL_NUSES (far_label
)++;
4925 redirect_jump (insn
, NULL_RTX
, 1);
4929 bp
= uid_branch
[dest_uid
];
4932 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4933 uid_branch
[dest_uid
] = bp
;
4934 bp
->prev
= far_branch_list
;
4935 far_branch_list
= bp
;
4937 bp
->far_label
= far_label
;
4939 LABEL_NUSES (far_label
)++;
4941 else if (bp
->near_label
&& ! NEXT_INSN (bp
->near_label
))
4942 if (addr
- bp
->address
<= CONDJUMP_MAX
)
4943 emit_label_after (bp
->near_label
, PREV_INSN (insn
));
4946 gen_far_branch (bp
);
4952 bp
->insert_place
= insn
;
4954 emit_insn_before (gen_block_branch_redirect (const0_rtx
), insn
);
4956 gen_block_redirect (insn
, addr
, bp
->near_label
? 2 : 0);
4959 /* Generate all pending far branches,
4960 and free our references to the far labels. */
4961 while (far_branch_list
)
4963 if (far_branch_list
->near_label
4964 && ! NEXT_INSN (far_branch_list
->near_label
))
4965 gen_far_branch (far_branch_list
);
4967 && far_branch_list
->far_label
4968 && ! --LABEL_NUSES (far_branch_list
->far_label
))
4969 delete_insn (far_branch_list
->far_label
);
4970 far_branch_list
= far_branch_list
->prev
;
4973 /* Instruction length information is no longer valid due to the new
4974 instructions that have been generated. */
4975 init_insn_lengths ();
/* Dump out instruction addresses, which is useful for debugging the
   constant pool table stuff.

   If relaxing, output the label and pseudo-ops used to link together
   calls and the instruction which set the registers.  */

/* ??? The addresses printed by this routine for insns are nonsense for
   insns which are inside of a sequence where none of the inner insns have
   variable length.  This is because the second pass of shorten_branches
   does not bother to update them.  */

void
final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
		    int noperands ATTRIBUTE_UNUSED)
{
  if (TARGET_DUMPISIZE)
    fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));

  if (TARGET_RELAX)
    {
      rtx note;

      note = find_reg_note (insn, REG_LABEL, NULL_RTX);
      if (note)
	{
	  rtx pattern;

	  pattern = PATTERN (insn);
	  if (GET_CODE (pattern) == PARALLEL)
	    pattern = XVECEXP (pattern, 0, 0);
	  switch (GET_CODE (pattern))
	    {
	    case SET:
	      if (GET_CODE (SET_SRC (pattern)) != CALL
		  && get_attr_type (insn) != TYPE_SFUNC)
		{
		  targetm.asm_out.internal_label
		    (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
		  break;
		}
	      /* else FALLTHROUGH */
	    case CALL:
	      asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
			   CODE_LABEL_NUMBER (XEXP (note, 0)));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }
}
/* Dump out any constants accumulated in the final pass.  These will
   only be labels.  */

const char *
output_jump_label_table (void)
{
  int i;

  if (pool_size)
    {
      fprintf (asm_out_file, "\t.align 2\n");
      for (i = 0; i < pool_size; i++)
	{
	  pool_node *p = &pool_vector[i];

	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
					     CODE_LABEL_NUMBER (p->label));
	  output_asm_insn (".long %O0", &p->value);
	}
      pool_size = 0;
    }

  return "";
}
/* A full frame looks like:

   [ if current_function_anonymous_args

   local-0 <- fp points here.  */
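
/* A sketch of what the code below sets up (an illustration; the original
   diagram above is truncated): incoming stack arguments and any
   pretend/anonymous-argument bytes sit at the highest addresses, below
   them the registers saved by push_regs (PR and the call-saved general,
   FP and target registers), and below those the rounded local frame,
   with local-0 at the frame pointer when one is established.  */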
/* Number of bytes pushed for anonymous args, used to pass information
   between expand_prologue and expand_epilogue.  */

/* Adjust the stack by SIZE bytes.  REG holds the rtl of the register to be
   adjusted.  If epilogue_p is zero, this is for a prologue; otherwise, it's
   for an epilogue and a negative value means that it's for a sibcall
   epilogue.  If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
   all the registers that are about to be restored, and hence dead.  */

static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
		     HARD_REG_SET *live_regs_mask)
{
  rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
  if (size)
    {
      HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

      /* This test is bogus, as output_stack_adjust is used to re-align the
	 stack.  */
      gcc_assert (!(size % align));

      if (CONST_OK_FOR_ADD (size))
	emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
      /* Try to do it with two partial adjustments; however, we must make
	 sure that the stack is properly aligned at all times, in case
	 an interrupt occurs between the two partial adjustments.  */
      else if (CONST_OK_FOR_ADD (size / 2 & -align)
	       && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
	{
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
	}
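      /* Worked example (illustrative): on non-SHmedia targets the add
	 immediate is limited to 8 bits, so e.g. SIZE == 200 with an
	 alignment of 8 bytes is split into 200/2 & -8 == 96 and
	 200 - 96 == 104; both halves fit the immediate range and the
	 stack stays 8-byte aligned after the first add.  */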
5114 int temp
= epilogue_p
? 7 : (TARGET_SH5
? 0 : 1);
5117 /* If TEMP is invalid, we could temporarily save a general
5118 register to MACL. However, there is currently no need
5119 to handle this case, so just die when we see it. */
5121 || current_function_interrupt
5122 || ! call_really_used_regs
[temp
] || fixed_regs
[temp
])
5124 if (temp
< 0 && ! current_function_interrupt
5125 && (TARGET_SHMEDIA
|| epilogue_p
>= 0))
5128 COPY_HARD_REG_SET (temps
, call_used_reg_set
);
5129 AND_COMPL_HARD_REG_SET (temps
, call_fixed_reg_set
);
5133 if (current_function_return_rtx
)
5135 enum machine_mode mode
;
5136 mode
= GET_MODE (current_function_return_rtx
);
5137 if (BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
)
5138 nreg
= HARD_REGNO_NREGS (FIRST_RET_REG
, mode
);
5140 for (i
= 0; i
< nreg
; i
++)
5141 CLEAR_HARD_REG_BIT (temps
, FIRST_RET_REG
+ i
);
5142 if (current_function_calls_eh_return
)
5144 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_STACKADJ_REGNO
);
5145 for (i
= 0; i
<= 3; i
++)
5146 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_DATA_REGNO (i
));
5149 if (TARGET_SHMEDIA
&& epilogue_p
< 0)
5150 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
5151 CLEAR_HARD_REG_BIT (temps
, i
);
5152 if (epilogue_p
<= 0)
5154 for (i
= FIRST_PARM_REG
;
5155 i
< FIRST_PARM_REG
+ NPARM_REGS (SImode
); i
++)
5156 CLEAR_HARD_REG_BIT (temps
, i
);
5157 if (cfun
->static_chain_decl
!= NULL
)
5158 CLEAR_HARD_REG_BIT (temps
, STATIC_CHAIN_REGNUM
);
5160 temp
= scavenge_reg (&temps
);
5162 if (temp
< 0 && live_regs_mask
)
5163 temp
= scavenge_reg (live_regs_mask
);
5166 rtx adj_reg
, tmp_reg
, mem
;
5168 /* If we reached here, the most likely case is the (sibcall)
5169 epilogue for non SHmedia. Put a special push/pop sequence
5170 for such case as the last resort. This looks lengthy but
5171 would not be problem because it seems to be very
5174 gcc_assert (!TARGET_SHMEDIA
&& epilogue_p
);
5177 /* ??? There is still the slight possibility that r4 or
5178 r5 have been reserved as fixed registers or assigned
5179 as global registers, and they change during an
5180 interrupt. There are possible ways to handle this:
5182 - If we are adjusting the frame pointer (r14), we can do
5183 with a single temp register and an ordinary push / pop
5185 - Grab any call-used or call-saved registers (i.e. not
5186 fixed or globals) for the temps we need. We might
5187 also grab r14 if we are adjusting the stack pointer.
5188 If we can't find enough available registers, issue
5189 a diagnostic and die - the user must have reserved
5190 way too many registers.
5191 But since all this is rather unlikely to happen and
5192 would require extra testing, we just die if r4 / r5
5193 are not available. */
5194 gcc_assert (!fixed_regs
[4] && !fixed_regs
[5]
5195 && !global_regs
[4] && !global_regs
[5]);
5197 adj_reg
= gen_rtx_REG (GET_MODE (reg
), 4);
5198 tmp_reg
= gen_rtx_REG (GET_MODE (reg
), 5);
5199 emit_move_insn (gen_tmp_stack_mem (Pmode
, reg
), adj_reg
);
5200 emit_insn (GEN_MOV (adj_reg
, GEN_INT (size
)));
5201 emit_insn (GEN_ADD3 (adj_reg
, adj_reg
, reg
));
5202 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5203 emit_move_insn (mem
, tmp_reg
);
5204 emit_move_insn (tmp_reg
, gen_tmp_stack_mem (Pmode
, reg
));
5205 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5206 emit_move_insn (mem
, tmp_reg
);
5207 emit_move_insn (reg
, adj_reg
);
5208 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5209 emit_move_insn (adj_reg
, mem
);
5210 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5211 emit_move_insn (tmp_reg
, mem
);
5214 const_reg
= gen_rtx_REG (GET_MODE (reg
), temp
);
5216 /* If SIZE is negative, subtract the positive value.
5217 This sometimes allows a constant pool entry to be shared
5218 between prologue and epilogue code. */
5221 emit_insn (GEN_MOV (const_reg
, GEN_INT (-size
)));
5222 insn
= emit_fn (GEN_SUB3 (reg
, reg
, const_reg
));
5226 emit_insn (GEN_MOV (const_reg
, GEN_INT (size
)));
5227 insn
= emit_fn (GEN_ADD3 (reg
, reg
, const_reg
));
5231 = (gen_rtx_EXPR_LIST
5232 (REG_FRAME_RELATED_EXPR
,
5233 gen_rtx_SET (VOIDmode
, reg
,
5234 gen_rtx_PLUS (SImode
, reg
, GEN_INT (size
))),
5244 RTX_FRAME_RELATED_P (x
) = 1;
/* Output RTL to push register RN onto the stack.  */

static rtx
push (int rn)
{
  rtx x;

  if (rn == FPUL_REG)
    x = gen_push_fpul ();
  else if (rn == FPSCR_REG)
    x = gen_push_fpscr ();
  else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
	   && FP_OR_XD_REGISTER_P (rn))
    {
      if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
	return NULL_RTX;
      x = gen_push_4 (gen_rtx_REG (DFmode, rn));
    }
  else if (TARGET_SH2E && FP_REGISTER_P (rn))
    x = gen_push_e (gen_rtx_REG (SFmode, rn));
  else
    x = gen_push (gen_rtx_REG (SImode, rn));

  x = frame_insn (x);
  REG_NOTES (x)
    = gen_rtx_EXPR_LIST (REG_INC,
			 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
  return x;
}
/* Output RTL to pop register RN from the stack.  */

static void
pop (int rn)
{
  rtx x;

  if (rn == FPUL_REG)
    x = gen_pop_fpul ();
  else if (rn == FPSCR_REG)
    x = gen_pop_fpscr ();
  else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
	   && FP_OR_XD_REGISTER_P (rn))
    {
      if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
	return;
      x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
    }
  else if (TARGET_SH2E && FP_REGISTER_P (rn))
    x = gen_pop_e (gen_rtx_REG (SFmode, rn));
  else
    x = gen_pop (gen_rtx_REG (SImode, rn));

  x = emit_insn (x);
  REG_NOTES (x)
    = gen_rtx_EXPR_LIST (REG_INC,
			 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
}
/* Generate code to push the regs specified in the mask.  */

static void
push_regs (HARD_REG_SET *mask, int interrupt_handler)
{
  int i;
  int skip_fpscr = 0;

  /* Push PR last; this gives better latencies after the prologue, and
     candidates for the return delay slot when there are no general
     registers pushed.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* If this is an interrupt handler, and the SZ bit varies,
	 and we have to push any floating point register, we need
	 to switch to the correct precision first.  */
      if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
	  && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
	{
	  HARD_REG_SET unsaved;

	  push (FPSCR_REG);
	  COMPL_HARD_REG_SET (unsaved, *mask);
	  fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
	  skip_fpscr = 1;
	}
      if (i != PR_REG
	  && (i != FPSCR_REG || ! skip_fpscr)
	  && TEST_HARD_REG_BIT (*mask, i))
	push (i);
    }
  if (TEST_HARD_REG_BIT (*mask, PR_REG))
    push (PR_REG);
}
/* Calculate how much extra space is needed to save all callee-saved
   target registers.
   LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */

static int
shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
{
  int reg;
  int stack_space = 0;
  int interrupt_handler = sh_cfun_interrupt_handler_p ();

  for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
    if ((! call_really_used_regs[reg] || interrupt_handler)
	&& ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
      /* Leave space to save this target register on the stack,
	 in case target register allocation wants to use it.  */
      stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
  return stack_space;
}

/* Decide whether we should reserve space for callee-save target registers,
   in case target register allocation wants to use them.  REGS_SAVED is
   the space, in bytes, that is already required for register saves.
   LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */

static int
shmedia_reserve_space_for_target_registers_p (int regs_saved,
					      HARD_REG_SET *live_regs_mask)
{
  if (optimize_size)
    return 0;
  return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
}

/* Decide how much space to reserve for callee-save target registers
   in case target register allocation wants to use them.
   LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */

static int
shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
{
  if (shmedia_space_reserved_for_target_registers)
    return shmedia_target_regs_stack_space (live_regs_mask);
  else
    return 0;
}
/* Work out the registers which need to be saved, both as a mask and a
   count of saved words.  Return the count.

   If doing a pragma interrupt function, then push all regs used by the
   function, and if we call another function (we can tell by looking at PR),
   make sure that all the regs it clobbers are safe too.  */

static int
calc_live_regs (HARD_REG_SET *live_regs_mask)
{
  unsigned int reg;
  int count;
  tree attrs;
  bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
  bool nosave_low_regs;
  int pr_live, has_call;

  attrs = DECL_ATTRIBUTES (current_function_decl);
  interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
  trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
  interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
  nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;

  CLEAR_HARD_REG_SET (*live_regs_mask);
5411 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& interrupt_handler
5412 && regs_ever_live
[FPSCR_REG
])
5413 target_flags
&= ~MASK_FPU_SINGLE
;
5414 /* If we can save a lot of saves by switching to double mode, do that. */
5415 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& TARGET_FPU_SINGLE
)
5416 for (count
= 0, reg
= FIRST_FP_REG
; reg
<= LAST_FP_REG
; reg
+= 2)
5417 if (regs_ever_live
[reg
] && regs_ever_live
[reg
+1]
5418 && (! call_really_used_regs
[reg
]
5419 || interrupt_handler
)
5422 target_flags
&= ~MASK_FPU_SINGLE
;
5425 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5426 knows how to use it. That means the pseudo originally allocated for
5427 the initial value can become the PR_MEDIA_REG hard register, as seen for
5428 execute/20010122-1.c:test9. */
5430 /* ??? this function is called from initial_elimination_offset, hence we
5431 can't use the result of sh_media_register_for_return here. */
5432 pr_live
= sh_pr_n_sets ();
5435 rtx pr_initial
= has_hard_reg_initial_val (Pmode
, PR_REG
);
5436 pr_live
= (pr_initial
5437 ? (GET_CODE (pr_initial
) != REG
5438 || REGNO (pr_initial
) != (PR_REG
))
5439 : regs_ever_live
[PR_REG
]);
5440 /* For Shcompact, if not optimizing, we end up with a memory reference
5441 using the return address pointer for __builtin_return_address even
5442 though there is no actual need to put the PR register on the stack. */
5443 pr_live
|= regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
];
5445 /* Force PR to be live if the prologue has to call the SHmedia
5446 argument decoder or register saver. */
5447 if (TARGET_SHCOMPACT
5448 && ((current_function_args_info
.call_cookie
5449 & ~ CALL_COOKIE_RET_TRAMP (1))
5450 || current_function_has_nonlocal_label
))
5452 has_call
= TARGET_SHMEDIA
? ! leaf_function_p () : pr_live
;
5453 for (count
= 0, reg
= FIRST_PSEUDO_REGISTER
; reg
-- != 0; )
5455 if (reg
== (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
)
5458 ? (/* Need to save all the regs ever live. */
5459 (regs_ever_live
[reg
]
5460 || (call_really_used_regs
[reg
]
5461 && (! fixed_regs
[reg
] || reg
== MACH_REG
|| reg
== MACL_REG
5462 || reg
== PIC_OFFSET_TABLE_REGNUM
)
5464 || (TARGET_SHMEDIA
&& has_call
5465 && REGISTER_NATURAL_MODE (reg
) == SImode
5466 && (GENERAL_REGISTER_P (reg
) || TARGET_REGISTER_P (reg
))))
5467 && reg
!= STACK_POINTER_REGNUM
&& reg
!= ARG_POINTER_REGNUM
5468 && reg
!= RETURN_ADDRESS_POINTER_REGNUM
5469 && reg
!= T_REG
&& reg
!= GBR_REG
5470 /* Push fpscr only on targets which have FPU */
5471 && (reg
!= FPSCR_REG
|| TARGET_FPU_ANY
))
5472 : (/* Only push those regs which are used and need to be saved. */
5475 && current_function_args_info
.call_cookie
5476 && reg
== PIC_OFFSET_TABLE_REGNUM
)
5477 || (regs_ever_live
[reg
]
5478 && (!call_really_used_regs
[reg
]
5479 || (trapa_handler
&& reg
== FPSCR_REG
&& TARGET_FPU_ANY
)))
5480 || (current_function_calls_eh_return
5481 && (reg
== EH_RETURN_DATA_REGNO (0)
5482 || reg
== EH_RETURN_DATA_REGNO (1)
5483 || reg
== EH_RETURN_DATA_REGNO (2)
5484 || reg
== EH_RETURN_DATA_REGNO (3)))
5485 || ((reg
== MACL_REG
|| reg
== MACH_REG
)
5486 && regs_ever_live
[reg
]
5487 && sh_cfun_attr_renesas_p ())
5490 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5491 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5493 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
|| TARGET_SH5
) && TARGET_FMOVD
5494 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg
)) == MODE_FLOAT
)
5496 if (FP_REGISTER_P (reg
))
5498 if (! TARGET_FPU_SINGLE
&& ! regs_ever_live
[reg
^ 1])
5500 SET_HARD_REG_BIT (*live_regs_mask
, (reg
^ 1));
5501 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
^ 1));
5504 else if (XD_REGISTER_P (reg
))
5506 /* Must switch to double mode to access these registers. */
5507 target_flags
&= ~MASK_FPU_SINGLE
;
5511 if (nosave_low_regs
&& reg
== R8_REG
)
5514 /* If we have a target register optimization pass after prologue / epilogue
5515 threading, we need to assume all target registers will be live even if
5517 if (flag_branch_target_load_optimize2
5518 && TARGET_SAVE_ALL_TARGET_REGS
5519 && shmedia_space_reserved_for_target_registers
)
5520 for (reg
= LAST_TARGET_REG
; reg
>= FIRST_TARGET_REG
; reg
--)
5521 if ((! call_really_used_regs
[reg
] || interrupt_handler
)
5522 && ! TEST_HARD_REG_BIT (*live_regs_mask
, reg
))
5524 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5525 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5527 /* If this is an interrupt handler, we don't have any call-clobbered
5528 registers we can conveniently use for target register save/restore.
5529 Make sure we save at least one general purpose register when we need
5530 to save target registers. */
5531 if (interrupt_handler
5532 && hard_regs_intersect_p (live_regs_mask
,
5533 ®_class_contents
[TARGET_REGS
])
5534 && ! hard_regs_intersect_p (live_regs_mask
,
5535 ®_class_contents
[GENERAL_REGS
]))
5537 SET_HARD_REG_BIT (*live_regs_mask
, R0_REG
);
5538 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG
));
/* Code to generate prologue and epilogue sequences */

/* PUSHED is the number of bytes that are being pushed on the
   stack for register saves.  Return the frame size, padded
   appropriately so that the stack stays properly aligned.  */
static HOST_WIDE_INT
rounded_frame_size (int pushed)
{
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

  return ((size + pushed + align - 1) & -align) - pushed;
}
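
/* Worked example (illustrative): with get_frame_size () == 20, PUSHED == 12
   and an 8-byte stack boundary, ((20 + 12 + 8 - 1) & -8) - 12 == 32 - 12
   == 20, i.e. the locals are padded so that the frame size plus the
   register saves stays a multiple of the boundary.  */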
/* Choose a call-clobbered target-branch register that remains
   unchanged along the whole function.  We set it up as the return
   value in the prologue.  */
int
sh_media_register_for_return (void)
{
  int regno;
  int tr0_used;

  if (! current_function_is_leaf)
    return -1;
  if (lookup_attribute ("interrupt_handler",
			DECL_ATTRIBUTES (current_function_decl)))
    return -1;
  if (sh_cfun_interrupt_handler_p ())
    return -1;

  tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
  for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
    if (call_really_used_regs[regno] && ! regs_ever_live[regno])
      return regno;

  return -1;
}
/* The maximum registers we need to save are:
   - 62 general purpose registers (r15 is stack pointer, r63 is zero)
   - 32 floating point registers (for each pair, we save none,
	 one single precision value, or a double precision value).
   - 8 target registers
   - add 1 entry for a delimiter.  */
#define MAX_SAVED_REGS (62+32+8)

typedef struct save_entry_s
{
  unsigned char reg;
  unsigned char mode;
  short offset;
} save_entry;

#define MAX_TEMPS 4

/* There will be a delimiter entry with VOIDmode both at the start and the
   end of a filled in schedule.  The end delimiter has the offset of the
   save with the smallest (i.e. most negative) offset.  */
typedef struct save_schedule_s
{
  save_entry entries[MAX_SAVED_REGS + 2];
  int temps[MAX_TEMPS+1];
} save_schedule;
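
/* Usage sketch: a filled-in schedule is walked by skipping the leading
   VOIDmode delimiter and stopping at the trailing one, which is the
   pattern the prologue/epilogue code below uses:

     save_entry *entry;
     for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
       ... save or restore entry->reg in entry->mode at entry->offset ...

   (Illustrative only; the real loops appear in sh_expand_prologue and
   sh_expand_epilogue.)  */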
5610 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5611 use reverse order. Returns the last entry written to (not counting
5612 the delimiter). OFFSET_BASE is a number to be added to all offset
5616 sh5_schedule_saves (HARD_REG_SET
*live_regs_mask
, save_schedule
*schedule
,
5620 save_entry
*entry
= schedule
->entries
;
5624 if (! current_function_interrupt
)
5625 for (i
= FIRST_GENERAL_REG
; tmpx
< MAX_TEMPS
&& i
<= LAST_GENERAL_REG
; i
++)
5626 if (call_really_used_regs
[i
] && ! fixed_regs
[i
] && i
!= PR_MEDIA_REG
5627 && ! FUNCTION_ARG_REGNO_P (i
)
5628 && i
!= FIRST_RET_REG
5629 && ! (cfun
->static_chain_decl
!= NULL
&& i
== STATIC_CHAIN_REGNUM
)
5630 && ! (current_function_calls_eh_return
5631 && (i
== EH_RETURN_STACKADJ_REGNO
5632 || ((unsigned) i
>= EH_RETURN_DATA_REGNO (0)
5633 && (unsigned) i
<= EH_RETURN_DATA_REGNO (3)))))
5634 schedule
->temps
[tmpx
++] = i
;
5636 entry
->mode
= VOIDmode
;
5637 entry
->offset
= offset_base
;
5639 /* We loop twice: first, we save 8-byte aligned registers in the
5640 higher addresses, that are known to be aligned. Then, we
5641 proceed to saving 32-bit registers that don't need 8-byte
5643 If this is an interrupt function, all registers that need saving
5644 need to be saved in full. moreover, we need to postpone saving
5645 target registers till we have saved some general purpose registers
5646 we can then use as scratch registers. */
5647 offset
= offset_base
;
5648 for (align
= 1; align
>= 0; align
--)
5650 for (i
= FIRST_PSEUDO_REGISTER
- 1; i
>= 0; i
--)
5651 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5653 enum machine_mode mode
= REGISTER_NATURAL_MODE (i
);
5656 if (current_function_interrupt
)
5658 if (TARGET_REGISTER_P (i
))
5660 if (GENERAL_REGISTER_P (i
))
5663 if (mode
== SFmode
&& (i
% 2) == 1
5664 && ! TARGET_FPU_SINGLE
&& FP_REGISTER_P (i
)
5665 && (TEST_HARD_REG_BIT (*live_regs_mask
, (i
^ 1))))
5672 /* If we're doing the aligned pass and this is not aligned,
5673 or we're doing the unaligned pass and this is aligned,
5675 if ((GET_MODE_SIZE (mode
) % (STACK_BOUNDARY
/ BITS_PER_UNIT
) == 0)
5679 if (current_function_interrupt
5680 && GENERAL_REGISTER_P (i
)
5681 && tmpx
< MAX_TEMPS
)
5682 schedule
->temps
[tmpx
++] = i
;
5684 offset
-= GET_MODE_SIZE (mode
);
5687 entry
->offset
= offset
;
5690 if (align
&& current_function_interrupt
)
5691 for (i
= LAST_TARGET_REG
; i
>= FIRST_TARGET_REG
; i
--)
5692 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5694 offset
-= GET_MODE_SIZE (DImode
);
5696 entry
->mode
= DImode
;
5697 entry
->offset
= offset
;
5702 entry
->mode
= VOIDmode
;
5703 entry
->offset
= offset
;
5704 schedule
->temps
[tmpx
] = -1;
5709 sh_expand_prologue (void)
5711 HARD_REG_SET live_regs_mask
;
5714 int save_flags
= target_flags
;
5717 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl
));
5719 current_function_interrupt
= sh_cfun_interrupt_handler_p ();
5721 /* We have pretend args if we had an object sent partially in registers
5722 and partially on the stack, e.g. a large structure. */
5723 pretend_args
= current_function_pretend_args_size
;
5724 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
)
5725 && (NPARM_REGS(SImode
)
5726 > current_function_args_info
.arg_count
[(int) SH_ARG_INT
]))
5728 output_stack_adjust (-pretend_args
5729 - current_function_args_info
.stack_regs
* 8,
5730 stack_pointer_rtx
, 0, NULL
);
5732 if (TARGET_SHCOMPACT
&& flag_pic
&& current_function_args_info
.call_cookie
)
5733 /* We're going to use the PIC register to load the address of the
5734 incoming-argument decoder and/or of the return trampoline from
5735 the GOT, so make sure the PIC register is preserved and
5737 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5739 if (TARGET_SHCOMPACT
5740 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
5744 /* First, make all registers with incoming arguments that will
5745 be pushed onto the stack live, so that register renaming
5746 doesn't overwrite them. */
5747 for (reg
= 0; reg
< NPARM_REGS (SImode
); reg
++)
5748 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info
.call_cookie
)
5749 >= NPARM_REGS (SImode
) - reg
)
5750 for (; reg
< NPARM_REGS (SImode
); reg
++)
5751 emit_insn (gen_shcompact_preserve_incoming_args
5752 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5753 else if (CALL_COOKIE_INT_REG_GET
5754 (current_function_args_info
.call_cookie
, reg
) == 1)
5755 emit_insn (gen_shcompact_preserve_incoming_args
5756 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5758 emit_move_insn (gen_rtx_REG (Pmode
, MACL_REG
),
5760 emit_move_insn (gen_rtx_REG (SImode
, R0_REG
),
5761 GEN_INT (current_function_args_info
.call_cookie
));
5762 emit_move_insn (gen_rtx_REG (SImode
, MACH_REG
),
5763 gen_rtx_REG (SImode
, R0_REG
));
5765 else if (TARGET_SHMEDIA
)
5767 int tr
= sh_media_register_for_return ();
5771 rtx insn
= emit_move_insn (gen_rtx_REG (DImode
, tr
),
5772 gen_rtx_REG (DImode
, PR_MEDIA_REG
));
5774 /* ??? We should suppress saving pr when we don't need it, but this
5775 is tricky because of builtin_return_address. */
5777 /* If this function only exits with sibcalls, this copy
5778 will be flagged as dead. */
5779 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
5785 /* Emit the code for SETUP_VARARGS. */
5786 if (current_function_stdarg
)
5788 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
5790 /* Push arg regs as if they'd been provided by caller in stack. */
5791 for (i
= 0; i
< NPARM_REGS(SImode
); i
++)
5793 int rn
= NPARM_REGS(SImode
) + FIRST_PARM_REG
- i
- 1;
5796 if (i
>= (NPARM_REGS(SImode
)
5797 - current_function_args_info
.arg_count
[(int) SH_ARG_INT
]
5801 RTX_FRAME_RELATED_P (insn
) = 0;
5806 /* If we're supposed to switch stacks at function entry, do so now. */
5809 /* The argument specifies a variable holding the address of the
5810 stack the interrupt function should switch to/from at entry/exit. */
5812 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr
)));
5813 rtx sp_switch
= gen_rtx_SYMBOL_REF (Pmode
, s
);
5815 emit_insn (gen_sp_switch_1 (sp_switch
));
5818 d
= calc_live_regs (&live_regs_mask
);
5819 /* ??? Maybe we could save some switching if we can move a mode switch
5820 that already happens to be at the function start into the prologue. */
5821 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
5822 emit_insn (gen_toggle_sz ());
5826 int offset_base
, offset
;
5828 int offset_in_r0
= -1;
5830 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
5831 int total_size
, save_size
;
5832 save_schedule schedule
;
5836 if (call_really_used_regs
[R0_REG
] && ! fixed_regs
[R0_REG
]
5837 && ! current_function_interrupt
)
5838 r0
= gen_rtx_REG (Pmode
, R0_REG
);
5840 /* D is the actual number of bytes that we need for saving registers,
5841 however, in initial_elimination_offset we have committed to using
5842 an additional TREGS_SPACE amount of bytes - in order to keep both
5843 addresses to arguments supplied by the caller and local variables
5844 valid, we must keep this gap. Place it between the incoming
5845 arguments and the actually saved registers in a bid to optimize
5846 locality of reference. */
5847 total_size
= d
+ tregs_space
;
5848 total_size
+= rounded_frame_size (total_size
);
5849 save_size
= total_size
- rounded_frame_size (d
);
5850 if (save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
5851 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
5852 - save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
5854 /* If adjusting the stack in a single step costs nothing extra, do so.
5855 I.e. either if a single addi is enough, or we need a movi anyway,
5856 and we don't exceed the maximum offset range (the test for the
5857 latter is conservative for simplicity). */
5859 && (CONST_OK_FOR_I10 (-total_size
)
5860 || (! CONST_OK_FOR_I10 (-(save_size
+ d_rounding
))
5861 && total_size
<= 2044)))
5862 d_rounding
= total_size
- save_size
;
5864 offset_base
= d
+ d_rounding
;
5866 output_stack_adjust (-(save_size
+ d_rounding
), stack_pointer_rtx
,
5869 sh5_schedule_saves (&live_regs_mask
, &schedule
, offset_base
);
5870 tmp_pnt
= schedule
.temps
;
5871 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
5873 enum machine_mode mode
= entry
->mode
;
5874 unsigned int reg
= entry
->reg
;
5875 rtx reg_rtx
, mem_rtx
, pre_dec
= NULL_RTX
;
5878 offset
= entry
->offset
;
5880 reg_rtx
= gen_rtx_REG (mode
, reg
);
5882 mem_rtx
= gen_frame_mem (mode
,
5883 gen_rtx_PLUS (Pmode
,
5887 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_pre_dec
);
5894 if (HAVE_PRE_DECREMENT
5895 && (offset_in_r0
- offset
== GET_MODE_SIZE (mode
)
5896 || mem_rtx
== NULL_RTX
5897 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
5899 pre_dec
= gen_frame_mem (mode
, gen_rtx_PRE_DEC (Pmode
, r0
));
5901 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (pre_dec
, 0),
5910 offset
+= GET_MODE_SIZE (mode
);
5914 if (mem_rtx
!= NULL_RTX
)
5917 if (offset_in_r0
== -1)
5919 emit_move_insn (r0
, GEN_INT (offset
));
5920 offset_in_r0
= offset
;
5922 else if (offset
!= offset_in_r0
)
5927 GEN_INT (offset
- offset_in_r0
)));
5928 offset_in_r0
+= offset
- offset_in_r0
;
5931 if (pre_dec
!= NULL_RTX
)
5937 (Pmode
, r0
, stack_pointer_rtx
));
5941 offset
-= GET_MODE_SIZE (mode
);
5942 offset_in_r0
-= GET_MODE_SIZE (mode
);
5947 mem_rtx
= gen_frame_mem (mode
, r0
);
5949 mem_rtx
= gen_frame_mem (mode
,
5950 gen_rtx_PLUS (Pmode
,
5954 /* We must not use an r0-based address for target-branch
5955 registers or for special registers without pre-dec
5956 memory addresses, since we store their values in r0
5958 gcc_assert (!TARGET_REGISTER_P (reg
)
5959 && ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
5960 || mem_rtx
== pre_dec
));
5963 orig_reg_rtx
= reg_rtx
;
5964 if (TARGET_REGISTER_P (reg
)
5965 || ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
5966 && mem_rtx
!= pre_dec
))
5968 rtx tmp_reg
= gen_rtx_REG (GET_MODE (reg_rtx
), *tmp_pnt
);
5970 emit_move_insn (tmp_reg
, reg_rtx
);
5972 if (REGNO (tmp_reg
) == R0_REG
)
5976 gcc_assert (!refers_to_regno_p
5977 (R0_REG
, R0_REG
+1, mem_rtx
, (rtx
*) 0));
5980 if (*++tmp_pnt
<= 0)
5981 tmp_pnt
= schedule
.temps
;
5988 /* Mark as interesting for dwarf cfi generator */
5989 insn
= emit_move_insn (mem_rtx
, reg_rtx
);
5990 RTX_FRAME_RELATED_P (insn
) = 1;
5991 /* If we use an intermediate register for the save, we can't
5992 describe this exactly in cfi as a copy of the to-be-saved
5993 register into the temporary register and then the temporary
5994 register on the stack, because the temporary register can
5995 have a different natural size than the to-be-saved register.
5996 Thus, we gloss over the intermediate copy and pretend we do
5997 a direct save from the to-be-saved register. */
5998 if (REGNO (reg_rtx
) != reg
)
6002 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, orig_reg_rtx
);
6003 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6005 REG_NOTES (insn
) = note_rtx
;
6008 if (TARGET_SHCOMPACT
&& (offset_in_r0
!= -1))
6010 rtx reg_rtx
= gen_rtx_REG (mode
, reg
);
6012 rtx mem_rtx
= gen_frame_mem (mode
,
6013 gen_rtx_PLUS (Pmode
,
6017 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, reg_rtx
);
6018 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6020 REG_NOTES (insn
) = note_rtx
;
6025 gcc_assert (entry
->offset
== d_rounding
);
6028 push_regs (&live_regs_mask
, current_function_interrupt
);
6030 if (flag_pic
&& regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
])
6032 rtx insn
= get_last_insn ();
6033 rtx last
= emit_insn (gen_GOTaddr2picreg ());
6035 /* Mark these insns as possibly dead. Sometimes, flow2 may
6036 delete all uses of the PIC register. In this case, let it
6037 delete the initialization too. */
6040 insn
= NEXT_INSN (insn
);
6042 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6046 while (insn
!= last
);
6049 if (SHMEDIA_REGS_STACK_ADJUST ())
6051 /* This must NOT go through the PLT, otherwise mach and macl
6052 may be clobbered. */
6053 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6055 ? "__GCC_push_shmedia_regs"
6056 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT
);
6057 emit_insn (gen_shmedia_save_restore_regs_compact
6058 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6061 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6063 rtx insn
= emit_insn (gen_toggle_sz ());
6065 /* If we're lucky, a mode switch in the function body will
6066 overwrite fpscr, turning this insn dead. Tell flow this
6067 insn is ok to delete. */
6068 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6073 target_flags
= save_flags
;
6075 output_stack_adjust (-rounded_frame_size (d
) + d_rounding
,
6076 stack_pointer_rtx
, 0, NULL
);
6078 if (frame_pointer_needed
)
6079 frame_insn (GEN_MOV (hard_frame_pointer_rtx
, stack_pointer_rtx
));
6081 if (TARGET_SHCOMPACT
6082 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
6084 /* This must NOT go through the PLT, otherwise mach and macl
6085 may be clobbered. */
6086 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6087 "__GCC_shcompact_incoming_args", SFUNC_GOT
);
6088 emit_insn (gen_shcompact_incoming_args ());
6093 sh_expand_epilogue (bool sibcall_p
)
6095 HARD_REG_SET live_regs_mask
;
6099 int save_flags
= target_flags
;
6100 int frame_size
, save_size
;
6101 int fpscr_deferred
= 0;
6102 int e
= sibcall_p
? -1 : 1;
6104 d
= calc_live_regs (&live_regs_mask
);
6107 frame_size
= rounded_frame_size (d
);
6111 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
6113 if (d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
6114 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
6115 - d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
6117 total_size
= d
+ tregs_space
;
6118 total_size
+= rounded_frame_size (total_size
);
6119 save_size
= total_size
- frame_size
;
6121 /* If adjusting the stack in a single step costs nothing extra, do so.
6122 I.e. either if a single addi is enough, or we need a movi anyway,
6123 and we don't exceed the maximum offset range (the test for the
6124 latter is conservative for simplicity). */
6126 && ! frame_pointer_needed
6127 && (CONST_OK_FOR_I10 (total_size
)
6128 || (! CONST_OK_FOR_I10 (save_size
+ d_rounding
)
6129 && total_size
<= 2044)))
6130 d_rounding
= frame_size
;
6132 frame_size
-= d_rounding
;
6135 if (frame_pointer_needed
)
6137 /* We must avoid scheduling the epilogue with previous basic blocks
6138 when exception handling is enabled. See PR/18032. */
6139 if (flag_exceptions
)
6140 emit_insn (gen_blockage ());
6141 output_stack_adjust (frame_size
, hard_frame_pointer_rtx
, e
,
6144 /* We must avoid moving the stack pointer adjustment past code
6145 which reads from the local frame, else an interrupt could
6146 occur after the SP adjustment and clobber data in the local
6148 emit_insn (gen_blockage ());
6149 emit_insn (GEN_MOV (stack_pointer_rtx
, hard_frame_pointer_rtx
));
6151 else if (frame_size
)
6153 /* We must avoid moving the stack pointer adjustment past code
6154 which reads from the local frame, else an interrupt could
6155 occur after the SP adjustment and clobber data in the local
6157 emit_insn (gen_blockage ());
6158 output_stack_adjust (frame_size
, stack_pointer_rtx
, e
, &live_regs_mask
);
6161 if (SHMEDIA_REGS_STACK_ADJUST ())
6163 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6165 ? "__GCC_pop_shmedia_regs"
6166 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT
);
6167 /* This must NOT go through the PLT, otherwise mach and macl
6168 may be clobbered. */
6169 emit_insn (gen_shmedia_save_restore_regs_compact
6170 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6173 /* Pop all the registers. */
6175 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6176 emit_insn (gen_toggle_sz ());
6179 int offset_base
, offset
;
6180 int offset_in_r0
= -1;
6182 rtx r0
= gen_rtx_REG (Pmode
, R0_REG
);
6183 save_schedule schedule
;
6187 entry
= sh5_schedule_saves (&live_regs_mask
, &schedule
, d_rounding
);
6188 offset_base
= -entry
[1].offset
+ d_rounding
;
6189 tmp_pnt
= schedule
.temps
;
6190 for (; entry
->mode
!= VOIDmode
; entry
--)
6192 enum machine_mode mode
= entry
->mode
;
6193 int reg
= entry
->reg
;
6194 rtx reg_rtx
, mem_rtx
, post_inc
= NULL_RTX
, insn
;
6196 offset
= offset_base
+ entry
->offset
;
6197 reg_rtx
= gen_rtx_REG (mode
, reg
);
6199 mem_rtx
= gen_frame_mem (mode
,
6200 gen_rtx_PLUS (Pmode
,
6204 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_post_inc
);
6210 if (HAVE_POST_INCREMENT
6211 && (offset
== offset_in_r0
6212 || (offset
+ GET_MODE_SIZE (mode
) != d
+ d_rounding
6213 && mem_rtx
== NULL_RTX
)
6214 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
6216 post_inc
= gen_frame_mem (mode
, gen_rtx_POST_INC (Pmode
, r0
));
6218 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (post_inc
, 0),
6221 post_inc
= NULL_RTX
;
6230 if (mem_rtx
!= NULL_RTX
)
6233 if (offset_in_r0
== -1)
6235 emit_move_insn (r0
, GEN_INT (offset
));
6236 offset_in_r0
= offset
;
6238 else if (offset
!= offset_in_r0
)
6243 GEN_INT (offset
- offset_in_r0
)));
6244 offset_in_r0
+= offset
- offset_in_r0
;
6247 if (post_inc
!= NULL_RTX
)
6253 (Pmode
, r0
, stack_pointer_rtx
));
6259 offset_in_r0
+= GET_MODE_SIZE (mode
);
6262 mem_rtx
= gen_frame_mem (mode
, r0
);
6264 mem_rtx
= gen_frame_mem (mode
,
6265 gen_rtx_PLUS (Pmode
,
6269 gcc_assert ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
6270 || mem_rtx
== post_inc
);
6273 if ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
6274 && mem_rtx
!= post_inc
)
6276 insn
= emit_move_insn (r0
, mem_rtx
);
6279 else if (TARGET_REGISTER_P (reg
))
6281 rtx tmp_reg
= gen_rtx_REG (mode
, *tmp_pnt
);
6283 /* Give the scheduler a bit of freedom by using up to
6284 MAX_TEMPS registers in a round-robin fashion. */
6285 insn
= emit_move_insn (tmp_reg
, mem_rtx
);
6288 tmp_pnt
= schedule
.temps
;
6291 insn
= emit_move_insn (reg_rtx
, mem_rtx
);
6292 if (reg
== PR_MEDIA_REG
&& sh_media_register_for_return () >= 0)
6293 /* This is dead, unless we return with a sibcall. */
6294 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6299 gcc_assert (entry
->offset
+ offset_base
== d
+ d_rounding
);
6301 else /* ! TARGET_SH5 */
6304 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6306 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6308 int j
= (FIRST_PSEUDO_REGISTER
- 1) - i
;
6310 if (j
== FPSCR_REG
&& current_function_interrupt
&& TARGET_FMOVD
6311 && hard_regs_intersect_p (&live_regs_mask
,
6312 ®_class_contents
[DF_REGS
]))
6314 else if (j
!= PR_REG
&& TEST_HARD_REG_BIT (live_regs_mask
, j
))
6316 if (j
== FIRST_FP_REG
&& fpscr_deferred
)
6321 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6322 emit_insn (gen_toggle_sz ());
6323 target_flags
= save_flags
;
6325 output_stack_adjust (current_function_pretend_args_size
6326 + save_size
+ d_rounding
6327 + current_function_args_info
.stack_regs
* 8,
6328 stack_pointer_rtx
, e
, NULL
);
6330 if (current_function_calls_eh_return
)
6331 emit_insn (GEN_ADD3 (stack_pointer_rtx
, stack_pointer_rtx
,
6332 EH_RETURN_STACKADJ_RTX
));
6334 /* Switch back to the normal stack if necessary. */
6335 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl
)))
6336 emit_insn (gen_sp_switch_2 ());
6338 /* Tell flow the insn that pops PR isn't dead. */
6339 /* PR_REG will never be live in SHmedia mode, and we don't need to
6340 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6341 by the return pattern. */
6342 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6343 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, PR_REG
)));
static int sh_need_epilogue_known = 0;

int
sh_need_epilogue (void)
{
  if (! sh_need_epilogue_known)
    {
      rtx epilogue;

      start_sequence ();
      sh_expand_epilogue (0);
      epilogue = get_insns ();
      end_sequence ();
      sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
    }
  return sh_need_epilogue_known > 0;
}
/* Emit code to change the current function's return address to RA.
   TEMP is available as a scratch register, if needed.  */

void
sh_set_return_address (rtx ra, rtx tmp)
{
  HARD_REG_SET live_regs_mask;
  int d;
  int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
  int pr_offset;

  d = calc_live_regs (&live_regs_mask);

  /* If pr_reg isn't live, we can set it (or the register given in
     sh_media_register_for_return) directly.  */
  if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
    {
      rtx rr;

      if (TARGET_SHMEDIA)
	{
	  int rr_regno = sh_media_register_for_return ();

	  if (rr_regno < 0)
	    rr_regno = pr_reg;

	  rr = gen_rtx_REG (DImode, rr_regno);
	}
      else
	rr = gen_rtx_REG (SImode, pr_reg);

      emit_insn (GEN_MOV (rr, ra));
      /* Tell flow the register for return isn't dead.  */
      emit_insn (gen_rtx_USE (VOIDmode, rr));
      return;
    }

  if (TARGET_SH5)
    {
      int offset;
      save_schedule schedule;
      save_entry *entry;

      entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
      offset = entry[1].offset;
      for (; entry->mode != VOIDmode; entry--)
	if (entry->reg == pr_reg)
	  goto found;

      /* We can't find pr register.  */
      gcc_unreachable ();

    found:
      offset = entry->offset - offset;
      pr_offset = (rounded_frame_size (d) + offset
		   + SHMEDIA_REGS_STACK_ADJUST ());
    }
  else
    pr_offset = rounded_frame_size (d);

  emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
  emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));

  tmp = gen_frame_mem (Pmode, tmp);
  emit_insn (GEN_MOV (tmp, ra));
}
/* Clear variables at function end.  */

static void
sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			     HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  sh_need_epilogue_known = 0;
}
6441 sh_builtin_saveregs (void)
6443 /* First unnamed integer register. */
6444 int first_intreg
= current_function_args_info
.arg_count
[(int) SH_ARG_INT
];
6445 /* Number of integer registers we need to save. */
6446 int n_intregs
= MAX (0, NPARM_REGS (SImode
) - first_intreg
);
6447 /* First unnamed SFmode float reg */
6448 int first_floatreg
= current_function_args_info
.arg_count
[(int) SH_ARG_FLOAT
];
6449 /* Number of SFmode float regs to save. */
6450 int n_floatregs
= MAX (0, NPARM_REGS (SFmode
) - first_floatreg
);
6453 HOST_WIDE_INT alias_set
;
6459 int pushregs
= n_intregs
;
6461 while (pushregs
< NPARM_REGS (SImode
) - 1
6462 && (CALL_COOKIE_INT_REG_GET
6463 (current_function_args_info
.call_cookie
,
6464 NPARM_REGS (SImode
) - pushregs
)
6467 current_function_args_info
.call_cookie
6468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
6473 if (pushregs
== NPARM_REGS (SImode
))
6474 current_function_args_info
.call_cookie
6475 |= (CALL_COOKIE_INT_REG (0, 1)
6476 | CALL_COOKIE_STACKSEQ (pushregs
- 1));
6478 current_function_args_info
.call_cookie
6479 |= CALL_COOKIE_STACKSEQ (pushregs
);
6481 current_function_pretend_args_size
+= 8 * n_intregs
;
6483 if (TARGET_SHCOMPACT
)
6487 if (! TARGET_SH2E
&& ! TARGET_SH4
&& ! TARGET_SH5
)
6489 error ("__builtin_saveregs not supported by this subtarget");
6496 /* Allocate block of memory for the regs. */
6497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6498 Or can assign_stack_local accept a 0 SIZE argument? */
6499 bufsize
= (n_intregs
* UNITS_PER_WORD
) + (n_floatregs
* UNITS_PER_WORD
);
6502 regbuf
= gen_frame_mem (BLKmode
, gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
));
6503 else if (n_floatregs
& 1)
6507 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6508 addr
= copy_to_mode_reg (Pmode
, XEXP (regbuf
, 0));
6509 emit_insn (gen_iorsi3 (addr
, addr
, GEN_INT (UNITS_PER_WORD
)));
6510 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6512 else if (STACK_BOUNDARY
< 64 && TARGET_FPU_DOUBLE
&& n_floatregs
)
6516 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6517 addr
= copy_to_mode_reg (Pmode
, plus_constant (XEXP (regbuf
, 0), 4));
6518 mask
= copy_to_mode_reg (Pmode
, GEN_INT (-8));
6519 emit_insn (gen_andsi3 (addr
, addr
, mask
));
6520 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6523 regbuf
= assign_stack_local (BLKmode
, bufsize
, TARGET_FPU_DOUBLE
? 64 : 0);
6524 alias_set
= get_varargs_alias_set ();
6525 set_mem_alias_set (regbuf
, alias_set
);
6528 This is optimized to only save the regs that are necessary. Explicitly
6529 named args need not be saved. */
6531 move_block_from_reg (BASE_ARG_REG (SImode
) + first_intreg
,
6532 adjust_address (regbuf
, BLKmode
,
6533 n_floatregs
* UNITS_PER_WORD
),
6537 /* Return the address of the regbuf. */
6538 return XEXP (regbuf
, 0);
     This is optimized to only save the regs that are necessary.  Explicitly
     named args need not be saved.
     We explicitly build a pointer to the buffer because it halves the insn
     count when not optimizing (otherwise the pointer is built for each reg
     saved).
     We emit the moves in reverse order so that we can use predecrement.  */
6548 fpregs
= copy_to_mode_reg (Pmode
,
6549 plus_constant (XEXP (regbuf
, 0),
6550 n_floatregs
* UNITS_PER_WORD
));
6551 if (TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
6554 for (regno
= NPARM_REGS (DFmode
) - 2; regno
>= first_floatreg
; regno
-= 2)
6556 emit_insn (gen_addsi3 (fpregs
, fpregs
,
6557 GEN_INT (-2 * UNITS_PER_WORD
)));
6558 mem
= change_address (regbuf
, DFmode
, fpregs
);
6559 emit_move_insn (mem
,
6560 gen_rtx_REG (DFmode
, BASE_ARG_REG (DFmode
) + regno
));
6562 regno
= first_floatreg
;
6565 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6566 mem
= change_address (regbuf
, SFmode
, fpregs
);
6567 emit_move_insn (mem
,
6568 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
6569 - (TARGET_LITTLE_ENDIAN
!= 0)));
6573 for (regno
= NPARM_REGS (SFmode
) - 1; regno
>= first_floatreg
; regno
--)
6577 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6578 mem
= change_address (regbuf
, SFmode
, fpregs
);
6579 emit_move_insn (mem
,
6580 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
));
6583 /* Return the address of the regbuf. */
6584 return XEXP (regbuf
, 0);
6587 /* Define the `__builtin_va_list' type for the ABI. */
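/* For reference, the record built below corresponds roughly to the
   following layout (a sketch only; the real fields are FIELD_DECLs
   created with build_decl):

     struct __va_list_tag
     {
       void *__va_next_o;         -- next overflow (general reg) argument
       void *__va_next_o_limit;   -- end of the overflow register area
       void *__va_next_fp;        -- next FP register argument
       void *__va_next_fp_limit;  -- end of the FP register area
       void *__va_next_stack;     -- next stack-passed argument
     };

   For SH5, TARGET_HITACHI / renesas functions, and targets without FP
   argument registers, a plain pointer (ptr_type_node) is used instead.  */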
6590 sh_build_builtin_va_list (void)
6592 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6595 if (TARGET_SH5
|| (! TARGET_SH2E
&& ! TARGET_SH4
)
6596 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6597 return ptr_type_node
;
6599 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
6601 f_next_o
= build_decl (FIELD_DECL
, get_identifier ("__va_next_o"),
6603 f_next_o_limit
= build_decl (FIELD_DECL
,
6604 get_identifier ("__va_next_o_limit"),
6606 f_next_fp
= build_decl (FIELD_DECL
, get_identifier ("__va_next_fp"),
6608 f_next_fp_limit
= build_decl (FIELD_DECL
,
6609 get_identifier ("__va_next_fp_limit"),
6611 f_next_stack
= build_decl (FIELD_DECL
, get_identifier ("__va_next_stack"),
  DECL_FIELD_CONTEXT (f_next_o) = record;
  DECL_FIELD_CONTEXT (f_next_o_limit) = record;
  DECL_FIELD_CONTEXT (f_next_fp) = record;
  DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
  DECL_FIELD_CONTEXT (f_next_stack) = record;

  TYPE_FIELDS (record) = f_next_o;
  TREE_CHAIN (f_next_o) = f_next_o_limit;
  TREE_CHAIN (f_next_o_limit) = f_next_fp;
  TREE_CHAIN (f_next_fp) = f_next_fp_limit;
  TREE_CHAIN (f_next_fp_limit) = f_next_stack;

  layout_type (record);
6631 /* Implement `va_start' for varargs and stdarg. */
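/* A sketch of the resulting setup (based on the assignments below): the
   buffer returned by __builtin_saveregs holds the unnamed FP args first
   and the unnamed general-register args after them, so

     __va_next_fp       = buffer
     __va_next_fp_limit = buffer + UNITS_PER_WORD * nfp
     __va_next_o        = __va_next_fp_limit
     __va_next_o_limit  = __va_next_o + UNITS_PER_WORD * nint
     __va_next_stack    = nextarg  (first stack-passed argument)

   where nfp and nint are computed below from the named-argument counts.  */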
6634 sh_va_start (tree valist
, rtx nextarg
)
6636 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6637 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
6643 expand_builtin_saveregs ();
6644 std_expand_builtin_va_start (valist
, nextarg
);
6648 if ((! TARGET_SH2E
&& ! TARGET_SH4
)
6649 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6651 std_expand_builtin_va_start (valist
, nextarg
);
  f_next_o = TYPE_FIELDS (va_list_type_node);
  f_next_o_limit = TREE_CHAIN (f_next_o);
  f_next_fp = TREE_CHAIN (f_next_o_limit);
  f_next_fp_limit = TREE_CHAIN (f_next_fp);
  f_next_stack = TREE_CHAIN (f_next_fp_limit);
6661 next_o
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6663 next_o_limit
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6664 valist
, f_next_o_limit
, NULL_TREE
);
6665 next_fp
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_fp
), valist
, f_next_fp
,
6667 next_fp_limit
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6668 valist
, f_next_fp_limit
, NULL_TREE
);
6669 next_stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6670 valist
, f_next_stack
, NULL_TREE
);
6672 /* Call __builtin_saveregs. */
6673 u
= make_tree (ptr_type_node
, expand_builtin_saveregs ());
6674 t
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp
, u
);
6675 TREE_SIDE_EFFECTS (t
) = 1;
6676 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6678 nfp
= current_function_args_info
.arg_count
[SH_ARG_FLOAT
];
6683 u
= fold_build2 (PLUS_EXPR
, ptr_type_node
, u
,
6684 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nfp
));
6685 t
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_limit
, u
);
6686 TREE_SIDE_EFFECTS (t
) = 1;
6687 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6689 t
= build2 (MODIFY_EXPR
, ptr_type_node
, next_o
, u
);
6690 TREE_SIDE_EFFECTS (t
) = 1;
6691 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6693 nint
= current_function_args_info
.arg_count
[SH_ARG_INT
];
6698 u
= fold_build2 (PLUS_EXPR
, ptr_type_node
, u
,
6699 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nint
));
6700 t
= build2 (MODIFY_EXPR
, ptr_type_node
, next_o_limit
, u
);
6701 TREE_SIDE_EFFECTS (t
) = 1;
6702 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6704 u
= make_tree (ptr_type_node
, nextarg
);
6705 t
= build2 (MODIFY_EXPR
, ptr_type_node
, next_stack
, u
);
6706 TREE_SIDE_EFFECTS (t
) = 1;
6707 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6710 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6711 member, return it. */
6713 find_sole_member (tree type
)
6715 tree field
, member
= NULL_TREE
;
6717 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6719 if (TREE_CODE (field
) != FIELD_DECL
)
6721 if (!DECL_SIZE (field
))
6723 if (integer_zerop (DECL_SIZE (field
)))
6731 /* Implement `va_arg'. */
6734 sh_gimplify_va_arg_expr (tree valist
, tree type
, tree
*pre_p
,
6735 tree
*post_p ATTRIBUTE_UNUSED
)
6737 HOST_WIDE_INT size
, rsize
;
6738 tree tmp
, pptr_type_node
;
6739 tree addr
, lab_over
= NULL
, result
= NULL
;
6740 int pass_by_ref
= targetm
.calls
.must_pass_in_stack (TYPE_MODE (type
), type
);
6743 type
= build_pointer_type (type
);
6745 size
= int_size_in_bytes (type
);
6746 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6747 pptr_type_node
= build_pointer_type (ptr_type_node
);
6749 if (! TARGET_SH5
&& (TARGET_SH2E
|| TARGET_SH4
)
6750 && ! (TARGET_HITACHI
|| sh_cfun_attr_renesas_p ()))
6752 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6753 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
      f_next_o = TYPE_FIELDS (va_list_type_node);
      f_next_o_limit = TREE_CHAIN (f_next_o);
      f_next_fp = TREE_CHAIN (f_next_o_limit);
      f_next_fp_limit = TREE_CHAIN (f_next_fp);
      f_next_stack = TREE_CHAIN (f_next_fp_limit);
6764 next_o
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6766 next_o_limit
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6767 valist
, f_next_o_limit
, NULL_TREE
);
6768 next_fp
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_fp
),
6769 valist
, f_next_fp
, NULL_TREE
);
6770 next_fp_limit
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6771 valist
, f_next_fp_limit
, NULL_TREE
);
6772 next_stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6773 valist
, f_next_stack
, NULL_TREE
);
6775 /* Structures with a single member with a distinct mode are passed
6776 like their member. This is relevant if the latter has a REAL_TYPE
6777 or COMPLEX_TYPE type. */
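      /* For example (illustrative only): a type such as
	   struct { double d; }
	 has a sole member of REAL_TYPE, so it is treated like a plain
	 double when choosing between the FP and integer va_arg paths.  */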
6778 while (TREE_CODE (type
) == RECORD_TYPE
6779 && (member
= find_sole_member (type
))
6780 && (TREE_CODE (TREE_TYPE (member
)) == REAL_TYPE
6781 || TREE_CODE (TREE_TYPE (member
)) == COMPLEX_TYPE
6782 || TREE_CODE (TREE_TYPE (member
)) == RECORD_TYPE
))
6784 tree field_type
= TREE_TYPE (member
);
6786 if (TYPE_MODE (type
) == TYPE_MODE (field_type
))
6790 gcc_assert ((TYPE_ALIGN (type
)
6791 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type
)))
6792 || (TYPE_ALIGN (type
)
6793 > GET_MODE_BITSIZE (TYPE_MODE (field_type
))));
6800 pass_as_float
= ((TREE_CODE (type
) == REAL_TYPE
&& size
<= 8)
6801 || (TREE_CODE (type
) == COMPLEX_TYPE
6802 && TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
6807 pass_as_float
= (TREE_CODE (type
) == REAL_TYPE
&& size
== 4);
6810 addr
= create_tmp_var (pptr_type_node
, NULL
);
6811 lab_false
= create_artificial_label ();
6812 lab_over
= create_artificial_label ();
6814 valist
= build1 (INDIRECT_REF
, ptr_type_node
, addr
);
6818 tree next_fp_tmp
= create_tmp_var (TREE_TYPE (f_next_fp
), NULL
);
6820 bool is_double
= size
== 8 && TREE_CODE (type
) == REAL_TYPE
;
6822 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_fp
);
6823 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6824 gimplify_and_add (tmp
, pre_p
);
6826 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6827 gimplify_and_add (tmp
, pre_p
);
6828 tmp
= next_fp_limit
;
6829 if (size
> 4 && !is_double
)
6830 tmp
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
), tmp
,
6831 fold_convert (TREE_TYPE (tmp
), size_int (4 - size
)));
6832 tmp
= build2 (GE_EXPR
, boolean_type_node
, next_fp_tmp
, tmp
);
6833 cmp
= build3 (COND_EXPR
, void_type_node
, tmp
,
6834 build1 (GOTO_EXPR
, void_type_node
, lab_false
),
6837 gimplify_and_add (cmp
, pre_p
);
6839 if (TYPE_ALIGN (type
) > BITS_PER_WORD
|| (is_double
|| size
== 16))
6841 tmp
= fold_convert (ptr_type_node
, size_int (UNITS_PER_WORD
));
6842 tmp
= build2 (BIT_AND_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6843 tmp
= build2 (PLUS_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6844 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6845 gimplify_and_add (tmp
, pre_p
);
6848 gimplify_and_add (cmp
, pre_p
);
6850 #ifdef FUNCTION_ARG_SCmode_WART
6851 if (TYPE_MODE (type
) == SCmode
&& TARGET_SH4
&& TARGET_LITTLE_ENDIAN
)
6853 tree subtype
= TREE_TYPE (type
);
6857 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6858 imag
= get_initialized_tmp_var (imag
, pre_p
, NULL
);
6861 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6862 real
= get_initialized_tmp_var (real
, pre_p
, NULL
);
6864 result
= build2 (COMPLEX_EXPR
, type
, real
, imag
);
6865 result
= get_initialized_tmp_var (result
, pre_p
, NULL
);
6867 #endif /* FUNCTION_ARG_SCmode_WART */
6869 tmp
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
6870 gimplify_and_add (tmp
, pre_p
);
6872 tmp
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
6873 gimplify_and_add (tmp
, pre_p
);
6875 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6876 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6877 gimplify_and_add (tmp
, pre_p
);
6878 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6879 gimplify_and_add (tmp
, pre_p
);
6881 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, valist
, next_fp_tmp
);
6882 gimplify_and_add (tmp
, post_p
);
6883 valist
= next_fp_tmp
;
6887 tmp
= fold_convert (ptr_type_node
, size_int (rsize
));
6888 tmp
= build2 (PLUS_EXPR
, ptr_type_node
, next_o
, tmp
);
6889 tmp
= build2 (GT_EXPR
, boolean_type_node
, tmp
, next_o_limit
);
6890 tmp
= build3 (COND_EXPR
, void_type_node
, tmp
,
6891 build1 (GOTO_EXPR
, void_type_node
, lab_false
),
6893 gimplify_and_add (tmp
, pre_p
);
6895 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_o
);
6896 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6897 gimplify_and_add (tmp
, pre_p
);
6899 tmp
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
6900 gimplify_and_add (tmp
, pre_p
);
6902 tmp
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
6903 gimplify_and_add (tmp
, pre_p
);
6905 if (size
> 4 && ! TARGET_SH4
)
6907 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_o
, next_o_limit
);
6908 gimplify_and_add (tmp
, pre_p
);
6911 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6912 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6913 gimplify_and_add (tmp
, pre_p
);
6918 tmp
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
6919 gimplify_and_add (tmp
, pre_p
);
6923 /* ??? In va-sh.h, there had been code to make values larger than
6924 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6926 tmp
= std_gimplify_va_arg_expr (valist
, type
, pre_p
, NULL
);
6929 tmp
= build2 (MODIFY_EXPR
, void_type_node
, result
, tmp
);
6930 gimplify_and_add (tmp
, pre_p
);
6932 tmp
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
6933 gimplify_and_add (tmp
, pre_p
);
6939 result
= build_va_arg_indirect_ref (result
);
6945 sh_promote_prototypes (tree type
)
6951 return ! sh_attr_renesas_p (type
);
/* Whether an argument must be passed by reference.  On SHcompact, we
   pretend that arguments wider than 32 bits that would have been passed in
   registers are passed by reference, so that an SHmedia trampoline
   loads them into the full 64-bit registers.  */
6960 shcompact_byref (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6961 tree type
, bool named
)
6963 unsigned HOST_WIDE_INT size
;
6966 size
= int_size_in_bytes (type
);
6968 size
= GET_MODE_SIZE (mode
);
6970 if (cum
->arg_count
[SH_ARG_INT
] < NPARM_REGS (SImode
)
6972 || GET_SH_ARG_CLASS (mode
) == SH_ARG_INT
6973 || (GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
6974 && cum
->arg_count
[SH_ARG_FLOAT
] >= NPARM_REGS (SFmode
)))
6976 && !SHCOMPACT_FORCE_ON_STACK (mode
, type
)
6977 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
6984 sh_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6985 tree type
, bool named
)
6987 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
6990 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6991 wants to know about pass-by-reference semantics for incoming
6996 if (TARGET_SHCOMPACT
)
6998 cum
->byref
= shcompact_byref (cum
, mode
, type
, named
);
6999 return cum
->byref
!= 0;
7006 sh_callee_copies (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7007 tree type
, bool named ATTRIBUTE_UNUSED
)
7009 /* ??? How can it possibly be correct to return true only on the
7010 caller side of the equation? Is there someplace else in the
7011 sh backend that's magically producing the copies? */
7012 return (cum
->outgoing
7013 && ((mode
== BLKmode
? TYPE_ALIGN (type
) : GET_MODE_ALIGNMENT (mode
))
7014 % SH_MIN_ALIGN_FOR_CALLEE_COPY
== 0));
7018 sh_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7019 tree type
, bool named ATTRIBUTE_UNUSED
)
7024 && PASS_IN_REG_P (*cum
, mode
, type
)
7025 && !(TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
7026 && (ROUND_REG (*cum
, mode
)
7028 ? ROUND_ADVANCE (GET_MODE_SIZE (mode
))
7029 : ROUND_ADVANCE (int_size_in_bytes (type
)))
7030 > NPARM_REGS (mode
)))
7031 words
= NPARM_REGS (mode
) - ROUND_REG (*cum
, mode
);
7033 else if (!TARGET_SHCOMPACT
7034 && SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
7035 words
= NPARM_REGS (SImode
) - cum
->arg_count
[SH_ARG_INT
];
7037 return words
* UNITS_PER_WORD
;
7041 /* Define where to put the arguments to a function.
7042 Value is zero to push the argument on the stack,
7043 or a hard register in which to store the argument.
7045 MODE is the argument's machine mode.
7046 TYPE is the data type of the argument (as a tree).
7047 This is null for libcalls where that information may
7049 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7050 the preceding args and about the function being called.
7051 NAMED is nonzero if this argument is a named parameter
7052 (otherwise it is an extra parameter matching an ellipsis).
7054 On SH the first args are normally in registers
7055 and the rest are pushed. Any arg that starts within the first
7056 NPARM_REGS words is at least partially passed in a register unless
7057 its data type forbids. */
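/* Concretely (a sketch, not exhaustive): on SH1..SH4 the first four
   general-register argument words go in r4..r7, and on SH2E/SH4 the
   first single-precision float args go in fr4..fr11; anything beyond
   that, or whose data type forbids register passing, goes on the stack.  */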
7061 sh_function_arg (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7062 tree type
, int named
)
7064 if (! TARGET_SH5
&& mode
== VOIDmode
)
7065 return GEN_INT (ca
->renesas_abi
? 1 : 0);
7068 && PASS_IN_REG_P (*ca
, mode
, type
)
7069 && (named
|| ! (TARGET_HITACHI
|| ca
->renesas_abi
)))
7073 if (mode
== SCmode
&& TARGET_SH4
&& TARGET_LITTLE_ENDIAN
7074 && (! FUNCTION_ARG_SCmode_WART
|| (ROUND_REG (*ca
, mode
) & 1)))
7076 rtx r1
= gen_rtx_EXPR_LIST (VOIDmode
,
7077 gen_rtx_REG (SFmode
,
7079 + (ROUND_REG (*ca
, mode
) ^ 1)),
7081 rtx r2
= gen_rtx_EXPR_LIST (VOIDmode
,
7082 gen_rtx_REG (SFmode
,
7084 + ((ROUND_REG (*ca
, mode
) + 1) ^ 1)),
7086 return gen_rtx_PARALLEL(SCmode
, gen_rtvec(2, r1
, r2
));
7089 /* If the alignment of a DF value causes an SF register to be
7090 skipped, we will use that skipped register for the next SF
7092 if ((TARGET_HITACHI
|| ca
->renesas_abi
)
7093 && ca
->free_single_fp_reg
7095 return gen_rtx_REG (mode
, ca
->free_single_fp_reg
);
7097 regno
= (BASE_ARG_REG (mode
) + ROUND_REG (*ca
, mode
))
7098 ^ (mode
== SFmode
&& TARGET_SH4
7099 && TARGET_LITTLE_ENDIAN
!= 0
7100 && ! TARGET_HITACHI
&& ! ca
->renesas_abi
);
7101 return gen_rtx_REG (mode
, regno
);
7107 if (mode
== VOIDmode
&& TARGET_SHCOMPACT
)
7108 return GEN_INT (ca
->call_cookie
);
7110 /* The following test assumes unnamed arguments are promoted to
7112 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7113 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
, ca
->free_single_fp_reg
);
7115 if ((GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
)
7116 && (named
|| ! ca
->prototype_p
)
7117 && ca
->arg_count
[(int) SH_ARG_FLOAT
] < NPARM_REGS (SFmode
))
7119 if (! ca
->prototype_p
&& TARGET_SHMEDIA
)
7120 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca
, mode
);
7122 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
,
7124 + ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7127 if (ca
->arg_count
[(int) SH_ARG_INT
] < NPARM_REGS (SImode
)
7128 && (! TARGET_SHCOMPACT
7129 || (! SHCOMPACT_FORCE_ON_STACK (mode
, type
)
7130 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca
, mode
,
7133 return gen_rtx_REG (mode
, (FIRST_PARM_REG
7134 + ca
->arg_count
[(int) SH_ARG_INT
]));
7143 /* Update the data in CUM to advance over an argument
7144 of mode MODE and data type TYPE.
7145 (TYPE is null for libcalls where that information may not be
7149 sh_function_arg_advance (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7150 tree type
, int named
)
7154 else if (TARGET_SH5
)
7156 tree type2
= (ca
->byref
&& type
7159 enum machine_mode mode2
= (ca
->byref
&& type
7162 int dwords
= ((ca
->byref
7165 ? int_size_in_bytes (type2
)
7166 : GET_MODE_SIZE (mode2
)) + 7) / 8;
7167 int numregs
= MIN (dwords
, NPARM_REGS (SImode
)
7168 - ca
->arg_count
[(int) SH_ARG_INT
]);
7172 ca
->arg_count
[(int) SH_ARG_INT
] += numregs
;
7173 if (TARGET_SHCOMPACT
7174 && SHCOMPACT_FORCE_ON_STACK (mode2
, type2
))
7177 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7179 /* N.B. We want this also for outgoing. */
7180 ca
->stack_regs
+= numregs
;
7185 ca
->stack_regs
+= numregs
;
7186 ca
->byref_regs
+= numregs
;
7190 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7194 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7197 else if (dwords
> numregs
)
7199 int pushregs
= numregs
;
7201 if (TARGET_SHCOMPACT
)
7202 ca
->stack_regs
+= numregs
;
7203 while (pushregs
< NPARM_REGS (SImode
) - 1
7204 && (CALL_COOKIE_INT_REG_GET
7206 NPARM_REGS (SImode
) - pushregs
)
7210 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
7214 if (numregs
== NPARM_REGS (SImode
))
7216 |= CALL_COOKIE_INT_REG (0, 1)
7217 | CALL_COOKIE_STACKSEQ (numregs
- 1);
7220 |= CALL_COOKIE_STACKSEQ (numregs
);
7223 if (GET_SH_ARG_CLASS (mode2
) == SH_ARG_FLOAT
7224 && (named
|| ! ca
->prototype_p
))
7226 if (mode2
== SFmode
&& ca
->free_single_fp_reg
)
7227 ca
->free_single_fp_reg
= 0;
7228 else if (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7229 < NPARM_REGS (SFmode
))
7232 = MIN ((GET_MODE_SIZE (mode2
) + 7) / 8 * 2,
7234 - ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7236 ca
->arg_count
[(int) SH_ARG_FLOAT
] += numfpregs
;
7238 if (TARGET_SHCOMPACT
&& ! ca
->prototype_p
)
7240 if (ca
->outgoing
&& numregs
> 0)
7244 |= (CALL_COOKIE_INT_REG
7245 (ca
->arg_count
[(int) SH_ARG_INT
]
7246 - numregs
+ ((numfpregs
- 2) / 2),
7247 4 + (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7250 while (numfpregs
-= 2);
7252 else if (mode2
== SFmode
&& (named
)
7253 && (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7254 < NPARM_REGS (SFmode
)))
7255 ca
->free_single_fp_reg
7256 = FIRST_FP_PARM_REG
- numfpregs
7257 + ca
->arg_count
[(int) SH_ARG_FLOAT
] + 1;
7263 if ((TARGET_HITACHI
|| ca
->renesas_abi
) && TARGET_FPU_DOUBLE
)
7265 /* Note that we've used the skipped register. */
7266 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7268 ca
->free_single_fp_reg
= 0;
      /* When we have a DF after an SF, there's an SF register that gets
	 skipped in order to align the DF value.  We note this skipped
	 register, because the next SF value will use it, and not the
	 SF that follows the DF.  */
7276 && ROUND_REG (*ca
, DFmode
) != ROUND_REG (*ca
, SFmode
))
7278 ca
->free_single_fp_reg
= (ROUND_REG (*ca
, SFmode
)
7279 + BASE_ARG_REG (mode
));
7283 if (! ((TARGET_SH4
|| TARGET_SH2A
) || ca
->renesas_abi
)
7284 || PASS_IN_REG_P (*ca
, mode
, type
))
7285 (ca
->arg_count
[(int) GET_SH_ARG_CLASS (mode
)]
7286 = (ROUND_REG (*ca
, mode
)
7288 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7289 : ROUND_ADVANCE (GET_MODE_SIZE (mode
)))));
7292 /* The Renesas calling convention doesn't quite fit into this scheme since
7293 the address is passed like an invisible argument, but one that is always
7294 passed in memory. */
7296 sh_struct_value_rtx (tree fndecl
, int incoming ATTRIBUTE_UNUSED
)
7298 if (TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7300 return gen_rtx_REG (Pmode
, 2);
7303 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7306 sh_return_in_memory (tree type
, tree fndecl
)
7310 if (TYPE_MODE (type
) == BLKmode
)
7311 return ((unsigned HOST_WIDE_INT
) int_size_in_bytes (type
)) > 8;
7313 return GET_MODE_SIZE (TYPE_MODE (type
)) > 8;
7317 return (TYPE_MODE (type
) == BLKmode
7318 || ((TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7319 && TREE_CODE (type
) == RECORD_TYPE
));
/* We actually emit the code in sh_expand_prologue.  We used to use
   a static variable to flag that we need to emit this code, but that
   doesn't work when inlining, when functions are deferred and then emitted
   later.  Fortunately, we already have two flags that are part of struct
   function that tell if a function uses varargs or stdarg.  */
7329 sh_setup_incoming_varargs (CUMULATIVE_ARGS
*ca
,
7330 enum machine_mode mode
,
7332 int *pretend_arg_size
,
7333 int second_time ATTRIBUTE_UNUSED
)
7335 gcc_assert (current_function_stdarg
);
7336 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
7338 int named_parm_regs
, anon_parm_regs
;
7340 named_parm_regs
= (ROUND_REG (*ca
, mode
)
7342 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7343 : ROUND_ADVANCE (GET_MODE_SIZE (mode
))));
7344 anon_parm_regs
= NPARM_REGS (SImode
) - named_parm_regs
;
7345 if (anon_parm_regs
> 0)
7346 *pretend_arg_size
= anon_parm_regs
* 4;
7351 sh_strict_argument_naming (CUMULATIVE_ARGS
*ca ATTRIBUTE_UNUSED
)
7357 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS
*ca
)
7359 return ! (TARGET_HITACHI
|| ca
->renesas_abi
) && ! TARGET_SH5
;
7363 /* Define the offset between two registers, one to be eliminated, and
7364 the other its replacement, at the start of a routine. */
7367 initial_elimination_offset (int from
, int to
)
7370 int regs_saved_rounding
= 0;
7371 int total_saved_regs_space
;
7372 int total_auto_space
;
7373 int save_flags
= target_flags
;
7375 HARD_REG_SET live_regs_mask
;
7377 shmedia_space_reserved_for_target_registers
= false;
7378 regs_saved
= calc_live_regs (&live_regs_mask
);
7379 regs_saved
+= SHMEDIA_REGS_STACK_ADJUST ();
7381 if (shmedia_reserve_space_for_target_registers_p (regs_saved
, &live_regs_mask
))
7383 shmedia_space_reserved_for_target_registers
= true;
7384 regs_saved
+= shmedia_target_regs_stack_adjust (&live_regs_mask
);
7387 if (TARGET_SH5
&& regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
7388 regs_saved_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
7389 - regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
7391 total_auto_space
= rounded_frame_size (regs_saved
) - regs_saved_rounding
;
7392 copy_flags
= target_flags
;
7393 target_flags
= save_flags
;
7395 total_saved_regs_space
= regs_saved
+ regs_saved_rounding
;
7397 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7398 return total_saved_regs_space
+ total_auto_space
7399 + current_function_args_info
.byref_regs
* 8;
7401 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7402 return total_saved_regs_space
+ total_auto_space
7403 + current_function_args_info
.byref_regs
* 8;
  /* Initial gap between fp and sp is 0.  */
  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return 0;

  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return rounded_frame_size (0);

  if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return rounded_frame_size (0);
7415 gcc_assert (from
== RETURN_ADDRESS_POINTER_REGNUM
7416 && (to
== HARD_FRAME_POINTER_REGNUM
7417 || to
== STACK_POINTER_REGNUM
));
7420 int n
= total_saved_regs_space
;
7421 int pr_reg
= TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
;
7422 save_schedule schedule
;
7425 n
+= total_auto_space
;
7427 /* If it wasn't saved, there's not much we can do. */
7428 if (! TEST_HARD_REG_BIT (live_regs_mask
, pr_reg
))
7431 target_flags
= copy_flags
;
7433 sh5_schedule_saves (&live_regs_mask
, &schedule
, n
);
7434 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
7435 if (entry
->reg
== pr_reg
)
7437 target_flags
= save_flags
;
7438 return entry
->offset
;
7443 return total_auto_space
;
7446 /* Insert any deferred function attributes from earlier pragmas. */
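/* For example (a sketch): after

     #pragma interrupt
     void isr (void);

   the interrupt_handler attribute recorded by the pragma handler is
   attached here to `isr', the next function declaration seen.  */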
7448 sh_insert_attributes (tree node
, tree
*attributes
)
7452 if (TREE_CODE (node
) != FUNCTION_DECL
)
7455 /* We are only interested in fields. */
7459 /* Append the attributes to the deferred attributes. */
7460 *sh_deferred_function_attributes_tail
= *attributes
;
7461 attrs
= sh_deferred_function_attributes
;
7465 /* Some attributes imply or require the interrupt attribute. */
7466 if (!lookup_attribute ("interrupt_handler", attrs
)
7467 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node
)))
7469 /* If we have a trapa_handler, but no interrupt_handler attribute,
7470 insert an interrupt_handler attribute. */
7471 if (lookup_attribute ("trapa_handler", attrs
) != NULL_TREE
)
7472 /* We can't use sh_pr_interrupt here because that's not in the
7475 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE
, attrs
);
7476 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7477 interrupt attribute is missing, we ignore the attribute and warn. */
7478 else if (lookup_attribute ("sp_switch", attrs
)
7479 || lookup_attribute ("trap_exit", attrs
)
7480 || lookup_attribute ("nosave_low_regs", attrs
))
7484 for (tail
= attributes
; attrs
; attrs
= TREE_CHAIN (attrs
))
7486 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs
))
7487 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs
))
7488 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs
)))
7489 warning (OPT_Wattributes
,
7490 "%qs attribute only applies to interrupt functions",
7491 IDENTIFIER_POINTER (TREE_PURPOSE (attrs
)));
7494 *tail
= tree_cons (TREE_PURPOSE (attrs
), NULL_TREE
,
7496 tail
= &TREE_CHAIN (*tail
);
7499 attrs
= *attributes
;
7503 /* Install the processed list. */
7504 *attributes
= attrs
;
7506 /* Clear deferred attributes. */
7507 sh_deferred_function_attributes
= NULL_TREE
;
7508 sh_deferred_function_attributes_tail
= &sh_deferred_function_attributes
;
7513 /* Supported attributes:
7515 interrupt_handler -- specifies this function is an interrupt handler.
7517 trapa_handler - like above, but don't save all registers.
7519 sp_switch -- specifies an alternate stack for an interrupt handler
7522 trap_exit -- use a trapa to exit an interrupt function instead of
   nosave_low_regs - don't save r0..r7 in an interrupt handler.
     This is useful on the SH3 and upwards, which have a separate set of
     low regs for user and supervisor modes.  This should only be used for
     the lowest level of interrupts.  Higher levels of interrupts must save
     the registers in case they themselves are interrupted.

   renesas -- use Renesas calling/layout conventions (functions and
7537 const struct attribute_spec sh_attribute_table
[] =
7539 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7540 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7541 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute
},
7542 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute
},
7543 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute
},
7544 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7545 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7547 /* Symbian support adds three new attributes:
7548 dllexport - for exporting a function/variable that will live in a dll
7549 dllimport - for importing a function/variable from a dll
7551 Microsoft allows multiple declspecs in one __declspec, separating
7552 them with spaces. We do NOT support this. Instead, use __declspec
7554 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7555 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7557 { NULL
, 0, 0, false, false, false, NULL
}
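/* Example usage of the attributes above (illustrative only; the stack
   symbol name and trap number are made up):

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
		       trap_exit (11), nosave_low_regs));

   sp_switch takes a string naming the alternate stack and trap_exit an
   integer trap number, as checked by the handlers below.  */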
7560 /* Handle an "interrupt_handler" attribute; arguments as in
7561 struct attribute_spec.handler. */
7563 sh_handle_interrupt_handler_attribute (tree
*node
, tree name
,
7564 tree args ATTRIBUTE_UNUSED
,
7565 int flags ATTRIBUTE_UNUSED
,
7568 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7570 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7571 IDENTIFIER_POINTER (name
));
7572 *no_add_attrs
= true;
7574 else if (TARGET_SHCOMPACT
)
7576 error ("attribute interrupt_handler is not compatible with -m5-compact");
7577 *no_add_attrs
= true;
7583 /* Handle an "sp_switch" attribute; arguments as in
7584 struct attribute_spec.handler. */
7586 sh_handle_sp_switch_attribute (tree
*node
, tree name
, tree args
,
7587 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7589 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7591 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7592 IDENTIFIER_POINTER (name
));
7593 *no_add_attrs
= true;
7595 else if (TREE_CODE (TREE_VALUE (args
)) != STRING_CST
)
7597 /* The argument must be a constant string. */
7598 warning (OPT_Wattributes
, "%qs attribute argument not a string constant",
7599 IDENTIFIER_POINTER (name
));
7600 *no_add_attrs
= true;
/* Handle a "trap_exit" attribute; arguments as in
   struct attribute_spec.handler.  */
7609 sh_handle_trap_exit_attribute (tree
*node
, tree name
, tree args
,
7610 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7612 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7614 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7615 IDENTIFIER_POINTER (name
));
7616 *no_add_attrs
= true;
7618 /* The argument specifies a trap number to be used in a trapa instruction
7619 at function exit (instead of an rte instruction). */
7620 else if (TREE_CODE (TREE_VALUE (args
)) != INTEGER_CST
)
7622 /* The argument must be a constant integer. */
7623 warning (OPT_Wattributes
, "%qs attribute argument not an "
7624 "integer constant", IDENTIFIER_POINTER (name
));
7625 *no_add_attrs
= true;
7632 sh_handle_renesas_attribute (tree
*node ATTRIBUTE_UNUSED
,
7633 tree name ATTRIBUTE_UNUSED
,
7634 tree args ATTRIBUTE_UNUSED
,
7635 int flags ATTRIBUTE_UNUSED
,
7636 bool *no_add_attrs ATTRIBUTE_UNUSED
)
7641 /* True if __attribute__((renesas)) or -mrenesas. */
7643 sh_attr_renesas_p (tree td
)
7650 td
= TREE_TYPE (td
);
7651 if (td
== error_mark_node
)
7653 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td
))
7657 /* True if __attribute__((renesas)) or -mrenesas, for the current
7660 sh_cfun_attr_renesas_p (void)
7662 return sh_attr_renesas_p (current_function_decl
);
7666 sh_cfun_interrupt_handler_p (void)
7668 return (lookup_attribute ("interrupt_handler",
7669 DECL_ATTRIBUTES (current_function_decl
))
/* Implement TARGET_CHECK_PCH_TARGET_FLAGS.  */

sh_check_pch_target_flags (int old_flags)
  if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
				    | MASK_SH_E | MASK_HARD_SH4
				    | MASK_FPU_SINGLE | MASK_SH4))
    return _("created and used with different architectures / ABIs");
  if ((old_flags ^ target_flags) & MASK_HITACHI)
    return _("created and used with different ABIs");
  if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
    return _("created and used with different endianness");
7689 /* Predicates used by the templates. */
7691 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7692 Used only in general_movsrc_operand. */
7695 system_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7707 /* Nonzero if OP is a floating point value with value 0.0. */
7710 fp_zero_operand (rtx op
)
7714 if (GET_MODE (op
) != SFmode
)
7717 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7718 return REAL_VALUES_EQUAL (r
, dconst0
) && ! REAL_VALUE_MINUS_ZERO (r
);
7721 /* Nonzero if OP is a floating point value with value 1.0. */
7724 fp_one_operand (rtx op
)
7728 if (GET_MODE (op
) != SFmode
)
7731 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7732 return REAL_VALUES_EQUAL (r
, dconst1
);
/* For -m4 and -m4-single-only, mode switching is used.  If we are
   compiling without -mfmovd, movsf_ie isn't taken into account for
   mode switching.  We could check in machine_dependent_reorg for
   cases where we know we are in single precision mode, but there is no
   interface to find that out during reload, so we must avoid
   choosing an fldi alternative during reload and thus failing to
   allocate a scratch register for the constant loading.  */
7745 return ! TARGET_SH4
|| TARGET_FMOVD
|| reload_completed
;
7749 tertiary_reload_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7751 enum rtx_code code
= GET_CODE (op
);
7752 return code
== MEM
|| (TARGET_SH4
&& code
== CONST_DOUBLE
);
7755 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7757 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7759 if (GET_CODE (op
) != SYMBOL_REF
)
7761 return SYMBOL_REF_TLS_MODEL (op
);
7764 /* Return the destination address of a branch. */
7767 branch_dest (rtx branch
)
7769 rtx dest
= SET_SRC (PATTERN (branch
));
7772 if (GET_CODE (dest
) == IF_THEN_ELSE
)
7773 dest
= XEXP (dest
, 1);
7774 dest
= XEXP (dest
, 0);
7775 dest_uid
= INSN_UID (dest
);
7776 return INSN_ADDRESSES (dest_uid
);
7779 /* Return nonzero if REG is not used after INSN.
7780 We assume REG is a reload reg, and therefore does
7781 not live past labels. It may live past calls or jumps though. */
7783 reg_unused_after (rtx reg
, rtx insn
)
7788 /* If the reg is set by this instruction, then it is safe for our
7789 case. Disregard the case where this is a store to memory, since
7790 we are checking a register used in the store address. */
7791 set
= single_set (insn
);
7792 if (set
&& GET_CODE (SET_DEST (set
)) != MEM
7793 && reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7796 while ((insn
= NEXT_INSN (insn
)))
7802 code
= GET_CODE (insn
);
      /* If this is a label that existed before reload, then the register
	 is dead here.  However, if this is a label added by reorg, then
	 the register may still be live here.  We can't tell the difference,
	 so we just ignore labels completely.  */
      if (code == CODE_LABEL)
7814 if (code
== JUMP_INSN
)
7817 /* If this is a sequence, we must handle them all at once.
7818 We could have for instance a call that sets the target register,
7819 and an insn in a delay slot that uses the register. In this case,
7820 we must return 0. */
7821 else if (code
== INSN
&& GET_CODE (PATTERN (insn
)) == SEQUENCE
)
7826 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
7828 rtx this_insn
= XVECEXP (PATTERN (insn
), 0, i
);
7829 rtx set
= single_set (this_insn
);
7831 if (GET_CODE (this_insn
) == CALL_INSN
)
7833 else if (GET_CODE (this_insn
) == JUMP_INSN
)
7835 if (INSN_ANNULLED_BRANCH_P (this_insn
))
7840 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7842 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7844 if (GET_CODE (SET_DEST (set
)) != MEM
)
7850 && reg_overlap_mentioned_p (reg
, PATTERN (this_insn
)))
7855 else if (code
== JUMP_INSN
)
7859 set
= single_set (insn
);
7860 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7862 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7863 return GET_CODE (SET_DEST (set
)) != MEM
;
7864 if (set
== 0 && reg_overlap_mentioned_p (reg
, PATTERN (insn
)))
7867 if (code
== CALL_INSN
&& call_really_used_regs
[REGNO (reg
)])
7875 static GTY(()) rtx fpscr_rtx
;
7877 get_fpscr_rtx (void)
7881 fpscr_rtx
= gen_rtx_REG (PSImode
, FPSCR_REG
);
7882 REG_USERVAR_P (fpscr_rtx
) = 1;
7883 mark_user_reg (fpscr_rtx
);
7885 if (! reload_completed
|| mdep_reorg_phase
!= SH_AFTER_MDEP_REORG
)
7886 mark_user_reg (fpscr_rtx
);
7890 static GTY(()) tree fpscr_values
;
7893 emit_fpu_switch (rtx scratch
, int index
)
7897 if (fpscr_values
== NULL
)
7901 t
= build_index_type (integer_one_node
);
7902 t
= build_array_type (integer_type_node
, t
);
7903 t
= build_decl (VAR_DECL
, get_identifier ("__fpscr_values"), t
);
7904 DECL_ARTIFICIAL (t
) = 1;
7905 DECL_IGNORED_P (t
) = 1;
7906 DECL_EXTERNAL (t
) = 1;
7907 TREE_STATIC (t
) = 1;
7908 TREE_PUBLIC (t
) = 1;
7914 src
= DECL_RTL (fpscr_values
);
7917 emit_move_insn (scratch
, XEXP (src
, 0));
7919 emit_insn (gen_addsi3 (scratch
, scratch
, GEN_INT (index
* 4)));
7920 src
= adjust_automodify_address (src
, PSImode
, scratch
, index
* 4);
7923 src
= adjust_address (src
, PSImode
, index
* 4);
7925 dst
= get_fpscr_rtx ();
7926 emit_move_insn (dst
, src
);
7930 emit_sf_insn (rtx pat
)
7936 emit_df_insn (rtx pat
)
7942 expand_sf_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7944 emit_sf_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
7948 expand_sf_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7950 emit_sf_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
7955 expand_df_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7957 emit_df_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
7961 expand_df_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7963 emit_df_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
7967 /* ??? gcc does flow analysis strictly after common subexpression
7968 elimination. As a result, common subexpression elimination fails
7969 when there are some intervening statements setting the same register.
7970 If we did nothing about this, this would hurt the precision switching
7971 for SH4 badly. There is some cse after reload, but it is unable to
7972 undo the extra register pressure from the unused instructions, and
7973 it cannot remove auto-increment loads.
7975 A C code example that shows this flow/cse weakness for (at least) SH
7976 and sparc (as of gcc ss-970706) is this:
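   (The original example is elided here; the kind of code meant is a
    function that redundantly assigns the same FP constant within one
    basic block, e.g., a sketch:

	double f (double a)
	{
	  double d;
	  d = 0.1;
	  a += d;
	  d = 1.1;
	  d = 0.1;
	  a *= d;
	  return a;
	}

    where the intervening dead store to `d' defeats cse of the repeated
    0.1 assignment.)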
7990 So we add another pass before common subexpression elimination, to
7991 remove assignments that are dead due to a following assignment in the
7992 same basic block. */
7995 mark_use (rtx x
, rtx
*reg_set_block
)
8001 code
= GET_CODE (x
);
8006 int regno
= REGNO (x
);
8007 int nregs
= (regno
< FIRST_PSEUDO_REGISTER
8008 ? HARD_REGNO_NREGS (regno
, GET_MODE (x
))
8012 reg_set_block
[regno
+ nregs
- 1] = 0;
8019 rtx dest
= SET_DEST (x
);
8021 if (GET_CODE (dest
) == SUBREG
)
8022 dest
= SUBREG_REG (dest
);
8023 if (GET_CODE (dest
) != REG
)
8024 mark_use (dest
, reg_set_block
);
8025 mark_use (SET_SRC (x
), reg_set_block
);
8032 const char *fmt
= GET_RTX_FORMAT (code
);
8034 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8037 mark_use (XEXP (x
, i
), reg_set_block
);
8038 else if (fmt
[i
] == 'E')
8039 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8040 mark_use (XVECEXP (x
, i
, j
), reg_set_block
);
8047 static rtx
get_free_reg (HARD_REG_SET
);
8049 /* This function returns a register to use to load the address to load
8050 the fpscr from. Currently it always returns r1 or r7, but when we are
8051 able to use pseudo registers after combine, or have a better mechanism
8052 for choosing a register, it should be done here. */
8053 /* REGS_LIVE is the liveness information for the point for which we
8054 need this allocation. In some bare-bones exit blocks, r1 is live at the
8055 start. We can even have all of r0..r3 being live:
8056 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8057 INSN before which new insns are placed with will clobber the register
8058 we return. If a basic block consists only of setting the return value
8059 register to a pseudo and using that register, the return value is not
8060 live before or after this block, yet we we'll insert our insns right in
8064 get_free_reg (HARD_REG_SET regs_live
)
8066 if (! TEST_HARD_REG_BIT (regs_live
, 1))
8067 return gen_rtx_REG (Pmode
, 1);
8069 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8070 there shouldn't be anything but a jump before the function end. */
8071 gcc_assert (!TEST_HARD_REG_BIT (regs_live
, 7));
8072 return gen_rtx_REG (Pmode
, 7);
/* This function will set the fpscr from memory.
   MODE is the mode we are setting it to.  */

fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
  enum attr_fp_mode fp_mode = mode;
  enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
  rtx addr_reg = get_free_reg (regs_live);

  emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8087 /* Is the given character a logical line separator for the assembler? */
8088 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8089 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8093 sh_insn_length_adjustment (rtx insn
)
8095 /* Instructions with unfilled delay slots take up an extra two bytes for
8096 the nop in the delay slot. */
8097 if (((GET_CODE (insn
) == INSN
8098 && GET_CODE (PATTERN (insn
)) != USE
8099 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
8100 || GET_CODE (insn
) == CALL_INSN
8101 || (GET_CODE (insn
) == JUMP_INSN
8102 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8103 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
))
8104 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
8105 && get_attr_needs_delay_slot (insn
) == NEEDS_DELAY_SLOT_YES
)
8108 /* SH2e has a bug that prevents the use of annulled branches, so if
8109 the delay slot is not filled, we'll have to put a NOP in it. */
8110 if (sh_cpu
== CPU_SH2E
8111 && GET_CODE (insn
) == JUMP_INSN
8112 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8113 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
8114 && get_attr_type (insn
) == TYPE_CBRANCH
8115 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
)
8118 /* sh-dsp parallel processing insn take four bytes instead of two. */
8120 if (GET_CODE (insn
) == INSN
)
8123 rtx body
= PATTERN (insn
);
8124 const char *template;
8126 int maybe_label
= 1;
8128 if (GET_CODE (body
) == ASM_INPUT
)
8129 template = XSTR (body
, 0);
8130 else if (asm_noperands (body
) >= 0)
8132 = decode_asm_operands (body
, NULL
, NULL
, NULL
, NULL
);
8141 while (c
== ' ' || c
== '\t');
	  /* All sh-dsp parallel-processing insns start with p.
	     The only non-ppi sh insn starting with p is pref.
	     The only ppi starting with pr is prnd.  */
	  if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
	  /* The repeat pseudo-insn expands to three insns, a total of
	     six bytes in size.  */
	  else if ((c == 'r' || c == 'R')
		   && ! strncasecmp ("epeat", template, 5))
8152 while (c
&& c
!= '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c
))
8154 /* If this is a label, it is obviously not a ppi insn. */
8155 if (c
== ':' && maybe_label
)
8160 else if (c
== '\'' || c
== '"')
8165 maybe_label
= c
!= ':';
8173 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8174 isn't protected by a PIC unspec. */
8176 nonpic_symbol_mentioned_p (rtx x
)
8178 register const char *fmt
;
8181 if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
8182 || GET_CODE (x
) == PC
)
8185 /* We don't want to look into the possible MEM location of a
8186 CONST_DOUBLE, since we're not going to use it, in general. */
8187 if (GET_CODE (x
) == CONST_DOUBLE
)
8190 if (GET_CODE (x
) == UNSPEC
8191 && (XINT (x
, 1) == UNSPEC_PIC
8192 || XINT (x
, 1) == UNSPEC_GOT
8193 || XINT (x
, 1) == UNSPEC_GOTOFF
8194 || XINT (x
, 1) == UNSPEC_GOTPLT
8195 || XINT (x
, 1) == UNSPEC_GOTTPOFF
8196 || XINT (x
, 1) == UNSPEC_DTPOFF
8197 || XINT (x
, 1) == UNSPEC_PLT
))
8200 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8201 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8207 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8208 if (nonpic_symbol_mentioned_p (XVECEXP (x
, i
, j
)))
8211 else if (fmt
[i
] == 'e' && nonpic_symbol_mentioned_p (XEXP (x
, i
)))
8218 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8219 @GOTOFF in `reg'. */
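/* In outline (a sketch of the two cases handled below): label_refs and
   local symbol_refs are rewritten as GOT register + @GOTOFF(sym) via
   gen_symGOTOFF2reg, while other symbol_refs are loaded from their GOT
   slot via gen_symGOT2reg; TLS symbols are screened out first and left
   to the TLS expanders.  */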
8221 legitimize_pic_address (rtx orig
, enum machine_mode mode ATTRIBUTE_UNUSED
,
8224 if (tls_symbolic_operand (orig
, Pmode
))
8227 if (GET_CODE (orig
) == LABEL_REF
8228 || (GET_CODE (orig
) == SYMBOL_REF
&& SYMBOL_REF_LOCAL_P (orig
)))
8231 reg
= gen_reg_rtx (Pmode
);
8233 emit_insn (gen_symGOTOFF2reg (reg
, orig
));
8236 else if (GET_CODE (orig
) == SYMBOL_REF
)
8239 reg
= gen_reg_rtx (Pmode
);
8241 emit_insn (gen_symGOT2reg (reg
, orig
));
8247 /* Mark the use of a constant in the literal table. If the constant
8248 has multiple labels, make it unique. */
8250 mark_constant_pool_use (rtx x
)
8252 rtx insn
, lab
, pattern
;
8257 switch (GET_CODE (x
))
  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
8270 for (insn
= PREV_INSN (x
); insn
; insn
= PREV_INSN (insn
))
8272 if (GET_CODE (insn
) != CODE_LABEL
8273 || LABEL_REFS (insn
) != NEXT_INSN (insn
))
8278 for (insn
= LABEL_REFS (lab
); insn
; insn
= LABEL_REFS (insn
))
8279 INSN_DELETED_P (insn
) = 1;
8281 /* Mark constants in a window. */
8282 for (insn
= NEXT_INSN (x
); insn
; insn
= NEXT_INSN (insn
))
8284 if (GET_CODE (insn
) != INSN
)
8287 pattern
= PATTERN (insn
);
8288 if (GET_CODE (pattern
) != UNSPEC_VOLATILE
)
8291 switch (XINT (pattern
, 1))
8293 case UNSPECV_CONST2
:
8294 case UNSPECV_CONST4
:
8295 case UNSPECV_CONST8
:
8296 XVECEXP (pattern
, 0, 1) = const1_rtx
;
8298 case UNSPECV_WINDOW_END
:
8299 if (XVECEXP (pattern
, 0, 0) == x
)
8302 case UNSPECV_CONST_END
:
8312 /* Return true if it's possible to redirect BRANCH1 to the destination
8313 of an unconditional jump BRANCH2. We only want to do this if the
8314 resulting branch will have a short displacement. */
8316 sh_can_redirect_branch (rtx branch1
, rtx branch2
)
8318 if (flag_expensive_optimizations
&& simplejump_p (branch2
))
8320 rtx dest
= XEXP (SET_SRC (single_set (branch2
)), 0);
8324 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8325 insn
&& distance
< 256;
8326 insn
= PREV_INSN (insn
))
8331 distance
+= get_attr_length (insn
);
8333 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8334 insn
&& distance
< 256;
8335 insn
= NEXT_INSN (insn
))
8340 distance
+= get_attr_length (insn
);
8346 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8348 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED
,
8349 unsigned int new_reg
)
8351 /* Interrupt functions can only use registers that have already been
8352 saved by the prologue, even if they would normally be
8355 if (sh_cfun_interrupt_handler_p () && !regs_ever_live
[new_reg
])
8361 /* Function to update the integer COST
8362 based on the relationship between INSN that is dependent on
8363 DEP_INSN through the dependence LINK. The default is to make no
8364 adjustment to COST. This can be used for example to specify to
8365 the scheduler that an output- or anti-dependence does not incur
8366 the same cost as a data-dependence. The return value should be
8367 the new value for COST. */
8369 sh_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
, rtx dep_insn
, int cost
)
8375 /* On SHmedia, if the dependence is an anti-dependence or
8376 output-dependence, there is no cost. */
8377 if (REG_NOTE_KIND (link
) != 0)
      /* However, dependencies between target register loads and
	 uses of the register in a subsequent block that are separated
	 by a conditional branch are not modelled - we have to make do with
	 the anti-dependency between the target register load and the
	 conditional branch that ends the current block.  */
8384 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8385 && GET_CODE (PATTERN (dep_insn
)) == SET
8386 && (get_attr_type (dep_insn
) == TYPE_PT_MEDIA
8387 || get_attr_type (dep_insn
) == TYPE_PTABS_MEDIA
)
8388 && get_attr_type (insn
) == TYPE_CBRANCH_MEDIA
)
8390 int orig_cost
= cost
;
8391 rtx note
= find_reg_note (insn
, REG_BR_PROB
, 0);
8392 rtx target
= ((! note
8393 || INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
8394 ? insn
: JUMP_LABEL (insn
));
8395 /* On the likely path, the branch costs 1, on the unlikely path,
8399 target
= next_active_insn (target
);
8400 while (target
&& ! flow_dependent_p (target
, dep_insn
)
8402 /* If two branches are executed in immediate succession, with the
8403 first branch properly predicted, this causes a stall at the
8404 second branch, hence we won't need the target for the
8405 second branch for two cycles after the launch of the first
8407 if (cost
> orig_cost
- 2)
8408 cost
= orig_cost
- 2;
8414 else if (get_attr_is_mac_media (insn
)
8415 && get_attr_is_mac_media (dep_insn
))
8418 else if (! reload_completed
8419 && GET_CODE (PATTERN (insn
)) == SET
8420 && GET_CODE (SET_SRC (PATTERN (insn
))) == FLOAT
8421 && GET_CODE (PATTERN (dep_insn
)) == SET
8422 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn
)), VOIDmode
)
8425 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8426 that is needed at the target. */
8427 else if (get_attr_type (insn
) == TYPE_JUMP_MEDIA
8428 && ! flow_dependent_p (insn
, dep_insn
))
8431 else if (REG_NOTE_KIND (link
) == 0)
8433 enum attr_type dep_type
, type
;
8435 if (recog_memoized (insn
) < 0
8436 || recog_memoized (dep_insn
) < 0)
8439 dep_type
= get_attr_type (dep_insn
);
8440 if (dep_type
== TYPE_FLOAD
|| dep_type
== TYPE_PCFLOAD
)
8442 if ((dep_type
== TYPE_LOAD_SI
|| dep_type
== TYPE_PCLOAD_SI
)
8443 && (type
= get_attr_type (insn
)) != TYPE_CALL
8444 && type
!= TYPE_SFUNC
)
8447 /* The only input for a call that is timing-critical is the
8448 function's address. */
8449 if (GET_CODE(insn
) == CALL_INSN
)
8451 rtx call
= PATTERN (insn
);
8453 if (GET_CODE (call
) == PARALLEL
)
8454 call
= XVECEXP (call
, 0 ,0);
8455 if (GET_CODE (call
) == SET
)
8456 call
= SET_SRC (call
);
8457 if (GET_CODE (call
) == CALL
&& GET_CODE (XEXP (call
, 0)) == MEM
8458 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8459 && (GET_CODE (XEXP (XEXP (call
, 0), 0)) == UNSPEC
8460 || ! reg_set_p (XEXP (XEXP (call
, 0), 0), dep_insn
)))
8463 /* Likewise, the most timing critical input for an sfuncs call
8464 is the function address. However, sfuncs typically start
8465 using their arguments pretty quickly.
8466 Assume a four cycle delay before they are needed. */
8467 /* All sfunc calls are parallels with at least four components.
8468 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8469 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
8470 && XVECLEN (PATTERN (insn
), 0) >= 4
8471 && (reg
= sfunc_uses_reg (insn
)))
8473 if (! reg_set_p (reg
, dep_insn
))
8476 /* When the preceding instruction loads the shift amount of
8477 the following SHAD/SHLD, the latency of the load is increased
8480 && get_attr_type (insn
) == TYPE_DYN_SHIFT
8481 && get_attr_any_int_load (dep_insn
) == ANY_INT_LOAD_YES
8482 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn
)),
8483 XEXP (SET_SRC (single_set (insn
)),
8486 /* When an LS group instruction with a latency of less than
8487 3 cycles is followed by a double-precision floating-point
8488 instruction, FIPR, or FTRV, the latency of the first
8489 instruction is increased to 3 cycles. */
8491 && get_attr_insn_class (dep_insn
) == INSN_CLASS_LS_GROUP
8492 && get_attr_dfp_comp (insn
) == DFP_COMP_YES
)
8494 /* The lsw register of a double-precision computation is ready one
8496 else if (reload_completed
8497 && get_attr_dfp_comp (dep_insn
) == DFP_COMP_YES
8498 && (use_pat
= single_set (insn
))
8499 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn
))),
8503 if (get_attr_any_fp_comp (dep_insn
) == ANY_FP_COMP_YES
8504 && get_attr_late_fp_use (insn
) == LATE_FP_USE_YES
)
8507 /* An anti-dependence penalty of two applies if the first insn is a double
8508 precision fadd / fsub / fmul. */
8509 else if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8510 && recog_memoized (dep_insn
) >= 0
8511 && get_attr_type (dep_insn
) == TYPE_DFP_ARITH
8512 /* A lot of alleged anti-flow dependences are fake,
8513 so check this one is real. */
8514 && flow_dependent_p (dep_insn
, insn
))
8521 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8522 if DEP_INSN is anti-flow dependent on INSN. */
8524 flow_dependent_p (rtx insn
, rtx dep_insn
)
8526 rtx tmp
= PATTERN (insn
);
8528 note_stores (PATTERN (dep_insn
), flow_dependent_p_1
, &tmp
);
8529 return tmp
== NULL_RTX
;
8532 /* A helper function for flow_dependent_p called through note_stores. */
8534 flow_dependent_p_1 (rtx x
, rtx pat ATTRIBUTE_UNUSED
, void *data
)
8536 rtx
* pinsn
= (rtx
*) data
;
8538 if (*pinsn
&& reg_referenced_p (x
, *pinsn
))
/* For use by sh_allocate_initial_value.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */

  return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8552 /* Return where to allocate pseudo for a given hard register initial
8555 sh_allocate_initial_value (rtx hard_reg
)
8559 if (REGNO (hard_reg
) == (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
))
8561 if (current_function_is_leaf
8562 && ! sh_pr_n_sets ()
8563 && ! (TARGET_SHCOMPACT
8564 && ((current_function_args_info
.call_cookie
8565 & ~ CALL_COOKIE_RET_TRAMP (1))
8566 || current_function_has_nonlocal_label
)))
8569 x
= gen_frame_mem (Pmode
, return_address_pointer_rtx
);
/* This function returns "2" to indicate dual issue for the SH4
   processor.  To be used by the DFA pipeline description.  */

sh_issue_rate (void)
  if (TARGET_SUPERSCALAR)
    return 2;
  return 1;
8588 /* Functions for ready queue reordering for sched1. */
8590 /* Get weight for mode for a set x. */
8592 find_set_regmode_weight (rtx x
, enum machine_mode mode
)
8594 if (GET_CODE (x
) == CLOBBER
&& register_operand (SET_DEST (x
), mode
))
8596 if (GET_CODE (x
) == SET
&& register_operand (SET_DEST (x
), mode
))
8598 if (GET_CODE (SET_DEST (x
)) == REG
)
8600 if (!reg_mentioned_p (SET_DEST (x
), SET_SRC (x
)))
/* Get regmode weight for insn.  */
find_insn_regmode_weight (rtx insn, enum machine_mode mode)
  short reg_weight = 0;

  /* Increment weight for each register born here.  */
  reg_weight += find_set_regmode_weight (x, mode);
  if (GET_CODE (x) == PARALLEL)
    for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
	x = XVECEXP (PATTERN (insn), 0, j);
	reg_weight += find_set_regmode_weight (x, mode);
  /* Decrement weight for each register that dies here.  */
  for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
    if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
	rtx note = XEXP (x, 0);
	if (GET_CODE (note) == REG && GET_MODE (note) == mode)
/* Calculate regmode weights for all insns of a basic block.  */
static void
find_regmode_weight (int b, enum machine_mode mode)
{
  rtx insn, next_tail, head, tail;

  get_block_head_tail (b, &head, &tail);
  next_tail = NEXT_INSN (tail);

  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    {
      /* Handle register life information.  */
      if (mode == SFmode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DFmode);
      else if (mode == SImode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DImode);
    }
}
/* Comparison function for ready queue sorting.  */
static int
rank_for_reorder (const void *x, const void *y)
{
  rtx tmp = *(const rtx *) y;
  rtx tmp2 = *(const rtx *) x;

  /* The insn in a schedule group should be issued first.  */
  if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
    return SCHED_GROUP_P (tmp2) ? 1 : -1;

  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect on
     register pressure.  */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}
/* Resort the array A in which only element at index N may be out of order.  */
static void
swap_reorder (rtx *a, int n)
{
  rtx insn = a[n - 1];
  int i = n - 2;

  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}

#define SCHED_REORDER(READY, N_READY)				\
  do								\
    {								\
      if ((N_READY) == 2)					\
	swap_reorder (READY, N_READY);				\
      else if ((N_READY) > 2)					\
	qsort (READY, N_READY, sizeof (rtx), rank_for_reorder);	\
    }								\
  while (0)

/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
   macro.  */
static void
ready_reorder (rtx *ready, int nready)
{
  SCHED_REORDER (ready, nready);
}
/* Calculate regmode weights for all insns of all basic blocks.  */
static void
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
		   int verbose ATTRIBUTE_UNUSED,
		   int old_max_uid)
{
  basic_block b;

  regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
  regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));

  FOR_EACH_BB_REVERSE (b)
    {
      find_regmode_weight (b->index, SImode);
      find_regmode_weight (b->index, SFmode);
    }

  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}
static void
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
		     int verbose ATTRIBUTE_UNUSED)
{
  if (regmode_weight[0])
    {
      free (regmode_weight[0]);
      regmode_weight[0] = NULL;
    }
  if (regmode_weight[1])
    {
      free (regmode_weight[1]);
      regmode_weight[1] = NULL;
    }
}
/* Cache the can_issue_more so that we can return it from reorder2.  Also,
   keep count of register pressures on SImode and SFmode.  */
static int
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   rtx insn,
		   int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    cached_can_issue_more = can_issue_more - 1;
  else
    cached_can_issue_more = can_issue_more;

  if (reload_completed)
    return cached_can_issue_more;

  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);

  return cached_can_issue_more;
}
static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}
/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
#define R0_MAX_LIFE_REGIONS 2
#define R0_MAX_LIVE_LENGTH 12
/* Register Pressure thresholds for SImode and SFmode registers.  */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10
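/* Illustrative note (not part of the original sources): with the thresholds
   above, once the scheduled insns of the current block accumulate an SImode
   weight greater than SIMODE_MAX_WEIGHT (or an SFmode weight greater than
   SFMODE_MAX_WEIGHT), high_pressure returns nonzero; sh_reorder then
   re-sorts the ready list back into INSN_LUID order and sh_reorder2 asks
   the scheduler to skip cycles rather than issue more insns, which keeps
   sched1 from lengthening register lifetimes when pressure is high.  */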
/* Return true if the pressure is high for MODE.  */
static short
high_pressure (enum machine_mode mode)
{
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
  if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
      && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
    return 1;

  if (mode == SFmode)
    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
  else
    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
}
/* Reorder ready queue if register pressure is high.  */
static int
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
	    int sched_verbose ATTRIBUTE_UNUSED,
	    rtx *ready,
	    int *n_readyp,
	    int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return sh_issue_rate ();

  if (high_pressure (SFmode) || high_pressure (SImode))
    ready_reorder (ready, *n_readyp);

  return sh_issue_rate ();
}
/* Skip cycles if the current register pressure is high.  */
static int
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
	     int sched_verbose ATTRIBUTE_UNUSED,
	     rtx *ready ATTRIBUTE_UNUSED,
	     int *n_readyp ATTRIBUTE_UNUSED,
	     int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return cached_can_issue_more;

  if (high_pressure (SFmode) || high_pressure (SImode))
    skip_cycles = 1;

  return cached_can_issue_more;
}
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q -> R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
8857 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED
,
8858 int sched_verbose ATTRIBUTE_UNUSED
,
8859 rtx insn ATTRIBUTE_UNUSED
,
8864 if (reload_completed
)
8869 if ((clock_var
- last_clock_var
) < MAX_SKIPS
)
8874 /* If this is the last cycle we are skipping, allow reordering of R. */
8875 if ((clock_var
- last_clock_var
) == MAX_SKIPS
)
/* SHmedia requires registers for branches, so we can't generate new
   branches past reload.  */
static bool
sh_cannot_modify_jumps_p (void)
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}

static int
sh_target_reg_class (void)
{
  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
}
8902 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen
)
8907 if (! shmedia_space_reserved_for_target_registers
)
8909 if (after_prologue_epilogue_gen
&& ! TARGET_SAVE_ALL_TARGET_REGS
)
8911 if (calc_live_regs (&dummy
) >= 6 * 8)
8913 /* This is a borderline case. See if we got a nested loop, or a loop
8914 with a call, or with more than 4 labels inside. */
8915 for (insn
= get_insns(); insn
; insn
= NEXT_INSN (insn
))
8917 if (GET_CODE (insn
) == NOTE
8918 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
8924 insn
= NEXT_INSN (insn
);
8925 if ((GET_CODE (insn
) == NOTE
8926 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
8927 || GET_CODE (insn
) == CALL_INSN
8928 || (GET_CODE (insn
) == CODE_LABEL
&& ++labels
> 4))
8931 while (GET_CODE (insn
) != NOTE
8932 || NOTE_LINE_NUMBER (insn
) != NOTE_INSN_LOOP_END
);
static bool
sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
{
  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
}
/* On the SH1..SH4, the trampoline looks like
   2 0002 D202		mov.l	l2,r2
   1 0000 D301		mov.l	l1,r3
   5 0008 00000000 l1:	.long	area
   6 000c 00000000 l2:	.long	function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
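/* Illustrative sketch (an assumption drawn from the emitters in the
   function below, not part of the original comment): for SH1..SH4 the
   16-byte trampoline is filled in as four SImode words -- the first two
   hold the mov.l/mov.l and jmp/nop instruction pairs (byte order chosen
   per endianness), the word at offset 8 receives CXT (the static chain)
   and the word at offset 12 receives FNADDR, after which the instruction
   cache line covering the trampoline is invalidated.  */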
8961 sh_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
8963 rtx tramp_mem
= gen_frame_mem (BLKmode
, tramp
);
8965 if (TARGET_SHMEDIA64
)
8970 rtx movi1
= GEN_INT (0xcc000010);
8971 rtx shori1
= GEN_INT (0xc8000010);
8974 /* The following trampoline works within a +- 128 KB range for cxt:
8975 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8976 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8977 gettr tr1,r1; blink tr0,r63 */
8978 /* Address rounding makes it hard to compute the exact bounds of the
8979 offset for this trampoline, but we have a rather generous offset
8980 range, so frame_offset should do fine as an upper bound. */
8981 if (cxt
== virtual_stack_vars_rtx
&& frame_offset
< 0x20000)
8983 /* ??? could optimize this trampoline initialization
8984 by writing DImode words with two insns each. */
8985 rtx mask
= force_reg (DImode
, GEN_INT (0x3fffc00));
8986 rtx insn
= gen_rtx_MINUS (DImode
, cxt
, tramp
);
8987 insn
= gen_rtx_ASHIFT (DImode
, insn
, GEN_INT (10-2));
8988 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8989 /* Or in ptb/u .,tr1 pattern */
8990 insn
= gen_rtx_IOR (DImode
, insn
, gen_int_mode (0xec000010, SImode
));
8991 insn
= force_operand (insn
, NULL_RTX
);
8992 insn
= gen_lowpart (SImode
, insn
);
8993 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
), insn
);
8994 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (38));
8995 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8996 insn
= force_operand (gen_rtx_IOR (DImode
, movi1
, insn
), NULL_RTX
);
8997 insn
= gen_lowpart (SImode
, insn
);
8998 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4), insn
);
8999 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (22));
9000 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9001 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9002 insn
= gen_lowpart (SImode
, insn
);
9003 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), insn
);
9004 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (6));
9005 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9006 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9007 insn
= gen_lowpart (SImode
, insn
);
9008 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), insn
);
9009 insn
= gen_rtx_ASHIFT (DImode
, fnaddr
, GEN_INT (10));
9010 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9011 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9012 insn
= gen_lowpart (SImode
, insn
);
9013 emit_move_insn (adjust_address (tramp_mem
, SImode
, 16), insn
);
9014 emit_move_insn (adjust_address (tramp_mem
, SImode
, 20),
9015 GEN_INT (0x6bf10600));
9016 emit_move_insn (adjust_address (tramp_mem
, SImode
, 24),
9017 GEN_INT (0x4415fc10));
9018 emit_move_insn (adjust_address (tramp_mem
, SImode
, 28),
9019 GEN_INT (0x4401fff0));
9020 emit_insn (gen_ic_invalidate_line (tramp
));
9023 tramp_templ
= gen_rtx_SYMBOL_REF (Pmode
,"__GCC_nested_trampoline");
9024 fixed_len
= TRAMPOLINE_SIZE
- 2 * GET_MODE_SIZE (Pmode
);
9026 tramp_templ
= gen_datalabel_ref (tramp_templ
);
9028 src
= gen_const_mem (BLKmode
, tramp_templ
);
9029 set_mem_align (dst
, 256);
9030 set_mem_align (src
, 64);
9031 emit_block_move (dst
, src
, GEN_INT (fixed_len
), BLOCK_OP_NORMAL
);
9033 emit_move_insn (adjust_address (tramp_mem
, Pmode
, fixed_len
), fnaddr
);
9034 emit_move_insn (adjust_address (tramp_mem
, Pmode
,
9035 fixed_len
+ GET_MODE_SIZE (Pmode
)),
9037 emit_insn (gen_ic_invalidate_line (tramp
));
9040 else if (TARGET_SHMEDIA
)
9042 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9043 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9044 rtx quad0
= gen_reg_rtx (DImode
), cxtload
= gen_reg_rtx (DImode
);
9045 rtx quad1
= gen_reg_rtx (DImode
), quad2
= gen_reg_rtx (DImode
);
9046 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9047 rotated 10 right, and higher 16 bit of every 32 selected. */
9049 = force_reg (V2HImode
, (simplify_gen_subreg
9050 (V2HImode
, GEN_INT (0x4330432), SImode
, 0)));
9051 rtx ptabs
= force_reg (DImode
, GEN_INT (0x6bf10600));
9052 rtx blink
= force_reg (DImode
, GEN_INT (0x4401fff0));
9054 tramp
= force_reg (Pmode
, tramp
);
9055 fnaddr
= force_reg (SImode
, fnaddr
);
9056 cxt
= force_reg (SImode
, cxt
);
9057 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, quad0
, 0),
9058 gen_rtx_SUBREG (V2HImode
, fnaddr
, 0),
9060 emit_insn (gen_rotrdi3_mextr (quad0
, quad0
,
9061 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9062 emit_insn (gen_ashldi3_media (quad0
, quad0
, const2_rtx
));
9063 emit_move_insn (change_address (tramp_mem
, DImode
, NULL_RTX
), quad0
);
9064 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, cxtload
, 0),
9065 gen_rtx_SUBREG (V2HImode
, cxt
, 0),
9067 emit_insn (gen_rotrdi3_mextr (cxtload
, cxtload
,
9068 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9069 emit_insn (gen_ashldi3_media (cxtload
, cxtload
, const2_rtx
));
9070 if (TARGET_LITTLE_ENDIAN
)
9072 emit_insn (gen_mshflo_l_di (quad1
, ptabs
, cxtload
));
9073 emit_insn (gen_mextr4 (quad2
, cxtload
, blink
));
9077 emit_insn (gen_mextr4 (quad1
, cxtload
, ptabs
));
9078 emit_insn (gen_mshflo_l_di (quad2
, blink
, cxtload
));
9080 emit_move_insn (adjust_address (tramp_mem
, DImode
, 8), quad1
);
9081 emit_move_insn (adjust_address (tramp_mem
, DImode
, 16), quad2
);
9082 emit_insn (gen_ic_invalidate_line (tramp
));
9085 else if (TARGET_SHCOMPACT
)
9087 emit_insn (gen_initialize_trampoline (tramp
, cxt
, fnaddr
));
9090 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
),
9091 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0xd301d202 : 0xd202d301,
9093 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4),
9094 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0x0009422b : 0x422b0009,
9096 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), cxt
);
9097 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), fnaddr
);
9100 if (TARGET_USERMODE
)
9101 emit_library_call (function_symbol (NULL
, "__ic_invalidate",
9103 0, VOIDmode
, 1, tramp
, SImode
);
9105 emit_insn (gen_ic_invalidate_line (tramp
));
/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires r12 to be live.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
	  && (! TARGET_SHCOMPACT
	      || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
/* Machine specific built-in functions.  */

struct builtin_description
{
  const enum insn_code icode;
  const char *const name;
  int signature;
};

/* describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
/* 9: 64 bit pointer, 10: 32 bit pointer */
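/* Illustrative examples of the encoding above (hypothetical rows, not
   taken from the table below): { 2, 2, 2, 0 } would describe a builtin
   whose result and both operands are signed with no third operand, while
   { 0, 8, 0, 0 } would describe a builtin taking a single pointer
   argument and returning void.  */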
static const char signature_args[][4] =
#define SH_BLTIN_V2SI2 0
#define SH_BLTIN_V4HI2 1
#define SH_BLTIN_V2SI3 2
#define SH_BLTIN_V4HI3 3
#define SH_BLTIN_V8QI3 4
#define SH_BLTIN_MAC_HISI 5
#define SH_BLTIN_SH_HI 6
#define SH_BLTIN_SH_SI 7
#define SH_BLTIN_V4HI2V2SI 8
#define SH_BLTIN_V4HI2V8QI 9
#define SH_BLTIN_SISF 10
#define SH_BLTIN_LDUA_L 11
#define SH_BLTIN_LDUA_Q 12
#define SH_BLTIN_STUA_L 13
#define SH_BLTIN_STUA_Q 14
#define SH_BLTIN_LDUA_L64 15
#define SH_BLTIN_LDUA_Q64 16
#define SH_BLTIN_STUA_L64 17
#define SH_BLTIN_STUA_Q64 18
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
#define SH_BLTIN_PSSV 21
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
#define SH_BLTIN_PV 23
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2,	"__builtin_absv2si2", SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2,	"__builtin_absv4hi2", SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3,	"__builtin_addv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3,	"__builtin_addv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3, "__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3, "__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_alloco_i,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_negcmpeqv8qi, "__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si, "__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi, "__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi, "__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si, "__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi, "__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv,	"__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw,	"__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb,	"__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub,	"__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1,	"__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr2,	"__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr3,	"__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr4,	"__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr5,	"__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr6,	"__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr7,	"__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
  { CODE_FOR_mmacfx_wl,	"__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl, "__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3,	"__builtin_mulv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_mulv4hi3,	"__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l,	"__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w,	"__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w, "__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl,	"__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl,	"__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq, "__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w,	"__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq,	"__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l,	"__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w,	"__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3,	"__builtin_ashrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3,	"__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q,	"__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b,	"__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l,	"__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w,	"__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b,	"__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l,	"__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w,	"__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3,	"__builtin_ashlv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3,	"__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3,	"__builtin_lshrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3,	"__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3,	"__builtin_subv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3,	"__builtin_subv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3, "__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3, "__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3, "__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s,	"__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
  { CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
  { CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
  { CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
  { CODE_FOR_mac_media,	"__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
  { CODE_FOR_sqrtdf2,	"__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
  { CODE_FOR_sqrtsf2,	"__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
  { CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
  { CODE_FOR_ldhi_l,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldhi_q64,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_ldlo_l64,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldlo_q64,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_sthi_l64,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_sthi_q64,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_stlo_l64,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_stlo_q64,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_nsb,	"__builtin_sh_media_NSB", SH_BLTIN_SU },
  { CODE_FOR_byterev,	"__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
  { CODE_FOR_prefetch,	"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
};
9289 sh_media_init_builtins (void)
9291 tree shared
[SH_BLTIN_NUM_SHARED_SIGNATURES
];
9292 const struct builtin_description
*d
;
9294 memset (shared
, 0, sizeof shared
);
9295 for (d
= bdesc
; d
- bdesc
< (int) ARRAY_SIZE (bdesc
); d
++)
9297 tree type
, arg_type
= 0;
9298 int signature
= d
->signature
;
9301 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
&& shared
[signature
])
9302 type
= shared
[signature
];
9305 int has_result
= signature_args
[signature
][0] != 0;
9307 if ((signature_args
[signature
][1] & 8)
9308 && (((signature_args
[signature
][1] & 1) && TARGET_SHMEDIA32
)
9309 || ((signature_args
[signature
][1] & 2) && TARGET_SHMEDIA64
)))
9311 if (! TARGET_FPU_ANY
9312 && FLOAT_MODE_P (insn_data
[d
->icode
].operand
[0].mode
))
9314 type
= void_list_node
;
9317 int arg
= signature_args
[signature
][i
];
9318 int opno
= i
- 1 + has_result
;
9321 arg_type
= ptr_type_node
;
9323 arg_type
= (*lang_hooks
.types
.type_for_mode
)
9324 (insn_data
[d
->icode
].operand
[opno
].mode
,
9329 arg_type
= void_type_node
;
9332 type
= tree_cons (NULL_TREE
, arg_type
, type
);
9334 type
= build_function_type (arg_type
, type
);
9335 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
)
9336 shared
[signature
] = type
;
9338 lang_hooks
.builtin_function (d
->name
, type
, d
- bdesc
, BUILT_IN_MD
,
9343 /* Implements target hook vector_mode_supported_p. */
9345 sh_vector_mode_supported_p (enum machine_mode mode
)
9348 && ((mode
== V2SFmode
)
9349 || (mode
== V4SFmode
)
9350 || (mode
== V16SFmode
)))
9353 else if (TARGET_SHMEDIA
9354 && ((mode
== V8QImode
)
9355 || (mode
== V2HImode
)
9356 || (mode
== V4HImode
)
9357 || (mode
== V2SImode
)))
/* Implements target hook dwarf_calling_convention.  Return an enum
   of dwarf_calling_convention.  */
int
sh_dwarf_calling_convention (tree func)
{
  if (sh_attr_renesas_p (func))
    return DW_CC_GNU_renesas_sh;

  return DW_CC_normal;
}
9375 sh_init_builtins (void)
9378 sh_media_init_builtins ();
9381 /* Expand an expression EXP that calls a built-in function,
9382 with result going to TARGET if that's convenient
9383 (and in mode MODE if that's convenient).
9384 SUBTARGET may be used as the target for computing one of EXP's operands.
9385 IGNORE is nonzero if the value is to be ignored. */
9388 sh_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
9389 enum machine_mode mode ATTRIBUTE_UNUSED
, int ignore
)
9391 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
9392 tree arglist
= TREE_OPERAND (exp
, 1);
9393 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
9394 const struct builtin_description
*d
= &bdesc
[fcode
];
9395 enum insn_code icode
= d
->icode
;
9396 int signature
= d
->signature
;
9397 enum machine_mode tmode
= VOIDmode
;
9402 if (signature_args
[signature
][0])
9407 tmode
= insn_data
[icode
].operand
[0].mode
;
9409 || GET_MODE (target
) != tmode
9410 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9411 target
= gen_reg_rtx (tmode
);
9417 for (i
= 1; i
<= 3; i
++, nop
++)
9420 enum machine_mode opmode
, argmode
;
9423 if (! signature_args
[signature
][i
])
9425 arg
= TREE_VALUE (arglist
);
9426 if (arg
== error_mark_node
)
9428 arglist
= TREE_CHAIN (arglist
);
9429 if (signature_args
[signature
][i
] & 8)
9432 optype
= ptr_type_node
;
9436 opmode
= insn_data
[icode
].operand
[nop
].mode
;
9437 optype
= (*lang_hooks
.types
.type_for_mode
) (opmode
, 0);
9439 argmode
= TYPE_MODE (TREE_TYPE (arg
));
9440 if (argmode
!= opmode
)
9441 arg
= build1 (NOP_EXPR
, optype
, arg
);
9442 op
[nop
] = expand_expr (arg
, NULL_RTX
, opmode
, 0);
9443 if (! (*insn_data
[icode
].operand
[nop
].predicate
) (op
[nop
], opmode
))
9444 op
[nop
] = copy_to_mode_reg (opmode
, op
[nop
]);
9450 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0]);
9453 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1]);
9456 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2]);
9459 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2], op
[3]);
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
    = gen_binary_sf_op;
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}
9495 /* Return the class of registers for which a mode change from FROM to TO
9498 sh_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
9499 enum reg_class
class)
9501 /* We want to enable the use of SUBREGs as a means to
9502 VEC_SELECT a single element of a vector. */
9503 if (to
== SFmode
&& VECTOR_MODE_P (from
) && GET_MODE_INNER (from
) == SFmode
)
9504 return (reg_classes_intersect_p (GENERAL_REGS
, class));
9506 if (GET_MODE_SIZE (from
) != GET_MODE_SIZE (to
))
9508 if (TARGET_LITTLE_ENDIAN
)
9510 if (GET_MODE_SIZE (to
) < 8 || GET_MODE_SIZE (from
) < 8)
9511 return reg_classes_intersect_p (DF_REGS
, class);
9515 if (GET_MODE_SIZE (from
) < 8)
9516 return reg_classes_intersect_p (DF_HI_REGS
, class);
/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */

void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */
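/* Worked example (illustrative, derived from the formulas in the function
   below): moving an SImode value between two general register classes takes
   the fall-through cost 2 * ((4 + 3) / 4) = 2, while moving it between a
   general register class and a floating point register class without FMOVD
   costs 12 * ((4 + 7) / 8) = 12, which strongly biases reload against such
   cross-bank moves.  */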
9550 sh_register_move_cost (enum machine_mode mode
,
9551 enum reg_class srcclass
, enum reg_class dstclass
)
9553 if (dstclass
== T_REGS
|| dstclass
== PR_REGS
)
9556 if (dstclass
== MAC_REGS
&& srcclass
== MAC_REGS
)
9559 if (mode
== SImode
&& ! TARGET_SHMEDIA
&& TARGET_FMOVD
9560 && REGCLASS_HAS_FP_REG (srcclass
)
9561 && REGCLASS_HAS_FP_REG (dstclass
))
9564 if (REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== T_REGS
)
9565 return ((TARGET_HARD_SH4
&& !optimize_size
) ? 10 : 7);
9567 if ((REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== MAC_REGS
)
9568 || (dstclass
== MAC_REGS
&& REGCLASS_HAS_FP_REG (srcclass
)))
9571 if ((REGCLASS_HAS_FP_REG (dstclass
)
9572 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9573 || (REGCLASS_HAS_GENERAL_REG (dstclass
)
9574 && REGCLASS_HAS_FP_REG (srcclass
)))
9575 return ((TARGET_SHMEDIA
? 4 : TARGET_FMOVD
? 8 : 12)
9576 * ((GET_MODE_SIZE (mode
) + 7) / 8U));
9578 if ((dstclass
== FPUL_REGS
9579 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9580 || (srcclass
== FPUL_REGS
9581 && REGCLASS_HAS_GENERAL_REG (dstclass
)))
9584 if ((dstclass
== FPUL_REGS
9585 && (srcclass
== PR_REGS
|| srcclass
== MAC_REGS
|| srcclass
== T_REGS
))
9586 || (srcclass
== FPUL_REGS
9587 && (dstclass
== PR_REGS
|| dstclass
== MAC_REGS
)))
9590 if ((srcclass
== TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9591 || ((dstclass
) == TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9594 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9596 && ((srcclass
) == TARGET_REGS
|| (srcclass
) == SIBCALL_REGS
))
9598 if (sh_gettrcost
>= 0)
9599 return sh_gettrcost
;
9600 else if (!TARGET_PT_FIXED
)
9604 if ((srcclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9605 || (dstclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9610 && ! REGCLASS_HAS_GENERAL_REG (srcclass
)
9611 && ! REGCLASS_HAS_GENERAL_REG (dstclass
)))
9612 return 2 * ((GET_MODE_SIZE (mode
) + 7) / 8U);
9614 return 2 * ((GET_MODE_SIZE (mode
) + 3) / 4U);
static rtx emit_load_ptr (rtx, rtx);

static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_const_mem (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}
9630 sh_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
9631 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
9634 CUMULATIVE_ARGS cum
;
9635 int structure_value_byref
= 0;
9636 rtx
this, this_value
, sibcall
, insns
, funexp
;
9637 tree funtype
= TREE_TYPE (function
);
9638 int simple_add
= CONST_OK_FOR_ADD (delta
);
9640 rtx scratch0
, scratch1
, scratch2
;
9643 reload_completed
= 1;
9644 epilogue_completed
= 1;
9646 current_function_uses_only_leaf_regs
= 1;
9647 reset_block_changes ();
9649 emit_note (NOTE_INSN_PROLOGUE_END
);
9651 /* Find the "this" pointer. We have such a wide range of ABIs for the
9652 SH that it's best to do this completely machine independently.
9653 "this" is passed as first argument, unless a structure return pointer
9654 comes first, in which case "this" comes second. */
9655 INIT_CUMULATIVE_ARGS (cum
, funtype
, NULL_RTX
, 0, 1);
9656 #ifndef PCC_STATIC_STRUCT_RETURN
9657 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
9658 structure_value_byref
= 1;
9659 #endif /* not PCC_STATIC_STRUCT_RETURN */
9660 if (structure_value_byref
&& sh_struct_value_rtx (function
, 0) == 0)
9662 tree ptype
= build_pointer_type (TREE_TYPE (funtype
));
9664 FUNCTION_ARG_ADVANCE (cum
, Pmode
, ptype
, 1);
9666 this = FUNCTION_ARG (cum
, Pmode
, ptr_type_node
, 1);
9668 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9669 static chain pointer (even if you can't have nested virtual functions
9670 right now, someone might implement them sometime), and the rest of the
9671 registers are used for argument passing, are callee-saved, or reserved. */
9672 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
9673 -ffixed-reg has been used. */
9674 if (! call_used_regs
[0] || fixed_regs
[0])
9675 error ("r0 needs to be available as a call-clobbered register");
9676 scratch0
= scratch1
= scratch2
= gen_rtx_REG (Pmode
, 0);
9679 if (call_used_regs
[1] && ! fixed_regs
[1])
9680 scratch1
= gen_rtx_REG (ptr_mode
, 1);
9681 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9682 pointing where to return struct values. */
9683 if (call_used_regs
[3] && ! fixed_regs
[3])
9684 scratch2
= gen_rtx_REG (Pmode
, 3);
9686 else if (TARGET_SHMEDIA
)
9688 for (i
= FIRST_GENERAL_REG
; i
<= LAST_GENERAL_REG
; i
++)
9689 if (i
!= REGNO (scratch0
) &&
9690 call_used_regs
[i
] && ! fixed_regs
[i
] && ! FUNCTION_ARG_REGNO_P (i
))
9692 scratch1
= gen_rtx_REG (ptr_mode
, i
);
9695 if (scratch1
== scratch0
)
9696 error ("Need a second call-clobbered general purpose register");
9697 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
9698 if (call_used_regs
[i
] && ! fixed_regs
[i
])
9700 scratch2
= gen_rtx_REG (Pmode
, i
);
9703 if (scratch2
== scratch0
)
9704 error ("Need a call-clobbered target register");
9707 this_value
= plus_constant (this, delta
);
9709 && (simple_add
|| scratch0
!= scratch1
)
9710 && strict_memory_address_p (ptr_mode
, this_value
))
9712 emit_load_ptr (scratch0
, this_value
);
9718 else if (simple_add
)
9719 emit_move_insn (this, this_value
);
9722 emit_move_insn (scratch1
, GEN_INT (delta
));
9723 emit_insn (gen_add2_insn (this, scratch1
));
9731 emit_load_ptr (scratch0
, this);
9733 offset_addr
= plus_constant (scratch0
, vcall_offset
);
9734 if (strict_memory_address_p (ptr_mode
, offset_addr
))
9736 else if (! TARGET_SH5
&& scratch0
!= scratch1
)
9738 /* scratch0 != scratch1, and we have indexed loads. Get better
9739 schedule by loading the offset into r1 and using an indexed
9740 load - then the load of r1 can issue before the load from
9741 (this + delta) finishes. */
9742 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9743 offset_addr
= gen_rtx_PLUS (Pmode
, scratch0
, scratch1
);
9745 else if (CONST_OK_FOR_ADD (vcall_offset
))
9747 emit_insn (gen_add2_insn (scratch0
, GEN_INT (vcall_offset
)));
9748 offset_addr
= scratch0
;
9750 else if (scratch0
!= scratch1
)
9752 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9753 emit_insn (gen_add2_insn (scratch0
, scratch1
));
9754 offset_addr
= scratch0
;
9757 gcc_unreachable (); /* FIXME */
9758 emit_load_ptr (scratch0
, offset_addr
);
9760 if (Pmode
!= ptr_mode
)
9761 scratch0
= gen_rtx_TRUNCATE (ptr_mode
, scratch0
);
9762 emit_insn (gen_add2_insn (this, scratch0
));
9765 /* Generate a tail call to the target function. */
9766 if (! TREE_USED (function
))
9768 assemble_external (function
);
9769 TREE_USED (function
) = 1;
9771 funexp
= XEXP (DECL_RTL (function
), 0);
9772 /* If the function is overridden, so is the thunk, hence we don't
9773 need GOT addressing even if this is a public symbol. */
9775 if (TARGET_SH1
&& ! flag_weak
)
9776 sibcall
= gen_sibcalli_thunk (funexp
, const0_rtx
);
9779 if (TARGET_SH2
&& flag_pic
)
9781 sibcall
= gen_sibcall_pcrel (funexp
, const0_rtx
);
9782 XEXP (XVECEXP (sibcall
, 0, 2), 0) = scratch2
;
9786 if (TARGET_SHMEDIA
&& flag_pic
)
9788 funexp
= gen_sym2PIC (funexp
);
9789 PUT_MODE (funexp
, Pmode
);
9791 emit_move_insn (scratch2
, funexp
);
9792 funexp
= gen_rtx_MEM (FUNCTION_MODE
, scratch2
);
9793 sibcall
= gen_sibcall (funexp
, const0_rtx
, NULL_RTX
);
9795 sibcall
= emit_call_insn (sibcall
);
9796 SIBLING_CALL_P (sibcall
) = 1;
9797 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall
), this);
9800 /* Run just enough of rest_of_compilation to do scheduling and get
9801 the insns emitted. Note that use_thunk calls
9802 assemble_start_function and assemble_end_function. */
9804 insn_locators_initialize ();
9805 insns
= get_insns ();
9809 /* Initialize the bitmap obstacks. */
9810 bitmap_obstack_initialize (NULL
);
9811 bitmap_obstack_initialize (®_obstack
);
9814 rtl_register_cfg_hooks ();
9815 init_rtl_bb_info (ENTRY_BLOCK_PTR
);
9816 init_rtl_bb_info (EXIT_BLOCK_PTR
);
9817 ENTRY_BLOCK_PTR
->flags
|= BB_RTL
;
9818 EXIT_BLOCK_PTR
->flags
|= BB_RTL
;
9819 find_basic_blocks (insns
);
9821 if (flag_schedule_insns_after_reload
)
9823 life_analysis (dump_file
, PROP_FINAL
);
9825 split_all_insns (1);
9827 schedule_insns (dump_file
);
9829 /* We must split jmp insn in PIC case. */
9831 split_all_insns_noflow ();
9836 if (optimize
> 0 && flag_delayed_branch
)
9837 dbr_schedule (insns
, dump_file
);
9839 shorten_branches (insns
);
9840 final_start_function (insns
, file
, 1);
9841 final (insns
, file
, 1);
9842 final_end_function ();
9846 /* Release all memory allocated by flow. */
9847 free_basic_block_vars ();
9849 /* Release the bitmap obstacks. */
9850 bitmap_obstack_release (®_obstack
);
9851 bitmap_obstack_release (NULL
);
9854 reload_completed
= 0;
9855 epilogue_completed
= 0;
9860 function_symbol (rtx target
, const char *name
, enum sh_function_kind kind
)
9864 /* If this is not an ordinary function, the name usually comes from a
9865 string literal or an sprintf buffer. Make sure we use the same
9866 string consistently, so that cse will be able to unify address loads. */
9867 if (kind
!= FUNCTION_ORDINARY
)
9868 name
= IDENTIFIER_POINTER (get_identifier (name
));
9869 sym
= gen_rtx_SYMBOL_REF (Pmode
, name
);
9870 SYMBOL_REF_FLAGS (sym
) = SYMBOL_FLAG_FUNCTION
;
9874 case FUNCTION_ORDINARY
:
9878 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9880 emit_insn (gen_symGOT2reg (reg
, sym
));
9886 /* ??? To allow cse to work, we use GOTOFF relocations.
9887 we could add combiner patterns to transform this into
9888 straight pc-relative calls with sym2PIC / bsrf when
9889 label load and function call are still 1:1 and in the
9890 same basic block during combine. */
9891 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9893 emit_insn (gen_symGOTOFF2reg (reg
, sym
));
9898 if (target
&& sym
!= target
)
9900 emit_move_insn (target
, sym
);
/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}
9918 sh_get_pr_initial_val (void)
9922 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9923 PR register on SHcompact, because it might be clobbered by the prologue.
9924 We check first if that is known to be the case. */
9925 if (TARGET_SHCOMPACT
9926 && ((current_function_args_info
.call_cookie
9927 & ~ CALL_COOKIE_RET_TRAMP (1))
9928 || current_function_has_nonlocal_label
))
9929 return gen_frame_mem (SImode
, return_address_pointer_rtx
);
9931 /* If we haven't finished rtl generation, there might be a nonlocal label
9932 that we haven't seen yet.
9933 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9934 is set, unless it has been called before for the same register. And even
9935 then, we end in trouble if we didn't use the register in the same
9936 basic block before. So call get_hard_reg_initial_val now and wrap it
9937 in an unspec if we might need to replace it. */
9938 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9939 combine can put the pseudo returned by get_hard_reg_initial_val into
9940 instructions that need a general purpose registers, which will fail to
9941 be recognized when the pseudo becomes allocated to PR. */
9943 = get_hard_reg_initial_val (Pmode
, TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
);
9945 return gen_rtx_UNSPEC (SImode
, gen_rtvec (1, val
), UNSPEC_RA
);
9950 sh_expand_t_scc (enum rtx_code code
, rtx target
)
9952 rtx result
= target
;
9955 if (GET_CODE (sh_compare_op0
) != REG
|| REGNO (sh_compare_op0
) != T_REG
9956 || GET_CODE (sh_compare_op1
) != CONST_INT
)
9958 if (GET_CODE (result
) != REG
)
9959 result
= gen_reg_rtx (SImode
);
9960 val
= INTVAL (sh_compare_op1
);
9961 if ((code
== EQ
&& val
== 1) || (code
== NE
&& val
== 0))
9962 emit_insn (gen_movt (result
));
9963 else if ((code
== EQ
&& val
== 0) || (code
== NE
&& val
== 1))
9965 emit_insn (gen_rtx_CLOBBER (VOIDmode
, result
));
9966 emit_insn (gen_subc (result
, result
, result
));
9967 emit_insn (gen_addsi3 (result
, result
, const1_rtx
));
9969 else if (code
== EQ
|| code
== NE
)
9970 emit_insn (gen_move_insn (result
, GEN_INT (code
== NE
)));
9973 if (result
!= target
)
9974 emit_move_insn (target
, result
);
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}
9998 /* Verify that the register in use_sfunc_addr still agrees with the address
9999 used in the sfunc. This prevents fill_slots_from_thread from changing
10001 INSN is the use_sfunc_addr instruction, and REG is the register it
10004 check_use_sfunc_addr (rtx insn
, rtx reg
)
10006 /* Search for the sfunc. It should really come right after INSN. */
10007 while ((insn
= NEXT_INSN (insn
)))
10009 if (GET_CODE (insn
) == CODE_LABEL
|| GET_CODE (insn
) == JUMP_INSN
)
10011 if (! INSN_P (insn
))
10014 if (GET_CODE (PATTERN (insn
)) == SEQUENCE
)
10015 insn
= XVECEXP (PATTERN (insn
), 0, 0);
10016 if (GET_CODE (PATTERN (insn
)) != PARALLEL
10017 || get_attr_type (insn
) != TYPE_SFUNC
)
10019 return rtx_equal_p (extract_sfunc_addr (insn
), reg
);
10021 gcc_unreachable ();
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e., 0x10000 maps to
   2*pi.  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}
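/* Worked example for the constants above (illustrative, not part of the
   original sources): an angle of pi/2 radians multiplied by
   2**15/pi = 10430.378... gives 16384 = 0x4000, i.e. a quarter of the
   0x10000 that represents a full circle of 2*pi; multiplying an fsca-style
   fixed-point angle back by pi/2**15 = 9.5874e-5 undoes that scaling.  */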
10087 /* Initialize the CUMULATIVE_ARGS structure. */
10090 sh_init_cumulative_args (CUMULATIVE_ARGS
* pcum
,
10092 rtx libname ATTRIBUTE_UNUSED
,
10094 signed int n_named_args
,
10095 enum machine_mode mode
)
10097 pcum
->arg_count
[(int) SH_ARG_FLOAT
] = 0;
10098 pcum
->free_single_fp_reg
= 0;
10099 pcum
->stack_regs
= 0;
10100 pcum
->byref_regs
= 0;
10102 pcum
->outgoing
= (n_named_args
== -1) ? 0 : 1;
10104 /* XXX - Should we check TARGET_HITACHI here ??? */
10105 pcum
->renesas_abi
= sh_attr_renesas_p (fntype
) ? 1 : 0;
10109 pcum
->force_mem
= ((TARGET_HITACHI
|| pcum
->renesas_abi
)
10110 && aggregate_value_p (TREE_TYPE (fntype
), fndecl
));
10111 pcum
->prototype_p
= TYPE_ARG_TYPES (fntype
) ? TRUE
: FALSE
;
10112 pcum
->arg_count
[(int) SH_ARG_INT
]
10113 = TARGET_SH5
&& aggregate_value_p (TREE_TYPE (fntype
), fndecl
);
10116 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10117 && pcum
->arg_count
[(int) SH_ARG_INT
] == 0
10118 && (TYPE_MODE (TREE_TYPE (fntype
)) == BLKmode
10119 ? int_size_in_bytes (TREE_TYPE (fntype
))
10120 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype
)))) > 4
10121 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype
)))
10122 == FIRST_RET_REG
));
10126 pcum
->arg_count
[(int) SH_ARG_INT
] = 0;
10127 pcum
->prototype_p
= FALSE
;
10128 if (mode
!= VOIDmode
)
10130 pcum
->call_cookie
=
10131 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10132 && GET_MODE_SIZE (mode
) > 4
10133 && BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
);
10135 /* If the default ABI is the Renesas ABI then all library
10136 calls must assume that the library will be using the
10137 Renesas ABI. So if the function would return its result
10138 in memory then we must force the address of this memory
10139 block onto the stack. Ideally we would like to call
10140 targetm.calls.return_in_memory() here but we do not have
10141 the TYPE or the FNDECL available so we synthesize the
10142 contents of that function as best we can. */
10144 (TARGET_DEFAULT
& MASK_HITACHI
)
10145 && (mode
== BLKmode
10146 || (GET_MODE_SIZE (mode
) > 4
10147 && !(mode
== DFmode
10148 && TARGET_FPU_DOUBLE
)));
10152 pcum
->call_cookie
= 0;
10153 pcum
->force_mem
= FALSE
;
/* Determine if two hard register sets intersect.
   Return 1 if they do.  */
static bool
hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
{
  HARD_REG_SET c;

  COPY_HARD_REG_SET (c, *a);
  AND_HARD_REG_SET (c, *b);
  GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
  return 1;
lose:
  return 0;
}
10173 #ifdef TARGET_ADJUST_UNROLL_MAX
10175 sh_adjust_unroll_max (struct loop
* loop
, int insn_count
,
10176 int max_unrolled_insns
, int strength_reduce_p
,
10179 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10180 if (TARGET_ADJUST_UNROLL
&& TARGET_SHMEDIA
)
10182 /* Throttle back loop unrolling so that the costs of using more
10183 targets than the eight target register we have don't outweigh
10184 the benefits of unrolling. */
10186 int n_labels
= 0, n_calls
= 0, n_exit_dest
= 0, n_inner_loops
= -1;
10187 int n_barriers
= 0;
10192 int unroll_benefit
= 0, mem_latency
= 0;
10193 int base_cost
, best_cost
, cost
;
10194 int factor
, best_factor
;
10196 unsigned max_iterations
= 32767;
10198 int need_precond
= 0, precond
= 0;
10199 basic_block
* bbs
= get_loop_body (loop
);
10200 struct niter_desc
*desc
;
10202 /* Assume that all labels inside the loop are used from inside the
10203 loop. If the loop has multiple entry points, it is unlikely to
10204 be unrolled anyways.
10205 Also assume that all calls are to different functions. That is
10206 somewhat pessimistic, but if you have lots of calls, unrolling the
10207 loop is not likely to gain you much in the first place. */
10208 i
= loop
->num_nodes
- 1;
10209 for (insn
= BB_HEAD (bbs
[i
]); ; )
10211 if (GET_CODE (insn
) == CODE_LABEL
)
10213 else if (GET_CODE (insn
) == CALL_INSN
)
10215 else if (GET_CODE (insn
) == NOTE
10216 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
10218 else if (GET_CODE (insn
) == BARRIER
)
10220 if (insn
!= BB_END (bbs
[i
]))
10221 insn
= NEXT_INSN (insn
);
10223 insn
= BB_HEAD (bbs
[i
]);
10228 /* One label for the loop top is normal, and it won't be duplicated by
10231 return max_unrolled_insns
;
10232 if (n_inner_loops
> 0)
10234 for (dest
= loop
->exit_labels
; dest
&& n_exit_dest
< 8;
10235 dest
= LABEL_NEXTREF (dest
))
10237 for (i
= n_exit_dest
- 1;
10238 i
>= 0 && XEXP (dest
, 0) != XEXP (exit_dest
[i
], 0); i
--);
10240 exit_dest
[n_exit_dest
++] = dest
;
10242 /* If the loop top and call and exit destinations are enough to fill up
10243 the target registers, we're unlikely to do any more damage by
10245 if (n_calls
+ n_exit_dest
>= 7)
10246 return max_unrolled_insns
;
10248 /* ??? In the new loop unroller, there is no longer any strength
10249 reduction information available. Thus, when it comes to unrolling,
10250 we know the cost of everything, but we know the value of nothing. */
10252 if (strength_reduce_p
10253 && (unroll_type
== LPT_UNROLL_RUNTIME
10254 || unroll_type
== LPT_UNROLL_CONSTANT
10255 || unroll_type
== LPT_PEEL_COMPLETELY
))
10257 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10258 struct iv_class
*bl
;
10260 /* We'll save one compare-and-branch in each loop body copy
10261 but the last one. */
10262 unroll_benefit
= 1;
10263 /* Assess the benefit of removing biv & giv updates. */
10264 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10266 rtx increment
= biv_total_increment (bl
);
10267 struct induction
*v
;
10269 if (increment
&& GET_CODE (increment
) == CONST_INT
)
10272 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10274 if (! v
->ignore
&& v
->same
== 0
10275 && GET_CODE (v
->mult_val
) == CONST_INT
)
10277 /* If this giv uses an array, try to determine
10278 a maximum iteration count from the size of the
10279 array. This need not be correct all the time,
10280 but should not be too far off the mark too often. */
10281 while (v
->giv_type
== DEST_ADDR
)
10283 rtx mem
= PATTERN (v
->insn
);
10284 tree mem_expr
, type
, size_tree
;
10286 if (GET_CODE (SET_SRC (mem
)) == MEM
)
10287 mem
= SET_SRC (mem
);
10288 else if (GET_CODE (SET_DEST (mem
)) == MEM
)
10289 mem
= SET_DEST (mem
);
10292 mem_expr
= MEM_EXPR (mem
);
10295 type
= TREE_TYPE (mem_expr
);
10296 if (TREE_CODE (type
) != ARRAY_TYPE
10297 || ! TYPE_SIZE (type
) || ! TYPE_SIZE_UNIT (type
))
10299 size_tree
= fold_build2 (TRUNC_DIV_EXPR
,
10302 TYPE_SIZE_UNIT (type
));
10303 if (TREE_CODE (size_tree
) == INTEGER_CST
10304 && ! TREE_INT_CST_HIGH (size_tree
)
10305 && TREE_INT_CST_LOW (size_tree
) < max_iterations
)
10306 max_iterations
= TREE_INT_CST_LOW (size_tree
);
10314 /* Assume there is at least some benefit. */
10315 unroll_benefit
= 1;
10318 desc
= get_simple_loop_desc (loop
);
10319 n_iterations
= desc
->const_iter
? desc
->niter
: 0;
10321 = max_iterations
< desc
->niter_max
? max_iterations
: desc
->niter_max
;
10323 if (! strength_reduce_p
|| ! n_iterations
)
10325 if (! n_iterations
)
10328 = max_iterations
< 3 ? max_iterations
: max_iterations
* 3 / 4;
10329 if (! n_iterations
)
10332 #if 0 /* ??? See above - missing induction variable information. */
10333 while (unroll_benefit
> 1) /* no loop */
10335 /* We include the benefit of biv/ giv updates. Check if some or
10336 all of these updates are likely to fit into a scheduling
10338 We check for the following case:
10339 - All the insns leading to the first JUMP_INSN are in a strict
10341 - there is at least one memory reference in them.
10343 When we find such a pattern, we assume that we can hide as many
10344 updates as the total of the load latency is, if we have an
10345 unroll factor of at least two. We might or might not also do
10346 this without unrolling, so rather than considering this as an
10347 extra unroll benefit, discount it in the unroll benefits of unroll
10348 factors higher than two. */
10352 insn
= next_active_insn (loop
->start
);
10353 last_set
= single_set (insn
);
10356 if (GET_CODE (SET_SRC (last_set
)) == MEM
)
10358 for (insn
= NEXT_INSN (insn
); insn
!= end
; insn
= NEXT_INSN (insn
))
10360 if (! INSN_P (insn
))
10362 if (GET_CODE (insn
) == JUMP_INSN
)
10364 if (! reg_referenced_p (SET_DEST (last_set
), PATTERN (insn
)))
10366 /* Check if this is a to-be-reduced giv insn. */
10367 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10368 struct iv_class
*bl
;
10369 struct induction
*v
;
10370 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10372 if (bl
->biv
->insn
== insn
)
10374 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10375 if (v
->insn
== insn
)
10383 set
= single_set (insn
);
10386 if (GET_CODE (SET_SRC (set
)) == MEM
)
10390 if (mem_latency
< 0)
10392 else if (mem_latency
> unroll_benefit
- 1)
10393 mem_latency
= unroll_benefit
- 1;
10397 if (n_labels
+ (unroll_benefit
+ n_labels
* 8) / n_iterations
10399 return max_unrolled_insns
;
10401 n_dest
= n_labels
+ n_calls
+ n_exit_dest
;
10402 base_cost
= n_dest
<= 8 ? 0 : n_dest
- 7;
10405 if (n_barriers
* 2 > n_labels
- 1)
10406 n_barriers
= (n_labels
- 1) / 2;
10407 for (factor
= 2; factor
<= 8; factor
++)
10409 /* Bump up preconditioning cost for each power of two. */
10410 if (! (factor
& (factor
-1)))
10412 /* When preconditioning, only powers of two will be considered. */
10413 else if (need_precond
)
10415 n_dest
= ((unroll_type
!= LPT_PEEL_COMPLETELY
)
10416 + (n_labels
- 1) * factor
+ n_calls
+ n_exit_dest
10417 - (n_barriers
* factor
>> 1)
10420 = ((n_dest
<= 8 ? 0 : n_dest
- 7)
10421 - base_cost
* factor
10422 - ((factor
> 2 ? unroll_benefit
- mem_latency
: unroll_benefit
)
10423 * (factor
- (unroll_type
!= LPT_PEEL_COMPLETELY
)))
10424 + ((unroll_benefit
+ 1 + (n_labels
- 1) * factor
)
10427 cost
+= (precond
+ unroll_benefit
* factor
/ 2) / n_iterations
;
10428 if (cost
< best_cost
)
10431 best_factor
= factor
;
10434 threshold
= best_factor
* insn_count
;
10435 if (max_unrolled_insns
> threshold
)
10436 max_unrolled_insns
= threshold
;
10438 return max_unrolled_insns
;
10440 #endif /* TARGET_ADJUST_UNROLL_MAX */
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */
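/* Usage sketch (hypothetical values, for illustration only): with
   n_replacements == 2 and
     replacements[0] = FROM(0), replacements[1] = TO(0),
     replacements[2] = FROM(1), replacements[3] = TO(1),
   every occurrence of FROM(0) in X becomes TO(0) and every occurrence of
   FROM(1) becomes TO(1) in a single pass; passing modify == 0 only checks
   whether such a rewrite would succeed without touching the rtl.  */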
rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops when a MEM inside a CONST_DOUBLE would be
     replaced with the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2]
	&& GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      /* Fail if the replacement is ambiguous.  */
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
	{
	  new = replace_n_hard_rtx (XEXP (x, i), replacements,
				    n_replacements, modify);
	  if (! new)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
				      n_replacements, modify);
	    if (! new)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new;
	  }
    }

  return x;
}
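
/* Illustrative sketch only (not an interface used elsewhere in this file):
   per the comment above, a caller sets up REPLACEMENTS as FROM/TO pairs,
   optionally dry-runs with MODIFY == 0, and only then modifies in place.
   The register numbers below are hypothetical.

     rtx repl[4];
     repl[0] = gen_rtx_REG (DImode, 2);    FROM(0)
     repl[1] = gen_rtx_REG (DImode, 10);   TO(0)
     repl[2] = gen_rtx_REG (SImode, 4);    FROM(1)
     repl[3] = gen_rtx_REG (SImode, 12);   TO(1)
     First pass with MODIFY == 0 only checks that every replacement is
     unambiguous; NULL_RTX means failure.
     if (replace_n_hard_rtx (PATTERN (insn), repl, 2, 0))
       replace_n_hard_rtx (PATTERN (insn), repl, 2, 1);  */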

/* Build an rtx that truncates X to MODE.  If X is itself a ZERO_EXTEND or
   SIGN_EXTEND, fold the truncation into the extension when that is safe;
   NEED_SIGN_EXT is nonzero when sign-extension semantics must be
   preserved.  */
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}

/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);

      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}
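
/* Illustrative sketch only: the callback above is meant to be driven
   through for_each_rtx with an int accumulator, for instance over every
   insn after reload.  The loop below is a hypothetical caller, not code
   from this port.

     int n_changes = 0;
     rtx insn;
     for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
       if (INSN_P (insn))
	 for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);
     N_CHANGES then counts the truncates that were narrowed to DImode.  */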

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}
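
/* Illustrative sketch only: a scheduling or attribute helper could use the
   predicate above to count memory-touching insns in the current function.
   The loop below is hypothetical and not part of this port.

     int n_mem_insns = 0;
     rtx insn;
     for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
       if (INSN_P (insn) && sh_contains_memref_p (insn))
	 n_mem_insns++;  */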

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
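
/* Illustrative sketch only: a call expander would typically funnel the MEM
   operand through the helper above before emitting the call, along the
   lines of the hypothetical fragment below (the operand numbering is made
   up).

     rtx addr = shmedia_prepare_call_address (operands[0], 0);
     ADDR is then safe to use as the callee address in an SHmedia call
     pattern: a target register, the PDImode result of a ptabs, or a
     pseudo holding the (possibly PIC-adjusted) function address.  */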

enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (class == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
	    return GENERAL_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (class == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && class == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (class)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !EXTRA_CONSTRAINT_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
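
/* Illustrative sketch only: as a TARGET_SECONDARY_RELOAD hook, the function
   above is not called directly by this file; reload queries it for each
   reloaded operand.  A hypothetical query for loading an immediate into an
   FP register would look roughly like this:

     secondary_reload_info sri;
     enum reg_class scratch_class;
     sri.icode = CODE_FOR_nothing;
     scratch_class
       = sh_secondary_reload (true, GEN_INT (42), FP_REGS, SFmode, &sri);
     Either SCRATCH_CLASS names a class for an intermediate register, or
     sri.icode names a special reload pattern to use instead.  */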

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target.  */
const char * boardtype = "7750p2";
const char * osruntime = "bare";