1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
/* NOTE(review): this span carries extraction damage -- stray original
   line numbers are fused into the code as leading tokens, and several
   original lines are missing (blank lines, and at least one comment
   terminator; see the nosave_low_regs note below).  Restore from the
   pristine sh.c before compiling.  Annotations below are comments only;
   the code bytes are unchanged.  */
/* Insn code of the indirect-jump scratch pattern, exported so other
   files can reference it without including insn-codes machinery.  */
58 int code_for_indirect_jump_scratch
= CODE_FOR_indirect_jump_scratch
;
/* Index of the most/least significant SImode word of a double-word
   value; flips with target endianness.  */
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt
;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa
;
/* NOTE(review): the comment opened on the next line lost its final line
   and closing delimiter in extraction (original line 96, ending with
   "interrupted."); it currently swallows the following declarations.  */
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
97 int pragma_nosave_low_regs
;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args
;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu
;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight
[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure
[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles
= 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more
;
/* NOTE(review): extraction damage in this span -- stray original line
   numbers are fused into the code, the two comments opened below lost
   their closing delimiters (originals 124 and 130-131 are missing, as
   are the sh_compare_op0/sh_compare_op1 declarations that followed the
   first comment), and the initializer's opening and closing braces
   (originals 133 and 173) are gone.  The array maps each hard register
   number, in order, to the smallest containing register class:
   r0, general regs r1-r63, fp regs, target-branch regs, double regs,
   then special registers.  Verify against the pristine sh.c.  */
123 /* Saved operands from the last compare to use when we generate an scc
129 /* Provides the class number of the smallest class containing
132 enum reg_class regno_reg_class
[FIRST_PSEUDO_REGISTER
] =
134 R0_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
135 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
136 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
137 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
138 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
139 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
140 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
141 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
142 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
143 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
144 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
145 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
146 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
147 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
148 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
149 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
150 FP0_REGS
,FP_REGS
, FP_REGS
, FP_REGS
,
151 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
152 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
153 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
154 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
155 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
156 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
157 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
158 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
159 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
160 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
161 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
162 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
163 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
164 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
165 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
166 TARGET_REGS
, TARGET_REGS
, TARGET_REGS
, TARGET_REGS
,
167 TARGET_REGS
, TARGET_REGS
, TARGET_REGS
, TARGET_REGS
,
168 DF_REGS
, DF_REGS
, DF_REGS
, DF_REGS
,
169 DF_REGS
, DF_REGS
, DF_REGS
, DF_REGS
,
170 NO_REGS
, GENERAL_REGS
, PR_REGS
, T_REGS
,
171 MAC_REGS
, MAC_REGS
, FPUL_REGS
, FPSCR_REGS
,
172 GENERAL_REGS
, GENERAL_REGS
,
/* Assembler-visible names for each hard register, and the alternate
   (additional) names, both initialized from sh.h macros.  */
175 char sh_register_names
[FIRST_PSEUDO_REGISTER
] \
176 [MAX_REGISTER_NAME_LENGTH
+ 1] = SH_REGISTER_NAMES_INITIALIZER
;
178 char sh_additional_register_names
[ADDREGNAMES_SIZE
] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH
+ 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER
;
/* NOTE(review): extraction damage -- stray original line numbers fused
   into the code; the initializer's opening brace (original 187) and
   closing brace-semicolon (original 195) are missing.  One entry per
   constraint letter 'a'..'z'; NO_REGS marks letters reserved target-
   independently (marked with a double star in the entry comments).  */
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter
[] =
188 /* a */ ALL_REGS
, /* b */ TARGET_REGS
, /* c */ FPSCR_REGS
, /* d */ DF_REGS
,
189 /* e */ FP_REGS
, /* f */ FP_REGS
, /* g **/ NO_REGS
, /* h */ NO_REGS
,
190 /* i **/ NO_REGS
, /* j */ NO_REGS
, /* k */ SIBCALL_REGS
, /* l */ PR_REGS
,
191 /* m **/ NO_REGS
, /* n **/ NO_REGS
, /* o **/ NO_REGS
, /* p **/ NO_REGS
,
192 /* q */ NO_REGS
, /* r **/ NO_REGS
, /* s **/ NO_REGS
, /* t */ T_REGS
,
193 /* u */ NO_REGS
, /* v */ NO_REGS
, /* w */ FP0_REGS
, /* x */ MAC_REGS
,
194 /* y */ FPUL_REGS
, /* z */ R0_REGS
/* Which assembler dialect to emit (selects between alternates in
   output templates); see ASSEMBLER_DIALECT uses below.  */
197 int assembler_dialect
;
/* Whether the SH5 prologue reserved stack space for saving target
   (branch) registers; presumably set during prologue analysis --
   TODO confirm against the function bodies elsewhere in the file.  */
199 static bool shmedia_space_reserved_for_target_registers
;
/* Forward declarations for the static helpers defined later in this
   file.  NOTE(review): extraction damage -- stray original line numbers
   fused into the declarations, and a few prototype lines are missing
   entirely (originals 248, 250, 293, 295, 297: at least
   sh_attr_renesas_p/sh_use_dw2_unwind-era prototypes and the trailing
   parameter lines of sh_pass_by_reference, sh_callee_copies and
   sh_arg_partial_bytes).  Verify against the pristine sh.c.  */
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx
);
203 static int branch_dest (rtx
);
204 static void force_into (rtx
, rtx
);
205 static void print_slot (rtx
);
206 static rtx
add_constant (rtx
, enum machine_mode
, rtx
);
207 static void dump_table (rtx
, rtx
);
208 static int hi_const (rtx
);
209 static int broken_move (rtx
);
210 static int mova_p (rtx
);
211 static rtx
find_barrier (int, rtx
, rtx
);
212 static int noncall_uses_reg (rtx
, rtx
, rtx
*);
213 static rtx
gen_block_redirect (rtx
, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx
, int, HARD_REG_SET
*);
216 static rtx
frame_insn (rtx
);
217 static rtx
push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET
*, int);
220 static int calc_live_regs (HARD_REG_SET
*);
221 static void mark_use (rtx
, rtx
*);
222 static HOST_WIDE_INT
rounded_frame_size (int);
223 static rtx
mark_constant_pool_use (rtx
);
224 const struct attribute_spec sh_attribute_table
[];
225 static tree
sh_handle_interrupt_handler_attribute (tree
*, tree
, tree
, int, bool *);
226 static tree
sh_handle_sp_switch_attribute (tree
*, tree
, tree
, int, bool *);
227 static tree
sh_handle_trap_exit_attribute (tree
*, tree
, tree
, int, bool *);
228 static tree
sh_handle_renesas_attribute (tree
*, tree
, tree
, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT
);
230 static void sh_insert_attributes (tree
, tree
*);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx
, rtx
, rtx
, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx
, int, int, int *sort_p
);
235 static short find_set_regmode_weight (rtx
, enum machine_mode
);
236 static short find_insn_regmode_weight (rtx
, enum machine_mode
);
237 static void find_regmode_weight (int, enum machine_mode
);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx
*, int);
242 static void ready_reorder (rtx
*, int);
243 static short high_pressure (enum machine_mode
);
244 static int sh_reorder (FILE *, int, rtx
*, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx
*, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx
, int);
249 static bool sh_function_ok_for_sibcall (tree
, tree
);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree
);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx
sh_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
259 static void sh_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx
, rtx
);
262 static void flow_dependent_p_1 (rtx
, rtx
, void *);
263 static int shiftcosts (rtx
);
264 static int andcosts (rtx
);
265 static int addsubcosts (rtx
);
266 static int multcosts (rtx
);
267 static bool unspec_caller_rtx_p (rtx
);
268 static bool sh_cannot_copy_insn_p (rtx
);
269 static bool sh_rtx_costs (rtx
, int, int, int *);
270 static int sh_address_cost (rtx
);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop
*, int, int, int, int);
274 static int sh_pr_n_sets (void);
275 static rtx
sh_allocate_initial_value (rtx
);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET
*);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET
*);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET
*);
279 static int scavenge_reg (HARD_REG_SET
*s
);
280 struct save_schedule_s
;
281 static struct save_entry_s
*sh5_schedule_saves (HARD_REG_SET
*,
282 struct save_schedule_s
*, int);
284 static rtx
sh_struct_value_rtx (tree
, int);
285 static bool sh_return_in_memory (tree
, tree
);
286 static rtx
sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
, tree
, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS
*);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS
*);
290 static tree
sh_build_builtin_va_list (void);
291 static tree
sh_gimplify_va_arg_expr (tree
, tree
, tree
*, tree
*);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
294 static bool sh_callee_copies (CUMULATIVE_ARGS
*, enum machine_mode
,
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
298 static int sh_dwarf_calling_convention (tree
);
299 static int hard_regs_intersect_p (HARD_REG_SET
*, HARD_REG_SET
*);
/* Target-hook wiring: each pair of #undef/#define overrides a default
   member of the targetm structure before TARGET_INITIALIZER expands at
   the bottom of this span.  NOTE(review): extraction damage -- stray
   original line numbers fused into the directives, and the conditional
   scaffolding is incomplete: the #ifdef/#endif guards around the TLS,
   SYMBIAN (originals 486-497) and TARGET_ADJUST_UNROLL_MAX (originals
   501-502) sections are partly missing, so the Symbian-only hooks
   below currently appear unconditional.  Verify against pristine sh.c. */
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented for reenabling sched1. With the
347 help of these macros we are limiting the movement of insns in sched1 to
348 reduce the register pressure. The overall idea is to keep count of SImode
349 and SFmode regs required by already scheduled insns. When these counts
350 cross some threshold values; give priority to insns that free registers.
351 The insn that frees registers is most likely to be the insn with lowest
352 LUID (original insn order); but such an insn might be there in the stalled
353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
354 upto a max of 8 cycles so that such insns may move from Q -> R.
356 The description of the hooks are as below:
358 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
359 scheduler; it is called inside the sched_init function just after
360 find_insn_reg_weights function call. It is used to calculate the SImode
361 and SFmode weights of insns of basic blocks; much similar to what
362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
370 high; reorder the ready queue so that the insn with lowest LUID will be
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
503 #undef TARGET_SECONDARY_RELOAD
504 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
/* Instantiate the target hook vector with all of the overrides above.  */
506 struct gcc_target targetm
= TARGET_INITIALIZER
;
508 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): heavy extraction damage in this function -- the return
   type and opening brace (originals 509-510 and 513), the switch header
   (original 514), most `case OPT_mN:` labels, every `return true;` and
   the default/closing lines are missing, and stray original line
   numbers are fused into the surviving statements.  What remains shows
   the intact logic: each -mN CPU option clears the MASK_ARCH bits of
   target_flags and ORs in the matching SELECT_SHn selection.  Restore
   the full switch from the pristine sh.c before compiling.  */
511 sh_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
,
512 int value ATTRIBUTE_UNUSED
)
517 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH1
;
521 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2
;
525 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A
;
529 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_NOFPU
;
533 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_SINGLE
;
536 case OPT_m2a_single_only
:
537 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_SINGLE_ONLY
;
541 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2E
;
545 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH3
;
549 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH3E
;
553 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4
;
557 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_NOFPU
;
561 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_SINGLE
;
564 case OPT_m4_single_only
:
565 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_SINGLE_ONLY
;
569 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A
;
574 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_NOFPU
;
578 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_SINGLE
;
581 case OPT_m4a_single_only
:
582 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_SINGLE_ONLY
;
586 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_32MEDIA
;
589 case OPT_m5_32media_nofpu
:
590 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_32MEDIA_NOFPU
;
594 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_64MEDIA
;
597 case OPT_m5_64media_nofpu
:
598 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_64MEDIA_NOFPU
;
602 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_COMPACT
;
605 case OPT_m5_compact_nofpu
:
606 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_COMPACT_NOFPU
;
614 /* Print the operand address in x to the stream. */
617 print_operand_address (FILE *stream
, rtx x
)
619 switch (GET_CODE (x
))
623 fprintf (stream
, "@%s", reg_names
[true_regnum (x
)]);
628 rtx base
= XEXP (x
, 0);
629 rtx index
= XEXP (x
, 1);
631 switch (GET_CODE (index
))
634 fprintf (stream
, "@(%d,%s)", (int) INTVAL (index
),
635 reg_names
[true_regnum (base
)]);
641 int base_num
= true_regnum (base
);
642 int index_num
= true_regnum (index
);
644 fprintf (stream
, "@(r0,%s)",
645 reg_names
[MAX (base_num
, index_num
)]);
656 fprintf (stream
, "@-%s", reg_names
[true_regnum (XEXP (x
, 0))]);
660 fprintf (stream
, "@%s+", reg_names
[true_regnum (XEXP (x
, 0))]);
664 x
= mark_constant_pool_use (x
);
665 output_addr_const (stream
, x
);
/* NOTE(review): very heavy extraction damage in print_operand -- the
   function's return type, braces, the main `switch (code)`, all of its
   `case '.'`/`case ','`/etc. labels, many condition lines and all
   `break`s are missing, and stray original line numbers are fused into
   the surviving statements.  The modifier-letter dispatch documented in
   the header comment below is still recognizable from the surviving
   bodies, in order: '.', ',', '@', '#', '\'', '>', 'O', 'R', 'S', 'T',
   'o', 'M', 'm', 'd', 'N', 'u', then the default operand printer.
   Do not compile this span as-is; restore from the pristine sh.c.  */
670 /* Print operand x (an rtx) in assembler syntax to file stream
671 according to modifier code.
673 '.' print a .s if insn needs delay slot
674 ',' print LOCAL_LABEL_PREFIX
675 '@' print trap, rte or rts depending upon pragma interruptness
676 '#' output a nop if there is nothing to put in the delay slot
677 ''' print likelihood suffix (/u for unlikely).
678 '>' print branch target if -fverbose-asm
679 'O' print a constant without the #
680 'R' print the LSW of a dp value - changes if in little endian
681 'S' print the MSW of a dp value - changes if in little endian
682 'T' print the next word of a dp value - same as 'R' in big endian mode.
683 'M' print an `x' if `m' will print `base,index'.
684 'N' print 'r63' if the operand is (const_int 0).
685 'd' print a V2SF reg as dN instead of fpN.
686 'm' print a pair `base,offset' or `base,index', for LD and ST.
687 'U' Likewise for {LD,ST}{HI,LO}.
688 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
689 'o' output an operator. */
692 print_operand (FILE *stream
, rtx x
, int code
)
695 enum machine_mode mode
;
701 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
702 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
703 fprintf (stream
, ASSEMBLER_DIALECT
? "/s" : ".s");
706 fprintf (stream
, "%s", LOCAL_LABEL_PREFIX
);
710 fprintf (stream
, "trapa #%d", trap_exit
);
711 else if (sh_cfun_interrupt_handler_p ())
712 fprintf (stream
, "rte");
714 fprintf (stream
, "rts");
717 /* Output a nop if there's nothing in the delay slot. */
718 if (dbr_sequence_length () == 0)
719 fprintf (stream
, "\n\tnop");
723 rtx note
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
725 if (note
&& INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
726 fputs ("/u", stream
);
730 if (flag_verbose_asm
&& JUMP_LABEL (current_output_insn
))
732 fputs ("\t! target: ", stream
);
733 output_addr_const (stream
, JUMP_LABEL (current_output_insn
));
737 x
= mark_constant_pool_use (x
);
738 output_addr_const (stream
, x
);
740 /* N.B.: %R / %S / %T adjust memory addresses by four.
741 For SHMEDIA, that means they can be used to access the first and
742 second 32 bit part of a 64 bit (or larger) value that
743 might be held in floating point registers or memory.
744 While they can be used to access 64 bit parts of a larger value
745 held in general purpose registers, that won't work with memory -
746 neither for fp registers, since the frxx names are used. */
748 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
750 regno
= true_regnum (x
);
751 regno
+= FP_REGISTER_P (regno
) ? 1 : LSW
;
752 fputs (reg_names
[regno
], (stream
));
756 x
= adjust_address (x
, SImode
, 4 * LSW
);
757 print_operand_address (stream
, XEXP (x
, 0));
764 if (mode
== VOIDmode
)
766 if (GET_MODE_SIZE (mode
) >= 8)
767 sub
= simplify_subreg (SImode
, x
, mode
, 4 * LSW
);
769 print_operand (stream
, sub
, 0);
771 output_operand_lossage ("invalid operand to %%R");
775 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
777 regno
= true_regnum (x
);
778 regno
+= FP_REGISTER_P (regno
) ? 0 : MSW
;
779 fputs (reg_names
[regno
], (stream
));
783 x
= adjust_address (x
, SImode
, 4 * MSW
);
784 print_operand_address (stream
, XEXP (x
, 0));
791 if (mode
== VOIDmode
)
793 if (GET_MODE_SIZE (mode
) >= 8)
794 sub
= simplify_subreg (SImode
, x
, mode
, 4 * MSW
);
796 print_operand (stream
, sub
, 0);
798 output_operand_lossage ("invalid operand to %%S");
802 /* Next word of a double. */
803 switch (GET_CODE (x
))
806 fputs (reg_names
[REGNO (x
) + 1], (stream
));
809 if (GET_CODE (XEXP (x
, 0)) != PRE_DEC
810 && GET_CODE (XEXP (x
, 0)) != POST_INC
)
811 x
= adjust_address (x
, SImode
, 4);
812 print_operand_address (stream
, XEXP (x
, 0));
819 switch (GET_CODE (x
))
821 case PLUS
: fputs ("add", stream
); break;
822 case MINUS
: fputs ("sub", stream
); break;
823 case MULT
: fputs ("mul", stream
); break;
824 case DIV
: fputs ("div", stream
); break;
825 case EQ
: fputs ("eq", stream
); break;
826 case NE
: fputs ("ne", stream
); break;
827 case GT
: case LT
: fputs ("gt", stream
); break;
828 case GE
: case LE
: fputs ("ge", stream
); break;
829 case GTU
: case LTU
: fputs ("gtu", stream
); break;
830 case GEU
: case LEU
: fputs ("geu", stream
); break;
836 if (GET_CODE (x
) == MEM
837 && GET_CODE (XEXP (x
, 0)) == PLUS
838 && (GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
839 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == SUBREG
))
844 gcc_assert (GET_CODE (x
) == MEM
);
848 switch (GET_CODE (x
))
852 print_operand (stream
, x
, 0);
853 fputs (", 0", stream
);
857 print_operand (stream
, XEXP (x
, 0), 0);
858 fputs (", ", stream
);
859 print_operand (stream
, XEXP (x
, 1), 0);
868 gcc_assert (GET_CODE (x
) == REG
&& GET_MODE (x
) == V2SFmode
);
870 fprintf ((stream
), "d%s", reg_names
[REGNO (x
)] + 1);
874 if (x
== CONST0_RTX (GET_MODE (x
)))
876 fprintf ((stream
), "r63");
881 if (GET_CODE (x
) == CONST_INT
)
883 fprintf ((stream
), "%u", (unsigned) INTVAL (x
) & (0x10000 - 1));
893 switch (GET_CODE (x
))
897 rtx inner
= XEXP (x
, 0);
899 enum machine_mode inner_mode
;
901 /* We might see SUBREGs with vector mode registers inside. */
902 if (GET_CODE (inner
) == SUBREG
903 && (GET_MODE_SIZE (GET_MODE (inner
))
904 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
905 && subreg_lowpart_p (inner
))
906 inner
= SUBREG_REG (inner
);
907 if (GET_CODE (inner
) == CONST_INT
)
909 x
= GEN_INT (trunc_int_for_mode (INTVAL (inner
), GET_MODE (x
)));
912 inner_mode
= GET_MODE (inner
);
913 if (GET_CODE (inner
) == SUBREG
914 && (GET_MODE_SIZE (GET_MODE (inner
))
915 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
916 && GET_CODE (SUBREG_REG (inner
)) == REG
)
918 offset
= subreg_regno_offset (REGNO (SUBREG_REG (inner
)),
919 GET_MODE (SUBREG_REG (inner
)),
922 inner
= SUBREG_REG (inner
);
924 if (GET_CODE (inner
) != REG
|| GET_MODE_SIZE (inner_mode
) > 8)
926 /* Floating point register pairs are always big endian;
927 general purpose registers are 64 bit wide. */
928 regno
= REGNO (inner
);
929 regno
= (HARD_REGNO_NREGS (regno
, inner_mode
)
930 - HARD_REGNO_NREGS (regno
, mode
))
938 /* FIXME: We need this on SHmedia32 because reload generates
939 some sign-extended HI or QI loads into DImode registers
940 but, because Pmode is SImode, the address ends up with a
941 subreg:SI of the DImode register. Maybe reload should be
942 fixed so as to apply alter_subreg to such loads? */
944 gcc_assert (trapping_target_operand (x
, VOIDmode
));
945 x
= XEXP (XEXP (x
, 2), 0);
948 gcc_assert (SUBREG_BYTE (x
) == 0
949 && GET_CODE (SUBREG_REG (x
)) == REG
);
957 if (FP_REGISTER_P (regno
)
958 && mode
== V16SFmode
)
959 fprintf ((stream
), "mtrx%s", reg_names
[regno
] + 2);
960 else if (FP_REGISTER_P (REGNO (x
))
962 fprintf ((stream
), "fv%s", reg_names
[regno
] + 2);
963 else if (GET_CODE (x
) == REG
965 fprintf ((stream
), "fp%s", reg_names
[regno
] + 2);
966 else if (FP_REGISTER_P (REGNO (x
))
967 && GET_MODE_SIZE (mode
) > 4)
968 fprintf ((stream
), "d%s", reg_names
[regno
] + 1);
970 fputs (reg_names
[regno
], (stream
));
974 output_address (XEXP (x
, 0));
979 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
980 && (GET_MODE (XEXP (x
, 0)) == DImode
981 || GET_MODE (XEXP (x
, 0)) == SImode
)
982 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == TRUNCATE
983 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == HImode
)
985 rtx val
= XEXP (XEXP (XEXP (x
, 0), 0), 0);
988 if (GET_CODE (val
) == ASHIFTRT
)
991 if (GET_CODE (XEXP (val
, 0)) == CONST
)
993 output_addr_const (stream
, XEXP (val
, 0));
994 if (GET_CODE (XEXP (val
, 0)) == CONST
)
996 fputs (" >> ", stream
);
997 output_addr_const (stream
, XEXP (val
, 1));
1002 if (GET_CODE (val
) == CONST
)
1003 fputc ('(', stream
);
1004 output_addr_const (stream
, val
);
1005 if (GET_CODE (val
) == CONST
)
1006 fputc (')', stream
);
1008 fputs (" & 65535)", stream
);
1015 fputc ('#', stream
);
1016 output_addr_const (stream
, x
);
1023 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1025 force_into (rtx value
, rtx target
)
1027 value
= force_operand (value
, target
);
1028 if (! rtx_equal_p (value
, target
))
1029 emit_insn (gen_move_insn (target
, value
));
1032 /* Emit code to perform a block move. Choose the best method.
1034 OPERANDS[0] is the destination.
1035 OPERANDS[1] is the source.
1036 OPERANDS[2] is the size.
1037 OPERANDS[3] is the alignment safe to use. */
1040 expand_block_move (rtx
*operands
)
1042 int align
= INTVAL (operands
[3]);
1043 int constp
= (GET_CODE (operands
[2]) == CONST_INT
);
1044 int bytes
= (constp
? INTVAL (operands
[2]) : 0);
1049 /* If we could use mov.l to move words and dest is word-aligned, we
1050 can use movua.l for loads and still generate a relatively short
1051 and efficient sequence. */
1052 if (TARGET_SH4A_ARCH
&& align
< 4
1053 && MEM_ALIGN (operands
[0]) >= 32
1054 && can_move_by_pieces (bytes
, 32))
1056 rtx dest
= copy_rtx (operands
[0]);
1057 rtx src
= copy_rtx (operands
[1]);
1058 /* We could use different pseudos for each copied word, but
1059 since movua can only load into r0, it's kind of
1061 rtx temp
= gen_reg_rtx (SImode
);
1062 rtx src_addr
= copy_addr_to_reg (XEXP (src
, 0));
1065 while (copied
+ 4 <= bytes
)
1067 rtx to
= adjust_address (dest
, SImode
, copied
);
1068 rtx from
= adjust_automodify_address (src
, SImode
, src_addr
, copied
);
1070 emit_insn (gen_movua (temp
, from
));
1071 emit_move_insn (src_addr
, plus_constant (src_addr
, 4));
1072 emit_move_insn (to
, temp
);
1077 move_by_pieces (adjust_address (dest
, BLKmode
, copied
),
1078 adjust_automodify_address (src
, BLKmode
,
1080 bytes
- copied
, align
, 0);
1085 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1086 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1087 if (align
< 4 || (bytes
% 4 != 0))
1090 if (TARGET_HARD_SH4
)
1094 else if (bytes
== 12)
1096 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1097 rtx r4
= gen_rtx_REG (SImode
, 4);
1098 rtx r5
= gen_rtx_REG (SImode
, 5);
1100 function_symbol (func_addr_rtx
, "__movmemSI12_i4", SFUNC_STATIC
);
1101 force_into (XEXP (operands
[0], 0), r4
);
1102 force_into (XEXP (operands
[1], 0), r5
);
1103 emit_insn (gen_block_move_real_i4 (func_addr_rtx
));
1106 else if (! TARGET_SMALLCODE
)
1108 const char *entry_name
;
1109 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1111 rtx r4
= gen_rtx_REG (SImode
, 4);
1112 rtx r5
= gen_rtx_REG (SImode
, 5);
1113 rtx r6
= gen_rtx_REG (SImode
, 6);
1115 entry_name
= (bytes
& 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1116 function_symbol (func_addr_rtx
, entry_name
, SFUNC_STATIC
);
1117 force_into (XEXP (operands
[0], 0), r4
);
1118 force_into (XEXP (operands
[1], 0), r5
);
1120 dwords
= bytes
>> 3;
1121 emit_insn (gen_move_insn (r6
, GEN_INT (dwords
- 1)));
1122 emit_insn (gen_block_lump_real_i4 (func_addr_rtx
));
1131 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1132 rtx r4
= gen_rtx_REG (SImode
, 4);
1133 rtx r5
= gen_rtx_REG (SImode
, 5);
1135 sprintf (entry
, "__movmemSI%d", bytes
);
1136 function_symbol (func_addr_rtx
, entry
, SFUNC_STATIC
);
1137 force_into (XEXP (operands
[0], 0), r4
);
1138 force_into (XEXP (operands
[1], 0), r5
);
1139 emit_insn (gen_block_move_real (func_addr_rtx
));
1143 /* This is the same number of bytes as a memcpy call, but to a different
1144 less common function name, so this will occasionally use more space. */
1145 if (! TARGET_SMALLCODE
)
1147 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1148 int final_switch
, while_loop
;
1149 rtx r4
= gen_rtx_REG (SImode
, 4);
1150 rtx r5
= gen_rtx_REG (SImode
, 5);
1151 rtx r6
= gen_rtx_REG (SImode
, 6);
1153 function_symbol (func_addr_rtx
, "__movmem", SFUNC_STATIC
);
1154 force_into (XEXP (operands
[0], 0), r4
);
1155 force_into (XEXP (operands
[1], 0), r5
);
1157 /* r6 controls the size of the move. 16 is decremented from it
1158 for each 64 bytes moved. Then the negative bit left over is used
1159 as an index into a list of move instructions. e.g., a 72 byte move
1160 would be set up with size(r6) = 14, for one iteration through the
1161 big while loop, and a switch of -2 for the last part. */
1163 final_switch
= 16 - ((bytes
/ 4) % 16);
1164 while_loop
= ((bytes
/ 4) / 16 - 1) * 16;
1165 emit_insn (gen_move_insn (r6
, GEN_INT (while_loop
+ final_switch
)));
1166 emit_insn (gen_block_lump_real (func_addr_rtx
));
1173 /* Prepare operands for a move define_expand; specifically, one of the
1174 operands must be in a register. */
1177 prepare_move_operands (rtx operands
[], enum machine_mode mode
)
1179 if ((mode
== SImode
|| mode
== DImode
)
1181 && ! ((mode
== Pmode
|| mode
== ptr_mode
)
1182 && tls_symbolic_operand (operands
[1], Pmode
) != 0))
1185 if (SYMBOLIC_CONST_P (operands
[1]))
1187 if (GET_CODE (operands
[0]) == MEM
)
1188 operands
[1] = force_reg (Pmode
, operands
[1]);
1189 else if (TARGET_SHMEDIA
1190 && GET_CODE (operands
[1]) == LABEL_REF
1191 && target_reg_operand (operands
[0], mode
))
1195 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1196 operands
[1] = legitimize_pic_address (operands
[1], mode
, temp
);
1199 else if (GET_CODE (operands
[1]) == CONST
1200 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
1201 && SYMBOLIC_CONST_P (XEXP (XEXP (operands
[1], 0), 0)))
1203 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1204 temp
= legitimize_pic_address (XEXP (XEXP (operands
[1], 0), 0),
1206 operands
[1] = expand_binop (mode
, add_optab
, temp
,
1207 XEXP (XEXP (operands
[1], 0), 1),
1208 no_new_pseudos
? temp
1209 : gen_reg_rtx (Pmode
),
1210 0, OPTAB_LIB_WIDEN
);
1214 if (! reload_in_progress
&& ! reload_completed
)
1216 /* Copy the source to a register if both operands aren't registers. */
1217 if (! register_operand (operands
[0], mode
)
1218 && ! sh_register_operand (operands
[1], mode
))
1219 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1221 if (GET_CODE (operands
[0]) == MEM
&& ! memory_operand (operands
[0], mode
))
1223 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1224 except that we can't use that function because it is static. */
1225 rtx
new = change_address (operands
[0], mode
, 0);
1226 MEM_COPY_ATTRIBUTES (new, operands
[0]);
1230 /* This case can happen while generating code to move the result
1231 of a library call to the target. Reject `st r0,@(rX,rY)' because
1232 reload will fail to find a spill register for rX, since r0 is already
1233 being used for the source. */
1235 && refers_to_regno_p (R0_REG
, R0_REG
+ 1, operands
[1], (rtx
*)0)
1236 && GET_CODE (operands
[0]) == MEM
1237 && GET_CODE (XEXP (operands
[0], 0)) == PLUS
1238 && GET_CODE (XEXP (XEXP (operands
[0], 0), 1)) == REG
)
1239 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1242 if (mode
== Pmode
|| mode
== ptr_mode
)
1245 enum tls_model tls_kind
;
1249 if (GET_CODE (op1
) == CONST
1250 && GET_CODE (XEXP (op1
, 0)) == PLUS
1251 && tls_symbolic_operand (XEXP (XEXP (op1
, 0), 0), Pmode
))
1253 opc
= XEXP (XEXP (op1
, 0), 1);
1254 op1
= XEXP (XEXP (op1
, 0), 0);
1259 if ((tls_kind
= tls_symbolic_operand (op1
, Pmode
)))
1261 rtx tga_op1
, tga_ret
, tmp
, tmp2
;
1265 case TLS_MODEL_GLOBAL_DYNAMIC
:
1266 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1267 emit_call_insn (gen_tls_global_dynamic (tga_ret
, op1
));
1271 case TLS_MODEL_LOCAL_DYNAMIC
:
1272 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1273 emit_call_insn (gen_tls_local_dynamic (tga_ret
, op1
));
1275 tmp
= gen_reg_rtx (Pmode
);
1276 emit_move_insn (tmp
, tga_ret
);
1278 if (register_operand (op0
, Pmode
))
1281 tmp2
= gen_reg_rtx (Pmode
);
1283 emit_insn (gen_symDTPOFF2reg (tmp2
, op1
, tmp
));
1287 case TLS_MODEL_INITIAL_EXEC
:
1290 /* Don't schedule insns for getting GOT address when
1291 the first scheduling is enabled, to avoid spill
1293 if (flag_schedule_insns
)
1294 emit_insn (gen_blockage ());
1295 emit_insn (gen_GOTaddr2picreg ());
1296 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
,
1298 if (flag_schedule_insns
)
1299 emit_insn (gen_blockage ());
1301 tga_op1
= no_new_pseudos
? op0
: gen_reg_rtx (Pmode
);
1302 tmp
= gen_sym2GOTTPOFF (op1
);
1303 emit_insn (gen_tls_initial_exec (tga_op1
, tmp
));
1307 case TLS_MODEL_LOCAL_EXEC
:
1308 tmp2
= gen_reg_rtx (Pmode
);
1309 emit_insn (gen_load_gbr (tmp2
));
1310 tmp
= gen_reg_rtx (Pmode
);
1311 emit_insn (gen_symTPOFF2reg (tmp
, op1
));
1313 if (register_operand (op0
, Pmode
))
1316 op1
= gen_reg_rtx (Pmode
);
1318 emit_insn (gen_addsi3 (op1
, tmp
, tmp2
));
1325 emit_insn (gen_addsi3 (op1
, op1
, force_reg (SImode
, opc
)));
1333 /* Prepare the operands for an scc instruction; make sure that the
1334 compare has been done. */
1336 prepare_scc_operands (enum rtx_code code
)
1338 rtx t_reg
= gen_rtx_REG (SImode
, T_REG
);
1339 enum rtx_code oldcode
= code
;
1340 enum machine_mode mode
;
1342 /* First need a compare insn. */
1346 /* It isn't possible to handle this case. */
1363 if (code
!= oldcode
)
1365 rtx tmp
= sh_compare_op0
;
1366 sh_compare_op0
= sh_compare_op1
;
1367 sh_compare_op1
= tmp
;
1370 mode
= GET_MODE (sh_compare_op0
);
1371 if (mode
== VOIDmode
)
1372 mode
= GET_MODE (sh_compare_op1
);
1374 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1375 if ((code
!= EQ
&& code
!= NE
1376 && (sh_compare_op1
!= const0_rtx
1377 || code
== GTU
|| code
== GEU
|| code
== LTU
|| code
== LEU
))
1378 || (mode
== DImode
&& sh_compare_op1
!= const0_rtx
)
1379 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1380 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1382 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1383 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
)
1384 (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2,
1385 gen_rtx_SET (VOIDmode
, t_reg
,
1386 gen_rtx_fmt_ee (code
, SImode
,
1387 sh_compare_op0
, sh_compare_op1
)),
1388 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ()))));
1390 emit_insn (gen_rtx_SET (VOIDmode
, t_reg
,
1391 gen_rtx_fmt_ee (code
, SImode
,
1392 sh_compare_op0
, sh_compare_op1
)));
1397 /* Called from the md file, set up the operands of a compare instruction. */
1400 from_compare (rtx
*operands
, int code
)
1402 enum machine_mode mode
= GET_MODE (sh_compare_op0
);
1404 if (mode
== VOIDmode
)
1405 mode
= GET_MODE (sh_compare_op1
);
1408 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1410 /* Force args into regs, since we can't use constants here. */
1411 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1412 if (sh_compare_op1
!= const0_rtx
1413 || code
== GTU
|| code
== GEU
1414 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1415 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1417 if (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
&& code
== GE
)
1419 from_compare (operands
, GT
);
1420 insn
= gen_ieee_ccmpeqsf_t (sh_compare_op0
, sh_compare_op1
);
1423 insn
= gen_rtx_SET (VOIDmode
,
1424 gen_rtx_REG (SImode
, T_REG
),
1425 gen_rtx_fmt_ee (code
, SImode
,
1426 sh_compare_op0
, sh_compare_op1
));
1427 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1429 insn
= gen_rtx_PARALLEL (VOIDmode
,
1431 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ())));
1432 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
) (insn
);
1438 /* Functions to output assembly code. */
1440 /* Return a sequence of instructions to perform DI or DF move.
1442 Since the SH cannot move a DI or DF in one instruction, we have
1443 to take care when we see overlapping source and dest registers. */
1446 output_movedouble (rtx insn ATTRIBUTE_UNUSED
, rtx operands
[],
1447 enum machine_mode mode
)
1449 rtx dst
= operands
[0];
1450 rtx src
= operands
[1];
1452 if (GET_CODE (dst
) == MEM
1453 && GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
1454 return "mov.l %T1,%0\n\tmov.l %1,%0";
1456 if (register_operand (dst
, mode
)
1457 && register_operand (src
, mode
))
1459 if (REGNO (src
) == MACH_REG
)
1460 return "sts mach,%S0\n\tsts macl,%R0";
1462 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1463 when mov.d r1,r0 do r1->r0 then r2->r1. */
1465 if (REGNO (src
) + 1 == REGNO (dst
))
1466 return "mov %T1,%T0\n\tmov %1,%0";
1468 return "mov %1,%0\n\tmov %T1,%T0";
1470 else if (GET_CODE (src
) == CONST_INT
)
1472 if (INTVAL (src
) < 0)
1473 output_asm_insn ("mov #-1,%S0", operands
);
1475 output_asm_insn ("mov #0,%S0", operands
);
1477 return "mov %1,%R0";
1479 else if (GET_CODE (src
) == MEM
)
1482 int dreg
= REGNO (dst
);
1483 rtx inside
= XEXP (src
, 0);
1485 switch (GET_CODE (inside
))
1488 ptrreg
= REGNO (inside
);
1492 ptrreg
= subreg_regno (inside
);
1496 ptrreg
= REGNO (XEXP (inside
, 0));
1497 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1498 an offsettable address. Unfortunately, offsettable addresses use
1499 QImode to check the offset, and a QImode offsettable address
1500 requires r0 for the other operand, which is not currently
1501 supported, so we can't use the 'o' constraint.
1502 Thus we must check for and handle r0+REG addresses here.
1503 We punt for now, since this is likely very rare. */
1504 gcc_assert (GET_CODE (XEXP (inside
, 1)) != REG
);
1508 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1510 return "mov.l %1,%0\n\tmov.l %1,%T0";
1515 /* Work out the safe way to copy. Copy into the second half first. */
1517 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1520 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1523 /* Print an instruction which would have gone into a delay slot after
1524 another instruction, but couldn't because the other instruction expanded
1525 into a sequence where putting the slot insn at the end wouldn't work. */
1528 print_slot (rtx insn
)
1530 final_scan_insn (XVECEXP (insn
, 0, 1), asm_out_file
, optimize
, 1, NULL
);
1532 INSN_DELETED_P (XVECEXP (insn
, 0, 1)) = 1;
1536 output_far_jump (rtx insn
, rtx op
)
1538 struct { rtx lab
, reg
, op
; } this;
1539 rtx braf_base_lab
= NULL_RTX
;
1542 int offset
= branch_dest (insn
) - INSN_ADDRESSES (INSN_UID (insn
));
1545 this.lab
= gen_label_rtx ();
1549 && offset
- get_attr_length (insn
) <= 32766)
1552 jump
= "mov.w %O0,%1; braf %1";
1560 jump
= "mov.l %O0,%1; braf %1";
1562 jump
= "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1565 jump
= "mov.l %O0,%1; jmp @%1";
1567 /* If we have a scratch register available, use it. */
1568 if (GET_CODE ((prev
= prev_nonnote_insn (insn
))) == INSN
1569 && INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
1571 this.reg
= SET_DEST (XVECEXP (PATTERN (prev
), 0, 0));
1572 if (REGNO (this.reg
) == R0_REG
&& flag_pic
&& ! TARGET_SH2
)
1573 jump
= "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1574 output_asm_insn (jump
, &this.lab
);
1575 if (dbr_sequence_length ())
1576 print_slot (final_sequence
);
1578 output_asm_insn ("nop", 0);
1582 /* Output the delay slot insn first if any. */
1583 if (dbr_sequence_length ())
1584 print_slot (final_sequence
);
1586 this.reg
= gen_rtx_REG (SImode
, 13);
1587 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1588 Fortunately, MACL is fixed and call-clobbered, and we never
1589 need its value across jumps, so save r13 in it instead of in
1592 output_asm_insn ("lds r13, macl", 0);
1594 output_asm_insn ("mov.l r13,@-r15", 0);
1595 output_asm_insn (jump
, &this.lab
);
1597 output_asm_insn ("sts macl, r13", 0);
1599 output_asm_insn ("mov.l @r15+,r13", 0);
1601 if (far
&& flag_pic
&& TARGET_SH2
)
1603 braf_base_lab
= gen_label_rtx ();
1604 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
1605 CODE_LABEL_NUMBER (braf_base_lab
));
1608 output_asm_insn (".align 2", 0);
1609 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L", CODE_LABEL_NUMBER (this.lab
));
1611 if (far
&& flag_pic
)
1614 this.lab
= braf_base_lab
;
1615 output_asm_insn (".long %O2-%O0", &this.lab
);
1618 output_asm_insn (far
? ".long %O2" : ".word %O2-%O0", &this.lab
);
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
1627 /* Output code for ordinary branches. */
1630 output_branch (int logic
, rtx insn
, rtx
*operands
)
1632 switch (get_attr_length (insn
))
1635 /* This can happen if filling the delay slot has caused a forward
1636 branch to exceed its range (we could reverse it, but only
1637 when we know we won't overextend other branches; this should
1638 best be handled by relaxation).
1639 It can also happen when other condbranches hoist delay slot insn
1640 from their destination, thus leading to code size increase.
1641 But the branch will still be in the range -4092..+4098 bytes. */
1646 /* The call to print_slot will clobber the operands. */
1647 rtx op0
= operands
[0];
1649 /* If the instruction in the delay slot is annulled (true), then
1650 there is no delay slot where we can put it now. The only safe
1651 place for it is after the label. final will do that by default. */
1654 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
1655 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
1657 asm_fprintf (asm_out_file
, "\tb%s%ss\t%LLF%d\n", logic
? "f" : "t",
1658 ASSEMBLER_DIALECT
? "/" : ".", label
);
1659 print_slot (final_sequence
);
1662 asm_fprintf (asm_out_file
, "\tb%s\t%LLF%d\n", logic
? "f" : "t", label
);
1664 output_asm_insn ("bra\t%l0", &op0
);
1665 fprintf (asm_out_file
, "\tnop\n");
1666 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1670 /* When relaxing, handle this like a short branch. The linker
1671 will fix it up if it still doesn't fit after relaxation. */
1673 return logic
? "bt%.\t%l0" : "bf%.\t%l0";
1675 /* These are for SH2e, in which we have to account for the
1676 extra nop because of the hardware bug in annulled branches. */
1682 gcc_assert (!final_sequence
1683 || !(INSN_ANNULLED_BRANCH_P
1684 (XVECEXP (final_sequence
, 0, 0))));
1685 asm_fprintf (asm_out_file
, "b%s%ss\t%LLF%d\n",
1687 ASSEMBLER_DIALECT
? "/" : ".", label
);
1688 fprintf (asm_out_file
, "\tnop\n");
1689 output_asm_insn ("bra\t%l0", operands
);
1690 fprintf (asm_out_file
, "\tnop\n");
1691 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1695 /* When relaxing, fall through. */
1700 sprintf (buffer
, "b%s%ss\t%%l0",
1702 ASSEMBLER_DIALECT
? "/" : ".");
1703 output_asm_insn (buffer
, &operands
[0]);
1708 /* There should be no longer branches now - that would
1709 indicate that something has destroyed the branches set
1710 up in machine_dependent_reorg. */
1716 output_branchy_insn (enum rtx_code code
, const char *template,
1717 rtx insn
, rtx
*operands
)
1719 rtx next_insn
= NEXT_INSN (insn
);
1721 if (next_insn
&& GET_CODE (next_insn
) == JUMP_INSN
&& condjump_p (next_insn
))
1723 rtx src
= SET_SRC (PATTERN (next_insn
));
1724 if (GET_CODE (src
) == IF_THEN_ELSE
&& GET_CODE (XEXP (src
, 0)) != code
)
1726 /* Following branch not taken */
1727 operands
[9] = gen_label_rtx ();
1728 emit_label_after (operands
[9], next_insn
);
1729 INSN_ADDRESSES_NEW (operands
[9],
1730 INSN_ADDRESSES (INSN_UID (next_insn
))
1731 + get_attr_length (next_insn
));
1736 int offset
= (branch_dest (next_insn
)
1737 - INSN_ADDRESSES (INSN_UID (next_insn
)) + 4);
1738 if (offset
>= -252 && offset
<= 258)
1740 if (GET_CODE (src
) == IF_THEN_ELSE
)
1742 src
= XEXP (src
, 1);
1748 operands
[9] = gen_label_rtx ();
1749 emit_label_after (operands
[9], insn
);
1750 INSN_ADDRESSES_NEW (operands
[9],
1751 INSN_ADDRESSES (INSN_UID (insn
))
1752 + get_attr_length (insn
));
1757 output_ieee_ccmpeq (rtx insn
, rtx
*operands
)
1759 return output_branchy_insn (NE
, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1763 /* Output the start of the assembler file. */
1766 sh_file_start (void)
1768 default_file_start ();
1771 /* Declare the .directive section before it is used. */
1772 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file
);
1773 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file
);
1777 /* We need to show the text section with the proper
1778 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1779 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1780 will complain. We can teach GAS specifically about the
1781 default attributes for our choice of text section, but
1782 then we would have to change GAS again if/when we change
1783 the text section name. */
1784 fprintf (asm_out_file
, "%s\n", TEXT_SECTION_ASM_OP
);
1786 /* Switch to the data section so that the coffsem symbol
1787 isn't in the text section. */
1790 if (TARGET_LITTLE_ENDIAN
)
1791 fputs ("\t.little\n", asm_out_file
);
1795 if (TARGET_SHCOMPACT
)
1796 fputs ("\t.mode\tSHcompact\n", asm_out_file
);
1797 else if (TARGET_SHMEDIA
)
1798 fprintf (asm_out_file
, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1799 TARGET_SHMEDIA64
? 64 : 32);
1803 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1806 unspec_caller_rtx_p (rtx pat
)
1808 switch (GET_CODE (pat
))
1811 return unspec_caller_rtx_p (XEXP (pat
, 0));
1814 if (unspec_caller_rtx_p (XEXP (pat
, 0)))
1816 return unspec_caller_rtx_p (XEXP (pat
, 1));
1818 if (XINT (pat
, 1) == UNSPEC_CALLER
)
1827 /* Indicate that INSN cannot be duplicated. This is true for insn
1828 that generates a unique label. */
1831 sh_cannot_copy_insn_p (rtx insn
)
1835 if (!reload_completed
|| !flag_pic
)
1838 if (GET_CODE (insn
) != INSN
)
1840 if (asm_noperands (insn
) >= 0)
1843 pat
= PATTERN (insn
);
1844 if (GET_CODE (pat
) != SET
)
1846 pat
= SET_SRC (pat
);
1848 if (unspec_caller_rtx_p (pat
))
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  A negative amount means the opposite shift
   direction (see gen_ashift).  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
   to shift it by N without data loss, and quicker than by other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1898 /* This is used in length attributes in sh.md to help compute the length
1899 of arbitrary constant shift instructions. */
1902 shift_insns_rtx (rtx insn
)
1904 rtx set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
1905 int shift_count
= INTVAL (XEXP (set_src
, 1));
1906 enum rtx_code shift_code
= GET_CODE (set_src
);
1911 return ashiftrt_insns
[shift_count
];
1914 return shift_insns
[shift_count
];
1920 /* Return the cost of a shift. */
1930 if (GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
1932 if (GET_MODE (x
) == DImode
1933 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1934 && INTVAL (XEXP (x
, 1)) == 1)
1937 /* Everything else is invalid, because there is no pattern for it. */
1940 /* If shift by a non constant, then this will be expensive. */
1941 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1942 return SH_DYNAMIC_SHIFT_COST
;
1944 value
= INTVAL (XEXP (x
, 1));
1946 /* Otherwise, return the true cost in instructions. */
1947 if (GET_CODE (x
) == ASHIFTRT
)
1949 int cost
= ashiftrt_insns
[value
];
1950 /* If SH3, then we put the constant in a reg and use shad. */
1951 if (cost
> 1 + SH_DYNAMIC_SHIFT_COST
)
1952 cost
= 1 + SH_DYNAMIC_SHIFT_COST
;
1956 return shift_insns
[value
];
1959 /* Return the cost of an AND operation. */
1966 /* Anding with a register is a single cycle and instruction. */
1967 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1970 i
= INTVAL (XEXP (x
, 1));
1974 if ((GET_CODE (XEXP (x
, 1)) == CONST_INT
1975 && CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1))))
1976 || EXTRA_CONSTRAINT_C16 (XEXP (x
, 1)))
1982 /* These constants are single cycle extu.[bw] instructions. */
1983 if (i
== 0xff || i
== 0xffff)
1985 /* Constants that can be used in an and immediate instruction in a single
1986 cycle, but this requires r0, so make it a little more expensive. */
1987 if (CONST_OK_FOR_K08 (i
))
1989 /* Constants that can be loaded with a mov immediate and an and.
1990 This case is probably unnecessary. */
1991 if (CONST_OK_FOR_I08 (i
))
1993 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1994 This case is probably unnecessary. */
1998 /* Return the cost of an addition or a subtraction. */
2003 /* Adding a register is a single cycle insn. */
2004 if (GET_CODE (XEXP (x
, 1)) == REG
2005 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
2008 /* Likewise for small constants. */
2009 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
2010 && CONST_OK_FOR_ADD (INTVAL (XEXP (x
, 1))))
2014 switch (GET_CODE (XEXP (x
, 1)))
2019 return TARGET_SHMEDIA64
? 5 : 3;
2022 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1))))
2024 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1)) >> 16))
2026 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x
, 1)) >> 16) >> 16))
2034 /* Any other constant requires a 2 cycle pc-relative load plus an
2039 /* Return the cost of a multiply. */
2041 multcosts (rtx x ATTRIBUTE_UNUSED
)
2043 if (sh_multcost
>= 0)
2046 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2047 accept constants. Ideally, we would use a cost of one or two and
2048 add the cost of the operand, but disregard the latter when inside loops
2049 and loop invariant code motion is still to follow.
2050 Using a multiply first and splitting it later if it's a loss
2051 doesn't work because of different sign / zero extension semantics
2052 of multiplies vs. shifts. */
2053 return TARGET_SMALLCODE
? 2 : 3;
2057 /* We have a mul insn, so we can never take more than the mul and the
2058 read of the mac reg, but count more because of the latency and extra
2060 if (TARGET_SMALLCODE
)
2065 /* If we're aiming at small code, then just count the number of
2066 insns in a multiply call sequence. */
2067 if (TARGET_SMALLCODE
)
2070 /* Otherwise count all the insns in the routine we'd be calling too. */
2074 /* Compute a (partial) cost for rtx X. Return true if the complete
2075 cost has been computed, and false if subexpressions should be
2076 scanned. In either case, *TOTAL contains the cost result. */
2079 sh_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
2086 if (INTVAL (x
) == 0)
2088 else if (outer_code
== AND
&& and_operand ((x
), DImode
))
2090 else if ((outer_code
== IOR
|| outer_code
== XOR
2091 || outer_code
== PLUS
)
2092 && CONST_OK_FOR_I10 (INTVAL (x
)))
2094 else if (CONST_OK_FOR_I16 (INTVAL (x
)))
2095 *total
= COSTS_N_INSNS (outer_code
!= SET
);
2096 else if (CONST_OK_FOR_I16 (INTVAL (x
) >> 16))
2097 *total
= COSTS_N_INSNS ((outer_code
!= SET
) + 1);
2098 else if (CONST_OK_FOR_I16 ((INTVAL (x
) >> 16) >> 16))
2099 *total
= COSTS_N_INSNS (3);
2101 *total
= COSTS_N_INSNS (4);
2104 if (CONST_OK_FOR_I08 (INTVAL (x
)))
2106 else if ((outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
2107 && CONST_OK_FOR_K08 (INTVAL (x
)))
2116 if (TARGET_SHMEDIA64
)
2117 *total
= COSTS_N_INSNS (4);
2118 else if (TARGET_SHMEDIA32
)
2119 *total
= COSTS_N_INSNS (2);
2126 *total
= COSTS_N_INSNS (4);
2131 if (x
== CONST0_RTX (GET_MODE (x
)))
2133 else if (sh_1el_vec (x
, VOIDmode
))
2134 *total
= outer_code
!= SET
;
2135 if (sh_rep_vec (x
, VOIDmode
))
2136 *total
= ((GET_MODE_UNIT_SIZE (GET_MODE (x
)) + 3) / 4
2137 + (outer_code
!= SET
));
2138 *total
= COSTS_N_INSNS (3) + (outer_code
!= SET
);
2143 *total
= COSTS_N_INSNS (addsubcosts (x
));
2147 *total
= COSTS_N_INSNS (andcosts (x
));
2151 *total
= COSTS_N_INSNS (multcosts (x
));
2157 *total
= COSTS_N_INSNS (shiftcosts (x
));
2164 *total
= COSTS_N_INSNS (20);
2168 if (sh_1el_vec (x
, VOIDmode
))
2169 *total
= outer_code
!= SET
;
2170 if (sh_rep_vec (x
, VOIDmode
))
2171 *total
= ((GET_MODE_UNIT_SIZE (GET_MODE (x
)) + 3) / 4
2172 + (outer_code
!= SET
));
2173 *total
= COSTS_N_INSNS (3) + (outer_code
!= SET
);
2186 /* Compute the cost of an address. For the SH, all valid addresses are
2187 the same cost. Use a slightly higher cost for reg + reg addressing,
2188 since it increases pressure on r0. */
2191 sh_address_cost (rtx X
)
2193 return (GET_CODE (X
) == PLUS
2194 && ! CONSTANT_P (XEXP (X
, 1))
2195 && ! TARGET_SHMEDIA
? 1 : 0);
2198 /* Code to expand a shift. */
2201 gen_ashift (int type
, int n
, rtx reg
)
2203 /* Negative values here come from the shift_amounts array. */
2216 emit_insn (gen_ashrsi3_k (reg
, reg
, GEN_INT (n
)));
2220 emit_insn (gen_lshrsi3_m (reg
, reg
, GEN_INT (n
)));
2222 emit_insn (gen_lshrsi3_k (reg
, reg
, GEN_INT (n
)));
2225 emit_insn (gen_ashlsi3_std (reg
, reg
, GEN_INT (n
)));
2230 /* Same for HImode */
2233 gen_ashift_hi (int type
, int n
, rtx reg
)
2235 /* Negative values here come from the shift_amounts array. */
2249 /* We don't have HImode right shift operations because using the
2250 ordinary 32 bit shift instructions for that doesn't generate proper
2251 zero/sign extension.
2252 gen_ashift_hi is only called in contexts where we know that the
2253 sign extension works out correctly. */
2256 if (GET_CODE (reg
) == SUBREG
)
2258 offset
= SUBREG_BYTE (reg
);
2259 reg
= SUBREG_REG (reg
);
2261 gen_ashift (type
, n
, gen_rtx_SUBREG (SImode
, reg
, offset
));
2265 emit_insn (gen_ashlhi3_k (reg
, reg
, GEN_INT (n
)));
2270 /* Output RTL to split a constant shift into its component SH constant
2271 shift instructions. */
2274 gen_shifty_op (int code
, rtx
*operands
)
2276 int value
= INTVAL (operands
[2]);
2279 /* Truncate the shift count in case it is out of bounds. */
2280 value
= value
& 0x1f;
2284 if (code
== LSHIFTRT
)
2286 emit_insn (gen_rotlsi3_1 (operands
[0], operands
[0]));
2287 emit_insn (gen_movt (operands
[0]));
2290 else if (code
== ASHIFT
)
2292 /* There is a two instruction sequence for 31 bit left shifts,
2293 but it requires r0. */
2294 if (GET_CODE (operands
[0]) == REG
&& REGNO (operands
[0]) == 0)
2296 emit_insn (gen_andsi3 (operands
[0], operands
[0], const1_rtx
));
2297 emit_insn (gen_rotlsi3_31 (operands
[0], operands
[0]));
2302 else if (value
== 0)
2304 /* This can happen even when optimizing, if there were subregs before
2305 reload. Don't output a nop here, as this is never optimized away;
2306 use a no-op move instead. */
2307 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[0]));
2311 max
= shift_insns
[value
];
2312 for (i
= 0; i
< max
; i
++)
2313 gen_ashift (code
, shift_amounts
[value
][i
], operands
[0]);
2316 /* Same as above, but optimized for values where the topmost bits don't
2320 gen_shifty_hi_op (int code
, rtx
*operands
)
2322 int value
= INTVAL (operands
[2]);
2324 void (*gen_fun
) (int, int, rtx
);
2326 /* This operation is used by and_shl for SImode values with a few
2327 high bits known to be cleared. */
2331 emit_insn (gen_nop ());
2335 gen_fun
= GET_MODE (operands
[0]) == HImode
? gen_ashift_hi
: gen_ashift
;
2338 max
= ext_shift_insns
[value
];
2339 for (i
= 0; i
< max
; i
++)
2340 gen_fun (code
, ext_shift_amounts
[value
][i
], operands
[0]);
2343 /* When shifting right, emit the shifts in reverse order, so that
2344 solitary negative values come first. */
2345 for (i
= ext_shift_insns
[value
] - 1; i
>= 0; i
--)
2346 gen_fun (code
, ext_shift_amounts
[value
][i
], operands
[0]);
2349 /* Output RTL for an arithmetic right shift. */
2351 /* ??? Rewrite to use super-optimizer sequences. */
2354 expand_ashiftrt (rtx
*operands
)
2362 if (GET_CODE (operands
[2]) != CONST_INT
)
2364 rtx count
= copy_to_mode_reg (SImode
, operands
[2]);
2365 emit_insn (gen_negsi2 (count
, count
));
2366 emit_insn (gen_ashrsi3_d (operands
[0], operands
[1], count
));
2369 else if (ashiftrt_insns
[INTVAL (operands
[2]) & 31]
2370 > 1 + SH_DYNAMIC_SHIFT_COST
)
2373 = force_reg (SImode
, GEN_INT (- (INTVAL (operands
[2]) & 31)));
2374 emit_insn (gen_ashrsi3_d (operands
[0], operands
[1], count
));
2378 if (GET_CODE (operands
[2]) != CONST_INT
)
2381 value
= INTVAL (operands
[2]) & 31;
2385 /* If we are called from abs expansion, arrange things so that we
2386 we can use a single MT instruction that doesn't clobber the source,
2387 if LICM can hoist out the load of the constant zero. */
2388 if (currently_expanding_to_rtl
)
2390 emit_insn (gen_cmpgtsi_t (force_reg (SImode
, CONST0_RTX (SImode
)),
2392 emit_insn (gen_mov_neg_si_t (operands
[0]));
2395 emit_insn (gen_ashrsi2_31 (operands
[0], operands
[1]));
2398 else if (value
>= 16 && value
<= 19)
2400 wrk
= gen_reg_rtx (SImode
);
2401 emit_insn (gen_ashrsi2_16 (wrk
, operands
[1]));
2404 gen_ashift (ASHIFTRT
, 1, wrk
);
2405 emit_move_insn (operands
[0], wrk
);
2408 /* Expand a short sequence inline, longer call a magic routine. */
2409 else if (value
<= 5)
2411 wrk
= gen_reg_rtx (SImode
);
2412 emit_move_insn (wrk
, operands
[1]);
2414 gen_ashift (ASHIFTRT
, 1, wrk
);
2415 emit_move_insn (operands
[0], wrk
);
2419 wrk
= gen_reg_rtx (Pmode
);
2421 /* Load the value into an arg reg and call a helper. */
2422 emit_move_insn (gen_rtx_REG (SImode
, 4), operands
[1]);
2423 sprintf (func
, "__ashiftrt_r4_%d", value
);
2424 function_symbol (wrk
, func
, SFUNC_STATIC
);
2425 emit_insn (gen_ashrsi3_n (GEN_INT (value
), wrk
));
2426 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 4));
2431 sh_dynamicalize_shift_p (rtx count
)
2433 return shift_insns
[INTVAL (count
)] > 1 + SH_DYNAMIC_SHIFT_COST
;
2436 /* Try to find a good way to implement the combiner pattern
2437 [(set (match_operand:SI 0 "register_operand" "r")
2438 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2439 (match_operand:SI 2 "const_int_operand" "n"))
2440 (match_operand:SI 3 "const_int_operand" "n"))) .
2441 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2442 return 0 for simple right / left or left/right shift combination.
2443 return 1 for a combination of shifts with zero_extend.
2444 return 2 for a combination of shifts with an AND that needs r0.
2445 return 3 for a combination of shifts with an AND that needs an extra
2446 scratch register, when the three highmost bits of the AND mask are clear.
2447 return 4 for a combination of shifts with an AND that needs an extra
2448 scratch register, when any of the three highmost bits of the AND mask
2450 If ATTRP is set, store an initial right shift width in ATTRP[0],
2451 and the instruction length in ATTRP[1] . These values are not valid
2453 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2454 shift_amounts for the last shift value that is to be used before the
2457 shl_and_kind (rtx left_rtx
, rtx mask_rtx
, int *attrp
)
2459 unsigned HOST_WIDE_INT mask
, lsb
, mask2
, lsb2
;
2460 int left
= INTVAL (left_rtx
), right
;
2462 int cost
, best_cost
= 10000;
2463 int best_right
= 0, best_len
= 0;
2467 if (left
< 0 || left
> 31)
2469 if (GET_CODE (mask_rtx
) == CONST_INT
)
2470 mask
= (unsigned HOST_WIDE_INT
) INTVAL (mask_rtx
) >> left
;
2472 mask
= (unsigned HOST_WIDE_INT
) GET_MODE_MASK (SImode
) >> left
;
2473 /* Can this be expressed as a right shift / left shift pair? */
2474 lsb
= ((mask
^ (mask
- 1)) >> 1) + 1;
2475 right
= exact_log2 (lsb
);
2476 mask2
= ~(mask
+ lsb
- 1);
2477 lsb2
= ((mask2
^ (mask2
- 1)) >> 1) + 1;
2478 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2480 best_cost
= shift_insns
[right
] + shift_insns
[right
+ left
];
2481 /* mask has no trailing zeroes <==> ! right */
2482 else if (! right
&& mask2
== ~(lsb2
- 1))
2484 int late_right
= exact_log2 (lsb2
);
2485 best_cost
= shift_insns
[left
+ late_right
] + shift_insns
[late_right
];
2487 /* Try to use zero extend. */
2488 if (mask2
== ~(lsb2
- 1))
2492 for (width
= 8; width
<= 16; width
+= 8)
2494 /* Can we zero-extend right away? */
2495 if (lsb2
== (unsigned HOST_WIDE_INT
) 1 << width
)
2498 = 1 + ext_shift_insns
[right
] + ext_shift_insns
[left
+ right
];
2499 if (cost
< best_cost
)
2510 /* ??? Could try to put zero extend into initial right shift,
2511 or even shift a bit left before the right shift. */
2512 /* Determine value of first part of left shift, to get to the
2513 zero extend cut-off point. */
2514 first
= width
- exact_log2 (lsb2
) + right
;
2515 if (first
>= 0 && right
+ left
- first
>= 0)
2517 cost
= ext_shift_insns
[right
] + ext_shift_insns
[first
] + 1
2518 + ext_shift_insns
[right
+ left
- first
];
2519 if (cost
< best_cost
)
2531 /* Try to use r0 AND pattern */
2532 for (i
= 0; i
<= 2; i
++)
2536 if (! CONST_OK_FOR_K08 (mask
>> i
))
2538 cost
= (i
!= 0) + 2 + ext_shift_insns
[left
+ i
];
2539 if (cost
< best_cost
)
2544 best_len
= cost
- 1;
2547 /* Try to use a scratch register to hold the AND operand. */
2548 can_ext
= ((mask
<< left
) & ((unsigned HOST_WIDE_INT
) 3 << 30)) == 0;
2549 for (i
= 0; i
<= 2; i
++)
2553 cost
= (i
!= 0) + (CONST_OK_FOR_I08 (mask
>> i
) ? 2 : 3)
2554 + (can_ext
? ext_shift_insns
: shift_insns
)[left
+ i
];
2555 if (cost
< best_cost
)
2560 best_len
= cost
- 1 - ! CONST_OK_FOR_I08 (mask
>> i
);
2566 attrp
[0] = best_right
;
2567 attrp
[1] = best_len
;
2572 /* This is used in length attributes of the unnamed instructions
2573 corresponding to shl_and_kind return values of 1 and 2. */
2575 shl_and_length (rtx insn
)
2577 rtx set_src
, left_rtx
, mask_rtx
;
2580 set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2581 left_rtx
= XEXP (XEXP (set_src
, 0), 1);
2582 mask_rtx
= XEXP (set_src
, 1);
2583 shl_and_kind (left_rtx
, mask_rtx
, attributes
);
2584 return attributes
[1];
2587 /* This is used in length attribute of the and_shl_scratch instruction. */
2590 shl_and_scr_length (rtx insn
)
2592 rtx set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2593 int len
= shift_insns
[INTVAL (XEXP (set_src
, 1))];
2594 rtx op
= XEXP (set_src
, 0);
2595 len
+= shift_insns
[INTVAL (XEXP (op
, 1))] + 1;
2596 op
= XEXP (XEXP (op
, 0), 0);
2597 return len
+ shift_insns
[INTVAL (XEXP (op
, 1))];
2600 /* Generate rtl for instructions for which shl_and_kind advised a particular
2601 method of generating them, i.e. returned zero. */
2604 gen_shl_and (rtx dest
, rtx left_rtx
, rtx mask_rtx
, rtx source
)
2607 unsigned HOST_WIDE_INT mask
;
2608 int kind
= shl_and_kind (left_rtx
, mask_rtx
, attributes
);
2609 int right
, total_shift
;
2610 void (*shift_gen_fun
) (int, rtx
*) = gen_shifty_hi_op
;
2612 right
= attributes
[0];
2613 total_shift
= INTVAL (left_rtx
) + right
;
2614 mask
= (unsigned HOST_WIDE_INT
) INTVAL (mask_rtx
) >> total_shift
;
2621 int first
= attributes
[2];
2626 emit_insn ((mask
<< right
) <= 0xff
2627 ? gen_zero_extendqisi2 (dest
,
2628 gen_lowpart (QImode
, source
))
2629 : gen_zero_extendhisi2 (dest
,
2630 gen_lowpart (HImode
, source
)));
2634 emit_insn (gen_movsi (dest
, source
));
2638 operands
[2] = GEN_INT (right
);
2639 gen_shifty_hi_op (LSHIFTRT
, operands
);
2643 operands
[2] = GEN_INT (first
);
2644 gen_shifty_hi_op (ASHIFT
, operands
);
2645 total_shift
-= first
;
2649 emit_insn (mask
<= 0xff
2650 ? gen_zero_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2651 : gen_zero_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2652 if (total_shift
> 0)
2654 operands
[2] = GEN_INT (total_shift
);
2655 gen_shifty_hi_op (ASHIFT
, operands
);
2660 shift_gen_fun
= gen_shifty_op
;
2662 /* If the topmost bit that matters is set, set the topmost bits
2663 that don't matter. This way, we might be able to get a shorter
2665 if (mask
& ((HOST_WIDE_INT
) 1 << (31 - total_shift
)))
2666 mask
|= (HOST_WIDE_INT
) ~0 << (31 - total_shift
);
2668 /* Don't expand fine-grained when combining, because that will
2669 make the pattern fail. */
2670 if (currently_expanding_to_rtl
2671 || reload_in_progress
|| reload_completed
)
2675 /* Cases 3 and 4 should be handled by this split
2676 only while combining */
2677 gcc_assert (kind
<= 2);
2680 emit_insn (gen_lshrsi3 (dest
, source
, GEN_INT (right
)));
2683 emit_insn (gen_andsi3 (dest
, source
, GEN_INT (mask
)));
2688 operands
[2] = GEN_INT (total_shift
);
2689 shift_gen_fun (ASHIFT
, operands
);
2696 if (kind
!= 4 && total_shift
< 16)
2698 neg
= -ext_shift_amounts
[total_shift
][1];
2700 neg
-= ext_shift_amounts
[total_shift
][2];
2704 emit_insn (gen_and_shl_scratch (dest
, source
,
2707 GEN_INT (total_shift
+ neg
),
2709 emit_insn (gen_movsi (dest
, dest
));
2716 /* Try to find a good way to implement the combiner pattern
2717 [(set (match_operand:SI 0 "register_operand" "=r")
2718 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2719 (match_operand:SI 2 "const_int_operand" "n")
2720 (match_operand:SI 3 "const_int_operand" "n")
2722 (clobber (reg:SI T_REG))]
2723 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2724 return 0 for simple left / right shift combination.
2725 return 1 for left shift / 8 bit sign extend / left shift.
2726 return 2 for left shift / 16 bit sign extend / left shift.
2727 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2728 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2729 return 5 for left shift / 16 bit sign extend / right shift
2730 return 6 for < 8 bit sign extend / left shift.
2731 return 7 for < 8 bit sign extend / left shift / single right shift.
2732 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2735 shl_sext_kind (rtx left_rtx
, rtx size_rtx
, int *costp
)
2737 int left
, size
, insize
, ext
;
2738 int cost
= 0, best_cost
;
2741 left
= INTVAL (left_rtx
);
2742 size
= INTVAL (size_rtx
);
2743 insize
= size
- left
;
2744 gcc_assert (insize
> 0);
2745 /* Default to left / right shift. */
2747 best_cost
= shift_insns
[32 - insize
] + ashiftrt_insns
[32 - size
];
2750 /* 16 bit shift / sign extend / 16 bit shift */
2751 cost
= shift_insns
[16 - insize
] + 1 + ashiftrt_insns
[16 - size
];
2752 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2753 below, by alternative 3 or something even better. */
2754 if (cost
< best_cost
)
2760 /* Try a plain sign extend between two shifts. */
2761 for (ext
= 16; ext
>= insize
; ext
-= 8)
2765 cost
= ext_shift_insns
[ext
- insize
] + 1 + shift_insns
[size
- ext
];
2766 if (cost
< best_cost
)
2768 kind
= ext
/ (unsigned) 8;
2772 /* Check if we can do a sloppy shift with a final signed shift
2773 restoring the sign. */
2774 if (EXT_SHIFT_SIGNED (size
- ext
))
2775 cost
= ext_shift_insns
[ext
- insize
] + ext_shift_insns
[size
- ext
] + 1;
2776 /* If not, maybe it's still cheaper to do the second shift sloppy,
2777 and do a final sign extend? */
2778 else if (size
<= 16)
2779 cost
= ext_shift_insns
[ext
- insize
] + 1
2780 + ext_shift_insns
[size
> ext
? size
- ext
: ext
- size
] + 1;
2783 if (cost
< best_cost
)
2785 kind
= ext
/ (unsigned) 8 + 2;
2789 /* Check if we can sign extend in r0 */
2792 cost
= 3 + shift_insns
[left
];
2793 if (cost
< best_cost
)
2798 /* Try the same with a final signed shift. */
2801 cost
= 3 + ext_shift_insns
[left
+ 1] + 1;
2802 if (cost
< best_cost
)
2811 /* Try to use a dynamic shift. */
2812 cost
= shift_insns
[32 - insize
] + 1 + SH_DYNAMIC_SHIFT_COST
;
2813 if (cost
< best_cost
)
2824 /* Function to be used in the length attribute of the instructions
2825 implementing this pattern. */
2828 shl_sext_length (rtx insn
)
2830 rtx set_src
, left_rtx
, size_rtx
;
2833 set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2834 left_rtx
= XEXP (XEXP (set_src
, 0), 1);
2835 size_rtx
= XEXP (set_src
, 1);
2836 shl_sext_kind (left_rtx
, size_rtx
, &cost
);
2840 /* Generate rtl for this pattern */
2843 gen_shl_sext (rtx dest
, rtx left_rtx
, rtx size_rtx
, rtx source
)
2846 int left
, size
, insize
, cost
;
2849 kind
= shl_sext_kind (left_rtx
, size_rtx
, &cost
);
2850 left
= INTVAL (left_rtx
);
2851 size
= INTVAL (size_rtx
);
2852 insize
= size
- left
;
2860 int ext
= kind
& 1 ? 8 : 16;
2861 int shift2
= size
- ext
;
2863 /* Don't expand fine-grained when combining, because that will
2864 make the pattern fail. */
2865 if (! currently_expanding_to_rtl
2866 && ! reload_in_progress
&& ! reload_completed
)
2868 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2869 emit_insn (gen_movsi (dest
, source
));
2873 emit_insn (gen_movsi (dest
, source
));
2877 operands
[2] = GEN_INT (ext
- insize
);
2878 gen_shifty_hi_op (ASHIFT
, operands
);
2881 ? gen_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2882 : gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2887 operands
[2] = GEN_INT (shift2
);
2888 gen_shifty_op (ASHIFT
, operands
);
2895 if (EXT_SHIFT_SIGNED (shift2
))
2897 operands
[2] = GEN_INT (shift2
+ 1);
2898 gen_shifty_op (ASHIFT
, operands
);
2899 operands
[2] = const1_rtx
;
2900 gen_shifty_op (ASHIFTRT
, operands
);
2903 operands
[2] = GEN_INT (shift2
);
2904 gen_shifty_hi_op (ASHIFT
, operands
);
2908 operands
[2] = GEN_INT (-shift2
);
2909 gen_shifty_hi_op (LSHIFTRT
, operands
);
2911 emit_insn (size
<= 8
2912 ? gen_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2913 : gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2920 if (! currently_expanding_to_rtl
2921 && ! reload_in_progress
&& ! reload_completed
)
2922 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2926 operands
[2] = GEN_INT (16 - insize
);
2927 gen_shifty_hi_op (ASHIFT
, operands
);
2928 emit_insn (gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2930 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2932 gen_ashift (ASHIFTRT
, 1, dest
);
2937 /* Don't expand fine-grained when combining, because that will
2938 make the pattern fail. */
2939 if (! currently_expanding_to_rtl
2940 && ! reload_in_progress
&& ! reload_completed
)
2942 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2943 emit_insn (gen_movsi (dest
, source
));
2946 emit_insn (gen_andsi3 (dest
, source
, GEN_INT ((1 << insize
) - 1)));
2947 emit_insn (gen_xorsi3 (dest
, dest
, GEN_INT (1 << (insize
- 1))));
2948 emit_insn (gen_addsi3 (dest
, dest
, GEN_INT (-1 << (insize
- 1))));
2950 operands
[2] = kind
== 7 ? GEN_INT (left
+ 1) : left_rtx
;
2951 gen_shifty_op (ASHIFT
, operands
);
2953 emit_insn (gen_ashrsi3_k (dest
, dest
, const1_rtx
));
2961 /* Prefix a symbol_ref name with "datalabel". */
2964 gen_datalabel_ref (rtx sym
)
2968 if (GET_CODE (sym
) == LABEL_REF
)
2969 return gen_rtx_CONST (GET_MODE (sym
),
2970 gen_rtx_UNSPEC (GET_MODE (sym
),
2974 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
);
2976 str
= XSTR (sym
, 0);
2977 /* Share all SYMBOL_REF strings with the same value - that is important
2979 str
= IDENTIFIER_POINTER (get_identifier (str
));
2980 XSTR (sym
, 0) = str
;
2986 /* The SH cannot load a large constant into a register, constants have to
2987 come from a pc relative load. The reference of a pc relative load
2988 instruction must be less than 1k in front of the instruction. This
2989 means that we often have to dump a constant inside a function, and
2990 generate code to branch around it.
2992 It is important to minimize this, since the branches will slow things
2993 down and make things bigger.
2995 Worst case code looks like:
3013 We fix this by performing a scan before scheduling, which notices which
3014 instructions need to have their operands fetched from the constant table
3015 and builds the table.
3019 scan, find an instruction which needs a pcrel move. Look forward, find the
3020 last barrier which is within MAX_COUNT bytes of the requirement.
3021 If there isn't one, make one. Process all the instructions between
3022 the find and the barrier.
3024 In the above example, we can tell that L3 is within 1k of L1, so
3025 the first move can be shrunk from the 3 insn+constant sequence into
3026 just 1 insn, and the constant moved to L3 to make:
3037 Then the second move becomes the target for the shortening process. */
3041 rtx value
; /* Value in table. */
3042 rtx label
; /* Label of value. */
3043 rtx wend
; /* End of window. */
3044 enum machine_mode mode
; /* Mode of value. */
3046 /* True if this constant is accessed as part of a post-increment
3047 sequence. Note that HImode constants are never accessed in this way. */
3048 bool part_of_sequence_p
;
3051 /* The maximum number of constants that can fit into one pool, since
3052 constants in the range 0..510 are at least 2 bytes long, and in the
3053 range from there to 1018 at least 4 bytes. */
3055 #define MAX_POOL_SIZE 372
3056 static pool_node pool_vector
[MAX_POOL_SIZE
];
3057 static int pool_size
;
3058 static rtx pool_window_label
;
3059 static int pool_window_last
;
3061 /* ??? If we need a constant in HImode which is the truncated value of a
3062 constant we need in SImode, we could combine the two entries thus saving
3063 two bytes. Is this common enough to be worth the effort of implementing
3066 /* ??? This stuff should be done at the same time that we shorten branches.
3067 As it is now, we must assume that all branches are the maximum size, and
3068 this causes us to almost always output constant pools sooner than
3071 /* Add a constant to the pool and return its label. */
3074 add_constant (rtx x
, enum machine_mode mode
, rtx last_value
)
3077 rtx lab
, new, ref
, newref
;
3079 /* First see if we've already got it. */
3080 for (i
= 0; i
< pool_size
; i
++)
3082 if (x
->code
== pool_vector
[i
].value
->code
3083 && mode
== pool_vector
[i
].mode
)
3085 if (x
->code
== CODE_LABEL
)
3087 if (XINT (x
, 3) != XINT (pool_vector
[i
].value
, 3))
3090 if (rtx_equal_p (x
, pool_vector
[i
].value
))
3095 || ! rtx_equal_p (last_value
, pool_vector
[i
-1].value
))
3097 new = gen_label_rtx ();
3098 LABEL_REFS (new) = pool_vector
[i
].label
;
3099 pool_vector
[i
].label
= lab
= new;
3101 if (lab
&& pool_window_label
)
3103 newref
= gen_rtx_LABEL_REF (VOIDmode
, pool_window_label
);
3104 ref
= pool_vector
[pool_window_last
].wend
;
3105 LABEL_NEXTREF (newref
) = ref
;
3106 pool_vector
[pool_window_last
].wend
= newref
;
3109 pool_window_label
= new;
3110 pool_window_last
= i
;
3116 /* Need a new one. */
3117 pool_vector
[pool_size
].value
= x
;
3118 if (last_value
&& rtx_equal_p (last_value
, pool_vector
[pool_size
- 1].value
))
3121 pool_vector
[pool_size
- 1].part_of_sequence_p
= true;
3124 lab
= gen_label_rtx ();
3125 pool_vector
[pool_size
].mode
= mode
;
3126 pool_vector
[pool_size
].label
= lab
;
3127 pool_vector
[pool_size
].wend
= NULL_RTX
;
3128 pool_vector
[pool_size
].part_of_sequence_p
= (lab
== 0);
3129 if (lab
&& pool_window_label
)
3131 newref
= gen_rtx_LABEL_REF (VOIDmode
, pool_window_label
);
3132 ref
= pool_vector
[pool_window_last
].wend
;
3133 LABEL_NEXTREF (newref
) = ref
;
3134 pool_vector
[pool_window_last
].wend
= newref
;
3137 pool_window_label
= lab
;
3138 pool_window_last
= pool_size
;
3143 /* Output the literal table. START, if nonzero, is the first instruction
3144 this table is needed for, and also indicates that there is at least one
3145 casesi_worker_2 instruction; We have to emit the operand3 labels from
3146 these insns at a 4-byte aligned position. BARRIER is the barrier
3147 after which we are to place the table. */
3150 dump_table (rtx start
, rtx barrier
)
3158 /* Do two passes, first time dump out the HI sized constants. */
3160 for (i
= 0; i
< pool_size
; i
++)
3162 pool_node
*p
= &pool_vector
[i
];
3164 if (p
->mode
== HImode
)
3168 scan
= emit_insn_after (gen_align_2 (), scan
);
3171 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3172 scan
= emit_label_after (lab
, scan
);
3173 scan
= emit_insn_after (gen_consttable_2 (p
->value
, const0_rtx
),
3175 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3177 lab
= XEXP (ref
, 0);
3178 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3181 else if (p
->mode
== DFmode
)
3189 scan
= emit_insn_after (gen_align_4 (), scan
);
3191 for (; start
!= barrier
; start
= NEXT_INSN (start
))
3192 if (GET_CODE (start
) == INSN
3193 && recog_memoized (start
) == CODE_FOR_casesi_worker_2
)
3195 rtx src
= SET_SRC (XVECEXP (PATTERN (start
), 0, 0));
3196 rtx lab
= XEXP (XVECEXP (src
, 0, 3), 0);
3198 scan
= emit_label_after (lab
, scan
);
3201 if (TARGET_FMOVD
&& TARGET_ALIGN_DOUBLE
&& have_df
)
3203 rtx align_insn
= NULL_RTX
;
3205 scan
= emit_label_after (gen_label_rtx (), scan
);
3206 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3209 for (i
= 0; i
< pool_size
; i
++)
3211 pool_node
*p
= &pool_vector
[i
];
3219 if (align_insn
&& !p
->part_of_sequence_p
)
3221 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3222 emit_label_before (lab
, align_insn
);
3223 emit_insn_before (gen_consttable_4 (p
->value
, const0_rtx
),
3225 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3227 lab
= XEXP (ref
, 0);
3228 emit_insn_before (gen_consttable_window_end (lab
),
3231 delete_insn (align_insn
);
3232 align_insn
= NULL_RTX
;
3237 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3238 scan
= emit_label_after (lab
, scan
);
3239 scan
= emit_insn_after (gen_consttable_4 (p
->value
,
3241 need_align
= ! need_align
;
3247 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3252 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3253 scan
= emit_label_after (lab
, scan
);
3254 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3261 if (p
->mode
!= HImode
)
3263 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3265 lab
= XEXP (ref
, 0);
3266 scan
= emit_insn_after (gen_consttable_window_end (lab
),
3275 for (i
= 0; i
< pool_size
; i
++)
3277 pool_node
*p
= &pool_vector
[i
];
3288 scan
= emit_label_after (gen_label_rtx (), scan
);
3289 scan
= emit_insn_after (gen_align_4 (), scan
);
3291 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3292 scan
= emit_label_after (lab
, scan
);
3293 scan
= emit_insn_after (gen_consttable_4 (p
->value
, const0_rtx
),
3301 scan
= emit_label_after (gen_label_rtx (), scan
);
3302 scan
= emit_insn_after (gen_align_4 (), scan
);
3304 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3305 scan
= emit_label_after (lab
, scan
);
3306 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3313 if (p
->mode
!= HImode
)
3315 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3317 lab
= XEXP (ref
, 0);
3318 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3323 scan
= emit_insn_after (gen_consttable_end (), scan
);
3324 scan
= emit_barrier_after (scan
);
3326 pool_window_label
= NULL_RTX
;
3327 pool_window_last
= 0;
3330 /* Return nonzero if constant would be an ok source for a
3331 mov.w instead of a mov.l. */
3336 return (GET_CODE (src
) == CONST_INT
3337 && INTVAL (src
) >= -32768
3338 && INTVAL (src
) <= 32767);
3341 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3343 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3344 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3345 need to fix it if the input value is CONST_OK_FOR_I08. */
3348 broken_move (rtx insn
)
3350 if (GET_CODE (insn
) == INSN
)
3352 rtx pat
= PATTERN (insn
);
3353 if (GET_CODE (pat
) == PARALLEL
)
3354 pat
= XVECEXP (pat
, 0, 0);
3355 if (GET_CODE (pat
) == SET
3356 /* We can load any 8 bit value if we don't care what the high
3357 order bits end up as. */
3358 && GET_MODE (SET_DEST (pat
)) != QImode
3359 && (CONSTANT_P (SET_SRC (pat
))
3360 /* Match mova_const. */
3361 || (GET_CODE (SET_SRC (pat
)) == UNSPEC
3362 && XINT (SET_SRC (pat
), 1) == UNSPEC_MOVA
3363 && GET_CODE (XVECEXP (SET_SRC (pat
), 0, 0)) == CONST
))
3365 && GET_CODE (SET_SRC (pat
)) == CONST_DOUBLE
3366 && (fp_zero_operand (SET_SRC (pat
))
3367 || fp_one_operand (SET_SRC (pat
)))
3368 /* ??? If this is a -m4 or -m4-single compilation, in general
3369 we don't know the current setting of fpscr, so disable fldi.
3370 There is an exception if this was a register-register move
3371 before reload - and hence it was ascertained that we have
3372 single precision setting - and in a post-reload optimization
3373 we changed this to do a constant load. In that case
3374 we don't have an r0 clobber, hence we must use fldi. */
3375 && (! TARGET_SH4
|| TARGET_FMOVD
3376 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn
), 0, 2), 0))
3378 && GET_CODE (SET_DEST (pat
)) == REG
3379 && FP_REGISTER_P (REGNO (SET_DEST (pat
))))
3381 && GET_MODE (SET_DEST (pat
)) == SImode
3382 && GET_CODE (SET_SRC (pat
)) == CONST_INT
3383 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat
))))
3384 && (GET_CODE (SET_SRC (pat
)) != CONST_INT
3385 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat
)))))
3395 return (GET_CODE (insn
) == INSN
3396 && GET_CODE (PATTERN (insn
)) == SET
3397 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC
3398 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPEC_MOVA
3399 /* Don't match mova_const. */
3400 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn
)), 0, 0)) == LABEL_REF
);
3403 /* Fix up a mova from a switch that went out of range. */
3405 fixup_mova (rtx mova
)
3409 SET_SRC (PATTERN (mova
)) = XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0);
3410 INSN_CODE (mova
) = -1;
3415 rtx lab
= gen_label_rtx ();
3416 rtx wpat
, wpat0
, wpat1
, wsrc
, diff
;
3420 worker
= NEXT_INSN (worker
);
3422 && GET_CODE (worker
) != CODE_LABEL
3423 && GET_CODE (worker
) != JUMP_INSN
);
3424 } while (GET_CODE (worker
) == NOTE
3425 || recog_memoized (worker
) != CODE_FOR_casesi_worker_1
);
3426 wpat
= PATTERN (worker
);
3427 wpat0
= XVECEXP (wpat
, 0, 0);
3428 wpat1
= XVECEXP (wpat
, 0, 1);
3429 wsrc
= SET_SRC (wpat0
);
3430 PATTERN (worker
) = (gen_casesi_worker_2
3431 (SET_DEST (wpat0
), XVECEXP (wsrc
, 0, 1),
3432 XEXP (XVECEXP (wsrc
, 0, 2), 0), lab
,
3434 INSN_CODE (worker
) = -1;
3435 diff
= gen_rtx_MINUS (Pmode
, XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0),
3436 gen_rtx_LABEL_REF (Pmode
, lab
));
3437 diff
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, diff
), UNSPEC_PIC
);
3438 SET_SRC (PATTERN (mova
)) = gen_rtx_CONST (Pmode
, diff
);
3439 INSN_CODE (mova
) = -1;
3443 /* Find the last barrier from insn FROM which is close enough to hold the
3444 constant pool. If we can't find one, then create one near the end of
3448 find_barrier (int num_mova
, rtx mova
, rtx from
)
3457 int leading_mova
= num_mova
;
3458 rtx barrier_before_mova
= 0, found_barrier
= 0, good_barrier
= 0;
3462 /* For HImode: range is 510, add 4 because pc counts from address of
3463 second instruction after this one, subtract 2 for the jump instruction
3464 that we may need to emit before the table, subtract 2 for the instruction
3465 that fills the jump delay slot (in very rare cases, reorg will take an
3466 instruction from after the constant pool or will leave the delay slot
3467 empty). This gives 510.
3468 For SImode: range is 1020, add 4 because pc counts from address of
3469 second instruction after this one, subtract 2 in case pc is 2 byte
3470 aligned, subtract 2 for the jump instruction that we may need to emit
3471 before the table, subtract 2 for the instruction that fills the jump
3472 delay slot. This gives 1018. */
3474 /* The branch will always be shortened now that the reference address for
3475 forward branches is the successor address, thus we need no longer make
3476 adjustments to the [sh]i_limit for -O0. */
3481 while (from
&& count_si
< si_limit
&& count_hi
< hi_limit
)
3483 int inc
= get_attr_length (from
);
3486 if (GET_CODE (from
) == CODE_LABEL
)
3489 new_align
= 1 << label_to_alignment (from
);
3490 else if (GET_CODE (prev_nonnote_insn (from
)) == BARRIER
)
3491 new_align
= 1 << barrier_align (from
);
3497 if (GET_CODE (from
) == BARRIER
)
3500 found_barrier
= from
;
3502 /* If we are at the end of the function, or in front of an alignment
3503 instruction, we need not insert an extra alignment. We prefer
3504 this kind of barrier. */
3505 if (barrier_align (from
) > 2)
3506 good_barrier
= from
;
3509 if (broken_move (from
))
3512 enum machine_mode mode
;
3514 pat
= PATTERN (from
);
3515 if (GET_CODE (pat
) == PARALLEL
)
3516 pat
= XVECEXP (pat
, 0, 0);
3517 src
= SET_SRC (pat
);
3518 dst
= SET_DEST (pat
);
3519 mode
= GET_MODE (dst
);
3521 /* We must explicitly check the mode, because sometimes the
3522 front end will generate code to load unsigned constants into
3523 HImode targets without properly sign extending them. */
3525 || (mode
== SImode
&& hi_const (src
) && REGNO (dst
) != FPUL_REG
))
3528 /* We put the short constants before the long constants, so
3529 we must count the length of short constants in the range
3530 for the long constants. */
3531 /* ??? This isn't optimal, but is easy to do. */
3536 /* We dump DF/DI constants before SF/SI ones, because
3537 the limit is the same, but the alignment requirements
3538 are higher. We may waste up to 4 additional bytes
3539 for alignment, and the DF/DI constant may have
3540 another SF/SI constant placed before it. */
3541 if (TARGET_SHCOMPACT
3543 && (mode
== DFmode
|| mode
== DImode
))
3548 while (si_align
> 2 && found_si
+ si_align
- 2 > count_si
)
3550 if (found_si
> count_si
)
3551 count_si
= found_si
;
3552 found_si
+= GET_MODE_SIZE (mode
);
3554 si_limit
-= GET_MODE_SIZE (mode
);
3564 barrier_before_mova
= good_barrier
? good_barrier
: found_barrier
;
3566 if (found_si
> count_si
)
3567 count_si
= found_si
;
3569 else if (GET_CODE (from
) == JUMP_INSN
3570 && (GET_CODE (PATTERN (from
)) == ADDR_VEC
3571 || GET_CODE (PATTERN (from
)) == ADDR_DIFF_VEC
))
3575 if (barrier_align (next_real_insn (from
)) == align_jumps_log
)
3577 /* We have just passed the barrier in front of the
3578 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3579 the ADDR_DIFF_VEC is accessed as data, just like our pool
3580 constants, this is a good opportunity to accommodate what
3581 we have gathered so far.
3582 If we waited any longer, we could end up at a barrier in
3583 front of code, which gives worse cache usage for separated
3584 instruction / data caches. */
3585 good_barrier
= found_barrier
;
3590 rtx body
= PATTERN (from
);
3591 inc
= XVECLEN (body
, 1) * GET_MODE_SIZE (GET_MODE (body
));
3594 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3595 else if (GET_CODE (from
) == JUMP_INSN
3597 && ! TARGET_SMALLCODE
)
3603 if (new_align
> si_align
)
3605 si_limit
-= (count_si
- 1) & (new_align
- si_align
);
3606 si_align
= new_align
;
3608 count_si
= (count_si
+ new_align
- 1) & -new_align
;
3613 if (new_align
> hi_align
)
3615 hi_limit
-= (count_hi
- 1) & (new_align
- hi_align
);
3616 hi_align
= new_align
;
3618 count_hi
= (count_hi
+ new_align
- 1) & -new_align
;
3620 from
= NEXT_INSN (from
);
3627 /* Try as we might, the leading mova is out of range. Change
3628 it into a load (which will become a pcload) and retry. */
3630 return find_barrier (0, 0, mova
);
3634 /* Insert the constant pool table before the mova instruction,
3635 to prevent the mova label reference from going out of range. */
3637 good_barrier
= found_barrier
= barrier_before_mova
;
3643 if (good_barrier
&& next_real_insn (found_barrier
))
3644 found_barrier
= good_barrier
;
3648 /* We didn't find a barrier in time to dump our stuff,
3649 so we'll make one. */
3650 rtx label
= gen_label_rtx ();
3652 /* If we exceeded the range, then we must back up over the last
3653 instruction we looked at. Otherwise, we just need to undo the
3654 NEXT_INSN at the end of the loop. */
3655 if (count_hi
> hi_limit
|| count_si
> si_limit
)
3656 from
= PREV_INSN (PREV_INSN (from
));
3658 from
= PREV_INSN (from
);
3660 /* Walk back to be just before any jump or label.
3661 Putting it before a label reduces the number of times the branch
3662 around the constant pool table will be hit. Putting it before
3663 a jump makes it more likely that the bra delay slot will be
3665 while (GET_CODE (from
) == JUMP_INSN
|| GET_CODE (from
) == NOTE
3666 || GET_CODE (from
) == CODE_LABEL
)
3667 from
= PREV_INSN (from
);
3669 from
= emit_jump_insn_after (gen_jump (label
), from
);
3670 JUMP_LABEL (from
) = label
;
3671 LABEL_NUSES (label
) = 1;
3672 found_barrier
= emit_barrier_after (from
);
3673 emit_label_after (label
, found_barrier
);
3676 return found_barrier
;
3679 /* If the instruction INSN is implemented by a special function, and we can
3680 positively find the register that is used to call the sfunc, and this
3681 register is not used anywhere else in this instruction - except as the
3682 destination of a set, return this register; else, return 0. */
3684 sfunc_uses_reg (rtx insn
)
3687 rtx pattern
, part
, reg_part
, reg
;
3689 if (GET_CODE (insn
) != INSN
)
3691 pattern
= PATTERN (insn
);
3692 if (GET_CODE (pattern
) != PARALLEL
|| get_attr_type (insn
) != TYPE_SFUNC
)
3695 for (reg_part
= 0, i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3697 part
= XVECEXP (pattern
, 0, i
);
3698 if (GET_CODE (part
) == USE
&& GET_MODE (XEXP (part
, 0)) == SImode
)
3703 reg
= XEXP (reg_part
, 0);
3704 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 0; i
--)
3706 part
= XVECEXP (pattern
, 0, i
);
3707 if (part
== reg_part
|| GET_CODE (part
) == CLOBBER
)
3709 if (reg_mentioned_p (reg
, ((GET_CODE (part
) == SET
3710 && GET_CODE (SET_DEST (part
)) == REG
)
3711 ? SET_SRC (part
) : part
)))
3717 /* See if the only way in which INSN uses REG is by calling it, or by
3718 setting it while calling it. Set *SET to a SET rtx if the register
3722 noncall_uses_reg (rtx reg
, rtx insn
, rtx
*set
)
3728 reg2
= sfunc_uses_reg (insn
);
3729 if (reg2
&& REGNO (reg2
) == REGNO (reg
))
3731 pattern
= single_set (insn
);
3733 && GET_CODE (SET_DEST (pattern
)) == REG
3734 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3738 if (GET_CODE (insn
) != CALL_INSN
)
3740 /* We don't use rtx_equal_p because we don't care if the mode is
3742 pattern
= single_set (insn
);
3744 && GET_CODE (SET_DEST (pattern
)) == REG
3745 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3751 par
= PATTERN (insn
);
3752 if (GET_CODE (par
) == PARALLEL
)
3753 for (i
= XVECLEN (par
, 0) - 1; i
>= 0; i
--)
3755 part
= XVECEXP (par
, 0, i
);
3756 if (GET_CODE (part
) != SET
&& reg_mentioned_p (reg
, part
))
3759 return reg_mentioned_p (reg
, SET_SRC (pattern
));
3765 pattern
= PATTERN (insn
);
3767 if (GET_CODE (pattern
) == PARALLEL
)
3771 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3772 if (reg_mentioned_p (reg
, XVECEXP (pattern
, 0, i
)))
3774 pattern
= XVECEXP (pattern
, 0, 0);
3777 if (GET_CODE (pattern
) == SET
)
3779 if (reg_mentioned_p (reg
, SET_DEST (pattern
)))
3781 /* We don't use rtx_equal_p, because we don't care if the
3782 mode is different. */
3783 if (GET_CODE (SET_DEST (pattern
)) != REG
3784 || REGNO (reg
) != REGNO (SET_DEST (pattern
)))
3790 pattern
= SET_SRC (pattern
);
3793 if (GET_CODE (pattern
) != CALL
3794 || GET_CODE (XEXP (pattern
, 0)) != MEM
3795 || ! rtx_equal_p (reg
, XEXP (XEXP (pattern
, 0), 0)))
3801 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3802 general registers. Bits 0..15 mean that the respective registers
3803 are used as inputs in the instruction. Bits 16..31 mean that the
3804 registers 0..15, respectively, are used as outputs, or are clobbered.
3805 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3807 regs_used (rtx x
, int is_dest
)
3815 code
= GET_CODE (x
);
3820 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3821 << (REGNO (x
) + is_dest
));
3825 rtx y
= SUBREG_REG (x
);
3827 if (GET_CODE (y
) != REG
)
3830 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3832 subreg_regno_offset (REGNO (y
),
3835 GET_MODE (x
)) + is_dest
));
3839 return regs_used (SET_SRC (x
), 0) | regs_used (SET_DEST (x
), 16);
3841 /* If there was a return value, it must have been indicated with USE. */
3856 fmt
= GET_RTX_FORMAT (code
);
3858 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
3863 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3864 used
|= regs_used (XVECEXP (x
, i
, j
), is_dest
);
3866 else if (fmt
[i
] == 'e')
3867 used
|= regs_used (XEXP (x
, i
), is_dest
);
3872 /* Create an instruction that prevents redirection of a conditional branch
3873 to the destination of the JUMP with address ADDR.
3874 If the branch needs to be implemented as an indirect jump, try to find
3875 a scratch register for it.
3876 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3877 If any preceding insn that doesn't fit into a delay slot is good enough,
3878 pass 1. Pass 2 if a definite blocking insn is needed.
3879 -1 is used internally to avoid deep recursion.
3880 If a blocking instruction is made or recognized, return it. */
3883 gen_block_redirect (rtx jump
, int addr
, int need_block
)
3886 rtx prev
= prev_nonnote_insn (jump
);
3889 /* First, check if we already have an instruction that satisfies our need. */
3890 if (prev
&& GET_CODE (prev
) == INSN
&& ! INSN_DELETED_P (prev
))
3892 if (INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
3894 if (GET_CODE (PATTERN (prev
)) == USE
3895 || GET_CODE (PATTERN (prev
)) == CLOBBER
3896 || get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
3898 else if ((need_block
&= ~1) < 0)
3900 else if (recog_memoized (prev
) == CODE_FOR_block_branch_redirect
)
3903 if (GET_CODE (PATTERN (jump
)) == RETURN
)
3907 /* Reorg even does nasty things with return insns that cause branches
3908 to go out of range - see find_end_label and callers. */
3909 return emit_insn_before (gen_block_branch_redirect (const0_rtx
) , jump
);
3911 /* We can't use JUMP_LABEL here because it might be undefined
3912 when not optimizing. */
3913 dest
= XEXP (SET_SRC (PATTERN (jump
)), 0);
3914 /* If the branch is out of range, try to find a scratch register for it. */
3916 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3920 /* Don't look for the stack pointer as a scratch register,
3921 it would cause trouble if an interrupt occurred. */
3922 unsigned try = 0x7fff, used
;
3923 int jump_left
= flag_expensive_optimizations
+ 1;
3925 /* It is likely that the most recent eligible instruction is wanted for
3926 the delay slot. Therefore, find out which registers it uses, and
3927 try to avoid using them. */
3929 for (scan
= jump
; (scan
= PREV_INSN (scan
)); )
3933 if (INSN_DELETED_P (scan
))
3935 code
= GET_CODE (scan
);
3936 if (code
== CODE_LABEL
|| code
== JUMP_INSN
)
3939 && GET_CODE (PATTERN (scan
)) != USE
3940 && GET_CODE (PATTERN (scan
)) != CLOBBER
3941 && get_attr_in_delay_slot (scan
) == IN_DELAY_SLOT_YES
)
3943 try &= ~regs_used (PATTERN (scan
), 0);
3947 for (used
= dead
= 0, scan
= JUMP_LABEL (jump
);
3948 (scan
= NEXT_INSN (scan
)); )
3952 if (INSN_DELETED_P (scan
))
3954 code
= GET_CODE (scan
);
3957 used
|= regs_used (PATTERN (scan
), 0);
3958 if (code
== CALL_INSN
)
3959 used
|= regs_used (CALL_INSN_FUNCTION_USAGE (scan
), 0);
3960 dead
|= (used
>> 16) & ~used
;
3966 if (code
== JUMP_INSN
)
3968 if (jump_left
-- && simplejump_p (scan
))
3969 scan
= JUMP_LABEL (scan
);
3975 /* Mask out the stack pointer again, in case it was
3976 the only 'free' register we have found. */
3979 /* If the immediate destination is still in range, check for possible
3980 threading with a jump beyond the delay slot insn.
3981 Don't check if we are called recursively; the jump has been or will be
3982 checked in a different invocation then. */
3984 else if (optimize
&& need_block
>= 0)
3986 rtx next
= next_active_insn (next_active_insn (dest
));
3987 if (next
&& GET_CODE (next
) == JUMP_INSN
3988 && GET_CODE (PATTERN (next
)) == SET
3989 && recog_memoized (next
) == CODE_FOR_jump_compact
)
3991 dest
= JUMP_LABEL (next
);
3993 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3995 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), -1);
4001 rtx reg
= gen_rtx_REG (SImode
, exact_log2 (dead
& -dead
));
4003 /* It would be nice if we could convert the jump into an indirect
4004 jump / far branch right now, and thus exposing all constituent
4005 instructions to further optimization. However, reorg uses
4006 simplejump_p to determine if there is an unconditional jump where
4007 it should try to schedule instructions from the target of the
4008 branch; simplejump_p fails for indirect jumps even if they have
4010 rtx insn
= emit_insn_before (gen_indirect_jump_scratch
4011 (reg
, GEN_INT (INSN_UID (JUMP_LABEL (jump
))))
4013 /* ??? We would like this to have the scope of the jump, but that
4014 scope will change when a delay slot insn of an inner scope is added.
4015 Hence, after delay slot scheduling, we'll have to expect
4016 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4019 INSN_LOCATOR (insn
) = INSN_LOCATOR (jump
);
4020 INSN_CODE (insn
) = CODE_FOR_indirect_jump_scratch
;
4023 else if (need_block
)
4024 /* We can't use JUMP_LABEL here because it might be undefined
4025 when not optimizing. */
4026 return emit_insn_before (gen_block_branch_redirect
4027 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))))
4032 #define CONDJUMP_MIN -252
4033 #define CONDJUMP_MAX 262
4036 /* A label (to be placed) in front of the jump
4037 that jumps to our ultimate destination. */
4039 /* Where we are going to insert it if we cannot move the jump any farther,
4040 or the jump itself if we have picked up an existing jump. */
4042 /* The ultimate destination. */
4044 struct far_branch
*prev
;
4045 /* If the branch has already been created, its address;
4046 else the address of its first prospective user. */
4050 static void gen_far_branch (struct far_branch
*);
4051 enum mdep_reorg_phase_e mdep_reorg_phase
;
4053 gen_far_branch (struct far_branch
*bp
)
4055 rtx insn
= bp
->insert_place
;
4057 rtx label
= gen_label_rtx ();
4060 emit_label_after (label
, insn
);
4063 jump
= emit_jump_insn_after (gen_jump (bp
->far_label
), insn
);
4064 LABEL_NUSES (bp
->far_label
)++;
4067 jump
= emit_jump_insn_after (gen_return (), insn
);
4068 /* Emit a barrier so that reorg knows that any following instructions
4069 are not reachable via a fall-through path.
4070 But don't do this when not optimizing, since we wouldn't suppress the
4071 alignment for the barrier then, and could end up with out-of-range
4072 pc-relative loads. */
4074 emit_barrier_after (jump
);
4075 emit_label_after (bp
->near_label
, insn
);
4076 JUMP_LABEL (jump
) = bp
->far_label
;
4077 ok
= invert_jump (insn
, label
, 1);
4080 /* If we are branching around a jump (rather than a return), prevent
4081 reorg from using an insn from the jump target as the delay slot insn -
4082 when reorg did this, it pessimized code (we rather hide the delay slot)
4083 and it could cause branches to go out of range. */
4086 (gen_stuff_delay_slot
4087 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))),
4088 GEN_INT (recog_memoized (insn
) == CODE_FOR_branch_false
)),
4090 /* Prevent reorg from undoing our splits. */
4091 gen_block_redirect (jump
, bp
->address
+= 2, 2);
4094 /* Fix up ADDR_DIFF_VECs. */
4096 fixup_addr_diff_vecs (rtx first
)
4100 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4102 rtx vec_lab
, pat
, prev
, prevpat
, x
, braf_label
;
4104 if (GET_CODE (insn
) != JUMP_INSN
4105 || GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
4107 pat
= PATTERN (insn
);
4108 vec_lab
= XEXP (XEXP (pat
, 0), 0);
4110 /* Search the matching casesi_jump_2. */
4111 for (prev
= vec_lab
; ; prev
= PREV_INSN (prev
))
4113 if (GET_CODE (prev
) != JUMP_INSN
)
4115 prevpat
= PATTERN (prev
);
4116 if (GET_CODE (prevpat
) != PARALLEL
|| XVECLEN (prevpat
, 0) != 2)
4118 x
= XVECEXP (prevpat
, 0, 1);
4119 if (GET_CODE (x
) != USE
)
4122 if (GET_CODE (x
) == LABEL_REF
&& XEXP (x
, 0) == vec_lab
)
4125 /* FIXME: This is a bug in the optimizer, but it seems harmless
4126 to just avoid panicing. */
4130 /* Emit the reference label of the braf where it belongs, right after
4131 the casesi_jump_2 (i.e. braf). */
4132 braf_label
= XEXP (XEXP (SET_SRC (XVECEXP (prevpat
, 0, 0)), 1), 0);
4133 emit_label_after (braf_label
, prev
);
4135 /* Fix up the ADDR_DIF_VEC to be relative
4136 to the reference address of the braf. */
4137 XEXP (XEXP (pat
, 0), 0) = braf_label
;
4141 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4142 a barrier. Return the base 2 logarithm of the desired alignment. */
4144 barrier_align (rtx barrier_or_label
)
4146 rtx next
= next_real_insn (barrier_or_label
), pat
, prev
;
4147 int slot
, credit
, jump_to_next
= 0;
4152 pat
= PATTERN (next
);
4154 if (GET_CODE (pat
) == ADDR_DIFF_VEC
)
4157 if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == UNSPECV_ALIGN
)
4158 /* This is a barrier in front of a constant table. */
4161 prev
= prev_real_insn (barrier_or_label
);
4162 if (GET_CODE (PATTERN (prev
)) == ADDR_DIFF_VEC
)
4164 pat
= PATTERN (prev
);
4165 /* If this is a very small table, we want to keep the alignment after
4166 the table to the minimum for proper code alignment. */
4167 return ((TARGET_SMALLCODE
4168 || ((unsigned) XVECLEN (pat
, 1) * GET_MODE_SIZE (GET_MODE (pat
))
4169 <= (unsigned) 1 << (CACHE_LOG
- 2)))
4170 ? 1 << TARGET_SHMEDIA
: align_jumps_log
);
4173 if (TARGET_SMALLCODE
)
4176 if (! TARGET_SH2
|| ! optimize
)
4177 return align_jumps_log
;
4179 /* When fixing up pcloads, a constant table might be inserted just before
4180 the basic block that ends with the barrier. Thus, we can't trust the
4181 instruction lengths before that. */
4182 if (mdep_reorg_phase
> SH_FIXUP_PCLOAD
)
4184 /* Check if there is an immediately preceding branch to the insn beyond
4185 the barrier. We must weight the cost of discarding useful information
4186 from the current cache line when executing this branch and there is
4187 an alignment, against that of fetching unneeded insn in front of the
4188 branch target when there is no alignment. */
4190 /* There are two delay_slot cases to consider. One is the simple case
4191 where the preceding branch is to the insn beyond the barrier (simple
4192 delay slot filling), and the other is where the preceding branch has
4193 a delay slot that is a duplicate of the insn after the barrier
4194 (fill_eager_delay_slots) and the branch is to the insn after the insn
4195 after the barrier. */
4197 /* PREV is presumed to be the JUMP_INSN for the barrier under
4198 investigation. Skip to the insn before it. */
4199 prev
= prev_real_insn (prev
);
4201 for (slot
= 2, credit
= (1 << (CACHE_LOG
- 2)) + 2;
4202 credit
>= 0 && prev
&& GET_CODE (prev
) == INSN
;
4203 prev
= prev_real_insn (prev
))
4206 if (GET_CODE (PATTERN (prev
)) == USE
4207 || GET_CODE (PATTERN (prev
)) == CLOBBER
)
4209 if (GET_CODE (PATTERN (prev
)) == SEQUENCE
)
4211 prev
= XVECEXP (PATTERN (prev
), 0, 1);
4212 if (INSN_UID (prev
) == INSN_UID (next
))
4214 /* Delay slot was filled with insn at jump target. */
4221 get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
4223 credit
-= get_attr_length (prev
);
4226 && GET_CODE (prev
) == JUMP_INSN
4227 && JUMP_LABEL (prev
))
4231 || next_real_insn (JUMP_LABEL (prev
)) == next
4232 /* If relax_delay_slots() decides NEXT was redundant
4233 with some previous instruction, it will have
4234 redirected PREV's jump to the following insn. */
4235 || JUMP_LABEL (prev
) == next_nonnote_insn (next
)
4236 /* There is no upper bound on redundant instructions
4237 that might have been skipped, but we must not put an
4238 alignment where none had been before. */
4239 || (x
= (NEXT_INSN (NEXT_INSN (PREV_INSN (prev
)))),
4241 && (INSN_CODE (x
) == CODE_FOR_block_branch_redirect
4242 || INSN_CODE (x
) == CODE_FOR_indirect_jump_scratch
4243 || INSN_CODE (x
) == CODE_FOR_stuff_delay_slot
))))
4245 rtx pat
= PATTERN (prev
);
4246 if (GET_CODE (pat
) == PARALLEL
)
4247 pat
= XVECEXP (pat
, 0, 0);
4248 if (credit
- slot
>= (GET_CODE (SET_SRC (pat
)) == PC
? 2 : 0))
4254 return align_jumps_log
;
4257 /* If we are inside a phony loop, almost any kind of label can turn up as the
4258 first one in the loop. Aligning a braf label causes incorrect switch
4259 destination addresses; we can detect braf labels because they are
4260 followed by a BARRIER.
4261 Applying loop alignment to small constant or switch tables is a waste
4262 of space, so we suppress this too. */
4264 sh_loop_align (rtx label
)
4269 next
= next_nonnote_insn (next
);
4270 while (next
&& GET_CODE (next
) == CODE_LABEL
);
4274 || GET_CODE (PATTERN (next
)) == ADDR_DIFF_VEC
4275 || recog_memoized (next
) == CODE_FOR_consttable_2
)
4278 return align_loops_log
;
4281 /* Do a final pass over the function, just before delayed branch
4287 rtx first
, insn
, mova
= NULL_RTX
;
4289 rtx r0_rtx
= gen_rtx_REG (Pmode
, 0);
4290 rtx r0_inc_rtx
= gen_rtx_POST_INC (Pmode
, r0_rtx
);
4292 first
= get_insns ();
4294 /* We must split call insns before introducing `mova's. If we're
4295 optimizing, they'll have already been split. Otherwise, make
4296 sure we don't split them too late. */
4298 split_all_insns_noflow ();
4303 /* If relaxing, generate pseudo-ops to associate function calls with
4304 the symbols they call. It does no harm to not generate these
4305 pseudo-ops. However, when we can generate them, it enables to
4306 linker to potentially relax the jsr to a bsr, and eliminate the
4307 register load and, possibly, the constant pool entry. */
4309 mdep_reorg_phase
= SH_INSERT_USES_LABELS
;
4312 /* Remove all REG_LABEL notes. We want to use them for our own
4313 purposes. This works because none of the remaining passes
4314 need to look at them.
4316 ??? But it may break in the future. We should use a machine
4317 dependent REG_NOTE, or some other approach entirely. */
4318 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4324 while ((note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
)) != 0)
4325 remove_note (insn
, note
);
4329 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4331 rtx pattern
, reg
, link
, set
, scan
, dies
, label
;
4332 int rescan
= 0, foundinsn
= 0;
4334 if (GET_CODE (insn
) == CALL_INSN
)
4336 pattern
= PATTERN (insn
);
4338 if (GET_CODE (pattern
) == PARALLEL
)
4339 pattern
= XVECEXP (pattern
, 0, 0);
4340 if (GET_CODE (pattern
) == SET
)
4341 pattern
= SET_SRC (pattern
);
4343 if (GET_CODE (pattern
) != CALL
4344 || GET_CODE (XEXP (pattern
, 0)) != MEM
)
4347 reg
= XEXP (XEXP (pattern
, 0), 0);
4351 reg
= sfunc_uses_reg (insn
);
4356 if (GET_CODE (reg
) != REG
)
4359 /* This is a function call via REG. If the only uses of REG
4360 between the time that it is set and the time that it dies
4361 are in function calls, then we can associate all the
4362 function calls with the setting of REG. */
4364 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
4366 if (REG_NOTE_KIND (link
) != 0)
4368 set
= single_set (XEXP (link
, 0));
4369 if (set
&& rtx_equal_p (reg
, SET_DEST (set
)))
4371 link
= XEXP (link
, 0);
4378 /* ??? Sometimes global register allocation will have
4379 deleted the insn pointed to by LOG_LINKS. Try
4380 scanning backward to find where the register is set. */
4381 for (scan
= PREV_INSN (insn
);
4382 scan
&& GET_CODE (scan
) != CODE_LABEL
;
4383 scan
= PREV_INSN (scan
))
4385 if (! INSN_P (scan
))
4388 if (! reg_mentioned_p (reg
, scan
))
4391 if (noncall_uses_reg (reg
, scan
, &set
))
4405 /* The register is set at LINK. */
4407 /* We can only optimize the function call if the register is
4408 being set to a symbol. In theory, we could sometimes
4409 optimize calls to a constant location, but the assembler
4410 and linker do not support that at present. */
4411 if (GET_CODE (SET_SRC (set
)) != SYMBOL_REF
4412 && GET_CODE (SET_SRC (set
)) != LABEL_REF
)
4415 /* Scan forward from LINK to the place where REG dies, and
4416 make sure that the only insns which use REG are
4417 themselves function calls. */
4419 /* ??? This doesn't work for call targets that were allocated
4420 by reload, since there may not be a REG_DEAD note for the
4424 for (scan
= NEXT_INSN (link
); scan
; scan
= NEXT_INSN (scan
))
4428 /* Don't try to trace forward past a CODE_LABEL if we haven't
4429 seen INSN yet. Ordinarily, we will only find the setting insn
4430 in LOG_LINKS if it is in the same basic block. However,
4431 cross-jumping can insert code labels in between the load and
4432 the call, and can result in situations where a single call
4433 insn may have two targets depending on where we came from. */
4435 if (GET_CODE (scan
) == CODE_LABEL
&& ! foundinsn
)
4438 if (! INSN_P (scan
))
4441 /* Don't try to trace forward past a JUMP. To optimize
4442 safely, we would have to check that all the
4443 instructions at the jump destination did not use REG. */
4445 if (GET_CODE (scan
) == JUMP_INSN
)
4448 if (! reg_mentioned_p (reg
, scan
))
4451 if (noncall_uses_reg (reg
, scan
, &scanset
))
4458 && (GET_CODE (scan
) == CALL_INSN
|| sfunc_uses_reg (scan
)))
4460 /* There is a function call to this register other
4461 than the one we are checking. If we optimize
4462 this call, we need to rescan again below. */
4466 /* ??? We shouldn't have to worry about SCANSET here.
4467 We should just be able to check for a REG_DEAD note
4468 on a function call. However, the REG_DEAD notes are
4469 apparently not dependable around libcalls; c-torture
4470 execute/920501-2 is a test case. If SCANSET is set,
4471 then this insn sets the register, so it must have
4472 died earlier. Unfortunately, this will only handle
4473 the cases in which the register is, in fact, set in a
4476 /* ??? We shouldn't have to use FOUNDINSN here.
4477 However, the LOG_LINKS fields are apparently not
4478 entirely reliable around libcalls;
4479 newlib/libm/math/e_pow.c is a test case. Sometimes
4480 an insn will appear in LOG_LINKS even though it is
4481 not the most recent insn which sets the register. */
4485 || find_reg_note (scan
, REG_DEAD
, reg
)))
4494 /* Either there was a branch, or some insn used REG
4495 other than as a function call address. */
4499 /* Create a code label, and put it in a REG_LABEL note on
4500 the insn which sets the register, and on each call insn
4501 which uses the register. In final_prescan_insn we look
4502 for the REG_LABEL notes, and output the appropriate label
4505 label
= gen_label_rtx ();
4506 REG_NOTES (link
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4508 REG_NOTES (insn
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4517 scan
= NEXT_INSN (scan
);
4519 && ((GET_CODE (scan
) == CALL_INSN
4520 && reg_mentioned_p (reg
, scan
))
4521 || ((reg2
= sfunc_uses_reg (scan
))
4522 && REGNO (reg2
) == REGNO (reg
))))
4524 = gen_rtx_INSN_LIST (REG_LABEL
, label
, REG_NOTES (scan
));
4526 while (scan
!= dies
);
4532 fixup_addr_diff_vecs (first
);
4536 mdep_reorg_phase
= SH_SHORTEN_BRANCHES0
;
4537 shorten_branches (first
);
4539 /* Scan the function looking for move instructions which have to be
4540 changed to pc-relative loads and insert the literal tables. */
4542 mdep_reorg_phase
= SH_FIXUP_PCLOAD
;
4543 for (insn
= first
, num_mova
= 0; insn
; insn
= NEXT_INSN (insn
))
4547 /* ??? basic block reordering can move a switch table dispatch
4548 below the switch table. Check if that has happened.
4549 We only have the addresses available when optimizing; but then,
4550 this check shouldn't be needed when not optimizing. */
4551 rtx label_ref
= XVECEXP (SET_SRC (PATTERN (insn
)), 0, 0);
4553 && (INSN_ADDRESSES (INSN_UID (insn
))
4554 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref
, 0)))))
4556 /* Change the mova into a load.
4557 broken_move will then return true for it. */
4560 else if (! num_mova
++)
4563 else if (GET_CODE (insn
) == JUMP_INSN
4564 && GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
4572 /* Some code might have been inserted between the mova and
4573 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4574 for (scan
= mova
, total
= 0; scan
!= insn
; scan
= NEXT_INSN (scan
))
4575 total
+= get_attr_length (scan
);
4577 /* range of mova is 1020, add 4 because pc counts from address of
4578 second instruction after this one, subtract 2 in case pc is 2
4579 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4580 cancels out with alignment effects of the mova itself. */
4583 /* Change the mova into a load, and restart scanning
4584 there. broken_move will then return true for mova. */
4589 if (broken_move (insn
)
4590 || (GET_CODE (insn
) == INSN
4591 && recog_memoized (insn
) == CODE_FOR_casesi_worker_2
))
4594 /* Scan ahead looking for a barrier to stick the constant table
4596 rtx barrier
= find_barrier (num_mova
, mova
, insn
);
4597 rtx last_float_move
= NULL_RTX
, last_float
= 0, *last_float_addr
= NULL
;
4598 int need_aligned_label
= 0;
4600 if (num_mova
&& ! mova_p (mova
))
4602 /* find_barrier had to change the first mova into a
4603 pcload; thus, we have to start with this new pcload. */
4607 /* Now find all the moves between the points and modify them. */
4608 for (scan
= insn
; scan
!= barrier
; scan
= NEXT_INSN (scan
))
4610 if (GET_CODE (scan
) == CODE_LABEL
)
4612 if (GET_CODE (scan
) == INSN
4613 && recog_memoized (scan
) == CODE_FOR_casesi_worker_2
)
4614 need_aligned_label
= 1;
4615 if (broken_move (scan
))
4617 rtx
*patp
= &PATTERN (scan
), pat
= *patp
;
4621 enum machine_mode mode
;
4623 if (GET_CODE (pat
) == PARALLEL
)
4624 patp
= &XVECEXP (pat
, 0, 0), pat
= *patp
;
4625 src
= SET_SRC (pat
);
4626 dst
= SET_DEST (pat
);
4627 mode
= GET_MODE (dst
);
4629 if (mode
== SImode
&& hi_const (src
)
4630 && REGNO (dst
) != FPUL_REG
)
4635 while (GET_CODE (dst
) == SUBREG
)
4637 offset
+= subreg_regno_offset (REGNO (SUBREG_REG (dst
)),
4638 GET_MODE (SUBREG_REG (dst
)),
4641 dst
= SUBREG_REG (dst
);
4643 dst
= gen_rtx_REG (HImode
, REGNO (dst
) + offset
);
4645 if (GET_CODE (dst
) == REG
&& FP_ANY_REGISTER_P (REGNO (dst
)))
4647 /* This must be an insn that clobbers r0. */
4648 rtx
*clobberp
= &XVECEXP (PATTERN (scan
), 0,
4649 XVECLEN (PATTERN (scan
), 0)
4651 rtx clobber
= *clobberp
;
4653 gcc_assert (GET_CODE (clobber
) == CLOBBER
4654 && rtx_equal_p (XEXP (clobber
, 0), r0_rtx
));
4657 && reg_set_between_p (r0_rtx
, last_float_move
, scan
))
4661 && GET_MODE_SIZE (mode
) != 4
4662 && GET_MODE_SIZE (GET_MODE (last_float
)) == 4)
4664 lab
= add_constant (src
, mode
, last_float
);
4666 emit_insn_before (gen_mova (lab
), scan
);
4669 /* There will be a REG_UNUSED note for r0 on
4670 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4671 lest reorg:mark_target_live_regs will not
4672 consider r0 to be used, and we end up with delay
4673 slot insn in front of SCAN that clobbers r0. */
4675 = find_regno_note (last_float_move
, REG_UNUSED
, 0);
4677 /* If we are not optimizing, then there may not be
4680 PUT_MODE (note
, REG_INC
);
4682 *last_float_addr
= r0_inc_rtx
;
4684 last_float_move
= scan
;
4686 newsrc
= gen_const_mem (mode
,
4687 (((TARGET_SH4
&& ! TARGET_FMOVD
)
4688 || REGNO (dst
) == FPUL_REG
)
4691 last_float_addr
= &XEXP (newsrc
, 0);
4693 /* Remove the clobber of r0. */
4694 *clobberp
= gen_rtx_CLOBBER (GET_MODE (clobber
),
4695 gen_rtx_SCRATCH (Pmode
));
4697 /* This is a mova needing a label. Create it. */
4698 else if (GET_CODE (src
) == UNSPEC
4699 && XINT (src
, 1) == UNSPEC_MOVA
4700 && GET_CODE (XVECEXP (src
, 0, 0)) == CONST
)
4702 lab
= add_constant (XVECEXP (src
, 0, 0), mode
, 0);
4703 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4704 newsrc
= gen_rtx_UNSPEC (SImode
,
4705 gen_rtvec (1, newsrc
),
4710 lab
= add_constant (src
, mode
, 0);
4711 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4712 newsrc
= gen_const_mem (mode
, newsrc
);
4714 *patp
= gen_rtx_SET (VOIDmode
, dst
, newsrc
);
4715 INSN_CODE (scan
) = -1;
4718 dump_table (need_aligned_label
? insn
: 0, barrier
);
4723 mdep_reorg_phase
= SH_SHORTEN_BRANCHES1
;
4724 INSN_ADDRESSES_FREE ();
4725 split_branches (first
);
4727 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4728 also has an effect on the register that holds the address of the sfunc.
4729 Insert an extra dummy insn in front of each sfunc that pretends to
4730 use this register. */
4731 if (flag_delayed_branch
)
4733 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4735 rtx reg
= sfunc_uses_reg (insn
);
4739 emit_insn_before (gen_use_sfunc_addr (reg
), insn
);
4743 /* fpscr is not actually a user variable, but we pretend it is for the
4744 sake of the previous optimization passes, since we want it handled like
4745 one. However, we don't have any debugging information for it, so turn
4746 it into a non-user variable now. */
4748 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4750 mdep_reorg_phase
= SH_AFTER_MDEP_REORG
;
4754 get_dest_uid (rtx label
, int max_uid
)
4756 rtx dest
= next_real_insn (label
);
4759 /* This can happen for an undefined label. */
4761 dest_uid
= INSN_UID (dest
);
4762 /* If this is a newly created branch redirection blocking instruction,
4763 we cannot index the branch_uid or insn_addresses arrays with its
4764 uid. But then, we won't need to, because the actual destination is
4765 the following branch. */
4766 while (dest_uid
>= max_uid
)
4768 dest
= NEXT_INSN (dest
);
4769 dest_uid
= INSN_UID (dest
);
4771 if (GET_CODE (dest
) == JUMP_INSN
&& GET_CODE (PATTERN (dest
)) == RETURN
)
4776 /* Split condbranches that are out of range. Also add clobbers for
4777 scratch registers that are needed in far jumps.
4778 We do this before delay slot scheduling, so that it can take our
4779 newly created instructions into account. It also allows us to
4780 find branches with common targets more easily. */
4783 split_branches (rtx first
)
4786 struct far_branch
**uid_branch
, *far_branch_list
= 0;
4787 int max_uid
= get_max_uid ();
4790 /* Find out which branches are out of range. */
4791 shorten_branches (first
);
4793 uid_branch
= (struct far_branch
**) alloca (max_uid
* sizeof *uid_branch
);
4794 memset ((char *) uid_branch
, 0, max_uid
* sizeof *uid_branch
);
4796 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4797 if (! INSN_P (insn
))
4799 else if (INSN_DELETED_P (insn
))
4801 /* Shorten_branches would split this instruction again,
4802 so transform it into a note. */
4803 PUT_CODE (insn
, NOTE
);
4804 NOTE_LINE_NUMBER (insn
) = NOTE_INSN_DELETED
;
4805 NOTE_SOURCE_FILE (insn
) = 0;
4807 else if (GET_CODE (insn
) == JUMP_INSN
4808 /* Don't mess with ADDR_DIFF_VEC */
4809 && (GET_CODE (PATTERN (insn
)) == SET
4810 || GET_CODE (PATTERN (insn
)) == RETURN
))
4812 enum attr_type type
= get_attr_type (insn
);
4813 if (type
== TYPE_CBRANCH
)
4817 if (get_attr_length (insn
) > 4)
4819 rtx src
= SET_SRC (PATTERN (insn
));
4820 rtx olabel
= XEXP (XEXP (src
, 1), 0);
4821 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4823 int dest_uid
= get_dest_uid (olabel
, max_uid
);
4824 struct far_branch
*bp
= uid_branch
[dest_uid
];
4826 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4827 the label if the LABEL_NUSES count drops to zero. There is
4828 always a jump_optimize pass that sets these values, but it
4829 proceeds to delete unreferenced code, and then if not
4830 optimizing, to un-delete the deleted instructions, thus
4831 leaving labels with too low uses counts. */
4834 JUMP_LABEL (insn
) = olabel
;
4835 LABEL_NUSES (olabel
)++;
4839 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4840 uid_branch
[dest_uid
] = bp
;
4841 bp
->prev
= far_branch_list
;
4842 far_branch_list
= bp
;
4844 = XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1), 0);
4845 LABEL_NUSES (bp
->far_label
)++;
4849 label
= bp
->near_label
;
4850 if (! label
&& bp
->address
- addr
>= CONDJUMP_MIN
)
4852 rtx block
= bp
->insert_place
;
4854 if (GET_CODE (PATTERN (block
)) == RETURN
)
4855 block
= PREV_INSN (block
);
4857 block
= gen_block_redirect (block
,
4859 label
= emit_label_after (gen_label_rtx (),
4861 bp
->near_label
= label
;
4863 else if (label
&& ! NEXT_INSN (label
))
4865 if (addr
+ 2 - bp
->address
<= CONDJUMP_MAX
)
4866 bp
->insert_place
= insn
;
4868 gen_far_branch (bp
);
4872 || (NEXT_INSN (label
) && bp
->address
- addr
< CONDJUMP_MIN
))
4874 bp
->near_label
= label
= gen_label_rtx ();
4875 bp
->insert_place
= insn
;
4878 ok
= redirect_jump (insn
, label
, 1);
4883 /* get_attr_length (insn) == 2 */
4884 /* Check if we have a pattern where reorg wants to redirect
4885 the branch to a label from an unconditional branch that
4887 /* We can't use JUMP_LABEL here because it might be undefined
4888 when not optimizing. */
4889 /* A syntax error might cause beyond to be NULL_RTX. */
4891 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1),
4895 && (GET_CODE (beyond
) == JUMP_INSN
4896 || ((beyond
= next_active_insn (beyond
))
4897 && GET_CODE (beyond
) == JUMP_INSN
))
4898 && GET_CODE (PATTERN (beyond
)) == SET
4899 && recog_memoized (beyond
) == CODE_FOR_jump_compact
4901 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond
)), 0)))
4902 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4904 gen_block_redirect (beyond
,
4905 INSN_ADDRESSES (INSN_UID (beyond
)), 1);
4908 next
= next_active_insn (insn
);
4910 if ((GET_CODE (next
) == JUMP_INSN
4911 || ((next
= next_active_insn (next
))
4912 && GET_CODE (next
) == JUMP_INSN
))
4913 && GET_CODE (PATTERN (next
)) == SET
4914 && recog_memoized (next
) == CODE_FOR_jump_compact
4916 (INSN_UID (XEXP (SET_SRC (PATTERN (next
)), 0)))
4917 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4919 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), 1);
4921 else if (type
== TYPE_JUMP
|| type
== TYPE_RETURN
)
4923 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4926 struct far_branch
*bp
;
4928 if (type
== TYPE_JUMP
)
4930 far_label
= XEXP (SET_SRC (PATTERN (insn
)), 0);
4931 dest_uid
= get_dest_uid (far_label
, max_uid
);
4934 /* Parse errors can lead to labels outside
4936 if (! NEXT_INSN (far_label
))
4941 JUMP_LABEL (insn
) = far_label
;
4942 LABEL_NUSES (far_label
)++;
4944 redirect_jump (insn
, NULL_RTX
, 1);
4948 bp
= uid_branch
[dest_uid
];
4951 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4952 uid_branch
[dest_uid
] = bp
;
4953 bp
->prev
= far_branch_list
;
4954 far_branch_list
= bp
;
4956 bp
->far_label
= far_label
;
4958 LABEL_NUSES (far_label
)++;
4960 else if (bp
->near_label
&& ! NEXT_INSN (bp
->near_label
))
4961 if (addr
- bp
->address
<= CONDJUMP_MAX
)
4962 emit_label_after (bp
->near_label
, PREV_INSN (insn
));
4965 gen_far_branch (bp
);
4971 bp
->insert_place
= insn
;
4973 emit_insn_before (gen_block_branch_redirect (const0_rtx
), insn
);
4975 gen_block_redirect (insn
, addr
, bp
->near_label
? 2 : 0);
4978 /* Generate all pending far branches,
4979 and free our references to the far labels. */
4980 while (far_branch_list
)
4982 if (far_branch_list
->near_label
4983 && ! NEXT_INSN (far_branch_list
->near_label
))
4984 gen_far_branch (far_branch_list
);
4986 && far_branch_list
->far_label
4987 && ! --LABEL_NUSES (far_branch_list
->far_label
))
4988 delete_insn (far_branch_list
->far_label
);
4989 far_branch_list
= far_branch_list
->prev
;
4992 /* Instruction length information is no longer valid due to the new
4993 instructions that have been generated. */
4994 init_insn_lengths ();
4997 /* Dump out instruction addresses, which is useful for debugging the
4998 constant pool table stuff.
5000 If relaxing, output the label and pseudo-ops used to link together
5001 calls and the instruction which set the registers. */
5003 /* ??? The addresses printed by this routine for insns are nonsense for
5004 insns which are inside of a sequence where none of the inner insns have
5005 variable length. This is because the second pass of shorten_branches
5006 does not bother to update them. */
5009 final_prescan_insn (rtx insn
, rtx
*opvec ATTRIBUTE_UNUSED
,
5010 int noperands ATTRIBUTE_UNUSED
)
5012 if (TARGET_DUMPISIZE
)
5013 fprintf (asm_out_file
, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn
)));
5019 note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
);
5024 pattern
= PATTERN (insn
);
5025 if (GET_CODE (pattern
) == PARALLEL
)
5026 pattern
= XVECEXP (pattern
, 0, 0);
5027 switch (GET_CODE (pattern
))
5030 if (GET_CODE (SET_SRC (pattern
)) != CALL
5031 && get_attr_type (insn
) != TYPE_SFUNC
)
5033 targetm
.asm_out
.internal_label
5034 (asm_out_file
, "L", CODE_LABEL_NUMBER (XEXP (note
, 0)));
5037 /* else FALLTHROUGH */
5039 asm_fprintf (asm_out_file
, "\t.uses %LL%d\n",
5040 CODE_LABEL_NUMBER (XEXP (note
, 0)));
5050 /* Dump out any constants accumulated in the final pass. These will
5054 output_jump_label_table (void)
5060 fprintf (asm_out_file
, "\t.align 2\n");
5061 for (i
= 0; i
< pool_size
; i
++)
5063 pool_node
*p
= &pool_vector
[i
];
5065 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5066 CODE_LABEL_NUMBER (p
->label
));
5067 output_asm_insn (".long %O0", &p
->value
);
5075 /* A full frame looks like:
5079 [ if current_function_anonymous_args
5092 local-0 <- fp points here. */
5094 /* Number of bytes pushed for anonymous args, used to pass information
5095 between expand_prologue and expand_epilogue. */
5097 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5098 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5099 for an epilogue and a negative value means that it's for a sibcall
5100 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5101 all the registers that are about to be restored, and hence dead. */
5104 output_stack_adjust (int size
, rtx reg
, int epilogue_p
,
5105 HARD_REG_SET
*live_regs_mask
)
5107 rtx (*emit_fn
) (rtx
) = epilogue_p
? &emit_insn
: &frame_insn
;
5110 HOST_WIDE_INT align
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5112 /* This test is bogus, as output_stack_adjust is used to re-align the
5115 gcc_assert (!(size
% align
));
5118 if (CONST_OK_FOR_ADD (size
))
5119 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
)));
5120 /* Try to do it with two partial adjustments; however, we must make
5121 sure that the stack is properly aligned at all times, in case
5122 an interrupt occurs between the two partial adjustments. */
5123 else if (CONST_OK_FOR_ADD (size
/ 2 & -align
)
5124 && CONST_OK_FOR_ADD (size
- (size
/ 2 & -align
)))
5126 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
/ 2 & -align
)));
5127 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
- (size
/ 2 & -align
))));
5133 int temp
= epilogue_p
? 7 : (TARGET_SH5
? 0 : 1);
5136 /* If TEMP is invalid, we could temporarily save a general
5137 register to MACL. However, there is currently no need
5138 to handle this case, so just die when we see it. */
5140 || current_function_interrupt
5141 || ! call_really_used_regs
[temp
] || fixed_regs
[temp
])
5143 if (temp
< 0 && ! current_function_interrupt
5144 && (TARGET_SHMEDIA
|| epilogue_p
>= 0))
5147 COPY_HARD_REG_SET (temps
, call_used_reg_set
);
5148 AND_COMPL_HARD_REG_SET (temps
, call_fixed_reg_set
);
5152 if (current_function_return_rtx
)
5154 enum machine_mode mode
;
5155 mode
= GET_MODE (current_function_return_rtx
);
5156 if (BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
)
5157 nreg
= HARD_REGNO_NREGS (FIRST_RET_REG
, mode
);
5159 for (i
= 0; i
< nreg
; i
++)
5160 CLEAR_HARD_REG_BIT (temps
, FIRST_RET_REG
+ i
);
5161 if (current_function_calls_eh_return
)
5163 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_STACKADJ_REGNO
);
5164 for (i
= 0; i
<= 3; i
++)
5165 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_DATA_REGNO (i
));
5168 if (TARGET_SHMEDIA
&& epilogue_p
< 0)
5169 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
5170 CLEAR_HARD_REG_BIT (temps
, i
);
5171 if (epilogue_p
<= 0)
5173 for (i
= FIRST_PARM_REG
;
5174 i
< FIRST_PARM_REG
+ NPARM_REGS (SImode
); i
++)
5175 CLEAR_HARD_REG_BIT (temps
, i
);
5176 if (cfun
->static_chain_decl
!= NULL
)
5177 CLEAR_HARD_REG_BIT (temps
, STATIC_CHAIN_REGNUM
);
5179 temp
= scavenge_reg (&temps
);
5181 if (temp
< 0 && live_regs_mask
)
5182 temp
= scavenge_reg (live_regs_mask
);
5185 rtx adj_reg
, tmp_reg
, mem
;
5187 /* If we reached here, the most likely case is the (sibcall)
5188 epilogue for non SHmedia. Put a special push/pop sequence
5189 for such case as the last resort. This looks lengthy but
5190 would not be problem because it seems to be very
5193 gcc_assert (!TARGET_SHMEDIA
&& epilogue_p
);
5196 /* ??? There is still the slight possibility that r4 or
5197 r5 have been reserved as fixed registers or assigned
5198 as global registers, and they change during an
5199 interrupt. There are possible ways to handle this:
5201 - If we are adjusting the frame pointer (r14), we can do
5202 with a single temp register and an ordinary push / pop
5204 - Grab any call-used or call-saved registers (i.e. not
5205 fixed or globals) for the temps we need. We might
5206 also grab r14 if we are adjusting the stack pointer.
5207 If we can't find enough available registers, issue
5208 a diagnostic and die - the user must have reserved
5209 way too many registers.
5210 But since all this is rather unlikely to happen and
5211 would require extra testing, we just die if r4 / r5
5212 are not available. */
5213 gcc_assert (!fixed_regs
[4] && !fixed_regs
[5]
5214 && !global_regs
[4] && !global_regs
[5]);
5216 adj_reg
= gen_rtx_REG (GET_MODE (reg
), 4);
5217 tmp_reg
= gen_rtx_REG (GET_MODE (reg
), 5);
5218 emit_move_insn (gen_tmp_stack_mem (Pmode
, reg
), adj_reg
);
5219 emit_insn (GEN_MOV (adj_reg
, GEN_INT (size
)));
5220 emit_insn (GEN_ADD3 (adj_reg
, adj_reg
, reg
));
5221 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5222 emit_move_insn (mem
, tmp_reg
);
5223 emit_move_insn (tmp_reg
, gen_tmp_stack_mem (Pmode
, reg
));
5224 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5225 emit_move_insn (mem
, tmp_reg
);
5226 emit_move_insn (reg
, adj_reg
);
5227 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5228 emit_move_insn (adj_reg
, mem
);
5229 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5230 emit_move_insn (tmp_reg
, mem
);
5233 const_reg
= gen_rtx_REG (GET_MODE (reg
), temp
);
5235 /* If SIZE is negative, subtract the positive value.
5236 This sometimes allows a constant pool entry to be shared
5237 between prologue and epilogue code. */
5240 emit_insn (GEN_MOV (const_reg
, GEN_INT (-size
)));
5241 insn
= emit_fn (GEN_SUB3 (reg
, reg
, const_reg
));
5245 emit_insn (GEN_MOV (const_reg
, GEN_INT (size
)));
5246 insn
= emit_fn (GEN_ADD3 (reg
, reg
, const_reg
));
5250 = (gen_rtx_EXPR_LIST
5251 (REG_FRAME_RELATED_EXPR
,
5252 gen_rtx_SET (VOIDmode
, reg
,
5253 gen_rtx_PLUS (SImode
, reg
, GEN_INT (size
))),
5263 RTX_FRAME_RELATED_P (x
) = 1;
5267 /* Output RTL to push register RN onto the stack. */
5274 x
= gen_push_fpul ();
5275 else if (rn
== FPSCR_REG
)
5276 x
= gen_push_fpscr ();
5277 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& ! TARGET_FPU_SINGLE
5278 && FP_OR_XD_REGISTER_P (rn
))
5280 if (FP_REGISTER_P (rn
) && (rn
- FIRST_FP_REG
) & 1)
5282 x
= gen_push_4 (gen_rtx_REG (DFmode
, rn
));
5284 else if (TARGET_SH2E
&& FP_REGISTER_P (rn
))
5285 x
= gen_push_e (gen_rtx_REG (SFmode
, rn
));
5287 x
= gen_push (gen_rtx_REG (SImode
, rn
));
5291 = gen_rtx_EXPR_LIST (REG_INC
,
5292 gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
), 0);
5296 /* Output RTL to pop register RN from the stack. */
5303 x
= gen_pop_fpul ();
5304 else if (rn
== FPSCR_REG
)
5305 x
= gen_pop_fpscr ();
5306 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& ! TARGET_FPU_SINGLE
5307 && FP_OR_XD_REGISTER_P (rn
))
5309 if (FP_REGISTER_P (rn
) && (rn
- FIRST_FP_REG
) & 1)
5311 x
= gen_pop_4 (gen_rtx_REG (DFmode
, rn
));
5313 else if (TARGET_SH2E
&& FP_REGISTER_P (rn
))
5314 x
= gen_pop_e (gen_rtx_REG (SFmode
, rn
));
5316 x
= gen_pop (gen_rtx_REG (SImode
, rn
));
5320 = gen_rtx_EXPR_LIST (REG_INC
,
5321 gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
), 0);
5324 /* Generate code to push the regs specified in the mask. */
5327 push_regs (HARD_REG_SET
*mask
, int interrupt_handler
)
5332 /* Push PR last; this gives better latencies after the prologue, and
5333 candidates for the return delay slot when there are no general
5334 registers pushed. */
5335 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
5337 /* If this is an interrupt handler, and the SZ bit varies,
5338 and we have to push any floating point register, we need
5339 to switch to the correct precision first. */
5340 if (i
== FIRST_FP_REG
&& interrupt_handler
&& TARGET_FMOVD
5341 && hard_regs_intersect_p (mask
, ®_class_contents
[DF_REGS
]))
5343 HARD_REG_SET unsaved
;
5346 COMPL_HARD_REG_SET (unsaved
, *mask
);
5347 fpscr_set_from_mem (NORMAL_MODE (FP_MODE
), unsaved
);
5351 && (i
!= FPSCR_REG
|| ! skip_fpscr
)
5352 && TEST_HARD_REG_BIT (*mask
, i
))
5355 if (TEST_HARD_REG_BIT (*mask
, PR_REG
))
5359 /* Calculate how much extra space is needed to save all callee-saved
5361 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5364 shmedia_target_regs_stack_space (HARD_REG_SET
*live_regs_mask
)
5367 int stack_space
= 0;
5368 int interrupt_handler
= sh_cfun_interrupt_handler_p ();
5370 for (reg
= LAST_TARGET_REG
; reg
>= FIRST_TARGET_REG
; reg
--)
5371 if ((! call_really_used_regs
[reg
] || interrupt_handler
)
5372 && ! TEST_HARD_REG_BIT (*live_regs_mask
, reg
))
5373 /* Leave space to save this target register on the stack,
5374 in case target register allocation wants to use it. */
5375 stack_space
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5379 /* Decide whether we should reserve space for callee-save target registers,
5380 in case target register allocation wants to use them. REGS_SAVED is
5381 the space, in bytes, that is already required for register saves.
5382 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5385 shmedia_reserve_space_for_target_registers_p (int regs_saved
,
5386 HARD_REG_SET
*live_regs_mask
)
5390 return shmedia_target_regs_stack_space (live_regs_mask
) <= regs_saved
;
5393 /* Decide how much space to reserve for callee-save target registers
5394 in case target register allocation wants to use them.
5395 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5398 shmedia_target_regs_stack_adjust (HARD_REG_SET
*live_regs_mask
)
5400 if (shmedia_space_reserved_for_target_registers
)
5401 return shmedia_target_regs_stack_space (live_regs_mask
);
5406 /* Work out the registers which need to be saved, both as a mask and a
5407 count of saved words. Return the count.
5409 If doing a pragma interrupt function, then push all regs used by the
5410 function, and if we call another function (we can tell by looking at PR),
5411 make sure that all the regs it clobbers are safe too. */
5414 calc_live_regs (HARD_REG_SET
*live_regs_mask
)
5418 int interrupt_handler
;
5419 int pr_live
, has_call
;
5421 interrupt_handler
= sh_cfun_interrupt_handler_p ();
5423 CLEAR_HARD_REG_SET (*live_regs_mask
);
5424 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& interrupt_handler
5425 && regs_ever_live
[FPSCR_REG
])
5426 target_flags
&= ~MASK_FPU_SINGLE
;
5427 /* If we can save a lot of saves by switching to double mode, do that. */
5428 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& TARGET_FPU_SINGLE
)
5429 for (count
= 0, reg
= FIRST_FP_REG
; reg
<= LAST_FP_REG
; reg
+= 2)
5430 if (regs_ever_live
[reg
] && regs_ever_live
[reg
+1]
5431 && (! call_really_used_regs
[reg
]
5432 || (interrupt_handler
&& ! pragma_trapa
))
5435 target_flags
&= ~MASK_FPU_SINGLE
;
5438 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5439 knows how to use it. That means the pseudo originally allocated for
5440 the initial value can become the PR_MEDIA_REG hard register, as seen for
5441 execute/20010122-1.c:test9. */
5443 /* ??? this function is called from initial_elimination_offset, hence we
5444 can't use the result of sh_media_register_for_return here. */
5445 pr_live
= sh_pr_n_sets ();
5448 rtx pr_initial
= has_hard_reg_initial_val (Pmode
, PR_REG
);
5449 pr_live
= (pr_initial
5450 ? (GET_CODE (pr_initial
) != REG
5451 || REGNO (pr_initial
) != (PR_REG
))
5452 : regs_ever_live
[PR_REG
]);
5453 /* For Shcompact, if not optimizing, we end up with a memory reference
5454 using the return address pointer for __builtin_return_address even
5455 though there is no actual need to put the PR register on the stack. */
5456 pr_live
|= regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
];
5458 /* Force PR to be live if the prologue has to call the SHmedia
5459 argument decoder or register saver. */
5460 if (TARGET_SHCOMPACT
5461 && ((current_function_args_info
.call_cookie
5462 & ~ CALL_COOKIE_RET_TRAMP (1))
5463 || current_function_has_nonlocal_label
))
5465 has_call
= TARGET_SHMEDIA
? ! leaf_function_p () : pr_live
;
5466 for (count
= 0, reg
= FIRST_PSEUDO_REGISTER
; reg
-- != 0; )
5468 if (reg
== (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
)
5470 : (interrupt_handler
&& ! pragma_trapa
)
5471 ? (/* Need to save all the regs ever live. */
5472 (regs_ever_live
[reg
]
5473 || (call_really_used_regs
[reg
]
5474 && (! fixed_regs
[reg
] || reg
== MACH_REG
|| reg
== MACL_REG
5475 || reg
== PIC_OFFSET_TABLE_REGNUM
)
5477 || (has_call
&& REGISTER_NATURAL_MODE (reg
) == SImode
5478 && (GENERAL_REGISTER_P (reg
) || TARGET_REGISTER_P (reg
))))
5479 && reg
!= STACK_POINTER_REGNUM
&& reg
!= ARG_POINTER_REGNUM
5480 && reg
!= RETURN_ADDRESS_POINTER_REGNUM
5481 && reg
!= T_REG
&& reg
!= GBR_REG
5482 /* Push fpscr only on targets which have FPU */
5483 && (reg
!= FPSCR_REG
|| TARGET_FPU_ANY
))
5484 : (/* Only push those regs which are used and need to be saved. */
5487 && current_function_args_info
.call_cookie
5488 && reg
== PIC_OFFSET_TABLE_REGNUM
)
5489 || (regs_ever_live
[reg
] && ! call_really_used_regs
[reg
])
5490 || (current_function_calls_eh_return
5491 && (reg
== EH_RETURN_DATA_REGNO (0)
5492 || reg
== EH_RETURN_DATA_REGNO (1)
5493 || reg
== EH_RETURN_DATA_REGNO (2)
5494 || reg
== EH_RETURN_DATA_REGNO (3)))
5495 || ((reg
== MACL_REG
|| reg
== MACH_REG
)
5496 && regs_ever_live
[reg
]
5497 && sh_cfun_attr_renesas_p ())
5500 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5501 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5503 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
|| TARGET_SH5
) && TARGET_FMOVD
5504 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg
)) == MODE_FLOAT
)
5506 if (FP_REGISTER_P (reg
))
5508 if (! TARGET_FPU_SINGLE
&& ! regs_ever_live
[reg
^ 1])
5510 SET_HARD_REG_BIT (*live_regs_mask
, (reg
^ 1));
5511 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
^ 1));
5514 else if (XD_REGISTER_P (reg
))
5516 /* Must switch to double mode to access these registers. */
5517 target_flags
&= ~MASK_FPU_SINGLE
;
5522 /* If we have a target register optimization pass after prologue / epilogue
5523 threading, we need to assume all target registers will be live even if
5525 if (flag_branch_target_load_optimize2
5526 && TARGET_SAVE_ALL_TARGET_REGS
5527 && shmedia_space_reserved_for_target_registers
)
5528 for (reg
= LAST_TARGET_REG
; reg
>= FIRST_TARGET_REG
; reg
--)
5529 if ((! call_really_used_regs
[reg
] || interrupt_handler
)
5530 && ! TEST_HARD_REG_BIT (*live_regs_mask
, reg
))
5532 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5533 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5535 /* If this is an interrupt handler, we don't have any call-clobbered
5536 registers we can conveniently use for target register save/restore.
5537 Make sure we save at least one general purpose register when we need
5538 to save target registers. */
5539 if (interrupt_handler
5540 && hard_regs_intersect_p (live_regs_mask
,
5541 ®_class_contents
[TARGET_REGS
])
5542 && ! hard_regs_intersect_p (live_regs_mask
,
5543 ®_class_contents
[GENERAL_REGS
]))
5545 SET_HARD_REG_BIT (*live_regs_mask
, R0_REG
);
5546 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG
));
5552 /* Code to generate prologue and epilogue sequences */
5554 /* PUSHED is the number of bytes that are being pushed on the
5555 stack for register saves. Return the frame size, padded
5556 appropriately so that the stack stays properly aligned. */
5557 static HOST_WIDE_INT
5558 rounded_frame_size (int pushed
)
5560 HOST_WIDE_INT size
= get_frame_size ();
5561 HOST_WIDE_INT align
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5563 return ((size
+ pushed
+ align
- 1) & -align
) - pushed
;
5566 /* Choose a call-clobbered target-branch register that remains
5567 unchanged along the whole function. We set it up as the return
5568 value in the prologue. */
5570 sh_media_register_for_return (void)
5575 if (! current_function_is_leaf
)
5577 if (lookup_attribute ("interrupt_handler",
5578 DECL_ATTRIBUTES (current_function_decl
)))
5580 if (sh_cfun_interrupt_handler_p ())
5583 tr0_used
= flag_pic
&& regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
];
5585 for (regno
= FIRST_TARGET_REG
+ tr0_used
; regno
<= LAST_TARGET_REG
; regno
++)
5586 if (call_really_used_regs
[regno
] && ! regs_ever_live
[regno
])
5592 /* The maximum registers we need to save are:
5593 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5594 - 32 floating point registers (for each pair, we save none,
5595 one single precision value, or a double precision value).
5596 - 8 target registers
5597 - add 1 entry for a delimiter. */
5598 #define MAX_SAVED_REGS (62+32+8)
5600 typedef struct save_entry_s
5609 /* There will be a delimiter entry with VOIDmode both at the start and the
5610 end of a filled in schedule. The end delimiter has the offset of the
5611 save with the smallest (i.e. most negative) offset. */
5612 typedef struct save_schedule_s
5614 save_entry entries
[MAX_SAVED_REGS
+ 2];
5615 int temps
[MAX_TEMPS
+1];
5618 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5619 use reverse order. Returns the last entry written to (not counting
5620 the delimiter). OFFSET_BASE is a number to be added to all offset
5624 sh5_schedule_saves (HARD_REG_SET
*live_regs_mask
, save_schedule
*schedule
,
5628 save_entry
*entry
= schedule
->entries
;
5632 if (! current_function_interrupt
)
5633 for (i
= FIRST_GENERAL_REG
; tmpx
< MAX_TEMPS
&& i
<= LAST_GENERAL_REG
; i
++)
5634 if (call_really_used_regs
[i
] && ! fixed_regs
[i
] && i
!= PR_MEDIA_REG
5635 && ! FUNCTION_ARG_REGNO_P (i
)
5636 && i
!= FIRST_RET_REG
5637 && ! (cfun
->static_chain_decl
!= NULL
&& i
== STATIC_CHAIN_REGNUM
)
5638 && ! (current_function_calls_eh_return
5639 && (i
== EH_RETURN_STACKADJ_REGNO
5640 || ((unsigned) i
>= EH_RETURN_DATA_REGNO (0)
5641 && (unsigned) i
<= EH_RETURN_DATA_REGNO (3)))))
5642 schedule
->temps
[tmpx
++] = i
;
5644 entry
->mode
= VOIDmode
;
5645 entry
->offset
= offset_base
;
5647 /* We loop twice: first, we save 8-byte aligned registers in the
5648 higher addresses, that are known to be aligned. Then, we
5649 proceed to saving 32-bit registers that don't need 8-byte
5651 If this is an interrupt function, all registers that need saving
5652 need to be saved in full. moreover, we need to postpone saving
5653 target registers till we have saved some general purpose registers
5654 we can then use as scratch registers. */
5655 offset
= offset_base
;
5656 for (align
= 1; align
>= 0; align
--)
5658 for (i
= FIRST_PSEUDO_REGISTER
- 1; i
>= 0; i
--)
5659 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5661 enum machine_mode mode
= REGISTER_NATURAL_MODE (i
);
5664 if (current_function_interrupt
)
5666 if (TARGET_REGISTER_P (i
))
5668 if (GENERAL_REGISTER_P (i
))
5671 if (mode
== SFmode
&& (i
% 2) == 1
5672 && ! TARGET_FPU_SINGLE
&& FP_REGISTER_P (i
)
5673 && (TEST_HARD_REG_BIT (*live_regs_mask
, (i
^ 1))))
5680 /* If we're doing the aligned pass and this is not aligned,
5681 or we're doing the unaligned pass and this is aligned,
5683 if ((GET_MODE_SIZE (mode
) % (STACK_BOUNDARY
/ BITS_PER_UNIT
) == 0)
5687 if (current_function_interrupt
5688 && GENERAL_REGISTER_P (i
)
5689 && tmpx
< MAX_TEMPS
)
5690 schedule
->temps
[tmpx
++] = i
;
5692 offset
-= GET_MODE_SIZE (mode
);
5695 entry
->offset
= offset
;
5698 if (align
&& current_function_interrupt
)
5699 for (i
= LAST_TARGET_REG
; i
>= FIRST_TARGET_REG
; i
--)
5700 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5702 offset
-= GET_MODE_SIZE (DImode
);
5704 entry
->mode
= DImode
;
5705 entry
->offset
= offset
;
5710 entry
->mode
= VOIDmode
;
5711 entry
->offset
= offset
;
5712 schedule
->temps
[tmpx
] = -1;
5717 sh_expand_prologue (void)
5719 HARD_REG_SET live_regs_mask
;
5722 int save_flags
= target_flags
;
5725 current_function_interrupt
= sh_cfun_interrupt_handler_p ();
5727 /* We have pretend args if we had an object sent partially in registers
5728 and partially on the stack, e.g. a large structure. */
5729 pretend_args
= current_function_pretend_args_size
;
5730 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
)
5731 && (NPARM_REGS(SImode
)
5732 > current_function_args_info
.arg_count
[(int) SH_ARG_INT
]))
5734 output_stack_adjust (-pretend_args
5735 - current_function_args_info
.stack_regs
* 8,
5736 stack_pointer_rtx
, 0, NULL
);
5738 if (TARGET_SHCOMPACT
&& flag_pic
&& current_function_args_info
.call_cookie
)
5739 /* We're going to use the PIC register to load the address of the
5740 incoming-argument decoder and/or of the return trampoline from
5741 the GOT, so make sure the PIC register is preserved and
5743 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5745 if (TARGET_SHCOMPACT
5746 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
5750 /* First, make all registers with incoming arguments that will
5751 be pushed onto the stack live, so that register renaming
5752 doesn't overwrite them. */
5753 for (reg
= 0; reg
< NPARM_REGS (SImode
); reg
++)
5754 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info
.call_cookie
)
5755 >= NPARM_REGS (SImode
) - reg
)
5756 for (; reg
< NPARM_REGS (SImode
); reg
++)
5757 emit_insn (gen_shcompact_preserve_incoming_args
5758 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5759 else if (CALL_COOKIE_INT_REG_GET
5760 (current_function_args_info
.call_cookie
, reg
) == 1)
5761 emit_insn (gen_shcompact_preserve_incoming_args
5762 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5764 emit_move_insn (gen_rtx_REG (Pmode
, MACL_REG
),
5766 emit_move_insn (gen_rtx_REG (SImode
, R0_REG
),
5767 GEN_INT (current_function_args_info
.call_cookie
));
5768 emit_move_insn (gen_rtx_REG (SImode
, MACH_REG
),
5769 gen_rtx_REG (SImode
, R0_REG
));
5771 else if (TARGET_SHMEDIA
)
5773 int tr
= sh_media_register_for_return ();
5777 rtx insn
= emit_move_insn (gen_rtx_REG (DImode
, tr
),
5778 gen_rtx_REG (DImode
, PR_MEDIA_REG
));
5780 /* ??? We should suppress saving pr when we don't need it, but this
5781 is tricky because of builtin_return_address. */
5783 /* If this function only exits with sibcalls, this copy
5784 will be flagged as dead. */
5785 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
5791 /* Emit the code for SETUP_VARARGS. */
5792 if (current_function_stdarg
)
5794 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
5796 /* Push arg regs as if they'd been provided by caller in stack. */
5797 for (i
= 0; i
< NPARM_REGS(SImode
); i
++)
5799 int rn
= NPARM_REGS(SImode
) + FIRST_PARM_REG
- i
- 1;
5802 if (i
>= (NPARM_REGS(SImode
)
5803 - current_function_args_info
.arg_count
[(int) SH_ARG_INT
]
5807 RTX_FRAME_RELATED_P (insn
) = 0;
5812 /* If we're supposed to switch stacks at function entry, do so now. */
5814 emit_insn (gen_sp_switch_1 ());
5816 d
= calc_live_regs (&live_regs_mask
);
5817 /* ??? Maybe we could save some switching if we can move a mode switch
5818 that already happens to be at the function start into the prologue. */
5819 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
5820 emit_insn (gen_toggle_sz ());
5824 int offset_base
, offset
;
5826 int offset_in_r0
= -1;
5828 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
5829 int total_size
, save_size
;
5830 save_schedule schedule
;
5834 if (call_really_used_regs
[R0_REG
] && ! fixed_regs
[R0_REG
]
5835 && ! current_function_interrupt
)
5836 r0
= gen_rtx_REG (Pmode
, R0_REG
);
5838 /* D is the actual number of bytes that we need for saving registers,
5839 however, in initial_elimination_offset we have committed to using
5840 an additional TREGS_SPACE amount of bytes - in order to keep both
5841 addresses to arguments supplied by the caller and local variables
5842 valid, we must keep this gap. Place it between the incoming
5843 arguments and the actually saved registers in a bid to optimize
5844 locality of reference. */
5845 total_size
= d
+ tregs_space
;
5846 total_size
+= rounded_frame_size (total_size
);
5847 save_size
= total_size
- rounded_frame_size (d
);
5848 if (save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
5849 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
5850 - save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
5852 /* If adjusting the stack in a single step costs nothing extra, do so.
5853 I.e. either if a single addi is enough, or we need a movi anyway,
5854 and we don't exceed the maximum offset range (the test for the
5855 latter is conservative for simplicity). */
5857 && (CONST_OK_FOR_I10 (-total_size
)
5858 || (! CONST_OK_FOR_I10 (-(save_size
+ d_rounding
))
5859 && total_size
<= 2044)))
5860 d_rounding
= total_size
- save_size
;
5862 offset_base
= d
+ d_rounding
;
5864 output_stack_adjust (-(save_size
+ d_rounding
), stack_pointer_rtx
,
5867 sh5_schedule_saves (&live_regs_mask
, &schedule
, offset_base
);
5868 tmp_pnt
= schedule
.temps
;
5869 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
5871 enum machine_mode mode
= entry
->mode
;
5872 unsigned int reg
= entry
->reg
;
5873 rtx reg_rtx
, mem_rtx
, pre_dec
= NULL_RTX
;
5876 offset
= entry
->offset
;
5878 reg_rtx
= gen_rtx_REG (mode
, reg
);
5880 mem_rtx
= gen_frame_mem (mode
,
5881 gen_rtx_PLUS (Pmode
,
5885 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_pre_dec
);
5892 if (HAVE_PRE_DECREMENT
5893 && (offset_in_r0
- offset
== GET_MODE_SIZE (mode
)
5894 || mem_rtx
== NULL_RTX
5895 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
5897 pre_dec
= gen_frame_mem (mode
, gen_rtx_PRE_DEC (Pmode
, r0
));
5899 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (pre_dec
, 0),
5908 offset
+= GET_MODE_SIZE (mode
);
5912 if (mem_rtx
!= NULL_RTX
)
5915 if (offset_in_r0
== -1)
5917 emit_move_insn (r0
, GEN_INT (offset
));
5918 offset_in_r0
= offset
;
5920 else if (offset
!= offset_in_r0
)
5925 GEN_INT (offset
- offset_in_r0
)));
5926 offset_in_r0
+= offset
- offset_in_r0
;
5929 if (pre_dec
!= NULL_RTX
)
5935 (Pmode
, r0
, stack_pointer_rtx
));
5939 offset
-= GET_MODE_SIZE (mode
);
5940 offset_in_r0
-= GET_MODE_SIZE (mode
);
5945 mem_rtx
= gen_frame_mem (mode
, r0
);
5947 mem_rtx
= gen_frame_mem (mode
,
5948 gen_rtx_PLUS (Pmode
,
5952 /* We must not use an r0-based address for target-branch
5953 registers or for special registers without pre-dec
5954 memory addresses, since we store their values in r0
5956 gcc_assert (!TARGET_REGISTER_P (reg
)
5957 && ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
5958 || mem_rtx
== pre_dec
));
5961 orig_reg_rtx
= reg_rtx
;
5962 if (TARGET_REGISTER_P (reg
)
5963 || ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
5964 && mem_rtx
!= pre_dec
))
5966 rtx tmp_reg
= gen_rtx_REG (GET_MODE (reg_rtx
), *tmp_pnt
);
5968 emit_move_insn (tmp_reg
, reg_rtx
);
5970 if (REGNO (tmp_reg
) == R0_REG
)
5974 gcc_assert (!refers_to_regno_p
5975 (R0_REG
, R0_REG
+1, mem_rtx
, (rtx
*) 0));
5978 if (*++tmp_pnt
<= 0)
5979 tmp_pnt
= schedule
.temps
;
5986 /* Mark as interesting for dwarf cfi generator */
5987 insn
= emit_move_insn (mem_rtx
, reg_rtx
);
5988 RTX_FRAME_RELATED_P (insn
) = 1;
5989 /* If we use an intermediate register for the save, we can't
5990 describe this exactly in cfi as a copy of the to-be-saved
5991 register into the temporary register and then the temporary
5992 register on the stack, because the temporary register can
5993 have a different natural size than the to-be-saved register.
5994 Thus, we gloss over the intermediate copy and pretend we do
5995 a direct save from the to-be-saved register. */
5996 if (REGNO (reg_rtx
) != reg
)
6000 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, orig_reg_rtx
);
6001 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6003 REG_NOTES (insn
) = note_rtx
;
6006 if (TARGET_SHCOMPACT
&& (offset_in_r0
!= -1))
6008 rtx reg_rtx
= gen_rtx_REG (mode
, reg
);
6010 rtx mem_rtx
= gen_frame_mem (mode
,
6011 gen_rtx_PLUS (Pmode
,
6015 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, reg_rtx
);
6016 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6018 REG_NOTES (insn
) = note_rtx
;
6023 gcc_assert (entry
->offset
== d_rounding
);
6026 push_regs (&live_regs_mask
, current_function_interrupt
);
6028 if (flag_pic
&& regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
])
6030 rtx insn
= get_last_insn ();
6031 rtx last
= emit_insn (gen_GOTaddr2picreg ());
6033 /* Mark these insns as possibly dead. Sometimes, flow2 may
6034 delete all uses of the PIC register. In this case, let it
6035 delete the initialization too. */
6038 insn
= NEXT_INSN (insn
);
6040 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6044 while (insn
!= last
);
6047 if (SHMEDIA_REGS_STACK_ADJUST ())
6049 /* This must NOT go through the PLT, otherwise mach and macl
6050 may be clobbered. */
6051 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6053 ? "__GCC_push_shmedia_regs"
6054 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT
);
6055 emit_insn (gen_shmedia_save_restore_regs_compact
6056 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6059 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6061 rtx insn
= emit_insn (gen_toggle_sz ());
6063 /* If we're lucky, a mode switch in the function body will
6064 overwrite fpscr, turning this insn dead. Tell flow this
6065 insn is ok to delete. */
6066 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6071 target_flags
= save_flags
;
6073 output_stack_adjust (-rounded_frame_size (d
) + d_rounding
,
6074 stack_pointer_rtx
, 0, NULL
);
6076 if (frame_pointer_needed
)
6077 frame_insn (GEN_MOV (hard_frame_pointer_rtx
, stack_pointer_rtx
));
6079 if (TARGET_SHCOMPACT
6080 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
6082 /* This must NOT go through the PLT, otherwise mach and macl
6083 may be clobbered. */
6084 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6085 "__GCC_shcompact_incoming_args", SFUNC_GOT
);
6086 emit_insn (gen_shcompact_incoming_args ());
6091 sh_expand_epilogue (bool sibcall_p
)
6093 HARD_REG_SET live_regs_mask
;
6097 int save_flags
= target_flags
;
6098 int frame_size
, save_size
;
6099 int fpscr_deferred
= 0;
6100 int e
= sibcall_p
? -1 : 1;
6102 d
= calc_live_regs (&live_regs_mask
);
6105 frame_size
= rounded_frame_size (d
);
6109 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
6111 if (d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
6112 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
6113 - d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
6115 total_size
= d
+ tregs_space
;
6116 total_size
+= rounded_frame_size (total_size
);
6117 save_size
= total_size
- frame_size
;
6119 /* If adjusting the stack in a single step costs nothing extra, do so.
6120 I.e. either if a single addi is enough, or we need a movi anyway,
6121 and we don't exceed the maximum offset range (the test for the
6122 latter is conservative for simplicity). */
6124 && ! frame_pointer_needed
6125 && (CONST_OK_FOR_I10 (total_size
)
6126 || (! CONST_OK_FOR_I10 (save_size
+ d_rounding
)
6127 && total_size
<= 2044)))
6128 d_rounding
= frame_size
;
6130 frame_size
-= d_rounding
;
6133 if (frame_pointer_needed
)
6135 /* We must avoid scheduling the epilogue with previous basic blocks
6136 when exception handling is enabled. See PR/18032. */
6137 if (flag_exceptions
)
6138 emit_insn (gen_blockage ());
6139 output_stack_adjust (frame_size
, hard_frame_pointer_rtx
, e
,
6142 /* We must avoid moving the stack pointer adjustment past code
6143 which reads from the local frame, else an interrupt could
6144 occur after the SP adjustment and clobber data in the local
6146 emit_insn (gen_blockage ());
6147 emit_insn (GEN_MOV (stack_pointer_rtx
, hard_frame_pointer_rtx
));
6149 else if (frame_size
)
6151 /* We must avoid moving the stack pointer adjustment past code
6152 which reads from the local frame, else an interrupt could
6153 occur after the SP adjustment and clobber data in the local
6155 emit_insn (gen_blockage ());
6156 output_stack_adjust (frame_size
, stack_pointer_rtx
, e
, &live_regs_mask
);
6159 if (SHMEDIA_REGS_STACK_ADJUST ())
6161 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6163 ? "__GCC_pop_shmedia_regs"
6164 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT
);
6165 /* This must NOT go through the PLT, otherwise mach and macl
6166 may be clobbered. */
6167 emit_insn (gen_shmedia_save_restore_regs_compact
6168 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6171 /* Pop all the registers. */
6173 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6174 emit_insn (gen_toggle_sz ());
6177 int offset_base
, offset
;
6178 int offset_in_r0
= -1;
6180 rtx r0
= gen_rtx_REG (Pmode
, R0_REG
);
6181 save_schedule schedule
;
6185 entry
= sh5_schedule_saves (&live_regs_mask
, &schedule
, d_rounding
);
6186 offset_base
= -entry
[1].offset
+ d_rounding
;
6187 tmp_pnt
= schedule
.temps
;
6188 for (; entry
->mode
!= VOIDmode
; entry
--)
6190 enum machine_mode mode
= entry
->mode
;
6191 int reg
= entry
->reg
;
6192 rtx reg_rtx
, mem_rtx
, post_inc
= NULL_RTX
, insn
;
6194 offset
= offset_base
+ entry
->offset
;
6195 reg_rtx
= gen_rtx_REG (mode
, reg
);
6197 mem_rtx
= gen_frame_mem (mode
,
6198 gen_rtx_PLUS (Pmode
,
6202 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_post_inc
);
6208 if (HAVE_POST_INCREMENT
6209 && (offset
== offset_in_r0
6210 || (offset
+ GET_MODE_SIZE (mode
) != d
+ d_rounding
6211 && mem_rtx
== NULL_RTX
)
6212 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
6214 post_inc
= gen_frame_mem (mode
, gen_rtx_POST_INC (Pmode
, r0
));
6216 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (post_inc
, 0),
6219 post_inc
= NULL_RTX
;
6228 if (mem_rtx
!= NULL_RTX
)
6231 if (offset_in_r0
== -1)
6233 emit_move_insn (r0
, GEN_INT (offset
));
6234 offset_in_r0
= offset
;
6236 else if (offset
!= offset_in_r0
)
6241 GEN_INT (offset
- offset_in_r0
)));
6242 offset_in_r0
+= offset
- offset_in_r0
;
6245 if (post_inc
!= NULL_RTX
)
6251 (Pmode
, r0
, stack_pointer_rtx
));
6257 offset_in_r0
+= GET_MODE_SIZE (mode
);
6260 mem_rtx
= gen_frame_mem (mode
, r0
);
6262 mem_rtx
= gen_frame_mem (mode
,
6263 gen_rtx_PLUS (Pmode
,
6267 gcc_assert ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
6268 || mem_rtx
== post_inc
);
6271 if ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
6272 && mem_rtx
!= post_inc
)
6274 insn
= emit_move_insn (r0
, mem_rtx
);
6277 else if (TARGET_REGISTER_P (reg
))
6279 rtx tmp_reg
= gen_rtx_REG (mode
, *tmp_pnt
);
6281 /* Give the scheduler a bit of freedom by using up to
6282 MAX_TEMPS registers in a round-robin fashion. */
6283 insn
= emit_move_insn (tmp_reg
, mem_rtx
);
6286 tmp_pnt
= schedule
.temps
;
6289 insn
= emit_move_insn (reg_rtx
, mem_rtx
);
6290 if (reg
== PR_MEDIA_REG
&& sh_media_register_for_return () >= 0)
6291 /* This is dead, unless we return with a sibcall. */
6292 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6297 gcc_assert (entry
->offset
+ offset_base
== d
+ d_rounding
);
6299 else /* ! TARGET_SH5 */
6302 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6304 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6306 int j
= (FIRST_PSEUDO_REGISTER
- 1) - i
;
6308 if (j
== FPSCR_REG
&& current_function_interrupt
&& TARGET_FMOVD
6309 && hard_regs_intersect_p (&live_regs_mask
,
6310 ®_class_contents
[DF_REGS
]))
6312 else if (j
!= PR_REG
&& TEST_HARD_REG_BIT (live_regs_mask
, j
))
6314 if (j
== FIRST_FP_REG
&& fpscr_deferred
)
6319 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6320 emit_insn (gen_toggle_sz ());
6321 target_flags
= save_flags
;
6323 output_stack_adjust (current_function_pretend_args_size
6324 + save_size
+ d_rounding
6325 + current_function_args_info
.stack_regs
* 8,
6326 stack_pointer_rtx
, e
, NULL
);
6328 if (current_function_calls_eh_return
)
6329 emit_insn (GEN_ADD3 (stack_pointer_rtx
, stack_pointer_rtx
,
6330 EH_RETURN_STACKADJ_RTX
));
6332 /* Switch back to the normal stack if necessary. */
6334 emit_insn (gen_sp_switch_2 ());
6336 /* Tell flow the insn that pops PR isn't dead. */
6337 /* PR_REG will never be live in SHmedia mode, and we don't need to
6338 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6339 by the return pattern. */
6340 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6341 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, PR_REG
)));
6344 static int sh_need_epilogue_known
= 0;
6347 sh_need_epilogue (void)
6349 if (! sh_need_epilogue_known
)
6354 sh_expand_epilogue (0);
6355 epilogue
= get_insns ();
6357 sh_need_epilogue_known
= (epilogue
== NULL
? -1 : 1);
6359 return sh_need_epilogue_known
> 0;
6362 /* Emit code to change the current function's return address to RA.
6363 TEMP is available as a scratch register, if needed. */
6366 sh_set_return_address (rtx ra
, rtx tmp
)
6368 HARD_REG_SET live_regs_mask
;
6370 int pr_reg
= TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
;
6373 d
= calc_live_regs (&live_regs_mask
);
6375 /* If pr_reg isn't life, we can set it (or the register given in
6376 sh_media_register_for_return) directly. */
6377 if (! TEST_HARD_REG_BIT (live_regs_mask
, pr_reg
))
6383 int rr_regno
= sh_media_register_for_return ();
6388 rr
= gen_rtx_REG (DImode
, rr_regno
);
6391 rr
= gen_rtx_REG (SImode
, pr_reg
);
6393 emit_insn (GEN_MOV (rr
, ra
));
6394 /* Tell flow the register for return isn't dead. */
6395 emit_insn (gen_rtx_USE (VOIDmode
, rr
));
6402 save_schedule schedule
;
6405 entry
= sh5_schedule_saves (&live_regs_mask
, &schedule
, 0);
6406 offset
= entry
[1].offset
;
6407 for (; entry
->mode
!= VOIDmode
; entry
--)
6408 if (entry
->reg
== pr_reg
)
6411 /* We can't find pr register. */
6415 offset
= entry
->offset
- offset
;
6416 pr_offset
= (rounded_frame_size (d
) + offset
6417 + SHMEDIA_REGS_STACK_ADJUST ());
6420 pr_offset
= rounded_frame_size (d
);
6422 emit_insn (GEN_MOV (tmp
, GEN_INT (pr_offset
)));
6423 emit_insn (GEN_ADD3 (tmp
, tmp
, hard_frame_pointer_rtx
));
6425 tmp
= gen_frame_mem (Pmode
, tmp
);
6426 emit_insn (GEN_MOV (tmp
, ra
));
6429 /* Clear variables at function end. */
6432 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6433 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6435 trap_exit
= pragma_interrupt
= pragma_trapa
= pragma_nosave_low_regs
= 0;
6436 sh_need_epilogue_known
= 0;
6437 sp_switch
= NULL_RTX
;
6441 sh_builtin_saveregs (void)
6443 /* First unnamed integer register. */
6444 int first_intreg
= current_function_args_info
.arg_count
[(int) SH_ARG_INT
];
6445 /* Number of integer registers we need to save. */
6446 int n_intregs
= MAX (0, NPARM_REGS (SImode
) - first_intreg
);
6447 /* First unnamed SFmode float reg */
6448 int first_floatreg
= current_function_args_info
.arg_count
[(int) SH_ARG_FLOAT
];
6449 /* Number of SFmode float regs to save. */
6450 int n_floatregs
= MAX (0, NPARM_REGS (SFmode
) - first_floatreg
);
6453 HOST_WIDE_INT alias_set
;
6459 int pushregs
= n_intregs
;
6461 while (pushregs
< NPARM_REGS (SImode
) - 1
6462 && (CALL_COOKIE_INT_REG_GET
6463 (current_function_args_info
.call_cookie
,
6464 NPARM_REGS (SImode
) - pushregs
)
6467 current_function_args_info
.call_cookie
6468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
6473 if (pushregs
== NPARM_REGS (SImode
))
6474 current_function_args_info
.call_cookie
6475 |= (CALL_COOKIE_INT_REG (0, 1)
6476 | CALL_COOKIE_STACKSEQ (pushregs
- 1));
6478 current_function_args_info
.call_cookie
6479 |= CALL_COOKIE_STACKSEQ (pushregs
);
6481 current_function_pretend_args_size
+= 8 * n_intregs
;
6483 if (TARGET_SHCOMPACT
)
6487 if (! TARGET_SH2E
&& ! TARGET_SH4
&& ! TARGET_SH5
)
6489 error ("__builtin_saveregs not supported by this subtarget");
6496 /* Allocate block of memory for the regs. */
6497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6498 Or can assign_stack_local accept a 0 SIZE argument? */
6499 bufsize
= (n_intregs
* UNITS_PER_WORD
) + (n_floatregs
* UNITS_PER_WORD
);
6502 regbuf
= gen_frame_mem (BLKmode
, gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
));
6503 else if (n_floatregs
& 1)
6507 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6508 addr
= copy_to_mode_reg (Pmode
, XEXP (regbuf
, 0));
6509 emit_insn (gen_iorsi3 (addr
, addr
, GEN_INT (UNITS_PER_WORD
)));
6510 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6512 else if (STACK_BOUNDARY
< 64 && TARGET_FPU_DOUBLE
&& n_floatregs
)
6516 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6517 addr
= copy_to_mode_reg (Pmode
, plus_constant (XEXP (regbuf
, 0), 4));
6518 mask
= copy_to_mode_reg (Pmode
, GEN_INT (-8));
6519 emit_insn (gen_andsi3 (addr
, addr
, mask
));
6520 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6523 regbuf
= assign_stack_local (BLKmode
, bufsize
, TARGET_FPU_DOUBLE
? 64 : 0);
6524 alias_set
= get_varargs_alias_set ();
6525 set_mem_alias_set (regbuf
, alias_set
);
6528 This is optimized to only save the regs that are necessary. Explicitly
6529 named args need not be saved. */
6531 move_block_from_reg (BASE_ARG_REG (SImode
) + first_intreg
,
6532 adjust_address (regbuf
, BLKmode
,
6533 n_floatregs
* UNITS_PER_WORD
),
6537 /* Return the address of the regbuf. */
6538 return XEXP (regbuf
, 0);
6541 This is optimized to only save the regs that are necessary. Explicitly
6542 named args need not be saved.
6543 We explicitly build a pointer to the buffer because it halves the insn
6544 count when not optimizing (otherwise the pointer is built for each reg
6546 We emit the moves in reverse order so that we can use predecrement. */
6548 fpregs
= copy_to_mode_reg (Pmode
,
6549 plus_constant (XEXP (regbuf
, 0),
6550 n_floatregs
* UNITS_PER_WORD
));
6551 if (TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
6554 for (regno
= NPARM_REGS (DFmode
) - 2; regno
>= first_floatreg
; regno
-= 2)
6556 emit_insn (gen_addsi3 (fpregs
, fpregs
,
6557 GEN_INT (-2 * UNITS_PER_WORD
)));
6558 mem
= change_address (regbuf
, DFmode
, fpregs
);
6559 emit_move_insn (mem
,
6560 gen_rtx_REG (DFmode
, BASE_ARG_REG (DFmode
) + regno
));
6562 regno
= first_floatreg
;
6565 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6566 mem
= change_address (regbuf
, SFmode
, fpregs
);
6567 emit_move_insn (mem
,
6568 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
6569 - (TARGET_LITTLE_ENDIAN
!= 0)));
6573 for (regno
= NPARM_REGS (SFmode
) - 1; regno
>= first_floatreg
; regno
--)
6577 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6578 mem
= change_address (regbuf
, SFmode
, fpregs
);
6579 emit_move_insn (mem
,
6580 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
));
6583 /* Return the address of the regbuf. */
6584 return XEXP (regbuf
, 0);
6587 /* Define the `__builtin_va_list' type for the ABI. */
6590 sh_build_builtin_va_list (void)
6592 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6595 if (TARGET_SH5
|| (! TARGET_SH2E
&& ! TARGET_SH4
)
6596 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6597 return ptr_type_node
;
6599 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
6601 f_next_o
= build_decl (FIELD_DECL
, get_identifier ("__va_next_o"),
6603 f_next_o_limit
= build_decl (FIELD_DECL
,
6604 get_identifier ("__va_next_o_limit"),
6606 f_next_fp
= build_decl (FIELD_DECL
, get_identifier ("__va_next_fp"),
6608 f_next_fp_limit
= build_decl (FIELD_DECL
,
6609 get_identifier ("__va_next_fp_limit"),
6611 f_next_stack
= build_decl (FIELD_DECL
, get_identifier ("__va_next_stack"),
6614 DECL_FIELD_CONTEXT (f_next_o
) = record
;
6615 DECL_FIELD_CONTEXT (f_next_o_limit
) = record
;
6616 DECL_FIELD_CONTEXT (f_next_fp
) = record
;
6617 DECL_FIELD_CONTEXT (f_next_fp_limit
) = record
;
6618 DECL_FIELD_CONTEXT (f_next_stack
) = record
;
6620 TYPE_FIELDS (record
) = f_next_o
;
6621 TREE_CHAIN (f_next_o
) = f_next_o_limit
;
6622 TREE_CHAIN (f_next_o_limit
) = f_next_fp
;
6623 TREE_CHAIN (f_next_fp
) = f_next_fp_limit
;
6624 TREE_CHAIN (f_next_fp_limit
) = f_next_stack
;
6626 layout_type (record
);
6631 /* Implement `va_start' for varargs and stdarg. */
6634 sh_va_start (tree valist
, rtx nextarg
)
6636 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6637 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
6643 expand_builtin_saveregs ();
6644 std_expand_builtin_va_start (valist
, nextarg
);
6648 if ((! TARGET_SH2E
&& ! TARGET_SH4
)
6649 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6651 std_expand_builtin_va_start (valist
, nextarg
);
6655 f_next_o
= TYPE_FIELDS (va_list_type_node
);
6656 f_next_o_limit
= TREE_CHAIN (f_next_o
);
6657 f_next_fp
= TREE_CHAIN (f_next_o_limit
);
6658 f_next_fp_limit
= TREE_CHAIN (f_next_fp
);
6659 f_next_stack
= TREE_CHAIN (f_next_fp_limit
);
6661 next_o
= build (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6663 next_o_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6664 valist
, f_next_o_limit
, NULL_TREE
);
6665 next_fp
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp
), valist
, f_next_fp
,
6667 next_fp_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6668 valist
, f_next_fp_limit
, NULL_TREE
);
6669 next_stack
= build (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6670 valist
, f_next_stack
, NULL_TREE
);
6672 /* Call __builtin_saveregs. */
6673 u
= make_tree (ptr_type_node
, expand_builtin_saveregs ());
6674 t
= build (MODIFY_EXPR
, ptr_type_node
, next_fp
, u
);
6675 TREE_SIDE_EFFECTS (t
) = 1;
6676 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6678 nfp
= current_function_args_info
.arg_count
[SH_ARG_FLOAT
];
6683 u
= fold (build (PLUS_EXPR
, ptr_type_node
, u
,
6684 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nfp
)));
6685 t
= build (MODIFY_EXPR
, ptr_type_node
, next_fp_limit
, u
);
6686 TREE_SIDE_EFFECTS (t
) = 1;
6687 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6689 t
= build (MODIFY_EXPR
, ptr_type_node
, next_o
, u
);
6690 TREE_SIDE_EFFECTS (t
) = 1;
6691 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6693 nint
= current_function_args_info
.arg_count
[SH_ARG_INT
];
6698 u
= fold (build (PLUS_EXPR
, ptr_type_node
, u
,
6699 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nint
)));
6700 t
= build (MODIFY_EXPR
, ptr_type_node
, next_o_limit
, u
);
6701 TREE_SIDE_EFFECTS (t
) = 1;
6702 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6704 u
= make_tree (ptr_type_node
, nextarg
);
6705 t
= build (MODIFY_EXPR
, ptr_type_node
, next_stack
, u
);
6706 TREE_SIDE_EFFECTS (t
) = 1;
6707 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6710 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6711 member, return it. */
6713 find_sole_member (tree type
)
6715 tree field
, member
= NULL_TREE
;
6717 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6719 if (TREE_CODE (field
) != FIELD_DECL
)
6721 if (!DECL_SIZE (field
))
6723 if (integer_zerop (DECL_SIZE (field
)))
6731 /* Implement `va_arg'. */
6734 sh_gimplify_va_arg_expr (tree valist
, tree type
, tree
*pre_p
,
6735 tree
*post_p ATTRIBUTE_UNUSED
)
6737 HOST_WIDE_INT size
, rsize
;
6738 tree tmp
, pptr_type_node
;
6739 tree addr
, lab_over
= NULL
, result
= NULL
;
6740 int pass_by_ref
= targetm
.calls
.must_pass_in_stack (TYPE_MODE (type
), type
);
6743 type
= build_pointer_type (type
);
6745 size
= int_size_in_bytes (type
);
6746 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6747 pptr_type_node
= build_pointer_type (ptr_type_node
);
6749 if (! TARGET_SH5
&& (TARGET_SH2E
|| TARGET_SH4
)
6750 && ! (TARGET_HITACHI
|| sh_cfun_attr_renesas_p ()))
6752 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6753 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
6758 f_next_o
= TYPE_FIELDS (va_list_type_node
);
6759 f_next_o_limit
= TREE_CHAIN (f_next_o
);
6760 f_next_fp
= TREE_CHAIN (f_next_o_limit
);
6761 f_next_fp_limit
= TREE_CHAIN (f_next_fp
);
6762 f_next_stack
= TREE_CHAIN (f_next_fp_limit
);
6764 next_o
= build (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6766 next_o_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6767 valist
, f_next_o_limit
, NULL_TREE
);
6768 next_fp
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp
),
6769 valist
, f_next_fp
, NULL_TREE
);
6770 next_fp_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6771 valist
, f_next_fp_limit
, NULL_TREE
);
6772 next_stack
= build (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6773 valist
, f_next_stack
, NULL_TREE
);
6775 /* Structures with a single member with a distinct mode are passed
6776 like their member. This is relevant if the latter has a REAL_TYPE
6777 or COMPLEX_TYPE type. */
6778 while (TREE_CODE (type
) == RECORD_TYPE
6779 && (member
= find_sole_member (type
))
6780 && (TREE_CODE (TREE_TYPE (member
)) == REAL_TYPE
6781 || TREE_CODE (TREE_TYPE (member
)) == COMPLEX_TYPE
6782 || TREE_CODE (TREE_TYPE (member
)) == RECORD_TYPE
))
6784 tree field_type
= TREE_TYPE (member
);
6786 if (TYPE_MODE (type
) == TYPE_MODE (field_type
))
6790 gcc_assert ((TYPE_ALIGN (type
)
6791 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type
)))
6792 || (TYPE_ALIGN (type
)
6793 > GET_MODE_BITSIZE (TYPE_MODE (field_type
))));
6800 pass_as_float
= ((TREE_CODE (type
) == REAL_TYPE
&& size
<= 8)
6801 || (TREE_CODE (type
) == COMPLEX_TYPE
6802 && TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
6807 pass_as_float
= (TREE_CODE (type
) == REAL_TYPE
&& size
== 4);
6810 addr
= create_tmp_var (pptr_type_node
, NULL
);
6811 lab_false
= create_artificial_label ();
6812 lab_over
= create_artificial_label ();
6814 valist
= build1 (INDIRECT_REF
, ptr_type_node
, addr
);
6818 tree next_fp_tmp
= create_tmp_var (TREE_TYPE (f_next_fp
), NULL
);
6820 bool is_double
= size
== 8 && TREE_CODE (type
) == REAL_TYPE
;
6822 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_fp
);
6823 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6824 gimplify_and_add (tmp
, pre_p
);
6826 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6827 gimplify_and_add (tmp
, pre_p
);
6828 tmp
= next_fp_limit
;
6829 if (size
> 4 && !is_double
)
6830 tmp
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
), tmp
,
6831 fold_convert (TREE_TYPE (tmp
), size_int (4 - size
)));
6832 tmp
= build (GE_EXPR
, boolean_type_node
, next_fp_tmp
, tmp
);
6833 cmp
= build (COND_EXPR
, void_type_node
, tmp
,
6834 build (GOTO_EXPR
, void_type_node
, lab_false
),
6837 gimplify_and_add (cmp
, pre_p
);
6839 if (TYPE_ALIGN (type
) > BITS_PER_WORD
|| (is_double
|| size
== 16))
6841 tmp
= fold_convert (ptr_type_node
, size_int (UNITS_PER_WORD
));
6842 tmp
= build (BIT_AND_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6843 tmp
= build (PLUS_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6844 tmp
= build (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6845 gimplify_and_add (tmp
, pre_p
);
6848 gimplify_and_add (cmp
, pre_p
);
6850 #ifdef FUNCTION_ARG_SCmode_WART
6851 if (TYPE_MODE (type
) == SCmode
&& TARGET_SH4
&& TARGET_LITTLE_ENDIAN
)
6853 tree subtype
= TREE_TYPE (type
);
6857 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6858 imag
= get_initialized_tmp_var (imag
, pre_p
, NULL
);
6861 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6862 real
= get_initialized_tmp_var (real
, pre_p
, NULL
);
6864 result
= build (COMPLEX_EXPR
, type
, real
, imag
);
6865 result
= get_initialized_tmp_var (result
, pre_p
, NULL
);
6867 #endif /* FUNCTION_ARG_SCmode_WART */
6869 tmp
= build (GOTO_EXPR
, void_type_node
, lab_over
);
6870 gimplify_and_add (tmp
, pre_p
);
6872 tmp
= build (LABEL_EXPR
, void_type_node
, lab_false
);
6873 gimplify_and_add (tmp
, pre_p
);
6875 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6876 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6877 gimplify_and_add (tmp
, pre_p
);
6878 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6879 gimplify_and_add (tmp
, pre_p
);
6881 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, valist
, next_fp_tmp
);
6882 gimplify_and_add (tmp
, post_p
);
6883 valist
= next_fp_tmp
;
6887 tmp
= fold_convert (ptr_type_node
, size_int (rsize
));
6888 tmp
= build (PLUS_EXPR
, ptr_type_node
, next_o
, tmp
);
6889 tmp
= build (GT_EXPR
, boolean_type_node
, tmp
, next_o_limit
);
6890 tmp
= build (COND_EXPR
, void_type_node
, tmp
,
6891 build (GOTO_EXPR
, void_type_node
, lab_false
),
6893 gimplify_and_add (tmp
, pre_p
);
6895 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_o
);
6896 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6897 gimplify_and_add (tmp
, pre_p
);
6899 tmp
= build (GOTO_EXPR
, void_type_node
, lab_over
);
6900 gimplify_and_add (tmp
, pre_p
);
6902 tmp
= build (LABEL_EXPR
, void_type_node
, lab_false
);
6903 gimplify_and_add (tmp
, pre_p
);
6905 if (size
> 4 && ! TARGET_SH4
)
6907 tmp
= build (MODIFY_EXPR
, ptr_type_node
, next_o
, next_o_limit
);
6908 gimplify_and_add (tmp
, pre_p
);
6911 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6912 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6913 gimplify_and_add (tmp
, pre_p
);
6918 tmp
= build (LABEL_EXPR
, void_type_node
, lab_over
);
6919 gimplify_and_add (tmp
, pre_p
);
6923 /* ??? In va-sh.h, there had been code to make values larger than
6924 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6926 tmp
= std_gimplify_va_arg_expr (valist
, type
, pre_p
, NULL
);
6929 tmp
= build (MODIFY_EXPR
, void_type_node
, result
, tmp
);
6930 gimplify_and_add (tmp
, pre_p
);
6932 tmp
= build (LABEL_EXPR
, void_type_node
, lab_over
);
6933 gimplify_and_add (tmp
, pre_p
);
6939 result
= build_va_arg_indirect_ref (result
);
6945 sh_promote_prototypes (tree type
)
6951 return ! sh_attr_renesas_p (type
);
6954 /* Whether an argument must be passed by reference. On SHcompact, we
6955 pretend arguments wider than 32-bits that would have been passed in
6956 registers are passed by reference, so that an SHmedia trampoline
6957 loads them into the full 64-bits registers. */
6960 shcompact_byref (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6961 tree type
, bool named
)
6963 unsigned HOST_WIDE_INT size
;
6966 size
= int_size_in_bytes (type
);
6968 size
= GET_MODE_SIZE (mode
);
6970 if (cum
->arg_count
[SH_ARG_INT
] < NPARM_REGS (SImode
)
6972 || GET_SH_ARG_CLASS (mode
) == SH_ARG_INT
6973 || (GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
6974 && cum
->arg_count
[SH_ARG_FLOAT
] >= NPARM_REGS (SFmode
)))
6976 && !SHCOMPACT_FORCE_ON_STACK (mode
, type
)
6977 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
6984 sh_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6985 tree type
, bool named
)
6987 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
6990 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6991 wants to know about pass-by-reference semantics for incoming
6996 if (TARGET_SHCOMPACT
)
6998 cum
->byref
= shcompact_byref (cum
, mode
, type
, named
);
6999 return cum
->byref
!= 0;
7006 sh_callee_copies (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7007 tree type
, bool named ATTRIBUTE_UNUSED
)
7009 /* ??? How can it possibly be correct to return true only on the
7010 caller side of the equation? Is there someplace else in the
7011 sh backend that's magically producing the copies? */
7012 return (cum
->outgoing
7013 && ((mode
== BLKmode
? TYPE_ALIGN (type
) : GET_MODE_ALIGNMENT (mode
))
7014 % SH_MIN_ALIGN_FOR_CALLEE_COPY
== 0));
7018 sh_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7019 tree type
, bool named ATTRIBUTE_UNUSED
)
7024 && PASS_IN_REG_P (*cum
, mode
, type
)
7025 && !(TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
7026 && (ROUND_REG (*cum
, mode
)
7028 ? ROUND_ADVANCE (GET_MODE_SIZE (mode
))
7029 : ROUND_ADVANCE (int_size_in_bytes (type
)))
7030 > NPARM_REGS (mode
)))
7031 words
= NPARM_REGS (mode
) - ROUND_REG (*cum
, mode
);
7033 else if (!TARGET_SHCOMPACT
7034 && SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
7035 words
= NPARM_REGS (SImode
) - cum
->arg_count
[SH_ARG_INT
];
7037 return words
* UNITS_PER_WORD
;
7041 /* Define where to put the arguments to a function.
7042 Value is zero to push the argument on the stack,
7043 or a hard register in which to store the argument.
7045 MODE is the argument's machine mode.
7046 TYPE is the data type of the argument (as a tree).
7047 This is null for libcalls where that information may
7049 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7050 the preceding args and about the function being called.
7051 NAMED is nonzero if this argument is a named parameter
7052 (otherwise it is an extra parameter matching an ellipsis).
7054 On SH the first args are normally in registers
7055 and the rest are pushed. Any arg that starts within the first
7056 NPARM_REGS words is at least partially passed in a register unless
7057 its data type forbids. */
7061 sh_function_arg (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7062 tree type
, int named
)
7064 if (! TARGET_SH5
&& mode
== VOIDmode
)
7065 return GEN_INT (ca
->renesas_abi
? 1 : 0);
7068 && PASS_IN_REG_P (*ca
, mode
, type
)
7069 && (named
|| ! (TARGET_HITACHI
|| ca
->renesas_abi
)))
7073 if (mode
== SCmode
&& TARGET_SH4
&& TARGET_LITTLE_ENDIAN
7074 && (! FUNCTION_ARG_SCmode_WART
|| (ROUND_REG (*ca
, mode
) & 1)))
7076 rtx r1
= gen_rtx_EXPR_LIST (VOIDmode
,
7077 gen_rtx_REG (SFmode
,
7079 + (ROUND_REG (*ca
, mode
) ^ 1)),
7081 rtx r2
= gen_rtx_EXPR_LIST (VOIDmode
,
7082 gen_rtx_REG (SFmode
,
7084 + ((ROUND_REG (*ca
, mode
) + 1) ^ 1)),
7086 return gen_rtx_PARALLEL(SCmode
, gen_rtvec(2, r1
, r2
));
7089 /* If the alignment of a DF value causes an SF register to be
7090 skipped, we will use that skipped register for the next SF
7092 if ((TARGET_HITACHI
|| ca
->renesas_abi
)
7093 && ca
->free_single_fp_reg
7095 return gen_rtx_REG (mode
, ca
->free_single_fp_reg
);
7097 regno
= (BASE_ARG_REG (mode
) + ROUND_REG (*ca
, mode
))
7098 ^ (mode
== SFmode
&& TARGET_SH4
7099 && TARGET_LITTLE_ENDIAN
!= 0
7100 && ! TARGET_HITACHI
&& ! ca
->renesas_abi
);
7101 return gen_rtx_REG (mode
, regno
);
7107 if (mode
== VOIDmode
&& TARGET_SHCOMPACT
)
7108 return GEN_INT (ca
->call_cookie
);
7110 /* The following test assumes unnamed arguments are promoted to
7112 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7113 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
, ca
->free_single_fp_reg
);
7115 if ((GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
)
7116 && (named
|| ! ca
->prototype_p
)
7117 && ca
->arg_count
[(int) SH_ARG_FLOAT
] < NPARM_REGS (SFmode
))
7119 if (! ca
->prototype_p
&& TARGET_SHMEDIA
)
7120 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca
, mode
);
7122 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
,
7124 + ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7127 if (ca
->arg_count
[(int) SH_ARG_INT
] < NPARM_REGS (SImode
)
7128 && (! TARGET_SHCOMPACT
7129 || (! SHCOMPACT_FORCE_ON_STACK (mode
, type
)
7130 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca
, mode
,
7133 return gen_rtx_REG (mode
, (FIRST_PARM_REG
7134 + ca
->arg_count
[(int) SH_ARG_INT
]));
7143 /* Update the data in CUM to advance over an argument
7144 of mode MODE and data type TYPE.
7145 (TYPE is null for libcalls where that information may not be
7149 sh_function_arg_advance (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7150 tree type
, int named
)
7154 else if (TARGET_SH5
)
7156 tree type2
= (ca
->byref
&& type
7159 enum machine_mode mode2
= (ca
->byref
&& type
7162 int dwords
= ((ca
->byref
7165 ? int_size_in_bytes (type2
)
7166 : GET_MODE_SIZE (mode2
)) + 7) / 8;
7167 int numregs
= MIN (dwords
, NPARM_REGS (SImode
)
7168 - ca
->arg_count
[(int) SH_ARG_INT
]);
7172 ca
->arg_count
[(int) SH_ARG_INT
] += numregs
;
7173 if (TARGET_SHCOMPACT
7174 && SHCOMPACT_FORCE_ON_STACK (mode2
, type2
))
7177 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7179 /* N.B. We want this also for outgoing. */
7180 ca
->stack_regs
+= numregs
;
7185 ca
->stack_regs
+= numregs
;
7186 ca
->byref_regs
+= numregs
;
7190 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7194 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7197 else if (dwords
> numregs
)
7199 int pushregs
= numregs
;
7201 if (TARGET_SHCOMPACT
)
7202 ca
->stack_regs
+= numregs
;
7203 while (pushregs
< NPARM_REGS (SImode
) - 1
7204 && (CALL_COOKIE_INT_REG_GET
7206 NPARM_REGS (SImode
) - pushregs
)
7210 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
7214 if (numregs
== NPARM_REGS (SImode
))
7216 |= CALL_COOKIE_INT_REG (0, 1)
7217 | CALL_COOKIE_STACKSEQ (numregs
- 1);
7220 |= CALL_COOKIE_STACKSEQ (numregs
);
7223 if (GET_SH_ARG_CLASS (mode2
) == SH_ARG_FLOAT
7224 && (named
|| ! ca
->prototype_p
))
7226 if (mode2
== SFmode
&& ca
->free_single_fp_reg
)
7227 ca
->free_single_fp_reg
= 0;
7228 else if (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7229 < NPARM_REGS (SFmode
))
7232 = MIN ((GET_MODE_SIZE (mode2
) + 7) / 8 * 2,
7234 - ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7236 ca
->arg_count
[(int) SH_ARG_FLOAT
] += numfpregs
;
7238 if (TARGET_SHCOMPACT
&& ! ca
->prototype_p
)
7240 if (ca
->outgoing
&& numregs
> 0)
7244 |= (CALL_COOKIE_INT_REG
7245 (ca
->arg_count
[(int) SH_ARG_INT
]
7246 - numregs
+ ((numfpregs
- 2) / 2),
7247 4 + (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7250 while (numfpregs
-= 2);
7252 else if (mode2
== SFmode
&& (named
)
7253 && (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7254 < NPARM_REGS (SFmode
)))
7255 ca
->free_single_fp_reg
7256 = FIRST_FP_PARM_REG
- numfpregs
7257 + ca
->arg_count
[(int) SH_ARG_FLOAT
] + 1;
7263 if ((TARGET_HITACHI
|| ca
->renesas_abi
) && TARGET_FPU_DOUBLE
)
7265 /* Note that we've used the skipped register. */
7266 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7268 ca
->free_single_fp_reg
= 0;
7271 /* When we have a DF after an SF, there's an SF register that get
7272 skipped in order to align the DF value. We note this skipped
7273 register, because the next SF value will use it, and not the
7274 SF that follows the DF. */
7276 && ROUND_REG (*ca
, DFmode
) != ROUND_REG (*ca
, SFmode
))
7278 ca
->free_single_fp_reg
= (ROUND_REG (*ca
, SFmode
)
7279 + BASE_ARG_REG (mode
));
7283 if (! ((TARGET_SH4
|| TARGET_SH2A
) || ca
->renesas_abi
)
7284 || PASS_IN_REG_P (*ca
, mode
, type
))
7285 (ca
->arg_count
[(int) GET_SH_ARG_CLASS (mode
)]
7286 = (ROUND_REG (*ca
, mode
)
7288 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7289 : ROUND_ADVANCE (GET_MODE_SIZE (mode
)))));
7292 /* The Renesas calling convention doesn't quite fit into this scheme since
7293 the address is passed like an invisible argument, but one that is always
7294 passed in memory. */
7296 sh_struct_value_rtx (tree fndecl
, int incoming ATTRIBUTE_UNUSED
)
7298 if (TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7300 return gen_rtx_REG (Pmode
, 2);
7303 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7306 sh_return_in_memory (tree type
, tree fndecl
)
7310 if (TYPE_MODE (type
) == BLKmode
)
7311 return ((unsigned HOST_WIDE_INT
) int_size_in_bytes (type
)) > 8;
7313 return GET_MODE_SIZE (TYPE_MODE (type
)) > 8;
7317 return (TYPE_MODE (type
) == BLKmode
7318 || ((TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7319 && TREE_CODE (type
) == RECORD_TYPE
));
7323 /* We actually emit the code in sh_expand_prologue. We used to use
7324 a static variable to flag that we need to emit this code, but that
7325 doesn't when inlining, when functions are deferred and then emitted
7326 later. Fortunately, we already have two flags that are part of struct
7327 function that tell if a function uses varargs or stdarg. */
7329 sh_setup_incoming_varargs (CUMULATIVE_ARGS
*ca
,
7330 enum machine_mode mode
,
7332 int *pretend_arg_size
,
7333 int second_time ATTRIBUTE_UNUSED
)
7335 gcc_assert (current_function_stdarg
);
7336 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
7338 int named_parm_regs
, anon_parm_regs
;
7340 named_parm_regs
= (ROUND_REG (*ca
, mode
)
7342 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7343 : ROUND_ADVANCE (GET_MODE_SIZE (mode
))));
7344 anon_parm_regs
= NPARM_REGS (SImode
) - named_parm_regs
;
7345 if (anon_parm_regs
> 0)
7346 *pretend_arg_size
= anon_parm_regs
* 4;
7351 sh_strict_argument_naming (CUMULATIVE_ARGS
*ca ATTRIBUTE_UNUSED
)
7357 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS
*ca
)
7359 return ! (TARGET_HITACHI
|| ca
->renesas_abi
) && ! TARGET_SH5
;
7363 /* Define the offset between two registers, one to be eliminated, and
7364 the other its replacement, at the start of a routine. */
7367 initial_elimination_offset (int from
, int to
)
7370 int regs_saved_rounding
= 0;
7371 int total_saved_regs_space
;
7372 int total_auto_space
;
7373 int save_flags
= target_flags
;
7375 HARD_REG_SET live_regs_mask
;
7377 shmedia_space_reserved_for_target_registers
= false;
7378 regs_saved
= calc_live_regs (&live_regs_mask
);
7379 regs_saved
+= SHMEDIA_REGS_STACK_ADJUST ();
7381 if (shmedia_reserve_space_for_target_registers_p (regs_saved
, &live_regs_mask
))
7383 shmedia_space_reserved_for_target_registers
= true;
7384 regs_saved
+= shmedia_target_regs_stack_adjust (&live_regs_mask
);
7387 if (TARGET_SH5
&& regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
7388 regs_saved_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
7389 - regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
7391 total_auto_space
= rounded_frame_size (regs_saved
) - regs_saved_rounding
;
7392 copy_flags
= target_flags
;
7393 target_flags
= save_flags
;
7395 total_saved_regs_space
= regs_saved
+ regs_saved_rounding
;
7397 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7398 return total_saved_regs_space
+ total_auto_space
7399 + current_function_args_info
.byref_regs
* 8;
7401 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7402 return total_saved_regs_space
+ total_auto_space
7403 + current_function_args_info
.byref_regs
* 8;
7405 /* Initial gap between fp and sp is 0. */
7406 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7409 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7410 return rounded_frame_size (0);
7412 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7413 return rounded_frame_size (0);
7415 gcc_assert (from
== RETURN_ADDRESS_POINTER_REGNUM
7416 && (to
== HARD_FRAME_POINTER_REGNUM
7417 || to
== STACK_POINTER_REGNUM
));
7420 int n
= total_saved_regs_space
;
7421 int pr_reg
= TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
;
7422 save_schedule schedule
;
7425 n
+= total_auto_space
;
7427 /* If it wasn't saved, there's not much we can do. */
7428 if (! TEST_HARD_REG_BIT (live_regs_mask
, pr_reg
))
7431 target_flags
= copy_flags
;
7433 sh5_schedule_saves (&live_regs_mask
, &schedule
, n
);
7434 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
7435 if (entry
->reg
== pr_reg
)
7437 target_flags
= save_flags
;
7438 return entry
->offset
;
7443 return total_auto_space
;
7446 /* Handle machine specific pragmas to be semi-compatible with Renesas
7450 sh_pr_interrupt (struct cpp_reader
*pfile ATTRIBUTE_UNUSED
)
7452 pragma_interrupt
= 1;
7456 sh_pr_trapa (struct cpp_reader
*pfile ATTRIBUTE_UNUSED
)
7458 pragma_interrupt
= pragma_trapa
= 1;
7462 sh_pr_nosave_low_regs (struct cpp_reader
*pfile ATTRIBUTE_UNUSED
)
7464 pragma_nosave_low_regs
= 1;
7467 /* Generate 'handle_interrupt' attribute for decls */
7470 sh_insert_attributes (tree node
, tree
*attributes
)
7472 if (! pragma_interrupt
7473 || TREE_CODE (node
) != FUNCTION_DECL
)
7476 /* We are only interested in fields. */
7480 /* Add a 'handle_interrupt' attribute. */
7481 * attributes
= tree_cons (get_identifier ("interrupt_handler"), NULL
, * attributes
);
7486 /* Supported attributes:
7488 interrupt_handler -- specifies this function is an interrupt handler.
7490 sp_switch -- specifies an alternate stack for an interrupt handler
7493 trap_exit -- use a trapa to exit an interrupt function instead of
7496 renesas -- use Renesas calling/layout conventions (functions and
7501 const struct attribute_spec sh_attribute_table
[] =
7503 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7504 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7505 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute
},
7506 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute
},
7507 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute
},
7509 /* Symbian support adds three new attributes:
7510 dllexport - for exporting a function/variable that will live in a dll
7511 dllimport - for importing a function/variable from a dll
7513 Microsoft allows multiple declspecs in one __declspec, separating
7514 them with spaces. We do NOT support this. Instead, use __declspec
7516 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7517 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7519 { NULL
, 0, 0, false, false, false, NULL
}
7522 /* Handle an "interrupt_handler" attribute; arguments as in
7523 struct attribute_spec.handler. */
7525 sh_handle_interrupt_handler_attribute (tree
*node
, tree name
,
7526 tree args ATTRIBUTE_UNUSED
,
7527 int flags ATTRIBUTE_UNUSED
,
7530 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7532 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7533 IDENTIFIER_POINTER (name
));
7534 *no_add_attrs
= true;
7536 else if (TARGET_SHCOMPACT
)
7538 error ("attribute interrupt_handler is not compatible with -m5-compact");
7539 *no_add_attrs
= true;
7545 /* Handle an "sp_switch" attribute; arguments as in
7546 struct attribute_spec.handler. */
7548 sh_handle_sp_switch_attribute (tree
*node
, tree name
, tree args
,
7549 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7551 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7553 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7554 IDENTIFIER_POINTER (name
));
7555 *no_add_attrs
= true;
7557 else if (!pragma_interrupt
)
7559 /* The sp_switch attribute only has meaning for interrupt functions. */
7560 warning (OPT_Wattributes
, "%qs attribute only applies to "
7561 "interrupt functions", IDENTIFIER_POINTER (name
));
7562 *no_add_attrs
= true;
7564 else if (TREE_CODE (TREE_VALUE (args
)) != STRING_CST
)
7566 /* The argument must be a constant string. */
7567 warning (OPT_Wattributes
, "%qs attribute argument not a string constant",
7568 IDENTIFIER_POINTER (name
));
7569 *no_add_attrs
= true;
7573 const char *s
= ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args
)));
7574 sp_switch
= gen_rtx_SYMBOL_REF (VOIDmode
, s
);
7580 /* Handle an "trap_exit" attribute; arguments as in
7581 struct attribute_spec.handler. */
7583 sh_handle_trap_exit_attribute (tree
*node
, tree name
, tree args
,
7584 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7586 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7588 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7589 IDENTIFIER_POINTER (name
));
7590 *no_add_attrs
= true;
7592 else if (!pragma_interrupt
)
7594 /* The trap_exit attribute only has meaning for interrupt functions. */
7595 warning (OPT_Wattributes
, "%qs attribute only applies to "
7596 "interrupt functions", IDENTIFIER_POINTER (name
));
7597 *no_add_attrs
= true;
7599 else if (TREE_CODE (TREE_VALUE (args
)) != INTEGER_CST
)
7601 /* The argument must be a constant integer. */
7602 warning (OPT_Wattributes
, "%qs attribute argument not an "
7603 "integer constant", IDENTIFIER_POINTER (name
));
7604 *no_add_attrs
= true;
7608 trap_exit
= TREE_INT_CST_LOW (TREE_VALUE (args
));
7615 sh_handle_renesas_attribute (tree
*node ATTRIBUTE_UNUSED
,
7616 tree name ATTRIBUTE_UNUSED
,
7617 tree args ATTRIBUTE_UNUSED
,
7618 int flags ATTRIBUTE_UNUSED
,
7619 bool *no_add_attrs ATTRIBUTE_UNUSED
)
7624 /* True if __attribute__((renesas)) or -mrenesas. */
7626 sh_attr_renesas_p (tree td
)
7633 td
= TREE_TYPE (td
);
7634 if (td
== error_mark_node
)
7636 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td
))
7640 /* True if __attribute__((renesas)) or -mrenesas, for the current
7643 sh_cfun_attr_renesas_p (void)
7645 return sh_attr_renesas_p (current_function_decl
);
7649 sh_cfun_interrupt_handler_p (void)
7651 return (lookup_attribute ("interrupt_handler",
7652 DECL_ATTRIBUTES (current_function_decl
))
7656 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7659 sh_check_pch_target_flags (int old_flags
)
7661 if ((old_flags
^ target_flags
) & (MASK_SH1
| MASK_SH2
| MASK_SH3
7662 | MASK_SH_E
| MASK_HARD_SH4
7663 | MASK_FPU_SINGLE
| MASK_SH4
))
7664 return _("created and used with different architectures / ABIs");
7665 if ((old_flags
^ target_flags
) & MASK_HITACHI
)
7666 return _("created and used with different ABIs");
7667 if ((old_flags
^ target_flags
) & MASK_LITTLE_ENDIAN
)
7668 return _("created and used with different endianness");
7672 /* Predicates used by the templates. */
7674 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7675 Used only in general_movsrc_operand. */
7678 system_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7690 /* Nonzero if OP is a floating point value with value 0.0. */
7693 fp_zero_operand (rtx op
)
7697 if (GET_MODE (op
) != SFmode
)
7700 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7701 return REAL_VALUES_EQUAL (r
, dconst0
) && ! REAL_VALUE_MINUS_ZERO (r
);
7704 /* Nonzero if OP is a floating point value with value 1.0. */
7707 fp_one_operand (rtx op
)
7711 if (GET_MODE (op
) != SFmode
)
7714 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7715 return REAL_VALUES_EQUAL (r
, dconst1
);
7718 /* For -m4 and -m4-single-only, mode switching is used. If we are
7719 compiling without -mfmovd, movsf_ie isn't taken into account for
7720 mode switching. We could check in machine_dependent_reorg for
7721 cases where we know we are in single precision mode, but there is
7722 interface to find that out during reload, so we must avoid
7723 choosing an fldi alternative during reload and thus failing to
7724 allocate a scratch register for the constant loading. */
7728 return ! TARGET_SH4
|| TARGET_FMOVD
|| reload_completed
;
7732 tertiary_reload_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7734 enum rtx_code code
= GET_CODE (op
);
7735 return code
== MEM
|| (TARGET_SH4
&& code
== CONST_DOUBLE
);
7738 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7740 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7742 if (GET_CODE (op
) != SYMBOL_REF
)
7744 return SYMBOL_REF_TLS_MODEL (op
);
7747 /* Return the destination address of a branch. */
7750 branch_dest (rtx branch
)
7752 rtx dest
= SET_SRC (PATTERN (branch
));
7755 if (GET_CODE (dest
) == IF_THEN_ELSE
)
7756 dest
= XEXP (dest
, 1);
7757 dest
= XEXP (dest
, 0);
7758 dest_uid
= INSN_UID (dest
);
7759 return INSN_ADDRESSES (dest_uid
);
7762 /* Return nonzero if REG is not used after INSN.
7763 We assume REG is a reload reg, and therefore does
7764 not live past labels. It may live past calls or jumps though. */
7766 reg_unused_after (rtx reg
, rtx insn
)
7771 /* If the reg is set by this instruction, then it is safe for our
7772 case. Disregard the case where this is a store to memory, since
7773 we are checking a register used in the store address. */
7774 set
= single_set (insn
);
7775 if (set
&& GET_CODE (SET_DEST (set
)) != MEM
7776 && reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7779 while ((insn
= NEXT_INSN (insn
)))
7785 code
= GET_CODE (insn
);
7788 /* If this is a label that existed before reload, then the register
7789 if dead here. However, if this is a label added by reorg, then
7790 the register may still be live here. We can't tell the difference,
7791 so we just ignore labels completely. */
7792 if (code
== CODE_LABEL
)
7797 if (code
== JUMP_INSN
)
7800 /* If this is a sequence, we must handle them all at once.
7801 We could have for instance a call that sets the target register,
7802 and an insn in a delay slot that uses the register. In this case,
7803 we must return 0. */
7804 else if (code
== INSN
&& GET_CODE (PATTERN (insn
)) == SEQUENCE
)
7809 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
7811 rtx this_insn
= XVECEXP (PATTERN (insn
), 0, i
);
7812 rtx set
= single_set (this_insn
);
7814 if (GET_CODE (this_insn
) == CALL_INSN
)
7816 else if (GET_CODE (this_insn
) == JUMP_INSN
)
7818 if (INSN_ANNULLED_BRANCH_P (this_insn
))
7823 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7825 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7827 if (GET_CODE (SET_DEST (set
)) != MEM
)
7833 && reg_overlap_mentioned_p (reg
, PATTERN (this_insn
)))
7838 else if (code
== JUMP_INSN
)
7842 set
= single_set (insn
);
7843 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7845 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7846 return GET_CODE (SET_DEST (set
)) != MEM
;
7847 if (set
== 0 && reg_overlap_mentioned_p (reg
, PATTERN (insn
)))
7850 if (code
== CALL_INSN
&& call_really_used_regs
[REGNO (reg
)])
7858 static GTY(()) rtx fpscr_rtx
;
7860 get_fpscr_rtx (void)
7864 fpscr_rtx
= gen_rtx_REG (PSImode
, FPSCR_REG
);
7865 REG_USERVAR_P (fpscr_rtx
) = 1;
7866 mark_user_reg (fpscr_rtx
);
7868 if (! reload_completed
|| mdep_reorg_phase
!= SH_AFTER_MDEP_REORG
)
7869 mark_user_reg (fpscr_rtx
);
7873 static GTY(()) tree fpscr_values
;
7876 emit_fpu_switch (rtx scratch
, int index
)
7880 if (fpscr_values
== NULL
)
7884 t
= build_index_type (integer_one_node
);
7885 t
= build_array_type (integer_type_node
, t
);
7886 t
= build_decl (VAR_DECL
, get_identifier ("__fpscr_values"), t
);
7887 DECL_ARTIFICIAL (t
) = 1;
7888 DECL_IGNORED_P (t
) = 1;
7889 DECL_EXTERNAL (t
) = 1;
7890 TREE_STATIC (t
) = 1;
7891 TREE_PUBLIC (t
) = 1;
7897 src
= DECL_RTL (fpscr_values
);
7900 emit_move_insn (scratch
, XEXP (src
, 0));
7902 emit_insn (gen_addsi3 (scratch
, scratch
, GEN_INT (index
* 4)));
7903 src
= adjust_automodify_address (src
, PSImode
, scratch
, index
* 4);
7906 src
= adjust_address (src
, PSImode
, index
* 4);
7908 dst
= get_fpscr_rtx ();
7909 emit_move_insn (dst
, src
);
7913 emit_sf_insn (rtx pat
)
7919 emit_df_insn (rtx pat
)
7925 expand_sf_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7927 emit_sf_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
7931 expand_sf_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7933 emit_sf_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
7938 expand_df_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7940 emit_df_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
7944 expand_df_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7946 emit_df_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
7950 /* ??? gcc does flow analysis strictly after common subexpression
7951 elimination. As a result, common subexpression elimination fails
7952 when there are some intervening statements setting the same register.
7953 If we did nothing about this, this would hurt the precision switching
7954 for SH4 badly. There is some cse after reload, but it is unable to
7955 undo the extra register pressure from the unused instructions, and
7956 it cannot remove auto-increment loads.
7958 A C code example that shows this flow/cse weakness for (at least) SH
7959 and sparc (as of gcc ss-970706) is this:
7973 So we add another pass before common subexpression elimination, to
7974 remove assignments that are dead due to a following assignment in the
7975 same basic block. */
7978 mark_use (rtx x
, rtx
*reg_set_block
)
7984 code
= GET_CODE (x
);
7989 int regno
= REGNO (x
);
7990 int nregs
= (regno
< FIRST_PSEUDO_REGISTER
7991 ? HARD_REGNO_NREGS (regno
, GET_MODE (x
))
7995 reg_set_block
[regno
+ nregs
- 1] = 0;
8002 rtx dest
= SET_DEST (x
);
8004 if (GET_CODE (dest
) == SUBREG
)
8005 dest
= SUBREG_REG (dest
);
8006 if (GET_CODE (dest
) != REG
)
8007 mark_use (dest
, reg_set_block
);
8008 mark_use (SET_SRC (x
), reg_set_block
);
8015 const char *fmt
= GET_RTX_FORMAT (code
);
8017 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8020 mark_use (XEXP (x
, i
), reg_set_block
);
8021 else if (fmt
[i
] == 'E')
8022 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8023 mark_use (XVECEXP (x
, i
, j
), reg_set_block
);
8030 static rtx
get_free_reg (HARD_REG_SET
);
8032 /* This function returns a register to use to load the address to load
8033 the fpscr from. Currently it always returns r1 or r7, but when we are
8034 able to use pseudo registers after combine, or have a better mechanism
8035 for choosing a register, it should be done here. */
8036 /* REGS_LIVE is the liveness information for the point for which we
8037 need this allocation. In some bare-bones exit blocks, r1 is live at the
8038 start. We can even have all of r0..r3 being live:
8039 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8040 INSN before which new insns are placed with will clobber the register
8041 we return. If a basic block consists only of setting the return value
8042 register to a pseudo and using that register, the return value is not
8043 live before or after this block, yet we we'll insert our insns right in
8047 get_free_reg (HARD_REG_SET regs_live
)
8049 if (! TEST_HARD_REG_BIT (regs_live
, 1))
8050 return gen_rtx_REG (Pmode
, 1);
8052 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8053 there shouldn't be anything but a jump before the function end. */
8054 gcc_assert (!TEST_HARD_REG_BIT (regs_live
, 7));
8055 return gen_rtx_REG (Pmode
, 7);
8058 /* This function will set the fpscr from memory.
8059 MODE is the mode we are setting it to. */
8061 fpscr_set_from_mem (int mode
, HARD_REG_SET regs_live
)
8063 enum attr_fp_mode fp_mode
= mode
;
8064 enum attr_fp_mode norm_mode
= ACTUAL_NORMAL_MODE (FP_MODE
);
8065 rtx addr_reg
= get_free_reg (regs_live
);
8067 emit_fpu_switch (addr_reg
, fp_mode
== norm_mode
);
8070 /* Is the given character a logical line separator for the assembler? */
8071 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8072 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8076 sh_insn_length_adjustment (rtx insn
)
8078 /* Instructions with unfilled delay slots take up an extra two bytes for
8079 the nop in the delay slot. */
8080 if (((GET_CODE (insn
) == INSN
8081 && GET_CODE (PATTERN (insn
)) != USE
8082 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
8083 || GET_CODE (insn
) == CALL_INSN
8084 || (GET_CODE (insn
) == JUMP_INSN
8085 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8086 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
))
8087 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
8088 && get_attr_needs_delay_slot (insn
) == NEEDS_DELAY_SLOT_YES
)
8091 /* SH2e has a bug that prevents the use of annulled branches, so if
8092 the delay slot is not filled, we'll have to put a NOP in it. */
8093 if (sh_cpu
== CPU_SH2E
8094 && GET_CODE (insn
) == JUMP_INSN
8095 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8096 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
8097 && get_attr_type (insn
) == TYPE_CBRANCH
8098 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
)
8101 /* sh-dsp parallel processing insn take four bytes instead of two. */
8103 if (GET_CODE (insn
) == INSN
)
8106 rtx body
= PATTERN (insn
);
8107 const char *template;
8109 int maybe_label
= 1;
8111 if (GET_CODE (body
) == ASM_INPUT
)
8112 template = XSTR (body
, 0);
8113 else if (asm_noperands (body
) >= 0)
8115 = decode_asm_operands (body
, NULL
, NULL
, NULL
, NULL
);
8124 while (c
== ' ' || c
== '\t');
8125 /* all sh-dsp parallel-processing insns start with p.
8126 The only non-ppi sh insn starting with p is pref.
8127 The only ppi starting with pr is prnd. */
8128 if ((c
== 'p' || c
== 'P') && strncasecmp ("re", template, 2))
8130 /* The repeat pseudo-insn expands two three insns, a total of
8131 six bytes in size. */
8132 else if ((c
== 'r' || c
== 'R')
8133 && ! strncasecmp ("epeat", template, 5))
8135 while (c
&& c
!= '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c
))
8137 /* If this is a label, it is obviously not a ppi insn. */
8138 if (c
== ':' && maybe_label
)
8143 else if (c
== '\'' || c
== '"')
8148 maybe_label
= c
!= ':';
8156 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8157 isn't protected by a PIC unspec. */
8159 nonpic_symbol_mentioned_p (rtx x
)
8161 register const char *fmt
;
8164 if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
8165 || GET_CODE (x
) == PC
)
8168 /* We don't want to look into the possible MEM location of a
8169 CONST_DOUBLE, since we're not going to use it, in general. */
8170 if (GET_CODE (x
) == CONST_DOUBLE
)
8173 if (GET_CODE (x
) == UNSPEC
8174 && (XINT (x
, 1) == UNSPEC_PIC
8175 || XINT (x
, 1) == UNSPEC_GOT
8176 || XINT (x
, 1) == UNSPEC_GOTOFF
8177 || XINT (x
, 1) == UNSPEC_GOTPLT
8178 || XINT (x
, 1) == UNSPEC_GOTTPOFF
8179 || XINT (x
, 1) == UNSPEC_DTPOFF
8180 || XINT (x
, 1) == UNSPEC_PLT
))
8183 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8184 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8190 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8191 if (nonpic_symbol_mentioned_p (XVECEXP (x
, i
, j
)))
8194 else if (fmt
[i
] == 'e' && nonpic_symbol_mentioned_p (XEXP (x
, i
)))
8201 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8202 @GOTOFF in `reg'. */
8204 legitimize_pic_address (rtx orig
, enum machine_mode mode ATTRIBUTE_UNUSED
,
8207 if (tls_symbolic_operand (orig
, Pmode
))
8210 if (GET_CODE (orig
) == LABEL_REF
8211 || (GET_CODE (orig
) == SYMBOL_REF
&& SYMBOL_REF_LOCAL_P (orig
)))
8214 reg
= gen_reg_rtx (Pmode
);
8216 emit_insn (gen_symGOTOFF2reg (reg
, orig
));
8219 else if (GET_CODE (orig
) == SYMBOL_REF
)
8222 reg
= gen_reg_rtx (Pmode
);
8224 emit_insn (gen_symGOT2reg (reg
, orig
));
8230 /* Mark the use of a constant in the literal table. If the constant
8231 has multiple labels, make it unique. */
8233 mark_constant_pool_use (rtx x
)
8235 rtx insn
, lab
, pattern
;
8240 switch (GET_CODE (x
))
8250 /* Get the first label in the list of labels for the same constant
8251 and delete another labels in the list. */
8253 for (insn
= PREV_INSN (x
); insn
; insn
= PREV_INSN (insn
))
8255 if (GET_CODE (insn
) != CODE_LABEL
8256 || LABEL_REFS (insn
) != NEXT_INSN (insn
))
8261 for (insn
= LABEL_REFS (lab
); insn
; insn
= LABEL_REFS (insn
))
8262 INSN_DELETED_P (insn
) = 1;
8264 /* Mark constants in a window. */
8265 for (insn
= NEXT_INSN (x
); insn
; insn
= NEXT_INSN (insn
))
8267 if (GET_CODE (insn
) != INSN
)
8270 pattern
= PATTERN (insn
);
8271 if (GET_CODE (pattern
) != UNSPEC_VOLATILE
)
8274 switch (XINT (pattern
, 1))
8276 case UNSPECV_CONST2
:
8277 case UNSPECV_CONST4
:
8278 case UNSPECV_CONST8
:
8279 XVECEXP (pattern
, 0, 1) = const1_rtx
;
8281 case UNSPECV_WINDOW_END
:
8282 if (XVECEXP (pattern
, 0, 0) == x
)
8285 case UNSPECV_CONST_END
:
8295 /* Return true if it's possible to redirect BRANCH1 to the destination
8296 of an unconditional jump BRANCH2. We only want to do this if the
8297 resulting branch will have a short displacement. */
8299 sh_can_redirect_branch (rtx branch1
, rtx branch2
)
8301 if (flag_expensive_optimizations
&& simplejump_p (branch2
))
8303 rtx dest
= XEXP (SET_SRC (single_set (branch2
)), 0);
8307 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8308 insn
&& distance
< 256;
8309 insn
= PREV_INSN (insn
))
8314 distance
+= get_attr_length (insn
);
8316 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8317 insn
&& distance
< 256;
8318 insn
= NEXT_INSN (insn
))
8323 distance
+= get_attr_length (insn
);
8329 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8331 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED
,
8332 unsigned int new_reg
)
8334 /* Interrupt functions can only use registers that have already been
8335 saved by the prologue, even if they would normally be
8338 if (sh_cfun_interrupt_handler_p () && !regs_ever_live
[new_reg
])
8344 /* Function to update the integer COST
8345 based on the relationship between INSN that is dependent on
8346 DEP_INSN through the dependence LINK. The default is to make no
8347 adjustment to COST. This can be used for example to specify to
8348 the scheduler that an output- or anti-dependence does not incur
8349 the same cost as a data-dependence. The return value should be
8350 the new value for COST. */
8352 sh_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
, rtx dep_insn
, int cost
)
8358 /* On SHmedia, if the dependence is an anti-dependence or
8359 output-dependence, there is no cost. */
8360 if (REG_NOTE_KIND (link
) != 0)
8362 /* However, dependencies between target register loads and
8363 uses of the register in a subsequent block that are separated
8364 by a conditional branch are not modelled - we have to do with
8365 the anti-dependency between the target register load and the
8366 conditional branch that ends the current block. */
8367 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8368 && GET_CODE (PATTERN (dep_insn
)) == SET
8369 && (get_attr_type (dep_insn
) == TYPE_PT_MEDIA
8370 || get_attr_type (dep_insn
) == TYPE_PTABS_MEDIA
)
8371 && get_attr_type (insn
) == TYPE_CBRANCH_MEDIA
)
8373 int orig_cost
= cost
;
8374 rtx note
= find_reg_note (insn
, REG_BR_PROB
, 0);
8375 rtx target
= ((! note
8376 || INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
8377 ? insn
: JUMP_LABEL (insn
));
8378 /* On the likely path, the branch costs 1, on the unlikely path,
8382 target
= next_active_insn (target
);
8383 while (target
&& ! flow_dependent_p (target
, dep_insn
)
8385 /* If two branches are executed in immediate succession, with the
8386 first branch properly predicted, this causes a stall at the
8387 second branch, hence we won't need the target for the
8388 second branch for two cycles after the launch of the first
8390 if (cost
> orig_cost
- 2)
8391 cost
= orig_cost
- 2;
8397 else if (get_attr_is_mac_media (insn
)
8398 && get_attr_is_mac_media (dep_insn
))
8401 else if (! reload_completed
8402 && GET_CODE (PATTERN (insn
)) == SET
8403 && GET_CODE (SET_SRC (PATTERN (insn
))) == FLOAT
8404 && GET_CODE (PATTERN (dep_insn
)) == SET
8405 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn
)), VOIDmode
)
8408 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8409 that is needed at the target. */
8410 else if (get_attr_type (insn
) == TYPE_JUMP_MEDIA
8411 && ! flow_dependent_p (insn
, dep_insn
))
8414 else if (REG_NOTE_KIND (link
) == 0)
8416 enum attr_type dep_type
, type
;
8418 if (recog_memoized (insn
) < 0
8419 || recog_memoized (dep_insn
) < 0)
8422 dep_type
= get_attr_type (dep_insn
);
8423 if (dep_type
== TYPE_FLOAD
|| dep_type
== TYPE_PCFLOAD
)
8425 if ((dep_type
== TYPE_LOAD_SI
|| dep_type
== TYPE_PCLOAD_SI
)
8426 && (type
= get_attr_type (insn
)) != TYPE_CALL
8427 && type
!= TYPE_SFUNC
)
8430 /* The only input for a call that is timing-critical is the
8431 function's address. */
8432 if (GET_CODE(insn
) == CALL_INSN
)
8434 rtx call
= PATTERN (insn
);
8436 if (GET_CODE (call
) == PARALLEL
)
8437 call
= XVECEXP (call
, 0 ,0);
8438 if (GET_CODE (call
) == SET
)
8439 call
= SET_SRC (call
);
8440 if (GET_CODE (call
) == CALL
&& GET_CODE (XEXP (call
, 0)) == MEM
8441 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8442 && (GET_CODE (XEXP (XEXP (call
, 0), 0)) == UNSPEC
8443 || ! reg_set_p (XEXP (XEXP (call
, 0), 0), dep_insn
)))
8446 /* Likewise, the most timing critical input for an sfuncs call
8447 is the function address. However, sfuncs typically start
8448 using their arguments pretty quickly.
8449 Assume a four cycle delay before they are needed. */
8450 /* All sfunc calls are parallels with at least four components.
8451 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8452 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
8453 && XVECLEN (PATTERN (insn
), 0) >= 4
8454 && (reg
= sfunc_uses_reg (insn
)))
8456 if (! reg_set_p (reg
, dep_insn
))
8459 /* When the preceding instruction loads the shift amount of
8460 the following SHAD/SHLD, the latency of the load is increased
8463 && get_attr_type (insn
) == TYPE_DYN_SHIFT
8464 && get_attr_any_int_load (dep_insn
) == ANY_INT_LOAD_YES
8465 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn
)),
8466 XEXP (SET_SRC (single_set (insn
)),
8469 /* When an LS group instruction with a latency of less than
8470 3 cycles is followed by a double-precision floating-point
8471 instruction, FIPR, or FTRV, the latency of the first
8472 instruction is increased to 3 cycles. */
8474 && get_attr_insn_class (dep_insn
) == INSN_CLASS_LS_GROUP
8475 && get_attr_dfp_comp (insn
) == DFP_COMP_YES
)
8477 /* The lsw register of a double-precision computation is ready one
8479 else if (reload_completed
8480 && get_attr_dfp_comp (dep_insn
) == DFP_COMP_YES
8481 && (use_pat
= single_set (insn
))
8482 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn
))),
8486 if (get_attr_any_fp_comp (dep_insn
) == ANY_FP_COMP_YES
8487 && get_attr_late_fp_use (insn
) == LATE_FP_USE_YES
)
8490 /* An anti-dependence penalty of two applies if the first insn is a double
8491 precision fadd / fsub / fmul. */
8492 else if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8493 && recog_memoized (dep_insn
) >= 0
8494 && get_attr_type (dep_insn
) == TYPE_DFP_ARITH
8495 /* A lot of alleged anti-flow dependences are fake,
8496 so check this one is real. */
8497 && flow_dependent_p (dep_insn
, insn
))
8504 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8505 if DEP_INSN is anti-flow dependent on INSN. */
8507 flow_dependent_p (rtx insn
, rtx dep_insn
)
8509 rtx tmp
= PATTERN (insn
);
8511 note_stores (PATTERN (dep_insn
), flow_dependent_p_1
, &tmp
);
8512 return tmp
== NULL_RTX
;
8515 /* A helper function for flow_dependent_p called through note_stores. */
/* X is the destination being stored to; DATA points at the rtx holding the
   pattern we are checking references against.  The consequent of the `if'
   (presumably *pinsn = NULL_RTX;) was dropped by extraction — confirm against
   the original sh.c.  */
8517 flow_dependent_p_1 (rtx x
, rtx pat ATTRIBUTE_UNUSED
, void *data
)
8519 rtx
* pinsn
= (rtx
*) data
;
/* Only act while no dependence has been recorded yet (*pinsn non-null).  */
8521 if (*pinsn
&& reg_referenced_p (x
, *pinsn
))
8525 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8526 'special function' patterns (type sfunc) that clobber pr, but that
8527 do not look like function calls to leaf_function_p. Hence we must
8528 do this extra check. */
/* NOTE(review): the function signature line (orig. 8529-8531) was dropped by
   extraction; only the return expression survives.  Returns the number of
   sets of the link register (PR or its SHmedia equivalent).  */
8532 return REG_N_SETS (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
);
8535 /* Return where to allocate pseudo for a given hard register initial
/* Target hook: for the link register of a leaf function whose PR is never
   set (and with no SHcompact argument-trampoline / nonlocal-label
   complication), the initial value can live in a stack slot addressed off
   the return-address pointer instead of a fresh pseudo.  */
8538 sh_allocate_initial_value (rtx hard_reg
)
8542 if (REGNO (hard_reg
) == (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
))
8544 if (current_function_is_leaf
8545 && ! sh_pr_n_sets ()
8546 && ! (TARGET_SHCOMPACT
8547 && ((current_function_args_info
.call_cookie
8548 & ~ CALL_COOKIE_RET_TRAMP (1))
8549 || current_function_has_nonlocal_label
)))
/* Use a frame MEM at the return-address pointer for the saved PR.  */
8552 x
= gen_frame_mem (Pmode
, return_address_pointer_rtx
);
8560 /* This function returns "2" to indicate dual issue for the SH4
8561 processor. To be used by the DFA pipeline description. */
/* NOTE(review): the return statements were dropped by extraction;
   presumably `return 2;` for superscalar targets, `return 1;` otherwise.  */
8563 sh_issue_rate (void)
8565 if (TARGET_SUPERSCALAR
)
8571 /* Functions for ready queue reordering for sched1. */
8573 /* Get weight for mode for a set x. */
/* X is one element of an insn pattern: a CLOBBER or SET whose destination
   is a register of MODE contributes to the weight.  The actual weight
   returns (orig. 8578, 8580-8590) were dropped by extraction.  */
8575 find_set_regmode_weight (rtx x
, enum machine_mode mode
)
8577 if (GET_CODE (x
) == CLOBBER
&& register_operand (SET_DEST (x
), mode
))
8579 if (GET_CODE (x
) == SET
&& register_operand (SET_DEST (x
), mode
))
8581 if (GET_CODE (SET_DEST (x
)) == REG
)
/* A set whose source does not mention the destination births a new value.  */
8583 if (!reg_mentioned_p (SET_DEST (x
), SET_SRC (x
)))
8593 /* Get regmode weight for insn. */
/* Net register-pressure contribution of INSN for MODE: +1 per register of
   MODE born in the pattern, -1 per register of MODE dying here (REG_DEAD /
   REG_UNUSED notes).  Several structural lines (braces, the initial
   x = PATTERN (insn), the final return) were dropped by extraction.  */
8595 find_insn_regmode_weight (rtx insn
, enum machine_mode mode
)
8597 short reg_weight
= 0;
8600 /* Increment weight for each register born here. */
8602 reg_weight
+= find_set_regmode_weight (x
, mode
);
/* A PARALLEL pattern is scanned element by element.  */
8603 if (GET_CODE (x
) == PARALLEL
)
8606 for (j
= XVECLEN (x
, 0) - 1; j
>= 0; j
--)
8608 x
= XVECEXP (PATTERN (insn
), 0, j
);
8609 reg_weight
+= find_set_regmode_weight (x
, mode
);
8612 /* Decrement weight for each register that dies here. */
8613 for (x
= REG_NOTES (insn
); x
; x
= XEXP (x
, 1))
8615 if (REG_NOTE_KIND (x
) == REG_DEAD
|| REG_NOTE_KIND (x
) == REG_UNUSED
)
8617 rtx note
= XEXP (x
, 0);
8618 if (GET_CODE (note
) == REG
&& GET_MODE (note
) == mode
)
8625 /* Calculate regmode weights for all insns of a basic block. */
/* Walks basic block B and caches each insn's pressure weight for MODE in
   INSN_REGMODE_WEIGHT.  SFmode insns also count DFmode uses doubled;
   SImode insns count DImode doubled (a double-width value occupies two
   registers of the narrow mode).  */
8627 find_regmode_weight (int b
, enum machine_mode mode
)
8629 rtx insn
, next_tail
, head
, tail
;
8631 get_block_head_tail (b
, &head
, &tail
);
8632 next_tail
= NEXT_INSN (tail
);
8634 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
8636 /* Handle register life information. */
8641 INSN_REGMODE_WEIGHT (insn
, mode
) =
8642 find_insn_regmode_weight (insn
, mode
) + 2 * find_insn_regmode_weight (insn
, DFmode
);
8643 else if (mode
== SImode
)
8644 INSN_REGMODE_WEIGHT (insn
, mode
) =
8645 find_insn_regmode_weight (insn
, mode
) + 2 * find_insn_regmode_weight (insn
, DImode
);
8649 /* Comparison function for ready queue sorting. */
/* qsort comparator over rtx insn pointers.  Note X and Y are deliberately
   swapped into tmp/tmp2 so the sort order is descending-by-priority when
   used by SCHED_REORDER.  */
8651 rank_for_reorder (const void *x
, const void *y
)
8653 rtx tmp
= *(const rtx
*) y
;
8654 rtx tmp2
= *(const rtx
*) x
;
8656 /* The insn in a schedule group should be issued the first. */
8657 if (SCHED_GROUP_P (tmp
) != SCHED_GROUP_P (tmp2
))
8658 return SCHED_GROUP_P (tmp2
) ? 1 : -1;
8660 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8661 minimizes instruction movement, thus minimizing sched's effect on
8662 register pressure. */
8663 return INSN_LUID (tmp
) - INSN_LUID (tmp2
);
8666 /* Resort the array A in which only element at index N may be out of order. */
/* Single insertion-sort step: slide A[N-1] down to its rank position.
   NOTE(review): the declaration/initialization of `i' and the loop body
   (orig. 8671-8678) were dropped by extraction.  */
8668 swap_reorder (rtx
*a
, int n
)
8670 rtx insn
= a
[n
- 1];
8673 while (i
>= 0 && rank_for_reorder (a
+ i
, &insn
) >= 0)
/* SCHED_REORDER: for a 2-element ready list a single insertion step
   suffices; larger lists fall back to a full qsort with rank_for_reorder.  */
8681 #define SCHED_REORDER(READY, N_READY) \
8684 if ((N_READY) == 2) \
8685 swap_reorder (READY, N_READY); \
8686 else if ((N_READY) > 2) \
8687 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8691 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8694 ready_reorder (rtx
*ready
, int nready
)
8696 SCHED_REORDER (ready
, nready
);
8699 /* Calculate regmode weights for all insns of all basic block. */
/* TARGET_SCHED_INIT_GLOBAL hook: allocates the two per-insn weight arrays
   (index 0 = SImode, 1 = SFmode, per CURR_REGMODE_PRESSURE usage below)
   and precomputes weights for every basic block; freed by
   sh_md_finish_global.  The old_max_uid parameter line was dropped by
   extraction.  */
8701 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED
,
8702 int verbose ATTRIBUTE_UNUSED
,
8707 regmode_weight
[0] = (short *) xcalloc (old_max_uid
, sizeof (short));
8708 regmode_weight
[1] = (short *) xcalloc (old_max_uid
, sizeof (short));
8710 FOR_EACH_BB_REVERSE (b
)
8712 find_regmode_weight (b
->index
, SImode
);
8713 find_regmode_weight (b
->index
, SFmode
);
/* Start each scheduling pass with zero tracked pressure.  */
8716 CURR_REGMODE_PRESSURE (SImode
) = 0;
8717 CURR_REGMODE_PRESSURE (SFmode
) = 0;
/* TARGET_SCHED_FINISH_GLOBAL hook: releases the weight arrays allocated in
   sh_md_init_global and NULLs the pointers against double-free.  */
8723 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
8724 int verbose ATTRIBUTE_UNUSED
)
8726 if (regmode_weight
[0])
8728 free (regmode_weight
[0]);
8729 regmode_weight
[0] = NULL
;
8731 if (regmode_weight
[1])
8733 free (regmode_weight
[1]);
8734 regmode_weight
[1] = NULL
;
8738 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8739 keep count of register pressures on SImode and SFmode. */
/* TARGET_SCHED_VARIABLE_ISSUE hook.  Real (non-USE/CLOBBER) insns consume
   one issue slot; after reload the pressure bookkeeping is skipped.  The
   insn/can_issue_more parameter lines were dropped by extraction.  */
8741 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
8742 int sched_verbose ATTRIBUTE_UNUSED
,
8746 if (GET_CODE (PATTERN (insn
)) != USE
8747 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
8748 cached_can_issue_more
= can_issue_more
- 1;
8750 cached_can_issue_more
= can_issue_more
;
8752 if (reload_completed
)
8753 return cached_can_issue_more
;
/* Accumulate the precomputed per-insn weights into the running pressure.  */
8755 CURR_REGMODE_PRESSURE (SImode
) += INSN_REGMODE_WEIGHT (insn
, SImode
);
8756 CURR_REGMODE_PRESSURE (SFmode
) += INSN_REGMODE_WEIGHT (insn
, SFmode
);
8758 return cached_can_issue_more
;
/* TARGET_SCHED_INIT hook: reset tracked register pressure at the start of
   each scheduling region.  */
8762 sh_md_init (FILE *dump ATTRIBUTE_UNUSED
,
8763 int verbose ATTRIBUTE_UNUSED
,
8764 int veclen ATTRIBUTE_UNUSED
)
8766 CURR_REGMODE_PRESSURE (SImode
) = 0;
8767 CURR_REGMODE_PRESSURE (SFmode
) = 0;
8770 /* Some magic numbers. */
8771 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8772 functions that already have high pressure on r0. */
8773 #define R0_MAX_LIFE_REGIONS 2
8774 #define R0_MAX_LIVE_LENGTH 12
8775 /* Register Pressure thresholds for SImode and SFmode registers. */
8776 #define SIMODE_MAX_WEIGHT 5
8777 #define SFMODE_MAX_WEIGHT 10
8779 /* Return true if the pressure is high for MODE. */
/* NOTE(review): the early return after the r0 test and the mode dispatch
   (presumably `if (TARGET_SH4 && mode == SFmode)') were dropped by
   extraction.  */
8781 high_pressure (enum machine_mode mode
)
8783 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8784 functions that already have high pressure on r0. */
8785 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8786 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH
)
8790 return (CURR_REGMODE_PRESSURE (SFmode
) > SFMODE_MAX_WEIGHT
);
8792 return (CURR_REGMODE_PRESSURE (SImode
) > SIMODE_MAX_WEIGHT
);
8795 /* Reorder ready queue if register pressure is high. */
/* TARGET_SCHED_REORDER hook (sched1 only — after reload it just reports
   the issue rate).  The ready/n_readyp parameter lines were dropped by
   extraction.  */
8797 sh_reorder (FILE *dump ATTRIBUTE_UNUSED
,
8798 int sched_verbose ATTRIBUTE_UNUSED
,
8801 int clock_var ATTRIBUTE_UNUSED
)
8803 if (reload_completed
)
8804 return sh_issue_rate ();
8806 if (high_pressure (SFmode
) || high_pressure (SImode
))
8808 ready_reorder (ready
, *n_readyp
);
8811 return sh_issue_rate ();
8814 /* Skip cycles if the current register pressure is high. */
/* TARGET_SCHED_REORDER2 hook: under high pressure, returns the issue count
   cached by sh_variable_issue (the pressure-limiting action between the
   `if' and the final return was dropped by extraction — presumably
   skip_cycles = 1).  */
8816 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
8817 int sched_verbose ATTRIBUTE_UNUSED
,
8818 rtx
*ready ATTRIBUTE_UNUSED
,
8819 int *n_readyp ATTRIBUTE_UNUSED
,
8820 int clock_var ATTRIBUTE_UNUSED
)
8822 if (reload_completed
)
8823 return cached_can_issue_more
;
8825 if (high_pressure(SFmode
) || high_pressure (SImode
))
8828 return cached_can_issue_more
;
8831 /* Skip cycles without sorting the ready queue. This will move insn from
8832 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8833 queue by sh_reorder. */
8835 /* Generally, skipping these many cycles are sufficient for all insns to move
/* TARGET_SCHED_DFA_NEW_CYCLE hook.  NOTE(review): the MAX_SKIPS #define,
   the clock_var/sort_p parameters, last_clock_var updates, and the return
   statements were all dropped by extraction — confirm against original.  */
8840 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED
,
8841 int sched_verbose ATTRIBUTE_UNUSED
,
8842 rtx insn ATTRIBUTE_UNUSED
,
8847 if (reload_completed
)
8852 if ((clock_var
- last_clock_var
) < MAX_SKIPS
)
8857 /* If this is the last cycle we are skipping, allow reordering of R. */
8858 if ((clock_var
- last_clock_var
) == MAX_SKIPS
)
8870 /* SHmedia requires registers for branches, so we can't generate new
8871 branches past reload. */
/* TARGET_CANNOT_MODIFY_JUMPS_P hook.  */
8873 sh_cannot_modify_jumps_p (void)
8875 return (TARGET_SHMEDIA
&& (reload_in_progress
|| reload_completed
));
/* TARGET_BRANCH_TARGET_REGISTER_CLASS hook: only SHmedia has branch-target
   registers.  */
8879 sh_target_reg_class (void)
8881 return TARGET_SHMEDIA
? TARGET_REGS
: NO_REGS
;
/* TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook.  Decides whether using
   callee-saved target registers is worthwhile; for borderline register
   pressure it scans for nested loops, loops containing calls, or loops
   with more than 4 labels.  NOTE(review): the declarations of `insn',
   `labels', `dummy', the returns, and the inner do-loop header were
   dropped by extraction — code below kept byte-identical.  */
8885 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen
)
8890 if (! shmedia_space_reserved_for_target_registers
)
8892 if (after_prologue_epilogue_gen
&& ! TARGET_SAVE_ALL_TARGET_REGS
)
8894 if (calc_live_regs (&dummy
) >= 6 * 8)
8896 /* This is a borderline case. See if we got a nested loop, or a loop
8897 with a call, or with more than 4 labels inside. */
8898 for (insn
= get_insns(); insn
; insn
= NEXT_INSN (insn
))
8900 if (GET_CODE (insn
) == NOTE
8901 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
8907 insn
= NEXT_INSN (insn
);
8908 if ((GET_CODE (insn
) == NOTE
8909 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
8910 || GET_CODE (insn
) == CALL_INSN
8911 || (GET_CODE (insn
) == CODE_LABEL
&& ++labels
> 4))
8914 while (GET_CODE (insn
) != NOTE
8915 || NOTE_LINE_NUMBER (insn
) != NOTE_INSN_LOOP_END
);
/* TARGET_MS_BITFIELD_LAYOUT_P hook: SH5 and Renesas/Hitachi ABIs use
   MS-compatible bit-field layout.  */
8922 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED
)
8924 return (TARGET_SH5
|| TARGET_HITACHI
|| sh_attr_renesas_p (record_type
));
8928 On the SH1..SH4, the trampoline looks like
8929 2 0002 D202 mov.l l2,r2
8930 1 0000 D301 mov.l l1,r3
8933 5 0008 00000000 l1: .long area
8934 6 000c 00000000 l2: .long function
8936 SH5 (compact) uses r1 instead of r3 for the static chain. */
8939 /* Emit RTL insns to initialize the variable parts of a trampoline.
8940 FNADDR is an RTX for the address of the function's pure code.
8941 CXT is an RTX for the static chain value for the function. */
8944 sh_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
8946 rtx tramp_mem
= gen_frame_mem (BLKmode
, tramp
);
8948 if (TARGET_SHMEDIA64
)
8953 rtx movi1
= GEN_INT (0xcc000010);
8954 rtx shori1
= GEN_INT (0xc8000010);
8957 /* The following trampoline works within a +- 128 KB range for cxt:
8958 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8959 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8960 gettr tr1,r1; blink tr0,r63 */
8961 /* Address rounding makes it hard to compute the exact bounds of the
8962 offset for this trampoline, but we have a rather generous offset
8963 range, so frame_offset should do fine as an upper bound. */
8964 if (cxt
== virtual_stack_vars_rtx
&& frame_offset
< 0x20000)
8966 /* ??? could optimize this trampoline initialization
8967 by writing DImode words with two insns each. */
8968 rtx mask
= force_reg (DImode
, GEN_INT (0x3fffc00));
8969 rtx insn
= gen_rtx_MINUS (DImode
, cxt
, tramp
);
8970 insn
= gen_rtx_ASHIFT (DImode
, insn
, GEN_INT (10-2));
8971 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8972 /* Or in ptb/u .,tr1 pattern */
8973 insn
= gen_rtx_IOR (DImode
, insn
, gen_int_mode (0xec000010, SImode
));
8974 insn
= force_operand (insn
, NULL_RTX
);
8975 insn
= gen_lowpart (SImode
, insn
);
8976 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
), insn
);
8977 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (38));
8978 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8979 insn
= force_operand (gen_rtx_IOR (DImode
, movi1
, insn
), NULL_RTX
);
8980 insn
= gen_lowpart (SImode
, insn
);
8981 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4), insn
);
8982 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (22));
8983 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8984 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
8985 insn
= gen_lowpart (SImode
, insn
);
8986 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), insn
);
8987 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (6));
8988 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8989 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
8990 insn
= gen_lowpart (SImode
, insn
);
8991 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), insn
);
8992 insn
= gen_rtx_ASHIFT (DImode
, fnaddr
, GEN_INT (10));
8993 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8994 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
8995 insn
= gen_lowpart (SImode
, insn
);
8996 emit_move_insn (adjust_address (tramp_mem
, SImode
, 16), insn
);
8997 emit_move_insn (adjust_address (tramp_mem
, SImode
, 20),
8998 GEN_INT (0x6bf10600));
8999 emit_move_insn (adjust_address (tramp_mem
, SImode
, 24),
9000 GEN_INT (0x4415fc10));
9001 emit_move_insn (adjust_address (tramp_mem
, SImode
, 28),
9002 GEN_INT (0x4401fff0));
9003 emit_insn (gen_ic_invalidate_line (tramp
));
9006 tramp_templ
= gen_rtx_SYMBOL_REF (Pmode
,"__GCC_nested_trampoline");
9007 fixed_len
= TRAMPOLINE_SIZE
- 2 * GET_MODE_SIZE (Pmode
);
9009 tramp_templ
= gen_datalabel_ref (tramp_templ
);
9011 src
= gen_const_mem (BLKmode
, tramp_templ
);
9012 set_mem_align (dst
, 256);
9013 set_mem_align (src
, 64);
9014 emit_block_move (dst
, src
, GEN_INT (fixed_len
), BLOCK_OP_NORMAL
);
9016 emit_move_insn (adjust_address (tramp_mem
, Pmode
, fixed_len
), fnaddr
);
9017 emit_move_insn (adjust_address (tramp_mem
, Pmode
,
9018 fixed_len
+ GET_MODE_SIZE (Pmode
)),
9020 emit_insn (gen_ic_invalidate_line (tramp
));
9023 else if (TARGET_SHMEDIA
)
9025 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9026 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9027 rtx quad0
= gen_reg_rtx (DImode
), cxtload
= gen_reg_rtx (DImode
);
9028 rtx quad1
= gen_reg_rtx (DImode
), quad2
= gen_reg_rtx (DImode
);
9029 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9030 rotated 10 right, and higher 16 bit of every 32 selected. */
9032 = force_reg (V2HImode
, (simplify_gen_subreg
9033 (V2HImode
, GEN_INT (0x4330432), SImode
, 0)));
9034 rtx ptabs
= force_reg (DImode
, GEN_INT (0x6bf10600));
9035 rtx blink
= force_reg (DImode
, GEN_INT (0x4401fff0));
9037 tramp
= force_reg (Pmode
, tramp
);
9038 fnaddr
= force_reg (SImode
, fnaddr
);
9039 cxt
= force_reg (SImode
, cxt
);
9040 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, quad0
, 0),
9041 gen_rtx_SUBREG (V2HImode
, fnaddr
, 0),
9043 emit_insn (gen_rotrdi3_mextr (quad0
, quad0
,
9044 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9045 emit_insn (gen_ashldi3_media (quad0
, quad0
, const2_rtx
));
9046 emit_move_insn (change_address (tramp_mem
, DImode
, NULL_RTX
), quad0
);
9047 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, cxtload
, 0),
9048 gen_rtx_SUBREG (V2HImode
, cxt
, 0),
9050 emit_insn (gen_rotrdi3_mextr (cxtload
, cxtload
,
9051 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9052 emit_insn (gen_ashldi3_media (cxtload
, cxtload
, const2_rtx
));
9053 if (TARGET_LITTLE_ENDIAN
)
9055 emit_insn (gen_mshflo_l_di (quad1
, ptabs
, cxtload
));
9056 emit_insn (gen_mextr4 (quad2
, cxtload
, blink
));
9060 emit_insn (gen_mextr4 (quad1
, cxtload
, ptabs
));
9061 emit_insn (gen_mshflo_l_di (quad2
, blink
, cxtload
));
9063 emit_move_insn (adjust_address (tramp_mem
, DImode
, 8), quad1
);
9064 emit_move_insn (adjust_address (tramp_mem
, DImode
, 16), quad2
);
9065 emit_insn (gen_ic_invalidate_line (tramp
));
9068 else if (TARGET_SHCOMPACT
)
9070 emit_insn (gen_initialize_trampoline (tramp
, cxt
, fnaddr
));
9073 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
),
9074 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0xd301d202 : 0xd202d301,
9076 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4),
9077 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0x0009422b : 0x422b0009,
9079 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), cxt
);
9080 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), fnaddr
);
9083 if (TARGET_USERMODE
)
9084 emit_library_call (function_symbol (NULL
, "__ic_invalidate",
9086 0, VOIDmode
, 1, tramp
, SImode
);
9088 emit_insn (gen_ic_invalidate_line (tramp
));
9092 /* FIXME: This is overly conservative. A SHcompact function that
9093 receives arguments ``by reference'' will have them stored in its
9094 own stack frame, so it must not pass pointers or references to
9095 these arguments to other functions by means of sibling calls. */
9096 /* If PIC, we cannot make sibling calls to global functions
9097 because the PLT requires r12 to be live. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  NOTE(review): the leading
   conditions of the return expression (orig. 9101, 9105) were dropped by
   extraction.  */
9099 sh_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
9102 && (! TARGET_SHCOMPACT
9103 || current_function_args_info
.stack_regs
== 0)
9104 && ! sh_cfun_interrupt_handler_p ()
9106 || (decl
&& ! TREE_PUBLIC (decl
))
9107 || (decl
&& DECL_VISIBILITY (decl
) != VISIBILITY_DEFAULT
)));
9110 /* Machine specific built-in functions. */
9112 struct builtin_description
9114 const enum insn_code icode
;
9115 const char *const name
;
9119 /* describe number and signedness of arguments; arg[0] == result
9120 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9121 /* 9: 64 bit pointer, 10: 32 bit pointer */
9122 static const char signature_args
[][4] =
9124 #define SH_BLTIN_V2SI2 0
9126 #define SH_BLTIN_V4HI2 1
9128 #define SH_BLTIN_V2SI3 2
9130 #define SH_BLTIN_V4HI3 3
9132 #define SH_BLTIN_V8QI3 4
9134 #define SH_BLTIN_MAC_HISI 5
9136 #define SH_BLTIN_SH_HI 6
9138 #define SH_BLTIN_SH_SI 7
9140 #define SH_BLTIN_V4HI2V2SI 8
9142 #define SH_BLTIN_V4HI2V8QI 9
9144 #define SH_BLTIN_SISF 10
9146 #define SH_BLTIN_LDUA_L 11
9148 #define SH_BLTIN_LDUA_Q 12
9150 #define SH_BLTIN_STUA_L 13
9152 #define SH_BLTIN_STUA_Q 14
9154 #define SH_BLTIN_LDUA_L64 15
9156 #define SH_BLTIN_LDUA_Q64 16
9158 #define SH_BLTIN_STUA_L64 17
9160 #define SH_BLTIN_STUA_Q64 18
9162 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9163 #define SH_BLTIN_2 19
9164 #define SH_BLTIN_SU 19
9166 #define SH_BLTIN_3 20
9167 #define SH_BLTIN_SUS 20
9169 #define SH_BLTIN_PSSV 21
9171 #define SH_BLTIN_XXUU 22
9172 #define SH_BLTIN_UUUU 22
9174 #define SH_BLTIN_PV 23
9177 /* mcmv: operands considered unsigned. */
9178 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9179 /* mperm: control value considered unsigned int. */
9180 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9181 /* mshards_q: returns signed short. */
9182 /* nsb: takes long long arg, returns unsigned char. */
9183 static const struct builtin_description bdesc
[] =
9185 { CODE_FOR_absv2si2
, "__builtin_absv2si2", SH_BLTIN_V2SI2
},
9186 { CODE_FOR_absv4hi2
, "__builtin_absv4hi2", SH_BLTIN_V4HI2
},
9187 { CODE_FOR_addv2si3
, "__builtin_addv2si3", SH_BLTIN_V2SI3
},
9188 { CODE_FOR_addv4hi3
, "__builtin_addv4hi3", SH_BLTIN_V4HI3
},
9189 { CODE_FOR_ssaddv2si3
,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3
},
9190 { CODE_FOR_usaddv8qi3
,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3
},
9191 { CODE_FOR_ssaddv4hi3
,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3
},
9192 { CODE_FOR_alloco_i
, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV
},
9193 { CODE_FOR_negcmpeqv8qi
,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3
},
9194 { CODE_FOR_negcmpeqv2si
,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3
},
9195 { CODE_FOR_negcmpeqv4hi
,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3
},
9196 { CODE_FOR_negcmpgtuv8qi
,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3
},
9197 { CODE_FOR_negcmpgtv2si
,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3
},
9198 { CODE_FOR_negcmpgtv4hi
,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3
},
9199 { CODE_FOR_mcmv
, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU
},
9200 { CODE_FOR_mcnvs_lw
, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3
},
9201 { CODE_FOR_mcnvs_wb
, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI
},
9202 { CODE_FOR_mcnvs_wub
, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI
},
9203 { CODE_FOR_mextr1
, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3
},
9204 { CODE_FOR_mextr2
, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3
},
9205 { CODE_FOR_mextr3
, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3
},
9206 { CODE_FOR_mextr4
, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3
},
9207 { CODE_FOR_mextr5
, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3
},
9208 { CODE_FOR_mextr6
, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3
},
9209 { CODE_FOR_mextr7
, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3
},
9210 { CODE_FOR_mmacfx_wl
, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI
},
9211 { CODE_FOR_mmacnfx_wl
,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI
},
9212 { CODE_FOR_mulv2si3
, "__builtin_mulv2si3", SH_BLTIN_V2SI3
, },
9213 { CODE_FOR_mulv4hi3
, "__builtin_mulv4hi3", SH_BLTIN_V4HI3
},
9214 { CODE_FOR_mmulfx_l
, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3
},
9215 { CODE_FOR_mmulfx_w
, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3
},
9216 { CODE_FOR_mmulfxrp_w
,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3
},
9217 { CODE_FOR_mmulhi_wl
, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI
},
9218 { CODE_FOR_mmullo_wl
, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI
},
9219 { CODE_FOR_mmulsum_wq
,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU
},
9220 { CODE_FOR_mperm_w
, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI
},
9221 { CODE_FOR_msad_ubq
, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU
},
9222 { CODE_FOR_mshalds_l
, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI
},
9223 { CODE_FOR_mshalds_w
, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI
},
9224 { CODE_FOR_ashrv2si3
, "__builtin_ashrv2si3", SH_BLTIN_SH_SI
},
9225 { CODE_FOR_ashrv4hi3
, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI
},
9226 { CODE_FOR_mshards_q
, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS
},
9227 { CODE_FOR_mshfhi_b
, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3
},
9228 { CODE_FOR_mshfhi_l
, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3
},
9229 { CODE_FOR_mshfhi_w
, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3
},
9230 { CODE_FOR_mshflo_b
, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3
},
9231 { CODE_FOR_mshflo_l
, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3
},
9232 { CODE_FOR_mshflo_w
, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3
},
9233 { CODE_FOR_ashlv2si3
, "__builtin_ashlv2si3", SH_BLTIN_SH_SI
},
9234 { CODE_FOR_ashlv4hi3
, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI
},
9235 { CODE_FOR_lshrv2si3
, "__builtin_lshrv2si3", SH_BLTIN_SH_SI
},
9236 { CODE_FOR_lshrv4hi3
, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI
},
9237 { CODE_FOR_subv2si3
, "__builtin_subv2si3", SH_BLTIN_V2SI3
},
9238 { CODE_FOR_subv4hi3
, "__builtin_subv4hi3", SH_BLTIN_V4HI3
},
9239 { CODE_FOR_sssubv2si3
,"__builtin_sssubv2si3", SH_BLTIN_V2SI3
},
9240 { CODE_FOR_ussubv8qi3
,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3
},
9241 { CODE_FOR_sssubv4hi3
,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3
},
9242 { CODE_FOR_fcosa_s
, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF
},
9243 { CODE_FOR_fsina_s
, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF
},
9244 { CODE_FOR_fipr
, "__builtin_sh_media_FIPR_S", SH_BLTIN_3
},
9245 { CODE_FOR_ftrv
, "__builtin_sh_media_FTRV_S", SH_BLTIN_3
},
9246 { CODE_FOR_mac_media
, "__builtin_sh_media_FMAC_S", SH_BLTIN_3
},
9247 { CODE_FOR_sqrtdf2
, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2
},
9248 { CODE_FOR_sqrtsf2
, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2
},
9249 { CODE_FOR_fsrra_s
, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2
},
9250 { CODE_FOR_ldhi_l
, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L
},
9251 { CODE_FOR_ldhi_q
, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q
},
9252 { CODE_FOR_ldlo_l
, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L
},
9253 { CODE_FOR_ldlo_q
, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q
},
9254 { CODE_FOR_sthi_l
, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L
},
9255 { CODE_FOR_sthi_q
, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q
},
9256 { CODE_FOR_stlo_l
, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L
},
9257 { CODE_FOR_stlo_q
, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q
},
9258 { CODE_FOR_ldhi_l64
, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64
},
9259 { CODE_FOR_ldhi_q64
, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64
},
9260 { CODE_FOR_ldlo_l64
, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64
},
9261 { CODE_FOR_ldlo_q64
, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64
},
9262 { CODE_FOR_sthi_l64
, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64
},
9263 { CODE_FOR_sthi_q64
, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64
},
9264 { CODE_FOR_stlo_l64
, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64
},
9265 { CODE_FOR_stlo_q64
, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64
},
9266 { CODE_FOR_nsb
, "__builtin_sh_media_NSB", SH_BLTIN_SU
},
9267 { CODE_FOR_byterev
, "__builtin_sh_media_BYTEREV", SH_BLTIN_2
},
9268 { CODE_FOR_prefetch
, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV
},
9272 sh_media_init_builtins (void)
9274 tree shared
[SH_BLTIN_NUM_SHARED_SIGNATURES
];
9275 const struct builtin_description
*d
;
9277 memset (shared
, 0, sizeof shared
);
9278 for (d
= bdesc
; d
- bdesc
< (int) ARRAY_SIZE (bdesc
); d
++)
9280 tree type
, arg_type
= 0;
9281 int signature
= d
->signature
;
9284 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
&& shared
[signature
])
9285 type
= shared
[signature
];
9288 int has_result
= signature_args
[signature
][0] != 0;
9290 if ((signature_args
[signature
][1] & 8)
9291 && (((signature_args
[signature
][1] & 1) && TARGET_SHMEDIA32
)
9292 || ((signature_args
[signature
][1] & 2) && TARGET_SHMEDIA64
)))
9294 if (! TARGET_FPU_ANY
9295 && FLOAT_MODE_P (insn_data
[d
->icode
].operand
[0].mode
))
9297 type
= void_list_node
;
9300 int arg
= signature_args
[signature
][i
];
9301 int opno
= i
- 1 + has_result
;
9304 arg_type
= ptr_type_node
;
9306 arg_type
= (*lang_hooks
.types
.type_for_mode
)
9307 (insn_data
[d
->icode
].operand
[opno
].mode
,
9312 arg_type
= void_type_node
;
9315 type
= tree_cons (NULL_TREE
, arg_type
, type
);
9317 type
= build_function_type (arg_type
, type
);
9318 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
)
9319 shared
[signature
] = type
;
9321 lang_hooks
.builtin_function (d
->name
, type
, d
- bdesc
, BUILT_IN_MD
,
9326 /* Implements target hook vector_mode_supported_p. */
/* SH4 FP supports V2SF/V4SF/V16SF; SHmedia supports the integer vector
   modes.  NOTE(review): the SH4/FPU guard on the first condition and the
   return statements were dropped by extraction.  */
9328 sh_vector_mode_supported_p (enum machine_mode mode
)
9331 && ((mode
== V2SFmode
)
9332 || (mode
== V4SFmode
)
9333 || (mode
== V16SFmode
)))
9336 else if (TARGET_SHMEDIA
9337 && ((mode
== V8QImode
)
9338 || (mode
== V2HImode
)
9339 || (mode
== V4HImode
)
9340 || (mode
== V2SImode
)))
9346 /* Implements target hook dwarf_calling_convention. Return an enum
9347 of dwarf_calling_convention. */
/* Functions with the Renesas attribute get the vendor-specific DWARF
   calling-convention code; everything else is DW_CC_normal.  */
9349 sh_dwarf_calling_convention (tree func
)
9351 if (sh_attr_renesas_p (func
))
9352 return DW_CC_GNU_renesas_sh
;
9354 return DW_CC_normal
;
/* TARGET_INIT_BUILTINS hook.  NOTE(review): the guard (presumably
   `if (TARGET_SHMEDIA)') was dropped by extraction.  */
9358 sh_init_builtins (void)
9361 sh_media_init_builtins ();
9364 /* Expand an expression EXP that calls a built-in function,
9365 with result going to TARGET if that's convenient
9366 (and in mode MODE if that's convenient).
9367 SUBTARGET may be used as the target for computing one of EXP's operands.
9368 IGNORE is nonzero if the value is to be ignored. */
9371 sh_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
9372 enum machine_mode mode ATTRIBUTE_UNUSED
, int ignore
)
9374 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
9375 tree arglist
= TREE_OPERAND (exp
, 1);
9376 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
9377 const struct builtin_description
*d
= &bdesc
[fcode
];
9378 enum insn_code icode
= d
->icode
;
9379 int signature
= d
->signature
;
9380 enum machine_mode tmode
= VOIDmode
;
9385 if (signature_args
[signature
][0])
9390 tmode
= insn_data
[icode
].operand
[0].mode
;
9392 || GET_MODE (target
) != tmode
9393 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9394 target
= gen_reg_rtx (tmode
);
9400 for (i
= 1; i
<= 3; i
++, nop
++)
9403 enum machine_mode opmode
, argmode
;
9406 if (! signature_args
[signature
][i
])
9408 arg
= TREE_VALUE (arglist
);
9409 if (arg
== error_mark_node
)
9411 arglist
= TREE_CHAIN (arglist
);
9412 if (signature_args
[signature
][i
] & 8)
9415 optype
= ptr_type_node
;
9419 opmode
= insn_data
[icode
].operand
[nop
].mode
;
9420 optype
= (*lang_hooks
.types
.type_for_mode
) (opmode
, 0);
9422 argmode
= TYPE_MODE (TREE_TYPE (arg
));
9423 if (argmode
!= opmode
)
9424 arg
= build1 (NOP_EXPR
, optype
, arg
);
9425 op
[nop
] = expand_expr (arg
, NULL_RTX
, opmode
, 0);
9426 if (! (*insn_data
[icode
].operand
[nop
].predicate
) (op
[nop
], opmode
))
9427 op
[nop
] = copy_to_mode_reg (opmode
, op
[nop
]);
9433 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0]);
9436 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1]);
9439 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2]);
9442 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2], op
[3]);
/* Expand a V2SF unary operation CODE as two scalar SF operations, one per
   vector lane (selectors 0 and 1).  */
9454 sh_expand_unop_v2sf (enum rtx_code code
, rtx op0
, rtx op1
)
9456 rtx sel0
= const0_rtx
;
9457 rtx sel1
= const1_rtx
;
9458 rtx (*fn
) (rtx
, rtx
, rtx
, rtx
, rtx
) = gen_unary_sf_op
;
9459 rtx op
= gen_rtx_fmt_e (code
, SFmode
, op1
);
/* Lane 0 then lane 1 of the destination.  */
9461 emit_insn ((*fn
) (op0
, op1
, op
, sel0
, sel0
));
9462 emit_insn ((*fn
) (op0
, op1
, op
, sel1
, sel1
));
/* Expand a V2SF binary operation CODE as two scalar SF operations, one per
   vector lane.  NOTE(review): the initializer of `fn' (orig. 9471,
   presumably = gen_binary_sf_op) was dropped by extraction.  */
9466 sh_expand_binop_v2sf (enum rtx_code code
, rtx op0
, rtx op1
, rtx op2
)
9468 rtx sel0
= const0_rtx
;
9469 rtx sel1
= const1_rtx
;
9470 rtx (*fn
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
)
9472 rtx op
= gen_rtx_fmt_ee (code
, SFmode
, op1
, op2
);
/* Lane 0 then lane 1; the trailing selector flags which lane is written.  */
9474 emit_insn ((*fn
) (op0
, op1
, op2
, op
, sel0
, sel0
, sel0
, sel1
));
9475 emit_insn ((*fn
) (op0
, op1
, op2
, op
, sel1
, sel1
, sel1
, sel0
));
9478 /* Return the class of registers for which a mode change from FROM to TO
/* CANNOT_CHANGE_MODE_CLASS target macro implementation: nonzero when CLASS
   intersects a register class for which the FROM->TO subreg reinterpretation
   is invalid.  The endianness cases differ because of DF register pairing.
   NOTE(review): the final `return 0;' and some brace lines were dropped by
   extraction.  */
9481 sh_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
9482 enum reg_class
class)
9484 /* We want to enable the use of SUBREGs as a means to
9485 VEC_SELECT a single element of a vector. */
9486 if (to
== SFmode
&& VECTOR_MODE_P (from
) && GET_MODE_INNER (from
) == SFmode
)
9487 return (reg_classes_intersect_p (GENERAL_REGS
, class));
9489 if (GET_MODE_SIZE (from
) != GET_MODE_SIZE (to
))
9491 if (TARGET_LITTLE_ENDIAN
)
9493 if (GET_MODE_SIZE (to
) < 8 || GET_MODE_SIZE (from
) < 8)
9494 return reg_classes_intersect_p (DF_REGS
, class);
9498 if (GET_MODE_SIZE (from
) < 8)
9499 return reg_classes_intersect_p (DF_HI_REGS
, class);
9506 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9507 that label is used. */
/* GOTOFF addresses wrap the label in an UNSPEC (possibly inside a PLUS);
   unwrap before bumping LABEL_NUSES.  */
9510 sh_mark_label (rtx address
, int nuses
)
9512 if (GOTOFF_P (address
))
9514 /* Extract the label or symbol. */
9515 address
= XEXP (address
, 0);
9516 if (GET_CODE (address
) == PLUS
)
9517 address
= XEXP (address
, 0);
9518 address
= XVECEXP (address
, 0, 0);
9520 if (GET_CODE (address
) == LABEL_REF
9521 && GET_CODE (XEXP (address
, 0)) == CODE_LABEL
)
9522 LABEL_NUSES (XEXP (address
, 0)) += nuses
;
9525 /* Compute extra cost of moving data between one register class
9528 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9529 uses this information. Hence, the general register <-> floating point
9530 register information here is not used for SFmode. */
9533 sh_register_move_cost (enum machine_mode mode
,
9534 enum reg_class srcclass
, enum reg_class dstclass
)
9536 if (dstclass
== T_REGS
|| dstclass
== PR_REGS
)
9539 if (dstclass
== MAC_REGS
&& srcclass
== MAC_REGS
)
9542 if (mode
== SImode
&& ! TARGET_SHMEDIA
&& TARGET_FMOVD
9543 && REGCLASS_HAS_FP_REG (srcclass
)
9544 && REGCLASS_HAS_FP_REG (dstclass
))
9547 if (REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== T_REGS
)
9548 return ((TARGET_HARD_SH4
&& !optimize_size
) ? 10 : 7);
9550 if ((REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== MAC_REGS
)
9551 || (dstclass
== MAC_REGS
&& REGCLASS_HAS_FP_REG (srcclass
)))
9554 if ((REGCLASS_HAS_FP_REG (dstclass
)
9555 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9556 || (REGCLASS_HAS_GENERAL_REG (dstclass
)
9557 && REGCLASS_HAS_FP_REG (srcclass
)))
9558 return ((TARGET_SHMEDIA
? 4 : TARGET_FMOVD
? 8 : 12)
9559 * ((GET_MODE_SIZE (mode
) + 7) / 8U));
9561 if ((dstclass
== FPUL_REGS
9562 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9563 || (srcclass
== FPUL_REGS
9564 && REGCLASS_HAS_GENERAL_REG (dstclass
)))
9567 if ((dstclass
== FPUL_REGS
9568 && (srcclass
== PR_REGS
|| srcclass
== MAC_REGS
|| srcclass
== T_REGS
))
9569 || (srcclass
== FPUL_REGS
9570 && (dstclass
== PR_REGS
|| dstclass
== MAC_REGS
)))
9573 if ((srcclass
== TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9574 || ((dstclass
) == TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9577 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9579 && ((srcclass
) == TARGET_REGS
|| (srcclass
) == SIBCALL_REGS
))
9581 if (sh_gettrcost
>= 0)
9582 return sh_gettrcost
;
9583 else if (!TARGET_PT_FIXED
)
9587 if ((srcclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9588 || (dstclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9593 && ! REGCLASS_HAS_GENERAL_REG (srcclass
)
9594 && ! REGCLASS_HAS_GENERAL_REG (dstclass
)))
9595 return 2 * ((GET_MODE_SIZE (mode
) + 7) / 8U);
9597 return 2 * ((GET_MODE_SIZE (mode
) + 3) / 4U);
9600 static rtx
emit_load_ptr (rtx
, rtx
);
9603 emit_load_ptr (rtx reg
, rtx addr
)
9605 rtx mem
= gen_const_mem (ptr_mode
, addr
);
9607 if (Pmode
!= ptr_mode
)
9608 mem
= gen_rtx_SIGN_EXTEND (Pmode
, mem
);
9609 return emit_move_insn (reg
, mem
);
9613 sh_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
9614 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
9617 CUMULATIVE_ARGS cum
;
9618 int structure_value_byref
= 0;
9619 rtx
this, this_value
, sibcall
, insns
, funexp
;
9620 tree funtype
= TREE_TYPE (function
);
9621 int simple_add
= CONST_OK_FOR_ADD (delta
);
9623 rtx scratch0
, scratch1
, scratch2
;
9626 reload_completed
= 1;
9627 epilogue_completed
= 1;
9629 current_function_uses_only_leaf_regs
= 1;
9630 reset_block_changes ();
9632 emit_note (NOTE_INSN_PROLOGUE_END
);
9634 /* Find the "this" pointer. We have such a wide range of ABIs for the
9635 SH that it's best to do this completely machine independently.
9636 "this" is passed as first argument, unless a structure return pointer
9637 comes first, in which case "this" comes second. */
9638 INIT_CUMULATIVE_ARGS (cum
, funtype
, NULL_RTX
, 0, 1);
9639 #ifndef PCC_STATIC_STRUCT_RETURN
9640 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
9641 structure_value_byref
= 1;
9642 #endif /* not PCC_STATIC_STRUCT_RETURN */
9643 if (structure_value_byref
&& sh_struct_value_rtx (function
, 0) == 0)
9645 tree ptype
= build_pointer_type (TREE_TYPE (funtype
));
9647 FUNCTION_ARG_ADVANCE (cum
, Pmode
, ptype
, 1);
9649 this = FUNCTION_ARG (cum
, Pmode
, ptr_type_node
, 1);
9651 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9652 static chain pointer (even if you can't have nested virtual functions
9653 right now, someone might implement them sometime), and the rest of the
9654 registers are used for argument passing, are callee-saved, or reserved. */
9655 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
9656 -ffixed-reg has been used. */
9657 if (! call_used_regs
[0] || fixed_regs
[0])
9658 error ("r0 needs to be available as a call-clobbered register");
9659 scratch0
= scratch1
= scratch2
= gen_rtx_REG (Pmode
, 0);
9662 if (call_used_regs
[1] && ! fixed_regs
[1])
9663 scratch1
= gen_rtx_REG (ptr_mode
, 1);
9664 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9665 pointing where to return struct values. */
9666 if (call_used_regs
[3] && ! fixed_regs
[3])
9667 scratch2
= gen_rtx_REG (Pmode
, 3);
9669 else if (TARGET_SHMEDIA
)
9671 for (i
= FIRST_GENERAL_REG
; i
<= LAST_GENERAL_REG
; i
++)
9672 if (i
!= REGNO (scratch0
) &&
9673 call_used_regs
[i
] && ! fixed_regs
[i
] && ! FUNCTION_ARG_REGNO_P (i
))
9675 scratch1
= gen_rtx_REG (ptr_mode
, i
);
9678 if (scratch1
== scratch0
)
9679 error ("Need a second call-clobbered general purpose register");
9680 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
9681 if (call_used_regs
[i
] && ! fixed_regs
[i
])
9683 scratch2
= gen_rtx_REG (Pmode
, i
);
9686 if (scratch2
== scratch0
)
9687 error ("Need a call-clobbered target register");
9690 this_value
= plus_constant (this, delta
);
9692 && (simple_add
|| scratch0
!= scratch1
)
9693 && strict_memory_address_p (ptr_mode
, this_value
))
9695 emit_load_ptr (scratch0
, this_value
);
9701 else if (simple_add
)
9702 emit_move_insn (this, this_value
);
9705 emit_move_insn (scratch1
, GEN_INT (delta
));
9706 emit_insn (gen_add2_insn (this, scratch1
));
9714 emit_load_ptr (scratch0
, this);
9716 offset_addr
= plus_constant (scratch0
, vcall_offset
);
9717 if (strict_memory_address_p (ptr_mode
, offset_addr
))
9719 else if (! TARGET_SH5
&& scratch0
!= scratch1
)
9721 /* scratch0 != scratch1, and we have indexed loads. Get better
9722 schedule by loading the offset into r1 and using an indexed
9723 load - then the load of r1 can issue before the load from
9724 (this + delta) finishes. */
9725 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9726 offset_addr
= gen_rtx_PLUS (Pmode
, scratch0
, scratch1
);
9728 else if (CONST_OK_FOR_ADD (vcall_offset
))
9730 emit_insn (gen_add2_insn (scratch0
, GEN_INT (vcall_offset
)));
9731 offset_addr
= scratch0
;
9733 else if (scratch0
!= scratch1
)
9735 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9736 emit_insn (gen_add2_insn (scratch0
, scratch1
));
9737 offset_addr
= scratch0
;
9740 gcc_unreachable (); /* FIXME */
9741 emit_load_ptr (scratch0
, offset_addr
);
9743 if (Pmode
!= ptr_mode
)
9744 scratch0
= gen_rtx_TRUNCATE (ptr_mode
, scratch0
);
9745 emit_insn (gen_add2_insn (this, scratch0
));
9748 /* Generate a tail call to the target function. */
9749 if (! TREE_USED (function
))
9751 assemble_external (function
);
9752 TREE_USED (function
) = 1;
9754 funexp
= XEXP (DECL_RTL (function
), 0);
9755 /* If the function is overridden, so is the thunk, hence we don't
9756 need GOT addressing even if this is a public symbol. */
9758 if (TARGET_SH1
&& ! flag_weak
)
9759 sibcall
= gen_sibcalli_thunk (funexp
, const0_rtx
);
9762 if (TARGET_SH2
&& flag_pic
)
9764 sibcall
= gen_sibcall_pcrel (funexp
, const0_rtx
);
9765 XEXP (XVECEXP (sibcall
, 0, 2), 0) = scratch2
;
9769 if (TARGET_SHMEDIA
&& flag_pic
)
9771 funexp
= gen_sym2PIC (funexp
);
9772 PUT_MODE (funexp
, Pmode
);
9774 emit_move_insn (scratch2
, funexp
);
9775 funexp
= gen_rtx_MEM (FUNCTION_MODE
, scratch2
);
9776 sibcall
= gen_sibcall (funexp
, const0_rtx
, NULL_RTX
);
9778 sibcall
= emit_call_insn (sibcall
);
9779 SIBLING_CALL_P (sibcall
) = 1;
9780 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall
), this);
9783 /* Run just enough of rest_of_compilation to do scheduling and get
9784 the insns emitted. Note that use_thunk calls
9785 assemble_start_function and assemble_end_function. */
9787 insn_locators_initialize ();
9788 insns
= get_insns ();
9792 /* Initialize the bitmap obstacks. */
9793 bitmap_obstack_initialize (NULL
);
9794 bitmap_obstack_initialize (®_obstack
);
9797 rtl_register_cfg_hooks ();
9798 init_rtl_bb_info (ENTRY_BLOCK_PTR
);
9799 init_rtl_bb_info (EXIT_BLOCK_PTR
);
9800 ENTRY_BLOCK_PTR
->flags
|= BB_RTL
;
9801 EXIT_BLOCK_PTR
->flags
|= BB_RTL
;
9802 find_basic_blocks (insns
);
9804 if (flag_schedule_insns_after_reload
)
9806 life_analysis (dump_file
, PROP_FINAL
);
9808 split_all_insns (1);
9810 schedule_insns (dump_file
);
9812 /* We must split jmp insn in PIC case. */
9814 split_all_insns_noflow ();
9819 if (optimize
> 0 && flag_delayed_branch
)
9820 dbr_schedule (insns
, dump_file
);
9822 shorten_branches (insns
);
9823 final_start_function (insns
, file
, 1);
9824 final (insns
, file
, 1);
9825 final_end_function ();
9829 /* Release all memory allocated by flow. */
9830 free_basic_block_vars ();
9832 /* Release the bitmap obstacks. */
9833 bitmap_obstack_release (®_obstack
);
9834 bitmap_obstack_release (NULL
);
9837 reload_completed
= 0;
9838 epilogue_completed
= 0;
9843 function_symbol (rtx target
, const char *name
, enum sh_function_kind kind
)
9847 /* If this is not an ordinary function, the name usually comes from a
9848 string literal or an sprintf buffer. Make sure we use the same
9849 string consistently, so that cse will be able to unify address loads. */
9850 if (kind
!= FUNCTION_ORDINARY
)
9851 name
= IDENTIFIER_POINTER (get_identifier (name
));
9852 sym
= gen_rtx_SYMBOL_REF (Pmode
, name
);
9853 SYMBOL_REF_FLAGS (sym
) = SYMBOL_FLAG_FUNCTION
;
9857 case FUNCTION_ORDINARY
:
9861 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9863 emit_insn (gen_symGOT2reg (reg
, sym
));
9869 /* ??? To allow cse to work, we use GOTOFF relocations.
9870 we could add combiner patterns to transform this into
9871 straight pc-relative calls with sym2PIC / bsrf when
9872 label load and function call are still 1:1 and in the
9873 same basic block during combine. */
9874 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9876 emit_insn (gen_symGOTOFF2reg (reg
, sym
));
9881 if (target
&& sym
!= target
)
9883 emit_move_insn (target
, sym
);
9889 /* Find the number of a general purpose register in S. */
9891 scavenge_reg (HARD_REG_SET
*s
)
9894 for (r
= FIRST_GENERAL_REG
; r
<= LAST_GENERAL_REG
; r
++)
9895 if (TEST_HARD_REG_BIT (*s
, r
))
9901 sh_get_pr_initial_val (void)
9905 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9906 PR register on SHcompact, because it might be clobbered by the prologue.
9907 We check first if that is known to be the case. */
9908 if (TARGET_SHCOMPACT
9909 && ((current_function_args_info
.call_cookie
9910 & ~ CALL_COOKIE_RET_TRAMP (1))
9911 || current_function_has_nonlocal_label
))
9912 return gen_frame_mem (SImode
, return_address_pointer_rtx
);
9914 /* If we haven't finished rtl generation, there might be a nonlocal label
9915 that we haven't seen yet.
9916 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9917 is set, unless it has been called before for the same register. And even
9918 then, we end in trouble if we didn't use the register in the same
9919 basic block before. So call get_hard_reg_initial_val now and wrap it
9920 in an unspec if we might need to replace it. */
9921 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9922 combine can put the pseudo returned by get_hard_reg_initial_val into
9923 instructions that need a general purpose registers, which will fail to
9924 be recognized when the pseudo becomes allocated to PR. */
9926 = get_hard_reg_initial_val (Pmode
, TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
);
9928 return gen_rtx_UNSPEC (SImode
, gen_rtvec (1, val
), UNSPEC_RA
);
9933 sh_expand_t_scc (enum rtx_code code
, rtx target
)
9935 rtx result
= target
;
9938 if (GET_CODE (sh_compare_op0
) != REG
|| REGNO (sh_compare_op0
) != T_REG
9939 || GET_CODE (sh_compare_op1
) != CONST_INT
)
9941 if (GET_CODE (result
) != REG
)
9942 result
= gen_reg_rtx (SImode
);
9943 val
= INTVAL (sh_compare_op1
);
9944 if ((code
== EQ
&& val
== 1) || (code
== NE
&& val
== 0))
9945 emit_insn (gen_movt (result
));
9946 else if ((code
== EQ
&& val
== 0) || (code
== NE
&& val
== 1))
9948 emit_insn (gen_rtx_CLOBBER (VOIDmode
, result
));
9949 emit_insn (gen_subc (result
, result
, result
));
9950 emit_insn (gen_addsi3 (result
, result
, const1_rtx
));
9952 else if (code
== EQ
|| code
== NE
)
9953 emit_insn (gen_move_insn (result
, GEN_INT (code
== NE
)));
9956 if (result
!= target
)
9957 emit_move_insn (target
, result
);
9961 /* INSN is an sfunc; return the rtx that describes the address used. */
9963 extract_sfunc_addr (rtx insn
)
9965 rtx pattern
, part
= NULL_RTX
;
9968 pattern
= PATTERN (insn
);
9969 len
= XVECLEN (pattern
, 0);
9970 for (i
= 0; i
< len
; i
++)
9972 part
= XVECEXP (pattern
, 0, i
);
9973 if (GET_CODE (part
) == USE
&& GET_MODE (XEXP (part
, 0)) == Pmode
9974 && GENERAL_REGISTER_P (true_regnum (XEXP (part
, 0))))
9975 return XEXP (part
, 0);
9977 gcc_assert (GET_CODE (XVECEXP (pattern
, 0, 0)) == UNSPEC_VOLATILE
);
9978 return XVECEXP (XVECEXP (pattern
, 0, 0), 0, 1);
9981 /* Verify that the register in use_sfunc_addr still agrees with the address
9982 used in the sfunc. This prevents fill_slots_from_thread from changing
9984 INSN is the use_sfunc_addr instruction, and REG is the register it
9987 check_use_sfunc_addr (rtx insn
, rtx reg
)
9989 /* Search for the sfunc. It should really come right after INSN. */
9990 while ((insn
= NEXT_INSN (insn
)))
9992 if (GET_CODE (insn
) == CODE_LABEL
|| GET_CODE (insn
) == JUMP_INSN
)
9994 if (! INSN_P (insn
))
9997 if (GET_CODE (PATTERN (insn
)) == SEQUENCE
)
9998 insn
= XVECEXP (PATTERN (insn
), 0, 0);
9999 if (GET_CODE (PATTERN (insn
)) != PARALLEL
10000 || get_attr_type (insn
) != TYPE_SFUNC
)
10002 return rtx_equal_p (extract_sfunc_addr (insn
), reg
);
10004 gcc_unreachable ();
10007 /* This function returns a constant rtx that represents pi / 2**15 in
10008 SFmode. it's used to scale SFmode angles, in radians, to a
10009 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10010 maps to 0x10000). */
10012 static GTY(()) rtx sh_fsca_sf2int_rtx
;
10015 sh_fsca_sf2int (void)
10017 if (! sh_fsca_sf2int_rtx
)
10019 REAL_VALUE_TYPE rv
;
10021 real_from_string (&rv
, "10430.378350470453");
10022 sh_fsca_sf2int_rtx
= const_double_from_real_value (rv
, SFmode
);
10025 return sh_fsca_sf2int_rtx
;
10028 /* This function returns a constant rtx that represents pi / 2**15 in
10029 DFmode. it's used to scale DFmode angles, in radians, to a
10030 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10031 maps to 0x10000). */
10033 static GTY(()) rtx sh_fsca_df2int_rtx
;
10036 sh_fsca_df2int (void)
10038 if (! sh_fsca_df2int_rtx
)
10040 REAL_VALUE_TYPE rv
;
10042 real_from_string (&rv
, "10430.378350470453");
10043 sh_fsca_df2int_rtx
= const_double_from_real_value (rv
, DFmode
);
10046 return sh_fsca_df2int_rtx
;
10049 /* This function returns a constant rtx that represents 2**15 / pi in
10050 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10051 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10054 static GTY(()) rtx sh_fsca_int2sf_rtx
;
10057 sh_fsca_int2sf (void)
10059 if (! sh_fsca_int2sf_rtx
)
10061 REAL_VALUE_TYPE rv
;
10063 real_from_string (&rv
, "9.587379924285257e-5");
10064 sh_fsca_int2sf_rtx
= const_double_from_real_value (rv
, SFmode
);
10067 return sh_fsca_int2sf_rtx
;
10070 /* Initialize the CUMULATIVE_ARGS structure. */
10073 sh_init_cumulative_args (CUMULATIVE_ARGS
* pcum
,
10075 rtx libname ATTRIBUTE_UNUSED
,
10077 signed int n_named_args
,
10078 enum machine_mode mode
)
10080 pcum
->arg_count
[(int) SH_ARG_FLOAT
] = 0;
10081 pcum
->free_single_fp_reg
= 0;
10082 pcum
->stack_regs
= 0;
10083 pcum
->byref_regs
= 0;
10085 pcum
->outgoing
= (n_named_args
== -1) ? 0 : 1;
10087 /* XXX - Should we check TARGET_HITACHI here ??? */
10088 pcum
->renesas_abi
= sh_attr_renesas_p (fntype
) ? 1 : 0;
10092 pcum
->force_mem
= ((TARGET_HITACHI
|| pcum
->renesas_abi
)
10093 && aggregate_value_p (TREE_TYPE (fntype
), fndecl
));
10094 pcum
->prototype_p
= TYPE_ARG_TYPES (fntype
) ? TRUE
: FALSE
;
10095 pcum
->arg_count
[(int) SH_ARG_INT
]
10096 = TARGET_SH5
&& aggregate_value_p (TREE_TYPE (fntype
), fndecl
);
10099 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10100 && pcum
->arg_count
[(int) SH_ARG_INT
] == 0
10101 && (TYPE_MODE (TREE_TYPE (fntype
)) == BLKmode
10102 ? int_size_in_bytes (TREE_TYPE (fntype
))
10103 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype
)))) > 4
10104 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype
)))
10105 == FIRST_RET_REG
));
10109 pcum
->arg_count
[(int) SH_ARG_INT
] = 0;
10110 pcum
->prototype_p
= FALSE
;
10111 if (mode
!= VOIDmode
)
10113 pcum
->call_cookie
=
10114 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10115 && GET_MODE_SIZE (mode
) > 4
10116 && BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
);
10118 /* If the default ABI is the Renesas ABI then all library
10119 calls must assume that the library will be using the
10120 Renesas ABI. So if the function would return its result
10121 in memory then we must force the address of this memory
10122 block onto the stack. Ideally we would like to call
10123 targetm.calls.return_in_memory() here but we do not have
10124 the TYPE or the FNDECL available so we synthesize the
10125 contents of that function as best we can. */
10127 (TARGET_DEFAULT
& MASK_HITACHI
)
10128 && (mode
== BLKmode
10129 || (GET_MODE_SIZE (mode
) > 4
10130 && !(mode
== DFmode
10131 && TARGET_FPU_DOUBLE
)));
10135 pcum
->call_cookie
= 0;
10136 pcum
->force_mem
= FALSE
;
10141 /* Determine if two hard register sets intersect.
10142 Return 1 if they do. */
10145 hard_regs_intersect_p (HARD_REG_SET
*a
, HARD_REG_SET
*b
)
10148 COPY_HARD_REG_SET (c
, *a
);
10149 AND_HARD_REG_SET (c
, *b
);
10150 GO_IF_HARD_REG_SUBSET (c
, reg_class_contents
[(int) NO_REGS
], lose
);
10156 #ifdef TARGET_ADJUST_UNROLL_MAX
10158 sh_adjust_unroll_max (struct loop
* loop
, int insn_count
,
10159 int max_unrolled_insns
, int strength_reduce_p
,
10162 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10163 if (TARGET_ADJUST_UNROLL
&& TARGET_SHMEDIA
)
10165 /* Throttle back loop unrolling so that the costs of using more
10166 targets than the eight target register we have don't outweigh
10167 the benefits of unrolling. */
10169 int n_labels
= 0, n_calls
= 0, n_exit_dest
= 0, n_inner_loops
= -1;
10170 int n_barriers
= 0;
10175 int unroll_benefit
= 0, mem_latency
= 0;
10176 int base_cost
, best_cost
, cost
;
10177 int factor
, best_factor
;
10179 unsigned max_iterations
= 32767;
10181 int need_precond
= 0, precond
= 0;
10182 basic_block
* bbs
= get_loop_body (loop
);
10183 struct niter_desc
*desc
;
10185 /* Assume that all labels inside the loop are used from inside the
10186 loop. If the loop has multiple entry points, it is unlikely to
10187 be unrolled anyways.
10188 Also assume that all calls are to different functions. That is
10189 somewhat pessimistic, but if you have lots of calls, unrolling the
10190 loop is not likely to gain you much in the first place. */
10191 i
= loop
->num_nodes
- 1;
10192 for (insn
= BB_HEAD (bbs
[i
]); ; )
10194 if (GET_CODE (insn
) == CODE_LABEL
)
10196 else if (GET_CODE (insn
) == CALL_INSN
)
10198 else if (GET_CODE (insn
) == NOTE
10199 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
10201 else if (GET_CODE (insn
) == BARRIER
)
10203 if (insn
!= BB_END (bbs
[i
]))
10204 insn
= NEXT_INSN (insn
);
10206 insn
= BB_HEAD (bbs
[i
]);
10211 /* One label for the loop top is normal, and it won't be duplicated by
10214 return max_unrolled_insns
;
10215 if (n_inner_loops
> 0)
10217 for (dest
= loop
->exit_labels
; dest
&& n_exit_dest
< 8;
10218 dest
= LABEL_NEXTREF (dest
))
10220 for (i
= n_exit_dest
- 1;
10221 i
>= 0 && XEXP (dest
, 0) != XEXP (exit_dest
[i
], 0); i
--);
10223 exit_dest
[n_exit_dest
++] = dest
;
10225 /* If the loop top and call and exit destinations are enough to fill up
10226 the target registers, we're unlikely to do any more damage by
10228 if (n_calls
+ n_exit_dest
>= 7)
10229 return max_unrolled_insns
;
10231 /* ??? In the new loop unroller, there is no longer any strength
10232 reduction information available. Thus, when it comes to unrolling,
10233 we know the cost of everything, but we know the value of nothing. */
10235 if (strength_reduce_p
10236 && (unroll_type
== LPT_UNROLL_RUNTIME
10237 || unroll_type
== LPT_UNROLL_CONSTANT
10238 || unroll_type
== LPT_PEEL_COMPLETELY
))
10240 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10241 struct iv_class
*bl
;
10243 /* We'll save one compare-and-branch in each loop body copy
10244 but the last one. */
10245 unroll_benefit
= 1;
10246 /* Assess the benefit of removing biv & giv updates. */
10247 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10249 rtx increment
= biv_total_increment (bl
);
10250 struct induction
*v
;
10252 if (increment
&& GET_CODE (increment
) == CONST_INT
)
10255 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10257 if (! v
->ignore
&& v
->same
== 0
10258 && GET_CODE (v
->mult_val
) == CONST_INT
)
10260 /* If this giv uses an array, try to determine
10261 a maximum iteration count from the size of the
10262 array. This need not be correct all the time,
10263 but should not be too far off the mark too often. */
10264 while (v
->giv_type
== DEST_ADDR
)
10266 rtx mem
= PATTERN (v
->insn
);
10267 tree mem_expr
, type
, size_tree
;
10269 if (GET_CODE (SET_SRC (mem
)) == MEM
)
10270 mem
= SET_SRC (mem
);
10271 else if (GET_CODE (SET_DEST (mem
)) == MEM
)
10272 mem
= SET_DEST (mem
);
10275 mem_expr
= MEM_EXPR (mem
);
10278 type
= TREE_TYPE (mem_expr
);
10279 if (TREE_CODE (type
) != ARRAY_TYPE
10280 || ! TYPE_SIZE (type
) || ! TYPE_SIZE_UNIT (type
))
10282 size_tree
= fold (build (TRUNC_DIV_EXPR
,
10285 TYPE_SIZE_UNIT (type
)));
10286 if (TREE_CODE (size_tree
) == INTEGER_CST
10287 && ! TREE_INT_CST_HIGH (size_tree
)
10288 && TREE_INT_CST_LOW (size_tree
) < max_iterations
)
10289 max_iterations
= TREE_INT_CST_LOW (size_tree
);
10297 /* Assume there is at least some benefit. */
10298 unroll_benefit
= 1;
10301 desc
= get_simple_loop_desc (loop
);
10302 n_iterations
= desc
->const_iter
? desc
->niter
: 0;
10304 = max_iterations
< desc
->niter_max
? max_iterations
: desc
->niter_max
;
10306 if (! strength_reduce_p
|| ! n_iterations
)
10308 if (! n_iterations
)
10311 = max_iterations
< 3 ? max_iterations
: max_iterations
* 3 / 4;
10312 if (! n_iterations
)
10315 #if 0 /* ??? See above - missing induction variable information. */
10316 while (unroll_benefit
> 1) /* no loop */
10318 /* We include the benefit of biv/ giv updates. Check if some or
10319 all of these updates are likely to fit into a scheduling
10321 We check for the following case:
10322 - All the insns leading to the first JUMP_INSN are in a strict
10324 - there is at least one memory reference in them.
10326 When we find such a pattern, we assume that we can hide as many
10327 updates as the total of the load latency is, if we have an
10328 unroll factor of at least two. We might or might not also do
10329 this without unrolling, so rather than considering this as an
10330 extra unroll benefit, discount it in the unroll benefits of unroll
10331 factors higher than two. */
10335 insn
= next_active_insn (loop
->start
);
10336 last_set
= single_set (insn
);
10339 if (GET_CODE (SET_SRC (last_set
)) == MEM
)
10341 for (insn
= NEXT_INSN (insn
); insn
!= end
; insn
= NEXT_INSN (insn
))
10343 if (! INSN_P (insn
))
10345 if (GET_CODE (insn
) == JUMP_INSN
)
10347 if (! reg_referenced_p (SET_DEST (last_set
), PATTERN (insn
)))
10349 /* Check if this is a to-be-reduced giv insn. */
10350 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10351 struct iv_class
*bl
;
10352 struct induction
*v
;
10353 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10355 if (bl
->biv
->insn
== insn
)
10357 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10358 if (v
->insn
== insn
)
10366 set
= single_set (insn
);
10369 if (GET_CODE (SET_SRC (set
)) == MEM
)
10373 if (mem_latency
< 0)
10375 else if (mem_latency
> unroll_benefit
- 1)
10376 mem_latency
= unroll_benefit
- 1;
10380 if (n_labels
+ (unroll_benefit
+ n_labels
* 8) / n_iterations
10382 return max_unrolled_insns
;
10384 n_dest
= n_labels
+ n_calls
+ n_exit_dest
;
10385 base_cost
= n_dest
<= 8 ? 0 : n_dest
- 7;
10388 if (n_barriers
* 2 > n_labels
- 1)
10389 n_barriers
= (n_labels
- 1) / 2;
10390 for (factor
= 2; factor
<= 8; factor
++)
10392 /* Bump up preconditioning cost for each power of two. */
10393 if (! (factor
& (factor
-1)))
10395 /* When preconditioning, only powers of two will be considered. */
10396 else if (need_precond
)
10398 n_dest
= ((unroll_type
!= LPT_PEEL_COMPLETELY
)
10399 + (n_labels
- 1) * factor
+ n_calls
+ n_exit_dest
10400 - (n_barriers
* factor
>> 1)
10403 = ((n_dest
<= 8 ? 0 : n_dest
- 7)
10404 - base_cost
* factor
10405 - ((factor
> 2 ? unroll_benefit
- mem_latency
: unroll_benefit
)
10406 * (factor
- (unroll_type
!= LPT_PEEL_COMPLETELY
)))
10407 + ((unroll_benefit
+ 1 + (n_labels
- 1) * factor
)
10410 cost
+= (precond
+ unroll_benefit
* factor
/ 2) / n_iterations
;
10411 if (cost
< best_cost
)
10414 best_factor
= factor
;
10417 threshold
= best_factor
* insn_count
;
10418 if (max_unrolled_insns
> threshold
)
10419 max_unrolled_insns
= threshold
;
10421 return max_unrolled_insns
;
10423 #endif /* TARGET_ADJUST_UNROLL_MAX */
10425 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10426 not enter into CONST_DOUBLE for the replace.
10428 Note that copying is not done so X must not be shared unless all copies
10429 are to be modified.
10431 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10432 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10433 replacements[n*2+1] - and that we take mode changes into account.
10435 If a replacement is ambiguous, return NULL_RTX.
10437 If MODIFY is zero, don't modify any rtl in place,
10438 just return zero or nonzero for failure / success. */
10441 replace_n_hard_rtx (rtx x
, rtx
*replacements
, int n_replacements
, int modify
)
10446 /* The following prevents loops occurrence when we change MEM in
10447 CONST_DOUBLE onto the same CONST_DOUBLE. */
10448 if (x
!= 0 && GET_CODE (x
) == CONST_DOUBLE
)
10451 for (i
= n_replacements
- 1; i
>= 0 ; i
--)
10452 if (x
== replacements
[i
*2] && GET_MODE (x
) == GET_MODE (replacements
[i
*2+1]))
10453 return replacements
[i
*2+1];
10455 /* Allow this function to make replacements in EXPR_LISTs. */
10459 if (GET_CODE (x
) == SUBREG
)
10461 rtx
new = replace_n_hard_rtx (SUBREG_REG (x
), replacements
,
10462 n_replacements
, modify
);
10464 if (GET_CODE (new) == CONST_INT
)
10466 x
= simplify_subreg (GET_MODE (x
), new,
10467 GET_MODE (SUBREG_REG (x
)),
10473 SUBREG_REG (x
) = new;
10477 else if (GET_CODE (x
) == REG
)
10479 unsigned regno
= REGNO (x
);
10480 unsigned nregs
= (regno
< FIRST_PSEUDO_REGISTER
10481 ? HARD_REGNO_NREGS (regno
, GET_MODE (x
)) : 1);
10482 rtx result
= NULL_RTX
;
10484 for (i
= n_replacements
- 1; i
>= 0; i
--)
10486 rtx from
= replacements
[i
*2];
10487 rtx to
= replacements
[i
*2+1];
10488 unsigned from_regno
, from_nregs
, to_regno
, new_regno
;
10490 if (GET_CODE (from
) != REG
)
10492 from_regno
= REGNO (from
);
10493 from_nregs
= (from_regno
< FIRST_PSEUDO_REGISTER
10494 ? HARD_REGNO_NREGS (from_regno
, GET_MODE (from
)) : 1);
10495 if (regno
< from_regno
+ from_nregs
&& regno
+ nregs
> from_regno
)
10497 if (regno
< from_regno
10498 || regno
+ nregs
> from_regno
+ nregs
10499 || GET_CODE (to
) != REG
10502 to_regno
= REGNO (to
);
10503 if (to_regno
< FIRST_PSEUDO_REGISTER
)
10505 new_regno
= regno
+ to_regno
- from_regno
;
10506 if ((unsigned) HARD_REGNO_NREGS (new_regno
, GET_MODE (x
))
10509 result
= gen_rtx_REG (GET_MODE (x
), new_regno
);
10511 else if (GET_MODE (x
) <= GET_MODE (to
))
10512 result
= gen_lowpart_common (GET_MODE (x
), to
);
10514 result
= gen_lowpart_SUBREG (GET_MODE (x
), to
);
10517 return result
? result
: x
;
10519 else if (GET_CODE (x
) == ZERO_EXTEND
)
10521 rtx
new = replace_n_hard_rtx (XEXP (x
, 0), replacements
,
10522 n_replacements
, modify
);
10524 if (GET_CODE (new) == CONST_INT
)
10526 x
= simplify_unary_operation (ZERO_EXTEND
, GET_MODE (x
),
10527 new, GET_MODE (XEXP (x
, 0)));
10537 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
10538 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
10544 new = replace_n_hard_rtx (XEXP (x
, i
), replacements
,
10545 n_replacements
, modify
);
10551 else if (fmt
[i
] == 'E')
10552 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
10554 new = replace_n_hard_rtx (XVECEXP (x
, i
, j
), replacements
,
10555 n_replacements
, modify
);
10559 XVECEXP (x
, i
, j
) = new;
10567 sh_gen_truncate (enum machine_mode mode
, rtx x
, int need_sign_ext
)
10569 enum rtx_code code
= TRUNCATE
;
10571 if (GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
)
10573 rtx inner
= XEXP (x
, 0);
10574 enum machine_mode inner_mode
= GET_MODE (inner
);
10576 if (inner_mode
== mode
)
10578 else if (GET_MODE_SIZE (inner_mode
) >= GET_MODE_SIZE (mode
))
10580 else if (GET_MODE_SIZE (inner_mode
) < GET_MODE_SIZE (mode
)
10581 && (! need_sign_ext
|| GET_CODE (x
) == SIGN_EXTEND
))
10583 code
= GET_CODE (x
);
10587 return gen_rtx_fmt_e (code
, mode
, x
);
10590 /* called via for_each_rtx after reload, to clean up truncates of
10591 registers that span multiple actual hard registers. */
10593 shmedia_cleanup_truncate (rtx
*p
, void *n_changes
)
10597 if (GET_CODE (x
) != TRUNCATE
)
10600 if (GET_MODE_SIZE (GET_MODE (reg
)) > 8 && GET_CODE (reg
) == REG
)
10602 enum machine_mode reg_mode
= GET_MODE (reg
);
10603 XEXP (x
, 0) = simplify_subreg (DImode
, reg
, reg_mode
,
10604 subreg_lowpart_offset (DImode
, reg_mode
));
10605 *(int*) n_changes
+= 1;
10611 /* Load and store depend on the highpart of the address. However,
10612 set_attr_alternative does not give well-defined results before reload,
10613 so we must look at the rtl ourselves to see if any of the feeding
10614 registers is used in a memref. */
10616 /* Called by sh_contains_memref_p via for_each_rtx. */
10618 sh_contains_memref_p_1 (rtx
*loc
, void *data ATTRIBUTE_UNUSED
)
10620 return (GET_CODE (*loc
) == MEM
);
10623 /* Return nonzero iff INSN contains a MEM. */
10625 sh_contains_memref_p (rtx insn
)
10627 return for_each_rtx (&PATTERN (insn
), &sh_contains_memref_p_1
, NULL
);
10630 /* FNADDR is the MEM expression from a call expander. Return an address
10631 to use in an SHmedia insn pattern. */
10633 shmedia_prepare_call_address (rtx fnaddr
, int is_sibcall
)
10637 fnaddr
= XEXP (fnaddr
, 0);
10638 is_sym
= GET_CODE (fnaddr
) == SYMBOL_REF
;
10639 if (flag_pic
&& is_sym
)
10641 if (! SYMBOL_REF_LOCAL_P (fnaddr
))
10643 rtx reg
= gen_reg_rtx (Pmode
);
10645 /* We must not use GOTPLT for sibcalls, because PIC_REG
10646 must be restored before the PLT code gets to run. */
10648 emit_insn (gen_symGOT2reg (reg
, fnaddr
));
10650 emit_insn (gen_symGOTPLT2reg (reg
, fnaddr
));
10655 fnaddr
= gen_sym2PIC (fnaddr
);
10656 PUT_MODE (fnaddr
, Pmode
);
10659 /* If ptabs might trap, make this visible to the rest of the compiler.
10660 We generally assume that symbols pertain to valid locations, but
10661 it is possible to generate invalid symbols with asm or linker tricks.
10662 In a list of functions where each returns its successor, an invalid
10663 symbol might denote an empty list. */
10664 if (!TARGET_PT_FIXED
10665 && (!is_sym
|| TARGET_INVALID_SYMBOLS
)
10666 && (!REG_P (fnaddr
) || ! TARGET_REGISTER_P (REGNO (fnaddr
))))
10668 rtx tr
= gen_reg_rtx (PDImode
);
10670 emit_insn (gen_ptabs (tr
, fnaddr
));
10673 else if (! target_reg_operand (fnaddr
, Pmode
))
10674 fnaddr
= copy_to_mode_reg (Pmode
, fnaddr
);
10679 sh_secondary_reload (bool in_p
, rtx x
, enum reg_class
class,
10680 enum machine_mode mode
, secondary_reload_info
*sri
)
10684 if (REGCLASS_HAS_FP_REG (class)
10685 && ! TARGET_SHMEDIA
10686 && immediate_operand ((x
), mode
)
10687 && ! ((fp_zero_operand (x
) || fp_one_operand (x
))
10688 && mode
== SFmode
&& fldi_ok ()))
10692 sri
->icode
= CODE_FOR_reload_insf__frn
;
10695 sri
->icode
= CODE_FOR_reload_indf__frn
;
10698 /* ??? If we knew that we are in the appropriate mode -
10699 single precision - we could use a reload pattern directly. */
10704 if (class == FPUL_REGS
10705 && ((GET_CODE (x
) == REG
10706 && (REGNO (x
) == MACL_REG
|| REGNO (x
) == MACH_REG
10707 || REGNO (x
) == T_REG
))
10708 || GET_CODE (x
) == PLUS
))
10709 return GENERAL_REGS
;
10710 if (class == FPUL_REGS
&& immediate_operand (x
, mode
))
10712 if (GET_CODE (x
) == CONST_INT
&& CONST_OK_FOR_I08 (INTVAL (x
)))
10713 return GENERAL_REGS
;
10714 sri
->icode
= CODE_FOR_reload_insi__i_fpul
;
10717 if (class == FPSCR_REGS
10718 && ((GET_CODE (x
) == REG
&& REGNO (x
) >= FIRST_PSEUDO_REGISTER
)
10719 || (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == PLUS
)))
10720 return GENERAL_REGS
;
10721 if (REGCLASS_HAS_FP_REG (class)
10723 && immediate_operand (x
, mode
)
10724 && x
!= CONST0_RTX (GET_MODE (x
))
10725 && GET_MODE (x
) != V4SFmode
)
10726 return GENERAL_REGS
;
10727 if ((mode
== QImode
|| mode
== HImode
)
10728 && TARGET_SHMEDIA
&& inqhi_operand (x
, mode
))
10730 sri
->icode
= ((mode
== QImode
)
10731 ? CODE_FOR_reload_inqi
: CODE_FOR_reload_inhi
);
10734 if (TARGET_SHMEDIA
&& class == GENERAL_REGS
10735 && (GET_CODE (x
) == LABEL_REF
|| PIC_DIRECT_ADDR_P (x
)))
10736 return TARGET_REGS
;
10737 } /* end of input-only processing. */
10739 if (((REGCLASS_HAS_FP_REG (class)
10740 && (GET_CODE (x
) == REG
10741 && (GENERAL_OR_AP_REGISTER_P (REGNO (x
))
10742 || (FP_REGISTER_P (REGNO (x
)) && mode
== SImode
10743 && TARGET_FMOVD
))))
10744 || (REGCLASS_HAS_GENERAL_REG (class)
10745 && GET_CODE (x
) == REG
10746 && FP_REGISTER_P (REGNO (x
))))
10747 && ! TARGET_SHMEDIA
10748 && (mode
== SFmode
|| mode
== SImode
))
10750 if ((class == FPUL_REGS
10751 || (REGCLASS_HAS_FP_REG (class)
10752 && ! TARGET_SHMEDIA
&& mode
== SImode
))
10753 && (GET_CODE (x
) == MEM
10754 || (GET_CODE (x
) == REG
10755 && (REGNO (x
) >= FIRST_PSEUDO_REGISTER
10756 || REGNO (x
) == T_REG
10757 || system_reg_operand (x
, VOIDmode
)))))
10759 if (class == FPUL_REGS
)
10760 return GENERAL_REGS
;
10763 if ((class == TARGET_REGS
10764 || (TARGET_SHMEDIA
&& class == SIBCALL_REGS
))
10765 && !EXTRA_CONSTRAINT_Csy (x
)
10766 && (GET_CODE (x
) != REG
|| ! GENERAL_REGISTER_P (REGNO (x
))))
10767 return GENERAL_REGS
;
10768 if ((class == MAC_REGS
|| class == PR_REGS
)
10769 && GET_CODE (x
) == REG
&& ! GENERAL_REGISTER_P (REGNO (x
))
10770 && class != REGNO_REG_CLASS (REGNO (x
)))
10771 return GENERAL_REGS
;
10772 if (class != GENERAL_REGS
&& GET_CODE (x
) == REG
10773 && TARGET_REGISTER_P (REGNO (x
)))
10774 return GENERAL_REGS
;
/* The division strategy currently in effect; initialized to the
   target-specific default.  NOTE(review): presumably overwritten by
   -mdiv= option handling elsewhere in this file -- confirm.  */
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target */
const char * boardtype = "7750p2";
const char * osruntime = "bare";