1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "c-pragma.h"
41 #include "integrate.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
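/* Illustrative sketch, not part of the original file: MSW/LSW give the word
   index of the most/least significant SImode half of a doubleword value, so
   endianness-dependent code can be written once.  The helper name below is
   hypothetical.  */
static rtx
sh_sketch_low_word (rtx di_value)
{
  /* Byte offset 4 * LSW selects the least significant 32 bits regardless
     of TARGET_LITTLE_ENDIAN.  */
  return simplify_gen_subreg (SImode, di_value, DImode, 4 * LSW);
}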
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
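/* A minimal usage sketch (hypothetical helper, not in the original file):
   SH_ATTRIBUTES accepts either a type or a decl, so attribute lookups can
   be written uniformly.  */
static bool
sh_sketch_has_attribute (tree node, const char *name)
{
  return lookup_attribute (name, SH_ATTRIBUTES (node)) != NULL_TREE;
}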
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Saved operands from the last compare to use when we generate an scc
110 or bcc insn. */
112 rtx sh_compare_op0;
113 rtx sh_compare_op1;
115 /* Provides the class number of the smallest class containing
116 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
168 int assembler_dialect;
170 static bool shmedia_space_reserved_for_target_registers;
172 static bool sh_handle_option (size_t, const char *, int);
173 static void split_branches (rtx);
174 static int branch_dest (rtx);
175 static void force_into (rtx, rtx);
176 static void print_slot (rtx);
177 static rtx add_constant (rtx, enum machine_mode, rtx);
178 static void dump_table (rtx, rtx);
179 static int hi_const (rtx);
180 static int broken_move (rtx);
181 static int mova_p (rtx);
182 static rtx find_barrier (int, rtx, rtx);
183 static int noncall_uses_reg (rtx, rtx, rtx *);
184 static rtx gen_block_redirect (rtx, int, int);
185 static void sh_reorg (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
187 static rtx frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET *, int);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static rtx mark_constant_pool_use (rtx);
194 const struct attribute_spec sh_attribute_table[];
195 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
196 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static int find_r0_life_regions (basic_block);
213 static void sh_md_init_global (FILE *, int, int);
214 static void sh_md_finish_global (FILE *, int);
215 static int rank_for_reorder (const void *, const void *);
216 static void swap_reorder (rtx *, int);
217 static void ready_reorder (rtx *, int);
218 static short high_pressure (enum machine_mode);
219 static int sh_reorder (FILE *, int, rtx *, int *, int);
220 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
221 static void sh_md_init (FILE *, int, int);
222 static int sh_variable_issue (FILE *, int, rtx, int);
224 static bool sh_function_ok_for_sibcall (tree, tree);
226 static bool sh_cannot_modify_jumps_p (void);
227 static enum reg_class sh_target_reg_class (void);
228 static bool sh_optimize_target_register_callee_saved (bool);
229 static bool sh_ms_bitfield_layout_p (const_tree);
231 static void sh_init_builtins (void);
232 static void sh_media_init_builtins (void);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
249 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
250 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
251 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
252 static int scavenge_reg (HARD_REG_SET *s);
253 struct save_schedule_s;
254 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
255 struct save_schedule_s *, int);
257 static rtx sh_struct_value_rtx (tree, int);
258 static bool sh_return_in_memory (const_tree, const_tree);
259 static rtx sh_builtin_saveregs (void);
260 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
261 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
262 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
263 static tree sh_build_builtin_va_list (void);
264 static void sh_va_start (tree, rtx);
265 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
267 const_tree, bool);
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
269 const_tree, bool);
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
271 tree, bool);
272 static bool sh_scalar_mode_supported_p (enum machine_mode);
273 static int sh_dwarf_calling_convention (const_tree);
274 static void sh_encode_section_info (tree, rtx, int);
275 static int sh2a_function_vector_p (tree);
278 /* Initialize the GCC target structure. */
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
282 /* The next two are used for debug info when compiling with -gdwarf. */
283 #undef TARGET_ASM_UNALIGNED_HI_OP
284 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
285 #undef TARGET_ASM_UNALIGNED_SI_OP
286 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
288 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
289 #undef TARGET_ASM_UNALIGNED_DI_OP
290 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
291 #undef TARGET_ASM_ALIGNED_DI_OP
292 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
294 #undef TARGET_ASM_FUNCTION_EPILOGUE
295 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
297 #undef TARGET_ASM_OUTPUT_MI_THUNK
298 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
300 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
301 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
303 #undef TARGET_ASM_FILE_START
304 #define TARGET_ASM_FILE_START sh_file_start
305 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
306 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
308 #undef TARGET_DEFAULT_TARGET_FLAGS
309 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
310 #undef TARGET_HANDLE_OPTION
311 #define TARGET_HANDLE_OPTION sh_handle_option
313 #undef TARGET_INSERT_ATTRIBUTES
314 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
316 #undef TARGET_SCHED_ADJUST_COST
317 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
319 #undef TARGET_SCHED_ISSUE_RATE
320 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
322 /* The next 5 hooks have been implemented for re-enabling sched1.  With the
323 help of these macros we limit the movement of insns in sched1 to
324 reduce register pressure.  The overall idea is to keep count of the SImode
325 and SFmode regs required by already scheduled insns.  When these counts
326 cross certain threshold values, priority is given to insns that free registers.
327 The insn that frees registers is most likely to be the insn with the lowest
328 LUID (original insn order); but such an insn might be sitting in the stalled
329 queue (Q) instead of the ready queue (R).  To solve this, we skip cycles,
330 up to a maximum of 8, so that such insns may move from Q -> R.
332 The descriptions of the hooks are as follows:
334 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
335 scheduler; it is called inside the sched_init function just after
336 find_insn_reg_weights function call. It is used to calculate the SImode
337 and SFmode weights of the insns of basic blocks, much like what
338 find_insn_reg_weights does.
339 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
341 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
342 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
343 (Q)->(R).
345 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
346 high, reorder the ready queue so that the insn with the lowest LUID will be
347 issued next.
349 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
350 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
352 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
353 can be returned from TARGET_SCHED_REORDER2.
355 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
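/* Sketch of the pressure test these hooks revolve around; the threshold
   values and the helper name are illustrative only (the real check is
   high_pressure () further down, driven by the backend's per-mode
   maximum-weight limits).  */
static int
sh_sketch_pressure_high_p (enum machine_mode mode)
{
  int limit = (mode == SFmode) ? 32 : 5;   /* illustrative thresholds */
  return curr_regmode_pressure[(mode == SImode) ? 0 : 1] >= limit;
}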
357 #undef TARGET_SCHED_DFA_NEW_CYCLE
358 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
360 #undef TARGET_SCHED_INIT_GLOBAL
361 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
363 #undef TARGET_SCHED_FINISH_GLOBAL
364 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
366 #undef TARGET_SCHED_VARIABLE_ISSUE
367 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
369 #undef TARGET_SCHED_REORDER
370 #define TARGET_SCHED_REORDER sh_reorder
372 #undef TARGET_SCHED_REORDER2
373 #define TARGET_SCHED_REORDER2 sh_reorder2
375 #undef TARGET_SCHED_INIT
376 #define TARGET_SCHED_INIT sh_md_init
378 #undef TARGET_LEGITIMIZE_ADDRESS
379 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
381 #undef TARGET_CANNOT_MODIFY_JUMPS_P
382 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
383 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
384 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
385 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
386 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
387 sh_optimize_target_register_callee_saved
389 #undef TARGET_MS_BITFIELD_LAYOUT_P
390 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
392 #undef TARGET_INIT_BUILTINS
393 #define TARGET_INIT_BUILTINS sh_init_builtins
394 #undef TARGET_EXPAND_BUILTIN
395 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
397 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
398 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
400 #undef TARGET_CANNOT_COPY_INSN_P
401 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
402 #undef TARGET_RTX_COSTS
403 #define TARGET_RTX_COSTS sh_rtx_costs
404 #undef TARGET_ADDRESS_COST
405 #define TARGET_ADDRESS_COST sh_address_cost
406 #undef TARGET_ALLOCATE_INITIAL_VALUE
407 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
412 #undef TARGET_DWARF_REGISTER_SPAN
413 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
415 #ifdef HAVE_AS_TLS
416 #undef TARGET_HAVE_TLS
417 #define TARGET_HAVE_TLS true
418 #endif
420 #undef TARGET_PROMOTE_PROTOTYPES
421 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
422 #undef TARGET_PROMOTE_FUNCTION_ARGS
423 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
424 #undef TARGET_PROMOTE_FUNCTION_RETURN
425 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
427 #undef TARGET_STRUCT_VALUE_RTX
428 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
429 #undef TARGET_RETURN_IN_MEMORY
430 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
432 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
433 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
434 #undef TARGET_SETUP_INCOMING_VARARGS
435 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
436 #undef TARGET_STRICT_ARGUMENT_NAMING
437 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
438 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
439 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
440 #undef TARGET_MUST_PASS_IN_STACK
441 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
442 #undef TARGET_PASS_BY_REFERENCE
443 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
444 #undef TARGET_CALLEE_COPIES
445 #define TARGET_CALLEE_COPIES sh_callee_copies
446 #undef TARGET_ARG_PARTIAL_BYTES
447 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
449 #undef TARGET_BUILD_BUILTIN_VA_LIST
450 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
451 #undef TARGET_EXPAND_BUILTIN_VA_START
452 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
453 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
454 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
456 #undef TARGET_SCALAR_MODE_SUPPORTED_P
457 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
458 #undef TARGET_VECTOR_MODE_SUPPORTED_P
459 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
461 #undef TARGET_CHECK_PCH_TARGET_FLAGS
462 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
464 #undef TARGET_DWARF_CALLING_CONVENTION
465 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
467 /* Return regmode weight for insn. */
468 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
470 /* Return current register pressure for regmode. */
471 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
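/* Illustrative sketch (hypothetical helper, not in the original file): the
   two macros above are meant to be used together -- when an insn issues,
   its cached per-mode weight is added to the running pressure counter.  */
static void
sh_sketch_account_issued_insn (rtx insn, enum machine_mode mode)
{
  CURR_REGMODE_PRESSURE (mode) += INSN_REGMODE_WEIGHT (insn, mode);
}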
473 #undef TARGET_ENCODE_SECTION_INFO
474 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
476 #ifdef SYMBIAN
478 #undef TARGET_ENCODE_SECTION_INFO
479 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
480 #undef TARGET_STRIP_NAME_ENCODING
481 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
482 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
483 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
485 #endif /* SYMBIAN */
487 #undef TARGET_SECONDARY_RELOAD
488 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
490 /* Machine-specific symbol_ref flags. */
491 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
493 struct gcc_target targetm = TARGET_INITIALIZER;
495 /* Implement TARGET_HANDLE_OPTION. */
497 static bool
498 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
499 int value ATTRIBUTE_UNUSED)
501 switch (code)
503 case OPT_m1:
504 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
505 return true;
507 case OPT_m2:
508 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
509 return true;
511 case OPT_m2a:
512 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
513 return true;
515 case OPT_m2a_nofpu:
516 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
517 return true;
519 case OPT_m2a_single:
520 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
521 return true;
523 case OPT_m2a_single_only:
524 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
525 return true;
527 case OPT_m2e:
528 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
529 return true;
531 case OPT_m3:
532 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
533 return true;
535 case OPT_m3e:
536 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
537 return true;
539 case OPT_m4:
540 case OPT_m4_100:
541 case OPT_m4_200:
542 case OPT_m4_300:
543 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
544 return true;
546 case OPT_m4_nofpu:
547 case OPT_m4_100_nofpu:
548 case OPT_m4_200_nofpu:
549 case OPT_m4_300_nofpu:
550 case OPT_m4_340:
551 case OPT_m4_400:
552 case OPT_m4_500:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
554 return true;
556 case OPT_m4_single:
557 case OPT_m4_100_single:
558 case OPT_m4_200_single:
559 case OPT_m4_300_single:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
561 return true;
563 case OPT_m4_single_only:
564 case OPT_m4_100_single_only:
565 case OPT_m4_200_single_only:
566 case OPT_m4_300_single_only:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
568 return true;
570 case OPT_m4a:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
572 return true;
574 case OPT_m4a_nofpu:
575 case OPT_m4al:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
577 return true;
579 case OPT_m4a_single:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
581 return true;
583 case OPT_m4a_single_only:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
585 return true;
587 case OPT_m5_32media:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
589 return true;
591 case OPT_m5_32media_nofpu:
592 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
593 return true;
595 case OPT_m5_64media:
596 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
597 return true;
599 case OPT_m5_64media_nofpu:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
601 return true;
603 case OPT_m5_compact:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
605 return true;
607 case OPT_m5_compact_nofpu:
608 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
609 return true;
611 default:
612 return true;
616 /* Print the operand address in x to the stream. */
618 void
619 print_operand_address (FILE *stream, rtx x)
621 switch (GET_CODE (x))
623 case REG:
624 case SUBREG:
625 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
626 break;
628 case PLUS:
630 rtx base = XEXP (x, 0);
631 rtx index = XEXP (x, 1);
633 switch (GET_CODE (index))
635 case CONST_INT:
636 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
637 reg_names[true_regnum (base)]);
638 break;
640 case REG:
641 case SUBREG:
643 int base_num = true_regnum (base);
644 int index_num = true_regnum (index);
646 fprintf (stream, "@(r0,%s)",
647 reg_names[MAX (base_num, index_num)]);
648 break;
651 default:
652 gcc_unreachable ();
655 break;
657 case PRE_DEC:
658 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
659 break;
661 case POST_INC:
662 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
663 break;
665 default:
666 x = mark_constant_pool_use (x);
667 output_addr_const (stream, x);
668 break;
672 /* Print operand x (an rtx) in assembler syntax to file stream
673 according to modifier code.
675 '.' print a .s if insn needs delay slot
676 ',' print LOCAL_LABEL_PREFIX
677 '@' print trapa, rte or rts, depending on the function's interrupt attributes
678 '#' output a nop if there is nothing to put in the delay slot
679 ''' print likelihood suffix (/u for unlikely).
680 '>' print branch target if -fverbose-asm
681 'O' print a constant without the #
682 'R' print the LSW of a dp value - changes if in little endian
683 'S' print the MSW of a dp value - changes if in little endian
684 'T' print the next word of a dp value - same as 'R' in big endian mode.
685 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
686 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
687 'N' print 'r63' if the operand is (const_int 0).
688 'd' print a V2SF reg as dN instead of fpN.
689 'm' print a pair `base,offset' or `base,index', for LD and ST.
690 'U' Likewise for {LD,ST}{HI,LO}.
691 'V' print the position of a single bit set.
692 'W' print the position of a single bit cleared.
693 't' print a memory address which is a register.
694 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
695 'o' output an operator. */
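/* Hypothetical usage sketch, not part of the original file: the modifiers
   above are normally driven from output templates, but the same effect can
   be had by calling print_operand directly, e.g. emitting the high word and
   then the low word of a doubleword operand.  */
static void
sh_sketch_print_dword_halves (FILE *stream, rtx x)
{
  print_operand (stream, x, 'S');   /* most significant SImode word */
  fputs (", ", stream);
  print_operand (stream, x, 'R');   /* least significant SImode word */
}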
697 void
698 print_operand (FILE *stream, rtx x, int code)
700 int regno;
701 enum machine_mode mode;
703 switch (code)
705 tree trapa_attr;
707 case '.':
708 if (final_sequence
709 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
710 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
711 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
712 break;
713 case ',':
714 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
715 break;
716 case '@':
717 trapa_attr = lookup_attribute ("trap_exit",
718 DECL_ATTRIBUTES (current_function_decl));
719 if (trapa_attr)
720 fprintf (stream, "trapa #%ld",
721 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
722 else if (sh_cfun_interrupt_handler_p ())
724 if (sh_cfun_resbank_handler_p ())
725 fprintf (stream, "resbank\n");
726 fprintf (stream, "rte");
728 else
729 fprintf (stream, "rts");
730 break;
731 case '#':
732 /* Output a nop if there's nothing in the delay slot. */
733 if (dbr_sequence_length () == 0)
734 fprintf (stream, "\n\tnop");
735 break;
736 case '\'':
738 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
740 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
741 fputs ("/u", stream);
742 break;
744 case '>':
745 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
747 fputs ("\t! target: ", stream);
748 output_addr_const (stream, JUMP_LABEL (current_output_insn));
750 break;
751 case 'O':
752 x = mark_constant_pool_use (x);
753 output_addr_const (stream, x);
754 break;
755 /* N.B.: %R / %S / %T adjust memory addresses by four.
756 For SHMEDIA, that means they can be used to access the first and
757 second 32 bit part of a 64 bit (or larger) value that
758 might be held in floating point registers or memory.
759 While they can be used to access 64 bit parts of a larger value
760 held in general purpose registers, that won't work with memory, nor
761 with fp registers, since the frxx names are used. */
762 case 'R':
763 if (REG_P (x) || GET_CODE (x) == SUBREG)
765 regno = true_regnum (x);
766 regno += FP_REGISTER_P (regno) ? 1 : LSW;
767 fputs (reg_names[regno], (stream));
769 else if (MEM_P (x))
771 x = adjust_address (x, SImode, 4 * LSW);
772 print_operand_address (stream, XEXP (x, 0));
774 else
776 rtx sub = NULL_RTX;
778 mode = GET_MODE (x);
779 if (mode == VOIDmode)
780 mode = DImode;
781 if (GET_MODE_SIZE (mode) >= 8)
782 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
783 if (sub)
784 print_operand (stream, sub, 0);
785 else
786 output_operand_lossage ("invalid operand to %%R");
788 break;
789 case 'S':
790 if (REG_P (x) || GET_CODE (x) == SUBREG)
792 regno = true_regnum (x);
793 regno += FP_REGISTER_P (regno) ? 0 : MSW;
794 fputs (reg_names[regno], (stream));
796 else if (MEM_P (x))
798 x = adjust_address (x, SImode, 4 * MSW);
799 print_operand_address (stream, XEXP (x, 0));
801 else
803 rtx sub = NULL_RTX;
805 mode = GET_MODE (x);
806 if (mode == VOIDmode)
807 mode = DImode;
808 if (GET_MODE_SIZE (mode) >= 8)
809 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
810 if (sub)
811 print_operand (stream, sub, 0);
812 else
813 output_operand_lossage ("invalid operand to %%S");
815 break;
816 case 'T':
817 /* Next word of a double. */
818 switch (GET_CODE (x))
820 case REG:
821 fputs (reg_names[REGNO (x) + 1], (stream));
822 break;
823 case MEM:
824 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
825 && GET_CODE (XEXP (x, 0)) != POST_INC)
826 x = adjust_address (x, SImode, 4);
827 print_operand_address (stream, XEXP (x, 0));
828 break;
829 default:
830 break;
832 break;
834 case 't':
835 gcc_assert (GET_CODE (x) == MEM);
836 x = XEXP (x, 0);
837 switch (GET_CODE (x))
839 case REG:
840 case SUBREG:
841 print_operand (stream, x, 0);
842 break;
843 default:
844 break;
846 break;
848 case 'o':
849 switch (GET_CODE (x))
851 case PLUS: fputs ("add", stream); break;
852 case MINUS: fputs ("sub", stream); break;
853 case MULT: fputs ("mul", stream); break;
854 case DIV: fputs ("div", stream); break;
855 case EQ: fputs ("eq", stream); break;
856 case NE: fputs ("ne", stream); break;
857 case GT: case LT: fputs ("gt", stream); break;
858 case GE: case LE: fputs ("ge", stream); break;
859 case GTU: case LTU: fputs ("gtu", stream); break;
860 case GEU: case LEU: fputs ("geu", stream); break;
861 default:
862 break;
864 break;
865 case 'M':
866 if (TARGET_SHMEDIA)
868 if (GET_CODE (x) == MEM
869 && GET_CODE (XEXP (x, 0)) == PLUS
870 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
871 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
872 fputc ('x', stream);
874 else
876 if (GET_CODE (x) == MEM)
878 switch (GET_MODE (x))
880 case QImode: fputs (".b", stream); break;
881 case HImode: fputs (".w", stream); break;
882 case SImode: fputs (".l", stream); break;
883 case SFmode: fputs (".s", stream); break;
884 case DFmode: fputs (".d", stream); break;
885 default: gcc_unreachable ();
889 break;
891 case 'm':
892 gcc_assert (GET_CODE (x) == MEM);
893 x = XEXP (x, 0);
894 /* Fall through. */
895 case 'U':
896 switch (GET_CODE (x))
898 case REG:
899 case SUBREG:
900 print_operand (stream, x, 0);
901 fputs (", 0", stream);
902 break;
904 case PLUS:
905 print_operand (stream, XEXP (x, 0), 0);
906 fputs (", ", stream);
907 print_operand (stream, XEXP (x, 1), 0);
908 break;
910 default:
911 gcc_unreachable ();
913 break;
915 case 'V':
917 int num = exact_log2 (INTVAL (x));
918 gcc_assert (num >= 0);
919 fprintf (stream, "#%d", num);
921 break;
923 case 'W':
925 int num = exact_log2 (~INTVAL (x));
926 gcc_assert (num >= 0);
927 fprintf (stream, "#%d", num);
929 break;
931 case 'd':
932 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
934 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
935 break;
937 case 'N':
938 if (x == CONST0_RTX (GET_MODE (x)))
940 fprintf ((stream), "r63");
941 break;
943 goto default_output;
944 case 'u':
945 if (GET_CODE (x) == CONST_INT)
947 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
948 break;
950 /* Fall through. */
952 default_output:
953 default:
954 regno = 0;
955 mode = GET_MODE (x);
957 switch (GET_CODE (x))
959 case TRUNCATE:
961 rtx inner = XEXP (x, 0);
962 int offset = 0;
963 enum machine_mode inner_mode;
965 /* We might see SUBREGs with vector mode registers inside. */
966 if (GET_CODE (inner) == SUBREG
967 && (GET_MODE_SIZE (GET_MODE (inner))
968 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
969 && subreg_lowpart_p (inner))
970 inner = SUBREG_REG (inner);
971 if (GET_CODE (inner) == CONST_INT)
973 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
974 goto default_output;
976 inner_mode = GET_MODE (inner);
977 if (GET_CODE (inner) == SUBREG
978 && (GET_MODE_SIZE (GET_MODE (inner))
979 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
980 && GET_CODE (SUBREG_REG (inner)) == REG)
982 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
983 GET_MODE (SUBREG_REG (inner)),
984 SUBREG_BYTE (inner),
985 GET_MODE (inner));
986 inner = SUBREG_REG (inner);
988 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
989 abort ();
990 /* Floating point register pairs are always big endian;
991 general purpose registers are 64 bit wide. */
992 regno = REGNO (inner);
993 regno = (HARD_REGNO_NREGS (regno, inner_mode)
994 - HARD_REGNO_NREGS (regno, mode))
995 + offset;
996 x = inner;
997 goto reg;
999 case SIGN_EXTEND:
1000 x = XEXP (x, 0);
1001 goto reg;
1002 /* FIXME: We need this on SHmedia32 because reload generates
1003 some sign-extended HI or QI loads into DImode registers
1004 but, because Pmode is SImode, the address ends up with a
1005 subreg:SI of the DImode register. Maybe reload should be
1006 fixed so as to apply alter_subreg to such loads? */
1007 case IF_THEN_ELSE:
1008 gcc_assert (trapping_target_operand (x, VOIDmode));
1009 x = XEXP (XEXP (x, 2), 0);
1010 goto default_output;
1011 case SUBREG:
1012 gcc_assert (SUBREG_BYTE (x) == 0
1013 && GET_CODE (SUBREG_REG (x)) == REG);
1015 x = SUBREG_REG (x);
1016 /* Fall through. */
1018 reg:
1019 case REG:
1020 regno += REGNO (x);
1021 if (FP_REGISTER_P (regno)
1022 && mode == V16SFmode)
1023 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1024 else if (FP_REGISTER_P (REGNO (x))
1025 && mode == V4SFmode)
1026 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1027 else if (GET_CODE (x) == REG
1028 && mode == V2SFmode)
1029 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1030 else if (FP_REGISTER_P (REGNO (x))
1031 && GET_MODE_SIZE (mode) > 4)
1032 fprintf ((stream), "d%s", reg_names[regno] + 1);
1033 else
1034 fputs (reg_names[regno], (stream));
1035 break;
1037 case MEM:
1038 output_address (XEXP (x, 0));
1039 break;
1041 default:
1042 if (TARGET_SH1)
1043 fputc ('#', stream);
1044 output_addr_const (stream, x);
1045 break;
1047 break;
1052 /* Encode symbol attributes of a SYMBOL_REF into its
1053 SYMBOL_REF_FLAGS. */
1054 static void
1055 sh_encode_section_info (tree decl, rtx rtl, int first)
1057 default_encode_section_info (decl, rtl, first);
1059 if (TREE_CODE (decl) == FUNCTION_DECL
1060 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1061 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1064 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1065 static void
1066 force_into (rtx value, rtx target)
1068 value = force_operand (value, target);
1069 if (! rtx_equal_p (value, target))
1070 emit_insn (gen_move_insn (target, value));
1073 /* Emit code to perform a block move. Choose the best method.
1075 OPERANDS[0] is the destination.
1076 OPERANDS[1] is the source.
1077 OPERANDS[2] is the size.
1078 OPERANDS[3] is the alignment safe to use. */
1081 expand_block_move (rtx *operands)
1083 int align = INTVAL (operands[3]);
1084 int constp = (GET_CODE (operands[2]) == CONST_INT);
1085 int bytes = (constp ? INTVAL (operands[2]) : 0);
1087 if (! constp)
1088 return 0;
1090 /* If we could use mov.l to move words and dest is word-aligned, we
1091 can use movua.l for loads and still generate a relatively short
1092 and efficient sequence. */
1093 if (TARGET_SH4A_ARCH && align < 4
1094 && MEM_ALIGN (operands[0]) >= 32
1095 && can_move_by_pieces (bytes, 32))
1097 rtx dest = copy_rtx (operands[0]);
1098 rtx src = copy_rtx (operands[1]);
1099 /* We could use different pseudos for each copied word, but
1100 since movua can only load into r0, it's kind of
1101 pointless. */
1102 rtx temp = gen_reg_rtx (SImode);
1103 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1104 int copied = 0;
1106 while (copied + 4 <= bytes)
1108 rtx to = adjust_address (dest, SImode, copied);
1109 rtx from = adjust_automodify_address (src, BLKmode,
1110 src_addr, copied);
1112 set_mem_size (from, GEN_INT (4));
1113 emit_insn (gen_movua (temp, from));
1114 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1115 emit_move_insn (to, temp);
1116 copied += 4;
1119 if (copied < bytes)
1120 move_by_pieces (adjust_address (dest, BLKmode, copied),
1121 adjust_automodify_address (src, BLKmode,
1122 src_addr, copied),
1123 bytes - copied, align, 0);
1125 return 1;
1128 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1129 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1130 if (align < 4 || (bytes % 4 != 0))
1131 return 0;
1133 if (TARGET_HARD_SH4)
1135 if (bytes < 12)
1136 return 0;
1137 else if (bytes == 12)
1139 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1140 rtx r4 = gen_rtx_REG (SImode, 4);
1141 rtx r5 = gen_rtx_REG (SImode, 5);
1143 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1144 force_into (XEXP (operands[0], 0), r4);
1145 force_into (XEXP (operands[1], 0), r5);
1146 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1147 return 1;
1149 else if (! TARGET_SMALLCODE)
1151 const char *entry_name;
1152 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1153 int dwords;
1154 rtx r4 = gen_rtx_REG (SImode, 4);
1155 rtx r5 = gen_rtx_REG (SImode, 5);
1156 rtx r6 = gen_rtx_REG (SImode, 6);
1158 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1159 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1160 force_into (XEXP (operands[0], 0), r4);
1161 force_into (XEXP (operands[1], 0), r5);
1163 dwords = bytes >> 3;
1164 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1165 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1166 return 1;
1168 else
1169 return 0;
1171 if (bytes < 64)
1173 char entry[30];
1174 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1175 rtx r4 = gen_rtx_REG (SImode, 4);
1176 rtx r5 = gen_rtx_REG (SImode, 5);
1178 sprintf (entry, "__movmemSI%d", bytes);
1179 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1180 force_into (XEXP (operands[0], 0), r4);
1181 force_into (XEXP (operands[1], 0), r5);
1182 emit_insn (gen_block_move_real (func_addr_rtx));
1183 return 1;
1186 /* This is the same number of bytes as a memcpy call, but to a different,
1187 less common function name, so this will occasionally use more space. */
1188 if (! TARGET_SMALLCODE)
1190 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1191 int final_switch, while_loop;
1192 rtx r4 = gen_rtx_REG (SImode, 4);
1193 rtx r5 = gen_rtx_REG (SImode, 5);
1194 rtx r6 = gen_rtx_REG (SImode, 6);
1196 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1197 force_into (XEXP (operands[0], 0), r4);
1198 force_into (XEXP (operands[1], 0), r5);
1200 /* r6 controls the size of the move. 16 is decremented from it
1201 for each 64 bytes moved. Then the negative bit left over is used
1202 as an index into a list of move instructions. e.g., a 72 byte move
1203 would be set up with size(r6) = 14, for one iteration through the
1204 big while loop, and a switch of -2 for the last part. */
1206 final_switch = 16 - ((bytes / 4) % 16);
1207 while_loop = ((bytes / 4) / 16 - 1) * 16;
1208 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1209 emit_insn (gen_block_lump_real (func_addr_rtx));
1210 return 1;
1213 return 0;
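/* Worked example of the r6 set-up described above (hypothetical helper that
   mirrors the arithmetic in expand_block_move; e.g. bytes == 72 gives
   final_switch == 14 and while_loop == 0, i.e. r6 == 14).  */
static void
sh_sketch_movmem_r6 (int bytes, int *final_switch, int *while_loop)
{
  *final_switch = 16 - ((bytes / 4) % 16);
  *while_loop = ((bytes / 4) / 16 - 1) * 16;
}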
1216 /* Prepare operands for a move define_expand; specifically, one of the
1217 operands must be in a register. */
1220 prepare_move_operands (rtx operands[], enum machine_mode mode)
1222 if ((mode == SImode || mode == DImode)
1223 && flag_pic
1224 && ! ((mode == Pmode || mode == ptr_mode)
1225 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1227 rtx temp;
1228 if (SYMBOLIC_CONST_P (operands[1]))
1230 if (GET_CODE (operands[0]) == MEM)
1231 operands[1] = force_reg (Pmode, operands[1]);
1232 else if (TARGET_SHMEDIA
1233 && GET_CODE (operands[1]) == LABEL_REF
1234 && target_reg_operand (operands[0], mode))
1235 /* It's ok. */;
1236 else
1238 temp = (!can_create_pseudo_p ()
1239 ? operands[0]
1240 : gen_reg_rtx (Pmode));
1241 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1244 else if (GET_CODE (operands[1]) == CONST
1245 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1246 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1248 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1249 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1250 mode, temp);
1251 operands[1] = expand_binop (mode, add_optab, temp,
1252 XEXP (XEXP (operands[1], 0), 1),
1253 (!can_create_pseudo_p ()
1254 ? temp
1255 : gen_reg_rtx (Pmode)),
1256 0, OPTAB_LIB_WIDEN);
1260 if (! reload_in_progress && ! reload_completed)
1262 /* Copy the source to a register if both operands aren't registers. */
1263 if (! register_operand (operands[0], mode)
1264 && ! sh_register_operand (operands[1], mode))
1265 operands[1] = copy_to_mode_reg (mode, operands[1]);
1267 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1269 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1270 except that we can't use that function because it is static. */
1271 rtx new_rtx = change_address (operands[0], mode, 0);
1272 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1273 operands[0] = new_rtx;
1276 /* This case can happen while generating code to move the result
1277 of a library call to the target. Reject `st r0,@(rX,rY)' because
1278 reload will fail to find a spill register for rX, since r0 is already
1279 being used for the source. */
1280 else if (TARGET_SH1
1281 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1282 && GET_CODE (operands[0]) == MEM
1283 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1284 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1285 operands[1] = copy_to_mode_reg (mode, operands[1]);
1288 if (mode == Pmode || mode == ptr_mode)
1290 rtx op0, op1, opc;
1291 enum tls_model tls_kind;
1293 op0 = operands[0];
1294 op1 = operands[1];
1295 if (GET_CODE (op1) == CONST
1296 && GET_CODE (XEXP (op1, 0)) == PLUS
1297 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1298 != TLS_MODEL_NONE))
1300 opc = XEXP (XEXP (op1, 0), 1);
1301 op1 = XEXP (XEXP (op1, 0), 0);
1303 else
1304 opc = NULL_RTX;
1306 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1308 rtx tga_op1, tga_ret, tmp, tmp2;
1310 switch (tls_kind)
1312 case TLS_MODEL_GLOBAL_DYNAMIC:
1313 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1314 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1315 op1 = tga_ret;
1316 break;
1318 case TLS_MODEL_LOCAL_DYNAMIC:
1319 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1320 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1322 tmp = gen_reg_rtx (Pmode);
1323 emit_move_insn (tmp, tga_ret);
1325 if (register_operand (op0, Pmode))
1326 tmp2 = op0;
1327 else
1328 tmp2 = gen_reg_rtx (Pmode);
1330 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1331 op1 = tmp2;
1332 break;
1334 case TLS_MODEL_INITIAL_EXEC:
1335 if (! flag_pic)
1337 /* Don't schedule insns for getting GOT address when
1338 the first scheduling is enabled, to avoid spill
1339 failures for R0. */
1340 if (flag_schedule_insns)
1341 emit_insn (gen_blockage ());
1342 emit_insn (gen_GOTaddr2picreg ());
1343 emit_use (gen_rtx_REG (SImode, PIC_REG));
1344 if (flag_schedule_insns)
1345 emit_insn (gen_blockage ());
1347 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1348 tmp = gen_sym2GOTTPOFF (op1);
1349 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1350 op1 = tga_op1;
1351 break;
1353 case TLS_MODEL_LOCAL_EXEC:
1354 tmp2 = gen_reg_rtx (Pmode);
1355 emit_insn (gen_load_gbr (tmp2));
1356 tmp = gen_reg_rtx (Pmode);
1357 emit_insn (gen_symTPOFF2reg (tmp, op1));
1359 if (register_operand (op0, Pmode))
1360 op1 = op0;
1361 else
1362 op1 = gen_reg_rtx (Pmode);
1364 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1365 break;
1367 default:
1368 gcc_unreachable ();
1370 if (opc)
1371 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1372 operands[1] = op1;
1376 return 0;
1379 enum rtx_code
1380 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1381 enum rtx_code comparison)
1383 rtx op1;
1384 rtx scratch = NULL_RTX;
1386 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1387 comparison = GET_CODE (operands[0]);
1388 else
1389 scratch = operands[4];
1390 if (GET_CODE (operands[1]) == CONST_INT
1391 && GET_CODE (operands[2]) != CONST_INT)
1393 rtx tmp = operands[1];
1395 operands[1] = operands[2];
1396 operands[2] = tmp;
1397 comparison = swap_condition (comparison);
1399 if (GET_CODE (operands[2]) == CONST_INT)
1401 HOST_WIDE_INT val = INTVAL (operands[2]);
1402 if ((val == -1 || val == -0x81)
1403 && (comparison == GT || comparison == LE))
1405 comparison = (comparison == GT) ? GE : LT;
1406 operands[2] = gen_int_mode (val + 1, mode);
1408 else if ((val == 1 || val == 0x80)
1409 && (comparison == GE || comparison == LT))
1411 comparison = (comparison == GE) ? GT : LE;
1412 operands[2] = gen_int_mode (val - 1, mode);
1414 else if (val == 1 && (comparison == GEU || comparison == LTU))
1416 comparison = (comparison == GEU) ? NE : EQ;
1417 operands[2] = CONST0_RTX (mode);
1419 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1421 comparison = (comparison == GEU) ? GTU : LEU;
1422 operands[2] = gen_int_mode (val - 1, mode);
1424 else if (val == 0 && (comparison == GTU || comparison == LEU))
1425 comparison = (comparison == GTU) ? NE : EQ;
1426 else if (mode == SImode
1427 && ((val == 0x7fffffff
1428 && (comparison == GTU || comparison == LEU))
1429 || ((unsigned HOST_WIDE_INT) val
1430 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1431 && (comparison == GEU || comparison == LTU))))
1433 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1434 operands[2] = CONST0_RTX (mode);
1437 op1 = operands[1];
1438 if (can_create_pseudo_p ())
1439 operands[1] = force_reg (mode, op1);
1440 /* When we are handling DImode comparisons, we want to keep constants so
1441 that we can optimize the component comparisons; however, memory loads
1442 are better issued as a whole so that they can be scheduled well.
1443 SImode equality comparisons allow I08 constants, but only when they
1444 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1445 into a register, that register might as well be r0, and we allow the
1446 constant. If it is already in a register, this is likely to be
1447 allocated to a different hard register, thus we load the constant into
1448 a register unless it is zero. */
1449 if (!REG_P (operands[2])
1450 && (GET_CODE (operands[2]) != CONST_INT
1451 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1452 && ((comparison != EQ && comparison != NE)
1453 || (REG_P (op1) && REGNO (op1) != R0_REG)
1454 || !satisfies_constraint_I08 (operands[2])))))
1456 if (scratch && GET_MODE (scratch) == mode)
1458 emit_move_insn (scratch, operands[2]);
1459 operands[2] = scratch;
1461 else if (can_create_pseudo_p ())
1462 operands[2] = force_reg (mode, operands[2]);
1464 return comparison;
1467 void
1468 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1470 rtx (*branch_expander) (rtx) = gen_branch_true;
1471 rtx jump;
1473 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1474 switch (comparison)
1476 case NE: case LT: case LE: case LTU: case LEU:
1477 comparison = reverse_condition (comparison);
1478 branch_expander = gen_branch_false;
1479 default: ;
1481 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1482 gen_rtx_fmt_ee (comparison, SImode,
1483 operands[1], operands[2])));
1484 jump = emit_jump_insn (branch_expander (operands[3]));
1485 if (probability >= 0)
1486 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1490 /* ??? How should we distribute probabilities when more than one branch
1491 is generated?  So far we only have some ad-hoc observations:
1492 - If the operands are random, they are likely to differ in both parts.
1493 - If comparing items in a hash chain, the operands are random or equal;
1494 operation should be EQ or NE.
1495 - If items are searched in an ordered tree from the root, we can expect
1496 the highpart to be unequal about half of the time; operation should be
1497 an inequality comparison, operands non-constant, and overall probability
1498 about 50%. Likewise for quicksort.
1499 - Range checks will be often made against constants. Even if we assume for
1500 simplicity an even distribution of the non-constant operand over a
1501 sub-range here, the same probability could be generated with differently
1502 wide sub-ranges - as long as the ratio of the part of the subrange that
1503 is before the threshold to the part that comes after the threshold stays
1504 the same. Thus, we can't really tell anything here;
1505 assuming random distribution is at least simple. */
1508 bool
1509 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1511 enum rtx_code msw_taken, msw_skip, lsw_taken;
1512 rtx skip_label = NULL_RTX;
1513 rtx op1h, op1l, op2h, op2l;
1514 int num_branches;
1515 int prob, rev_prob;
1516 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1517 rtx scratch = operands[4];
1519 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1520 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1521 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1522 op1l = gen_lowpart (SImode, operands[1]);
1523 op2l = gen_lowpart (SImode, operands[2]);
1524 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1525 prob = split_branch_probability;
1526 rev_prob = REG_BR_PROB_BASE - prob;
1527 switch (comparison)
1529 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1530 That costs 1 cycle more when the first branch can be predicted taken,
1531 but saves us mispredicts because only one branch needs prediction.
1532 It also enables generating the cmpeqdi_t-1 pattern. */
1533 case EQ:
1534 if (TARGET_CMPEQDI_T)
1536 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1537 emit_jump_insn (gen_branch_true (operands[3]));
1538 return true;
1540 msw_skip = NE;
1541 lsw_taken = EQ;
1542 if (prob >= 0)
1544 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32). */
1546 msw_skip_prob = rev_prob;
1547 if (REG_BR_PROB_BASE <= 65535)
1548 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1549 else
1551 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1552 lsw_taken_prob
1553 = (prob
1554 ? (REG_BR_PROB_BASE
1555 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1556 / ((HOST_WIDEST_INT) prob << 32)))
1557 : 0);
1560 break;
1561 case NE:
1562 if (TARGET_CMPEQDI_T)
1564 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1565 emit_jump_insn (gen_branch_false (operands[3]));
1566 return true;
1568 msw_taken = NE;
1569 msw_taken_prob = prob;
1570 lsw_taken = NE;
1571 lsw_taken_prob = 0;
1572 break;
1573 case GTU: case GT:
1574 msw_taken = comparison;
1575 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1576 break;
1577 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1578 msw_skip = swap_condition (msw_taken);
1579 lsw_taken = GTU;
1580 break;
1581 case GEU: case GE:
1582 if (op2l == CONST0_RTX (SImode))
1583 msw_taken = comparison;
1584 else
1586 msw_taken = comparison == GE ? GT : GTU;
1587 msw_skip = swap_condition (msw_taken);
1588 lsw_taken = GEU;
1590 break;
1591 case LTU: case LT:
1592 msw_taken = comparison;
1593 if (op2l == CONST0_RTX (SImode))
1594 break;
1595 msw_skip = swap_condition (msw_taken);
1596 lsw_taken = LTU;
1597 break;
1598 case LEU: case LE:
1599 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1600 msw_taken = comparison;
1601 else
1603 lsw_taken = LEU;
1604 if (comparison == LE)
1605 msw_taken = LT;
1606 else if (op2h != CONST0_RTX (SImode))
1607 msw_taken = LTU;
1608 else
1609 break;
1610 msw_skip = swap_condition (msw_taken);
1612 break;
1613 default: return false;
1615 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1616 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1617 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1618 if (comparison != EQ && comparison != NE && num_branches > 1)
1620 if (!CONSTANT_P (operands[2])
1621 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1622 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1624 msw_taken_prob = prob / 2U;
1625 msw_skip_prob
1626 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1627 lsw_taken_prob = prob;
1629 else
1631 msw_taken_prob = prob;
1632 msw_skip_prob = REG_BR_PROB_BASE;
1633 /* ??? If we have a constant op2h, should we use that when
1634 calculating lsw_taken_prob? */
1635 lsw_taken_prob = prob;
1638 operands[1] = op1h;
1639 operands[2] = op2h;
1640 operands[4] = NULL_RTX;
1641 if (reload_completed
1642 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1643 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1644 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1646 emit_move_insn (scratch, operands[2]);
1647 operands[2] = scratch;
1649 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1650 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1651 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1653 rtx taken_label = operands[3];
1655 /* Operands were possibly modified, but msw_skip doesn't expect this.
1656 Always use the original ones. */
1657 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1659 operands[1] = op1h;
1660 operands[2] = op2h;
1663 operands[3] = skip_label = gen_label_rtx ();
1664 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1665 operands[3] = taken_label;
1667 operands[1] = op1l;
1668 operands[2] = op2l;
1669 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1671 if (reload_completed
1672 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1673 operands[4] = scratch;
1674 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1676 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1677 emit_label (skip_label);
1678 return true;
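/* Sketch of the probability split used above for the non-constant-op2,
   roughly-even-probability case (hypothetical helper, illustration only):
   with REG_BR_PROB_BASE == 10000 and prob == 5000 this yields 2500 / 3333
   / 5000 for msw_taken / msw_skip / lsw_taken.  */
static void
sh_sketch_cbranchdi_probs (int prob, int *msw_taken_p, int *msw_skip_p,
			   int *lsw_taken_p)
{
  int rev_prob = REG_BR_PROB_BASE - prob;
  *msw_taken_p = prob / 2;
  *msw_skip_p = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
  *lsw_taken_p = prob;
}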
1681 /* Prepare the operands for an scc instruction; make sure that the
1682 compare has been done. */
1684 prepare_scc_operands (enum rtx_code code)
1686 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1687 enum rtx_code oldcode = code;
1688 enum machine_mode mode;
1690 /* First need a compare insn. */
1691 switch (code)
1693 case NE:
1694 /* It isn't possible to handle this case. */
1695 gcc_unreachable ();
1696 case LT:
1697 code = GT;
1698 break;
1699 case LE:
1700 code = GE;
1701 break;
1702 case LTU:
1703 code = GTU;
1704 break;
1705 case LEU:
1706 code = GEU;
1707 break;
1708 default:
1709 break;
1711 if (code != oldcode)
1713 rtx tmp = sh_compare_op0;
1714 sh_compare_op0 = sh_compare_op1;
1715 sh_compare_op1 = tmp;
1718 mode = GET_MODE (sh_compare_op0);
1719 if (mode == VOIDmode)
1720 mode = GET_MODE (sh_compare_op1);
1722 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1723 if ((code != EQ && code != NE
1724 && (sh_compare_op1 != const0_rtx
1725 || code == GTU || code == GEU || code == LTU || code == LEU))
1726 || (mode == DImode && sh_compare_op1 != const0_rtx)
1727 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1728 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1730 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1731 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1732 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1733 gen_rtx_SET (VOIDmode, t_reg,
1734 gen_rtx_fmt_ee (code, SImode,
1735 sh_compare_op0, sh_compare_op1)),
1736 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1737 else
1738 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1739 gen_rtx_fmt_ee (code, SImode,
1740 sh_compare_op0, sh_compare_op1)));
1742 return t_reg;
1745 /* Called from the md file, set up the operands of a compare instruction. */
1747 void
1748 from_compare (rtx *operands, int code)
1750 enum machine_mode mode = GET_MODE (sh_compare_op0);
1751 rtx insn;
1752 if (mode == VOIDmode)
1753 mode = GET_MODE (sh_compare_op1);
1754 if (code != EQ
1755 || mode == DImode
1756 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1758 /* Force args into regs, since we can't use constants here. */
1759 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1760 if (sh_compare_op1 != const0_rtx
1761 || code == GTU || code == GEU
1762 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1763 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1765 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1767 from_compare (operands, GT);
1768 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1770 else
1771 insn = gen_rtx_SET (VOIDmode,
1772 gen_rtx_REG (SImode, T_REG),
1773 gen_rtx_fmt_ee ((enum rtx_code) code, SImode,
1774 sh_compare_op0, sh_compare_op1));
1775 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1777 insn = gen_rtx_PARALLEL (VOIDmode,
1778 gen_rtvec (2, insn,
1779 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1780 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1782 else
1783 emit_insn (insn);
1786 /* Functions to output assembly code. */
1788 /* Return a sequence of instructions to perform DI or DF move.
1790 Since the SH cannot move a DI or DF in one instruction, we have
1791 to take care when we see overlapping source and dest registers. */
1793 const char *
1794 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1795 enum machine_mode mode)
1797 rtx dst = operands[0];
1798 rtx src = operands[1];
1800 if (GET_CODE (dst) == MEM
1801 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1802 return "mov.l %T1,%0\n\tmov.l %1,%0";
1804 if (register_operand (dst, mode)
1805 && register_operand (src, mode))
1807 if (REGNO (src) == MACH_REG)
1808 return "sts mach,%S0\n\tsts macl,%R0";
1810 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1811 when mov.d r1,r0 do r1->r0 then r2->r1. */
1813 if (REGNO (src) + 1 == REGNO (dst))
1814 return "mov %T1,%T0\n\tmov %1,%0";
1815 else
1816 return "mov %1,%0\n\tmov %T1,%T0";
1818 else if (GET_CODE (src) == CONST_INT)
1820 if (INTVAL (src) < 0)
1821 output_asm_insn ("mov #-1,%S0", operands);
1822 else
1823 output_asm_insn ("mov #0,%S0", operands);
1825 return "mov %1,%R0";
1827 else if (GET_CODE (src) == MEM)
1829 int ptrreg = -1;
1830 int dreg = REGNO (dst);
1831 rtx inside = XEXP (src, 0);
1833 switch (GET_CODE (inside))
1835 case REG:
1836 ptrreg = REGNO (inside);
1837 break;
1839 case SUBREG:
1840 ptrreg = subreg_regno (inside);
1841 break;
1843 case PLUS:
1844 ptrreg = REGNO (XEXP (inside, 0));
1845 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1846 an offsettable address. Unfortunately, offsettable addresses use
1847 QImode to check the offset, and a QImode offsettable address
1848 requires r0 for the other operand, which is not currently
1849 supported, so we can't use the 'o' constraint.
1850 Thus we must check for and handle r0+REG addresses here.
1851 We punt for now, since this is likely very rare. */
1852 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1853 break;
1855 case LABEL_REF:
1856 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1857 case POST_INC:
1858 return "mov.l %1,%0\n\tmov.l %1,%T0";
1859 default:
1860 gcc_unreachable ();
1863 /* Work out the safe way to copy. Copy into the second half first. */
1864 if (dreg == ptrreg)
1865 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1868 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1871 /* Print an instruction which would have gone into a delay slot after
1872 another instruction, but couldn't because the other instruction expanded
1873 into a sequence where putting the slot insn at the end wouldn't work. */
1875 static void
1876 print_slot (rtx insn)
1878 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1880 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1883 const char *
1884 output_far_jump (rtx insn, rtx op)
1886 struct { rtx lab, reg, op; } this_jmp;
1887 rtx braf_base_lab = NULL_RTX;
1888 const char *jump;
1889 int far;
1890 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1891 rtx prev;
1893 this_jmp.lab = gen_label_rtx ();
1895 if (TARGET_SH2
1896 && offset >= -32764
1897 && offset - get_attr_length (insn) <= 32766)
1899 far = 0;
1900 jump = "mov.w %O0,%1; braf %1";
1902 else
1904 far = 1;
1905 if (flag_pic)
1907 if (TARGET_SH2)
1908 jump = "mov.l %O0,%1; braf %1";
1909 else
1910 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1912 else
1913 jump = "mov.l %O0,%1; jmp @%1";
1915 /* If we have a scratch register available, use it. */
1916 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1917 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1919 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1920 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1921 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1922 output_asm_insn (jump, &this_jmp.lab);
1923 if (dbr_sequence_length ())
1924 print_slot (final_sequence);
1925 else
1926 output_asm_insn ("nop", 0);
1928 else
1930 /* Output the delay slot insn first if any. */
1931 if (dbr_sequence_length ())
1932 print_slot (final_sequence);
1934 this_jmp.reg = gen_rtx_REG (SImode, 13);
1935 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1936 Fortunately, MACL is fixed and call-clobbered, and we never
1937 need its value across jumps, so save r13 in it instead of in
1938 the stack. */
1939 if (TARGET_SH5)
1940 output_asm_insn ("lds r13, macl", 0);
1941 else
1942 output_asm_insn ("mov.l r13,@-r15", 0);
1943 output_asm_insn (jump, &this_jmp.lab);
1944 if (TARGET_SH5)
1945 output_asm_insn ("sts macl, r13", 0);
1946 else
1947 output_asm_insn ("mov.l @r15+,r13", 0);
1949 if (far && flag_pic && TARGET_SH2)
1951 braf_base_lab = gen_label_rtx ();
1952 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1953 CODE_LABEL_NUMBER (braf_base_lab));
1955 if (far)
1956 output_asm_insn (".align 2", 0);
1957 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
1958 this_jmp.op = op;
1959 if (far && flag_pic)
1961 if (TARGET_SH2)
1962 this_jmp.lab = braf_base_lab;
1963 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
1965 else
1966 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
1967 return "";
1970 /* Local label counter, used for constants in the pool and inside
1971 pattern branches. */
1973 static int lf = 100;
1975 /* Output code for ordinary branches. */
1977 const char *
1978 output_branch (int logic, rtx insn, rtx *operands)
1980 switch (get_attr_length (insn))
1982 case 6:
1983 /* This can happen if filling the delay slot has caused a forward
1984 branch to exceed its range (we could reverse it, but only
1985 when we know we won't overextend other branches; this should
1986 best be handled by relaxation).
1987 It can also happen when other condbranches hoist delay slot insns
1988 from their destinations, thus leading to a code size increase.
1989 But the branch will still be in the range -4092..+4098 bytes. */
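/* Illustration of the expansion done below, roughly:
   bf.s .LFnn (or bt.s, i.e. the inverted condition)
   <delay slot insn>
   bra .Ltarget
   nop
   .LFnn:
   The inverted short branch skips an unconditional bra that can reach the
   distant target; the delay slot insn is only reused when it is not
   annulled. */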
1991 if (! TARGET_RELAX)
1993 int label = lf++;
1994 /* The call to print_slot will clobber the operands. */
1995 rtx op0 = operands[0];
1997 /* If the instruction in the delay slot is annulled (true), then
1998 there is no delay slot where we can put it now. The only safe
1999 place for it is after the label. final will do that by default. */
2001 if (final_sequence
2002 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2003 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2005 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2006 ASSEMBLER_DIALECT ? "/" : ".", label);
2007 print_slot (final_sequence);
2009 else
2010 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2012 output_asm_insn ("bra\t%l0", &op0);
2013 fprintf (asm_out_file, "\tnop\n");
2014 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2016 return "";
2018 /* When relaxing, handle this like a short branch. The linker
2019 will fix it up if it still doesn't fit after relaxation. */
2020 case 2:
2021 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2023 /* These are for SH2e, in which we have to account for the
2024 extra nop because of the hardware bug in annulled branches. */
2025 case 8:
2026 if (! TARGET_RELAX)
2028 int label = lf++;
2030 gcc_assert (!final_sequence
2031 || !(INSN_ANNULLED_BRANCH_P
2032 (XVECEXP (final_sequence, 0, 0))));
2033 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2034 logic ? "f" : "t",
2035 ASSEMBLER_DIALECT ? "/" : ".", label);
2036 fprintf (asm_out_file, "\tnop\n");
2037 output_asm_insn ("bra\t%l0", operands);
2038 fprintf (asm_out_file, "\tnop\n");
2039 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2041 return "";
2043 /* When relaxing, fall through. */
2044 case 4:
2046 char buffer[10];
2048 sprintf (buffer, "b%s%ss\t%%l0",
2049 logic ? "t" : "f",
2050 ASSEMBLER_DIALECT ? "/" : ".");
2051 output_asm_insn (buffer, &operands[0]);
2052 return "nop";
2055 default:
2056 /* There should be no longer branches now - that would
2057 indicate that something has destroyed the branches set
2058 up in machine_dependent_reorg. */
2059 gcc_unreachable ();
2063 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2064 fill in operands[9] as a label to the successor insn.
2065 We try to use jump threading where possible.
2066 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2067 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2068 follow jmp and bt, if the address is in range. */
2069 const char *
2070 output_branchy_insn (enum rtx_code code, const char *templ,
2071 rtx insn, rtx *operands)
2073 rtx next_insn = NEXT_INSN (insn);
2075 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2077 rtx src = SET_SRC (PATTERN (next_insn));
2078 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2080 /* Following branch not taken */
2081 operands[9] = gen_label_rtx ();
2082 emit_label_after (operands[9], next_insn);
2083 INSN_ADDRESSES_NEW (operands[9],
2084 INSN_ADDRESSES (INSN_UID (next_insn))
2085 + get_attr_length (next_insn));
2086 return templ;
2088 else
2090 int offset = (branch_dest (next_insn)
2091 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2092 if (offset >= -252 && offset <= 258)
2094 if (GET_CODE (src) == IF_THEN_ELSE)
2095 /* branch_true */
2096 src = XEXP (src, 1);
2097 operands[9] = src;
2098 return templ;
2102 operands[9] = gen_label_rtx ();
2103 emit_label_after (operands[9], insn);
2104 INSN_ADDRESSES_NEW (operands[9],
2105 INSN_ADDRESSES (INSN_UID (insn))
2106 + get_attr_length (insn));
2107 return templ;
2110 const char *
2111 output_ieee_ccmpeq (rtx insn, rtx *operands)
2113 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2114 insn, operands);
2117 /* Output the start of the assembler file. */
2119 static void
2120 sh_file_start (void)
2122 default_file_start ();
2124 #ifdef SYMBIAN
2125 /* Declare the .directive section before it is used. */
2126 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2127 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2128 #endif
2130 if (TARGET_ELF)
2131 /* We need to show the text section with the proper
2132 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2133 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2134 will complain. We can teach GAS specifically about the
2135 default attributes for our choice of text section, but
2136 then we would have to change GAS again if/when we change
2137 the text section name. */
2138 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2139 else
2140 /* Switch to the data section so that the coffsem symbol
2141 isn't in the text section. */
2142 switch_to_section (data_section);
2144 if (TARGET_LITTLE_ENDIAN)
2145 fputs ("\t.little\n", asm_out_file);
2147 if (!TARGET_ELF)
2149 if (TARGET_SHCOMPACT)
2150 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2151 else if (TARGET_SHMEDIA)
2152 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2153 TARGET_SHMEDIA64 ? 64 : 32);
2157 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2159 static bool
2160 unspec_caller_rtx_p (rtx pat)
2162 rtx base, offset;
2163 int i;
2165 split_const (pat, &base, &offset);
2166 if (GET_CODE (base) == UNSPEC)
2168 if (XINT (base, 1) == UNSPEC_CALLER)
2169 return true;
2170 for (i = 0; i < XVECLEN (base, 0); i++)
2171 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2172 return true;
2174 return false;
2177 /* Indicate that INSN cannot be duplicated. This is true for an insn
2178 that generates a unique label. */
2180 static bool
2181 sh_cannot_copy_insn_p (rtx insn)
2183 rtx pat;
2185 if (!reload_completed || !flag_pic)
2186 return false;
2188 if (GET_CODE (insn) != INSN)
2189 return false;
2190 if (asm_noperands (insn) >= 0)
2191 return false;
2193 pat = PATTERN (insn);
2194 if (GET_CODE (pat) != SET)
2195 return false;
2196 pat = SET_SRC (pat);
2198 if (unspec_caller_rtx_p (pat))
2199 return true;
2201 return false;
2204 /* Actual number of instructions used to make a shift by N. */
2205 static const char ashiftrt_insns[] =
2206 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2208 /* Left shift and logical right shift are the same. */
2209 static const char shift_insns[] =
2210 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2212 /* Individual shift amounts needed to get the above length sequences.
2213 One bit right shifts clobber the T bit, so when possible, put one bit
2214 shifts in the middle of the sequence, so the ends are eligible for
2215 branch delay slots. */
2216 static const short shift_amounts[32][5] = {
2217 {0}, {1}, {2}, {2, 1},
2218 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2219 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2220 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2221 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2222 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2223 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2224 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
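/* Worked example: an ASHIFT by 30 uses shift_amounts[30] == {16, -2, 16};
   gen_ashift below treats a negative amount as a shift in the opposite
   direction, so this becomes shll16, shlr2, shll16, i.e. three insns,
   matching shift_insns[30] == 3. A shift by 5 uses {2, 1, 2}, with the
   one-bit shift kept in the middle as explained above. */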
2226 /* Likewise, but for shift amounts < 16, up to three highmost bits
2227 might be clobbered. This is typically used when combined with some
2228 kind of sign or zero extension. */
2230 static const char ext_shift_insns[] =
2231 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2233 static const short ext_shift_amounts[32][4] = {
2234 {0}, {1}, {2}, {2, 1},
2235 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2236 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2237 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2238 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2239 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2240 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2241 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
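/* Example: ext_shift_amounts[6] == {8, -2}, i.e. shll8 then shlr2, performs
   a left shift by 6 in two insns instead of the three that shift_insns[6]
   requires, at the price of leaving the two highmost bits zero instead of
   preserving them. */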
2243 /* Assuming we have a value that has been sign-extended by at least one bit,
2244 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2245 to shift it by N without data loss, and quicker than by other means? */
2246 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
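/* Note that (((n) | 8) == 15) holds exactly for n == 7 and n == 15. */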
2248 /* This is used in length attributes in sh.md to help compute the length
2249 of arbitrary constant shift instructions. */
2252 shift_insns_rtx (rtx insn)
2254 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2255 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2256 enum rtx_code shift_code = GET_CODE (set_src);
2258 switch (shift_code)
2260 case ASHIFTRT:
2261 return ashiftrt_insns[shift_count];
2262 case LSHIFTRT:
2263 case ASHIFT:
2264 return shift_insns[shift_count];
2265 default:
2266 gcc_unreachable ();
2270 /* Return the cost of a shift. */
2272 static inline int
2273 shiftcosts (rtx x)
2275 int value;
2277 if (TARGET_SHMEDIA)
2278 return 1;
2280 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2282 if (GET_MODE (x) == DImode
2283 && GET_CODE (XEXP (x, 1)) == CONST_INT
2284 && INTVAL (XEXP (x, 1)) == 1)
2285 return 2;
2287 /* Everything else is invalid, because there is no pattern for it. */
2288 return MAX_COST;
2290 /* If shifting by a non-constant, then this will be expensive. */
2291 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2292 return SH_DYNAMIC_SHIFT_COST;
2294 /* Otherwise, return the true cost in instructions. Cope with out of range
2295 shift counts more or less arbitrarily. */
2296 value = INTVAL (XEXP (x, 1)) & 31;
2298 if (GET_CODE (x) == ASHIFTRT)
2300 int cost = ashiftrt_insns[value];
2301 /* If SH3, then we put the constant in a reg and use shad. */
2302 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2303 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2304 return cost;
2306 else
2307 return shift_insns[value];
2310 /* Return the cost of an AND operation. */
2312 static inline int
2313 andcosts (rtx x)
2315 int i;
2317 /* ANDing with a register is a single cycle `and' instruction. */
2318 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2319 return 1;
2321 i = INTVAL (XEXP (x, 1));
2323 if (TARGET_SHMEDIA)
2325 if (satisfies_constraint_I10 (XEXP (x, 1))
2326 || satisfies_constraint_J16 (XEXP (x, 1)))
2327 return 1;
2328 else
2329 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2332 /* These constants are single cycle extu.[bw] instructions. */
2333 if (i == 0xff || i == 0xffff)
2334 return 1;
2335 /* Constants that can be used in an and immediate instruction in a single
2336 cycle, but this requires r0, so make it a little more expensive. */
2337 if (CONST_OK_FOR_K08 (i))
2338 return 2;
2339 /* Constants that can be loaded with a mov immediate and an and.
2340 This case is probably unnecessary. */
2341 if (CONST_OK_FOR_I08 (i))
2342 return 2;
2343 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2344 This case is probably unnecessary. */
2345 return 3;
2348 /* Return the cost of an addition or a subtraction. */
2350 static inline int
2351 addsubcosts (rtx x)
2353 /* Adding a register is a single cycle insn. */
2354 if (GET_CODE (XEXP (x, 1)) == REG
2355 || GET_CODE (XEXP (x, 1)) == SUBREG)
2356 return 1;
2358 /* Likewise for small constants. */
2359 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2360 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2361 return 1;
2363 if (TARGET_SHMEDIA)
2364 switch (GET_CODE (XEXP (x, 1)))
2366 case CONST:
2367 case LABEL_REF:
2368 case SYMBOL_REF:
2369 return TARGET_SHMEDIA64 ? 5 : 3;
2371 case CONST_INT:
2372 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2373 return 2;
2374 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2375 return 3;
2376 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2377 return 4;
2379 /* Fall through. */
2380 default:
2381 return 5;
2384 /* Any other constant requires a 2 cycle pc-relative load plus an
2385 addition. */
2386 return 3;
2389 /* Return the cost of a multiply. */
2390 static inline int
2391 multcosts (rtx x ATTRIBUTE_UNUSED)
2393 if (sh_multcost >= 0)
2394 return sh_multcost;
2395 if (TARGET_SHMEDIA)
2396 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2397 accept constants. Ideally, we would use a cost of one or two and
2398 add the cost of the operand, but disregard the latter when inside loops
2399 and loop invariant code motion is still to follow.
2400 Using a multiply first and splitting it later if it's a loss
2401 doesn't work because of different sign / zero extension semantics
2402 of multiplies vs. shifts. */
2403 return TARGET_SMALLCODE ? 2 : 3;
2405 if (TARGET_SH2)
2407 /* We have a mul insn, so we can never take more than the mul and the
2408 read of the mac reg, but count more because of the latency and extra
2409 reg usage. */
2410 if (TARGET_SMALLCODE)
2411 return 2;
2412 return 3;
2415 /* If we're aiming at small code, then just count the number of
2416 insns in a multiply call sequence. */
2417 if (TARGET_SMALLCODE)
2418 return 5;
2420 /* Otherwise, count all the insns in the routine we'd be calling, too. */
2421 return 20;
2424 /* Compute a (partial) cost for rtx X. Return true if the complete
2425 cost has been computed, and false if subexpressions should be
2426 scanned. In either case, *TOTAL contains the cost result. */
2428 static bool
2429 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2430 bool speed ATTRIBUTE_UNUSED)
2432 switch (code)
2434 case CONST_INT:
2435 if (TARGET_SHMEDIA)
2437 if (INTVAL (x) == 0)
2438 *total = 0;
2439 else if (outer_code == AND && and_operand ((x), DImode))
2440 *total = 0;
2441 else if ((outer_code == IOR || outer_code == XOR
2442 || outer_code == PLUS)
2443 && CONST_OK_FOR_I10 (INTVAL (x)))
2444 *total = 0;
2445 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2446 *total = COSTS_N_INSNS (outer_code != SET);
2447 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2448 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2449 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2450 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2451 else
2452 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2453 return true;
2455 if (CONST_OK_FOR_I08 (INTVAL (x)))
2456 *total = 0;
2457 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2458 && CONST_OK_FOR_K08 (INTVAL (x)))
2459 *total = 1;
2460 /* prepare_cmp_insn will force costly constants into registers before
2461 the cbranch[sd]i4 patterns can see them, so preserve potentially
2462 interesting ones not covered by I08 above. */
2463 else if (outer_code == COMPARE
2464 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2465 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2466 || INTVAL (x) == 0x7fffffff
2467 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2468 *total = 1;
2469 else
2470 *total = 8;
2471 return true;
2473 case CONST:
2474 case LABEL_REF:
2475 case SYMBOL_REF:
2476 if (TARGET_SHMEDIA64)
2477 *total = COSTS_N_INSNS (4);
2478 else if (TARGET_SHMEDIA32)
2479 *total = COSTS_N_INSNS (2);
2480 else
2481 *total = 5;
2482 return true;
2484 case CONST_DOUBLE:
2485 if (TARGET_SHMEDIA)
2486 *total = COSTS_N_INSNS (4);
2487 /* prepare_cmp_insn will force costly constants into registers before
2488 the cbranchdi4 pattern can see them, so preserve potentially
2489 interesting ones. */
2490 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2491 *total = 1;
2492 else
2493 *total = 10;
2494 return true;
2495 case CONST_VECTOR:
2496 if (x == CONST0_RTX (GET_MODE (x)))
2497 *total = 0;
2498 else if (sh_1el_vec (x, VOIDmode))
2499 *total = outer_code != SET;
2500 if (sh_rep_vec (x, VOIDmode))
2501 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2502 + (outer_code != SET));
2503 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2504 return true;
2506 case PLUS:
2507 case MINUS:
2508 *total = COSTS_N_INSNS (addsubcosts (x));
2509 return true;
2511 case AND:
2512 *total = COSTS_N_INSNS (andcosts (x));
2513 return true;
2515 case MULT:
2516 *total = COSTS_N_INSNS (multcosts (x));
2517 return true;
2519 case ASHIFT:
2520 case ASHIFTRT:
2521 case LSHIFTRT:
2522 *total = COSTS_N_INSNS (shiftcosts (x));
2523 return true;
2525 case DIV:
2526 case UDIV:
2527 case MOD:
2528 case UMOD:
2529 *total = COSTS_N_INSNS (20);
2530 return true;
2532 case PARALLEL:
2533 if (sh_1el_vec (x, VOIDmode))
2534 *total = outer_code != SET;
2535 if (sh_rep_vec (x, VOIDmode))
2536 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2537 + (outer_code != SET));
2538 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2539 return true;
2541 case FLOAT:
2542 case FIX:
2543 *total = 100;
2544 return true;
2546 default:
2547 return false;
2551 /* Compute the cost of an address. For the SH, all valid addresses are
2552 the same cost. Use a slightly higher cost for reg + reg addressing,
2553 since it increases pressure on r0. */
2555 static int
2556 sh_address_cost (rtx X,
2557 bool speed ATTRIBUTE_UNUSED)
2559 return (GET_CODE (X) == PLUS
2560 && ! CONSTANT_P (XEXP (X, 1))
2561 && ! TARGET_SHMEDIA ? 1 : 0);
2564 /* Code to expand a shift. */
2566 void
2567 gen_ashift (int type, int n, rtx reg)
2569 /* Negative values here come from the shift_amounts array. */
2570 if (n < 0)
2572 if (type == ASHIFT)
2573 type = LSHIFTRT;
2574 else
2575 type = ASHIFT;
2576 n = -n;
2579 switch (type)
2581 case ASHIFTRT:
2582 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2583 break;
2584 case LSHIFTRT:
2585 if (n == 1)
2586 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2587 else
2588 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2589 break;
2590 case ASHIFT:
2591 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2592 break;
2596 /* Same for HImode */
2598 void
2599 gen_ashift_hi (int type, int n, rtx reg)
2601 /* Negative values here come from the shift_amounts array. */
2602 if (n < 0)
2604 if (type == ASHIFT)
2605 type = LSHIFTRT;
2606 else
2607 type = ASHIFT;
2608 n = -n;
2611 switch (type)
2613 case ASHIFTRT:
2614 case LSHIFTRT:
2615 /* We don't have HImode right shift operations because using the
2616 ordinary 32 bit shift instructions for that doesn't generate proper
2617 zero/sign extension.
2618 gen_ashift_hi is only called in contexts where we know that the
2619 sign extension works out correctly. */
2621 int offset = 0;
2622 if (GET_CODE (reg) == SUBREG)
2624 offset = SUBREG_BYTE (reg);
2625 reg = SUBREG_REG (reg);
2627 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2628 break;
2630 case ASHIFT:
2631 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2632 break;
2636 /* Output RTL to split a constant shift into its component SH constant
2637 shift instructions. */
2639 void
2640 gen_shifty_op (int code, rtx *operands)
2642 int value = INTVAL (operands[2]);
2643 int max, i;
2645 /* Truncate the shift count in case it is out of bounds. */
2646 value = value & 31;
2648 if (value == 31)
2650 if (code == LSHIFTRT)
2652 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2653 emit_insn (gen_movt (operands[0]));
2654 return;
2656 else if (code == ASHIFT)
2658 /* There is a two instruction sequence for 31 bit left shifts,
2659 but it requires r0. */
2660 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2662 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2663 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2664 return;
2668 else if (value == 0)
2670 /* This can happen even when optimizing, if there were subregs before
2671 reload. Don't output a nop here, as this is never optimized away;
2672 use a no-op move instead. */
2673 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2674 return;
2677 max = shift_insns[value];
2678 for (i = 0; i < max; i++)
2679 gen_ashift (code, shift_amounts[value][i], operands[0]);
2682 /* Same as above, but optimized for values where the topmost bits don't
2683 matter. */
2685 void
2686 gen_shifty_hi_op (int code, rtx *operands)
2688 int value = INTVAL (operands[2]);
2689 int max, i;
2690 void (*gen_fun) (int, int, rtx);
2692 /* This operation is used by and_shl for SImode values with a few
2693 high bits known to be cleared. */
2694 value &= 31;
2695 if (value == 0)
2697 emit_insn (gen_nop ());
2698 return;
2701 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2702 if (code == ASHIFT)
2704 max = ext_shift_insns[value];
2705 for (i = 0; i < max; i++)
2706 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2708 else
2709 /* When shifting right, emit the shifts in reverse order, so that
2710 solitary negative values come first. */
2711 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2712 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2715 /* Output RTL for an arithmetic right shift. */
2717 /* ??? Rewrite to use super-optimizer sequences. */
2720 expand_ashiftrt (rtx *operands)
2722 rtx wrk;
2723 char func[18];
2724 int value;
2726 if (TARGET_SH3)
2728 if (GET_CODE (operands[2]) != CONST_INT)
2730 rtx count = copy_to_mode_reg (SImode, operands[2]);
2731 emit_insn (gen_negsi2 (count, count));
2732 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2733 return 1;
2735 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2736 > 1 + SH_DYNAMIC_SHIFT_COST)
2738 rtx count
2739 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2740 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2741 return 1;
2744 if (GET_CODE (operands[2]) != CONST_INT)
2745 return 0;
2747 value = INTVAL (operands[2]) & 31;
2749 if (value == 31)
2751 /* If we are called from abs expansion, arrange things so that
2752 we can use a single MT instruction that doesn't clobber the source,
2753 if LICM can hoist out the load of the constant zero. */
2754 if (currently_expanding_to_rtl)
2756 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2757 operands[1]));
2758 emit_insn (gen_mov_neg_si_t (operands[0]));
2759 return 1;
2761 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2762 return 1;
2764 else if (value >= 16 && value <= 19)
2766 wrk = gen_reg_rtx (SImode);
2767 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2768 value -= 16;
2769 while (value--)
2770 gen_ashift (ASHIFTRT, 1, wrk);
2771 emit_move_insn (operands[0], wrk);
2772 return 1;
2774 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2775 else if (value <= 5)
2777 wrk = gen_reg_rtx (SImode);
2778 emit_move_insn (wrk, operands[1]);
2779 while (value--)
2780 gen_ashift (ASHIFTRT, 1, wrk);
2781 emit_move_insn (operands[0], wrk);
2782 return 1;
2785 wrk = gen_reg_rtx (Pmode);
2787 /* Load the value into an arg reg and call a helper. */
2788 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2789 sprintf (func, "__ashiftrt_r4_%d", value);
2790 function_symbol (wrk, func, SFUNC_STATIC);
2791 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2792 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2793 return 1;
2797 sh_dynamicalize_shift_p (rtx count)
2799 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
2802 /* Try to find a good way to implement the combiner pattern
2803 [(set (match_operand:SI 0 "register_operand" "r")
2804 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2805 (match_operand:SI 2 "const_int_operand" "n"))
2806 (match_operand:SI 3 "const_int_operand" "n"))) .
2807 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2808 return 0 for simple right / left or left/right shift combination.
2809 return 1 for a combination of shifts with zero_extend.
2810 return 2 for a combination of shifts with an AND that needs r0.
2811 return 3 for a combination of shifts with an AND that needs an extra
2812 scratch register, when the three highmost bits of the AND mask are clear.
2813 return 4 for a combination of shifts with an AND that needs an extra
2814 scratch register, when any of the three highmost bits of the AND mask
2815 is set.
2816 If ATTRP is set, store an initial right shift width in ATTRP[0],
2817 and the instruction length in ATTRP[1]. These values are not valid
2818 when returning 0.
2819 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2820 shift_amounts for the last shift value that is to be used before the
2821 sign extend. */
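/* Worked example (illustrative operand values): for LEFT_RTX == 1 and
   MASK_RTX == 0x1fe, the mask becomes 0xff once shifted right by LEFT,
   so this returns 1: zero-extend the low byte, then shift left by one
   bit (extu.b + shll), two insns in total. */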
2823 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2825 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2826 int left = INTVAL (left_rtx), right;
2827 int best = 0;
2828 int cost, best_cost = 10000;
2829 int best_right = 0, best_len = 0;
2830 int i;
2831 int can_ext;
2833 if (left < 0 || left > 31)
2834 return 0;
2835 if (GET_CODE (mask_rtx) == CONST_INT)
2836 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2837 else
2838 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2839 /* Can this be expressed as a right shift / left shift pair? */
2840 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2841 right = exact_log2 (lsb);
2842 mask2 = ~(mask + lsb - 1);
2843 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2844 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2845 if (! mask2)
2846 best_cost = shift_insns[right] + shift_insns[right + left];
2847 /* mask has no trailing zeroes <==> ! right */
2848 else if (! right && mask2 == ~(lsb2 - 1))
2850 int late_right = exact_log2 (lsb2);
2851 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2853 /* Try to use zero extend. */
2854 if (mask2 == ~(lsb2 - 1))
2856 int width, first;
2858 for (width = 8; width <= 16; width += 8)
2860 /* Can we zero-extend right away? */
2861 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2863 cost
2864 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2865 if (cost < best_cost)
2867 best = 1;
2868 best_cost = cost;
2869 best_right = right;
2870 best_len = cost;
2871 if (attrp)
2872 attrp[2] = -1;
2874 continue;
2876 /* ??? Could try to put zero extend into initial right shift,
2877 or even shift a bit left before the right shift. */
2878 /* Determine value of first part of left shift, to get to the
2879 zero extend cut-off point. */
2880 first = width - exact_log2 (lsb2) + right;
2881 if (first >= 0 && right + left - first >= 0)
2883 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2884 + ext_shift_insns[right + left - first];
2885 if (cost < best_cost)
2887 best = 1;
2888 best_cost = cost;
2889 best_right = right;
2890 best_len = cost;
2891 if (attrp)
2892 attrp[2] = first;
2897 /* Try to use r0 AND pattern */
2898 for (i = 0; i <= 2; i++)
2900 if (i > right)
2901 break;
2902 if (! CONST_OK_FOR_K08 (mask >> i))
2903 continue;
2904 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2905 if (cost < best_cost)
2907 best = 2;
2908 best_cost = cost;
2909 best_right = i;
2910 best_len = cost - 1;
2913 /* Try to use a scratch register to hold the AND operand. */
2914 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2915 for (i = 0; i <= 2; i++)
2917 if (i > right)
2918 break;
2919 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2920 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2921 if (cost < best_cost)
2923 best = 4 - can_ext;
2924 best_cost = cost;
2925 best_right = i;
2926 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2930 if (attrp)
2932 attrp[0] = best_right;
2933 attrp[1] = best_len;
2935 return best;
2938 /* This is used in length attributes of the unnamed instructions
2939 corresponding to shl_and_kind return values of 1 and 2. */
2941 shl_and_length (rtx insn)
2943 rtx set_src, left_rtx, mask_rtx;
2944 int attributes[3];
2946 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2947 left_rtx = XEXP (XEXP (set_src, 0), 1);
2948 mask_rtx = XEXP (set_src, 1);
2949 shl_and_kind (left_rtx, mask_rtx, attributes);
2950 return attributes[1];
2953 /* This is used in the length attribute of the and_shl_scratch instruction. */
2956 shl_and_scr_length (rtx insn)
2958 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2959 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
2960 rtx op = XEXP (set_src, 0);
2961 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
2962 op = XEXP (XEXP (op, 0), 0);
2963 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
2966 /* Generate rtl for instructions for which shl_and_kind advised a particular
2967 method of generating them, i.e. returned nonzero. */
2970 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2972 int attributes[3];
2973 unsigned HOST_WIDE_INT mask;
2974 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2975 int right, total_shift;
2976 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2978 right = attributes[0];
2979 total_shift = INTVAL (left_rtx) + right;
2980 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2981 switch (kind)
2983 default:
2984 return -1;
2985 case 1:
2987 int first = attributes[2];
2988 rtx operands[3];
2990 if (first < 0)
2992 emit_insn ((mask << right) <= 0xff
2993 ? gen_zero_extendqisi2 (dest,
2994 gen_lowpart (QImode, source))
2995 : gen_zero_extendhisi2 (dest,
2996 gen_lowpart (HImode, source)));
2997 source = dest;
2999 if (source != dest)
3000 emit_insn (gen_movsi (dest, source));
3001 operands[0] = dest;
3002 if (right)
3004 operands[2] = GEN_INT (right);
3005 gen_shifty_hi_op (LSHIFTRT, operands);
3007 if (first > 0)
3009 operands[2] = GEN_INT (first);
3010 gen_shifty_hi_op (ASHIFT, operands);
3011 total_shift -= first;
3012 mask <<= first;
3014 if (first >= 0)
3015 emit_insn (mask <= 0xff
3016 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3017 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3018 if (total_shift > 0)
3020 operands[2] = GEN_INT (total_shift);
3021 gen_shifty_hi_op (ASHIFT, operands);
3023 break;
3025 case 4:
3026 shift_gen_fun = gen_shifty_op;
3027 case 3:
3028 /* If the topmost bit that matters is set, set the topmost bits
3029 that don't matter. This way, we might be able to get a shorter
3030 signed constant. */
3031 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3032 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3033 case 2:
3034 /* Don't expand fine-grained when combining, because that will
3035 make the pattern fail. */
3036 if (currently_expanding_to_rtl
3037 || reload_in_progress || reload_completed)
3039 rtx operands[3];
3041 /* Cases 3 and 4 should be handled by this split
3042 only while combining */
3043 gcc_assert (kind <= 2);
3044 if (right)
3046 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3047 source = dest;
3049 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3050 if (total_shift)
3052 operands[0] = dest;
3053 operands[1] = dest;
3054 operands[2] = GEN_INT (total_shift);
3055 shift_gen_fun (ASHIFT, operands);
3057 break;
3059 else
3061 int neg = 0;
3062 if (kind != 4 && total_shift < 16)
3064 neg = -ext_shift_amounts[total_shift][1];
3065 if (neg > 0)
3066 neg -= ext_shift_amounts[total_shift][2];
3067 else
3068 neg = 0;
3070 emit_insn (gen_and_shl_scratch (dest, source,
3071 GEN_INT (right),
3072 GEN_INT (mask),
3073 GEN_INT (total_shift + neg),
3074 GEN_INT (neg)));
3075 emit_insn (gen_movsi (dest, dest));
3076 break;
3079 return 0;
3082 /* Try to find a good way to implement the combiner pattern
3083 [(set (match_operand:SI 0 "register_operand" "=r")
3084 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3085 (match_operand:SI 2 "const_int_operand" "n")
3086 (match_operand:SI 3 "const_int_operand" "n")
3087 (const_int 0)))
3088 (clobber (reg:SI T_REG))]
3089 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3090 return 0 for simple left / right shift combination.
3091 return 1 for left shift / 8 bit sign extend / left shift.
3092 return 2 for left shift / 16 bit sign extend / left shift.
3093 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3094 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3095 return 5 for left shift / 16 bit sign extend / right shift
3096 return 6 for < 8 bit sign extend / left shift.
3097 return 7 for < 8 bit sign extend / left shift / single right shift.
3098 If COSTP is nonzero, assign the calculated cost to *COSTP. */
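/* Worked example (illustrative operand values): LEFT_RTX == 1 and
   SIZE_RTX == 9, i.e. an 8-bit field sign-extended and shifted left by one
   bit, yields kind 1 with cost 2: no initial shift, an 8-bit sign
   extension, then a one-bit left shift (exts.b + shll). */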
3101 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3103 int left, size, insize, ext;
3104 int cost = 0, best_cost;
3105 int kind;
3107 left = INTVAL (left_rtx);
3108 size = INTVAL (size_rtx);
3109 insize = size - left;
3110 gcc_assert (insize > 0);
3111 /* Default to left / right shift. */
3112 kind = 0;
3113 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3114 if (size <= 16)
3116 /* 16 bit shift / sign extend / 16 bit shift */
3117 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3118 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3119 below, by alternative 3 or something even better. */
3120 if (cost < best_cost)
3122 kind = 5;
3123 best_cost = cost;
3126 /* Try a plain sign extend between two shifts. */
3127 for (ext = 16; ext >= insize; ext -= 8)
3129 if (ext <= size)
3131 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3132 if (cost < best_cost)
3134 kind = ext / (unsigned) 8;
3135 best_cost = cost;
3138 /* Check if we can do a sloppy shift with a final signed shift
3139 restoring the sign. */
3140 if (EXT_SHIFT_SIGNED (size - ext))
3141 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3142 /* If not, maybe it's still cheaper to do the second shift sloppy,
3143 and do a final sign extend? */
3144 else if (size <= 16)
3145 cost = ext_shift_insns[ext - insize] + 1
3146 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3147 else
3148 continue;
3149 if (cost < best_cost)
3151 kind = ext / (unsigned) 8 + 2;
3152 best_cost = cost;
3155 /* Check if we can sign extend in r0 */
3156 if (insize < 8)
3158 cost = 3 + shift_insns[left];
3159 if (cost < best_cost)
3161 kind = 6;
3162 best_cost = cost;
3164 /* Try the same with a final signed shift. */
3165 if (left < 31)
3167 cost = 3 + ext_shift_insns[left + 1] + 1;
3168 if (cost < best_cost)
3170 kind = 7;
3171 best_cost = cost;
3175 if (TARGET_SH3)
3177 /* Try to use a dynamic shift. */
3178 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3179 if (cost < best_cost)
3181 kind = 0;
3182 best_cost = cost;
3185 if (costp)
3186 *costp = cost;
3187 return kind;
3190 /* Function to be used in the length attribute of the instructions
3191 implementing this pattern. */
3194 shl_sext_length (rtx insn)
3196 rtx set_src, left_rtx, size_rtx;
3197 int cost;
3199 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3200 left_rtx = XEXP (XEXP (set_src, 0), 1);
3201 size_rtx = XEXP (set_src, 1);
3202 shl_sext_kind (left_rtx, size_rtx, &cost);
3203 return cost;
3206 /* Generate rtl for this pattern */
3209 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3211 int kind;
3212 int left, size, insize, cost;
3213 rtx operands[3];
3215 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3216 left = INTVAL (left_rtx);
3217 size = INTVAL (size_rtx);
3218 insize = size - left;
3219 switch (kind)
3221 case 1:
3222 case 2:
3223 case 3:
3224 case 4:
3226 int ext = kind & 1 ? 8 : 16;
3227 int shift2 = size - ext;
3229 /* Don't expand fine-grained when combining, because that will
3230 make the pattern fail. */
3231 if (! currently_expanding_to_rtl
3232 && ! reload_in_progress && ! reload_completed)
3234 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3235 emit_insn (gen_movsi (dest, source));
3236 break;
3238 if (dest != source)
3239 emit_insn (gen_movsi (dest, source));
3240 operands[0] = dest;
3241 if (ext - insize)
3243 operands[2] = GEN_INT (ext - insize);
3244 gen_shifty_hi_op (ASHIFT, operands);
3246 emit_insn (kind & 1
3247 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3248 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3249 if (kind <= 2)
3251 if (shift2)
3253 operands[2] = GEN_INT (shift2);
3254 gen_shifty_op (ASHIFT, operands);
3257 else
3259 if (shift2 > 0)
3261 if (EXT_SHIFT_SIGNED (shift2))
3263 operands[2] = GEN_INT (shift2 + 1);
3264 gen_shifty_op (ASHIFT, operands);
3265 operands[2] = const1_rtx;
3266 gen_shifty_op (ASHIFTRT, operands);
3267 break;
3269 operands[2] = GEN_INT (shift2);
3270 gen_shifty_hi_op (ASHIFT, operands);
3272 else if (shift2)
3274 operands[2] = GEN_INT (-shift2);
3275 gen_shifty_hi_op (LSHIFTRT, operands);
3277 emit_insn (size <= 8
3278 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3279 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3281 break;
3283 case 5:
3285 int i = 16 - size;
3286 if (! currently_expanding_to_rtl
3287 && ! reload_in_progress && ! reload_completed)
3288 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3289 else
3291 operands[0] = dest;
3292 operands[2] = GEN_INT (16 - insize);
3293 gen_shifty_hi_op (ASHIFT, operands);
3294 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3296 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3297 while (--i >= 0)
3298 gen_ashift (ASHIFTRT, 1, dest);
3299 break;
3301 case 6:
3302 case 7:
3303 /* Don't expand fine-grained when combining, because that will
3304 make the pattern fail. */
3305 if (! currently_expanding_to_rtl
3306 && ! reload_in_progress && ! reload_completed)
3308 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3309 emit_insn (gen_movsi (dest, source));
3310 break;
3312 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3313 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3314 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3315 operands[0] = dest;
3316 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3317 gen_shifty_op (ASHIFT, operands);
3318 if (kind == 7)
3319 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3320 break;
3321 default:
3322 return -1;
3324 return 0;
3327 /* Prefix a symbol_ref name with "datalabel". */
3330 gen_datalabel_ref (rtx sym)
3332 const char *str;
3334 if (GET_CODE (sym) == LABEL_REF)
3335 return gen_rtx_CONST (GET_MODE (sym),
3336 gen_rtx_UNSPEC (GET_MODE (sym),
3337 gen_rtvec (1, sym),
3338 UNSPEC_DATALABEL));
3340 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3342 str = XSTR (sym, 0);
3343 /* Share all SYMBOL_REF strings with the same value - that is important
3344 for cse. */
3345 str = IDENTIFIER_POINTER (get_identifier (str));
3346 XSTR (sym, 0) = str;
3348 return sym;
3352 static alloc_pool label_ref_list_pool;
3354 typedef struct label_ref_list_d
3356 rtx label;
3357 struct label_ref_list_d *next;
3358 } *label_ref_list_t;
3360 /* The SH cannot load a large constant into a register; constants have to
3361 come from a pc relative load. The reference of a pc relative load
3362 instruction must be less than 1k in front of the instruction. This
3363 means that we often have to dump a constant inside a function, and
3364 generate code to branch around it.
3366 It is important to minimize this, since the branches will slow things
3367 down and make things bigger.
3369 Worst case code looks like:
3371 mov.l L1,rn
3372 bra L2
3373 nop
3374 align
3375 L1: .long value
3376 L2:
3377 ..
3379 mov.l L3,rn
3380 bra L4
3381 nop
3382 align
3383 L3: .long value
3384 L4:
3385 ..
3387 We fix this by performing a scan before scheduling, which notices which
3388 instructions need to have their operands fetched from the constant table
3389 and builds the table.
3391 The algorithm is:
3393 scan, find an instruction which needs a pcrel move. Look forward, find the
3394 last barrier which is within MAX_COUNT bytes of the requirement.
3395 If there isn't one, make one. Process all the instructions between
3396 the find and the barrier.
3398 In the above example, we can tell that L3 is within 1k of L1, so
3399 the first move can be shrunk from the 3 insn+constant sequence into
3400 just 1 insn, and the constant moved to L3 to make:
3402 mov.l L1,rn
3403 ..
3404 mov.l L3,rn
3405 bra L4
3406 nop
3407 align
3408 L3:.long value
3409 L4:.long value
3411 Then the second move becomes the target for the shortening process. */
3413 typedef struct
3415 rtx value; /* Value in table. */
3416 rtx label; /* Label of value. */
3417 label_ref_list_t wend; /* End of window. */
3418 enum machine_mode mode; /* Mode of value. */
3420 /* True if this constant is accessed as part of a post-increment
3421 sequence. Note that HImode constants are never accessed in this way. */
3422 bool part_of_sequence_p;
3423 } pool_node;
3425 /* The maximum number of constants that can fit into one pool, since
3426 constants in the range 0..510 are at least 2 bytes long, and in the
3427 range from there to 1018 at least 4 bytes. */
3429 #define MAX_POOL_SIZE 372
3430 static pool_node pool_vector[MAX_POOL_SIZE];
3431 static int pool_size;
3432 static rtx pool_window_label;
3433 static int pool_window_last;
3435 static int max_labelno_before_reorg;
3437 /* ??? If we need a constant in HImode which is the truncated value of a
3438 constant we need in SImode, we could combine the two entries thus saving
3439 two bytes. Is this common enough to be worth the effort of implementing
3440 it? */
3442 /* ??? This stuff should be done at the same time that we shorten branches.
3443 As it is now, we must assume that all branches are the maximum size, and
3444 this causes us to almost always output constant pools sooner than
3445 necessary. */
3447 /* Add a constant to the pool and return its label. */
3449 static rtx
3450 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3452 int i;
3453 rtx lab, new_rtx;
3454 label_ref_list_t ref, newref;
3456 /* First see if we've already got it. */
3457 for (i = 0; i < pool_size; i++)
3459 if (x->code == pool_vector[i].value->code
3460 && mode == pool_vector[i].mode)
3462 if (x->code == CODE_LABEL)
3464 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3465 continue;
3467 if (rtx_equal_p (x, pool_vector[i].value))
3469 lab = new_rtx = 0;
3470 if (! last_value
3471 || ! i
3472 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3474 new_rtx = gen_label_rtx ();
3475 LABEL_REFS (new_rtx) = pool_vector[i].label;
3476 pool_vector[i].label = lab = new_rtx;
3478 if (lab && pool_window_label)
3480 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3481 newref->label = pool_window_label;
3482 ref = pool_vector[pool_window_last].wend;
3483 newref->next = ref;
3484 pool_vector[pool_window_last].wend = newref;
3486 if (new_rtx)
3487 pool_window_label = new_rtx;
3488 pool_window_last = i;
3489 return lab;
3494 /* Need a new one. */
3495 pool_vector[pool_size].value = x;
3496 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3498 lab = 0;
3499 pool_vector[pool_size - 1].part_of_sequence_p = true;
3501 else
3502 lab = gen_label_rtx ();
3503 pool_vector[pool_size].mode = mode;
3504 pool_vector[pool_size].label = lab;
3505 pool_vector[pool_size].wend = NULL;
3506 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3507 if (lab && pool_window_label)
3509 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3510 newref->label = pool_window_label;
3511 ref = pool_vector[pool_window_last].wend;
3512 newref->next = ref;
3513 pool_vector[pool_window_last].wend = newref;
3515 if (lab)
3516 pool_window_label = lab;
3517 pool_window_last = pool_size;
3518 pool_size++;
3519 return lab;
3522 /* Output the literal table. START, if nonzero, is the first instruction
3523 this table is needed for, and also indicates that there is at least one
3524 casesi_worker_2 instruction; we have to emit the operand3 labels from
3525 these insns at a 4-byte aligned position. BARRIER is the barrier
3526 after which we are to place the table. */
3528 static void
3529 dump_table (rtx start, rtx barrier)
3531 rtx scan = barrier;
3532 int i;
3533 int need_align = 1;
3534 rtx lab;
3535 label_ref_list_t ref;
3536 int have_df = 0;
3538 /* Do two passes; the first time, dump out the HI sized constants. */
3540 for (i = 0; i < pool_size; i++)
3542 pool_node *p = &pool_vector[i];
3544 if (p->mode == HImode)
3546 if (need_align)
3548 scan = emit_insn_after (gen_align_2 (), scan);
3549 need_align = 0;
3551 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3552 scan = emit_label_after (lab, scan);
3553 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3554 scan);
3555 for (ref = p->wend; ref; ref = ref->next)
3557 lab = ref->label;
3558 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3561 else if (p->mode == DFmode)
3562 have_df = 1;
3565 need_align = 1;
3567 if (start)
3569 scan = emit_insn_after (gen_align_4 (), scan);
3570 need_align = 0;
3571 for (; start != barrier; start = NEXT_INSN (start))
3572 if (GET_CODE (start) == INSN
3573 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3575 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3576 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3578 scan = emit_label_after (lab, scan);
3581 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3583 rtx align_insn = NULL_RTX;
3585 scan = emit_label_after (gen_label_rtx (), scan);
3586 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3587 need_align = 0;
3589 for (i = 0; i < pool_size; i++)
3591 pool_node *p = &pool_vector[i];
3593 switch (p->mode)
3595 case HImode:
3596 break;
3597 case SImode:
3598 case SFmode:
3599 if (align_insn && !p->part_of_sequence_p)
3601 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3602 emit_label_before (lab, align_insn);
3603 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3604 align_insn);
3605 for (ref = p->wend; ref; ref = ref->next)
3607 lab = ref->label;
3608 emit_insn_before (gen_consttable_window_end (lab),
3609 align_insn);
3611 delete_insn (align_insn);
3612 align_insn = NULL_RTX;
3613 continue;
3615 else
3617 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3618 scan = emit_label_after (lab, scan);
3619 scan = emit_insn_after (gen_consttable_4 (p->value,
3620 const0_rtx), scan);
3621 need_align = ! need_align;
3623 break;
3624 case DFmode:
3625 if (need_align)
3627 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3628 align_insn = scan;
3629 need_align = 0;
3631 case DImode:
3632 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3633 scan = emit_label_after (lab, scan);
3634 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3635 scan);
3636 break;
3637 default:
3638 gcc_unreachable ();
3641 if (p->mode != HImode)
3643 for (ref = p->wend; ref; ref = ref->next)
3645 lab = ref->label;
3646 scan = emit_insn_after (gen_consttable_window_end (lab),
3647 scan);
3652 pool_size = 0;
3655 for (i = 0; i < pool_size; i++)
3657 pool_node *p = &pool_vector[i];
3659 switch (p->mode)
3661 case HImode:
3662 break;
3663 case SImode:
3664 case SFmode:
3665 if (need_align)
3667 need_align = 0;
3668 scan = emit_label_after (gen_label_rtx (), scan);
3669 scan = emit_insn_after (gen_align_4 (), scan);
3671 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3672 scan = emit_label_after (lab, scan);
3673 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3674 scan);
3675 break;
3676 case DFmode:
3677 case DImode:
3678 if (need_align)
3680 need_align = 0;
3681 scan = emit_label_after (gen_label_rtx (), scan);
3682 scan = emit_insn_after (gen_align_4 (), scan);
3684 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3685 scan = emit_label_after (lab, scan);
3686 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3687 scan);
3688 break;
3689 default:
3690 gcc_unreachable ();
3693 if (p->mode != HImode)
3695 for (ref = p->wend; ref; ref = ref->next)
3697 lab = ref->label;
3698 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3703 scan = emit_insn_after (gen_consttable_end (), scan);
3704 scan = emit_barrier_after (scan);
3705 pool_size = 0;
3706 pool_window_label = NULL_RTX;
3707 pool_window_last = 0;
3710 /* Return nonzero if constant would be an ok source for a
3711 mov.w instead of a mov.l. */
3713 static int
3714 hi_const (rtx src)
3716 return (GET_CODE (src) == CONST_INT
3717 && INTVAL (src) >= -32768
3718 && INTVAL (src) <= 32767);
3721 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3723 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3725 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3726 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3727 need to fix it if the input value is CONST_OK_FOR_I08. */
3729 static int
3730 broken_move (rtx insn)
3732 if (GET_CODE (insn) == INSN)
3734 rtx pat = PATTERN (insn);
3735 if (GET_CODE (pat) == PARALLEL)
3736 pat = XVECEXP (pat, 0, 0);
3737 if (GET_CODE (pat) == SET
3738 /* We can load any 8-bit value if we don't care what the high
3739 order bits end up as. */
3740 && GET_MODE (SET_DEST (pat)) != QImode
3741 && (CONSTANT_P (SET_SRC (pat))
3742 /* Match mova_const. */
3743 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3744 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3745 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3746 && ! (TARGET_SH2E
3747 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3748 && (fp_zero_operand (SET_SRC (pat))
3749 || fp_one_operand (SET_SRC (pat)))
3750 /* ??? If this is a -m4 or -m4-single compilation, in general
3751 we don't know the current setting of fpscr, so disable fldi.
3752 There is an exception if this was a register-register move
3753 before reload - and hence it was ascertained that we have
3754 single precision setting - and in a post-reload optimization
3755 we changed this to do a constant load. In that case
3756 we don't have an r0 clobber, hence we must use fldi. */
3757 && (! TARGET_SH4 || TARGET_FMOVD
3758 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3759 == SCRATCH))
3760 && GET_CODE (SET_DEST (pat)) == REG
3761 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3762 && ! (TARGET_SH2A
3763 && GET_MODE (SET_DEST (pat)) == SImode
3764 && (satisfies_constraint_I20 (SET_SRC (pat))
3765 || satisfies_constraint_I28 (SET_SRC (pat))))
3766 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3767 return 1;
3770 return 0;
3773 static int
3774 mova_p (rtx insn)
3776 return (GET_CODE (insn) == INSN
3777 && GET_CODE (PATTERN (insn)) == SET
3778 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3779 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3780 /* Don't match mova_const. */
3781 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3784 /* Fix up a mova from a switch that went out of range. */
3785 static void
3786 fixup_mova (rtx mova)
3788 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3789 if (! flag_pic)
3791 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3792 INSN_CODE (mova) = -1;
3794 else
3796 rtx worker = mova;
3797 rtx lab = gen_label_rtx ();
3798 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
3802 worker = NEXT_INSN (worker);
3803 gcc_assert (worker
3804 && GET_CODE (worker) != CODE_LABEL
3805 && GET_CODE (worker) != JUMP_INSN);
3806 } while (GET_CODE (worker) == NOTE
3807 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3808 wpat = PATTERN (worker);
3809 wpat0 = XVECEXP (wpat, 0, 0);
3810 wpat1 = XVECEXP (wpat, 0, 1);
3811 wsrc = SET_SRC (wpat0);
3812 PATTERN (worker) = (gen_casesi_worker_2
3813 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3814 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3815 XEXP (wpat1, 0)));
3816 INSN_CODE (worker) = -1;
3817 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3818 base = gen_rtx_LABEL_REF (Pmode, lab);
3819 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
3820 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3821 INSN_CODE (mova) = -1;
3825 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3826 *num_mova, and check if the new mova is not nested within the first one.
3827 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3828 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3829 static int
3830 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3832 int n_addr = 0; /* Initialization to shut up spurious warning. */
3833 int f_target, n_target = 0; /* Likewise. */
3835 if (optimize)
3837 /* If NEW_MOVA has no address yet, it will be handled later. */
3838 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
3839 return -1;
3841 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3842 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
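      /* A mova can only address forward, and only within about 1022 bytes
	 (its 1020-byte range plus the pc offset adjustments described in
	 find_barrier and sh_reorg).  */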
3843 if (n_addr > n_target || n_addr + 1022 < n_target)
3845 /* Change the mova into a load.
3846 broken_move will then return true for it. */
3847 fixup_mova (new_mova);
3848 return 1;
3851 if (!(*num_mova)++)
3853 *first_mova = new_mova;
3854 return 2;
3856 if (!optimize
3857 || ((f_target
3858 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3859 >= n_target))
3860 return -1;
3862 (*num_mova)--;
3863 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3864 > n_target - n_addr)
3866 fixup_mova (*first_mova);
3867 return 0;
3869 else
3871 fixup_mova (new_mova);
3872 return 1;
3876 /* Find the last barrier from insn FROM which is close enough to hold the
3877 constant pool. If we can't find one, then create one near the end of
3878 the range. */
3880 static rtx
3881 find_barrier (int num_mova, rtx mova, rtx from)
3883 int count_si = 0;
3884 int count_hi = 0;
3885 int found_hi = 0;
3886 int found_si = 0;
3887 int found_di = 0;
3888 int hi_align = 2;
3889 int si_align = 2;
3890 int leading_mova = num_mova;
3891 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3892 int si_limit;
3893 int hi_limit;
3894 rtx orig = from;
3896 /* For HImode: range is 510, add 4 because pc counts from address of
3897 second instruction after this one, subtract 2 for the jump instruction
3898 that we may need to emit before the table, subtract 2 for the instruction
3899 that fills the jump delay slot (in very rare cases, reorg will take an
3900 instruction from after the constant pool or will leave the delay slot
3901 empty). This gives 510.
3902 For SImode: range is 1020, add 4 because pc counts from address of
3903 second instruction after this one, subtract 2 in case pc is 2 byte
3904 aligned, subtract 2 for the jump instruction that we may need to emit
3905 before the table, subtract 2 for the instruction that fills the jump
3906 delay slot. This gives 1018. */
3908 /* The branch will always be shortened now that the reference address for
3909 forward branches is the successor address, thus we need no longer make
3910 adjustments to the [sh]i_limit for -O0. */
3912 si_limit = 1018;
3913 hi_limit = 510;
3915 while (from && count_si < si_limit && count_hi < hi_limit)
3917 int inc = get_attr_length (from);
3918 int new_align = 1;
3920 /* If this is a label that existed at the time of the compute_alignments
3921 call, determine the alignment. N.B. When find_barrier recurses for
3922 an out-of-reach mova, we might see labels at the start of previously
3923 inserted constant tables. */
3924 if (GET_CODE (from) == CODE_LABEL
3925 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3927 if (optimize)
3928 new_align = 1 << label_to_alignment (from);
3929 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3930 new_align = 1 << barrier_align (from);
3931 else
3932 new_align = 1;
3933 inc = 0;
3935 /* In case we are scanning a constant table because of recursion, check
3936 for explicit alignments. If the table is long, we might be forced
3937 to emit the new table in front of it; the length of the alignment
3938 might be the last straw. */
3939 else if (GET_CODE (from) == INSN
3940 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3941 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3942 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3943 /* When we find the end of a constant table, paste the new constant
3944 at the end. That is better than putting it in front because
3945 this way, we don't need extra alignment for adding a 4-byte-aligned
3946 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3947 else if (GET_CODE (from) == INSN
3948 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3949 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3950 return from;
3952 if (GET_CODE (from) == BARRIER)
3954 rtx next;
3956 found_barrier = from;
3958 /* If we are at the end of the function, or in front of an alignment
3959 instruction, we need not insert an extra alignment. We prefer
3960 this kind of barrier. */
3961 if (barrier_align (from) > 2)
3962 good_barrier = from;
3964 /* If we are at the end of a hot/cold block, dump the constants
3965 here. */
3966 next = NEXT_INSN (from);
3967 if (next
3968 && NOTE_P (next)
3969 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3970 break;
3973 if (broken_move (from))
3975 rtx pat, src, dst;
3976 enum machine_mode mode;
3978 pat = PATTERN (from);
3979 if (GET_CODE (pat) == PARALLEL)
3980 pat = XVECEXP (pat, 0, 0);
3981 src = SET_SRC (pat);
3982 dst = SET_DEST (pat);
3983 mode = GET_MODE (dst);
3985 /* We must explicitly check the mode, because sometimes the
3986 front end will generate code to load unsigned constants into
3987 HImode targets without properly sign extending them. */
3988 if (mode == HImode
3989 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3991 found_hi += 2;
3992 /* We put the short constants before the long constants, so
3993 we must count the length of short constants in the range
3994 for the long constants. */
3995 /* ??? This isn't optimal, but is easy to do. */
3996 si_limit -= 2;
3998 else
4000 /* We dump DF/DI constants before SF/SI ones, because
4001 the limit is the same, but the alignment requirements
4002 are higher. We may waste up to 4 additional bytes
4003 for alignment, and the DF/DI constant may have
4004 another SF/SI constant placed before it. */
4005 if (TARGET_SHCOMPACT
4006 && ! found_di
4007 && (mode == DFmode || mode == DImode))
4009 found_di = 1;
4010 si_limit -= 8;
4012 while (si_align > 2 && found_si + si_align - 2 > count_si)
4013 si_align >>= 1;
4014 if (found_si > count_si)
4015 count_si = found_si;
4016 found_si += GET_MODE_SIZE (mode);
4017 if (num_mova)
4018 si_limit -= GET_MODE_SIZE (mode);
4022 if (mova_p (from))
4024 switch (untangle_mova (&num_mova, &mova, from))
4026 case 0: return find_barrier (0, 0, mova);
4027 case 2:
4029 leading_mova = 0;
4030 barrier_before_mova
4031 = good_barrier ? good_barrier : found_barrier;
4033 default: break;
4035 if (found_si > count_si)
4036 count_si = found_si;
4038 else if (GET_CODE (from) == JUMP_INSN
4039 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4040 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4042 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4043 || (num_mova
4044 && (prev_nonnote_insn (from)
4045 == XEXP (MOVA_LABELREF (mova), 0))))
4046 num_mova--;
4047 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4049 /* We have just passed the barrier in front of the
4050 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4051 the ADDR_DIFF_VEC is accessed as data, just like our pool
4052 constants, this is a good opportunity to accommodate what
4053 we have gathered so far.
4054 If we waited any longer, we could end up at a barrier in
4055 front of code, which gives worse cache usage for separated
4056 instruction / data caches. */
4057 good_barrier = found_barrier;
4058 break;
4060 else
4062 rtx body = PATTERN (from);
4063 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4066 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4067 else if (GET_CODE (from) == JUMP_INSN
4068 && ! TARGET_SH2
4069 && ! TARGET_SMALLCODE)
4070 new_align = 4;
4072 if (found_si)
4074 count_si += inc;
4075 if (new_align > si_align)
4077 si_limit -= (count_si - 1) & (new_align - si_align);
4078 si_align = new_align;
4080 count_si = (count_si + new_align - 1) & -new_align;
4082 if (found_hi)
4084 count_hi += inc;
4085 if (new_align > hi_align)
4087 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4088 hi_align = new_align;
4090 count_hi = (count_hi + new_align - 1) & -new_align;
4092 from = NEXT_INSN (from);
4095 if (num_mova)
4097 if (leading_mova)
4099 /* Try as we might, the leading mova is out of range. Change
4100 it into a load (which will become a pcload) and retry. */
4101 fixup_mova (mova);
4102 return find_barrier (0, 0, mova);
4104 else
4106 /* Insert the constant pool table before the mova instruction,
4107 to prevent the mova label reference from going out of range. */
4108 from = mova;
4109 good_barrier = found_barrier = barrier_before_mova;
4113 if (found_barrier)
4115 if (good_barrier && next_real_insn (found_barrier))
4116 found_barrier = good_barrier;
4118 else
4120 /* We didn't find a barrier in time to dump our stuff,
4121 so we'll make one. */
4122 rtx label = gen_label_rtx ();
4124 /* If we exceeded the range, then we must back up over the last
4125 instruction we looked at. Otherwise, we just need to undo the
4126 NEXT_INSN at the end of the loop. */
4127 if (PREV_INSN (from) != orig
4128 && (count_hi > hi_limit || count_si > si_limit))
4129 from = PREV_INSN (PREV_INSN (from));
4130 else
4131 from = PREV_INSN (from);
4133 /* Walk back to be just before any jump or label.
4134 Putting it before a label reduces the number of times the branch
4135 around the constant pool table will be hit. Putting it before
4136 a jump makes it more likely that the bra delay slot will be
4137 filled. */
4138 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4139 || GET_CODE (from) == CODE_LABEL)
4140 from = PREV_INSN (from);
4142 from = emit_jump_insn_after (gen_jump (label), from);
4143 JUMP_LABEL (from) = label;
4144 LABEL_NUSES (label) = 1;
4145 found_barrier = emit_barrier_after (from);
4146 emit_label_after (label, found_barrier);
4149 return found_barrier;
4152 /* If the instruction INSN is implemented by a special function, and we can
4153 positively find the register that is used to call the sfunc, and this
4154 register is not used anywhere else in this instruction - except as the
4155 destination of a set - return this register; else, return 0. */
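/* Note: sfuncs are special library helpers (e.g. the SH division and block
   move routines) that are called through a register and follow non-standard
   register usage conventions, hence the dedicated TYPE_SFUNC insn type.  */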
4157 sfunc_uses_reg (rtx insn)
4159 int i;
4160 rtx pattern, part, reg_part, reg;
4162 if (GET_CODE (insn) != INSN)
4163 return 0;
4164 pattern = PATTERN (insn);
4165 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4166 return 0;
4168 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4170 part = XVECEXP (pattern, 0, i);
4171 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4172 reg_part = part;
4174 if (! reg_part)
4175 return 0;
4176 reg = XEXP (reg_part, 0);
4177 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4179 part = XVECEXP (pattern, 0, i);
4180 if (part == reg_part || GET_CODE (part) == CLOBBER)
4181 continue;
4182 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4183 && GET_CODE (SET_DEST (part)) == REG)
4184 ? SET_SRC (part) : part)))
4185 return 0;
4187 return reg;
4190 /* See if the only way in which INSN uses REG is by calling it, or by
4191 setting it while calling it. Set *SET to a SET rtx if the register
4192 is set by INSN. */
4194 static int
4195 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4197 rtx pattern, reg2;
4199 *set = NULL_RTX;
4201 reg2 = sfunc_uses_reg (insn);
4202 if (reg2 && REGNO (reg2) == REGNO (reg))
4204 pattern = single_set (insn);
4205 if (pattern
4206 && GET_CODE (SET_DEST (pattern)) == REG
4207 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4208 *set = pattern;
4209 return 0;
4211 if (GET_CODE (insn) != CALL_INSN)
4213 /* We don't use rtx_equal_p because we don't care if the mode is
4214 different. */
4215 pattern = single_set (insn);
4216 if (pattern
4217 && GET_CODE (SET_DEST (pattern)) == REG
4218 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4220 rtx par, part;
4221 int i;
4223 *set = pattern;
4224 par = PATTERN (insn);
4225 if (GET_CODE (par) == PARALLEL)
4226 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4228 part = XVECEXP (par, 0, i);
4229 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4230 return 1;
4232 return reg_mentioned_p (reg, SET_SRC (pattern));
4235 return 1;
4238 pattern = PATTERN (insn);
4240 if (GET_CODE (pattern) == PARALLEL)
4242 int i;
4244 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4245 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4246 return 1;
4247 pattern = XVECEXP (pattern, 0, 0);
4250 if (GET_CODE (pattern) == SET)
4252 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4254 /* We don't use rtx_equal_p, because we don't care if the
4255 mode is different. */
4256 if (GET_CODE (SET_DEST (pattern)) != REG
4257 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4258 return 1;
4260 *set = pattern;
4263 pattern = SET_SRC (pattern);
4266 if (GET_CODE (pattern) != CALL
4267 || GET_CODE (XEXP (pattern, 0)) != MEM
4268 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4269 return 1;
4271 return 0;
4274 /* Given X, a pattern of an insn or a part of it, return a mask of used
4275 general registers. Bits 0..15 mean that the respective registers
4276 are used as inputs in the instruction. Bits 16..31 mean that the
4277 registers 0..15, respectively, are used as outputs, or are clobbered.
4278 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
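/* For example, (set (reg:SI 3) (reg:SI 5)) yields bit 5 for the input use
   of r5 and bit 19 (16 + 3) for the output use of r3, assuming SImode fits
   in a single hard register.  */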
4280 regs_used (rtx x, int is_dest)
4282 enum rtx_code code;
4283 const char *fmt;
4284 int i, used = 0;
4286 if (! x)
4287 return used;
4288 code = GET_CODE (x);
4289 switch (code)
4291 case REG:
4292 if (REGNO (x) < 16)
4293 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4294 << (REGNO (x) + is_dest));
4295 return 0;
4296 case SUBREG:
4298 rtx y = SUBREG_REG (x);
4300 if (GET_CODE (y) != REG)
4301 break;
4302 if (REGNO (y) < 16)
4303 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4304 << (REGNO (y) +
4305 subreg_regno_offset (REGNO (y),
4306 GET_MODE (y),
4307 SUBREG_BYTE (x),
4308 GET_MODE (x)) + is_dest));
4309 return 0;
4311 case SET:
4312 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4313 case RETURN:
4314 /* If there was a return value, it must have been indicated with USE. */
4315 return 0x00ffff00;
4316 case CLOBBER:
4317 is_dest = 1;
4318 break;
4319 case MEM:
4320 is_dest = 0;
4321 break;
4322 case CALL:
4323 used |= 0x00ff00f0;
4324 break;
4325 default:
4326 break;
4329 fmt = GET_RTX_FORMAT (code);
4331 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4333 if (fmt[i] == 'E')
4335 register int j;
4336 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4337 used |= regs_used (XVECEXP (x, i, j), is_dest);
4339 else if (fmt[i] == 'e')
4340 used |= regs_used (XEXP (x, i), is_dest);
4342 return used;
4345 /* Create an instruction that prevents redirection of a conditional branch
4346 to the destination of the JUMP with address ADDR.
4347 If the branch needs to be implemented as an indirect jump, try to find
4348 a scratch register for it.
4349 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4350 If any preceding insn that doesn't fit into a delay slot is good enough,
4351 pass 1. Pass 2 if a definite blocking insn is needed.
4352 -1 is used internally to avoid deep recursion.
4353 If a blocking instruction is made or recognized, return it. */
4355 static rtx
4356 gen_block_redirect (rtx jump, int addr, int need_block)
4358 int dead = 0;
4359 rtx prev = prev_nonnote_insn (jump);
4360 rtx dest;
4362 /* First, check if we already have an instruction that satisfies our need. */
4363 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4365 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4366 return prev;
4367 if (GET_CODE (PATTERN (prev)) == USE
4368 || GET_CODE (PATTERN (prev)) == CLOBBER
4369 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4370 prev = jump;
4371 else if ((need_block &= ~1) < 0)
4372 return prev;
4373 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4374 need_block = 0;
4376 if (GET_CODE (PATTERN (jump)) == RETURN)
4378 if (! need_block)
4379 return prev;
4380 /* Reorg even does nasty things with return insns that cause branches
4381 to go out of range - see find_end_label and callers. */
4382 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4384 /* We can't use JUMP_LABEL here because it might be undefined
4385 when not optimizing. */
4386 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4387 /* If the branch is out of range, try to find a scratch register for it. */
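  /* The unsigned comparison below checks whether the destination lies
     outside roughly [ADDR - 4092, ADDR + 4098], i.e. beyond the reach of an
     SH pc-relative branch (bra), so the branch may have to become an
     indirect jump through a scratch register.  */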
4388 if (optimize
4389 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4390 > 4092 + 4098))
4392 rtx scan;
4393 /* Don't look for the stack pointer as a scratch register,
4394 it would cause trouble if an interrupt occurred. */
4395 unsigned attempt = 0x7fff, used;
4396 int jump_left = flag_expensive_optimizations + 1;
4398 /* It is likely that the most recent eligible instruction is wanted for
4399 the delay slot. Therefore, find out which registers it uses, and
4400 try to avoid using them. */
4402 for (scan = jump; (scan = PREV_INSN (scan)); )
4404 enum rtx_code code;
4406 if (INSN_DELETED_P (scan))
4407 continue;
4408 code = GET_CODE (scan);
4409 if (code == CODE_LABEL || code == JUMP_INSN)
4410 break;
4411 if (code == INSN
4412 && GET_CODE (PATTERN (scan)) != USE
4413 && GET_CODE (PATTERN (scan)) != CLOBBER
4414 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4416 attempt &= ~regs_used (PATTERN (scan), 0);
4417 break;
4420 for (used = dead = 0, scan = JUMP_LABEL (jump);
4421 (scan = NEXT_INSN (scan)); )
4423 enum rtx_code code;
4425 if (INSN_DELETED_P (scan))
4426 continue;
4427 code = GET_CODE (scan);
4428 if (INSN_P (scan))
4430 used |= regs_used (PATTERN (scan), 0);
4431 if (code == CALL_INSN)
4432 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
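	  /* A register whose output bit is set while its input bit is still
	     clear has been written without being read since the jump target,
	     so it is dead there and is a scratch candidate.  */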
4433 dead |= (used >> 16) & ~used;
4434 if (dead & attempt)
4436 dead &= attempt;
4437 break;
4439 if (code == JUMP_INSN)
4441 if (jump_left-- && simplejump_p (scan))
4442 scan = JUMP_LABEL (scan);
4443 else
4444 break;
4448 /* Mask out the stack pointer again, in case it was
4449 the only 'free' register we have found. */
4450 dead &= 0x7fff;
4452 /* If the immediate destination is still in range, check for possible
4453 threading with a jump beyond the delay slot insn.
4454 Don't check if we are called recursively; the jump has been or will be
4455 checked in a different invocation then. */
4457 else if (optimize && need_block >= 0)
4459 rtx next = next_active_insn (next_active_insn (dest));
4460 if (next && GET_CODE (next) == JUMP_INSN
4461 && GET_CODE (PATTERN (next)) == SET
4462 && recog_memoized (next) == CODE_FOR_jump_compact)
4464 dest = JUMP_LABEL (next);
4465 if (dest
4466 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4467 > 4092 + 4098))
4468 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4472 if (dead)
4474 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4476 /* It would be nice if we could convert the jump into an indirect
4477 jump / far branch right now, and thus exposing all constituent
4478 instructions to further optimization. However, reorg uses
4479 simplejump_p to determine if there is an unconditional jump where
4480 it should try to schedule instructions from the target of the
4481 branch; simplejump_p fails for indirect jumps even if they have
4482 a JUMP_LABEL. */
4483 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4484 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4485 , jump);
4486 /* ??? We would like this to have the scope of the jump, but that
4487 scope will change when a delay slot insn of an inner scope is added.
4488 Hence, after delay slot scheduling, we'll have to expect
4489 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4490 the jump. */
4492 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4493 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4494 return insn;
4496 else if (need_block)
4497 /* We can't use JUMP_LABEL here because it might be undefined
4498 when not optimizing. */
4499 return emit_insn_before (gen_block_branch_redirect
4500 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4501 , jump);
4502 return prev;
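/* Approximate reach of an SH conditional branch: an 8-bit displacement
   scaled by 2, relative to pc + 4, with a little slack for insn length
   estimation.  */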
4505 #define CONDJUMP_MIN -252
4506 #define CONDJUMP_MAX 262
4507 struct far_branch
4509 /* A label (to be placed) in front of the jump
4510 that jumps to our ultimate destination. */
4511 rtx near_label;
4512 /* Where we are going to insert it if we cannot move the jump any farther,
4513 or the jump itself if we have picked up an existing jump. */
4514 rtx insert_place;
4515 /* The ultimate destination. */
4516 rtx far_label;
4517 struct far_branch *prev;
4518 /* If the branch has already been created, its address;
4519 else the address of its first prospective user. */
4520 int address;
4523 static void gen_far_branch (struct far_branch *);
4524 enum mdep_reorg_phase_e mdep_reorg_phase;
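/* Emit the far branch described by BP: place an unconditional jump (or
   return) to the far destination right after BP->insert_place, emit
   BP->near_label in front of it so nearby branches can reuse it, and invert
   the conditional branch at that point so that it skips over the new jump
   via a fresh local label.  */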
4525 static void
4526 gen_far_branch (struct far_branch *bp)
4528 rtx insn = bp->insert_place;
4529 rtx jump;
4530 rtx label = gen_label_rtx ();
4531 int ok;
4533 emit_label_after (label, insn);
4534 if (bp->far_label)
4536 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4537 LABEL_NUSES (bp->far_label)++;
4539 else
4540 jump = emit_jump_insn_after (gen_return (), insn);
4541 /* Emit a barrier so that reorg knows that any following instructions
4542 are not reachable via a fall-through path.
4543 But don't do this when not optimizing, since we wouldn't suppress the
4544 alignment for the barrier then, and could end up with out-of-range
4545 pc-relative loads. */
4546 if (optimize)
4547 emit_barrier_after (jump);
4548 emit_label_after (bp->near_label, insn);
4549 JUMP_LABEL (jump) = bp->far_label;
4550 ok = invert_jump (insn, label, 1);
4551 gcc_assert (ok);
4553 /* If we are branching around a jump (rather than a return), prevent
4554 reorg from using an insn from the jump target as the delay slot insn -
4555 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4556 and it could cause branches to go out of range. */
4557 if (bp->far_label)
4558 (emit_insn_after
4559 (gen_stuff_delay_slot
4560 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4561 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4562 insn));
4563 /* Prevent reorg from undoing our splits. */
4564 gen_block_redirect (jump, bp->address += 2, 2);
4567 /* Fix up ADDR_DIFF_VECs. */
4568 void
4569 fixup_addr_diff_vecs (rtx first)
4571 rtx insn;
4573 for (insn = first; insn; insn = NEXT_INSN (insn))
4575 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4577 if (GET_CODE (insn) != JUMP_INSN
4578 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4579 continue;
4580 pat = PATTERN (insn);
4581 vec_lab = XEXP (XEXP (pat, 0), 0);
4583 /* Search the matching casesi_jump_2. */
4584 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4586 if (GET_CODE (prev) != JUMP_INSN)
4587 continue;
4588 prevpat = PATTERN (prev);
4589 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4590 continue;
4591 x = XVECEXP (prevpat, 0, 1);
4592 if (GET_CODE (x) != USE)
4593 continue;
4594 x = XEXP (x, 0);
4595 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4596 break;
4598 /* FIXME: This is a bug in the optimizer, but it seems harmless
4599 to just avoid panicking. */
4600 if (!prev)
4601 continue;
4603 /* Emit the reference label of the braf where it belongs, right after
4604 the casesi_jump_2 (i.e. braf). */
4605 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4606 emit_label_after (braf_label, prev);
4608 /* Fix up the ADDR_DIFF_VEC to be relative
4609 to the reference address of the braf. */
4610 XEXP (XEXP (pat, 0), 0) = braf_label;
4614 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4615 a barrier. Return the base 2 logarithm of the desired alignment. */
4617 barrier_align (rtx barrier_or_label)
4619 rtx next = next_real_insn (barrier_or_label), pat, prev;
4620 int slot, credit, jump_to_next = 0;
4622 if (! next)
4623 return 0;
4625 pat = PATTERN (next);
4627 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4628 return 2;
4630 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4631 /* This is a barrier in front of a constant table. */
4632 return 0;
4634 prev = prev_real_insn (barrier_or_label);
4635 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4637 pat = PATTERN (prev);
4638 /* If this is a very small table, we want to keep the alignment after
4639 the table to the minimum for proper code alignment. */
4640 return ((TARGET_SMALLCODE
4641 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4642 <= (unsigned) 1 << (CACHE_LOG - 2)))
4643 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4646 if (TARGET_SMALLCODE)
4647 return 0;
4649 if (! TARGET_SH2 || ! optimize)
4650 return align_jumps_log;
4652 /* When fixing up pcloads, a constant table might be inserted just before
4653 the basic block that ends with the barrier. Thus, we can't trust the
4654 instruction lengths before that. */
4655 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4657 /* Check if there is an immediately preceding branch to the insn beyond
4658 the barrier. We must weight the cost of discarding useful information
4659 from the current cache line when executing this branch and there is
4660 an alignment, against that of fetching unneeded insn in front of the
4661 branch target when there is no alignment. */
4663 /* There are two delay_slot cases to consider. One is the simple case
4664 where the preceding branch is to the insn beyond the barrier (simple
4665 delay slot filling), and the other is where the preceding branch has
4666 a delay slot that is a duplicate of the insn after the barrier
4667 (fill_eager_delay_slots) and the branch is to the insn after the insn
4668 after the barrier. */
4670 /* PREV is presumed to be the JUMP_INSN for the barrier under
4671 investigation. Skip to the insn before it. */
4672 prev = prev_real_insn (prev);
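  /* CREDIT is, roughly, the number of bytes of needlessly fetched insns we
     are willing to tolerate in preference to emitting an alignment.  */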
4674 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4675 credit >= 0 && prev && GET_CODE (prev) == INSN;
4676 prev = prev_real_insn (prev))
4678 jump_to_next = 0;
4679 if (GET_CODE (PATTERN (prev)) == USE
4680 || GET_CODE (PATTERN (prev)) == CLOBBER)
4681 continue;
4682 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4684 prev = XVECEXP (PATTERN (prev), 0, 1);
4685 if (INSN_UID (prev) == INSN_UID (next))
4687 /* Delay slot was filled with insn at jump target. */
4688 jump_to_next = 1;
4689 continue;
4693 if (slot &&
4694 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4695 slot = 0;
4696 credit -= get_attr_length (prev);
4698 if (prev
4699 && GET_CODE (prev) == JUMP_INSN
4700 && JUMP_LABEL (prev))
4702 rtx x;
4703 if (jump_to_next
4704 || next_real_insn (JUMP_LABEL (prev)) == next
4705 /* If relax_delay_slots() decides NEXT was redundant
4706 with some previous instruction, it will have
4707 redirected PREV's jump to the following insn. */
4708 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4709 /* There is no upper bound on redundant instructions
4710 that might have been skipped, but we must not put an
4711 alignment where none had been before. */
4712 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4713 (INSN_P (x)
4714 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4715 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4716 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4718 rtx pat = PATTERN (prev);
4719 if (GET_CODE (pat) == PARALLEL)
4720 pat = XVECEXP (pat, 0, 0);
4721 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4722 return 0;
4727 return align_jumps_log;
4730 /* If we are inside a phony loop, almost any kind of label can turn up as the
4731 first one in the loop. Aligning a braf label causes incorrect switch
4732 destination addresses; we can detect braf labels because they are
4733 followed by a BARRIER.
4734 Applying loop alignment to small constant or switch tables is a waste
4735 of space, so we suppress this too. */
4737 sh_loop_align (rtx label)
4739 rtx next = label;
4742 next = next_nonnote_insn (next);
4743 while (next && GET_CODE (next) == CODE_LABEL);
4745 if (! next
4746 || ! INSN_P (next)
4747 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4748 || recog_memoized (next) == CODE_FOR_consttable_2)
4749 return 0;
4751 return align_loops_log;
4754 /* Do a final pass over the function, just before delayed branch
4755 scheduling. */
4757 static void
4758 sh_reorg (void)
4760 rtx first, insn, mova = NULL_RTX;
4761 int num_mova;
4762 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4763 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4765 first = get_insns ();
4766 max_labelno_before_reorg = max_label_num ();
4768 /* We must split call insns before introducing `mova's. If we're
4769 optimizing, they'll have already been split. Otherwise, make
4770 sure we don't split them too late. */
4771 if (! optimize)
4772 split_all_insns_noflow ();
4774 if (TARGET_SHMEDIA)
4775 return;
4777 /* If relaxing, generate pseudo-ops to associate function calls with
4778 the symbols they call. It does no harm to not generate these
4779 pseudo-ops. However, when we can generate them, it enables the
4780 linker to potentially relax the jsr to a bsr, and eliminate the
4781 register load and, possibly, the constant pool entry. */
4783 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4784 if (TARGET_RELAX)
4786 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4787 own purposes. This works because none of the remaining passes
4788 need to look at them.
4790 ??? But it may break in the future. We should use a machine
4791 dependent REG_NOTE, or some other approach entirely. */
4792 for (insn = first; insn; insn = NEXT_INSN (insn))
4794 if (INSN_P (insn))
4796 rtx note;
4798 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4799 NULL_RTX)) != 0)
4800 remove_note (insn, note);
4804 for (insn = first; insn; insn = NEXT_INSN (insn))
4806 rtx pattern, reg, link, set, scan, dies, label;
4807 int rescan = 0, foundinsn = 0;
4809 if (GET_CODE (insn) == CALL_INSN)
4811 pattern = PATTERN (insn);
4813 if (GET_CODE (pattern) == PARALLEL)
4814 pattern = XVECEXP (pattern, 0, 0);
4815 if (GET_CODE (pattern) == SET)
4816 pattern = SET_SRC (pattern);
4818 if (GET_CODE (pattern) != CALL
4819 || GET_CODE (XEXP (pattern, 0)) != MEM)
4820 continue;
4822 reg = XEXP (XEXP (pattern, 0), 0);
4824 else
4826 reg = sfunc_uses_reg (insn);
4827 if (! reg)
4828 continue;
4831 if (GET_CODE (reg) != REG)
4832 continue;
4834 /* Try scanning backward to find where the register is set. */
4835 link = NULL;
4836 for (scan = PREV_INSN (insn);
4837 scan && GET_CODE (scan) != CODE_LABEL;
4838 scan = PREV_INSN (scan))
4840 if (! INSN_P (scan))
4841 continue;
4843 if (! reg_mentioned_p (reg, scan))
4844 continue;
4846 if (noncall_uses_reg (reg, scan, &set))
4847 break;
4849 if (set)
4851 link = scan;
4852 break;
4856 if (! link)
4857 continue;
4859 /* The register is set at LINK. */
4861 /* We can only optimize the function call if the register is
4862 being set to a symbol. In theory, we could sometimes
4863 optimize calls to a constant location, but the assembler
4864 and linker do not support that at present. */
4865 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4866 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4867 continue;
4869 /* Scan forward from LINK to the place where REG dies, and
4870 make sure that the only insns which use REG are
4871 themselves function calls. */
4873 /* ??? This doesn't work for call targets that were allocated
4874 by reload, since there may not be a REG_DEAD note for the
4875 register. */
4877 dies = NULL_RTX;
4878 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4880 rtx scanset;
4882 /* Don't try to trace forward past a CODE_LABEL if we haven't
4883 seen INSN yet. Ordinarily, we will only find the setting insn
4884 if it is in the same basic block. However,
4885 cross-jumping can insert code labels in between the load and
4886 the call, and can result in situations where a single call
4887 insn may have two targets depending on where we came from. */
4889 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4890 break;
4892 if (! INSN_P (scan))
4893 continue;
4895 /* Don't try to trace forward past a JUMP. To optimize
4896 safely, we would have to check that all the
4897 instructions at the jump destination did not use REG. */
4899 if (GET_CODE (scan) == JUMP_INSN)
4900 break;
4902 if (! reg_mentioned_p (reg, scan))
4903 continue;
4905 if (noncall_uses_reg (reg, scan, &scanset))
4906 break;
4908 if (scan == insn)
4909 foundinsn = 1;
4911 if (scan != insn
4912 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4914 /* There is a function call to this register other
4915 than the one we are checking. If we optimize
4916 this call, we need to rescan again below. */
4917 rescan = 1;
4920 /* ??? We shouldn't have to worry about SCANSET here.
4921 We should just be able to check for a REG_DEAD note
4922 on a function call. However, the REG_DEAD notes are
4923 apparently not dependable around libcalls; c-torture
4924 execute/920501-2 is a test case. If SCANSET is set,
4925 then this insn sets the register, so it must have
4926 died earlier. Unfortunately, this will only handle
4927 the cases in which the register is, in fact, set in a
4928 later insn. */
4930 /* ??? We shouldn't have to use FOUNDINSN here.
4931 This dates back to when we used LOG_LINKS to find
4932 the most recent insn which sets the register. */
4934 if (foundinsn
4935 && (scanset
4936 || find_reg_note (scan, REG_DEAD, reg)))
4938 dies = scan;
4939 break;
4943 if (! dies)
4945 /* Either there was a branch, or some insn used REG
4946 other than as a function call address. */
4947 continue;
4950 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4951 on the insn which sets the register, and on each call insn
4952 which uses the register. In final_prescan_insn we look for
4953 the REG_LABEL_OPERAND notes, and output the appropriate label
4954 or pseudo-op. */
4956 label = gen_label_rtx ();
4957 add_reg_note (link, REG_LABEL_OPERAND, label);
4958 add_reg_note (insn, REG_LABEL_OPERAND, label);
4959 if (rescan)
4961 scan = link;
4964 rtx reg2;
4966 scan = NEXT_INSN (scan);
4967 if (scan != insn
4968 && ((GET_CODE (scan) == CALL_INSN
4969 && reg_mentioned_p (reg, scan))
4970 || ((reg2 = sfunc_uses_reg (scan))
4971 && REGNO (reg2) == REGNO (reg))))
4972 add_reg_note (scan, REG_LABEL_OPERAND, label);
4974 while (scan != dies);
4979 if (TARGET_SH2)
4980 fixup_addr_diff_vecs (first);
4982 if (optimize)
4984 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4985 shorten_branches (first);
4988 /* Scan the function looking for move instructions which have to be
4989 changed to pc-relative loads and insert the literal tables. */
4990 label_ref_list_pool = create_alloc_pool ("label references list",
4991 sizeof (struct label_ref_list_d),
4992 30);
4993 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4994 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4996 if (mova_p (insn))
4998 /* ??? basic block reordering can move a switch table dispatch
4999 below the switch table. Check if that has happened.
5000 We only have the addresses available when optimizing; but then,
5001 this check shouldn't be needed when not optimizing. */
5002 if (!untangle_mova (&num_mova, &mova, insn))
5004 insn = mova;
5005 num_mova = 0;
5008 else if (GET_CODE (insn) == JUMP_INSN
5009 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5010 && num_mova
5011 /* ??? loop invariant motion can also move a mova out of a
5012 loop. Since loop does this code motion anyway, maybe we
5013 should wrap UNSPEC_MOVA into a CONST, so that reload can
5014 move it back. */
5015 && ((num_mova > 1
5016 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5017 || (prev_nonnote_insn (insn)
5018 == XEXP (MOVA_LABELREF (mova), 0))))
5020 rtx scan;
5021 int total;
5023 num_mova--;
5025 /* Some code might have been inserted between the mova and
5026 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5027 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5028 total += get_attr_length (scan);
5030 /* range of mova is 1020, add 4 because pc counts from address of
5031 second instruction after this one, subtract 2 in case pc is 2
5032 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5033 cancels out with alignment effects of the mova itself. */
5034 if (total > 1022)
5036 /* Change the mova into a load, and restart scanning
5037 there. broken_move will then return true for mova. */
5038 fixup_mova (mova);
5039 insn = mova;
5042 if (broken_move (insn)
5043 || (GET_CODE (insn) == INSN
5044 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5046 rtx scan;
5047 /* Scan ahead looking for a barrier to stick the constant table
5048 behind. */
5049 rtx barrier = find_barrier (num_mova, mova, insn);
5050 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5051 int need_aligned_label = 0;
5053 if (num_mova && ! mova_p (mova))
5055 /* find_barrier had to change the first mova into a
5056 pcload; thus, we have to start with this new pcload. */
5057 insn = mova;
5058 num_mova = 0;
5060 /* Now find all the moves between the points and modify them. */
5061 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5063 if (GET_CODE (scan) == CODE_LABEL)
5064 last_float = 0;
5065 if (GET_CODE (scan) == INSN
5066 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5067 need_aligned_label = 1;
5068 if (broken_move (scan))
5070 rtx *patp = &PATTERN (scan), pat = *patp;
5071 rtx src, dst;
5072 rtx lab;
5073 rtx newsrc;
5074 enum machine_mode mode;
5076 if (GET_CODE (pat) == PARALLEL)
5077 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5078 src = SET_SRC (pat);
5079 dst = SET_DEST (pat);
5080 mode = GET_MODE (dst);
5082 if (mode == SImode && hi_const (src)
5083 && REGNO (dst) != FPUL_REG)
5085 int offset = 0;
5087 mode = HImode;
5088 while (GET_CODE (dst) == SUBREG)
5090 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5091 GET_MODE (SUBREG_REG (dst)),
5092 SUBREG_BYTE (dst),
5093 GET_MODE (dst));
5094 dst = SUBREG_REG (dst);
5096 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5098 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5100 /* This must be an insn that clobbers r0. */
5101 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5102 XVECLEN (PATTERN (scan), 0)
5103 - 1);
5104 rtx clobber = *clobberp;
5106 gcc_assert (GET_CODE (clobber) == CLOBBER
5107 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5109 if (last_float
5110 && reg_set_between_p (r0_rtx, last_float_move, scan))
5111 last_float = 0;
5112 if (last_float
5113 && TARGET_SHCOMPACT
5114 && GET_MODE_SIZE (mode) != 4
5115 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5116 last_float = 0;
5117 lab = add_constant (src, mode, last_float);
5118 if (lab)
5119 emit_insn_before (gen_mova (lab), scan);
5120 else
5122 /* There will be a REG_UNUSED note for r0 on
5123 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5124 lest reorg:mark_target_live_regs will not
5125 consider r0 to be used, and we end up with delay
5126 slot insn in front of SCAN that clobbers r0. */
5127 rtx note
5128 = find_regno_note (last_float_move, REG_UNUSED, 0);
5130 /* If we are not optimizing, then there may not be
5131 a note. */
5132 if (note)
5133 PUT_REG_NOTE_KIND (note, REG_INC);
5135 *last_float_addr = r0_inc_rtx;
5137 last_float_move = scan;
5138 last_float = src;
5139 newsrc = gen_const_mem (mode,
5140 (((TARGET_SH4 && ! TARGET_FMOVD)
5141 || REGNO (dst) == FPUL_REG)
5142 ? r0_inc_rtx
5143 : r0_rtx));
5144 last_float_addr = &XEXP (newsrc, 0);
5146 /* Remove the clobber of r0. */
5147 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5148 gen_rtx_SCRATCH (Pmode));
5150 /* This is a mova needing a label. Create it. */
5151 else if (GET_CODE (src) == UNSPEC
5152 && XINT (src, 1) == UNSPEC_MOVA
5153 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5155 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5156 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5157 newsrc = gen_rtx_UNSPEC (SImode,
5158 gen_rtvec (1, newsrc),
5159 UNSPEC_MOVA);
5161 else
5163 lab = add_constant (src, mode, 0);
5164 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5165 newsrc = gen_const_mem (mode, newsrc);
5167 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5168 INSN_CODE (scan) = -1;
5171 dump_table (need_aligned_label ? insn : 0, barrier);
5172 insn = barrier;
5175 free_alloc_pool (label_ref_list_pool);
5176 for (insn = first; insn; insn = NEXT_INSN (insn))
5177 PUT_MODE (insn, VOIDmode);
5179 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5180 INSN_ADDRESSES_FREE ();
5181 split_branches (first);
5183 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5184 also has an effect on the register that holds the address of the sfunc.
5185 Insert an extra dummy insn in front of each sfunc that pretends to
5186 use this register. */
5187 if (flag_delayed_branch)
5189 for (insn = first; insn; insn = NEXT_INSN (insn))
5191 rtx reg = sfunc_uses_reg (insn);
5193 if (! reg)
5194 continue;
5195 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5198 #if 0
5199 /* fpscr is not actually a user variable, but we pretend it is for the
5200 sake of the previous optimization passes, since we want it handled like
5201 one. However, we don't have any debugging information for it, so turn
5202 it into a non-user variable now. */
5203 if (TARGET_SH4)
5204 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5205 #endif
5206 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5210 get_dest_uid (rtx label, int max_uid)
5212 rtx dest = next_real_insn (label);
5213 int dest_uid;
5214 if (! dest)
5215 /* This can happen for an undefined label. */
5216 return 0;
5217 dest_uid = INSN_UID (dest);
5218 /* If this is a newly created branch redirection blocking instruction,
5219 we cannot index the branch_uid or insn_addresses arrays with its
5220 uid. But then, we won't need to, because the actual destination is
5221 the following branch. */
5222 while (dest_uid >= max_uid)
5224 dest = NEXT_INSN (dest);
5225 dest_uid = INSN_UID (dest);
5227 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5228 return 0;
5229 return dest_uid;
5232 /* Split condbranches that are out of range. Also add clobbers for
5233 scratch registers that are needed in far jumps.
5234 We do this before delay slot scheduling, so that it can take our
5235 newly created instructions into account. It also allows us to
5236 find branches with common targets more easily. */
5238 static void
5239 split_branches (rtx first)
5241 rtx insn;
5242 struct far_branch **uid_branch, *far_branch_list = 0;
5243 int max_uid = get_max_uid ();
5244 int ok;
5246 /* Find out which branches are out of range. */
5247 shorten_branches (first);
5249 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5250 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5252 for (insn = first; insn; insn = NEXT_INSN (insn))
5253 if (! INSN_P (insn))
5254 continue;
5255 else if (INSN_DELETED_P (insn))
5257 /* Shorten_branches would split this instruction again,
5258 so transform it into a note. */
5259 SET_INSN_DELETED (insn);
5261 else if (GET_CODE (insn) == JUMP_INSN
5262 /* Don't mess with ADDR_DIFF_VEC */
5263 && (GET_CODE (PATTERN (insn)) == SET
5264 || GET_CODE (PATTERN (insn)) == RETURN))
5266 enum attr_type type = get_attr_type (insn);
5267 if (type == TYPE_CBRANCH)
5269 rtx next, beyond;
5271 if (get_attr_length (insn) > 4)
5273 rtx src = SET_SRC (PATTERN (insn));
5274 rtx olabel = XEXP (XEXP (src, 1), 0);
5275 int addr = INSN_ADDRESSES (INSN_UID (insn));
5276 rtx label = 0;
5277 int dest_uid = get_dest_uid (olabel, max_uid);
5278 struct far_branch *bp = uid_branch[dest_uid];
5280 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5281 the label if the LABEL_NUSES count drops to zero. There is
5282 always a jump_optimize pass that sets these values, but it
5283 proceeds to delete unreferenced code, and then if not
5284 optimizing, to un-delete the deleted instructions, thus
5285 leaving labels with too low uses counts. */
5286 if (! optimize)
5288 JUMP_LABEL (insn) = olabel;
5289 LABEL_NUSES (olabel)++;
5291 if (! bp)
5293 bp = (struct far_branch *) alloca (sizeof *bp);
5294 uid_branch[dest_uid] = bp;
5295 bp->prev = far_branch_list;
5296 far_branch_list = bp;
5297 bp->far_label
5298 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5299 LABEL_NUSES (bp->far_label)++;
5301 else
5303 label = bp->near_label;
5304 if (! label && bp->address - addr >= CONDJUMP_MIN)
5306 rtx block = bp->insert_place;
5308 if (GET_CODE (PATTERN (block)) == RETURN)
5309 block = PREV_INSN (block);
5310 else
5311 block = gen_block_redirect (block,
5312 bp->address, 2);
5313 label = emit_label_after (gen_label_rtx (),
5314 PREV_INSN (block));
5315 bp->near_label = label;
5317 else if (label && ! NEXT_INSN (label))
5319 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5320 bp->insert_place = insn;
5321 else
5322 gen_far_branch (bp);
5325 if (! label
5326 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5328 bp->near_label = label = gen_label_rtx ();
5329 bp->insert_place = insn;
5330 bp->address = addr;
5332 ok = redirect_jump (insn, label, 0);
5333 gcc_assert (ok);
5335 else
5337 /* get_attr_length (insn) == 2 */
5338 /* Check if we have a pattern where reorg wants to redirect
5339 the branch to a label from an unconditional branch that
5340 is too far away. */
5341 /* We can't use JUMP_LABEL here because it might be undefined
5342 when not optimizing. */
5343 /* A syntax error might cause beyond to be NULL_RTX. */
5344 beyond
5345 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5346 0));
5348 if (beyond
5349 && (GET_CODE (beyond) == JUMP_INSN
5350 || ((beyond = next_active_insn (beyond))
5351 && GET_CODE (beyond) == JUMP_INSN))
5352 && GET_CODE (PATTERN (beyond)) == SET
5353 && recog_memoized (beyond) == CODE_FOR_jump_compact
5354 && ((INSN_ADDRESSES
5355 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5356 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5357 > 252 + 258 + 2))
5358 gen_block_redirect (beyond,
5359 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5362 next = next_active_insn (insn);
5364 if ((GET_CODE (next) == JUMP_INSN
5365 || ((next = next_active_insn (next))
5366 && GET_CODE (next) == JUMP_INSN))
5367 && GET_CODE (PATTERN (next)) == SET
5368 && recog_memoized (next) == CODE_FOR_jump_compact
5369 && ((INSN_ADDRESSES
5370 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5371 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5372 > 252 + 258 + 2))
5373 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5375 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5377 int addr = INSN_ADDRESSES (INSN_UID (insn));
5378 rtx far_label = 0;
5379 int dest_uid = 0;
5380 struct far_branch *bp;
5382 if (type == TYPE_JUMP)
5384 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5385 dest_uid = get_dest_uid (far_label, max_uid);
5386 if (! dest_uid)
5388 /* Parse errors can lead to labels outside
5389 the insn stream. */
5390 if (! NEXT_INSN (far_label))
5391 continue;
5393 if (! optimize)
5395 JUMP_LABEL (insn) = far_label;
5396 LABEL_NUSES (far_label)++;
5398 redirect_jump (insn, NULL_RTX, 1);
5399 far_label = 0;
5402 bp = uid_branch[dest_uid];
5403 if (! bp)
5405 bp = (struct far_branch *) alloca (sizeof *bp);
5406 uid_branch[dest_uid] = bp;
5407 bp->prev = far_branch_list;
5408 far_branch_list = bp;
5409 bp->near_label = 0;
5410 bp->far_label = far_label;
5411 if (far_label)
5412 LABEL_NUSES (far_label)++;
5414 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5415 if (addr - bp->address <= CONDJUMP_MAX)
5416 emit_label_after (bp->near_label, PREV_INSN (insn));
5417 else
5419 gen_far_branch (bp);
5420 bp->near_label = 0;
5422 else
5423 bp->near_label = 0;
5424 bp->address = addr;
5425 bp->insert_place = insn;
5426 if (! far_label)
5427 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5428 else
5429 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5432 /* Generate all pending far branches,
5433 and free our references to the far labels. */
5434 while (far_branch_list)
5436 if (far_branch_list->near_label
5437 && ! NEXT_INSN (far_branch_list->near_label))
5438 gen_far_branch (far_branch_list);
5439 if (optimize
5440 && far_branch_list->far_label
5441 && ! --LABEL_NUSES (far_branch_list->far_label))
5442 delete_insn (far_branch_list->far_label);
5443 far_branch_list = far_branch_list->prev;
5446 /* Instruction length information is no longer valid due to the new
5447 instructions that have been generated. */
5448 init_insn_lengths ();
5451 /* Dump out instruction addresses, which is useful for debugging the
5452 constant pool table stuff.
5454 If relaxing, output the label and pseudo-ops used to link together
5455 calls and the instruction which set the registers. */
5457 /* ??? The addresses printed by this routine for insns are nonsense for
5458 insns which are inside of a sequence where none of the inner insns have
5459 variable length. This is because the second pass of shorten_branches
5460 does not bother to update them. */
5462 void
5463 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5464 int noperands ATTRIBUTE_UNUSED)
5466 if (TARGET_DUMPISIZE)
5467 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5469 if (TARGET_RELAX)
5471 rtx note;
5473 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5474 if (note)
5476 rtx pattern;
5478 pattern = PATTERN (insn);
5479 if (GET_CODE (pattern) == PARALLEL)
5480 pattern = XVECEXP (pattern, 0, 0);
5481 switch (GET_CODE (pattern))
5483 case SET:
5484 if (GET_CODE (SET_SRC (pattern)) != CALL
5485 && get_attr_type (insn) != TYPE_SFUNC)
5487 targetm.asm_out.internal_label
5488 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5489 break;
5491 /* else FALLTHROUGH */
5492 case CALL:
5493 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5494 CODE_LABEL_NUMBER (XEXP (note, 0)));
5495 break;
5497 default:
5498 gcc_unreachable ();
5504 /* Dump out any constants accumulated in the final pass. These will
5505 only be labels. */
5507 const char *
5508 output_jump_label_table (void)
5510 int i;
5512 if (pool_size)
5514 fprintf (asm_out_file, "\t.align 2\n");
5515 for (i = 0; i < pool_size; i++)
5517 pool_node *p = &pool_vector[i];
5519 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5520 CODE_LABEL_NUMBER (p->label));
5521 output_asm_insn (".long %O0", &p->value);
5523 pool_size = 0;
5526 return "";
5529 /* A full frame looks like:
5531 arg-5
5532 arg-4
5533 [ if current_function_anonymous_args
5534 arg-3
5535 arg-2
5536 arg-1
5537 arg-0 ]
5538 saved-fp
5539 saved-r10
5540 saved-r11
5541 saved-r12
5542 saved-pr
5543 local-n
5545 local-1
5546 local-0 <- fp points here. */
5548 /* Number of bytes pushed for anonymous args, used to pass information
5549 between expand_prologue and expand_epilogue. */
5551 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5552 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5553 for an epilogue and a negative value means that it's for a sibcall
5554 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5555 all the registers that are about to be restored, and hence dead. */
5557 static void
5558 output_stack_adjust (int size, rtx reg, int epilogue_p,
5559 HARD_REG_SET *live_regs_mask)
5561 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5562 if (size)
5564 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5566 /* This test is bogus, as output_stack_adjust is used to re-align the
5567 stack. */
5568 #if 0
5569 gcc_assert (!(size % align));
5570 #endif
5572 if (CONST_OK_FOR_ADD (size))
5573 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5574 /* Try to do it with two partial adjustments; however, we must make
5575 sure that the stack is properly aligned at all times, in case
5576 an interrupt occurs between the two partial adjustments. */
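      /* For example, size = 192 with align = 4 is emitted as two adds of 96,
	 each of which fits the 8-bit add-immediate range on non-SHmedia and
	 keeps the stack 4-byte aligned.  */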
5577 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5578 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5580 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5581 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5583 else
5585 rtx const_reg;
5586 rtx insn;
5587 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5588 int i;
5590 /* If TEMP is invalid, we could temporarily save a general
5591 register to MACL. However, there is currently no need
5592 to handle this case, so just die when we see it. */
5593 if (epilogue_p < 0
5594 || current_function_interrupt
5595 || ! call_really_used_regs[temp] || fixed_regs[temp])
5596 temp = -1;
5597 if (temp < 0 && ! current_function_interrupt
5598 && (TARGET_SHMEDIA || epilogue_p >= 0))
5600 HARD_REG_SET temps;
5601 COPY_HARD_REG_SET (temps, call_used_reg_set);
5602 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5603 if (epilogue_p > 0)
5605 int nreg = 0;
5606 if (crtl->return_rtx)
5608 enum machine_mode mode;
5609 mode = GET_MODE (crtl->return_rtx);
5610 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5611 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5613 for (i = 0; i < nreg; i++)
5614 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5615 if (crtl->calls_eh_return)
5617 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5618 for (i = 0; i <= 3; i++)
5619 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5622 if (TARGET_SHMEDIA && epilogue_p < 0)
5623 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5624 CLEAR_HARD_REG_BIT (temps, i);
5625 if (epilogue_p <= 0)
5627 for (i = FIRST_PARM_REG;
5628 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5629 CLEAR_HARD_REG_BIT (temps, i);
5630 if (cfun->static_chain_decl != NULL)
5631 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5633 temp = scavenge_reg (&temps);
5635 if (temp < 0 && live_regs_mask)
5637 HARD_REG_SET temps;
5639 COPY_HARD_REG_SET (temps, *live_regs_mask);
5640 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5641 temp = scavenge_reg (&temps);
5643 if (temp < 0)
5645 rtx adj_reg, tmp_reg, mem;
5647 /* If we reached here, the most likely case is the (sibcall)
5648 epilogue for non-SHmedia. Put a special push/pop sequence
5649 for such a case as the last resort. This looks lengthy but
5650 would not be a problem because it seems to be very
5651 rare. */
5653 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5656 /* ??? There is still the slight possibility that r4 or
5657 r5 have been reserved as fixed registers or assigned
5658 as global registers, and they change during an
5659 interrupt. There are possible ways to handle this:
5661 - If we are adjusting the frame pointer (r14), we can do
5662 with a single temp register and an ordinary push / pop
5663 on the stack.
5664 - Grab any call-used or call-saved registers (i.e. not
5665 fixed or globals) for the temps we need. We might
5666 also grab r14 if we are adjusting the stack pointer.
5667 If we can't find enough available registers, issue
5668 a diagnostic and die - the user must have reserved
5669 way too many registers.
5670 But since all this is rather unlikely to happen and
5671 would require extra testing, we just die if r4 / r5
5672 are not available. */
5673 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5674 && !global_regs[4] && !global_regs[5]);
5676 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5677 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5678 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5679 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5680 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5681 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5682 emit_move_insn (mem, tmp_reg);
5683 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5684 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5685 emit_move_insn (mem, tmp_reg);
5686 emit_move_insn (reg, adj_reg);
5687 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5688 emit_move_insn (adj_reg, mem);
5689 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5690 emit_move_insn (tmp_reg, mem);
5691 /* Tell flow the insns that pop r4/r5 aren't dead. */
5692 emit_use (tmp_reg);
5693 emit_use (adj_reg);
5694 return;
5696 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5698 /* If SIZE is negative, subtract the positive value.
5699 This sometimes allows a constant pool entry to be shared
5700 between prologue and epilogue code. */
5701 if (size < 0)
5703 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5704 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5706 else
5708 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5709 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5711 if (! epilogue_p)
5712 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5713 gen_rtx_SET (VOIDmode, reg,
5714 gen_rtx_PLUS (SImode, reg,
5715 GEN_INT (size))));
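/* Emit the rtx X as an insn and mark it frame-related, so that unwind
   (CFI) information is generated for its effect on the frame.  */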
5720 static rtx
5721 frame_insn (rtx x)
5723 x = emit_insn (x);
5724 RTX_FRAME_RELATED_P (x) = 1;
5725 return x;
5728 /* Output RTL to push register RN onto the stack. */
5730 static rtx
5731 push (int rn)
5733 rtx x;
5734 if (rn == FPUL_REG)
5735 x = gen_push_fpul ();
5736 else if (rn == FPSCR_REG)
5737 x = gen_push_fpscr ();
5738 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5739 && FP_OR_XD_REGISTER_P (rn))
5741 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5742 return NULL_RTX;
5743 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5745 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5746 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5747 else
5748 x = gen_push (gen_rtx_REG (SImode, rn));
5750 x = frame_insn (x);
5751 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
5752 return x;
5755 /* Output RTL to pop register RN from the stack. */
5757 static void
5758 pop (int rn)
5760 rtx x;
5761 if (rn == FPUL_REG)
5762 x = gen_pop_fpul ();
5763 else if (rn == FPSCR_REG)
5764 x = gen_pop_fpscr ();
5765 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5766 && FP_OR_XD_REGISTER_P (rn))
5768 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5769 return;
5770 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5772 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5773 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5774 else
5775 x = gen_pop (gen_rtx_REG (SImode, rn));
5777 x = emit_insn (x);
5778 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
5781 /* Generate code to push the regs specified in the mask. */
5783 static void
5784 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5786 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5787 int skip_fpscr = 0;
5789 /* Push PR last; this gives better latencies after the prologue, and
5790 provides candidates for the return delay slot when there are no general
5791 registers pushed. */
5792 for (; i < FIRST_PSEUDO_REGISTER; i++)
5794 /* If this is an interrupt handler, and the SZ bit varies,
5795 and we have to push any floating point register, we need
5796 to switch to the correct precision first. */
5797 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5798 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5800 HARD_REG_SET unsaved;
5802 push (FPSCR_REG);
5803 COMPL_HARD_REG_SET (unsaved, *mask);
5804 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5805 skip_fpscr = 1;
5807 if (i != PR_REG
5808 && (i != FPSCR_REG || ! skip_fpscr)
5809 && TEST_HARD_REG_BIT (*mask, i))
5811 /* If the ISR has the RESBANK attribute assigned, don't push any of
5812 the following registers: R0-R14, MACH, MACL and GBR. */
5813 if (! (sh_cfun_resbank_handler_p ()
5814 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5815 || i == MACH_REG
5816 || i == MACL_REG
5817 || i == GBR_REG)))
5818 push (i);
5822 /* Push banked registers last to improve delay slot opportunities. */
5823 if (interrupt_handler)
5824 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5825 if (TEST_HARD_REG_BIT (*mask, i))
5826 push (i);
5828 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5829 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5830 push (PR_REG);
5833 /* Calculate how much extra space is needed to save all callee-saved
5834 target registers.
5835 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5837 static int
5838 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5840 int reg;
5841 int stack_space = 0;
5842 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5844 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5845 if ((! call_really_used_regs[reg] || interrupt_handler)
5846 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5847 /* Leave space to save this target register on the stack,
5848 in case target register allocation wants to use it. */
5849 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5850 return stack_space;
5853 /* Decide whether we should reserve space for callee-save target registers,
5854 in case target register allocation wants to use them. REGS_SAVED is
5855 the space, in bytes, that is already required for register saves.
5856 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5858 static int
5859 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5860 HARD_REG_SET *live_regs_mask)
5862 if (optimize_size)
5863 return 0;
5864 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5867 /* Decide how much space to reserve for callee-save target registers
5868 in case target register allocation wants to use them.
5869 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5871 static int
5872 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5874 if (shmedia_space_reserved_for_target_registers)
5875 return shmedia_target_regs_stack_space (live_regs_mask);
5876 else
5877 return 0;
5880 /* Work out the registers which need to be saved, both as a mask and a
5881 count of saved words. Return the count.
5883 If doing a pragma interrupt function, then push all regs used by the
5884 function, and if we call another function (we can tell by looking at PR),
5885 make sure that all the regs it clobbers are safe too. */
5887 static int
5888 calc_live_regs (HARD_REG_SET *live_regs_mask)
5890 unsigned int reg;
5891 int count;
5892 tree attrs;
5893 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5894 bool nosave_low_regs;
5895 int pr_live, has_call;
5897 attrs = DECL_ATTRIBUTES (current_function_decl);
5898 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5899 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5900 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5901 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5903 CLEAR_HARD_REG_SET (*live_regs_mask);
5904 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5905 && df_regs_ever_live_p (FPSCR_REG))
5906 target_flags &= ~MASK_FPU_SINGLE;
5907 /* If switching to double mode saves a lot of register saves, do that. */
5908 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5909 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5910 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5911 && (! call_really_used_regs[reg]
5912 || interrupt_handler)
5913 && ++count > 2)
5915 target_flags &= ~MASK_FPU_SINGLE;
5916 break;
5918 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5919 knows how to use it. That means the pseudo originally allocated for
5920 the initial value can become the PR_MEDIA_REG hard register, as seen for
5921 execute/20010122-1.c:test9. */
5922 if (TARGET_SHMEDIA)
5923 /* ??? This function is called from initial_elimination_offset, hence we
5924 can't use the result of sh_media_register_for_return here. */
5925 pr_live = sh_pr_n_sets ();
5926 else
5928 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5929 pr_live = (pr_initial
5930 ? (GET_CODE (pr_initial) != REG
5931 || REGNO (pr_initial) != (PR_REG))
5932 : df_regs_ever_live_p (PR_REG));
5933 /* For SHcompact, if not optimizing, we end up with a memory reference
5934 using the return address pointer for __builtin_return_address even
5935 though there is no actual need to put the PR register on the stack. */
5936 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5938 /* Force PR to be live if the prologue has to call the SHmedia
5939 argument decoder or register saver. */
5940 if (TARGET_SHCOMPACT
5941 && ((crtl->args.info.call_cookie
5942 & ~ CALL_COOKIE_RET_TRAMP (1))
5943 || crtl->saves_all_registers))
5944 pr_live = 1;
5945 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
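/* Walk the hard registers from the highest number down and record those
   that must be saved: PR when it is live, essentially every used or
   clobberable register for interrupt handlers, and otherwise the used
   call-saved registers plus the EH return data registers and a few
   ABI-specific special cases.  */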
5946 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5948 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5949 ? pr_live
5950 : interrupt_handler
5951 ? (/* Need to save all the regs ever live. */
5952 (df_regs_ever_live_p (reg)
5953 || (call_really_used_regs[reg]
5954 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5955 || reg == PIC_OFFSET_TABLE_REGNUM)
5956 && has_call)
5957 || (TARGET_SHMEDIA && has_call
5958 && REGISTER_NATURAL_MODE (reg) == SImode
5959 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5960 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5961 && reg != RETURN_ADDRESS_POINTER_REGNUM
5962 && reg != T_REG && reg != GBR_REG
5963 /* Push FPSCR only on targets which have an FPU. */
5964 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5965 : (/* Only push those regs which are used and need to be saved. */
5966 (TARGET_SHCOMPACT
5967 && flag_pic
5968 && crtl->args.info.call_cookie
5969 && reg == PIC_OFFSET_TABLE_REGNUM)
5970 || (df_regs_ever_live_p (reg)
5971 && ((!call_really_used_regs[reg]
5972 && !(reg != PIC_OFFSET_TABLE_REGNUM
5973 && fixed_regs[reg] && call_used_regs[reg]))
5974 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5975 || (crtl->calls_eh_return
5976 && (reg == EH_RETURN_DATA_REGNO (0)
5977 || reg == EH_RETURN_DATA_REGNO (1)
5978 || reg == EH_RETURN_DATA_REGNO (2)
5979 || reg == EH_RETURN_DATA_REGNO (3)))
5980 || ((reg == MACL_REG || reg == MACH_REG)
5981 && df_regs_ever_live_p (reg)
5982 && sh_cfun_attr_renesas_p ())
5985 SET_HARD_REG_BIT (*live_regs_mask, reg);
5986 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5988 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5989 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5991 if (FP_REGISTER_P (reg))
5993 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5995 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5996 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5999 else if (XD_REGISTER_P (reg))
6001 /* Must switch to double mode to access these registers. */
6002 target_flags &= ~MASK_FPU_SINGLE;
6006 if (nosave_low_regs && reg == R8_REG)
6007 break;
6009 /* If we have a target register optimization pass after prologue / epilogue
6010 threading, we need to assume all target registers will be live even if
6011 they aren't now. */
6012 if (flag_branch_target_load_optimize2
6013 && TARGET_SAVE_ALL_TARGET_REGS
6014 && shmedia_space_reserved_for_target_registers)
6015 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6016 if ((! call_really_used_regs[reg] || interrupt_handler)
6017 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6019 SET_HARD_REG_BIT (*live_regs_mask, reg);
6020 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6022 /* If this is an interrupt handler, we don't have any call-clobbered
6023 registers we can conveniently use for target register save/restore.
6024 Make sure we save at least one general purpose register when we need
6025 to save target registers. */
6026 if (interrupt_handler
6027 && hard_reg_set_intersect_p (*live_regs_mask,
6028 reg_class_contents[TARGET_REGS])
6029 && ! hard_reg_set_intersect_p (*live_regs_mask,
6030 reg_class_contents[GENERAL_REGS]))
6032 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6033 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6036 return count;
6039 /* Code to generate prologue and epilogue sequences. */
6041 /* PUSHED is the number of bytes that are being pushed on the
6042 stack for register saves. Return the frame size, padded
6043 appropriately so that the stack stays properly aligned. */
6044 static HOST_WIDE_INT
6045 rounded_frame_size (int pushed)
6047 HOST_WIDE_INT size = get_frame_size ();
6048 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6050 return ((size + pushed + align - 1) & -align) - pushed;
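/* For illustration: with get_frame_size () == 20, PUSHED == 8 and an
   8-byte STACK_BOUNDARY, the total of 28 bytes is rounded up to 32, so
   the function returns 24, i.e. the local frame allocated beyond the
   pushed registers.  */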
6053 /* Choose a call-clobbered target-branch register that remains
6054 unchanged along the whole function. We set it up as the return
6055 value in the prologue. */
6057 sh_media_register_for_return (void)
6059 int regno;
6060 int tr0_used;
6062 if (! current_function_is_leaf)
6063 return -1;
6064 if (lookup_attribute ("interrupt_handler",
6065 DECL_ATTRIBUTES (current_function_decl)))
6066 return -1;
6067 if (sh_cfun_interrupt_handler_p ())
6068 return -1;
6070 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6072 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6073 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6074 return regno;
6076 return -1;
6079 /* The maximum set of registers we need to save consists of:
6080 - 62 general purpose registers (r15 is the stack pointer, r63 is zero)
6081 - 32 floating point registers (for each pair, we save none,
6082 one single precision value, or a double precision value).
6083 - 8 target registers
6084 - add 1 entry for a delimiter. */
6085 #define MAX_SAVED_REGS (62+32+8)
6087 typedef struct save_entry_s
6089 unsigned char reg;
6090 unsigned char mode;
6091 short offset;
6092 } save_entry;
6094 #define MAX_TEMPS 4
6096 /* There will be a delimiter entry with VOIDmode both at the start and the
6097 end of a filled-in schedule. The end delimiter has the offset of the
6098 save with the smallest (i.e. most negative) offset. */
6099 typedef struct save_schedule_s
6101 save_entry entries[MAX_SAVED_REGS + 2];
6102 int temps[MAX_TEMPS+1];
6103 } save_schedule;
6105 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6106 use reverse order. Returns the last entry written to (not counting
6107 the delimiter). OFFSET_BASE is a number to be added to all offset
6108 entries. */
6110 static save_entry *
6111 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6112 int offset_base)
6114 int align, i;
6115 save_entry *entry = schedule->entries;
6116 int tmpx = 0;
6117 int offset;
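/* Outside of interrupt handlers, collect up to MAX_TEMPS call-clobbered
   general registers that have no other role here (not argument,
   return-value, static-chain or EH registers) to use as scratch
   registers while saving and restoring.  */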
6119 if (! current_function_interrupt)
6120 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6121 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6122 && ! FUNCTION_ARG_REGNO_P (i)
6123 && i != FIRST_RET_REG
6124 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6125 && ! (crtl->calls_eh_return
6126 && (i == EH_RETURN_STACKADJ_REGNO
6127 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6128 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6129 schedule->temps[tmpx++] = i;
6130 entry->reg = -1;
6131 entry->mode = VOIDmode;
6132 entry->offset = offset_base;
6133 entry++;
6134 /* We loop twice: first, we save 8-byte aligned registers at the
6135 higher addresses, which are known to be aligned. Then we
6136 proceed to saving 32-bit registers that don't need 8-byte
6137 alignment.
6138 If this is an interrupt function, all registers that need saving
6139 need to be saved in full. Moreover, we need to postpone saving
6140 target registers until we have saved some general purpose registers
6141 that we can then use as scratch registers. */
6142 offset = offset_base;
6143 for (align = 1; align >= 0; align--)
6145 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6146 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6148 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6149 int reg = i;
6151 if (current_function_interrupt)
6153 if (TARGET_REGISTER_P (i))
6154 continue;
6155 if (GENERAL_REGISTER_P (i))
6156 mode = DImode;
6158 if (mode == SFmode && (i % 2) == 1
6159 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6160 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6162 mode = DFmode;
6163 i--;
6164 reg--;
6167 /* If we're doing the aligned pass and this is not aligned,
6168 or we're doing the unaligned pass and this is aligned,
6169 skip it. */
6170 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6171 != align)
6172 continue;
6174 if (current_function_interrupt
6175 && GENERAL_REGISTER_P (i)
6176 && tmpx < MAX_TEMPS)
6177 schedule->temps[tmpx++] = i;
6179 offset -= GET_MODE_SIZE (mode);
6180 entry->reg = i;
6181 entry->mode = mode;
6182 entry->offset = offset;
6183 entry++;
6185 if (align && current_function_interrupt)
6186 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6187 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6189 offset -= GET_MODE_SIZE (DImode);
6190 entry->reg = i;
6191 entry->mode = DImode;
6192 entry->offset = offset;
6193 entry++;
6196 entry->reg = -1;
6197 entry->mode = VOIDmode;
6198 entry->offset = offset;
6199 schedule->temps[tmpx] = -1;
6200 return entry - 1;
6203 void
6204 sh_expand_prologue (void)
6206 HARD_REG_SET live_regs_mask;
6207 int d, i;
6208 int d_rounding = 0;
6209 int save_flags = target_flags;
6210 int pretend_args;
6211 tree sp_switch_attr
6212 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6214 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6216 /* We have pretend args if we had an object sent partially in registers
6217 and partially on the stack, e.g. a large structure. */
6218 pretend_args = crtl->args.pretend_args_size;
6219 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6220 && (NPARM_REGS(SImode)
6221 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6222 pretend_args = 0;
6223 output_stack_adjust (-pretend_args
6224 - crtl->args.info.stack_regs * 8,
6225 stack_pointer_rtx, 0, NULL);
6227 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6228 /* We're going to use the PIC register to load the address of the
6229 incoming-argument decoder and/or of the return trampoline from
6230 the GOT, so make sure the PIC register is preserved and
6231 initialized. */
6232 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6234 if (TARGET_SHCOMPACT
6235 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6237 int reg;
6239 /* First, make all registers with incoming arguments that will
6240 be pushed onto the stack live, so that register renaming
6241 doesn't overwrite them. */
6242 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6243 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6244 >= NPARM_REGS (SImode) - reg)
6245 for (; reg < NPARM_REGS (SImode); reg++)
6246 emit_insn (gen_shcompact_preserve_incoming_args
6247 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6248 else if (CALL_COOKIE_INT_REG_GET
6249 (crtl->args.info.call_cookie, reg) == 1)
6250 emit_insn (gen_shcompact_preserve_incoming_args
6251 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6253 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6254 stack_pointer_rtx);
6255 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6256 GEN_INT (crtl->args.info.call_cookie));
6257 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6258 gen_rtx_REG (SImode, R0_REG));
6260 else if (TARGET_SHMEDIA)
6262 int tr = sh_media_register_for_return ();
6264 if (tr >= 0)
6265 emit_move_insn (gen_rtx_REG (DImode, tr),
6266 gen_rtx_REG (DImode, PR_MEDIA_REG));
6269 /* Emit the code for SETUP_VARARGS. */
6270 if (cfun->stdarg)
6272 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6274 /* Push arg regs as if they'd been provided by the caller on the stack. */
6275 for (i = 0; i < NPARM_REGS(SImode); i++)
6277 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6278 rtx insn;
6280 if (i >= (NPARM_REGS(SImode)
6281 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6282 ))
6283 break;
6284 insn = push (rn);
6289 /* If we're supposed to switch stacks at function entry, do so now. */
6290 if (sp_switch_attr)
6292 /* The argument specifies a variable holding the address of the
6293 stack the interrupt function should switch to/from at entry/exit. */
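/* For example (hypothetical user code, not part of this file):
     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
   where "alt_stack" names a variable holding the alternate stack
   pointer.  */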
6294 const char *s
6295 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6296 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6298 emit_insn (gen_sp_switch_1 (sp_switch));
6301 d = calc_live_regs (&live_regs_mask);
6302 /* ??? Maybe we could save some switching if we can move a mode switch
6303 that already happens to be at the function start into the prologue. */
6304 if (target_flags != save_flags && ! current_function_interrupt)
6305 emit_insn (gen_toggle_sz ());
6307 if (TARGET_SH5)
6309 int offset_base, offset;
6310 rtx r0 = NULL_RTX;
6311 int offset_in_r0 = -1;
6312 int sp_in_r0 = 0;
6313 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6314 int total_size, save_size;
6315 save_schedule schedule;
6316 save_entry *entry;
6317 int *tmp_pnt;
6319 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6320 && ! current_function_interrupt)
6321 r0 = gen_rtx_REG (Pmode, R0_REG);
6323 /* D is the actual number of bytes that we need for saving registers;
6324 however, in initial_elimination_offset we have committed to using
6325 an additional TREGS_SPACE bytes. In order to keep both the
6326 addresses of arguments supplied by the caller and local variables
6327 valid, we must keep this gap. Place it between the incoming
6328 arguments and the actually saved registers in a bid to optimize
6329 locality of reference. */
6330 total_size = d + tregs_space;
6331 total_size += rounded_frame_size (total_size);
6332 save_size = total_size - rounded_frame_size (d);
6333 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6334 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6335 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6337 /* If adjusting the stack in a single step costs nothing extra, do so.
6338 I.e. either if a single addi is enough, or we need a movi anyway,
6339 and we don't exceed the maximum offset range (the test for the
6340 latter is conservative for simplicity). */
6341 if (TARGET_SHMEDIA
6342 && (CONST_OK_FOR_I10 (-total_size)
6343 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6344 && total_size <= 2044)))
6345 d_rounding = total_size - save_size;
6347 offset_base = d + d_rounding;
6349 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6350 0, NULL);
6352 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6353 tmp_pnt = schedule.temps;
6354 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6356 enum machine_mode mode = (enum machine_mode) entry->mode;
6357 unsigned int reg = entry->reg;
6358 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6359 rtx orig_reg_rtx;
6361 offset = entry->offset;
6363 reg_rtx = gen_rtx_REG (mode, reg);
6365 mem_rtx = gen_frame_mem (mode,
6366 gen_rtx_PLUS (Pmode,
6367 stack_pointer_rtx,
6368 GEN_INT (offset)));
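/* If the stack-pointer-plus-offset address is not valid for this mode,
   fall back to addressing through r0, preferring a pre-decrement store
   through r0 when that is available; r0 is loaded with the offset (and
   possibly the stack pointer) below.  */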
6370 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6372 gcc_assert (r0);
6373 mem_rtx = NULL_RTX;
6376 if (HAVE_PRE_DECREMENT
6377 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6378 || mem_rtx == NULL_RTX
6379 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6381 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6383 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6384 pre_dec = NULL_RTX;
6385 else
6387 mem_rtx = NULL_RTX;
6388 offset += GET_MODE_SIZE (mode);
6392 if (mem_rtx != NULL_RTX)
6393 goto addr_ok;
6395 if (offset_in_r0 == -1)
6397 emit_move_insn (r0, GEN_INT (offset));
6398 offset_in_r0 = offset;
6400 else if (offset != offset_in_r0)
6402 emit_move_insn (r0,
6403 gen_rtx_PLUS
6404 (Pmode, r0,
6405 GEN_INT (offset - offset_in_r0)));
6406 offset_in_r0 += offset - offset_in_r0;
6409 if (pre_dec != NULL_RTX)
6411 if (! sp_in_r0)
6413 emit_move_insn (r0,
6414 gen_rtx_PLUS
6415 (Pmode, r0, stack_pointer_rtx));
6416 sp_in_r0 = 1;
6419 offset -= GET_MODE_SIZE (mode);
6420 offset_in_r0 -= GET_MODE_SIZE (mode);
6422 mem_rtx = pre_dec;
6424 else if (sp_in_r0)
6425 mem_rtx = gen_frame_mem (mode, r0);
6426 else
6427 mem_rtx = gen_frame_mem (mode,
6428 gen_rtx_PLUS (Pmode,
6429 stack_pointer_rtx,
6430 r0));
6432 /* We must not use an r0-based address for target-branch
6433 registers or for special registers without pre-dec
6434 memory addresses, since we store their values in r0
6435 first. */
6436 gcc_assert (!TARGET_REGISTER_P (reg)
6437 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6438 || mem_rtx == pre_dec));
6440 addr_ok:
6441 orig_reg_rtx = reg_rtx;
6442 if (TARGET_REGISTER_P (reg)
6443 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6444 && mem_rtx != pre_dec))
6446 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6448 emit_move_insn (tmp_reg, reg_rtx);
6450 if (REGNO (tmp_reg) == R0_REG)
6452 offset_in_r0 = -1;
6453 sp_in_r0 = 0;
6454 gcc_assert (!refers_to_regno_p
6455 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6458 if (*++tmp_pnt <= 0)
6459 tmp_pnt = schedule.temps;
6461 reg_rtx = tmp_reg;
6464 rtx insn;
6466 /* Mark as frame-related for the DWARF CFI generator. */
6467 insn = emit_move_insn (mem_rtx, reg_rtx);
6468 RTX_FRAME_RELATED_P (insn) = 1;
6469 /* If we use an intermediate register for the save, we can't
6470 describe this exactly in the CFI as a copy of the to-be-saved
6471 register into the temporary register and then a store of the
6472 temporary register to the stack, because the temporary register
6473 can have a different natural size from the to-be-saved register.
6474 Thus, we gloss over the intermediate copy and pretend we do
6475 a direct save from the to-be-saved register. */
6476 if (REGNO (reg_rtx) != reg)
6478 rtx set;
6480 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6484 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6486 rtx reg_rtx = gen_rtx_REG (mode, reg);
6487 rtx set;
6488 rtx mem_rtx = gen_frame_mem (mode,
6489 gen_rtx_PLUS (Pmode,
6490 stack_pointer_rtx,
6491 GEN_INT (offset)));
6493 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6494 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6499 gcc_assert (entry->offset == d_rounding);
6501 else
6502 push_regs (&live_regs_mask, current_function_interrupt);
6504 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6505 emit_insn (gen_GOTaddr2picreg ());
6507 if (SHMEDIA_REGS_STACK_ADJUST ())
6509 /* This must NOT go through the PLT, otherwise mach and macl
6510 may be clobbered. */
6511 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6512 (TARGET_FPU_ANY
6513 ? "__GCC_push_shmedia_regs"
6514 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6515 emit_insn (gen_shmedia_save_restore_regs_compact
6516 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6519 if (target_flags != save_flags && ! current_function_interrupt)
6520 emit_insn (gen_toggle_sz ());
6522 target_flags = save_flags;
6524 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6525 stack_pointer_rtx, 0, NULL);
6527 if (frame_pointer_needed)
6528 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6530 if (TARGET_SHCOMPACT
6531 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6533 /* This must NOT go through the PLT, otherwise mach and macl
6534 may be clobbered. */
6535 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6536 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6537 emit_insn (gen_shcompact_incoming_args ());
6541 void
6542 sh_expand_epilogue (bool sibcall_p)
6544 HARD_REG_SET live_regs_mask;
6545 int d, i;
6546 int d_rounding = 0;
6548 int save_flags = target_flags;
6549 int frame_size, save_size;
6550 int fpscr_deferred = 0;
6551 int e = sibcall_p ? -1 : 1;
6553 d = calc_live_regs (&live_regs_mask);
6555 save_size = d;
6556 frame_size = rounded_frame_size (d);
6558 if (TARGET_SH5)
6560 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6561 int total_size;
6562 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6563 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6564 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6566 total_size = d + tregs_space;
6567 total_size += rounded_frame_size (total_size);
6568 save_size = total_size - frame_size;
6570 /* If adjusting the stack in a single step costs nothing extra, do so.
6571 I.e. either if a single addi is enough, or we need a movi anyway,
6572 and we don't exceed the maximum offset range (the test for the
6573 latter is conservative for simplicity). */
6574 if (TARGET_SHMEDIA
6575 && ! frame_pointer_needed
6576 && (CONST_OK_FOR_I10 (total_size)
6577 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6578 && total_size <= 2044)))
6579 d_rounding = frame_size;
6581 frame_size -= d_rounding;
6584 if (frame_pointer_needed)
6586 /* We must avoid scheduling the epilogue with previous basic blocks
6587 when exception handling is enabled. See PR/18032. */
6588 if (flag_exceptions)
6589 emit_insn (gen_blockage ());
6590 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6591 &live_regs_mask);
6593 /* We must avoid moving the stack pointer adjustment past code
6594 which reads from the local frame, else an interrupt could
6595 occur after the SP adjustment and clobber data in the local
6596 frame. */
6597 emit_insn (gen_blockage ());
6598 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6600 else if (frame_size)
6602 /* We must avoid moving the stack pointer adjustment past code
6603 which reads from the local frame, else an interrupt could
6604 occur after the SP adjustment and clobber data in the local
6605 frame. */
6606 emit_insn (gen_blockage ());
6607 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6610 if (SHMEDIA_REGS_STACK_ADJUST ())
6612 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6613 (TARGET_FPU_ANY
6614 ? "__GCC_pop_shmedia_regs"
6615 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6616 /* This must NOT go through the PLT, otherwise mach and macl
6617 may be clobbered. */
6618 emit_insn (gen_shmedia_save_restore_regs_compact
6619 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6622 /* Pop all the registers. */
6624 if (target_flags != save_flags && ! current_function_interrupt)
6625 emit_insn (gen_toggle_sz ());
6626 if (TARGET_SH5)
6628 int offset_base, offset;
6629 int offset_in_r0 = -1;
6630 int sp_in_r0 = 0;
6631 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6632 save_schedule schedule;
6633 save_entry *entry;
6634 int *tmp_pnt;
6636 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6637 offset_base = -entry[1].offset + d_rounding;
6638 tmp_pnt = schedule.temps;
6639 for (; entry->mode != VOIDmode; entry--)
6641 enum machine_mode mode = (enum machine_mode) entry->mode;
6642 int reg = entry->reg;
6643 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6645 offset = offset_base + entry->offset;
6646 reg_rtx = gen_rtx_REG (mode, reg);
6648 mem_rtx = gen_frame_mem (mode,
6649 gen_rtx_PLUS (Pmode,
6650 stack_pointer_rtx,
6651 GEN_INT (offset)));
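/* Mirror of the prologue save loop: when the stack-pointer-plus-offset
   address is not usable, restore through r0, preferring post-increment
   loads; PR and the special registers are first loaded into r0 unless
   a post-increment load is used.  */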
6653 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6654 mem_rtx = NULL_RTX;
6656 if (HAVE_POST_INCREMENT
6657 && (offset == offset_in_r0
6658 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6659 && mem_rtx == NULL_RTX)
6660 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6662 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6664 if (!memory_address_p (mode, XEXP (post_inc, 0)))
6665 post_inc = NULL_RTX;
6666 else
6667 mem_rtx = NULL_RTX;
6670 if (mem_rtx != NULL_RTX)
6671 goto addr_ok;
6673 if (offset_in_r0 == -1)
6675 emit_move_insn (r0, GEN_INT (offset));
6676 offset_in_r0 = offset;
6678 else if (offset != offset_in_r0)
6680 emit_move_insn (r0,
6681 gen_rtx_PLUS
6682 (Pmode, r0,
6683 GEN_INT (offset - offset_in_r0)));
6684 offset_in_r0 += offset - offset_in_r0;
6687 if (post_inc != NULL_RTX)
6689 if (! sp_in_r0)
6691 emit_move_insn (r0,
6692 gen_rtx_PLUS
6693 (Pmode, r0, stack_pointer_rtx));
6694 sp_in_r0 = 1;
6697 mem_rtx = post_inc;
6699 offset_in_r0 += GET_MODE_SIZE (mode);
6701 else if (sp_in_r0)
6702 mem_rtx = gen_frame_mem (mode, r0);
6703 else
6704 mem_rtx = gen_frame_mem (mode,
6705 gen_rtx_PLUS (Pmode,
6706 stack_pointer_rtx,
6707 r0));
6709 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6710 || mem_rtx == post_inc);
6712 addr_ok:
6713 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6714 && mem_rtx != post_inc)
6716 insn = emit_move_insn (r0, mem_rtx);
6717 mem_rtx = r0;
6719 else if (TARGET_REGISTER_P (reg))
6721 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6723 /* Give the scheduler a bit of freedom by using up to
6724 MAX_TEMPS registers in a round-robin fashion. */
6725 insn = emit_move_insn (tmp_reg, mem_rtx);
6726 mem_rtx = tmp_reg;
6727 if (*++tmp_pnt < 0)
6728 tmp_pnt = schedule.temps;
6731 insn = emit_move_insn (reg_rtx, mem_rtx);
6734 gcc_assert (entry->offset + offset_base == d + d_rounding);
6736 else /* ! TARGET_SH5 */
6738 int last_reg;
6740 save_size = 0;
6741 /* For an ISR with RESBANK attribute assigned, don't pop PR
6742 register. */
6743 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6744 && !sh_cfun_resbank_handler_p ())
6746 if (!frame_pointer_needed)
6747 emit_insn (gen_blockage ());
6748 pop (PR_REG);
6751 /* Banked registers are popped first to avoid being scheduled in the
6752 delay slot. RTE switches banks before the delay-slot instruction. */
6753 if (current_function_interrupt)
6755 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6756 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6757 pop (LAST_BANKED_REG - i);
6759 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6761 else
6762 last_reg = FIRST_PSEUDO_REGISTER;
6764 for (i = 0; i < last_reg; i++)
6766 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6768 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6769 && hard_reg_set_intersect_p (live_regs_mask,
6770 reg_class_contents[DF_REGS]))
6771 fpscr_deferred = 1;
6772 /* For an ISR with the RESBANK attribute assigned, don't pop the
6773 following registers: R0-R14, MACH, MACL and GBR. */
6774 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6775 && ! (sh_cfun_resbank_handler_p ()
6776 && ((j >= FIRST_GENERAL_REG
6777 && j < LAST_GENERAL_REG)
6778 || j == MACH_REG
6779 || j == MACL_REG
6780 || j == GBR_REG)))
6781 pop (j);
6783 if (j == FIRST_FP_REG && fpscr_deferred)
6784 pop (FPSCR_REG);
6787 if (target_flags != save_flags && ! current_function_interrupt)
6788 emit_insn (gen_toggle_sz ());
6789 target_flags = save_flags;
6791 output_stack_adjust (crtl->args.pretend_args_size
6792 + save_size + d_rounding
6793 + crtl->args.info.stack_regs * 8,
6794 stack_pointer_rtx, e, NULL);
6796 if (crtl->calls_eh_return)
6797 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6798 EH_RETURN_STACKADJ_RTX));
6800 /* Switch back to the normal stack if necessary. */
6801 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6802 emit_insn (gen_sp_switch_2 ());
6804 /* Tell flow the insn that pops PR isn't dead. */
6805 /* PR_REG will never be live in SHmedia mode, and we don't need to
6806 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6807 by the return pattern. */
6808 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6809 emit_use (gen_rtx_REG (SImode, PR_REG));
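/* Return true if the current function needs an epilogue.  The answer is
   computed once by expanding the epilogue into a scratch sequence and
   checking whether it is empty; the result is cached in
   sh_need_epilogue_known.  */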
6812 static int sh_need_epilogue_known = 0;
6815 sh_need_epilogue (void)
6817 if (! sh_need_epilogue_known)
6819 rtx epilogue;
6821 start_sequence ();
6822 sh_expand_epilogue (0);
6823 epilogue = get_insns ();
6824 end_sequence ();
6825 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6827 return sh_need_epilogue_known > 0;
6830 /* Emit code to change the current function's return address to RA.
6831 TMP is available as a scratch register, if needed. */
6833 void
6834 sh_set_return_address (rtx ra, rtx tmp)
6836 HARD_REG_SET live_regs_mask;
6837 int d;
6838 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6839 int pr_offset;
6841 d = calc_live_regs (&live_regs_mask);
6843 /* If pr_reg isn't live, we can set it (or the register given by
6844 sh_media_register_for_return) directly. */
6845 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6847 rtx rr;
6849 if (TARGET_SHMEDIA)
6851 int rr_regno = sh_media_register_for_return ();
6853 if (rr_regno < 0)
6854 rr_regno = pr_reg;
6856 rr = gen_rtx_REG (DImode, rr_regno);
6858 else
6859 rr = gen_rtx_REG (SImode, pr_reg);
6861 emit_insn (GEN_MOV (rr, ra));
6862 /* Tell flow the register for return isn't dead. */
6863 emit_use (rr);
6864 return;
6867 if (TARGET_SH5)
6869 int offset;
6870 save_schedule schedule;
6871 save_entry *entry;
6873 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6874 offset = entry[1].offset;
6875 for (; entry->mode != VOIDmode; entry--)
6876 if (entry->reg == pr_reg)
6877 goto found;
6879 /* We can't find the PR register. */
6880 gcc_unreachable ();
6882 found:
6883 offset = entry->offset - offset;
6884 pr_offset = (rounded_frame_size (d) + offset
6885 + SHMEDIA_REGS_STACK_ADJUST ());
6887 else
6888 pr_offset = rounded_frame_size (d);
6890 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6891 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6893 tmp = gen_frame_mem (Pmode, tmp);
6894 emit_insn (GEN_MOV (tmp, ra));
6897 /* Clear variables at function end. */
6899 static void
6900 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6901 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6903 sh_need_epilogue_known = 0;
6906 static rtx
6907 sh_builtin_saveregs (void)
6909 /* First unnamed integer register. */
6910 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6911 /* Number of integer registers we need to save. */
6912 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6913 /* First unnamed SFmode float reg */
6914 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6915 /* Number of SFmode float regs to save. */
6916 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6917 rtx regbuf, fpregs;
6918 int bufsize, regno;
6919 alias_set_type alias_set;
6921 if (TARGET_SH5)
6923 if (n_intregs)
6925 int pushregs = n_intregs;
6927 while (pushregs < NPARM_REGS (SImode) - 1
6928 && (CALL_COOKIE_INT_REG_GET
6929 (crtl->args.info.call_cookie,
6930 NPARM_REGS (SImode) - pushregs)
6931 == 1))
6933 crtl->args.info.call_cookie
6934 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6935 - pushregs, 1);
6936 pushregs++;
6939 if (pushregs == NPARM_REGS (SImode))
6940 crtl->args.info.call_cookie
6941 |= (CALL_COOKIE_INT_REG (0, 1)
6942 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6943 else
6944 crtl->args.info.call_cookie
6945 |= CALL_COOKIE_STACKSEQ (pushregs);
6947 crtl->args.pretend_args_size += 8 * n_intregs;
6949 if (TARGET_SHCOMPACT)
6950 return const0_rtx;
6953 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6955 error ("__builtin_saveregs not supported by this subtarget");
6956 return const0_rtx;
6959 if (TARGET_SHMEDIA)
6960 n_floatregs = 0;
6962 /* Allocate block of memory for the regs. */
6963 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6964 Or can assign_stack_local accept a 0 SIZE argument? */
6965 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6967 if (TARGET_SHMEDIA)
6968 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6969 else if (n_floatregs & 1)
6971 rtx addr;
6973 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6974 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6975 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6976 regbuf = change_address (regbuf, BLKmode, addr);
6978 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6980 rtx addr, mask;
6982 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6983 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6984 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6985 emit_insn (gen_andsi3 (addr, addr, mask));
6986 regbuf = change_address (regbuf, BLKmode, addr);
6988 else
6989 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6990 alias_set = get_varargs_alias_set ();
6991 set_mem_alias_set (regbuf, alias_set);
6993 /* Save int args.
6994 This is optimized to only save the regs that are necessary. Explicitly
6995 named args need not be saved. */
6996 if (n_intregs > 0)
6997 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6998 adjust_address (regbuf, BLKmode,
6999 n_floatregs * UNITS_PER_WORD),
7000 n_intregs);
7002 if (TARGET_SHMEDIA)
7003 /* Return the address of the regbuf. */
7004 return XEXP (regbuf, 0);
7006 /* Save float args.
7007 This is optimized to only save the regs that are necessary. Explicitly
7008 named args need not be saved.
7009 We explicitly build a pointer to the buffer because it halves the insn
7010 count when not optimizing (otherwise the pointer is built for each reg
7011 saved).
7012 We emit the moves in reverse order so that we can use predecrement. */
7014 fpregs = copy_to_mode_reg (Pmode,
7015 plus_constant (XEXP (regbuf, 0),
7016 n_floatregs * UNITS_PER_WORD));
7017 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7019 rtx mem;
7020 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7022 emit_insn (gen_addsi3 (fpregs, fpregs,
7023 GEN_INT (-2 * UNITS_PER_WORD)));
7024 mem = change_address (regbuf, DFmode, fpregs);
7025 emit_move_insn (mem,
7026 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7028 regno = first_floatreg;
7029 if (regno & 1)
7031 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7032 mem = change_address (regbuf, SFmode, fpregs);
7033 emit_move_insn (mem,
7034 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7035 - (TARGET_LITTLE_ENDIAN != 0)));
7038 else
7039 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7041 rtx mem;
7043 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7044 mem = change_address (regbuf, SFmode, fpregs);
7045 emit_move_insn (mem,
7046 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7049 /* Return the address of the regbuf. */
7050 return XEXP (regbuf, 0);
7053 /* Define the `__builtin_va_list' type for the ABI. */
7055 static tree
7056 sh_build_builtin_va_list (void)
7058 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7059 tree record;
7061 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7062 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7063 return ptr_type_node;
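/* The five pointer fields describe the save areas set up by sh_va_start:
   __va_next_o / __va_next_o_limit delimit the spilled integer argument
   registers, __va_next_fp / __va_next_fp_limit the spilled floating
   point argument registers, and __va_next_stack points to the arguments
   passed on the stack.  */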
7065 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7067 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7068 ptr_type_node);
7069 f_next_o_limit = build_decl (FIELD_DECL,
7070 get_identifier ("__va_next_o_limit"),
7071 ptr_type_node);
7072 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7073 ptr_type_node);
7074 f_next_fp_limit = build_decl (FIELD_DECL,
7075 get_identifier ("__va_next_fp_limit"),
7076 ptr_type_node);
7077 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7078 ptr_type_node);
7080 DECL_FIELD_CONTEXT (f_next_o) = record;
7081 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7082 DECL_FIELD_CONTEXT (f_next_fp) = record;
7083 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7084 DECL_FIELD_CONTEXT (f_next_stack) = record;
7086 TYPE_FIELDS (record) = f_next_o;
7087 TREE_CHAIN (f_next_o) = f_next_o_limit;
7088 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7089 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7090 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7092 layout_type (record);
7094 return record;
7097 /* Implement `va_start' for varargs and stdarg. */
7099 static void
7100 sh_va_start (tree valist, rtx nextarg)
7102 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7103 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7104 tree t, u;
7105 int nfp, nint;
7107 if (TARGET_SH5)
7109 expand_builtin_saveregs ();
7110 std_expand_builtin_va_start (valist, nextarg);
7111 return;
7114 if ((! TARGET_SH2E && ! TARGET_SH4)
7115 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7117 std_expand_builtin_va_start (valist, nextarg);
7118 return;
7121 f_next_o = TYPE_FIELDS (va_list_type_node);
7122 f_next_o_limit = TREE_CHAIN (f_next_o);
7123 f_next_fp = TREE_CHAIN (f_next_o_limit);
7124 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7125 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7127 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7128 NULL_TREE);
7129 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7130 valist, f_next_o_limit, NULL_TREE);
7131 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7132 NULL_TREE);
7133 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7134 valist, f_next_fp_limit, NULL_TREE);
7135 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7136 valist, f_next_stack, NULL_TREE);
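/* The register save area returned by __builtin_saveregs holds the
   unnamed float registers first and the unnamed integer registers after
   them, so next_fp..next_fp_limit covers the float part,
   next_o..next_o_limit the integer part, and next_stack the overflow
   area.  For example, for f (int, ...) on SH4 the eight float registers
   take 32 bytes and the three remaining integer registers 12 bytes.  */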
7138 /* Call __builtin_saveregs. */
7139 u = make_tree (sizetype, expand_builtin_saveregs ());
7140 u = fold_convert (ptr_type_node, u);
7141 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7142 TREE_SIDE_EFFECTS (t) = 1;
7143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7145 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7146 if (nfp < 8)
7147 nfp = 8 - nfp;
7148 else
7149 nfp = 0;
7150 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7151 size_int (UNITS_PER_WORD * nfp));
7152 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7153 TREE_SIDE_EFFECTS (t) = 1;
7154 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7156 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7157 TREE_SIDE_EFFECTS (t) = 1;
7158 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7160 nint = crtl->args.info.arg_count[SH_ARG_INT];
7161 if (nint < 4)
7162 nint = 4 - nint;
7163 else
7164 nint = 0;
7165 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7166 size_int (UNITS_PER_WORD * nint));
7167 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7168 TREE_SIDE_EFFECTS (t) = 1;
7169 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7171 u = make_tree (ptr_type_node, nextarg);
7172 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7173 TREE_SIDE_EFFECTS (t) = 1;
7174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7177 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7178 member, return it. */
7179 static tree
7180 find_sole_member (tree type)
7182 tree field, member = NULL_TREE;
7184 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7186 if (TREE_CODE (field) != FIELD_DECL)
7187 continue;
7188 if (!DECL_SIZE (field))
7189 return NULL_TREE;
7190 if (integer_zerop (DECL_SIZE (field)))
7191 continue;
7192 if (member)
7193 return NULL_TREE;
7194 member = field;
7196 return member;
7198 /* Implement `va_arg'. */
7200 static tree
7201 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7202 gimple_seq *post_p ATTRIBUTE_UNUSED)
7204 HOST_WIDE_INT size, rsize;
7205 tree tmp, pptr_type_node;
7206 tree addr, lab_over = NULL, result = NULL;
7207 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7208 tree eff_type;
7210 if (pass_by_ref)
7211 type = build_pointer_type (type);
7213 size = int_size_in_bytes (type);
7214 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7215 pptr_type_node = build_pointer_type (ptr_type_node);
7217 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7218 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7220 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7221 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7222 int pass_as_float;
7223 tree lab_false;
7224 tree member;
7226 f_next_o = TYPE_FIELDS (va_list_type_node);
7227 f_next_o_limit = TREE_CHAIN (f_next_o);
7228 f_next_fp = TREE_CHAIN (f_next_o_limit);
7229 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7230 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7232 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7233 NULL_TREE);
7234 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7235 valist, f_next_o_limit, NULL_TREE);
7236 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7237 valist, f_next_fp, NULL_TREE);
7238 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7239 valist, f_next_fp_limit, NULL_TREE);
7240 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7241 valist, f_next_stack, NULL_TREE);
7243 /* Structures with a single member with a distinct mode are passed
7244 like their member. This is relevant if the latter has a REAL_TYPE
7245 or COMPLEX_TYPE type. */
7246 eff_type = type;
7247 while (TREE_CODE (eff_type) == RECORD_TYPE
7248 && (member = find_sole_member (eff_type))
7249 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7250 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7251 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7253 tree field_type = TREE_TYPE (member);
7255 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7256 eff_type = field_type;
7257 else
7259 gcc_assert ((TYPE_ALIGN (eff_type)
7260 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7261 || (TYPE_ALIGN (eff_type)
7262 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7263 break;
7267 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7269 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7270 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7271 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7272 && size <= 16));
7274 else
7276 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7279 addr = create_tmp_var (pptr_type_node, NULL);
7280 lab_false = create_artificial_label ();
7281 lab_over = create_artificial_label ();
7283 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
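/* ADDR holds a pointer to whichever cursor (next_fp, next_o or
   next_stack) the argument is fetched through; the code below selects
   the register save area while the argument still fits there and falls
   back to the stack overflow area otherwise.  */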
7285 if (pass_as_float)
7287 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7288 tree cmp;
7289 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7291 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7292 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7294 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7295 tmp = next_fp_limit;
7296 if (size > 4 && !is_double)
7297 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7298 unshare_expr (tmp), size_int (4 - size));
7299 tmp = build2 (GE_EXPR, boolean_type_node,
7300 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7301 cmp = build3 (COND_EXPR, void_type_node, tmp,
7302 build1 (GOTO_EXPR, void_type_node,
7303 unshare_expr (lab_false)), NULL_TREE);
7304 if (!is_double)
7305 gimplify_and_add (cmp, pre_p);
7307 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7308 || (is_double || size == 16))
7310 tmp = fold_convert (sizetype, next_fp_tmp);
7311 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7312 size_int (UNITS_PER_WORD));
7313 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7314 unshare_expr (next_fp_tmp), tmp);
7315 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7317 if (is_double)
7318 gimplify_and_add (cmp, pre_p);
7320 #ifdef FUNCTION_ARG_SCmode_WART
7321 if (TYPE_MODE (eff_type) == SCmode
7322 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7324 tree subtype = TREE_TYPE (eff_type);
7325 tree real, imag;
7327 imag
7328 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7329 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7331 real
7332 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7333 real = get_initialized_tmp_var (real, pre_p, NULL);
7335 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7336 if (type != eff_type)
7337 result = build1 (VIEW_CONVERT_EXPR, type, result);
7338 result = get_initialized_tmp_var (result, pre_p, NULL);
7340 #endif /* FUNCTION_ARG_SCmode_WART */
7342 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7343 gimplify_and_add (tmp, pre_p);
7345 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7346 gimplify_and_add (tmp, pre_p);
7348 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7349 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7350 gimplify_assign (unshare_expr (next_fp_tmp),
7351 unshare_expr (valist), pre_p);
7353 gimplify_assign (unshare_expr (valist),
7354 unshare_expr (next_fp_tmp), post_p);
7355 valist = next_fp_tmp;
7357 else
7359 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7360 unshare_expr (next_o), size_int (rsize));
7361 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7362 unshare_expr (next_o_limit));
7363 tmp = build3 (COND_EXPR, void_type_node, tmp,
7364 build1 (GOTO_EXPR, void_type_node,
7365 unshare_expr (lab_false)),
7366 NULL_TREE);
7367 gimplify_and_add (tmp, pre_p);
7369 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7370 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7372 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7373 gimplify_and_add (tmp, pre_p);
7375 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7376 gimplify_and_add (tmp, pre_p);
7378 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7379 gimplify_assign (unshare_expr (next_o),
7380 unshare_expr (next_o_limit), pre_p);
7382 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7383 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7386 if (!result)
7388 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7389 gimplify_and_add (tmp, pre_p);
7393 /* ??? In va-sh.h, there had been code to make values larger than
7394 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7396 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7397 if (result)
7399 gimplify_assign (result, tmp, pre_p);
7401 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7402 gimplify_and_add (tmp, pre_p);
7404 else
7405 result = tmp;
7407 if (pass_by_ref)
7408 result = build_va_arg_indirect_ref (result);
7410 return result;
7413 /* 64-bit floating point memory transfers are paired single precision loads
7414 or stores, so the DWARF information needs fixing in little endian mode
7415 (unless PR=SZ=1 in FPSCR). */
7417 sh_dwarf_register_span (rtx reg)
7419 unsigned regno = REGNO (reg);
7421 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7422 return NULL_RTX;
7424 return
7425 gen_rtx_PARALLEL (VOIDmode,
7426 gen_rtvec (2,
7427 gen_rtx_REG (SFmode,
7428 DBX_REGISTER_NUMBER (regno+1)),
7429 gen_rtx_REG (SFmode,
7430 DBX_REGISTER_NUMBER (regno))));
7433 bool
7434 sh_promote_prototypes (const_tree type)
7436 if (TARGET_HITACHI)
7437 return 0;
7438 if (! type)
7439 return 1;
7440 return ! sh_attr_renesas_p (type);
7443 /* Whether an argument must be passed by reference. On SHcompact, we
7444 pretend that arguments wider than 32 bits that would have been passed in
7445 registers are passed by reference, so that an SHmedia trampoline
7446 loads them into the full 64-bit registers. */
7448 static int
7449 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7450 const_tree type, bool named)
7452 unsigned HOST_WIDE_INT size;
7454 if (type)
7455 size = int_size_in_bytes (type);
7456 else
7457 size = GET_MODE_SIZE (mode);
7459 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7460 && (!named
7461 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7462 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7463 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7464 && size > 4
7465 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7466 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7467 return size;
7468 else
7469 return 0;
7472 static bool
7473 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7474 const_tree type, bool named)
7476 if (targetm.calls.must_pass_in_stack (mode, type))
7477 return true;
7479 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7480 wants to know about pass-by-reference semantics for incoming
7481 arguments. */
7482 if (! cum)
7483 return false;
7485 if (TARGET_SHCOMPACT)
7487 cum->byref = shcompact_byref (cum, mode, type, named);
7488 return cum->byref != 0;
7491 return false;
7494 static bool
7495 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7496 const_tree type, bool named ATTRIBUTE_UNUSED)
7498 /* ??? How can it possibly be correct to return true only on the
7499 caller side of the equation? Is there someplace else in the
7500 sh backend that's magically producing the copies? */
7501 return (cum->outgoing
7502 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7503 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
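/* Return the number of bytes of an argument that are passed in
   registers when the remainder of the argument has to go on the
   stack.  */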
7506 static int
7507 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7508 tree type, bool named ATTRIBUTE_UNUSED)
7510 int words = 0;
7512 if (!TARGET_SH5
7513 && PASS_IN_REG_P (*cum, mode, type)
7514 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7515 && (ROUND_REG (*cum, mode)
7516 + (mode != BLKmode
7517 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7518 : ROUND_ADVANCE (int_size_in_bytes (type)))
7519 > NPARM_REGS (mode)))
7520 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7522 else if (!TARGET_SHCOMPACT
7523 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7524 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7526 return words * UNITS_PER_WORD;
7530 /* Define where to put the arguments to a function.
7531 Value is zero to push the argument on the stack,
7532 or a hard register in which to store the argument.
7534 MODE is the argument's machine mode.
7535 TYPE is the data type of the argument (as a tree).
7536 This is null for libcalls where that information may
7537 not be available.
7538 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7539 the preceding args and about the function being called.
7540 NAMED is nonzero if this argument is a named parameter
7541 (otherwise it is an extra parameter matching an ellipsis).
7543 On SH the first args are normally in registers
7544 and the rest are pushed. Any arg that starts within the first
7545 NPARM_REGS words is at least partially passed in a register unless
7546 its data type forbids. */
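/* Illustrative sketch (assumed typical case, not from the original comment):
   on SH1-SH4 with the default ABI, NPARM_REGS (SImode) is 4 and the first
   integer argument registers are r4..r7, so for
     int f (int a, int b, int c, int d, int e);
   a..d go in r4..r7 and e is pushed on the stack.  The exact registers
   depend on the target options handled below.  */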
7550 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7551 tree type, int named)
7553 if (! TARGET_SH5 && mode == VOIDmode)
7554 return GEN_INT (ca->renesas_abi ? 1 : 0);
7556 if (! TARGET_SH5
7557 && PASS_IN_REG_P (*ca, mode, type)
7558 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7560 int regno;
7562 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7563 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7565 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7566 gen_rtx_REG (SFmode,
7567 BASE_ARG_REG (mode)
7568 + (ROUND_REG (*ca, mode) ^ 1)),
7569 const0_rtx);
7570 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7571 gen_rtx_REG (SFmode,
7572 BASE_ARG_REG (mode)
7573 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7574 GEN_INT (4));
7575 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7578 /* If the alignment of a DF value causes an SF register to be
7579 skipped, we will use that skipped register for the next SF
7580 value. */
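/* Illustrative example (an assumed typical case): for
     f (float a, double b, float c)
   under the Hitachi/Renesas ABI, b must start on an even-numbered single
   register, so the single register after a is skipped; free_single_fp_reg
   records that skipped register and c is placed into it here.  */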
7581 if ((TARGET_HITACHI || ca->renesas_abi)
7582 && ca->free_single_fp_reg
7583 && mode == SFmode)
7584 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7586 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7587 ^ (mode == SFmode && TARGET_SH4
7588 && TARGET_LITTLE_ENDIAN != 0
7589 && ! TARGET_HITACHI && ! ca->renesas_abi);
7590 return gen_rtx_REG (mode, regno);
7594 if (TARGET_SH5)
7596 if (mode == VOIDmode && TARGET_SHCOMPACT)
7597 return GEN_INT (ca->call_cookie);
7599 /* The following test assumes unnamed arguments are promoted to
7600 DFmode. */
7601 if (mode == SFmode && ca->free_single_fp_reg)
7602 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7604 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7605 && (named || ! ca->prototype_p)
7606 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7608 if (! ca->prototype_p && TARGET_SHMEDIA)
7609 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7611 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7612 FIRST_FP_PARM_REG
7613 + ca->arg_count[(int) SH_ARG_FLOAT]);
7616 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7617 && (! TARGET_SHCOMPACT
7618 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7619 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7620 type, named))))
7622 return gen_rtx_REG (mode, (FIRST_PARM_REG
7623 + ca->arg_count[(int) SH_ARG_INT]));
7626 return 0;
7629 return 0;
7632 /* Update the data in CUM to advance over an argument
7633 of mode MODE and data type TYPE.
7634 (TYPE is null for libcalls where that information may not be
7635 available.) */
7637 void
7638 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7639 tree type, int named)
7641 if (ca->force_mem)
7642 ca->force_mem = 0;
7643 else if (TARGET_SH5)
7645 tree type2 = (ca->byref && type
7646 ? TREE_TYPE (type)
7647 : type);
7648 enum machine_mode mode2 = (ca->byref && type
7649 ? TYPE_MODE (type2)
7650 : mode);
7651 int dwords = ((ca->byref
7652 ? ca->byref
7653 : mode2 == BLKmode
7654 ? int_size_in_bytes (type2)
7655 : GET_MODE_SIZE (mode2)) + 7) / 8;
7656 int numregs = MIN (dwords, NPARM_REGS (SImode)
7657 - ca->arg_count[(int) SH_ARG_INT]);
7659 if (numregs)
7661 ca->arg_count[(int) SH_ARG_INT] += numregs;
7662 if (TARGET_SHCOMPACT
7663 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7665 ca->call_cookie
7666 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7667 - numregs, 1);
7668 /* N.B. We want this also for outgoing. */
7669 ca->stack_regs += numregs;
7671 else if (ca->byref)
7673 if (! ca->outgoing)
7674 ca->stack_regs += numregs;
7675 ca->byref_regs += numregs;
7676 ca->byref = 0;
7678 ca->call_cookie
7679 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7680 - numregs, 2);
7681 while (--numregs);
7682 ca->call_cookie
7683 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7684 - 1, 1);
7686 else if (dwords > numregs)
7688 int pushregs = numregs;
7690 if (TARGET_SHCOMPACT)
7691 ca->stack_regs += numregs;
7692 while (pushregs < NPARM_REGS (SImode) - 1
7693 && (CALL_COOKIE_INT_REG_GET
7694 (ca->call_cookie,
7695 NPARM_REGS (SImode) - pushregs)
7696 == 1))
7698 ca->call_cookie
7699 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7700 - pushregs, 1);
7701 pushregs++;
7703 if (numregs == NPARM_REGS (SImode))
7704 ca->call_cookie
7705 |= CALL_COOKIE_INT_REG (0, 1)
7706 | CALL_COOKIE_STACKSEQ (numregs - 1);
7707 else
7708 ca->call_cookie
7709 |= CALL_COOKIE_STACKSEQ (numregs);
7712 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7713 && (named || ! ca->prototype_p))
7715 if (mode2 == SFmode && ca->free_single_fp_reg)
7716 ca->free_single_fp_reg = 0;
7717 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7718 < NPARM_REGS (SFmode))
7720 int numfpregs
7721 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7722 NPARM_REGS (SFmode)
7723 - ca->arg_count[(int) SH_ARG_FLOAT]);
7725 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7727 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7729 if (ca->outgoing && numregs > 0)
7732 ca->call_cookie
7733 |= (CALL_COOKIE_INT_REG
7734 (ca->arg_count[(int) SH_ARG_INT]
7735 - numregs + ((numfpregs - 2) / 2),
7736 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7737 - numfpregs) / 2));
7739 while (numfpregs -= 2);
7741 else if (mode2 == SFmode && (named)
7742 && (ca->arg_count[(int) SH_ARG_FLOAT]
7743 < NPARM_REGS (SFmode)))
7744 ca->free_single_fp_reg
7745 = FIRST_FP_PARM_REG - numfpregs
7746 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7749 return;
7752 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7754 /* Note that we've used the skipped register. */
7755 if (mode == SFmode && ca->free_single_fp_reg)
7757 ca->free_single_fp_reg = 0;
7758 return;
7760 /* When we have a DF after an SF, there's an SF register that gets
7761 skipped in order to align the DF value. We note this skipped
7762 register, because the next SF value will use it, and not the
7763 SF that follows the DF. */
7764 if (mode == DFmode
7765 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7767 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7768 + BASE_ARG_REG (mode));
7772 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7773 || PASS_IN_REG_P (*ca, mode, type))
7774 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7775 = (ROUND_REG (*ca, mode)
7776 + (mode == BLKmode
7777 ? ROUND_ADVANCE (int_size_in_bytes (type))
7778 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7781 /* The Renesas calling convention doesn't quite fit into this scheme since
7782 the address is passed like an invisible argument, but one that is always
7783 passed in memory. */
7784 static rtx
7785 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7787 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7788 return 0;
7789 return gen_rtx_REG (Pmode, 2);
7792 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7794 static bool
7795 sh_return_in_memory (const_tree type, const_tree fndecl)
7797 if (TARGET_SH5)
7799 if (TYPE_MODE (type) == BLKmode)
7800 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7801 else
7802 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7804 else
7806 return (TYPE_MODE (type) == BLKmode
7807 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7808 && TREE_CODE (type) == RECORD_TYPE));
7812 /* We actually emit the code in sh_expand_prologue. We used to use
7813 a static variable to flag that we need to emit this code, but that
7814 doesn't work when inlining, when functions are deferred and then emitted
7815 later. Fortunately, we already have two flags that are part of struct
7816 function that tell if a function uses varargs or stdarg. */
7817 static void
7818 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7819 enum machine_mode mode,
7820 tree type,
7821 int *pretend_arg_size,
7822 int second_time ATTRIBUTE_UNUSED)
7824 gcc_assert (cfun->stdarg);
7825 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7827 int named_parm_regs, anon_parm_regs;
7829 named_parm_regs = (ROUND_REG (*ca, mode)
7830 + (mode == BLKmode
7831 ? ROUND_ADVANCE (int_size_in_bytes (type))
7832 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7833 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7834 if (anon_parm_regs > 0)
7835 *pretend_arg_size = anon_parm_regs * 4;
7839 static bool
7840 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7842 return TARGET_SH5;
7845 static bool
7846 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7848 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7852 /* Define the offset between two registers, one to be eliminated, and
7853 the other its replacement, at the start of a routine. */
7856 initial_elimination_offset (int from, int to)
7858 int regs_saved;
7859 int regs_saved_rounding = 0;
7860 int total_saved_regs_space;
7861 int total_auto_space;
7862 int save_flags = target_flags;
7863 int copy_flags;
7864 HARD_REG_SET live_regs_mask;
7866 shmedia_space_reserved_for_target_registers = false;
7867 regs_saved = calc_live_regs (&live_regs_mask);
7868 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7870 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7872 shmedia_space_reserved_for_target_registers = true;
7873 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7876 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7877 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7878 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7880 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7881 copy_flags = target_flags;
7882 target_flags = save_flags;
7884 total_saved_regs_space = regs_saved + regs_saved_rounding;
7886 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7887 return total_saved_regs_space + total_auto_space
7888 + crtl->args.info.byref_regs * 8;
7890 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7891 return total_saved_regs_space + total_auto_space
7892 + crtl->args.info.byref_regs * 8;
7894 /* Initial gap between fp and sp is 0. */
7895 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7896 return 0;
7898 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7899 return rounded_frame_size (0);
7901 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7902 return rounded_frame_size (0);
7904 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7905 && (to == HARD_FRAME_POINTER_REGNUM
7906 || to == STACK_POINTER_REGNUM));
7907 if (TARGET_SH5)
7909 int n = total_saved_regs_space;
7910 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7911 save_schedule schedule;
7912 save_entry *entry;
7914 n += total_auto_space;
7916 /* If it wasn't saved, there's not much we can do. */
7917 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7918 return n;
7920 target_flags = copy_flags;
7922 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7923 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7924 if (entry->reg == pr_reg)
7926 target_flags = save_flags;
7927 return entry->offset;
7929 gcc_unreachable ();
7931 else
7932 return total_auto_space;
7935 /* Parse the -mfixed-range= option string. */
7936 void
7937 sh_fix_range (const char *const_str)
7939 int i, first, last;
7940 char *str, *dash, *comma;
7942 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
7943 REG2 are either register names or register numbers. The effect
7944 of this option is to mark the registers in the range from REG1 to
7945 REG2 as ``fixed'' so they won't be used by the compiler. */
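/* Illustrative example (register names are only assumptions):
     -mfixed-range=r10-r13
   marks r10, r11, r12 and r13 as fixed; several ranges may be given
   separated by commas, e.g. -mfixed-range=r10-r13,fr12-fr15.  */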
7947 i = strlen (const_str);
7948 str = (char *) alloca (i + 1);
7949 memcpy (str, const_str, i + 1);
7951 while (1)
7953 dash = strchr (str, '-');
7954 if (!dash)
7956 warning (0, "value of -mfixed-range must have form REG1-REG2");
7957 return;
7959 *dash = '\0';
7960 comma = strchr (dash + 1, ',');
7961 if (comma)
7962 *comma = '\0';
7964 first = decode_reg_name (str);
7965 if (first < 0)
7967 warning (0, "unknown register name: %s", str);
7968 return;
7971 last = decode_reg_name (dash + 1);
7972 if (last < 0)
7974 warning (0, "unknown register name: %s", dash + 1);
7975 return;
7978 *dash = '-';
7980 if (first > last)
7982 warning (0, "%s-%s is an empty range", str, dash + 1);
7983 return;
7986 for (i = first; i <= last; ++i)
7987 fixed_regs[i] = call_used_regs[i] = 1;
7989 if (!comma)
7990 break;
7992 *comma = ',';
7993 str = comma + 1;
7997 /* Insert any deferred function attributes from earlier pragmas. */
7998 static void
7999 sh_insert_attributes (tree node, tree *attributes)
8001 tree attrs;
8003 if (TREE_CODE (node) != FUNCTION_DECL)
8004 return;
8006 /* We are only interested in fields. */
8007 if (!DECL_P (node))
8008 return;
8010 /* Append the attributes to the deferred attributes. */
8011 *sh_deferred_function_attributes_tail = *attributes;
8012 attrs = sh_deferred_function_attributes;
8013 if (!attrs)
8014 return;
8016 /* Some attributes imply or require the interrupt attribute. */
8017 if (!lookup_attribute ("interrupt_handler", attrs)
8018 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8020 /* If we have a trapa_handler, but no interrupt_handler attribute,
8021 insert an interrupt_handler attribute. */
8022 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8023 /* We can't use sh_pr_interrupt here because that's not in the
8024 java frontend. */
8025 attrs
8026 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8027 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8028 if the interrupt attribute is missing, we ignore the attribute
8029 and warn. */
8030 else if (lookup_attribute ("sp_switch", attrs)
8031 || lookup_attribute ("trap_exit", attrs)
8032 || lookup_attribute ("nosave_low_regs", attrs)
8033 || lookup_attribute ("resbank", attrs))
8035 tree *tail;
8037 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8039 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8040 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8041 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8042 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8043 warning (OPT_Wattributes,
8044 "%qs attribute only applies to interrupt functions",
8045 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8046 else
8048 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8049 NULL_TREE);
8050 tail = &TREE_CHAIN (*tail);
8053 attrs = *attributes;
8057 /* Install the processed list. */
8058 *attributes = attrs;
8060 /* Clear deferred attributes. */
8061 sh_deferred_function_attributes = NULL_TREE;
8062 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8064 return;
8067 /* Supported attributes:
8069 interrupt_handler -- specifies this function is an interrupt handler.
8071 trapa_handler - like above, but don't save all registers.
8073 sp_switch -- specifies an alternate stack for an interrupt handler
8074 to run on.
8076 trap_exit -- use a trapa to exit an interrupt function instead of
8077 an rte instruction.
8079 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8080 This is useful on the SH3 and upwards,
8081 which have a separate set of low regs for User and Supervisor modes.
8082 This should only be used for the lowest level of interrupts. Higher levels
8083 of interrupts must save the registers in case they themselves are
8084 interrupted.
8086 renesas -- use Renesas calling/layout conventions (functions and
8087 structures).
8089 resbank -- In case of an ISR, use a register bank to save registers
8090 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
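/* Illustrative usage of the attributes above (hypothetical user code, not
   part of this file):
     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void vect (void) __attribute__ ((function_vector (0x20)));
   sp_switch names a variable holding the alternate stack address, trap_exit
   gives the trapa number used on return, and function_vector (SH2A) selects
   the vector table entry, which must be in the range 0..255.  */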
8093 const struct attribute_spec sh_attribute_table[] =
8095 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8096 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8097 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8098 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8099 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8100 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8101 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8102 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8103 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8104 #ifdef SYMBIAN
8105 /* Symbian support adds two new attributes:
8106 dllexport - for exporting a function/variable that will live in a dll
8107 dllimport - for importing a function/variable from a dll
8109 Microsoft allows multiple declspecs in one __declspec, separating
8110 them with spaces. We do NOT support this. Instead, use __declspec
8111 multiple times. */
8112 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8113 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8114 #endif
8115 { NULL, 0, 0, false, false, false, NULL }
8118 /* Handle a 'resbank' attribute. */
8119 static tree
8120 sh_handle_resbank_handler_attribute (tree * node, tree name,
8121 tree args ATTRIBUTE_UNUSED,
8122 int flags ATTRIBUTE_UNUSED,
8123 bool * no_add_attrs)
8125 if (!TARGET_SH2A)
8127 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8128 IDENTIFIER_POINTER (name));
8129 *no_add_attrs = true;
8131 if (TREE_CODE (*node) != FUNCTION_DECL)
8133 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8134 IDENTIFIER_POINTER (name));
8135 *no_add_attrs = true;
8138 return NULL_TREE;
8141 /* Handle an "interrupt_handler" attribute; arguments as in
8142 struct attribute_spec.handler. */
8143 static tree
8144 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8145 tree args ATTRIBUTE_UNUSED,
8146 int flags ATTRIBUTE_UNUSED,
8147 bool *no_add_attrs)
8149 if (TREE_CODE (*node) != FUNCTION_DECL)
8151 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8152 IDENTIFIER_POINTER (name));
8153 *no_add_attrs = true;
8155 else if (TARGET_SHCOMPACT)
8157 error ("attribute interrupt_handler is not compatible with -m5-compact");
8158 *no_add_attrs = true;
8161 return NULL_TREE;
8164 /* Handle a 'function_vector' attribute; arguments as in
8165 struct attribute_spec.handler. */
8166 static tree
8167 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8168 tree args ATTRIBUTE_UNUSED,
8169 int flags ATTRIBUTE_UNUSED,
8170 bool * no_add_attrs)
8172 if (!TARGET_SH2A)
8174 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8175 IDENTIFIER_POINTER (name));
8176 *no_add_attrs = true;
8178 else if (TREE_CODE (*node) != FUNCTION_DECL)
8180 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8181 IDENTIFIER_POINTER (name));
8182 *no_add_attrs = true;
8184 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8186 /* The argument must be a constant integer. */
8187 warning (OPT_Wattributes,
8188 "`%s' attribute argument not an integer constant",
8189 IDENTIFIER_POINTER (name));
8190 *no_add_attrs = true;
8192 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8194 /* The argument value must be between 0 and 255. */
8195 warning (OPT_Wattributes,
8196 "`%s' attribute argument should be between 0 and 255",
8197 IDENTIFIER_POINTER (name));
8198 *no_add_attrs = true;
8200 return NULL_TREE;
8203 /* Returns 1 if the symbol X refers to a function that has been
8204 assigned the attribute 'function_vector'. */
8206 sh2a_is_function_vector_call (rtx x)
8208 if (GET_CODE (x) == SYMBOL_REF
8209 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8211 tree tr = SYMBOL_REF_DECL (x);
8213 if (sh2a_function_vector_p (tr))
8214 return 1;
8217 return 0;
8220 /* Returns the function vector number, if the attribute
8221 'function_vector' is assigned, otherwise returns zero. */
8223 sh2a_get_function_vector_number (rtx x)
8225 int num;
8226 tree list, t;
8228 if ((GET_CODE (x) == SYMBOL_REF)
8229 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8231 t = SYMBOL_REF_DECL (x);
8233 if (TREE_CODE (t) != FUNCTION_DECL)
8234 return 0;
8236 list = SH_ATTRIBUTES (t);
8237 while (list)
8239 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8241 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8242 return num;
8245 list = TREE_CHAIN (list);
8248 return 0;
8250 else
8251 return 0;
8254 /* Handle an "sp_switch" attribute; arguments as in
8255 struct attribute_spec.handler. */
8256 static tree
8257 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8258 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8260 if (TREE_CODE (*node) != FUNCTION_DECL)
8262 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8263 IDENTIFIER_POINTER (name));
8264 *no_add_attrs = true;
8266 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8268 /* The argument must be a constant string. */
8269 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8270 IDENTIFIER_POINTER (name));
8271 *no_add_attrs = true;
8274 return NULL_TREE;
8277 /* Handle a "trap_exit" attribute; arguments as in
8278 struct attribute_spec.handler. */
8279 static tree
8280 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8281 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8283 if (TREE_CODE (*node) != FUNCTION_DECL)
8285 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8286 IDENTIFIER_POINTER (name));
8287 *no_add_attrs = true;
8289 /* The argument specifies a trap number to be used in a trapa instruction
8290 at function exit (instead of an rte instruction). */
8291 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8293 /* The argument must be a constant integer. */
8294 warning (OPT_Wattributes, "%qs attribute argument not an "
8295 "integer constant", IDENTIFIER_POINTER (name));
8296 *no_add_attrs = true;
8299 return NULL_TREE;
8302 static tree
8303 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8304 tree name ATTRIBUTE_UNUSED,
8305 tree args ATTRIBUTE_UNUSED,
8306 int flags ATTRIBUTE_UNUSED,
8307 bool *no_add_attrs ATTRIBUTE_UNUSED)
8309 return NULL_TREE;
8312 /* True if __attribute__((renesas)) or -mrenesas. */
8314 sh_attr_renesas_p (const_tree td)
8316 if (TARGET_HITACHI)
8317 return 1;
8318 if (td == 0)
8319 return 0;
8320 if (DECL_P (td))
8321 td = TREE_TYPE (td);
8322 if (td == error_mark_node)
8323 return 0;
8324 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8325 != NULL_TREE);
8328 /* True if __attribute__((renesas)) or -mrenesas, for the current
8329 function. */
8331 sh_cfun_attr_renesas_p (void)
8333 return sh_attr_renesas_p (current_function_decl);
8337 sh_cfun_interrupt_handler_p (void)
8339 return (lookup_attribute ("interrupt_handler",
8340 DECL_ATTRIBUTES (current_function_decl))
8341 != NULL_TREE);
8344 /* Returns 1 if FUNC has been assigned the attribute
8345 "function_vector". */
8347 sh2a_function_vector_p (tree func)
8349 tree list;
8350 if (TREE_CODE (func) != FUNCTION_DECL)
8351 return 0;
8353 list = SH_ATTRIBUTES (func);
8354 while (list)
8356 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8357 return 1;
8359 list = TREE_CHAIN (list);
8361 return 0;
8364 /* Returns TRUE if the current function has the "resbank" attribute. */
8367 sh_cfun_resbank_handler_p (void)
8369 return ((lookup_attribute ("resbank",
8370 DECL_ATTRIBUTES (current_function_decl))
8371 != NULL_TREE)
8372 && (lookup_attribute ("interrupt_handler",
8373 DECL_ATTRIBUTES (current_function_decl))
8374 != NULL_TREE) && TARGET_SH2A);
8377 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8379 static const char *
8380 sh_check_pch_target_flags (int old_flags)
8382 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8383 | MASK_SH_E | MASK_HARD_SH4
8384 | MASK_FPU_SINGLE | MASK_SH4))
8385 return _("created and used with different architectures / ABIs");
8386 if ((old_flags ^ target_flags) & MASK_HITACHI)
8387 return _("created and used with different ABIs");
8388 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8389 return _("created and used with different endianness");
8390 return NULL;
8393 /* Predicates used by the templates. */
8395 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8396 Used only in general_movsrc_operand. */
8399 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8401 switch (REGNO (op))
8403 case PR_REG:
8404 case MACL_REG:
8405 case MACH_REG:
8406 return 1;
8408 return 0;
8411 /* Nonzero if OP is a floating point value with value 0.0. */
8414 fp_zero_operand (rtx op)
8416 REAL_VALUE_TYPE r;
8418 if (GET_MODE (op) != SFmode)
8419 return 0;
8421 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8422 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8425 /* Nonzero if OP is a floating point value with value 1.0. */
8428 fp_one_operand (rtx op)
8430 REAL_VALUE_TYPE r;
8432 if (GET_MODE (op) != SFmode)
8433 return 0;
8435 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8436 return REAL_VALUES_EQUAL (r, dconst1);
8439 /* For -m4 and -m4-single-only, mode switching is used. If we are
8440 compiling without -mfmovd, movsf_ie isn't taken into account for
8441 mode switching. We could check in machine_dependent_reorg for
8442 cases where we know we are in single precision mode, but there is no
8443 interface to find that out during reload, so we must avoid
8444 choosing an fldi alternative during reload and thus failing to
8445 allocate a scratch register for the constant loading. */
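/* For reference (an illustrative note, not from the original comment):
   fldi0 and fldi1 load the constants 0.0 and 1.0 directly into a single
   precision FP register, so e.g. "float x = 1.0f;" needs no constant pool
   entry when an fldi alternative may be used.  */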
8447 fldi_ok (void)
8449 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8453 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8455 enum rtx_code code = GET_CODE (op);
8456 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8459 /* Return the TLS type for TLS symbols, TLS_MODEL_NONE otherwise. */
8460 enum tls_model
8461 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8463 if (GET_CODE (op) != SYMBOL_REF)
8464 return TLS_MODEL_NONE;
8465 return SYMBOL_REF_TLS_MODEL (op);
8468 /* Return the destination address of a branch. */
8470 static int
8471 branch_dest (rtx branch)
8473 rtx dest = SET_SRC (PATTERN (branch));
8474 int dest_uid;
8476 if (GET_CODE (dest) == IF_THEN_ELSE)
8477 dest = XEXP (dest, 1);
8478 dest = XEXP (dest, 0);
8479 dest_uid = INSN_UID (dest);
8480 return INSN_ADDRESSES (dest_uid);
8483 /* Return nonzero if REG is not used after INSN.
8484 We assume REG is a reload reg, and therefore does
8485 not live past labels. It may live past calls or jumps though. */
8487 reg_unused_after (rtx reg, rtx insn)
8489 enum rtx_code code;
8490 rtx set;
8492 /* If the reg is set by this instruction, then it is safe for our
8493 case. Disregard the case where this is a store to memory, since
8494 we are checking a register used in the store address. */
8495 set = single_set (insn);
8496 if (set && GET_CODE (SET_DEST (set)) != MEM
8497 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8498 return 1;
8500 while ((insn = NEXT_INSN (insn)))
8502 rtx set;
8503 if (!INSN_P (insn))
8504 continue;
8506 code = GET_CODE (insn);
8508 #if 0
8509 /* If this is a label that existed before reload, then the register
8510 is dead here. However, if this is a label added by reorg, then
8511 the register may still be live here. We can't tell the difference,
8512 so we just ignore labels completely. */
8513 if (code == CODE_LABEL)
8514 return 1;
8515 /* else */
8516 #endif
8518 if (code == JUMP_INSN)
8519 return 0;
8521 /* If this is a sequence, we must handle them all at once.
8522 We could have for instance a call that sets the target register,
8523 and an insn in a delay slot that uses the register. In this case,
8524 we must return 0. */
8525 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8527 int i;
8528 int retval = 0;
8530 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8532 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8533 rtx set = single_set (this_insn);
8535 if (GET_CODE (this_insn) == CALL_INSN)
8536 code = CALL_INSN;
8537 else if (GET_CODE (this_insn) == JUMP_INSN)
8539 if (INSN_ANNULLED_BRANCH_P (this_insn))
8540 return 0;
8541 code = JUMP_INSN;
8544 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8545 return 0;
8546 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8548 if (GET_CODE (SET_DEST (set)) != MEM)
8549 retval = 1;
8550 else
8551 return 0;
8553 if (set == 0
8554 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8555 return 0;
8557 if (retval == 1)
8558 return 1;
8559 else if (code == JUMP_INSN)
8560 return 0;
8563 set = single_set (insn);
8564 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8565 return 0;
8566 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8567 return GET_CODE (SET_DEST (set)) != MEM;
8568 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8569 return 0;
8571 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8572 return 1;
8574 return 1;
8577 #include "ggc.h"
8579 static GTY(()) rtx fpscr_rtx;
8581 get_fpscr_rtx (void)
8583 if (! fpscr_rtx)
8585 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8586 REG_USERVAR_P (fpscr_rtx) = 1;
8587 mark_user_reg (fpscr_rtx);
8589 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8590 mark_user_reg (fpscr_rtx);
8591 return fpscr_rtx;
8594 static GTY(()) tree fpscr_values;
8596 static void
8597 emit_fpu_switch (rtx scratch, int index)
8599 rtx dst, src;
8601 if (fpscr_values == NULL)
8603 tree t;
8605 t = build_index_type (integer_one_node);
8606 t = build_array_type (integer_type_node, t);
8607 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8608 DECL_ARTIFICIAL (t) = 1;
8609 DECL_IGNORED_P (t) = 1;
8610 DECL_EXTERNAL (t) = 1;
8611 TREE_STATIC (t) = 1;
8612 TREE_PUBLIC (t) = 1;
8613 TREE_USED (t) = 1;
8615 fpscr_values = t;
8618 src = DECL_RTL (fpscr_values);
8619 if (!can_create_pseudo_p ())
8621 emit_move_insn (scratch, XEXP (src, 0));
8622 if (index != 0)
8623 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8624 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8626 else
8627 src = adjust_address (src, PSImode, index * 4);
8629 dst = get_fpscr_rtx ();
8630 emit_move_insn (dst, src);
8633 void
8634 emit_sf_insn (rtx pat)
8636 emit_insn (pat);
8639 void
8640 emit_df_insn (rtx pat)
8642 emit_insn (pat);
8645 void
8646 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8648 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8651 void
8652 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8654 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8655 get_fpscr_rtx ()));
8658 void
8659 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8661 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8664 void
8665 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8667 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8668 get_fpscr_rtx ()));
8671 static rtx get_free_reg (HARD_REG_SET);
8673 /* This function returns a register to use to load the address to load
8674 the fpscr from. Currently it always returns r1 or r7, but when we are
8675 able to use pseudo registers after combine, or have a better mechanism
8676 for choosing a register, it should be done here. */
8677 /* REGS_LIVE is the liveness information for the point for which we
8678 need this allocation. In some bare-bones exit blocks, r1 is live at the
8679 start. We can even have all of r0..r3 being live:
8680 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8681 The INSN before which new insns are placed will clobber the register
8682 we return. If a basic block consists only of setting the return value
8683 register to a pseudo and using that register, the return value is not
8684 live before or after this block, yet we'll insert our insns right in
8685 the middle. */
8687 static rtx
8688 get_free_reg (HARD_REG_SET regs_live)
8690 if (! TEST_HARD_REG_BIT (regs_live, 1))
8691 return gen_rtx_REG (Pmode, 1);
8693 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8694 there shouldn't be anything but a jump before the function end. */
8695 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8696 return gen_rtx_REG (Pmode, 7);
8699 /* This function will set the fpscr from memory.
8700 MODE is the mode we are setting it to. */
8701 void
8702 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8704 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8705 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8706 rtx addr_reg;
8708 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8709 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8712 /* Is the given character a logical line separator for the assembler? */
8713 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8714 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8715 #endif
8718 sh_insn_length_adjustment (rtx insn)
8720 /* Instructions with unfilled delay slots take up an extra two bytes for
8721 the nop in the delay slot. */
8722 if (((GET_CODE (insn) == INSN
8723 && GET_CODE (PATTERN (insn)) != USE
8724 && GET_CODE (PATTERN (insn)) != CLOBBER)
8725 || GET_CODE (insn) == CALL_INSN
8726 || (GET_CODE (insn) == JUMP_INSN
8727 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8728 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8729 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8730 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8731 return 2;
8733 /* SH2e has a bug that prevents the use of annulled branches, so if
8734 the delay slot is not filled, we'll have to put a NOP in it. */
8735 if (sh_cpu_attr == CPU_SH2E
8736 && GET_CODE (insn) == JUMP_INSN
8737 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8738 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8739 && get_attr_type (insn) == TYPE_CBRANCH
8740 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8741 return 2;
8743 /* sh-dsp parallel processing insns take four bytes instead of two. */
8745 if (GET_CODE (insn) == INSN)
8747 int sum = 0;
8748 rtx body = PATTERN (insn);
8749 const char *templ;
8750 char c;
8751 int maybe_label = 1;
8753 if (GET_CODE (body) == ASM_INPUT)
8754 templ = XSTR (body, 0);
8755 else if (asm_noperands (body) >= 0)
8756 templ
8757 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8758 else
8759 return 0;
8762 int ppi_adjust = 0;
8765 c = *templ++;
8766 while (c == ' ' || c == '\t');
8767 /* all sh-dsp parallel-processing insns start with p.
8768 The only non-ppi sh insn starting with p is pref.
8769 The only ppi starting with pr is prnd. */
8770 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8771 ppi_adjust = 2;
8772 /* The repeat pseudo-insn expands to three insns, a total of
8773 six bytes in size. */
8774 else if ((c == 'r' || c == 'R')
8775 && ! strncasecmp ("epeat", templ, 5))
8776 ppi_adjust = 4;
8777 while (c && c != '\n'
8778 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8780 /* If this is a label, it is obviously not a ppi insn. */
8781 if (c == ':' && maybe_label)
8783 ppi_adjust = 0;
8784 break;
8786 else if (c == '\'' || c == '"')
8787 maybe_label = 0;
8788 c = *templ++;
8790 sum += ppi_adjust;
8791 maybe_label = c != ':';
8793 while (c);
8794 return sum;
8796 return 0;
8799 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8800 isn't protected by a PIC unspec. */
8802 nonpic_symbol_mentioned_p (rtx x)
8804 register const char *fmt;
8805 register int i;
8807 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8808 || GET_CODE (x) == PC)
8809 return 1;
8811 /* We don't want to look into the possible MEM location of a
8812 CONST_DOUBLE, since we're not going to use it, in general. */
8813 if (GET_CODE (x) == CONST_DOUBLE)
8814 return 0;
8816 if (GET_CODE (x) == UNSPEC
8817 && (XINT (x, 1) == UNSPEC_PIC
8818 || XINT (x, 1) == UNSPEC_GOT
8819 || XINT (x, 1) == UNSPEC_GOTOFF
8820 || XINT (x, 1) == UNSPEC_GOTPLT
8821 || XINT (x, 1) == UNSPEC_GOTTPOFF
8822 || XINT (x, 1) == UNSPEC_DTPOFF
8823 || XINT (x, 1) == UNSPEC_PLT
8824 || XINT (x, 1) == UNSPEC_SYMOFF
8825 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
8826 return 0;
8828 fmt = GET_RTX_FORMAT (GET_CODE (x));
8829 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8831 if (fmt[i] == 'E')
8833 register int j;
8835 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8836 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8837 return 1;
8839 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8840 return 1;
8843 return 0;
8846 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8847 @GOTOFF in `reg'. */
8849 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8850 rtx reg)
8852 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
8853 return orig;
8855 if (GET_CODE (orig) == LABEL_REF
8856 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8858 if (reg == 0)
8859 reg = gen_reg_rtx (Pmode);
8861 emit_insn (gen_symGOTOFF2reg (reg, orig));
8862 return reg;
8864 else if (GET_CODE (orig) == SYMBOL_REF)
8866 if (reg == 0)
8867 reg = gen_reg_rtx (Pmode);
8869 emit_insn (gen_symGOT2reg (reg, orig));
8870 return reg;
8872 return orig;
8875 /* Try machine-dependent ways of modifying an illegitimate address
8876 to be legitimate. If we find one, return the new, valid address.
8877 Otherwise, return X.
8879 For the SH, if X is almost suitable for indexing, but the offset is
8880 out of range, convert it into a normal form so that CSE has a chance
8881 of reducing the number of address registers used. */
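/* Illustrative example (assumed values): for an SImode load from r1+68,
   68 exceeds the 0..60 byte displacement range of mov.l, so the code below
   rewrites the address as (r1 + 64) + 4; the +64 is added into a register
   and the remaining displacement 4 is legal, letting CSE share the r1+64
   base among neighbouring accesses.  */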
8883 static rtx
8884 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
8886 if (flag_pic)
8887 x = legitimize_pic_address (oldx, mode, NULL_RTX);
8889 if (GET_CODE (x) == PLUS
8890 && (GET_MODE_SIZE (mode) == 4
8891 || GET_MODE_SIZE (mode) == 8)
8892 && GET_CODE (XEXP (x, 1)) == CONST_INT
8893 && BASE_REGISTER_RTX_P (XEXP (x, 0))
8894 && ! TARGET_SHMEDIA
8895 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
8896 && ! (TARGET_SH2E && mode == SFmode))
8898 rtx index_rtx = XEXP (x, 1);
8899 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
8900 rtx sum;
8902 /* On rare occasions, we might get an unaligned pointer
8903 that is indexed in a way to give an aligned address.
8904 Therefore, keep the lower two bits in offset_base. */
8905 /* Instead of offset_base 128..131 use 124..127, so that
8906 simple add suffices. */
8907 if (offset > 127)
8908 offset_base = ((offset + 4) & ~60) - 4;
8909 else
8910 offset_base = offset & ~60;
8912 /* Sometimes the normal form does not suit DImode. We
8913 could avoid that by using smaller ranges, but that
8914 would give less optimized code when SImode is
8915 prevalent. */
8916 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
8918 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
8919 GEN_INT (offset_base), NULL_RTX, 0,
8920 OPTAB_LIB_WIDEN);
8922 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
8926 return x;
8929 /* Mark the use of a constant in the literal table. If the constant
8930 has multiple labels, make it unique. */
8931 static rtx
8932 mark_constant_pool_use (rtx x)
8934 rtx insn, lab, pattern;
8936 if (x == NULL)
8937 return x;
8939 switch (GET_CODE (x))
8941 case LABEL_REF:
8942 x = XEXP (x, 0);
8943 case CODE_LABEL:
8944 break;
8945 default:
8946 return x;
8949 /* Get the first label in the list of labels for the same constant
8950 and delete the other labels in the list. */
8951 lab = x;
8952 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8954 if (GET_CODE (insn) != CODE_LABEL
8955 || LABEL_REFS (insn) != NEXT_INSN (insn))
8956 break;
8957 lab = insn;
8960 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8961 INSN_DELETED_P (insn) = 1;
8963 /* Mark constants in a window. */
8964 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8966 if (GET_CODE (insn) != INSN)
8967 continue;
8969 pattern = PATTERN (insn);
8970 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8971 continue;
8973 switch (XINT (pattern, 1))
8975 case UNSPECV_CONST2:
8976 case UNSPECV_CONST4:
8977 case UNSPECV_CONST8:
8978 XVECEXP (pattern, 0, 1) = const1_rtx;
8979 break;
8980 case UNSPECV_WINDOW_END:
8981 if (XVECEXP (pattern, 0, 0) == x)
8982 return lab;
8983 break;
8984 case UNSPECV_CONST_END:
8985 return lab;
8986 default:
8987 break;
8991 return lab;
8994 /* Return true if it's possible to redirect BRANCH1 to the destination
8995 of an unconditional jump BRANCH2. We only want to do this if the
8996 resulting branch will have a short displacement. */
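/* (Added note, an assumption about the intent: SH conditional branches
   bt/bf reach only roughly +-256 bytes, hence the scans below give up once
   the accumulated insn length exceeds 256.)  */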
8998 sh_can_redirect_branch (rtx branch1, rtx branch2)
9000 if (flag_expensive_optimizations && simplejump_p (branch2))
9002 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9003 rtx insn;
9004 int distance;
9006 for (distance = 0, insn = NEXT_INSN (branch1);
9007 insn && distance < 256;
9008 insn = PREV_INSN (insn))
9010 if (insn == dest)
9011 return 1;
9012 else
9013 distance += get_attr_length (insn);
9015 for (distance = 0, insn = NEXT_INSN (branch1);
9016 insn && distance < 256;
9017 insn = NEXT_INSN (insn))
9019 if (insn == dest)
9020 return 1;
9021 else
9022 distance += get_attr_length (insn);
9025 return 0;
9028 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9030 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9031 unsigned int new_reg)
9033 /* Interrupt functions can only use registers that have already been
9034 saved by the prologue, even if they would normally be
9035 call-clobbered. */
9037 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9038 return 0;
9040 return 1;
9043 /* Function to update the integer COST
9044 based on the relationship between INSN that is dependent on
9045 DEP_INSN through the dependence LINK. The default is to make no
9046 adjustment to COST. This can be used for example to specify to
9047 the scheduler that an output- or anti-dependence does not incur
9048 the same cost as a data-dependence. The return value should be
9049 the new value for COST. */
9050 static int
9051 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9053 rtx reg, use_pat;
9055 if (TARGET_SHMEDIA)
9057 /* On SHmedia, if the dependence is an anti-dependence or
9058 output-dependence, there is no cost. */
9059 if (REG_NOTE_KIND (link) != 0)
9061 /* However, dependencies between target register loads and
9062 uses of the register in a subsequent block that are separated
9063 by a conditional branch are not modelled - we have to make do with
9064 the anti-dependency between the target register load and the
9065 conditional branch that ends the current block. */
9066 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9067 && GET_CODE (PATTERN (dep_insn)) == SET
9068 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9069 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9070 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9072 int orig_cost = cost;
9073 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9074 rtx target = ((! note
9075 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9076 ? insn : JUMP_LABEL (insn));
9077 /* On the likely path, the branch costs 1, on the unlikely path,
9078 it costs 3. */
9079 cost--;
9081 target = next_active_insn (target);
9082 while (target && ! flow_dependent_p (target, dep_insn)
9083 && --cost > 0);
9084 /* If two branches are executed in immediate succession, with the
9085 first branch properly predicted, this causes a stall at the
9086 second branch, hence we won't need the target for the
9087 second branch for two cycles after the launch of the first
9088 branch. */
9089 if (cost > orig_cost - 2)
9090 cost = orig_cost - 2;
9092 else
9093 cost = 0;
9096 else if (get_attr_is_mac_media (insn)
9097 && get_attr_is_mac_media (dep_insn))
9098 cost = 1;
9100 else if (! reload_completed
9101 && GET_CODE (PATTERN (insn)) == SET
9102 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9103 && GET_CODE (PATTERN (dep_insn)) == SET
9104 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9105 && cost < 4)
9106 cost = 4;
9107 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9108 that is needed at the target. */
9109 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9110 && ! flow_dependent_p (insn, dep_insn))
9111 cost--;
9113 else if (REG_NOTE_KIND (link) == 0)
9115 enum attr_type type;
9116 rtx dep_set;
9118 if (recog_memoized (insn) < 0
9119 || recog_memoized (dep_insn) < 0)
9120 return cost;
9122 dep_set = single_set (dep_insn);
9124 /* The latency that we specify in the scheduling description refers
9125 to the actual output, not to an auto-increment register; for that,
9126 the latency is one. */
9127 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9129 rtx set = single_set (insn);
9131 if (set
9132 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9133 && (!MEM_P (SET_DEST (set))
9134 || !reg_mentioned_p (SET_DEST (dep_set),
9135 XEXP (SET_DEST (set), 0))))
9136 cost = 1;
9138 /* The only input for a call that is timing-critical is the
9139 function's address. */
9140 if (GET_CODE (insn) == CALL_INSN)
9142 rtx call = PATTERN (insn);
9144 if (GET_CODE (call) == PARALLEL)
9145 call = XVECEXP (call, 0 ,0);
9146 if (GET_CODE (call) == SET)
9147 call = SET_SRC (call);
9148 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9149 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9150 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9151 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9152 cost -= TARGET_SH4_300 ? 3 : 6;
9154 /* Likewise, the most timing-critical input for an sfunc call
9155 is the function address. However, sfuncs typically start
9156 using their arguments pretty quickly.
9157 Assume a four cycle delay for SH4 before they are needed.
9158 Cached ST40-300 calls are quicker, so assume only a one
9159 cycle delay there.
9160 ??? Maybe we should encode the delays till input registers
9161 are needed by sfuncs into the sfunc call insn. */
9162 /* All sfunc calls are parallels with at least four components.
9163 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9164 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9165 && XVECLEN (PATTERN (insn), 0) >= 4
9166 && (reg = sfunc_uses_reg (insn)))
9168 if (! reg_set_p (reg, dep_insn))
9169 cost -= TARGET_SH4_300 ? 1 : 4;
9171 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9173 enum attr_type dep_type = get_attr_type (dep_insn);
9175 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9176 cost--;
9177 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9178 && (type = get_attr_type (insn)) != TYPE_CALL
9179 && type != TYPE_SFUNC)
9180 cost--;
9181 /* When the preceding instruction loads the shift amount of
9182 the following SHAD/SHLD, the latency of the load is increased
9183 by 1 cycle. */
9184 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9185 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9186 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9187 XEXP (SET_SRC (single_set (insn)),
9188 1)))
9189 cost++;
9190 /* When an LS group instruction with a latency of less than
9191 3 cycles is followed by a double-precision floating-point
9192 instruction, FIPR, or FTRV, the latency of the first
9193 instruction is increased to 3 cycles. */
9194 else if (cost < 3
9195 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9196 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9197 cost = 3;
9198 /* The lsw register of a double-precision computation is ready one
9199 cycle earlier. */
9200 else if (reload_completed
9201 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9202 && (use_pat = single_set (insn))
9203 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9204 SET_SRC (use_pat)))
9205 cost -= 1;
9207 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9208 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9209 cost -= 1;
9211 else if (TARGET_SH4_300)
9213 /* Stores need their input register two cycles later. */
9214 if (dep_set && cost >= 1
9215 && ((type = get_attr_type (insn)) == TYPE_STORE
9216 || type == TYPE_PSTORE
9217 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9219 rtx set = single_set (insn);
9221 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9222 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9224 cost -= 2;
9225 /* But don't reduce the cost below 1 if the address depends
9226 on a side effect of dep_insn. */
9227 if (cost < 1
9228 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9229 cost = 1;
9234 /* An anti-dependence penalty of two applies if the first insn is a double
9235 precision fadd / fsub / fmul. */
9236 else if (!TARGET_SH4_300
9237 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9238 && recog_memoized (dep_insn) >= 0
9239 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9240 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9241 /* A lot of alleged anti-flow dependences are fake,
9242 so check this one is real. */
9243 && flow_dependent_p (dep_insn, insn))
9244 cost = 2;
9246 return cost;
9249 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9250 if DEP_INSN is anti-flow dependent on INSN. */
9251 static int
9252 flow_dependent_p (rtx insn, rtx dep_insn)
9254 rtx tmp = PATTERN (insn);
9256 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9257 return tmp == NULL_RTX;
9260 /* A helper function for flow_dependent_p called through note_stores. */
9261 static void
9262 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9264 rtx * pinsn = (rtx *) data;
9266 if (*pinsn && reg_referenced_p (x, *pinsn))
9267 *pinsn = NULL_RTX;
9270 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9271 'special function' patterns (type sfunc) that clobber pr, but that
9272 do not look like function calls to leaf_function_p. Hence we must
9273 do this extra check. */
9274 static int
9275 sh_pr_n_sets (void)
9277 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9280 /* Return where to allocate pseudo for a given hard register initial
9281 value. */
9282 static rtx
9283 sh_allocate_initial_value (rtx hard_reg)
9285 rtx x;
9287 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9289 if (current_function_is_leaf
9290 && ! sh_pr_n_sets ()
9291 && ! (TARGET_SHCOMPACT
9292 && ((crtl->args.info.call_cookie
9293 & ~ CALL_COOKIE_RET_TRAMP (1))
9294 || crtl->saves_all_registers)))
9295 x = hard_reg;
9296 else
9297 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9299 else
9300 x = NULL_RTX;
9302 return x;
9305 /* This function returns "2" to indicate dual issue for the SH4
9306 processor. To be used by the DFA pipeline description. */
9307 static int
9308 sh_issue_rate (void)
9310 if (TARGET_SUPERSCALAR)
9311 return 2;
9312 else
9313 return 1;
9316 /* Functions for ready queue reordering for sched1. */
9318 /* Get weight for mode for a set x. */
9319 static short
9320 find_set_regmode_weight (rtx x, enum machine_mode mode)
9322 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9323 return 1;
9324 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9326 if (GET_CODE (SET_DEST (x)) == REG)
9328 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9329 return 1;
9330 else
9331 return 0;
9333 return 1;
9335 return 0;
9338 /* Get regmode weight for insn. */
9339 static short
9340 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9342 short reg_weight = 0;
9343 rtx x;
9345 /* Increment weight for each register born here. */
9346 x = PATTERN (insn);
9347 reg_weight += find_set_regmode_weight (x, mode);
9348 if (GET_CODE (x) == PARALLEL)
9350 int j;
9351 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9353 x = XVECEXP (PATTERN (insn), 0, j);
9354 reg_weight += find_set_regmode_weight (x, mode);
9357 /* Decrement weight for each register that dies here. */
9358 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9360 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9362 rtx note = XEXP (x, 0);
9363 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9364 reg_weight--;
9367 return reg_weight;
9370 /* Calculate regmode weights for all insns of a basic block. */
9371 static void
9372 find_regmode_weight (basic_block b, enum machine_mode mode)
9374 rtx insn, next_tail, head, tail;
9376 get_ebb_head_tail (b, b, &head, &tail);
9377 next_tail = NEXT_INSN (tail);
9379 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9381 /* Handle register life information. */
9382 if (!INSN_P (insn))
9383 continue;
9385 if (mode == SFmode)
9386 INSN_REGMODE_WEIGHT (insn, mode) =
9387 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9388 else if (mode == SImode)
9389 INSN_REGMODE_WEIGHT (insn, mode) =
9390 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9394 /* Comparison function for ready queue sorting. */
9395 static int
9396 rank_for_reorder (const void *x, const void *y)
9398 rtx tmp = *(const rtx *) y;
9399 rtx tmp2 = *(const rtx *) x;
9401 /* The insn in a schedule group should be issued first. */
9402 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9403 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9405 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9406 minimizes instruction movement, thus minimizing sched's effect on
9407 register pressure. */
9408 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9411 /* Resort the array A in which only the element at index N may be out of order. */
9412 static void
9413 swap_reorder (rtx *a, int n)
9415 rtx insn = a[n - 1];
9416 int i = n - 2;
9418 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9420 a[i + 1] = a[i];
9421 i -= 1;
9423 a[i + 1] = insn;
9426 #define SCHED_REORDER(READY, N_READY) \
9427 do \
9429 if ((N_READY) == 2) \
9430 swap_reorder (READY, N_READY); \
9431 else if ((N_READY) > 2) \
9432 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9434 while (0)
9436 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9437 macro. */
9438 static void
9439 ready_reorder (rtx *ready, int nready)
9441 SCHED_REORDER (ready, nready);
9444 /* Count life regions of r0 for a block. */
9445 static int
9446 find_r0_life_regions (basic_block b)
9448 rtx end, insn;
9449 rtx pset;
9450 rtx r0_reg;
9451 int live;
9452 int set;
9453 int death = 0;
9455 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9457 set = 1;
9458 live = 1;
9460 else
9462 set = 0;
9463 live = 0;
9466 insn = BB_HEAD (b);
9467 end = BB_END (b);
9468 r0_reg = gen_rtx_REG (SImode, R0_REG);
9469 while (1)
9471 if (INSN_P (insn))
9473 if (find_regno_note (insn, REG_DEAD, R0_REG))
9475 death++;
9476 live = 0;
9478 if (!live
9479 && (pset = single_set (insn))
9480 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9481 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9483 set++;
9484 live = 1;
9487 if (insn == end)
9488 break;
9489 insn = NEXT_INSN (insn);
9491 return set - death;
9494 /* Calculate regmode weights for all insns of all basic blocks. */
9495 static void
9496 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9497 int verbose ATTRIBUTE_UNUSED,
9498 int old_max_uid)
9500 basic_block b;
9502 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9503 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9504 r0_life_regions = 0;
9506 FOR_EACH_BB_REVERSE (b)
9508 find_regmode_weight (b, SImode);
9509 find_regmode_weight (b, SFmode);
9510 if (!reload_completed)
9511 r0_life_regions += find_r0_life_regions (b);
9514 CURR_REGMODE_PRESSURE (SImode) = 0;
9515 CURR_REGMODE_PRESSURE (SFmode) = 0;
9519 /* Cleanup. */
9520 static void
9521 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9522 int verbose ATTRIBUTE_UNUSED)
9524 if (regmode_weight[0])
9526 free (regmode_weight[0]);
9527 regmode_weight[0] = NULL;
9529 if (regmode_weight[1])
9531 free (regmode_weight[1]);
9532 regmode_weight[1] = NULL;
9536 /* The set of scalar modes supported differs from the default version in TImode
9537 for 32-bit SHMEDIA. */
9538 static bool
9539 sh_scalar_mode_supported_p (enum machine_mode mode)
9541 if (TARGET_SHMEDIA32 && mode == TImode)
9542 return false;
9544 return default_scalar_mode_supported_p (mode);
9547 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9548 keep count of register pressures on SImode and SFmode. */
9549 static int
9550 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9551 int sched_verbose ATTRIBUTE_UNUSED,
9552 rtx insn,
9553 int can_issue_more)
9555 if (GET_CODE (PATTERN (insn)) != USE
9556 && GET_CODE (PATTERN (insn)) != CLOBBER)
9557 cached_can_issue_more = can_issue_more - 1;
9558 else
9559 cached_can_issue_more = can_issue_more;
9561 if (reload_completed)
9562 return cached_can_issue_more;
9564 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9565 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9567 return cached_can_issue_more;
9570 static void
9571 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9572 int verbose ATTRIBUTE_UNUSED,
9573 int veclen ATTRIBUTE_UNUSED)
9575 CURR_REGMODE_PRESSURE (SImode) = 0;
9576 CURR_REGMODE_PRESSURE (SFmode) = 0;
9579 /* Some magic numbers. */
9580 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9581 functions that already have high pressure on r0. */
9582 #define R0_MAX_LIFE_REGIONS 2
9583 /* Register pressure thresholds for SImode and SFmode registers. */
9584 #define SIMODE_MAX_WEIGHT 5
9585 #define SFMODE_MAX_WEIGHT 10
9587 /* Return true if the pressure is high for MODE. */
9588 static short
9589 high_pressure (enum machine_mode mode)
9591 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9592 functions that already have high pressure on r0. */
9593 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9594 return 1;
9596 if (mode == SFmode)
9597 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9598 else
9599 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
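/* For example: once CURR_REGMODE_PRESSURE (SImode) exceeds 5 or
   CURR_REGMODE_PRESSURE (SFmode) exceeds 10 in the current region,
   high_pressure returns nonzero and sh_reorder below re-sorts the
   ready list; a function with two or more r0 life regions is treated
   as high pressure unconditionally.  */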
9602 /* Reorder ready queue if register pressure is high. */
9603 static int
9604 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9605 int sched_verbose ATTRIBUTE_UNUSED,
9606 rtx *ready,
9607 int *n_readyp,
9608 int clock_var ATTRIBUTE_UNUSED)
9610 if (reload_completed)
9611 return sh_issue_rate ();
9613 if (high_pressure (SFmode) || high_pressure (SImode))
9615 ready_reorder (ready, *n_readyp);
9618 return sh_issue_rate ();
9621 /* Skip cycles if the current register pressure is high. */
9622 static int
9623 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9624 int sched_verbose ATTRIBUTE_UNUSED,
9625 rtx *ready ATTRIBUTE_UNUSED,
9626 int *n_readyp ATTRIBUTE_UNUSED,
9627 int clock_var ATTRIBUTE_UNUSED)
9629 if (reload_completed)
9630 return cached_can_issue_more;
9632 if (high_pressure (SFmode) || high_pressure (SImode))
9633 skip_cycles = 1;
9635 return cached_can_issue_more;
9638 /* Skip cycles without sorting the ready queue. This will move insns from
9639 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9640 queue by sh_reorder. */
9642 /* Generally, skipping this many cycles is sufficient for all insns to move
9643 from Q -> R. */
9644 #define MAX_SKIPS 8
9646 static int
9647 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9648 int sched_verbose ATTRIBUTE_UNUSED,
9649 rtx insn ATTRIBUTE_UNUSED,
9650 int last_clock_var,
9651 int clock_var,
9652 int *sort_p)
9654 if (reload_completed)
9655 return 0;
9657 if (skip_cycles)
9659 if ((clock_var - last_clock_var) < MAX_SKIPS)
9661 *sort_p = 0;
9662 return 1;
9664 /* If this is the last cycle we are skipping, allow reordering of R. */
9665 if ((clock_var - last_clock_var) == MAX_SKIPS)
9667 *sort_p = 1;
9668 return 1;
9672 skip_cycles = 0;
9674 return 0;
9677 /* SHmedia requires registers for branches, so we can't generate new
9678 branches past reload. */
9679 static bool
9680 sh_cannot_modify_jumps_p (void)
9682 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9685 static enum reg_class
9686 sh_target_reg_class (void)
9688 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9691 static bool
9692 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9694 HARD_REG_SET dummy;
9695 #if 0
9696 rtx insn;
9697 #endif
9699 if (! shmedia_space_reserved_for_target_registers)
9700 return 0;
9701 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9702 return 0;
9703 if (calc_live_regs (&dummy) >= 6 * 8)
9704 return 1;
9705 return 0;
9708 static bool
9709 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9711 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9715 /* On the SH1..SH4, the trampoline looks like
9716 2 0002 D202 mov.l l2,r2
9717 1 0000 D301 mov.l l1,r3
9718 3 0004 422B jmp @r2
9719 4 0006 0009 nop
9720 5 0008 00000000 l1: .long area
9721 6 000c 00000000 l2: .long function
9723 SH5 (compact) uses r1 instead of r3 for the static chain. */
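/* sh_initialize_trampoline below therefore fills the 16-byte SH1..SH4
   trampoline as four 32-bit words (byte offsets):
     0:  the two mov.l opcodes   (0xd202, 0xd301)
     4:  jmp @r2 ; nop           (0x422b, 0x0009)
     8:  CXT     (l1: the static chain value)
    12:  FNADDR  (l2: the address of the function)
   with the first two word constants byte-swapped as needed so the same
   instruction halfwords result on little-endian targets.  */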
9726 /* Emit RTL insns to initialize the variable parts of a trampoline.
9727 FNADDR is an RTX for the address of the function's pure code.
9728 CXT is an RTX for the static chain value for the function. */
9730 void
9731 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9733 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9735 if (TARGET_SHMEDIA64)
9737 rtx tramp_templ;
9738 int fixed_len;
9740 rtx movi1 = GEN_INT (0xcc000010);
9741 rtx shori1 = GEN_INT (0xc8000010);
9742 rtx src, dst;
9744 /* The following trampoline works within a +- 128 KB range for cxt:
9745 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9746 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9747 gettr tr1,r1; blink tr0,r63 */
9748 /* Address rounding makes it hard to compute the exact bounds of the
9749 offset for this trampoline, but we have a rather generous offset
9750 range, so frame_offset should do fine as an upper bound. */
9751 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9753 /* ??? We could optimize this trampoline initialization
9754 by writing DImode words with two insns each. */
9755 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9756 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9757 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9758 insn = gen_rtx_AND (DImode, insn, mask);
9759 /* OR in the ptb/u .,tr1 pattern. */
9760 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9761 insn = force_operand (insn, NULL_RTX);
9762 insn = gen_lowpart (SImode, insn);
9763 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9764 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9765 insn = gen_rtx_AND (DImode, insn, mask);
9766 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9767 insn = gen_lowpart (SImode, insn);
9768 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9769 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9770 insn = gen_rtx_AND (DImode, insn, mask);
9771 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9772 insn = gen_lowpart (SImode, insn);
9773 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9774 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9775 insn = gen_rtx_AND (DImode, insn, mask);
9776 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9777 insn = gen_lowpart (SImode, insn);
9778 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9779 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9780 insn = gen_rtx_AND (DImode, insn, mask);
9781 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9782 insn = gen_lowpart (SImode, insn);
9783 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9784 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9785 GEN_INT (0x6bf10600));
9786 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9787 GEN_INT (0x4415fc10));
9788 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9789 GEN_INT (0x4401fff0));
9790 emit_insn (gen_ic_invalidate_line (tramp));
9791 return;
9793 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9794 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9796 tramp_templ = gen_datalabel_ref (tramp_templ);
9797 dst = tramp_mem;
9798 src = gen_const_mem (BLKmode, tramp_templ);
9799 set_mem_align (dst, 256);
9800 set_mem_align (src, 64);
9801 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9803 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9804 emit_move_insn (adjust_address (tramp_mem, Pmode,
9805 fixed_len + GET_MODE_SIZE (Pmode)),
9806 cxt);
9807 emit_insn (gen_ic_invalidate_line (tramp));
9808 return;
9810 else if (TARGET_SHMEDIA)
9812 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9813 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9814 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9815 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9816 /* movi 0,r1: 0xcc000010 and shori 0,r1: 0xc8000010, concatenated,
9817 rotated right by 10, with the higher 16 bits of every 32 selected. */
9818 rtx movishori
9819 = force_reg (V2HImode, (simplify_gen_subreg
9820 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9821 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9822 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9824 tramp = force_reg (Pmode, tramp);
9825 fnaddr = force_reg (SImode, fnaddr);
9826 cxt = force_reg (SImode, cxt);
9827 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9828 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9829 movishori));
9830 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9831 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9832 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9833 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9834 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9835 gen_rtx_SUBREG (V2HImode, cxt, 0),
9836 movishori));
9837 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9838 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9839 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9840 if (TARGET_LITTLE_ENDIAN)
9842 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9843 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9845 else
9847 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9848 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9850 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9851 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9852 emit_insn (gen_ic_invalidate_line (tramp));
9853 return;
9855 else if (TARGET_SHCOMPACT)
9857 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9858 return;
9860 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9861 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9862 SImode));
9863 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9864 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9865 SImode));
9866 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9867 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9868 if (TARGET_HARVARD)
9870 if (!TARGET_INLINE_IC_INVALIDATE
9871 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9872 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9873 FUNCTION_ORDINARY),
9874 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
9875 else
9876 emit_insn (gen_ic_invalidate_line (tramp));
9880 /* FIXME: This is overly conservative. A SHcompact function that
9881 receives arguments ``by reference'' will have them stored in its
9882 own stack frame, so it must not pass pointers or references to
9883 these arguments to other functions by means of sibling calls. */
9884 /* If PIC, we cannot make sibling calls to global functions
9885 because the PLT requires r12 to be live. */
9886 static bool
9887 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9889 return (1
9890 && (! TARGET_SHCOMPACT
9891 || crtl->args.info.stack_regs == 0)
9892 && ! sh_cfun_interrupt_handler_p ()
9893 && (! flag_pic
9894 || (decl && ! TREE_PUBLIC (decl))
9895 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9898 /* Machine specific built-in functions. */
9900 struct builtin_description
9902 const enum insn_code icode;
9903 const char *const name;
9904 int signature;
9907 /* Describe the number and signedness of arguments; arg[0] == result
9908 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9909 /* 9: 64-bit pointer, 10: 32-bit pointer */
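/* For example, SH_BLTIN_MAC_HISI below is { 1, 4, 4, 1 }: an unsigned
   result, two "don't care" operands, and a final unsigned operand;
   SH_BLTIN_PV is { 0, 8 }: no result (void) and a single pointer
   argument.  */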
9910 static const char signature_args[][4] =
9912 #define SH_BLTIN_V2SI2 0
9913 { 4, 4 },
9914 #define SH_BLTIN_V4HI2 1
9915 { 4, 4 },
9916 #define SH_BLTIN_V2SI3 2
9917 { 4, 4, 4 },
9918 #define SH_BLTIN_V4HI3 3
9919 { 4, 4, 4 },
9920 #define SH_BLTIN_V8QI3 4
9921 { 4, 4, 4 },
9922 #define SH_BLTIN_MAC_HISI 5
9923 { 1, 4, 4, 1 },
9924 #define SH_BLTIN_SH_HI 6
9925 { 4, 4, 1 },
9926 #define SH_BLTIN_SH_SI 7
9927 { 4, 4, 1 },
9928 #define SH_BLTIN_V4HI2V2SI 8
9929 { 4, 4, 4 },
9930 #define SH_BLTIN_V4HI2V8QI 9
9931 { 4, 4, 4 },
9932 #define SH_BLTIN_SISF 10
9933 { 4, 2 },
9934 #define SH_BLTIN_LDUA_L 11
9935 { 2, 10 },
9936 #define SH_BLTIN_LDUA_Q 12
9937 { 1, 10 },
9938 #define SH_BLTIN_STUA_L 13
9939 { 0, 10, 2 },
9940 #define SH_BLTIN_STUA_Q 14
9941 { 0, 10, 1 },
9942 #define SH_BLTIN_LDUA_L64 15
9943 { 2, 9 },
9944 #define SH_BLTIN_LDUA_Q64 16
9945 { 1, 9 },
9946 #define SH_BLTIN_STUA_L64 17
9947 { 0, 9, 2 },
9948 #define SH_BLTIN_STUA_Q64 18
9949 { 0, 9, 1 },
9950 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9951 #define SH_BLTIN_2 19
9952 #define SH_BLTIN_SU 19
9953 { 1, 2 },
9954 #define SH_BLTIN_3 20
9955 #define SH_BLTIN_SUS 20
9956 { 2, 2, 1 },
9957 #define SH_BLTIN_PSSV 21
9958 { 0, 8, 2, 2 },
9959 #define SH_BLTIN_XXUU 22
9960 #define SH_BLTIN_UUUU 22
9961 { 1, 1, 1, 1 },
9962 #define SH_BLTIN_PV 23
9963 { 0, 8 },
9965 /* mcmv: operands considered unsigned. */
9966 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9967 /* mperm: control value considered unsigned int. */
9968 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9969 /* mshards_q: returns signed short. */
9970 /* nsb: takes long long arg, returns unsigned char. */
9971 static const struct builtin_description bdesc[] =
9973 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9974 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9975 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9976 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9977 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9978 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9979 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9980 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9981 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9982 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9983 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9984 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9985 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9986 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9987 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9988 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9989 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9990 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9991 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9992 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9993 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9994 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9995 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9996 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9997 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9998 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9999 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10000 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
10001 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10002 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10003 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10004 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10005 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10006 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10007 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10008 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10009 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10010 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10011 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10012 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10013 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10014 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10015 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10016 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10017 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10018 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10019 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10020 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10021 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10022 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10023 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10024 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10025 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10026 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10027 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10028 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10029 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10030 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10031 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10032 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10033 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10034 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10035 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10036 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10037 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10038 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10039 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10040 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10041 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10042 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10043 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10044 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10045 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10046 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10047 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10048 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10049 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10050 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10051 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10052 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10053 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10054 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10055 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10056 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10059 static void
10060 sh_media_init_builtins (void)
10062 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10063 const struct builtin_description *d;
10065 memset (shared, 0, sizeof shared);
10066 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10068 tree type, arg_type = 0;
10069 int signature = d->signature;
10070 int i;
10072 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10073 type = shared[signature];
10074 else
10076 int has_result = signature_args[signature][0] != 0;
10078 if ((signature_args[signature][1] & 8)
10079 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10080 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10081 continue;
10082 if (! TARGET_FPU_ANY
10083 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10084 continue;
10085 type = void_list_node;
10086 for (i = 3; ; i--)
10088 int arg = signature_args[signature][i];
10089 int opno = i - 1 + has_result;
10091 if (arg & 8)
10092 arg_type = ptr_type_node;
10093 else if (arg)
10094 arg_type = (*lang_hooks.types.type_for_mode)
10095 (insn_data[d->icode].operand[opno].mode,
10096 (arg & 1));
10097 else if (i)
10098 continue;
10099 else
10100 arg_type = void_type_node;
10101 if (i == 0)
10102 break;
10103 type = tree_cons (NULL_TREE, arg_type, type);
10105 type = build_function_type (arg_type, type);
10106 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10107 shared[signature] = type;
10109 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10110 NULL, NULL_TREE);
10114 /* Implements target hook vector_mode_supported_p. */
10115 bool
10116 sh_vector_mode_supported_p (enum machine_mode mode)
10118 if (TARGET_FPU_ANY
10119 && ((mode == V2SFmode)
10120 || (mode == V4SFmode)
10121 || (mode == V16SFmode)))
10122 return true;
10124 else if (TARGET_SHMEDIA
10125 && ((mode == V8QImode)
10126 || (mode == V2HImode)
10127 || (mode == V4HImode)
10128 || (mode == V2SImode)))
10129 return true;
10131 return false;
10134 /* Implements target hook dwarf_calling_convention. Return an enum
10135 dwarf_calling_convention value for FUNC. */
10137 sh_dwarf_calling_convention (const_tree func)
10139 if (sh_attr_renesas_p (func))
10140 return DW_CC_GNU_renesas_sh;
10142 return DW_CC_normal;
10145 static void
10146 sh_init_builtins (void)
10148 if (TARGET_SHMEDIA)
10149 sh_media_init_builtins ();
10152 /* Expand an expression EXP that calls a built-in function,
10153 with result going to TARGET if that's convenient
10154 (and in mode MODE if that's convenient).
10155 SUBTARGET may be used as the target for computing one of EXP's operands.
10156 IGNORE is nonzero if the value is to be ignored. */
10158 static rtx
10159 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10160 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10162 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10163 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10164 const struct builtin_description *d = &bdesc[fcode];
10165 enum insn_code icode = d->icode;
10166 int signature = d->signature;
10167 enum machine_mode tmode = VOIDmode;
10168 int nop = 0, i;
10169 rtx op[4];
10170 rtx pat = 0;
10172 if (signature_args[signature][0])
10174 if (ignore)
10175 return 0;
10177 tmode = insn_data[icode].operand[0].mode;
10178 if (! target
10179 || GET_MODE (target) != tmode
10180 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10181 target = gen_reg_rtx (tmode);
10182 op[nop++] = target;
10184 else
10185 target = 0;
10187 for (i = 1; i <= 3; i++, nop++)
10189 tree arg;
10190 enum machine_mode opmode, argmode;
10191 tree optype;
10193 if (! signature_args[signature][i])
10194 break;
10195 arg = CALL_EXPR_ARG (exp, i - 1);
10196 if (arg == error_mark_node)
10197 return const0_rtx;
10198 if (signature_args[signature][i] & 8)
10200 opmode = ptr_mode;
10201 optype = ptr_type_node;
10203 else
10205 opmode = insn_data[icode].operand[nop].mode;
10206 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10208 argmode = TYPE_MODE (TREE_TYPE (arg));
10209 if (argmode != opmode)
10210 arg = build1 (NOP_EXPR, optype, arg);
10211 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10212 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10213 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10216 switch (nop)
10218 case 1:
10219 pat = (*insn_data[d->icode].genfun) (op[0]);
10220 break;
10221 case 2:
10222 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10223 break;
10224 case 3:
10225 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10226 break;
10227 case 4:
10228 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10229 break;
10230 default:
10231 gcc_unreachable ();
10233 if (! pat)
10234 return 0;
10235 emit_insn (pat);
10236 return target;
10239 void
10240 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10242 rtx sel0 = const0_rtx;
10243 rtx sel1 = const1_rtx;
10244 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10245 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10247 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10248 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10251 void
10252 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10254 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10256 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10257 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10260 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10261 We can allow any mode in any general register. The special registers
10262 only allow SImode. Don't allow any mode in the PR.
10264 We cannot hold DCmode values in the XD registers because alter_reg
10265 handles subregs of them incorrectly. We could work around this by
10266 spacing the XD registers like the DR registers, but this would require
10267 additional memory in every compilation to hold larger register vectors.
10268 We could hold SFmode / SCmode values in XD registers, but that
10269 would require a tertiary reload when reloading from / to memory,
10270 and a secondary reload to reload from / to general regs; that
10271 seems to be a losing proposition.
10273 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10274 it won't be ferried through GP registers first. */
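/* To illustrate the rules below: any mode is normally allowed in a
   general register; within the FP register file, DFmode (where the
   target supports it there) must start at an even register and TImode
   at a multiple-of-four register; and the XD registers admit only
   DFmode.  */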
10276 bool
10277 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10279 if (SPECIAL_REGISTER_P (regno))
10280 return mode == SImode;
10282 if (regno == FPUL_REG)
10283 return (mode == SImode || mode == SFmode);
10285 if (FP_REGISTER_P (regno) && mode == SFmode)
10286 return true;
10288 if (mode == V2SFmode)
10290 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10291 || GENERAL_REGISTER_P (regno)))
10292 return true;
10293 else
10294 return false;
10297 if (mode == V4SFmode)
10299 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10300 || GENERAL_REGISTER_P (regno))
10301 return true;
10302 else
10303 return false;
10306 if (mode == V16SFmode)
10308 if (TARGET_SHMEDIA)
10310 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10311 return true;
10312 else
10313 return false;
10315 else
10316 return regno == FIRST_XD_REG;
10319 if (FP_REGISTER_P (regno))
10321 if (mode == SFmode
10322 || mode == SImode
10323 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10324 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10325 || mode == DCmode
10326 || (TARGET_SHMEDIA
10327 && (mode == DFmode || mode == DImode
10328 || mode == V2SFmode || mode == TImode)))
10329 && ((regno - FIRST_FP_REG) & 1) == 0)
10330 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10331 && ((regno - FIRST_FP_REG) & 3) == 0))
10332 return true;
10333 else
10334 return false;
10337 if (XD_REGISTER_P (regno))
10338 return mode == DFmode;
10340 if (TARGET_REGISTER_P (regno))
10341 return (mode == DImode || mode == SImode || mode == PDImode);
10343 if (regno == PR_REG)
10344 return mode == SImode;
10346 if (regno == FPSCR_REG)
10347 return mode == PSImode;
10349 /* FIXME. This works around PR target/37633 for -O0. */
10350 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10352 unsigned int n = GET_MODE_SIZE (mode) / 8;
10354 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10355 && regno <= FIRST_GENERAL_REG + 14)
10356 return false;
10359 return true;
10362 /* Return true if it is invalid for registers in RCLASS to change mode
10363 from FROM to TO. */
10364 bool
10365 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10366 enum reg_class rclass)
10368 /* We want to enable the use of SUBREGs as a means to
10369 VEC_SELECT a single element of a vector. */
10370 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10371 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10373 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10375 if (TARGET_LITTLE_ENDIAN)
10377 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10378 return reg_classes_intersect_p (DF_REGS, rclass);
10380 else
10382 if (GET_MODE_SIZE (from) < 8)
10383 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10386 return 0;
10390 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10391 that label is used. */
10393 void
10394 sh_mark_label (rtx address, int nuses)
10396 if (GOTOFF_P (address))
10398 /* Extract the label or symbol. */
10399 address = XEXP (address, 0);
10400 if (GET_CODE (address) == PLUS)
10401 address = XEXP (address, 0);
10402 address = XVECEXP (address, 0, 0);
10404 if (GET_CODE (address) == LABEL_REF
10405 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10406 LABEL_NUSES (XEXP (address, 0)) += nuses;
10409 /* Compute extra cost of moving data between one register class
10410 and another. */
10412 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10413 uses this information. Hence, the general register <-> floating point
10414 register information here is not used for SFmode. */
10417 sh_register_move_cost (enum machine_mode mode,
10418 enum reg_class srcclass, enum reg_class dstclass)
10420 if (dstclass == T_REGS || dstclass == PR_REGS)
10421 return 10;
10423 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10424 return 4;
10426 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10427 && REGCLASS_HAS_FP_REG (srcclass)
10428 && REGCLASS_HAS_FP_REG (dstclass))
10429 return 4;
10431 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10432 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10434 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10435 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10436 return 9;
10438 if ((REGCLASS_HAS_FP_REG (dstclass)
10439 && REGCLASS_HAS_GENERAL_REG (srcclass))
10440 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10441 && REGCLASS_HAS_FP_REG (srcclass)))
10442 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10443 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10445 if ((dstclass == FPUL_REGS
10446 && REGCLASS_HAS_GENERAL_REG (srcclass))
10447 || (srcclass == FPUL_REGS
10448 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10449 return 5;
10451 if ((dstclass == FPUL_REGS
10452 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10453 || (srcclass == FPUL_REGS
10454 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10455 return 7;
10457 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10458 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10459 return 20;
10461 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10462 if (TARGET_SHMEDIA
10463 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10465 if (sh_gettrcost >= 0)
10466 return sh_gettrcost;
10467 else if (!TARGET_PT_FIXED)
10468 return 100;
10471 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10472 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10473 return 4;
10475 if (TARGET_SHMEDIA
10476 || (TARGET_FMOVD
10477 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10478 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10479 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10481 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
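/* For example, assuming none of the earlier special cases apply, a
   general <-> FP register move of a DFmode value costs 12 on a
   non-FMOVD, non-SHmedia target (12 * ((8 + 7) / 8)), while an SImode
   move between two general registers falls through to the default
   2 * ((4 + 3) / 4) == 2.  */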
10484 static rtx emit_load_ptr (rtx, rtx);
10486 static rtx
10487 emit_load_ptr (rtx reg, rtx addr)
10489 rtx mem = gen_const_mem (ptr_mode, addr);
10491 if (Pmode != ptr_mode)
10492 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10493 return emit_move_insn (reg, mem);
10496 static void
10497 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10498 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10499 tree function)
10501 CUMULATIVE_ARGS cum;
10502 int structure_value_byref = 0;
10503 rtx this_rtx, this_value, sibcall, insns, funexp;
10504 tree funtype = TREE_TYPE (function);
10505 int simple_add = CONST_OK_FOR_ADD (delta);
10506 int did_load = 0;
10507 rtx scratch0, scratch1, scratch2;
10508 unsigned i;
10510 reload_completed = 1;
10511 epilogue_completed = 1;
10512 current_function_uses_only_leaf_regs = 1;
10514 emit_note (NOTE_INSN_PROLOGUE_END);
10516 /* Find the "this" pointer. We have such a wide range of ABIs for the
10517 SH that it's best to do this completely machine independently.
10518 "this" is passed as first argument, unless a structure return pointer
10519 comes first, in which case "this" comes second. */
10520 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10521 #ifndef PCC_STATIC_STRUCT_RETURN
10522 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10523 structure_value_byref = 1;
10524 #endif /* not PCC_STATIC_STRUCT_RETURN */
10525 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10527 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10529 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10531 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10533 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10534 static chain pointer (even if you can't have nested virtual functions
10535 right now, someone might implement them sometime), and the rest of the
10536 registers are used for argument passing, are callee-saved, or reserved. */
10537 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10538 -ffixed-reg has been used. */
10539 if (! call_used_regs[0] || fixed_regs[0])
10540 error ("r0 needs to be available as a call-clobbered register");
10541 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10542 if (! TARGET_SH5)
10544 if (call_used_regs[1] && ! fixed_regs[1])
10545 scratch1 = gen_rtx_REG (ptr_mode, 1);
10546 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10547 to the location where struct values are to be returned. */
10548 if (call_used_regs[3] && ! fixed_regs[3])
10549 scratch2 = gen_rtx_REG (Pmode, 3);
10551 else if (TARGET_SHMEDIA)
10553 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10554 if (i != REGNO (scratch0) &&
10555 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10557 scratch1 = gen_rtx_REG (ptr_mode, i);
10558 break;
10560 if (scratch1 == scratch0)
10561 error ("need a second call-clobbered general purpose register");
10562 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10563 if (call_used_regs[i] && ! fixed_regs[i])
10565 scratch2 = gen_rtx_REG (Pmode, i);
10566 break;
10568 if (scratch2 == scratch0)
10569 error ("need a call-clobbered target register");
10572 this_value = plus_constant (this_rtx, delta);
10573 if (vcall_offset
10574 && (simple_add || scratch0 != scratch1)
10575 && strict_memory_address_p (ptr_mode, this_value))
10577 emit_load_ptr (scratch0, this_value);
10578 did_load = 1;
10581 if (!delta)
10582 ; /* Do nothing. */
10583 else if (simple_add)
10584 emit_move_insn (this_rtx, this_value);
10585 else
10587 emit_move_insn (scratch1, GEN_INT (delta));
10588 emit_insn (gen_add2_insn (this_rtx, scratch1));
10591 if (vcall_offset)
10593 rtx offset_addr;
10595 if (!did_load)
10596 emit_load_ptr (scratch0, this_rtx);
10598 offset_addr = plus_constant (scratch0, vcall_offset);
10599 if (strict_memory_address_p (ptr_mode, offset_addr))
10600 ; /* Do nothing. */
10601 else if (! TARGET_SH5 && scratch0 != scratch1)
10603 /* scratch0 != scratch1, and we have indexed loads. Get a better
10604 schedule by loading the offset into r1 and using an indexed
10605 load - then the load of r1 can issue before the load from
10606 (this_rtx + delta) finishes. */
10607 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10608 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10610 else if (CONST_OK_FOR_ADD (vcall_offset))
10612 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10613 offset_addr = scratch0;
10615 else if (scratch0 != scratch1)
10617 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10618 emit_insn (gen_add2_insn (scratch0, scratch1));
10619 offset_addr = scratch0;
10621 else
10622 gcc_unreachable (); /* FIXME */
10623 emit_load_ptr (scratch0, offset_addr);
10625 if (Pmode != ptr_mode)
10626 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10627 emit_insn (gen_add2_insn (this_rtx, scratch0));
10630 /* Generate a tail call to the target function. */
10631 if (! TREE_USED (function))
10633 assemble_external (function);
10634 TREE_USED (function) = 1;
10636 funexp = XEXP (DECL_RTL (function), 0);
10637 /* If the function is overridden, so is the thunk, hence we don't
10638 need GOT addressing even if this is a public symbol. */
10639 #if 0
10640 if (TARGET_SH1 && ! flag_weak)
10641 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10642 else
10643 #endif
10644 if (TARGET_SH2 && flag_pic)
10646 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10647 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10649 else
10651 if (TARGET_SHMEDIA && flag_pic)
10653 funexp = gen_sym2PIC (funexp);
10654 PUT_MODE (funexp, Pmode);
10656 emit_move_insn (scratch2, funexp);
10657 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10658 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10660 sibcall = emit_call_insn (sibcall);
10661 SIBLING_CALL_P (sibcall) = 1;
10662 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10663 emit_barrier ();
10665 /* Run just enough of rest_of_compilation to do scheduling and get
10666 the insns emitted. Note that use_thunk calls
10667 assemble_start_function and assemble_end_function. */
10669 insn_locators_alloc ();
10670 insns = get_insns ();
10672 if (optimize > 0)
10674 if (! cfun->cfg)
10675 init_flow (cfun);
10676 split_all_insns_noflow ();
10679 sh_reorg ();
10681 if (optimize > 0 && flag_delayed_branch)
10682 dbr_schedule (insns);
10684 shorten_branches (insns);
10685 final_start_function (insns, file, 1);
10686 final (insns, file, 1);
10687 final_end_function ();
10688 free_after_compilation (cfun);
10690 reload_completed = 0;
10691 epilogue_completed = 0;
10695 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10697 rtx sym;
10699 /* If this is not an ordinary function, the name usually comes from a
10700 string literal or an sprintf buffer. Make sure we use the same
10701 string consistently, so that cse will be able to unify address loads. */
10702 if (kind != FUNCTION_ORDINARY)
10703 name = IDENTIFIER_POINTER (get_identifier (name));
10704 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10705 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10706 if (flag_pic)
10707 switch (kind)
10709 case FUNCTION_ORDINARY:
10710 break;
10711 case SFUNC_GOT:
10713 rtx reg = target ? target : gen_reg_rtx (Pmode);
10715 emit_insn (gen_symGOT2reg (reg, sym));
10716 sym = reg;
10717 break;
10719 case SFUNC_STATIC:
10721 /* ??? To allow cse to work, we use GOTOFF relocations.
10722 We could add combiner patterns to transform this into
10723 straight pc-relative calls with sym2PIC / bsrf when
10724 label load and function call are still 1:1 and in the
10725 same basic block during combine. */
10726 rtx reg = target ? target : gen_reg_rtx (Pmode);
10728 emit_insn (gen_symGOTOFF2reg (reg, sym));
10729 sym = reg;
10730 break;
10733 if (target && sym != target)
10735 emit_move_insn (target, sym);
10736 return target;
10738 return sym;
10741 /* Find the number of a general purpose register in S. */
10742 static int
10743 scavenge_reg (HARD_REG_SET *s)
10745 int r;
10746 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10747 if (TEST_HARD_REG_BIT (*s, r))
10748 return r;
10749 return -1;
10753 sh_get_pr_initial_val (void)
10755 rtx val;
10757 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10758 PR register on SHcompact, because it might be clobbered by the prologue.
10759 We check first if that is known to be the case. */
10760 if (TARGET_SHCOMPACT
10761 && ((crtl->args.info.call_cookie
10762 & ~ CALL_COOKIE_RET_TRAMP (1))
10763 || crtl->saves_all_registers))
10764 return gen_frame_mem (SImode, return_address_pointer_rtx);
10766 /* If we haven't finished rtl generation, there might be a nonlocal label
10767 that we haven't seen yet.
10768 ??? get_hard_reg_initial_val fails if it is called after register
10769 allocation has started, unless it has been called before for the
10770 same register. And even then, we end up in trouble if we didn't use
10771 the register in the same basic block before. So call
10772 get_hard_reg_initial_val now and wrap it in an unspec if we might
10773 need to replace it. */
10774 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10775 combine can put the pseudo returned by get_hard_reg_initial_val into
10776 instructions that need a general purpose register, which will fail to
10777 be recognized when the pseudo becomes allocated to PR. */
10779 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10780 if (TARGET_SH1)
10781 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10782 return val;
10786 sh_expand_t_scc (enum rtx_code code, rtx target)
10788 rtx result = target;
10789 HOST_WIDE_INT val;
10791 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10792 || GET_CODE (sh_compare_op1) != CONST_INT)
10793 return 0;
10794 if (GET_CODE (result) != REG)
10795 result = gen_reg_rtx (SImode);
10796 val = INTVAL (sh_compare_op1);
10797 if ((code == EQ && val == 1) || (code == NE && val == 0))
10798 emit_insn (gen_movt (result));
10799 else if (TARGET_SH2A && ((code == EQ && val == 0)
10800 || (code == NE && val == 1)))
10801 emit_insn (gen_movrt (result));
10802 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10804 emit_clobber (result);
10805 emit_insn (gen_subc (result, result, result));
10806 emit_insn (gen_addsi3 (result, result, const1_rtx));
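/* The clobber/subc/add sequence just above computes
   result = (result - result - T) + 1 = 1 - T, i.e. the logical
   negation of the T bit, which is exactly what the EQ-with-0 and
   NE-with-1 cases handled here require.  */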
10808 else if (code == EQ || code == NE)
10809 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10810 else
10811 return 0;
10812 if (result != target)
10813 emit_move_insn (target, result);
10814 return 1;
10817 /* INSN is an sfunc; return the rtx that describes the address used. */
10818 static rtx
10819 extract_sfunc_addr (rtx insn)
10821 rtx pattern, part = NULL_RTX;
10822 int len, i;
10824 pattern = PATTERN (insn);
10825 len = XVECLEN (pattern, 0);
10826 for (i = 0; i < len; i++)
10828 part = XVECEXP (pattern, 0, i);
10829 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10830 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10831 return XEXP (part, 0);
10833 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10834 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10837 /* Verify that the register in use_sfunc_addr still agrees with the address
10838 used in the sfunc. This prevents fill_slots_from_thread from changing
10839 use_sfunc_addr.
10840 INSN is the use_sfunc_addr instruction, and REG is the register it
10841 guards. */
10843 check_use_sfunc_addr (rtx insn, rtx reg)
10845 /* Search for the sfunc. It should really come right after INSN. */
10846 while ((insn = NEXT_INSN (insn)))
10848 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10849 break;
10850 if (! INSN_P (insn))
10851 continue;
10853 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10854 insn = XVECEXP (PATTERN (insn), 0, 0);
10855 if (GET_CODE (PATTERN (insn)) != PARALLEL
10856 || get_attr_type (insn) != TYPE_SFUNC)
10857 continue;
10858 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10860 gcc_unreachable ();
10863 /* This function returns a constant rtx that represents 2**15 / pi in
10864 SFmode. It's used to scale SFmode angles, in radians, to a
10865 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10866 maps to 0x10000). */
10868 static GTY(()) rtx sh_fsca_sf2int_rtx;
10871 sh_fsca_sf2int (void)
10873 if (! sh_fsca_sf2int_rtx)
10875 REAL_VALUE_TYPE rv;
10877 real_from_string (&rv, "10430.378350470453");
10878 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10881 return sh_fsca_sf2int_rtx;
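/* As a check on the constant above: 2**15 / pi
   = 32768 / 3.14159265... = 10430.378350470453, so multiplying an
   angle in radians by it yields the 16.16 fixed-point
   fraction-of-a-full-circle representation used by the fsca patterns.  */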
10884 /* This function returns a constant rtx that represents 2**15 / pi in
10885 DFmode. It's used to scale DFmode angles, in radians, to a
10886 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10887 maps to 0x10000). */
10889 static GTY(()) rtx sh_fsca_df2int_rtx;
10892 sh_fsca_df2int (void)
10894 if (! sh_fsca_df2int_rtx)
10896 REAL_VALUE_TYPE rv;
10898 real_from_string (&rv, "10430.378350470453");
10899 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10902 return sh_fsca_df2int_rtx;
10905 /* This function returns a constant rtx that represents pi / 2**15 in
10906 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10907 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10908 2*pi). */
10910 static GTY(()) rtx sh_fsca_int2sf_rtx;
10913 sh_fsca_int2sf (void)
10915 if (! sh_fsca_int2sf_rtx)
10917 REAL_VALUE_TYPE rv;
10919 real_from_string (&rv, "9.587379924285257e-5");
10920 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10923 return sh_fsca_int2sf_rtx;
10926 /* Initialize the CUMULATIVE_ARGS structure. */
10928 void
10929 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10930 tree fntype,
10931 rtx libname ATTRIBUTE_UNUSED,
10932 tree fndecl,
10933 signed int n_named_args,
10934 enum machine_mode mode)
10936 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10937 pcum->free_single_fp_reg = 0;
10938 pcum->stack_regs = 0;
10939 pcum->byref_regs = 0;
10940 pcum->byref = 0;
10941 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10943 /* XXX - Should we check TARGET_HITACHI here ??? */
10944 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10946 if (fntype)
10948 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10949 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10950 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10951 pcum->arg_count [(int) SH_ARG_INT]
10952 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10954 pcum->call_cookie
10955 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10956 && pcum->arg_count [(int) SH_ARG_INT] == 0
10957 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10958 ? int_size_in_bytes (TREE_TYPE (fntype))
10959 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10960 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10961 == FIRST_RET_REG));
10963 else
10965 pcum->arg_count [(int) SH_ARG_INT] = 0;
10966 pcum->prototype_p = FALSE;
10967 if (mode != VOIDmode)
10969 pcum->call_cookie =
10970 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10971 && GET_MODE_SIZE (mode) > 4
10972 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10974 /* If the default ABI is the Renesas ABI then all library
10975 calls must assume that the library will be using the
10976 Renesas ABI. So if the function would return its result
10977 in memory then we must force the address of this memory
10978 block onto the stack. Ideally we would like to call
10979 targetm.calls.return_in_memory() here but we do not have
10980 the TYPE or the FNDECL available so we synthesize the
10981 contents of that function as best we can. */
10982 pcum->force_mem =
10983 (TARGET_DEFAULT & MASK_HITACHI)
10984 && (mode == BLKmode
10985 || (GET_MODE_SIZE (mode) > 4
10986 && !(mode == DFmode
10987 && TARGET_FPU_DOUBLE)));
10989 else
10991 pcum->call_cookie = 0;
10992 pcum->force_mem = FALSE;
10997 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10998 not descend into CONST_DOUBLEs when replacing.
11000 Note that copying is not done so X must not be shared unless all copies
11001 are to be modified.
11003 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11004 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11005 replacements[n*2+1] - and that we take mode changes into account.
11007 If a replacement is ambiguous, return NULL_RTX.
11009 If MODIFY is zero, don't modify any rtl in place,
11010 just return zero or nonzero for failure / success. */
11013 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11015 int i, j;
11016 const char *fmt;
11018 /* The following prevents infinite loops when we change a MEM in a
11019 CONST_DOUBLE into the same CONST_DOUBLE. */
11020 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11021 return x;
11023 for (i = n_replacements - 1; i >= 0 ; i--)
11024 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11025 return replacements[i*2+1];
11027 /* Allow this function to make replacements in EXPR_LISTs. */
11028 if (x == 0)
11029 return 0;
11031 if (GET_CODE (x) == SUBREG)
11033 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11034 n_replacements, modify);
11036 if (GET_CODE (new_rtx) == CONST_INT)
11038 x = simplify_subreg (GET_MODE (x), new_rtx,
11039 GET_MODE (SUBREG_REG (x)),
11040 SUBREG_BYTE (x));
11041 if (! x)
11042 abort ();
11044 else if (modify)
11045 SUBREG_REG (x) = new_rtx;
11047 return x;
11049 else if (GET_CODE (x) == REG)
11051 unsigned regno = REGNO (x);
11052 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11053 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11054 rtx result = NULL_RTX;
11056 for (i = n_replacements - 1; i >= 0; i--)
11058 rtx from = replacements[i*2];
11059 rtx to = replacements[i*2+1];
11060 unsigned from_regno, from_nregs, to_regno, new_regno;
11062 if (GET_CODE (from) != REG)
11063 continue;
11064 from_regno = REGNO (from);
11065 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11066 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11067 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11069 if (regno < from_regno
11070 || regno + nregs > from_regno + nregs
11071 || GET_CODE (to) != REG
11072 || result)
11073 return NULL_RTX;
11074 to_regno = REGNO (to);
11075 if (to_regno < FIRST_PSEUDO_REGISTER)
11077 new_regno = regno + to_regno - from_regno;
11078 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11079 != nregs)
11080 return NULL_RTX;
11081 result = gen_rtx_REG (GET_MODE (x), new_regno);
11083 else if (GET_MODE (x) <= GET_MODE (to))
11084 result = gen_lowpart_common (GET_MODE (x), to);
11085 else
11086 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11089 return result ? result : x;
11091 else if (GET_CODE (x) == ZERO_EXTEND)
11093 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11094 n_replacements, modify);
11096 if (GET_CODE (new_rtx) == CONST_INT)
11098 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11099 new_rtx, GET_MODE (XEXP (x, 0)));
11100 if (! x)
11101 abort ();
11103 else if (modify)
11104 XEXP (x, 0) = new_rtx;
11106 return x;
11109 fmt = GET_RTX_FORMAT (GET_CODE (x));
11110 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11112 rtx new_rtx;
11114 if (fmt[i] == 'e')
11116 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11117 n_replacements, modify);
11118 if (!new_rtx)
11119 return NULL_RTX;
11120 if (modify)
11121 XEXP (x, i) = new_rtx;
11123 else if (fmt[i] == 'E')
11124 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11126 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11127 n_replacements, modify);
11128 if (!new_rtx)
11129 return NULL_RTX;
11130 if (modify)
11131 XVECEXP (x, i, j) = new_rtx;
11135 return x;
11139 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11141 enum rtx_code code = TRUNCATE;
11143 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11145 rtx inner = XEXP (x, 0);
11146 enum machine_mode inner_mode = GET_MODE (inner);
11148 if (inner_mode == mode)
11149 return inner;
11150 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11151 x = inner;
11152 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11153 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11155 code = GET_CODE (x);
11156 x = inner;
11159 return gen_rtx_fmt_e (code, mode, x);
11162 /* Called via for_each_rtx after reload, to clean up truncates of
11163 registers that span multiple actual hard registers. */
11165 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11167 rtx x = *p, reg;
11169 if (GET_CODE (x) != TRUNCATE)
11170 return 0;
11171 reg = XEXP (x, 0);
11172 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11174 enum machine_mode reg_mode = GET_MODE (reg);
11175 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11176 subreg_lowpart_offset (DImode, reg_mode));
11177 *(int*) n_changes += 1;
11178 return -1;
11180 return 0;
11183 /* Loads and stores depend on the high part of the address. However,
11184 set_attr_alternative does not give well-defined results before reload,
11185 so we must look at the rtl ourselves to see if any of the feeding
11186 registers is used in a memref. */
11188 /* Called by sh_contains_memref_p via for_each_rtx. */
11189 static int
11190 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11192 return (GET_CODE (*loc) == MEM);
11195 /* Return nonzero iff INSN contains a MEM. */
11197 sh_contains_memref_p (rtx insn)
11199 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11202 /* Return nonzero iff INSN loads a banked register. */
11204 sh_loads_bankedreg_p (rtx insn)
11206 if (GET_CODE (PATTERN (insn)) == SET)
11208 rtx op = SET_DEST (PATTERN(insn));
11209 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11210 return 1;
11213 return 0;
11216 /* FNADDR is the MEM expression from a call expander. Return an address
11217 to use in an SHmedia insn pattern. */
11219 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11221 int is_sym;
11223 fnaddr = XEXP (fnaddr, 0);
11224 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11225 if (flag_pic && is_sym)
11227 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11229 rtx reg = gen_reg_rtx (Pmode);
11231 /* We must not use GOTPLT for sibcalls, because PIC_REG
11232 must be restored before the PLT code gets to run. */
11233 if (is_sibcall)
11234 emit_insn (gen_symGOT2reg (reg, fnaddr));
11235 else
11236 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11237 fnaddr = reg;
11239 else
11241 fnaddr = gen_sym2PIC (fnaddr);
11242 PUT_MODE (fnaddr, Pmode);
11245 /* If ptabs might trap, make this visible to the rest of the compiler.
11246 We generally assume that symbols pertain to valid locations, but
11247 it is possible to generate invalid symbols with asm or linker tricks.
11248 In a list of functions where each returns its successor, an invalid
11249 symbol might denote an empty list. */
11250 if (!TARGET_PT_FIXED
11251 && (!is_sym || TARGET_INVALID_SYMBOLS)
11252 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11254 rtx tr = gen_reg_rtx (PDImode);
11256 emit_insn (gen_ptabs (tr, fnaddr));
11257 fnaddr = tr;
11259 else if (! target_reg_operand (fnaddr, Pmode))
11260 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11261 return fnaddr;
11264 enum reg_class
11265 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11266 enum machine_mode mode, secondary_reload_info *sri)
11268 if (in_p)
11270 if (REGCLASS_HAS_FP_REG (rclass)
11271 && ! TARGET_SHMEDIA
11272 && immediate_operand ((x), mode)
11273 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11274 && mode == SFmode && fldi_ok ()))
11275 switch (mode)
11277 case SFmode:
11278 sri->icode = CODE_FOR_reload_insf__frn;
11279 return NO_REGS;
11280 case DFmode:
11281 sri->icode = CODE_FOR_reload_indf__frn;
11282 return NO_REGS;
11283 case SImode:
11284 /* ??? If we knew that we are in the appropriate mode -
11285 single precision - we could use a reload pattern directly. */
11286 return FPUL_REGS;
11287 default:
11288 abort ();
11290 if (rclass == FPUL_REGS
11291 && ((GET_CODE (x) == REG
11292 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11293 || REGNO (x) == T_REG))
11294 || GET_CODE (x) == PLUS))
11295 return GENERAL_REGS;
11296 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11298 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11299 return GENERAL_REGS;
11300 else if (mode == SFmode)
11301 return FP_REGS;
11302 sri->icode = CODE_FOR_reload_insi__i_fpul;
11303 return NO_REGS;
11305 if (rclass == FPSCR_REGS
11306 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11307 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11308 return GENERAL_REGS;
11309 if (REGCLASS_HAS_FP_REG (rclass)
11310 && TARGET_SHMEDIA
11311 && immediate_operand (x, mode)
11312 && x != CONST0_RTX (GET_MODE (x))
11313 && GET_MODE (x) != V4SFmode)
11314 return GENERAL_REGS;
11315 if ((mode == QImode || mode == HImode)
11316 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11318 sri->icode = ((mode == QImode)
11319 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11320 return NO_REGS;
11322 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11323 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11324 return TARGET_REGS;
11325 } /* end of input-only processing. */
11327 if (((REGCLASS_HAS_FP_REG (rclass)
11328 && (GET_CODE (x) == REG
11329 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11330 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11331 && TARGET_FMOVD))))
11332 || (REGCLASS_HAS_GENERAL_REG (rclass)
11333 && GET_CODE (x) == REG
11334 && FP_REGISTER_P (REGNO (x))))
11335 && ! TARGET_SHMEDIA
11336 && (mode == SFmode || mode == SImode))
11337 return FPUL_REGS;
11338 if ((rclass == FPUL_REGS
11339 || (REGCLASS_HAS_FP_REG (rclass)
11340 && ! TARGET_SHMEDIA && mode == SImode))
11341 && (GET_CODE (x) == MEM
11342 || (GET_CODE (x) == REG
11343 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11344 || REGNO (x) == T_REG
11345 || system_reg_operand (x, VOIDmode)))))
11347 if (rclass == FPUL_REGS)
11348 return GENERAL_REGS;
11349 return FPUL_REGS;
11351 if ((rclass == TARGET_REGS
11352 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11353 && !satisfies_constraint_Csy (x)
11354 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11355 return GENERAL_REGS;
11356 if ((rclass == MAC_REGS || rclass == PR_REGS)
11357 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11358 && rclass != REGNO_REG_CLASS (REGNO (x)))
11359 return GENERAL_REGS;
11360 if (rclass != GENERAL_REGS && GET_CODE (x) == REG
11361 && TARGET_REGISTER_P (REGNO (x)))
11362 return GENERAL_REGS;
11363 return NO_REGS;
11366 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11368 #include "gt-sh.h"