[SH] double precision floating point dwarf fix
[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "c-pragma.h"
41 #include "integrate.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
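/* A short illustration, based on the adjust_address uses in print_operand
   further down: the low 32 bits of a double-word value in memory are
   reached with adjust_address (x, SImode, 4 * LSW) and the high 32 bits
   with adjust_address (x, SImode, 4 * MSW).  On little-endian targets
   LSW is 0 and MSW is 1; on big-endian targets the two swap, so the same
   expressions still pick the correct words.  */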
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Used to simplify the logic below. Find the attributes wherever
73 they may be. */
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Saved operands from the last compare to use when we generate an scc
110 or bcc insn. */
112 rtx sh_compare_op0;
113 rtx sh_compare_op1;
115 /* Provides the class number of the smallest class containing
116 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
168 int assembler_dialect;
170 static bool shmedia_space_reserved_for_target_registers;
172 static bool sh_handle_option (size_t, const char *, int);
173 static void split_branches (rtx);
174 static int branch_dest (rtx);
175 static void force_into (rtx, rtx);
176 static void print_slot (rtx);
177 static rtx add_constant (rtx, enum machine_mode, rtx);
178 static void dump_table (rtx, rtx);
179 static int hi_const (rtx);
180 static int broken_move (rtx);
181 static int mova_p (rtx);
182 static rtx find_barrier (int, rtx, rtx);
183 static int noncall_uses_reg (rtx, rtx, rtx *);
184 static rtx gen_block_redirect (rtx, int, int);
185 static void sh_reorg (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
187 static rtx frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET *, int);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static rtx mark_constant_pool_use (rtx);
194 const struct attribute_spec sh_attribute_table[];
195 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
196 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static int find_r0_life_regions (basic_block);
213 static void sh_md_init_global (FILE *, int, int);
214 static void sh_md_finish_global (FILE *, int);
215 static int rank_for_reorder (const void *, const void *);
216 static void swap_reorder (rtx *, int);
217 static void ready_reorder (rtx *, int);
218 static short high_pressure (enum machine_mode);
219 static int sh_reorder (FILE *, int, rtx *, int *, int);
220 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
221 static void sh_md_init (FILE *, int, int);
222 static int sh_variable_issue (FILE *, int, rtx, int);
224 static bool sh_function_ok_for_sibcall (tree, tree);
226 static bool sh_cannot_modify_jumps_p (void);
227 static int sh_target_reg_class (void);
228 static bool sh_optimize_target_register_callee_saved (bool);
229 static bool sh_ms_bitfield_layout_p (const_tree);
231 static void sh_init_builtins (void);
232 static void sh_media_init_builtins (void);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
249 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
250 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
251 static int scavenge_reg (HARD_REG_SET *s);
252 struct save_schedule_s;
253 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
254 struct save_schedule_s *, int);
256 static rtx sh_struct_value_rtx (tree, int);
257 static bool sh_return_in_memory (const_tree, const_tree);
258 static rtx sh_builtin_saveregs (void);
259 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
260 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
261 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
262 static tree sh_build_builtin_va_list (void);
263 static void sh_va_start (tree, rtx);
264 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 const_tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 const_tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static bool sh_scalar_mode_supported_p (enum machine_mode);
272 static int sh_dwarf_calling_convention (const_tree);
273 static void sh_encode_section_info (tree, rtx, int);
274 static int sh2a_function_vector_p (tree);
277 /* Initialize the GCC target structure. */
278 #undef TARGET_ATTRIBUTE_TABLE
279 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
281 /* The next two are used for debug info when compiling with -gdwarf. */
282 #undef TARGET_ASM_UNALIGNED_HI_OP
283 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
284 #undef TARGET_ASM_UNALIGNED_SI_OP
285 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
287 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
288 #undef TARGET_ASM_UNALIGNED_DI_OP
289 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
290 #undef TARGET_ASM_ALIGNED_DI_OP
291 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
293 #undef TARGET_ASM_FUNCTION_EPILOGUE
294 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
296 #undef TARGET_ASM_OUTPUT_MI_THUNK
297 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
302 #undef TARGET_ASM_FILE_START
303 #define TARGET_ASM_FILE_START sh_file_start
304 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
305 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
307 #undef TARGET_DEFAULT_TARGET_FLAGS
308 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
309 #undef TARGET_HANDLE_OPTION
310 #define TARGET_HANDLE_OPTION sh_handle_option
312 #undef TARGET_INSERT_ATTRIBUTES
313 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
318 #undef TARGET_SCHED_ISSUE_RATE
319 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
321 /* The next 5 hooks have been implemented for reenabling sched1. With the
322 help of these macros we are limiting the movement of insns in sched1 to
323 reduce the register pressure. The overall idea is to keep count of SImode
324 and SFmode regs required by already scheduled insns. When these counts
325 cross some threshold values, give priority to insns that free registers.
326 The insn that frees registers is most likely to be the insn with lowest
327 LUID (original insn order); but such an insn might be there in the stalled
328 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
329 up to a max of 8 cycles so that such insns may move from Q -> R.
331 The description of the hooks is as below:
333 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
334 scheduler; it is called inside the sched_init function just after the
335 find_insn_reg_weights function call. It is used to calculate the SImode
336 and SFmode weights of insns of basic blocks, much like what
337 find_insn_reg_weights does.
338 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
340 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
341 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
342 (Q)->(R).
344 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
345 high; reorder the ready queue so that the insn with lowest LUID will be
346 issued next.
348 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
349 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
351 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
352 can be returned from TARGET_SCHED_REORDER2.
354 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
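/* A rough sketch of how the pieces above are intended to fit together
   (informal pseudo-code only; the real hook bodies live elsewhere in this
   file):

     sh_md_init_global   -> fill regmode_weight[] per insn LUID
     sh_md_init          -> CURR_REGMODE_PRESSURE (SImode) = 0, same for SFmode
     sh_reorder          -> if (high_pressure (SFmode) || high_pressure (SImode))
                              ready_reorder (ready, n_ready);
     sh_reorder2         -> if pressure is still high, set skip_cycles
     sh_dfa_new_cycle    -> while skip_cycles is set and fewer than 8 cycles
                            have been skipped, ask the scheduler to advance
                            the cycle without issuing an insn.  */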
356 #undef TARGET_SCHED_DFA_NEW_CYCLE
357 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
359 #undef TARGET_SCHED_INIT_GLOBAL
360 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
362 #undef TARGET_SCHED_FINISH_GLOBAL
363 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
365 #undef TARGET_SCHED_VARIABLE_ISSUE
366 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
368 #undef TARGET_SCHED_REORDER
369 #define TARGET_SCHED_REORDER sh_reorder
371 #undef TARGET_SCHED_REORDER2
372 #define TARGET_SCHED_REORDER2 sh_reorder2
374 #undef TARGET_SCHED_INIT
375 #define TARGET_SCHED_INIT sh_md_init
377 #undef TARGET_CANNOT_MODIFY_JUMPS_P
378 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
379 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
380 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
381 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
382 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
383 sh_optimize_target_register_callee_saved
385 #undef TARGET_MS_BITFIELD_LAYOUT_P
386 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
388 #undef TARGET_INIT_BUILTINS
389 #define TARGET_INIT_BUILTINS sh_init_builtins
390 #undef TARGET_EXPAND_BUILTIN
391 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
393 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
394 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
396 #undef TARGET_CANNOT_COPY_INSN_P
397 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
398 #undef TARGET_RTX_COSTS
399 #define TARGET_RTX_COSTS sh_rtx_costs
400 #undef TARGET_ADDRESS_COST
401 #define TARGET_ADDRESS_COST sh_address_cost
402 #undef TARGET_ALLOCATE_INITIAL_VALUE
403 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
405 #undef TARGET_MACHINE_DEPENDENT_REORG
406 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_DWARF_REGISTER_SPAN
409 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
411 #ifdef HAVE_AS_TLS
412 #undef TARGET_HAVE_TLS
413 #define TARGET_HAVE_TLS true
414 #endif
416 #undef TARGET_PROMOTE_PROTOTYPES
417 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
418 #undef TARGET_PROMOTE_FUNCTION_ARGS
419 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
420 #undef TARGET_PROMOTE_FUNCTION_RETURN
421 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
423 #undef TARGET_STRUCT_VALUE_RTX
424 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
425 #undef TARGET_RETURN_IN_MEMORY
426 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
428 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
429 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
430 #undef TARGET_SETUP_INCOMING_VARARGS
431 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
432 #undef TARGET_STRICT_ARGUMENT_NAMING
433 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
434 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
435 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
436 #undef TARGET_MUST_PASS_IN_STACK
437 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
438 #undef TARGET_PASS_BY_REFERENCE
439 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
440 #undef TARGET_CALLEE_COPIES
441 #define TARGET_CALLEE_COPIES sh_callee_copies
442 #undef TARGET_ARG_PARTIAL_BYTES
443 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
445 #undef TARGET_BUILD_BUILTIN_VA_LIST
446 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
447 #undef TARGET_EXPAND_BUILTIN_VA_START
448 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
449 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
450 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
452 #undef TARGET_SCALAR_MODE_SUPPORTED_P
453 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
454 #undef TARGET_VECTOR_MODE_SUPPORTED_P
455 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
457 #undef TARGET_CHECK_PCH_TARGET_FLAGS
458 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
460 #undef TARGET_DWARF_CALLING_CONVENTION
461 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
463 /* Return regmode weight for insn. */
464 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
466 /* Return current register pressure for regmode. */
467 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
469 #undef TARGET_ENCODE_SECTION_INFO
470 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
472 #ifdef SYMBIAN
474 #undef TARGET_ENCODE_SECTION_INFO
475 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
476 #undef TARGET_STRIP_NAME_ENCODING
477 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
478 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
479 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
481 #endif /* SYMBIAN */
483 #undef TARGET_SECONDARY_RELOAD
484 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
486 /* Machine-specific symbol_ref flags. */
487 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
489 struct gcc_target targetm = TARGET_INITIALIZER;
491 /* Implement TARGET_HANDLE_OPTION. */
493 static bool
494 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
495 int value ATTRIBUTE_UNUSED)
497 switch (code)
499 case OPT_m1:
500 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
501 return true;
503 case OPT_m2:
504 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
505 return true;
507 case OPT_m2a:
508 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
509 return true;
511 case OPT_m2a_nofpu:
512 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
513 return true;
515 case OPT_m2a_single:
516 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
517 return true;
519 case OPT_m2a_single_only:
520 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
521 return true;
523 case OPT_m2e:
524 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
525 return true;
527 case OPT_m3:
528 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
529 return true;
531 case OPT_m3e:
532 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
533 return true;
535 case OPT_m4:
536 case OPT_m4_100:
537 case OPT_m4_200:
538 case OPT_m4_300:
539 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
540 return true;
542 case OPT_m4_nofpu:
543 case OPT_m4_100_nofpu:
544 case OPT_m4_200_nofpu:
545 case OPT_m4_300_nofpu:
546 case OPT_m4_340:
547 case OPT_m4_400:
548 case OPT_m4_500:
549 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
550 return true;
552 case OPT_m4_single:
553 case OPT_m4_100_single:
554 case OPT_m4_200_single:
555 case OPT_m4_300_single:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
557 return true;
559 case OPT_m4_single_only:
560 case OPT_m4_100_single_only:
561 case OPT_m4_200_single_only:
562 case OPT_m4_300_single_only:
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
564 return true;
566 case OPT_m4a:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
568 return true;
570 case OPT_m4a_nofpu:
571 case OPT_m4al:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
573 return true;
575 case OPT_m4a_single:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
577 return true;
579 case OPT_m4a_single_only:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
581 return true;
583 case OPT_m5_32media:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
585 return true;
587 case OPT_m5_32media_nofpu:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
589 return true;
591 case OPT_m5_64media:
592 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
593 return true;
595 case OPT_m5_64media_nofpu:
596 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
597 return true;
599 case OPT_m5_compact:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
601 return true;
603 case OPT_m5_compact_nofpu:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
605 return true;
607 default:
608 return true;
612 /* Print the operand address in x to the stream. */
614 void
615 print_operand_address (FILE *stream, rtx x)
617 switch (GET_CODE (x))
619 case REG:
620 case SUBREG:
621 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
622 break;
624 case PLUS:
626 rtx base = XEXP (x, 0);
627 rtx index = XEXP (x, 1);
629 switch (GET_CODE (index))
631 case CONST_INT:
632 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
633 reg_names[true_regnum (base)]);
634 break;
636 case REG:
637 case SUBREG:
639 int base_num = true_regnum (base);
640 int index_num = true_regnum (index);
642 fprintf (stream, "@(r0,%s)",
643 reg_names[MAX (base_num, index_num)]);
644 break;
647 default:
648 gcc_unreachable ();
651 break;
653 case PRE_DEC:
654 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
655 break;
657 case POST_INC:
658 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
659 break;
661 default:
662 x = mark_constant_pool_use (x);
663 output_addr_const (stream, x);
664 break;
668 /* Print operand x (an rtx) in assembler syntax to file stream
669 according to modifier code.
671 '.' print a .s if insn needs delay slot
672 ',' print LOCAL_LABEL_PREFIX
673 '@' print trap, rte or rts depending upon pragma interruptness
674 '#' output a nop if there is nothing to put in the delay slot
675 ''' print likelihood suffix (/u for unlikely).
676 '>' print branch target if -fverbose-asm
677 'O' print a constant without the #
678 'R' print the LSW of a dp value - changes if in little endian
679 'S' print the MSW of a dp value - changes if in little endian
680 'T' print the next word of a dp value - same as 'R' in big endian mode.
681 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
682 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
683 'N' print 'r63' if the operand is (const_int 0).
684 'd' print a V2SF reg as dN instead of fpN.
685 'm' print a pair `base,offset' or `base,index', for LD and ST.
686 'U' Likewise for {LD,ST}{HI,LO}.
687 'V' print the position of a single bit set.
688 'W' print the position of a single bit cleared.
689 't' print a memory address which is a register.
690 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
691 'o' output an operator. */
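/* A worked example for %R/%S (hypothetical template; little-endian,
   non-SHmedia target assumed): with a DFmode value in the register pair
   r4/r5, a template like "mov.l %R1,@%0" followed by "mov.l %S1,@(4,%0)"
   prints r4 for %R1 (the least significant word, since LSW is 0) and r5
   for %S1.  On a big-endian target the two register names swap, which is
   the point of using %R/%S rather than hard-coding %1 and %T1.  */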
693 void
694 print_operand (FILE *stream, rtx x, int code)
696 int regno;
697 enum machine_mode mode;
699 switch (code)
701 tree trapa_attr;
703 case '.':
704 if (final_sequence
705 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
706 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
707 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
708 break;
709 case ',':
710 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
711 break;
712 case '@':
713 trapa_attr = lookup_attribute ("trap_exit",
714 DECL_ATTRIBUTES (current_function_decl));
715 if (trapa_attr)
716 fprintf (stream, "trapa #%ld",
717 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
718 else if (sh_cfun_interrupt_handler_p ())
720 if (sh_cfun_resbank_handler_p ())
721 fprintf (stream, "resbank\n");
722 fprintf (stream, "rte");
724 else
725 fprintf (stream, "rts");
726 break;
727 case '#':
728 /* Output a nop if there's nothing in the delay slot. */
729 if (dbr_sequence_length () == 0)
730 fprintf (stream, "\n\tnop");
731 break;
732 case '\'':
734 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
736 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
737 fputs ("/u", stream);
738 break;
740 case '>':
741 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
743 fputs ("\t! target: ", stream);
744 output_addr_const (stream, JUMP_LABEL (current_output_insn));
746 break;
747 case 'O':
748 x = mark_constant_pool_use (x);
749 output_addr_const (stream, x);
750 break;
751 /* N.B.: %R / %S / %T adjust memory addresses by four.
752 For SHMEDIA, that means they can be used to access the first and
753 second 32 bit part of a 64 bit (or larger) value that
754 might be held in floating point registers or memory.
755 While they can be used to access 64 bit parts of a larger value
756 held in general purpose registers, that won't work with memory -
757 neither for fp registers, since the frxx names are used. */
758 case 'R':
759 if (REG_P (x) || GET_CODE (x) == SUBREG)
761 regno = true_regnum (x);
762 regno += FP_REGISTER_P (regno) ? 1 : LSW;
763 fputs (reg_names[regno], (stream));
765 else if (MEM_P (x))
767 x = adjust_address (x, SImode, 4 * LSW);
768 print_operand_address (stream, XEXP (x, 0));
770 else
772 rtx sub = NULL_RTX;
774 mode = GET_MODE (x);
775 if (mode == VOIDmode)
776 mode = DImode;
777 if (GET_MODE_SIZE (mode) >= 8)
778 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
779 if (sub)
780 print_operand (stream, sub, 0);
781 else
782 output_operand_lossage ("invalid operand to %%R");
784 break;
785 case 'S':
786 if (REG_P (x) || GET_CODE (x) == SUBREG)
788 regno = true_regnum (x);
789 regno += FP_REGISTER_P (regno) ? 0 : MSW;
790 fputs (reg_names[regno], (stream));
792 else if (MEM_P (x))
794 x = adjust_address (x, SImode, 4 * MSW);
795 print_operand_address (stream, XEXP (x, 0));
797 else
799 rtx sub = NULL_RTX;
801 mode = GET_MODE (x);
802 if (mode == VOIDmode)
803 mode = DImode;
804 if (GET_MODE_SIZE (mode) >= 8)
805 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
806 if (sub)
807 print_operand (stream, sub, 0);
808 else
809 output_operand_lossage ("invalid operand to %%S");
811 break;
812 case 'T':
813 /* Next word of a double. */
814 switch (GET_CODE (x))
816 case REG:
817 fputs (reg_names[REGNO (x) + 1], (stream));
818 break;
819 case MEM:
820 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
821 && GET_CODE (XEXP (x, 0)) != POST_INC)
822 x = adjust_address (x, SImode, 4);
823 print_operand_address (stream, XEXP (x, 0));
824 break;
825 default:
826 break;
828 break;
830 case 't':
831 gcc_assert (GET_CODE (x) == MEM);
832 x = XEXP (x, 0);
833 switch (GET_CODE (x))
835 case REG:
836 case SUBREG:
837 print_operand (stream, x, 0);
838 break;
839 default:
840 break;
842 break;
844 case 'o':
845 switch (GET_CODE (x))
847 case PLUS: fputs ("add", stream); break;
848 case MINUS: fputs ("sub", stream); break;
849 case MULT: fputs ("mul", stream); break;
850 case DIV: fputs ("div", stream); break;
851 case EQ: fputs ("eq", stream); break;
852 case NE: fputs ("ne", stream); break;
853 case GT: case LT: fputs ("gt", stream); break;
854 case GE: case LE: fputs ("ge", stream); break;
855 case GTU: case LTU: fputs ("gtu", stream); break;
856 case GEU: case LEU: fputs ("geu", stream); break;
857 default:
858 break;
860 break;
861 case 'M':
862 if (TARGET_SHMEDIA)
864 if (GET_CODE (x) == MEM
865 && GET_CODE (XEXP (x, 0)) == PLUS
866 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
867 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
868 fputc ('x', stream);
870 else
872 if (GET_CODE (x) == MEM)
874 switch (GET_MODE (x))
876 case QImode: fputs (".b", stream); break;
877 case HImode: fputs (".w", stream); break;
878 case SImode: fputs (".l", stream); break;
879 case SFmode: fputs (".s", stream); break;
880 case DFmode: fputs (".d", stream); break;
881 default: gcc_unreachable ();
885 break;
887 case 'm':
888 gcc_assert (GET_CODE (x) == MEM);
889 x = XEXP (x, 0);
890 /* Fall through. */
891 case 'U':
892 switch (GET_CODE (x))
894 case REG:
895 case SUBREG:
896 print_operand (stream, x, 0);
897 fputs (", 0", stream);
898 break;
900 case PLUS:
901 print_operand (stream, XEXP (x, 0), 0);
902 fputs (", ", stream);
903 print_operand (stream, XEXP (x, 1), 0);
904 break;
906 default:
907 gcc_unreachable ();
909 break;
911 case 'V':
913 int num = exact_log2 (INTVAL (x));
914 gcc_assert (num >= 0);
915 fprintf (stream, "#%d", num);
917 break;
919 case 'W':
921 int num = exact_log2 (~INTVAL (x));
922 gcc_assert (num >= 0);
923 fprintf (stream, "#%d", num);
925 break;
927 case 'd':
928 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
930 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
931 break;
933 case 'N':
934 if (x == CONST0_RTX (GET_MODE (x)))
936 fprintf ((stream), "r63");
937 break;
939 goto default_output;
940 case 'u':
941 if (GET_CODE (x) == CONST_INT)
943 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
944 break;
946 /* Fall through. */
948 default_output:
949 default:
950 regno = 0;
951 mode = GET_MODE (x);
953 switch (GET_CODE (x))
955 case TRUNCATE:
957 rtx inner = XEXP (x, 0);
958 int offset = 0;
959 enum machine_mode inner_mode;
961 /* We might see SUBREGs with vector mode registers inside. */
962 if (GET_CODE (inner) == SUBREG
963 && (GET_MODE_SIZE (GET_MODE (inner))
964 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
965 && subreg_lowpart_p (inner))
966 inner = SUBREG_REG (inner);
967 if (GET_CODE (inner) == CONST_INT)
969 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
970 goto default_output;
972 inner_mode = GET_MODE (inner);
973 if (GET_CODE (inner) == SUBREG
974 && (GET_MODE_SIZE (GET_MODE (inner))
975 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
976 && GET_CODE (SUBREG_REG (inner)) == REG)
978 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
979 GET_MODE (SUBREG_REG (inner)),
980 SUBREG_BYTE (inner),
981 GET_MODE (inner));
982 inner = SUBREG_REG (inner);
984 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
985 abort ();
986 /* Floating point register pairs are always big endian;
987 general purpose registers are 64 bit wide. */
988 regno = REGNO (inner);
989 regno = (HARD_REGNO_NREGS (regno, inner_mode)
990 - HARD_REGNO_NREGS (regno, mode))
991 + offset;
992 x = inner;
993 goto reg;
995 case SIGN_EXTEND:
996 x = XEXP (x, 0);
997 goto reg;
998 /* FIXME: We need this on SHmedia32 because reload generates
999 some sign-extended HI or QI loads into DImode registers
1000 but, because Pmode is SImode, the address ends up with a
1001 subreg:SI of the DImode register. Maybe reload should be
1002 fixed so as to apply alter_subreg to such loads? */
1003 case IF_THEN_ELSE:
1004 gcc_assert (trapping_target_operand (x, VOIDmode));
1005 x = XEXP (XEXP (x, 2), 0);
1006 goto default_output;
1007 case SUBREG:
1008 gcc_assert (SUBREG_BYTE (x) == 0
1009 && GET_CODE (SUBREG_REG (x)) == REG);
1011 x = SUBREG_REG (x);
1012 /* Fall through. */
1014 reg:
1015 case REG:
1016 regno += REGNO (x);
1017 if (FP_REGISTER_P (regno)
1018 && mode == V16SFmode)
1019 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1020 else if (FP_REGISTER_P (REGNO (x))
1021 && mode == V4SFmode)
1022 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1023 else if (GET_CODE (x) == REG
1024 && mode == V2SFmode)
1025 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1026 else if (FP_REGISTER_P (REGNO (x))
1027 && GET_MODE_SIZE (mode) > 4)
1028 fprintf ((stream), "d%s", reg_names[regno] + 1);
1029 else
1030 fputs (reg_names[regno], (stream));
1031 break;
1033 case MEM:
1034 output_address (XEXP (x, 0));
1035 break;
1037 default:
1038 if (TARGET_SH1)
1039 fputc ('#', stream);
1040 output_addr_const (stream, x);
1041 break;
1043 break;
1048 /* Encode symbol attributes of a SYMBOL_REF into its
1049 SYMBOL_REF_FLAGS. */
1050 static void
1051 sh_encode_section_info (tree decl, rtx rtl, int first)
1053 default_encode_section_info (decl, rtl, first);
1055 if (TREE_CODE (decl) == FUNCTION_DECL
1056 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1057 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1060 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1061 static void
1062 force_into (rtx value, rtx target)
1064 value = force_operand (value, target);
1065 if (! rtx_equal_p (value, target))
1066 emit_insn (gen_move_insn (target, value));
1069 /* Emit code to perform a block move. Choose the best method.
1071 OPERANDS[0] is the destination.
1072 OPERANDS[1] is the source.
1073 OPERANDS[2] is the size.
1074 OPERANDS[3] is the alignment safe to use. */
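/* A hedged example of the dispatch below: a 20 byte copy with 4 byte
   alignment on a non-TARGET_HARD_SH4 target takes the "bytes < 64" branch
   and calls the __movmemSI20 library routine, with the destination address
   forced into r4 and the source address into r5.  If the size is not a
   compile-time constant, is not a multiple of 4, or the alignment is below
   4, the function returns 0 and the caller falls back to a generic move.  */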
1077 expand_block_move (rtx *operands)
1079 int align = INTVAL (operands[3]);
1080 int constp = (GET_CODE (operands[2]) == CONST_INT);
1081 int bytes = (constp ? INTVAL (operands[2]) : 0);
1083 if (! constp)
1084 return 0;
1086 /* If we could use mov.l to move words and dest is word-aligned, we
1087 can use movua.l for loads and still generate a relatively short
1088 and efficient sequence. */
1089 if (TARGET_SH4A_ARCH && align < 4
1090 && MEM_ALIGN (operands[0]) >= 32
1091 && can_move_by_pieces (bytes, 32))
1093 rtx dest = copy_rtx (operands[0]);
1094 rtx src = copy_rtx (operands[1]);
1095 /* We could use different pseudos for each copied word, but
1096 since movua can only load into r0, it's kind of
1097 pointless. */
1098 rtx temp = gen_reg_rtx (SImode);
1099 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1100 int copied = 0;
1102 while (copied + 4 <= bytes)
1104 rtx to = adjust_address (dest, SImode, copied);
1105 rtx from = adjust_automodify_address (src, BLKmode,
1106 src_addr, copied);
1108 set_mem_size (from, GEN_INT (4));
1109 emit_insn (gen_movua (temp, from));
1110 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1111 emit_move_insn (to, temp);
1112 copied += 4;
1115 if (copied < bytes)
1116 move_by_pieces (adjust_address (dest, BLKmode, copied),
1117 adjust_automodify_address (src, BLKmode,
1118 src_addr, copied),
1119 bytes - copied, align, 0);
1121 return 1;
1124 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1125 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1126 if (align < 4 || (bytes % 4 != 0))
1127 return 0;
1129 if (TARGET_HARD_SH4)
1131 if (bytes < 12)
1132 return 0;
1133 else if (bytes == 12)
1135 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1136 rtx r4 = gen_rtx_REG (SImode, 4);
1137 rtx r5 = gen_rtx_REG (SImode, 5);
1139 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1140 force_into (XEXP (operands[0], 0), r4);
1141 force_into (XEXP (operands[1], 0), r5);
1142 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1143 return 1;
1145 else if (! TARGET_SMALLCODE)
1147 const char *entry_name;
1148 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1149 int dwords;
1150 rtx r4 = gen_rtx_REG (SImode, 4);
1151 rtx r5 = gen_rtx_REG (SImode, 5);
1152 rtx r6 = gen_rtx_REG (SImode, 6);
1154 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1155 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1156 force_into (XEXP (operands[0], 0), r4);
1157 force_into (XEXP (operands[1], 0), r5);
1159 dwords = bytes >> 3;
1160 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1161 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1162 return 1;
1164 else
1165 return 0;
1167 if (bytes < 64)
1169 char entry[30];
1170 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1171 rtx r4 = gen_rtx_REG (SImode, 4);
1172 rtx r5 = gen_rtx_REG (SImode, 5);
1174 sprintf (entry, "__movmemSI%d", bytes);
1175 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1176 force_into (XEXP (operands[0], 0), r4);
1177 force_into (XEXP (operands[1], 0), r5);
1178 emit_insn (gen_block_move_real (func_addr_rtx));
1179 return 1;
1182 /* This is the same number of bytes as a memcpy call, but to a different
1183 less common function name, so this will occasionally use more space. */
1184 if (! TARGET_SMALLCODE)
1186 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1187 int final_switch, while_loop;
1188 rtx r4 = gen_rtx_REG (SImode, 4);
1189 rtx r5 = gen_rtx_REG (SImode, 5);
1190 rtx r6 = gen_rtx_REG (SImode, 6);
1192 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1193 force_into (XEXP (operands[0], 0), r4);
1194 force_into (XEXP (operands[1], 0), r5);
1196 /* r6 controls the size of the move. 16 is decremented from it
1197 for each 64 bytes moved. Then the negative bit left over is used
1198 as an index into a list of move instructions. e.g., a 72 byte move
1199 would be set up with size(r6) = 14, for one iteration through the
1200 big while loop, and a switch of -2 for the last part. */
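/* Checking the 72 byte example: bytes / 4 == 18, so
   final_switch = 16 - (18 % 16) == 14 and while_loop = (18 / 16 - 1) * 16 == 0,
   giving r6 = 14; the one pass through the big loop subtracts 16, leaving
   the -2 switch index described above.  */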
1202 final_switch = 16 - ((bytes / 4) % 16);
1203 while_loop = ((bytes / 4) / 16 - 1) * 16;
1204 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1205 emit_insn (gen_block_lump_real (func_addr_rtx));
1206 return 1;
1209 return 0;
1212 /* Prepare operands for a move define_expand; specifically, one of the
1213 operands must be in a register. */
1216 prepare_move_operands (rtx operands[], enum machine_mode mode)
1218 if ((mode == SImode || mode == DImode)
1219 && flag_pic
1220 && ! ((mode == Pmode || mode == ptr_mode)
1221 && tls_symbolic_operand (operands[1], Pmode) != 0))
1223 rtx temp;
1224 if (SYMBOLIC_CONST_P (operands[1]))
1226 if (GET_CODE (operands[0]) == MEM)
1227 operands[1] = force_reg (Pmode, operands[1]);
1228 else if (TARGET_SHMEDIA
1229 && GET_CODE (operands[1]) == LABEL_REF
1230 && target_reg_operand (operands[0], mode))
1231 /* It's ok. */;
1232 else
1234 temp = (!can_create_pseudo_p ()
1235 ? operands[0]
1236 : gen_reg_rtx (Pmode));
1237 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1240 else if (GET_CODE (operands[1]) == CONST
1241 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1242 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1244 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1245 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1246 mode, temp);
1247 operands[1] = expand_binop (mode, add_optab, temp,
1248 XEXP (XEXP (operands[1], 0), 1),
1249 (!can_create_pseudo_p ()
1250 ? temp
1251 : gen_reg_rtx (Pmode)),
1252 0, OPTAB_LIB_WIDEN);
1256 if (! reload_in_progress && ! reload_completed)
1258 /* Copy the source to a register if neither operand is a register. */
1259 if (! register_operand (operands[0], mode)
1260 && ! sh_register_operand (operands[1], mode))
1261 operands[1] = copy_to_mode_reg (mode, operands[1]);
1263 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1265 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1266 except that we can't use that function because it is static. */
1267 rtx new_rtx = change_address (operands[0], mode, 0);
1268 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1269 operands[0] = new_rtx;
1272 /* This case can happen while generating code to move the result
1273 of a library call to the target. Reject `st r0,@(rX,rY)' because
1274 reload will fail to find a spill register for rX, since r0 is already
1275 being used for the source. */
1276 else if (TARGET_SH1
1277 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1278 && GET_CODE (operands[0]) == MEM
1279 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1280 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1281 operands[1] = copy_to_mode_reg (mode, operands[1]);
1284 if (mode == Pmode || mode == ptr_mode)
1286 rtx op0, op1, opc;
1287 enum tls_model tls_kind;
1289 op0 = operands[0];
1290 op1 = operands[1];
1291 if (GET_CODE (op1) == CONST
1292 && GET_CODE (XEXP (op1, 0)) == PLUS
1293 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1295 opc = XEXP (XEXP (op1, 0), 1);
1296 op1 = XEXP (XEXP (op1, 0), 0);
1298 else
1299 opc = NULL_RTX;
1301 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1303 rtx tga_op1, tga_ret, tmp, tmp2;
1305 switch (tls_kind)
1307 case TLS_MODEL_GLOBAL_DYNAMIC:
1308 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1309 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1310 op1 = tga_ret;
1311 break;
1313 case TLS_MODEL_LOCAL_DYNAMIC:
1314 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1315 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1317 tmp = gen_reg_rtx (Pmode);
1318 emit_move_insn (tmp, tga_ret);
1320 if (register_operand (op0, Pmode))
1321 tmp2 = op0;
1322 else
1323 tmp2 = gen_reg_rtx (Pmode);
1325 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1326 op1 = tmp2;
1327 break;
1329 case TLS_MODEL_INITIAL_EXEC:
1330 if (! flag_pic)
1332 /* Don't schedule insns for getting GOT address when
1333 the first scheduling is enabled, to avoid spill
1334 failures for R0. */
1335 if (flag_schedule_insns)
1336 emit_insn (gen_blockage ());
1337 emit_insn (gen_GOTaddr2picreg ());
1338 emit_use (gen_rtx_REG (SImode, PIC_REG));
1339 if (flag_schedule_insns)
1340 emit_insn (gen_blockage ());
1342 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1343 tmp = gen_sym2GOTTPOFF (op1);
1344 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1345 op1 = tga_op1;
1346 break;
1348 case TLS_MODEL_LOCAL_EXEC:
1349 tmp2 = gen_reg_rtx (Pmode);
1350 emit_insn (gen_load_gbr (tmp2));
1351 tmp = gen_reg_rtx (Pmode);
1352 emit_insn (gen_symTPOFF2reg (tmp, op1));
1354 if (register_operand (op0, Pmode))
1355 op1 = op0;
1356 else
1357 op1 = gen_reg_rtx (Pmode);
1359 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1360 break;
1362 default:
1363 gcc_unreachable ();
1365 if (opc)
1366 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1367 operands[1] = op1;
1371 return 0;
1374 enum rtx_code
1375 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1376 enum rtx_code comparison)
1378 rtx op1;
1379 rtx scratch = NULL_RTX;
1381 if (comparison == CODE_FOR_nothing)
1382 comparison = GET_CODE (operands[0]);
1383 else
1384 scratch = operands[4];
1385 if (GET_CODE (operands[1]) == CONST_INT
1386 && GET_CODE (operands[2]) != CONST_INT)
1388 rtx tmp = operands[1];
1390 operands[1] = operands[2];
1391 operands[2] = tmp;
1392 comparison = swap_condition (comparison);
1394 if (GET_CODE (operands[2]) == CONST_INT)
1396 HOST_WIDE_INT val = INTVAL (operands[2]);
1397 if ((val == -1 || val == -0x81)
1398 && (comparison == GT || comparison == LE))
1400 comparison = (comparison == GT) ? GE : LT;
1401 operands[2] = gen_int_mode (val + 1, mode);
1403 else if ((val == 1 || val == 0x80)
1404 && (comparison == GE || comparison == LT))
1406 comparison = (comparison == GE) ? GT : LE;
1407 operands[2] = gen_int_mode (val - 1, mode);
1409 else if (val == 1 && (comparison == GEU || comparison == LTU))
1411 comparison = (comparison == GEU) ? NE : EQ;
1412 operands[2] = CONST0_RTX (mode);
1414 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1416 comparison = (comparison == GEU) ? GTU : LEU;
1417 operands[2] = gen_int_mode (val - 1, mode);
1419 else if (val == 0 && (comparison == GTU || comparison == LEU))
1420 comparison = (comparison == GTU) ? NE : EQ;
1421 else if (mode == SImode
1422 && ((val == 0x7fffffff
1423 && (comparison == GTU || comparison == LEU))
1424 || ((unsigned HOST_WIDE_INT) val
1425 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1426 && (comparison == GEU || comparison == LTU))))
1428 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1429 operands[2] = CONST0_RTX (mode);
1432 op1 = operands[1];
1433 if (can_create_pseudo_p ())
1434 operands[1] = force_reg (mode, op1);
1435 /* When we are handling DImode comparisons, we want to keep constants so
1436 that we can optimize the component comparisons; however, memory loads
1437 are better issued as a whole so that they can be scheduled well.
1438 SImode equality comparisons allow I08 constants, but only when they
1439 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1440 into a register, that register might as well be r0, and we allow the
1441 constant. If it is already in a register, this is likely to be
1442 allocated to a different hard register, thus we load the constant into
1443 a register unless it is zero. */
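/* A small example of the rule above (illustrative only): comparing r0
   against 5 for equality may keep the constant, since cmp/eq #imm8,r0
   exists; comparing r4 against 5 may not, so the constant is first moved
   into the scratch register (or a fresh pseudo) by the code below.  */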
1444 if (!REG_P (operands[2])
1445 && (GET_CODE (operands[2]) != CONST_INT
1446 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1447 && ((comparison != EQ && comparison != NE)
1448 || (REG_P (op1) && REGNO (op1) != R0_REG)
1449 || !satisfies_constraint_I08 (operands[2])))))
1451 if (scratch && GET_MODE (scratch) == mode)
1453 emit_move_insn (scratch, operands[2]);
1454 operands[2] = scratch;
1456 else if (can_create_pseudo_p ())
1457 operands[2] = force_reg (mode, operands[2]);
1459 return comparison;
1462 void
1463 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1465 rtx (*branch_expander) (rtx) = gen_branch_true;
1466 rtx jump;
1468 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1469 switch (comparison)
1471 case NE: case LT: case LE: case LTU: case LEU:
1472 comparison = reverse_condition (comparison);
1473 branch_expander = gen_branch_false;
1474 default: ;
1476 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1477 gen_rtx_fmt_ee (comparison, SImode,
1478 operands[1], operands[2])));
1479 jump = emit_jump_insn (branch_expander (operands[3]));
1480 if (probability >= 0)
1481 REG_NOTES (jump)
1482 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1483 REG_NOTES (jump));
1487 /* ??? How should we distribute probabilities when more than one branch
1488 is generated? So far we only have some ad-hoc observations:
1489 - If the operands are random, they are likely to differ in both parts.
1490 - If comparing items in a hash chain, the operands are random or equal;
1491 operation should be EQ or NE.
1492 - If items are searched in an ordered tree from the root, we can expect
1493 the highpart to be unequal about half of the time; operation should be
1494 an inequality comparison, operands non-constant, and overall probability
1495 about 50%. Likewise for quicksort.
1496 - Range checks will be often made against constants. Even if we assume for
1497 simplicity an even distribution of the non-constant operand over a
1498 sub-range here, the same probability could be generated with differently
1499 wide sub-ranges - as long as the ratio of the part of the subrange that
1500 is before the threshold to the part that comes after the threshold stays
1501 the same. Thus, we can't really tell anything here;
1502 assuming random distribution is at least simple.
1505 bool
1506 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1508 enum rtx_code msw_taken, msw_skip, lsw_taken;
1509 rtx skip_label = NULL_RTX;
1510 rtx op1h, op1l, op2h, op2l;
1511 int num_branches;
1512 int prob, rev_prob;
1513 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1514 rtx scratch = operands[4];
1516 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1517 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1518 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1519 op1l = gen_lowpart (SImode, operands[1]);
1520 op2l = gen_lowpart (SImode, operands[2]);
1521 msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1522 prob = split_branch_probability;
1523 rev_prob = REG_BR_PROB_BASE - prob;
1524 switch (comparison)
1526 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1527 That costs 1 cycle more when the first branch can be predicted taken,
1528 but saves us mispredicts because only one branch needs prediction.
1529 It also enables generating the cmpeqdi_t-1 pattern. */
1530 case EQ:
1531 if (TARGET_CMPEQDI_T)
1533 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1534 emit_jump_insn (gen_branch_true (operands[3]));
1535 return true;
1537 msw_skip = NE;
1538 lsw_taken = EQ;
1539 if (prob >= 0)
1541 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1543 msw_skip_prob = rev_prob;
1544 if (REG_BR_PROB_BASE <= 65535)
1545 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1546 else
1548 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1549 lsw_taken_prob
1550 = (prob
1551 ? (REG_BR_PROB_BASE
1552 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1553 / ((HOST_WIDEST_INT) prob << 32)))
1554 : 0);
1557 break;
1558 case NE:
1559 if (TARGET_CMPEQDI_T)
1561 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1562 emit_jump_insn (gen_branch_false (operands[3]));
1563 return true;
1565 msw_taken = NE;
1566 msw_taken_prob = prob;
1567 lsw_taken = NE;
1568 lsw_taken_prob = 0;
1569 break;
1570 case GTU: case GT:
1571 msw_taken = comparison;
1572 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1573 break;
1574 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1575 msw_skip = swap_condition (msw_taken);
1576 lsw_taken = GTU;
1577 break;
1578 case GEU: case GE:
1579 if (op2l == CONST0_RTX (SImode))
1580 msw_taken = comparison;
1581 else
1583 msw_taken = comparison == GE ? GT : GTU;
1584 msw_skip = swap_condition (msw_taken);
1585 lsw_taken = GEU;
1587 break;
1588 case LTU: case LT:
1589 msw_taken = comparison;
1590 if (op2l == CONST0_RTX (SImode))
1591 break;
1592 msw_skip = swap_condition (msw_taken);
1593 lsw_taken = LTU;
1594 break;
1595 case LEU: case LE:
1596 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1597 msw_taken = comparison;
1598 else
1600 lsw_taken = LEU;
1601 if (comparison == LE)
1602 msw_taken = LT;
1603 else if (op2h != CONST0_RTX (SImode))
1604 msw_taken = LTU;
1605 else
1606 break;
1607 msw_skip = swap_condition (msw_taken);
1609 break;
1610 default: return false;
1612 num_branches = ((msw_taken != CODE_FOR_nothing)
1613 + (msw_skip != CODE_FOR_nothing)
1614 + (lsw_taken != CODE_FOR_nothing));
1615 if (comparison != EQ && comparison != NE && num_branches > 1)
1617 if (!CONSTANT_P (operands[2])
1618 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1619 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1621 msw_taken_prob = prob / 2U;
1622 msw_skip_prob
1623 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1624 lsw_taken_prob = prob;
1626 else
1628 msw_taken_prob = prob;
1629 msw_skip_prob = REG_BR_PROB_BASE;
1630 /* ??? If we have a constant op2h, should we use that when
1631 calculating lsw_taken_prob? */
1632 lsw_taken_prob = prob;
1635 operands[1] = op1h;
1636 operands[2] = op2h;
1637 operands[4] = NULL_RTX;
1638 if (reload_completed
1639 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1640 && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
1642 emit_move_insn (scratch, operands[2]);
1643 operands[2] = scratch;
1645 if (msw_taken != CODE_FOR_nothing)
1646 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1647 if (msw_skip != CODE_FOR_nothing)
1649 rtx taken_label = operands[3];
1651 /* Operands were possibly modified, but msw_skip doesn't expect this.
1652 Always use the original ones. */
1653 if (msw_taken != CODE_FOR_nothing)
1655 operands[1] = op1h;
1656 operands[2] = op2h;
1659 operands[3] = skip_label = gen_label_rtx ();
1660 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1661 operands[3] = taken_label;
1663 operands[1] = op1l;
1664 operands[2] = op2l;
1665 if (lsw_taken != CODE_FOR_nothing)
1667 if (reload_completed
1668 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1669 operands[4] = scratch;
1670 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1672 if (msw_skip != CODE_FOR_nothing)
1673 emit_label (skip_label);
1674 return true;
1677 /* Prepare the operands for an scc instruction; make sure that the
1678 compare has been done. */
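/* For instance (an illustrative note): an LT comparison is rewritten as GT
   with sh_compare_op0 and sh_compare_op1 swapped below, because the T bit
   can only be set by the "greater" style compares (cmp/gt, cmp/ge, cmp/hi,
   cmp/hs); there is no cmp/lt instruction.  */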
1680 prepare_scc_operands (enum rtx_code code)
1682 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1683 enum rtx_code oldcode = code;
1684 enum machine_mode mode;
1686 /* First need a compare insn. */
1687 switch (code)
1689 case NE:
1690 /* It isn't possible to handle this case. */
1691 gcc_unreachable ();
1692 case LT:
1693 code = GT;
1694 break;
1695 case LE:
1696 code = GE;
1697 break;
1698 case LTU:
1699 code = GTU;
1700 break;
1701 case LEU:
1702 code = GEU;
1703 break;
1704 default:
1705 break;
1707 if (code != oldcode)
1709 rtx tmp = sh_compare_op0;
1710 sh_compare_op0 = sh_compare_op1;
1711 sh_compare_op1 = tmp;
1714 mode = GET_MODE (sh_compare_op0);
1715 if (mode == VOIDmode)
1716 mode = GET_MODE (sh_compare_op1);
1718 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1719 if ((code != EQ && code != NE
1720 && (sh_compare_op1 != const0_rtx
1721 || code == GTU || code == GEU || code == LTU || code == LEU))
1722 || (mode == DImode && sh_compare_op1 != const0_rtx)
1723 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1724 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1726 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1727 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1728 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1729 gen_rtx_SET (VOIDmode, t_reg,
1730 gen_rtx_fmt_ee (code, SImode,
1731 sh_compare_op0, sh_compare_op1)),
1732 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1733 else
1734 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1735 gen_rtx_fmt_ee (code, SImode,
1736 sh_compare_op0, sh_compare_op1)));
1738 return t_reg;
1741 /* Called from the md file, set up the operands of a compare instruction. */
1743 void
1744 from_compare (rtx *operands, int code)
1746 enum machine_mode mode = GET_MODE (sh_compare_op0);
1747 rtx insn;
1748 if (mode == VOIDmode)
1749 mode = GET_MODE (sh_compare_op1);
1750 if (code != EQ
1751 || mode == DImode
1752 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1754 /* Force args into regs, since we can't use constants here. */
1755 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1756 if (sh_compare_op1 != const0_rtx
1757 || code == GTU || code == GEU
1758 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1759 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1761 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1763 from_compare (operands, GT);
1764 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1766 else
1767 insn = gen_rtx_SET (VOIDmode,
1768 gen_rtx_REG (SImode, T_REG),
1769 gen_rtx_fmt_ee (code, SImode,
1770 sh_compare_op0, sh_compare_op1));
1771 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1773 insn = gen_rtx_PARALLEL (VOIDmode,
1774 gen_rtvec (2, insn,
1775 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1776 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1778 else
1779 emit_insn (insn);
1782 /* Functions to output assembly code. */
1784 /* Return a sequence of instructions to perform DI or DF move.
1786 Since the SH cannot move a DI or DF in one instruction, we have
1787 to take care when we see overlapping source and dest registers. */
1789 const char *
1790 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1791 enum machine_mode mode)
1793 rtx dst = operands[0];
1794 rtx src = operands[1];
1796 if (GET_CODE (dst) == MEM
1797 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1798 return "mov.l %T1,%0\n\tmov.l %1,%0";
1800 if (register_operand (dst, mode)
1801 && register_operand (src, mode))
1803 if (REGNO (src) == MACH_REG)
1804 return "sts mach,%S0\n\tsts macl,%R0";
1806 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1807 when mov.d r1,r0 do r1->r0 then r2->r1. */
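/* That is: if the first register of the destination overlaps the second
   register of the source, copy the second (%T) halves first; otherwise
   copy the first halves first, so the overlapping register is read
   before it is overwritten.  */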
1809 if (REGNO (src) + 1 == REGNO (dst))
1810 return "mov %T1,%T0\n\tmov %1,%0";
1811 else
1812 return "mov %1,%0\n\tmov %T1,%T0";
1814 else if (GET_CODE (src) == CONST_INT)
1816 if (INTVAL (src) < 0)
1817 output_asm_insn ("mov #-1,%S0", operands);
1818 else
1819 output_asm_insn ("mov #0,%S0", operands);
1821 return "mov %1,%R0";
1823 else if (GET_CODE (src) == MEM)
1825 int ptrreg = -1;
1826 int dreg = REGNO (dst);
1827 rtx inside = XEXP (src, 0);
1829 switch (GET_CODE (inside))
1831 case REG:
1832 ptrreg = REGNO (inside);
1833 break;
1835 case SUBREG:
1836 ptrreg = subreg_regno (inside);
1837 break;
1839 case PLUS:
1840 ptrreg = REGNO (XEXP (inside, 0));
1841 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1842 an offsettable address. Unfortunately, offsettable addresses use
1843 QImode to check the offset, and a QImode offsettable address
1844 requires r0 for the other operand, which is not currently
1845 supported, so we can't use the 'o' constraint.
1846 Thus we must check for and handle r0+REG addresses here.
1847 We punt for now, since this is likely very rare. */
1848 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1849 break;
1851 case LABEL_REF:
1852 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1853 case POST_INC:
1854 return "mov.l %1,%0\n\tmov.l %1,%T0";
1855 default:
1856 gcc_unreachable ();
1859 /* Work out the safe way to copy. Copy into the second half first. */
1860 if (dreg == ptrreg)
1861 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1864 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1867 /* Print an instruction which would have gone into a delay slot after
1868 another instruction, but couldn't because the other instruction expanded
1869 into a sequence where putting the slot insn at the end wouldn't work. */
1871 static void
1872 print_slot (rtx insn)
1874 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1876 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1879 const char *
1880 output_far_jump (rtx insn, rtx op)
1882 struct { rtx lab, reg, op; } this_jmp;
1883 rtx braf_base_lab = NULL_RTX;
1884 const char *jump;
1885 int far;
1886 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1887 rtx prev;
1889 this_jmp.lab = gen_label_rtx ();
1891 if (TARGET_SH2
1892 && offset >= -32764
1893 && offset - get_attr_length (insn) <= 32766)
1895 far = 0;
1896 jump = "mov.w %O0,%1; braf %1";
1898 else
1900 far = 1;
1901 if (flag_pic)
1903 if (TARGET_SH2)
1904 jump = "mov.l %O0,%1; braf %1";
1905 else
1906 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1908 else
1909 jump = "mov.l %O0,%1; jmp @%1";
1911 /* If we have a scratch register available, use it. */
1912 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1913 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1915 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1916 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1917 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1918 output_asm_insn (jump, &this_jmp.lab);
1919 if (dbr_sequence_length ())
1920 print_slot (final_sequence);
1921 else
1922 output_asm_insn ("nop", 0);
1924 else
1926 /* Output the delay slot insn first if any. */
1927 if (dbr_sequence_length ())
1928 print_slot (final_sequence);
1930 this_jmp.reg = gen_rtx_REG (SImode, 13);
1931 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1932 Fortunately, MACL is fixed and call-clobbered, and we never
1933 need its value across jumps, so save r13 in it instead of in
1934 the stack. */
1935 if (TARGET_SH5)
1936 output_asm_insn ("lds r13, macl", 0);
1937 else
1938 output_asm_insn ("mov.l r13,@-r15", 0);
1939 output_asm_insn (jump, &this_jmp.lab);
1940 if (TARGET_SH5)
1941 output_asm_insn ("sts macl, r13", 0);
1942 else
1943 output_asm_insn ("mov.l @r15+,r13", 0);
1945 if (far && flag_pic && TARGET_SH2)
1947 braf_base_lab = gen_label_rtx ();
1948 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1949 CODE_LABEL_NUMBER (braf_base_lab));
1951 if (far)
1952 output_asm_insn (".align 2", 0);
1953 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
1954 this_jmp.op = op;
1955 if (far && flag_pic)
1957 if (TARGET_SH2)
1958 this_jmp.lab = braf_base_lab;
1959 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
1961 else
1962 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
1963 return "";
1966 /* Local label counter, used for constants in the pool and inside
1967 pattern branches. */
1969 static int lf = 100;
1971 /* Output code for ordinary branches. */
1973 const char *
1974 output_branch (int logic, rtx insn, rtx *operands)
1976 switch (get_attr_length (insn))
1978 case 6:
1979 /* This can happen if filling the delay slot has caused a forward
1980 branch to exceed its range (we could reverse it, but only
1981 when we know we won't overextend other branches; this should
1982 best be handled by relaxation).
1983 It can also happen when other condbranches hoist delay slot insn
1984 from their destination, thus leading to code size increase.
1985 But the branch will still be in the range -4092..+4098 bytes. */
1987 if (! TARGET_RELAX)
1989 int label = lf++;
1990 /* The call to print_slot will clobber the operands. */
1991 rtx op0 = operands[0];
1993 /* If the instruction in the delay slot is annulled (true), then
1994 there is no delay slot where we can put it now. The only safe
1995 place for it is after the label. final will do that by default. */
1997 if (final_sequence
1998 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1999 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2001 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2002 ASSEMBLER_DIALECT ? "/" : ".", label);
2003 print_slot (final_sequence);
2005 else
2006 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2008 output_asm_insn ("bra\t%l0", &op0);
2009 fprintf (asm_out_file, "\tnop\n");
2010 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2012 return "";
2014 /* When relaxing, handle this like a short branch. The linker
2015 will fix it up if it still doesn't fit after relaxation. */
2016 case 2:
2017 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2019 /* These are for SH2e, in which we have to account for the
2020 extra nop because of the hardware bug in annulled branches. */
2021 case 8:
2022 if (! TARGET_RELAX)
2024 int label = lf++;
2026 gcc_assert (!final_sequence
2027 || !(INSN_ANNULLED_BRANCH_P
2028 (XVECEXP (final_sequence, 0, 0))));
2029 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2030 logic ? "f" : "t",
2031 ASSEMBLER_DIALECT ? "/" : ".", label);
2032 fprintf (asm_out_file, "\tnop\n");
2033 output_asm_insn ("bra\t%l0", operands);
2034 fprintf (asm_out_file, "\tnop\n");
2035 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2037 return "";
2039 /* When relaxing, fall through. */
2040 case 4:
2042 char buffer[10];
2044 sprintf (buffer, "b%s%ss\t%%l0",
2045 logic ? "t" : "f",
2046 ASSEMBLER_DIALECT ? "/" : ".");
2047 output_asm_insn (buffer, &operands[0]);
2048 return "nop";
2051 default:
2052 /* There should be no longer branches now - that would
2053 indicate that something has destroyed the branches set
2054 up in machine_dependent_reorg. */
2055 gcc_unreachable ();
2059 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2060 fill in operand 9 as a label to the successor insn.
2061 We try to use jump threading where possible.
2062 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2063 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2064 follow jmp and bt, if the address is in range. */
2065 const char *
2066 output_branchy_insn (enum rtx_code code, const char *templ,
2067 rtx insn, rtx *operands)
2069 rtx next_insn = NEXT_INSN (insn);
2071 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2073 rtx src = SET_SRC (PATTERN (next_insn));
2074 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2076 /* Following branch not taken */
2077 operands[9] = gen_label_rtx ();
2078 emit_label_after (operands[9], next_insn);
2079 INSN_ADDRESSES_NEW (operands[9],
2080 INSN_ADDRESSES (INSN_UID (next_insn))
2081 + get_attr_length (next_insn));
2082 return templ;
2084 else
2086 int offset = (branch_dest (next_insn)
2087 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2088 if (offset >= -252 && offset <= 258)
2090 if (GET_CODE (src) == IF_THEN_ELSE)
2091 /* branch_true */
2092 src = XEXP (src, 1);
2093 operands[9] = src;
2094 return templ;
2098 operands[9] = gen_label_rtx ();
2099 emit_label_after (operands[9], insn);
2100 INSN_ADDRESSES_NEW (operands[9],
2101 INSN_ADDRESSES (INSN_UID (insn))
2102 + get_attr_length (insn));
2103 return templ;
2106 const char *
2107 output_ieee_ccmpeq (rtx insn, rtx *operands)
2109 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2110 insn, operands);
2113 /* Output the start of the assembler file. */
2115 static void
2116 sh_file_start (void)
2118 default_file_start ();
2120 #ifdef SYMBIAN
2121 /* Declare the .directive section before it is used. */
2122 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2123 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2124 #endif
2126 if (TARGET_ELF)
2127 /* We need to show the text section with the proper
2128 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2129 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2130 will complain. We can teach GAS specifically about the
2131 default attributes for our choice of text section, but
2132 then we would have to change GAS again if/when we change
2133 the text section name. */
2134 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2135 else
2136 /* Switch to the data section so that the coffsem symbol
2137 isn't in the text section. */
2138 switch_to_section (data_section);
2140 if (TARGET_LITTLE_ENDIAN)
2141 fputs ("\t.little\n", asm_out_file);
2143 if (!TARGET_ELF)
2145 if (TARGET_SHCOMPACT)
2146 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2147 else if (TARGET_SHMEDIA)
2148 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2149 TARGET_SHMEDIA64 ? 64 : 32);
2153 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2155 static bool
2156 unspec_caller_rtx_p (rtx pat)
2158 rtx base, offset;
2159 int i;
2161 split_const (pat, &base, &offset);
2162 if (GET_CODE (base) == UNSPEC)
2164 if (XINT (base, 1) == UNSPEC_CALLER)
2165 return true;
2166 for (i = 0; i < XVECLEN (base, 0); i++)
2167 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2168 return true;
2170 return false;
2173 /* Indicate that INSN cannot be duplicated. This is true for insn
2174 that generates a unique label. */
2176 static bool
2177 sh_cannot_copy_insn_p (rtx insn)
2179 rtx pat;
2181 if (!reload_completed || !flag_pic)
2182 return false;
2184 if (GET_CODE (insn) != INSN)
2185 return false;
2186 if (asm_noperands (insn) >= 0)
2187 return false;
2189 pat = PATTERN (insn);
2190 if (GET_CODE (pat) != SET)
2191 return false;
2192 pat = SET_SRC (pat);
2194 if (unspec_caller_rtx_p (pat))
2195 return true;
2197 return false;
2200 /* Actual number of instructions used to make a shift by N. */
2201 static const char ashiftrt_insns[] =
2202 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2204 /* Left shift and logical right shift are the same. */
2205 static const char shift_insns[] =
2206 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2208 /* Individual shift amounts needed to get the above length sequences.
2209 One bit right shifts clobber the T bit, so when possible, put one bit
2210 shifts in the middle of the sequence, so the ends are eligible for
2211 branch delay slots. */
2212 static const short shift_amounts[32][5] = {
2213 {0}, {1}, {2}, {2, 1},
2214 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2215 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2216 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2217 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2218 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2219 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2220 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
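/* For example, a shift by 13 uses shift_amounts[13] = {8, 2, 1, 2},
   i.e. shll8; shll2; shll; shll2, the four insns counted by shift_insns[13].
   A negative entry means a shift in the opposite direction (see gen_ashift
   below), so shift_amounts[14] = {8, -2, 8} becomes shll8; shlr2; shll8.  */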
2222 /* Likewise, but for shift amounts < 16, up to three highmost bits
2223 might be clobbered. This is typically used when combined with some
2224 kind of sign or zero extension. */
2226 static const char ext_shift_insns[] =
2227 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2229 static const short ext_shift_amounts[32][4] = {
2230 {0}, {1}, {2}, {2, 1},
2231 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2232 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2233 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2234 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2235 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2236 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2237 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
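/* For example, ext_shift_amounts[6] = {8, -2} does shll8; shlr2: two insns
   that match an exact left shift by 6 in the low 30 bits, versus the three
   insns of the exact sequence shift_amounts[6] = {2, 2, 2}.  */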
2239 /* Assuming we have a value that has been sign-extended by at least one bit,
2240 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2241 to shift it by N without data loss, and quicker than by other means? */
2242 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
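/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two cases where
   ext_shift_amounts[n] ends in a one-bit right shift ({8, -1} and {16, -1})
   that can be done with shar instead of shlr, preserving the sign.  */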
2244 /* This is used in length attributes in sh.md to help compute the length
2245 of arbitrary constant shift instructions. */
2248 shift_insns_rtx (rtx insn)
2250 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2251 int shift_count = INTVAL (XEXP (set_src, 1));
2252 enum rtx_code shift_code = GET_CODE (set_src);
2254 switch (shift_code)
2256 case ASHIFTRT:
2257 return ashiftrt_insns[shift_count];
2258 case LSHIFTRT:
2259 case ASHIFT:
2260 return shift_insns[shift_count];
2261 default:
2262 gcc_unreachable ();
2266 /* Return the cost of a shift. */
2268 static inline int
2269 shiftcosts (rtx x)
2271 int value;
2273 if (TARGET_SHMEDIA)
2274 return 1;
2276 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2278 if (GET_MODE (x) == DImode
2279 && GET_CODE (XEXP (x, 1)) == CONST_INT
2280 && INTVAL (XEXP (x, 1)) == 1)
2281 return 2;
2283 /* Everything else is invalid, because there is no pattern for it. */
2284 return MAX_COST;
2286 /* If shift by a non constant, then this will be expensive. */
2287 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2288 return SH_DYNAMIC_SHIFT_COST;
2290 value = INTVAL (XEXP (x, 1));
2292 /* Otherwise, return the true cost in instructions. */
2293 if (GET_CODE (x) == ASHIFTRT)
2295 int cost = ashiftrt_insns[value];
2296 /* If SH3, then we put the constant in a reg and use shad. */
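/* E.g. an arithmetic shift right by 24 would count as ashiftrt_insns[24] = 8
   insns, so its cost is capped at 1 + SH_DYNAMIC_SHIFT_COST below.  */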
2297 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2298 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2299 return cost;
2301 else
2302 return shift_insns[value];
2305 /* Return the cost of an AND operation. */
2307 static inline int
2308 andcosts (rtx x)
2310 int i;
2312 /* Anding with a register is a single cycle and instruction. */
2313 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2314 return 1;
2316 i = INTVAL (XEXP (x, 1));
2318 if (TARGET_SHMEDIA)
2320 if (satisfies_constraint_I10 (XEXP (x, 1))
2321 || satisfies_constraint_J16 (XEXP (x, 1)))
2322 return 1;
2323 else
2324 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2327 /* These constants are single cycle extu.[bw] instructions. */
2328 if (i == 0xff || i == 0xffff)
2329 return 1;
2330 /* Constants that can be used in an and immediate instruction in a single
2331 cycle, but this requires r0, so make it a little more expensive. */
2332 if (CONST_OK_FOR_K08 (i))
2333 return 2;
2334 /* Constants that can be loaded with a mov immediate and an and.
2335 This case is probably unnecessary. */
2336 if (CONST_OK_FOR_I08 (i))
2337 return 2;
2338 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2339 This case is probably unnecessary. */
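/* For example, an AND with 0xffff is a single extu.w (cost 1), while an AND
   with 0xff00 matches none of the cases above and so costs 3, i.e. the
   pc-relative load plus the and.  */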
2340 return 3;
2343 /* Return the cost of an addition or a subtraction. */
2345 static inline int
2346 addsubcosts (rtx x)
2348 /* Adding a register is a single cycle insn. */
2349 if (GET_CODE (XEXP (x, 1)) == REG
2350 || GET_CODE (XEXP (x, 1)) == SUBREG)
2351 return 1;
2353 /* Likewise for small constants. */
2354 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2355 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2356 return 1;
2358 if (TARGET_SHMEDIA)
2359 switch (GET_CODE (XEXP (x, 1)))
2361 case CONST:
2362 case LABEL_REF:
2363 case SYMBOL_REF:
2364 return TARGET_SHMEDIA64 ? 5 : 3;
2366 case CONST_INT:
2367 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2368 return 2;
2369 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2370 return 3;
2371 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2372 return 4;
2374 /* Fall through. */
2375 default:
2376 return 5;
2379 /* Any other constant requires a 2 cycle pc-relative load plus an
2380 addition. */
2381 return 3;
2384 /* Return the cost of a multiply. */
2385 static inline int
2386 multcosts (rtx x ATTRIBUTE_UNUSED)
2388 if (sh_multcost >= 0)
2389 return sh_multcost;
2390 if (TARGET_SHMEDIA)
2391 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2392 accept constants. Ideally, we would use a cost of one or two and
2393 add the cost of the operand, but disregard the latter when inside loops
2394 and loop invariant code motion is still to follow.
2395 Using a multiply first and splitting it later if it's a loss
2396 doesn't work because of different sign / zero extension semantics
2397 of multiplies vs. shifts. */
2398 return TARGET_SMALLCODE ? 2 : 3;
2400 if (TARGET_SH2)
2402 /* We have a mul insn, so we can never take more than the mul and the
2403 read of the mac reg, but count more because of the latency and extra
2404 reg usage. */
2405 if (TARGET_SMALLCODE)
2406 return 2;
2407 return 3;
2410 /* If we're aiming at small code, then just count the number of
2411 insns in a multiply call sequence. */
2412 if (TARGET_SMALLCODE)
2413 return 5;
2415 /* Otherwise count all the insns in the routine we'd be calling too. */
2416 return 20;
2419 /* Compute a (partial) cost for rtx X. Return true if the complete
2420 cost has been computed, and false if subexpressions should be
2421 scanned. In either case, *TOTAL contains the cost result. */
2423 static bool
2424 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2425 bool speed ATTRIBUTE_UNUSED)
2427 switch (code)
2429 case CONST_INT:
2430 if (TARGET_SHMEDIA)
2432 if (INTVAL (x) == 0)
2433 *total = 0;
2434 else if (outer_code == AND && and_operand ((x), DImode))
2435 *total = 0;
2436 else if ((outer_code == IOR || outer_code == XOR
2437 || outer_code == PLUS)
2438 && CONST_OK_FOR_I10 (INTVAL (x)))
2439 *total = 0;
2440 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2441 *total = COSTS_N_INSNS (outer_code != SET);
2442 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2443 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2444 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2445 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2446 else
2447 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2448 return true;
2450 if (CONST_OK_FOR_I08 (INTVAL (x)))
2451 *total = 0;
2452 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2453 && CONST_OK_FOR_K08 (INTVAL (x)))
2454 *total = 1;
2455 /* prepare_cmp_insn will force costly constants into registers before
2456 the cbranch[sd]i4 patterns can see them, so preserve potentially
2457 interesting ones not covered by I08 above. */
2458 else if (outer_code == COMPARE
2459 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2460 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2461 || INTVAL (x) == 0x7fffffff
2462 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2463 *total = 1;
2464 else
2465 *total = 8;
2466 return true;
2468 case CONST:
2469 case LABEL_REF:
2470 case SYMBOL_REF:
2471 if (TARGET_SHMEDIA64)
2472 *total = COSTS_N_INSNS (4);
2473 else if (TARGET_SHMEDIA32)
2474 *total = COSTS_N_INSNS (2);
2475 else
2476 *total = 5;
2477 return true;
2479 case CONST_DOUBLE:
2480 if (TARGET_SHMEDIA)
2481 *total = COSTS_N_INSNS (4);
2482 /* prepare_cmp_insn will force costly constants into registers before
2483 the cbranchdi4 pattern can see them, so preserve potentially
2484 interesting ones. */
2485 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2486 *total = 1;
2487 else
2488 *total = 10;
2489 return true;
2490 case CONST_VECTOR:
2491 if (x == CONST0_RTX (GET_MODE (x)))
2492 *total = 0;
2493 else if (sh_1el_vec (x, VOIDmode))
2494 *total = outer_code != SET;
2495 if (sh_rep_vec (x, VOIDmode))
2496 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2497 + (outer_code != SET));
2498 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2499 return true;
2501 case PLUS:
2502 case MINUS:
2503 *total = COSTS_N_INSNS (addsubcosts (x));
2504 return true;
2506 case AND:
2507 *total = COSTS_N_INSNS (andcosts (x));
2508 return true;
2510 case MULT:
2511 *total = COSTS_N_INSNS (multcosts (x));
2512 return true;
2514 case ASHIFT:
2515 case ASHIFTRT:
2516 case LSHIFTRT:
2517 *total = COSTS_N_INSNS (shiftcosts (x));
2518 return true;
2520 case DIV:
2521 case UDIV:
2522 case MOD:
2523 case UMOD:
2524 *total = COSTS_N_INSNS (20);
2525 return true;
2527 case PARALLEL:
2528 if (sh_1el_vec (x, VOIDmode))
2529 *total = outer_code != SET;
2530 if (sh_rep_vec (x, VOIDmode))
2531 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2532 + (outer_code != SET));
2533 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2534 return true;
2536 case FLOAT:
2537 case FIX:
2538 *total = 100;
2539 return true;
2541 default:
2542 return false;
2546 /* Compute the cost of an address. For the SH, all valid addresses are
2547 the same cost. Use a slightly higher cost for reg + reg addressing,
2548 since it increases pressure on r0. */
2550 static int
2551 sh_address_cost (rtx X,
2552 bool speed ATTRIBUTE_UNUSED)
2554 return (GET_CODE (X) == PLUS
2555 && ! CONSTANT_P (XEXP (X, 1))
2556 && ! TARGET_SHMEDIA ? 1 : 0);
2559 /* Code to expand a shift. */
2561 void
2562 gen_ashift (int type, int n, rtx reg)
2564 /* Negative values here come from the shift_amounts array. */
2565 if (n < 0)
2567 if (type == ASHIFT)
2568 type = LSHIFTRT;
2569 else
2570 type = ASHIFT;
2571 n = -n;
2574 switch (type)
2576 case ASHIFTRT:
2577 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2578 break;
2579 case LSHIFTRT:
2580 if (n == 1)
2581 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2582 else
2583 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2584 break;
2585 case ASHIFT:
2586 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2587 break;
2591 /* Same for HImode */
2593 void
2594 gen_ashift_hi (int type, int n, rtx reg)
2596 /* Negative values here come from the shift_amounts array. */
2597 if (n < 0)
2599 if (type == ASHIFT)
2600 type = LSHIFTRT;
2601 else
2602 type = ASHIFT;
2603 n = -n;
2606 switch (type)
2608 case ASHIFTRT:
2609 case LSHIFTRT:
2610 /* We don't have HImode right shift operations because using the
2611 ordinary 32 bit shift instructions for that doesn't generate proper
2612 zero/sign extension.
2613 gen_ashift_hi is only called in contexts where we know that the
2614 sign extension works out correctly. */
2616 int offset = 0;
2617 if (GET_CODE (reg) == SUBREG)
2619 offset = SUBREG_BYTE (reg);
2620 reg = SUBREG_REG (reg);
2622 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2623 break;
2625 case ASHIFT:
2626 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2627 break;
2631 /* Output RTL to split a constant shift into its component SH constant
2632 shift instructions. */
2634 void
2635 gen_shifty_op (int code, rtx *operands)
2637 int value = INTVAL (operands[2]);
2638 int max, i;
2640 /* Truncate the shift count in case it is out of bounds. */
2641 value = value & 0x1f;
2643 if (value == 31)
2645 if (code == LSHIFTRT)
2647 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2648 emit_insn (gen_movt (operands[0]));
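/* rotl has put the original bit 31 into T; movt then yields the 0/1
   result of the logical right shift by 31.  */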
2649 return;
2651 else if (code == ASHIFT)
2653 /* There is a two instruction sequence for 31 bit left shifts,
2654 but it requires r0. */
2655 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2657 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2658 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2659 return;
2663 else if (value == 0)
2665 /* This can happen even when optimizing, if there were subregs before
2666 reload. Don't output a nop here, as this is never optimized away;
2667 use a no-op move instead. */
2668 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2669 return;
2672 max = shift_insns[value];
2673 for (i = 0; i < max; i++)
2674 gen_ashift (code, shift_amounts[value][i], operands[0]);
2677 /* Same as above, but optimized for values where the topmost bits don't
2678 matter. */
2680 void
2681 gen_shifty_hi_op (int code, rtx *operands)
2683 int value = INTVAL (operands[2]);
2684 int max, i;
2685 void (*gen_fun) (int, int, rtx);
2687 /* This operation is used by and_shl for SImode values with a few
2688 high bits known to be cleared. */
2689 value &= 31;
2690 if (value == 0)
2692 emit_insn (gen_nop ());
2693 return;
2696 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2697 if (code == ASHIFT)
2699 max = ext_shift_insns[value];
2700 for (i = 0; i < max; i++)
2701 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2703 else
2704 /* When shifting right, emit the shifts in reverse order, so that
2705 solitary negative values come first. */
2706 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2707 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2710 /* Output RTL for an arithmetic right shift. */
2712 /* ??? Rewrite to use super-optimizer sequences. */
2715 expand_ashiftrt (rtx *operands)
2717 rtx wrk;
2718 char func[18];
2719 int value;
2721 if (TARGET_SH3)
2723 if (GET_CODE (operands[2]) != CONST_INT)
2725 rtx count = copy_to_mode_reg (SImode, operands[2]);
2726 emit_insn (gen_negsi2 (count, count));
2727 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2728 return 1;
2730 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2731 > 1 + SH_DYNAMIC_SHIFT_COST)
2733 rtx count
2734 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2735 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2736 return 1;
2739 if (GET_CODE (operands[2]) != CONST_INT)
2740 return 0;
2742 value = INTVAL (operands[2]) & 31;
2744 if (value == 31)
2746 /* If we are called from abs expansion, arrange things so that we
2747 can use a single MT instruction that doesn't clobber the source,
2748 if LICM can hoist out the load of the constant zero. */
2749 if (currently_expanding_to_rtl)
2751 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2752 operands[1]));
2753 emit_insn (gen_mov_neg_si_t (operands[0]));
2754 return 1;
2756 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2757 return 1;
2759 else if (value >= 16 && value <= 19)
2761 wrk = gen_reg_rtx (SImode);
2762 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2763 value -= 16;
2764 while (value--)
2765 gen_ashift (ASHIFTRT, 1, wrk);
2766 emit_move_insn (operands[0], wrk);
2767 return 1;
2769 /* Expand a short sequence inline; for a longer one, call a magic routine. */
2770 else if (value <= 5)
2772 wrk = gen_reg_rtx (SImode);
2773 emit_move_insn (wrk, operands[1]);
2774 while (value--)
2775 gen_ashift (ASHIFTRT, 1, wrk);
2776 emit_move_insn (operands[0], wrk);
2777 return 1;
2780 wrk = gen_reg_rtx (Pmode);
2782 /* Load the value into an arg reg and call a helper. */
2783 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2784 sprintf (func, "__ashiftrt_r4_%d", value);
2785 function_symbol (wrk, func, SFUNC_STATIC);
2786 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2787 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2788 return 1;
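/* Return nonzero if a dynamic (register-count) shift is preferable to the
   constant-shift sequence for COUNT, i.e. when that sequence would be longer
   than loading the count into a register and doing a single dynamic shift.  */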
2792 sh_dynamicalize_shift_p (rtx count)
2794 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2797 /* Try to find a good way to implement the combiner pattern
2798 [(set (match_operand:SI 0 "register_operand" "r")
2799 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2800 (match_operand:SI 2 "const_int_operand" "n"))
2801 (match_operand:SI 3 "const_int_operand" "n"))) .
2802 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2803 return 0 for simple right / left or left/right shift combination.
2804 return 1 for a combination of shifts with zero_extend.
2805 return 2 for a combination of shifts with an AND that needs r0.
2806 return 3 for a combination of shifts with an AND that needs an extra
2807 scratch register, when the three highmost bits of the AND mask are clear.
2808 return 4 for a combination of shifts with an AND that needs an extra
2809 scratch register, when any of the three highmost bits of the AND mask
2810 is set.
2811 If ATTRP is set, store an initial right shift width in ATTRP[0],
2812 and the instruction length in ATTRP[1]. These values are not valid
2813 when returning 0.
2814 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2815 shift_amounts for the last shift value that is to be used before the
2816 sign extend. */
2818 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2820 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2821 int left = INTVAL (left_rtx), right;
2822 int best = 0;
2823 int cost, best_cost = 10000;
2824 int best_right = 0, best_len = 0;
2825 int i;
2826 int can_ext;
2828 if (left < 0 || left > 31)
2829 return 0;
2830 if (GET_CODE (mask_rtx) == CONST_INT)
2831 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2832 else
2833 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2834 /* Can this be expressed as a right shift / left shift pair? */
2835 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2836 right = exact_log2 (lsb);
2837 mask2 = ~(mask + lsb - 1);
2838 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2839 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2840 if (! mask2)
2841 best_cost = shift_insns[right] + shift_insns[right + left];
2842 /* mask has no trailing zeroes <==> ! right */
2843 else if (! right && mask2 == ~(lsb2 - 1))
2845 int late_right = exact_log2 (lsb2);
2846 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2848 /* Try to use zero extend. */
2849 if (mask2 == ~(lsb2 - 1))
2851 int width, first;
2853 for (width = 8; width <= 16; width += 8)
2855 /* Can we zero-extend right away? */
2856 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2858 cost
2859 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2860 if (cost < best_cost)
2862 best = 1;
2863 best_cost = cost;
2864 best_right = right;
2865 best_len = cost;
2866 if (attrp)
2867 attrp[2] = -1;
2869 continue;
2871 /* ??? Could try to put zero extend into initial right shift,
2872 or even shift a bit left before the right shift. */
2873 /* Determine value of first part of left shift, to get to the
2874 zero extend cut-off point. */
2875 first = width - exact_log2 (lsb2) + right;
2876 if (first >= 0 && right + left - first >= 0)
2878 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2879 + ext_shift_insns[right + left - first];
2880 if (cost < best_cost)
2882 best = 1;
2883 best_cost = cost;
2884 best_right = right;
2885 best_len = cost;
2886 if (attrp)
2887 attrp[2] = first;
2892 /* Try to use r0 AND pattern */
2893 for (i = 0; i <= 2; i++)
2895 if (i > right)
2896 break;
2897 if (! CONST_OK_FOR_K08 (mask >> i))
2898 continue;
2899 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2900 if (cost < best_cost)
2902 best = 2;
2903 best_cost = cost;
2904 best_right = i;
2905 best_len = cost - 1;
2908 /* Try to use a scratch register to hold the AND operand. */
2909 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2910 for (i = 0; i <= 2; i++)
2912 if (i > right)
2913 break;
2914 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2915 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2916 if (cost < best_cost)
2918 best = 4 - can_ext;
2919 best_cost = cost;
2920 best_right = i;
2921 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2925 if (attrp)
2927 attrp[0] = best_right;
2928 attrp[1] = best_len;
2930 return best;
2933 /* This is used in length attributes of the unnamed instructions
2934 corresponding to shl_and_kind return values of 1 and 2. */
2936 shl_and_length (rtx insn)
2938 rtx set_src, left_rtx, mask_rtx;
2939 int attributes[3];
2941 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2942 left_rtx = XEXP (XEXP (set_src, 0), 1);
2943 mask_rtx = XEXP (set_src, 1);
2944 shl_and_kind (left_rtx, mask_rtx, attributes);
2945 return attributes[1];
2948 /* This is used in length attribute of the and_shl_scratch instruction. */
2951 shl_and_scr_length (rtx insn)
2953 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2954 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2955 rtx op = XEXP (set_src, 0);
2956 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2957 op = XEXP (XEXP (op, 0), 0);
2958 return len + shift_insns[INTVAL (XEXP (op, 1))];
2961 /* Generate rtl for instructions for which shl_and_kind advised a particular
2962 method of generating them, i.e. returned zero. */
2965 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2967 int attributes[3];
2968 unsigned HOST_WIDE_INT mask;
2969 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2970 int right, total_shift;
2971 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2973 right = attributes[0];
2974 total_shift = INTVAL (left_rtx) + right;
2975 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2976 switch (kind)
2978 default:
2979 return -1;
2980 case 1:
2982 int first = attributes[2];
2983 rtx operands[3];
2985 if (first < 0)
2987 emit_insn ((mask << right) <= 0xff
2988 ? gen_zero_extendqisi2 (dest,
2989 gen_lowpart (QImode, source))
2990 : gen_zero_extendhisi2 (dest,
2991 gen_lowpart (HImode, source)));
2992 source = dest;
2994 if (source != dest)
2995 emit_insn (gen_movsi (dest, source));
2996 operands[0] = dest;
2997 if (right)
2999 operands[2] = GEN_INT (right);
3000 gen_shifty_hi_op (LSHIFTRT, operands);
3002 if (first > 0)
3004 operands[2] = GEN_INT (first);
3005 gen_shifty_hi_op (ASHIFT, operands);
3006 total_shift -= first;
3007 mask <<= first;
3009 if (first >= 0)
3010 emit_insn (mask <= 0xff
3011 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3012 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3013 if (total_shift > 0)
3015 operands[2] = GEN_INT (total_shift);
3016 gen_shifty_hi_op (ASHIFT, operands);
3018 break;
3020 case 4:
3021 shift_gen_fun = gen_shifty_op;
3022 case 3:
3023 /* If the topmost bit that matters is set, set the topmost bits
3024 that don't matter. This way, we might be able to get a shorter
3025 signed constant. */
3026 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3027 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3028 case 2:
3029 /* Don't expand fine-grained when combining, because that will
3030 make the pattern fail. */
3031 if (currently_expanding_to_rtl
3032 || reload_in_progress || reload_completed)
3034 rtx operands[3];
3036 /* Cases 3 and 4 should be handled by this split
3037 only while combining */
3038 gcc_assert (kind <= 2);
3039 if (right)
3041 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3042 source = dest;
3044 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3045 if (total_shift)
3047 operands[0] = dest;
3048 operands[1] = dest;
3049 operands[2] = GEN_INT (total_shift);
3050 shift_gen_fun (ASHIFT, operands);
3052 break;
3054 else
3056 int neg = 0;
3057 if (kind != 4 && total_shift < 16)
3059 neg = -ext_shift_amounts[total_shift][1];
3060 if (neg > 0)
3061 neg -= ext_shift_amounts[total_shift][2];
3062 else
3063 neg = 0;
3065 emit_insn (gen_and_shl_scratch (dest, source,
3066 GEN_INT (right),
3067 GEN_INT (mask),
3068 GEN_INT (total_shift + neg),
3069 GEN_INT (neg)));
3070 emit_insn (gen_movsi (dest, dest));
3071 break;
3074 return 0;
3077 /* Try to find a good way to implement the combiner pattern
3078 [(set (match_operand:SI 0 "register_operand" "=r")
3079 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3080 (match_operand:SI 2 "const_int_operand" "n")
3081 (match_operand:SI 3 "const_int_operand" "n")
3082 (const_int 0)))
3083 (clobber (reg:SI T_REG))]
3084 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3085 return 0 for simple left / right shift combination.
3086 return 1 for left shift / 8 bit sign extend / left shift.
3087 return 2 for left shift / 16 bit sign extend / left shift.
3088 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3089 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3090 return 5 for left shift / 16 bit sign extend / right shift
3091 return 6 for < 8 bit sign extend / left shift.
3092 return 7 for < 8 bit sign extend / left shift / single right shift.
3093 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3096 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3098 int left, size, insize, ext;
3099 int cost = 0, best_cost;
3100 int kind;
3102 left = INTVAL (left_rtx);
3103 size = INTVAL (size_rtx);
3104 insize = size - left;
3105 gcc_assert (insize > 0);
3106 /* Default to left / right shift. */
3107 kind = 0;
3108 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3109 if (size <= 16)
3111 /* 16 bit shift / sign extend / 16 bit shift */
3112 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3113 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3114 below, by alternative 3 or something even better. */
3115 if (cost < best_cost)
3117 kind = 5;
3118 best_cost = cost;
3121 /* Try a plain sign extend between two shifts. */
3122 for (ext = 16; ext >= insize; ext -= 8)
3124 if (ext <= size)
3126 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3127 if (cost < best_cost)
3129 kind = ext / (unsigned) 8;
3130 best_cost = cost;
3133 /* Check if we can do a sloppy shift with a final signed shift
3134 restoring the sign. */
3135 if (EXT_SHIFT_SIGNED (size - ext))
3136 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3137 /* If not, maybe it's still cheaper to do the second shift sloppy,
3138 and do a final sign extend? */
3139 else if (size <= 16)
3140 cost = ext_shift_insns[ext - insize] + 1
3141 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3142 else
3143 continue;
3144 if (cost < best_cost)
3146 kind = ext / (unsigned) 8 + 2;
3147 best_cost = cost;
3150 /* Check if we can sign extend in r0 */
3151 if (insize < 8)
3153 cost = 3 + shift_insns[left];
3154 if (cost < best_cost)
3156 kind = 6;
3157 best_cost = cost;
3159 /* Try the same with a final signed shift. */
3160 if (left < 31)
3162 cost = 3 + ext_shift_insns[left + 1] + 1;
3163 if (cost < best_cost)
3165 kind = 7;
3166 best_cost = cost;
3170 if (TARGET_SH3)
3172 /* Try to use a dynamic shift. */
3173 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3174 if (cost < best_cost)
3176 kind = 0;
3177 best_cost = cost;
3180 if (costp)
3181 *costp = cost;
3182 return kind;
3185 /* Function to be used in the length attribute of the instructions
3186 implementing this pattern. */
3189 shl_sext_length (rtx insn)
3191 rtx set_src, left_rtx, size_rtx;
3192 int cost;
3194 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3195 left_rtx = XEXP (XEXP (set_src, 0), 1);
3196 size_rtx = XEXP (set_src, 1);
3197 shl_sext_kind (left_rtx, size_rtx, &cost);
3198 return cost;
3201 /* Generate rtl for this pattern */
3204 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3206 int kind;
3207 int left, size, insize, cost;
3208 rtx operands[3];
3210 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3211 left = INTVAL (left_rtx);
3212 size = INTVAL (size_rtx);
3213 insize = size - left;
3214 switch (kind)
3216 case 1:
3217 case 2:
3218 case 3:
3219 case 4:
3221 int ext = kind & 1 ? 8 : 16;
3222 int shift2 = size - ext;
3224 /* Don't expand fine-grained when combining, because that will
3225 make the pattern fail. */
3226 if (! currently_expanding_to_rtl
3227 && ! reload_in_progress && ! reload_completed)
3229 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3230 emit_insn (gen_movsi (dest, source));
3231 break;
3233 if (dest != source)
3234 emit_insn (gen_movsi (dest, source));
3235 operands[0] = dest;
3236 if (ext - insize)
3238 operands[2] = GEN_INT (ext - insize);
3239 gen_shifty_hi_op (ASHIFT, operands);
3241 emit_insn (kind & 1
3242 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3243 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3244 if (kind <= 2)
3246 if (shift2)
3248 operands[2] = GEN_INT (shift2);
3249 gen_shifty_op (ASHIFT, operands);
3252 else
3254 if (shift2 > 0)
3256 if (EXT_SHIFT_SIGNED (shift2))
3258 operands[2] = GEN_INT (shift2 + 1);
3259 gen_shifty_op (ASHIFT, operands);
3260 operands[2] = const1_rtx;
3261 gen_shifty_op (ASHIFTRT, operands);
3262 break;
3264 operands[2] = GEN_INT (shift2);
3265 gen_shifty_hi_op (ASHIFT, operands);
3267 else if (shift2)
3269 operands[2] = GEN_INT (-shift2);
3270 gen_shifty_hi_op (LSHIFTRT, operands);
3272 emit_insn (size <= 8
3273 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3274 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3276 break;
3278 case 5:
3280 int i = 16 - size;
3281 if (! currently_expanding_to_rtl
3282 && ! reload_in_progress && ! reload_completed)
3283 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3284 else
3286 operands[0] = dest;
3287 operands[2] = GEN_INT (16 - insize);
3288 gen_shifty_hi_op (ASHIFT, operands);
3289 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3291 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3292 while (--i >= 0)
3293 gen_ashift (ASHIFTRT, 1, dest);
3294 break;
3296 case 6:
3297 case 7:
3298 /* Don't expand fine-grained when combining, because that will
3299 make the pattern fail. */
3300 if (! currently_expanding_to_rtl
3301 && ! reload_in_progress && ! reload_completed)
3303 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3304 emit_insn (gen_movsi (dest, source));
3305 break;
3307 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3308 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3309 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3310 operands[0] = dest;
3311 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3312 gen_shifty_op (ASHIFT, operands);
3313 if (kind == 7)
3314 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3315 break;
3316 default:
3317 return -1;
3319 return 0;
3322 /* Prefix a symbol_ref name with "datalabel". */
3325 gen_datalabel_ref (rtx sym)
3327 const char *str;
3329 if (GET_CODE (sym) == LABEL_REF)
3330 return gen_rtx_CONST (GET_MODE (sym),
3331 gen_rtx_UNSPEC (GET_MODE (sym),
3332 gen_rtvec (1, sym),
3333 UNSPEC_DATALABEL));
3335 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3337 str = XSTR (sym, 0);
3338 /* Share all SYMBOL_REF strings with the same value - that is important
3339 for cse. */
3340 str = IDENTIFIER_POINTER (get_identifier (str));
3341 XSTR (sym, 0) = str;
3343 return sym;
3347 static alloc_pool label_ref_list_pool;
3349 typedef struct label_ref_list_d
3351 rtx label;
3352 struct label_ref_list_d *next;
3353 } *label_ref_list_t;
3355 /* The SH cannot load a large constant into a register, constants have to
3356 come from a pc relative load. The reference of a pc relative load
3357 instruction must be less than 1k in front of the instruction. This
3358 means that we often have to dump a constant inside a function, and
3359 generate code to branch around it.
3361 It is important to minimize this, since the branches will slow things
3362 down and make things bigger.
3364 Worst case code looks like:
3366 mov.l L1,rn
3367 bra L2
3368 nop
3369 align
3370 L1: .long value
3371 L2:
3372 ..
3374 mov.l L3,rn
3375 bra L4
3376 nop
3377 align
3378 L3: .long value
3379 L4:
3380 ..
3382 We fix this by performing a scan before scheduling, which notices which
3383 instructions need to have their operands fetched from the constant table
3384 and builds the table.
3386 The algorithm is:
3388 scan, find an instruction which needs a pcrel move. Look forward, find the
3389 last barrier which is within MAX_COUNT bytes of the requirement.
3390 If there isn't one, make one. Process all the instructions between
3391 the find and the barrier.
3393 In the above example, we can tell that L3 is within 1k of L1, so
3394 the first move can be shrunk from the 3 insn+constant sequence into
3395 just 1 insn, and the constant moved to L3 to make:
3397 mov.l L1,rn
3398 ..
3399 mov.l L3,rn
3400 bra L4
3401 nop
3402 align
3403 L3:.long value
3404 L4:.long value
3406 Then the second move becomes the target for the shortening process. */
3408 typedef struct
3410 rtx value; /* Value in table. */
3411 rtx label; /* Label of value. */
3412 label_ref_list_t wend; /* End of window. */
3413 enum machine_mode mode; /* Mode of value. */
3415 /* True if this constant is accessed as part of a post-increment
3416 sequence. Note that HImode constants are never accessed in this way. */
3417 bool part_of_sequence_p;
3418 } pool_node;
3420 /* The maximum number of constants that can fit into one pool, since
3421 constants in the range 0..510 are at least 2 bytes long, and in the
3422 range from there to 1018 at least 4 bytes. */
3424 #define MAX_POOL_SIZE 372
3425 static pool_node pool_vector[MAX_POOL_SIZE];
3426 static int pool_size;
3427 static rtx pool_window_label;
3428 static int pool_window_last;
3430 static int max_labelno_before_reorg;
3432 /* ??? If we need a constant in HImode which is the truncated value of a
3433 constant we need in SImode, we could combine the two entries thus saving
3434 two bytes. Is this common enough to be worth the effort of implementing
3435 it? */
3437 /* ??? This stuff should be done at the same time that we shorten branches.
3438 As it is now, we must assume that all branches are the maximum size, and
3439 this causes us to almost always output constant pools sooner than
3440 necessary. */
3442 /* Add a constant to the pool and return its label. */
3444 static rtx
3445 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3447 int i;
3448 rtx lab, new_rtx;
3449 label_ref_list_t ref, newref;
3451 /* First see if we've already got it. */
3452 for (i = 0; i < pool_size; i++)
3454 if (x->code == pool_vector[i].value->code
3455 && mode == pool_vector[i].mode)
3457 if (x->code == CODE_LABEL)
3459 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3460 continue;
3462 if (rtx_equal_p (x, pool_vector[i].value))
3464 lab = new_rtx = 0;
3465 if (! last_value
3466 || ! i
3467 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3469 new_rtx = gen_label_rtx ();
3470 LABEL_REFS (new_rtx) = pool_vector[i].label;
3471 pool_vector[i].label = lab = new_rtx;
3473 if (lab && pool_window_label)
3475 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3476 newref->label = pool_window_label;
3477 ref = pool_vector[pool_window_last].wend;
3478 newref->next = ref;
3479 pool_vector[pool_window_last].wend = newref;
3481 if (new_rtx)
3482 pool_window_label = new_rtx;
3483 pool_window_last = i;
3484 return lab;
3489 /* Need a new one. */
3490 pool_vector[pool_size].value = x;
3491 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3493 lab = 0;
3494 pool_vector[pool_size - 1].part_of_sequence_p = true;
3496 else
3497 lab = gen_label_rtx ();
3498 pool_vector[pool_size].mode = mode;
3499 pool_vector[pool_size].label = lab;
3500 pool_vector[pool_size].wend = NULL;
3501 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3502 if (lab && pool_window_label)
3504 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3505 newref->label = pool_window_label;
3506 ref = pool_vector[pool_window_last].wend;
3507 newref->next = ref;
3508 pool_vector[pool_window_last].wend = newref;
3510 if (lab)
3511 pool_window_label = lab;
3512 pool_window_last = pool_size;
3513 pool_size++;
3514 return lab;
3517 /* Output the literal table. START, if nonzero, is the first instruction
3518 this table is needed for, and also indicates that there is at least one
3519 casesi_worker_2 instruction; we have to emit the operand3 labels from
3520 these insns at a 4-byte aligned position. BARRIER is the barrier
3521 after which we are to place the table. */
3523 static void
3524 dump_table (rtx start, rtx barrier)
3526 rtx scan = barrier;
3527 int i;
3528 int need_align = 1;
3529 rtx lab;
3530 label_ref_list_t ref;
3531 int have_df = 0;
3533 /* Do two passes, first time dump out the HI sized constants. */
3535 for (i = 0; i < pool_size; i++)
3537 pool_node *p = &pool_vector[i];
3539 if (p->mode == HImode)
3541 if (need_align)
3543 scan = emit_insn_after (gen_align_2 (), scan);
3544 need_align = 0;
3546 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3547 scan = emit_label_after (lab, scan);
3548 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3549 scan);
3550 for (ref = p->wend; ref; ref = ref->next)
3552 lab = ref->label;
3553 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3556 else if (p->mode == DFmode)
3557 have_df = 1;
3560 need_align = 1;
3562 if (start)
3564 scan = emit_insn_after (gen_align_4 (), scan);
3565 need_align = 0;
3566 for (; start != barrier; start = NEXT_INSN (start))
3567 if (GET_CODE (start) == INSN
3568 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3570 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3571 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3573 scan = emit_label_after (lab, scan);
3576 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3578 rtx align_insn = NULL_RTX;
3580 scan = emit_label_after (gen_label_rtx (), scan);
3581 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3582 need_align = 0;
3584 for (i = 0; i < pool_size; i++)
3586 pool_node *p = &pool_vector[i];
3588 switch (p->mode)
3590 case HImode:
3591 break;
3592 case SImode:
3593 case SFmode:
3594 if (align_insn && !p->part_of_sequence_p)
3596 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3597 emit_label_before (lab, align_insn);
3598 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3599 align_insn);
3600 for (ref = p->wend; ref; ref = ref->next)
3602 lab = ref->label;
3603 emit_insn_before (gen_consttable_window_end (lab),
3604 align_insn);
3606 delete_insn (align_insn);
3607 align_insn = NULL_RTX;
3608 continue;
3610 else
3612 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3613 scan = emit_label_after (lab, scan);
3614 scan = emit_insn_after (gen_consttable_4 (p->value,
3615 const0_rtx), scan);
3616 need_align = ! need_align;
3618 break;
3619 case DFmode:
3620 if (need_align)
3622 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3623 align_insn = scan;
3624 need_align = 0;
3626 case DImode:
3627 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3628 scan = emit_label_after (lab, scan);
3629 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3630 scan);
3631 break;
3632 default:
3633 gcc_unreachable ();
3636 if (p->mode != HImode)
3638 for (ref = p->wend; ref; ref = ref->next)
3640 lab = ref->label;
3641 scan = emit_insn_after (gen_consttable_window_end (lab),
3642 scan);
3647 pool_size = 0;
3650 for (i = 0; i < pool_size; i++)
3652 pool_node *p = &pool_vector[i];
3654 switch (p->mode)
3656 case HImode:
3657 break;
3658 case SImode:
3659 case SFmode:
3660 if (need_align)
3662 need_align = 0;
3663 scan = emit_label_after (gen_label_rtx (), scan);
3664 scan = emit_insn_after (gen_align_4 (), scan);
3666 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3667 scan = emit_label_after (lab, scan);
3668 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3669 scan);
3670 break;
3671 case DFmode:
3672 case DImode:
3673 if (need_align)
3675 need_align = 0;
3676 scan = emit_label_after (gen_label_rtx (), scan);
3677 scan = emit_insn_after (gen_align_4 (), scan);
3679 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3680 scan = emit_label_after (lab, scan);
3681 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3682 scan);
3683 break;
3684 default:
3685 gcc_unreachable ();
3688 if (p->mode != HImode)
3690 for (ref = p->wend; ref; ref = ref->next)
3692 lab = ref->label;
3693 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3698 scan = emit_insn_after (gen_consttable_end (), scan);
3699 scan = emit_barrier_after (scan);
3700 pool_size = 0;
3701 pool_window_label = NULL_RTX;
3702 pool_window_last = 0;
3705 /* Return nonzero if constant would be an ok source for a
3706 mov.w instead of a mov.l. */
3708 static int
3709 hi_const (rtx src)
3711 return (GET_CODE (src) == CONST_INT
3712 && INTVAL (src) >= -32768
3713 && INTVAL (src) <= 32767);
3716 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3718 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3720 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3721 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3722 need to fix it if the input value is CONST_OK_FOR_I08. */
3724 static int
3725 broken_move (rtx insn)
3727 if (GET_CODE (insn) == INSN)
3729 rtx pat = PATTERN (insn);
3730 if (GET_CODE (pat) == PARALLEL)
3731 pat = XVECEXP (pat, 0, 0);
3732 if (GET_CODE (pat) == SET
3733 /* We can load any 8-bit value if we don't care what the high
3734 order bits end up as. */
3735 && GET_MODE (SET_DEST (pat)) != QImode
3736 && (CONSTANT_P (SET_SRC (pat))
3737 /* Match mova_const. */
3738 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3739 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3740 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3741 && ! (TARGET_SH2E
3742 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3743 && (fp_zero_operand (SET_SRC (pat))
3744 || fp_one_operand (SET_SRC (pat)))
3745 /* ??? If this is a -m4 or -m4-single compilation, in general
3746 we don't know the current setting of fpscr, so disable fldi.
3747 There is an exception if this was a register-register move
3748 before reload - and hence it was ascertained that we have
3749 single precision setting - and in a post-reload optimization
3750 we changed this to do a constant load. In that case
3751 we don't have an r0 clobber, hence we must use fldi. */
3752 && (! TARGET_SH4 || TARGET_FMOVD
3753 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3754 == SCRATCH))
3755 && GET_CODE (SET_DEST (pat)) == REG
3756 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3757 && ! (TARGET_SH2A
3758 && GET_MODE (SET_DEST (pat)) == SImode
3759 && (satisfies_constraint_I20 (SET_SRC (pat))
3760 || satisfies_constraint_I28 (SET_SRC (pat))))
3761 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3762 return 1;
3765 return 0;
3768 static int
3769 mova_p (rtx insn)
3771 return (GET_CODE (insn) == INSN
3772 && GET_CODE (PATTERN (insn)) == SET
3773 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3774 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3775 /* Don't match mova_const. */
3776 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3779 /* Fix up a mova from a switch that went out of range. */
3780 static void
3781 fixup_mova (rtx mova)
3783 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3784 if (! flag_pic)
3786 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3787 INSN_CODE (mova) = -1;
3789 else
3791 rtx worker = mova;
3792 rtx lab = gen_label_rtx ();
3793 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
3797 worker = NEXT_INSN (worker);
3798 gcc_assert (worker
3799 && GET_CODE (worker) != CODE_LABEL
3800 && GET_CODE (worker) != JUMP_INSN);
3801 } while (GET_CODE (worker) == NOTE
3802 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3803 wpat = PATTERN (worker);
3804 wpat0 = XVECEXP (wpat, 0, 0);
3805 wpat1 = XVECEXP (wpat, 0, 1);
3806 wsrc = SET_SRC (wpat0);
3807 PATTERN (worker) = (gen_casesi_worker_2
3808 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3809 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3810 XEXP (wpat1, 0)));
3811 INSN_CODE (worker) = -1;
3812 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3813 base = gen_rtx_LABEL_REF (Pmode, lab);
3814 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
3815 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3816 INSN_CODE (mova) = -1;
3820 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3821 *num_mova, and check if the new mova is not nested within the first one.
3822 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3823 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3824 static int
3825 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3827 int n_addr = 0; /* Initialization to shut up spurious warning. */
3828 int f_target, n_target = 0; /* Likewise. */
3830 if (optimize)
3832 /* If NEW_MOVA has no address yet, it will be handled later. */
3833 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
3834 return -1;
3836 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3837 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3838 if (n_addr > n_target || n_addr + 1022 < n_target)
3840 /* Change the mova into a load.
3841 broken_move will then return true for it. */
3842 fixup_mova (new_mova);
3843 return 1;
3846 if (!(*num_mova)++)
3848 *first_mova = new_mova;
3849 return 2;
3851 if (!optimize
3852 || ((f_target
3853 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3854 >= n_target))
3855 return -1;
3857 (*num_mova)--;
3858 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3859 > n_target - n_addr)
3861 fixup_mova (*first_mova);
3862 return 0;
3864 else
3866 fixup_mova (new_mova);
3867 return 1;
3871 /* Find the last barrier from insn FROM which is close enough to hold the
3872 constant pool. If we can't find one, then create one near the end of
3873 the range. */
3875 static rtx
3876 find_barrier (int num_mova, rtx mova, rtx from)
3878 int count_si = 0;
3879 int count_hi = 0;
3880 int found_hi = 0;
3881 int found_si = 0;
3882 int found_di = 0;
3883 int hi_align = 2;
3884 int si_align = 2;
3885 int leading_mova = num_mova;
3886 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3887 int si_limit;
3888 int hi_limit;
3889 rtx orig = from;
3891 /* For HImode: range is 510, add 4 because pc counts from address of
3892 second instruction after this one, subtract 2 for the jump instruction
3893 that we may need to emit before the table, subtract 2 for the instruction
3894 that fills the jump delay slot (in very rare cases, reorg will take an
3895 instruction from after the constant pool or will leave the delay slot
3896 empty). This gives 510.
3897 For SImode: range is 1020, add 4 because pc counts from address of
3898 second instruction after this one, subtract 2 in case pc is 2 byte
3899 aligned, subtract 2 for the jump instruction that we may need to emit
3900 before the table, subtract 2 for the instruction that fills the jump
3901 delay slot. This gives 1018. */
3903 /* The branch will always be shortened now that the reference address for
3904 forward branches is the successor address, thus we need no longer make
3905 adjustments to the [sh]i_limit for -O0. */
3907 si_limit = 1018;
3908 hi_limit = 510;
3910 while (from && count_si < si_limit && count_hi < hi_limit)
3912 int inc = get_attr_length (from);
3913 int new_align = 1;
3915 /* If this is a label that existed at the time of the compute_alignments
3916 call, determine the alignment. N.B. When find_barrier recurses for
3917 an out-of-reach mova, we might see labels at the start of previously
3918 inserted constant tables. */
3919 if (GET_CODE (from) == CODE_LABEL
3920 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3922 if (optimize)
3923 new_align = 1 << label_to_alignment (from);
3924 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3925 new_align = 1 << barrier_align (from);
3926 else
3927 new_align = 1;
3928 inc = 0;
3930 /* In case we are scanning a constant table because of recursion, check
3931 for explicit alignments. If the table is long, we might be forced
3932 to emit the new table in front of it; the length of the alignment
3933 might be the last straw. */
3934 else if (GET_CODE (from) == INSN
3935 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3936 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3937 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3938 /* When we find the end of a constant table, paste the new constant
3939 at the end. That is better than putting it in front because
3940 this way, we don't need extra alignment for adding a 4-byte-aligned
3941 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3942 else if (GET_CODE (from) == INSN
3943 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3944 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3945 return from;
3947 if (GET_CODE (from) == BARRIER)
3949 rtx next;
3951 found_barrier = from;
3953 /* If we are at the end of the function, or in front of an alignment
3954 instruction, we need not insert an extra alignment. We prefer
3955 this kind of barrier. */
3956 if (barrier_align (from) > 2)
3957 good_barrier = from;
3959 /* If we are at the end of a hot/cold block, dump the constants
3960 here. */
3961 next = NEXT_INSN (from);
3962 if (next
3963 && NOTE_P (next)
3964 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3965 break;
3968 if (broken_move (from))
3970 rtx pat, src, dst;
3971 enum machine_mode mode;
3973 pat = PATTERN (from);
3974 if (GET_CODE (pat) == PARALLEL)
3975 pat = XVECEXP (pat, 0, 0);
3976 src = SET_SRC (pat);
3977 dst = SET_DEST (pat);
3978 mode = GET_MODE (dst);
3980 /* We must explicitly check the mode, because sometimes the
3981 front end will generate code to load unsigned constants into
3982 HImode targets without properly sign extending them. */
3983 if (mode == HImode
3984 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3986 found_hi += 2;
3987 /* We put the short constants before the long constants, so
3988 we must count the length of short constants in the range
3989 for the long constants. */
3990 /* ??? This isn't optimal, but is easy to do. */
3991 si_limit -= 2;
3993 else
3995 /* We dump DF/DI constants before SF/SI ones, because
3996 the limit is the same, but the alignment requirements
3997 are higher. We may waste up to 4 additional bytes
3998 for alignment, and the DF/DI constant may have
3999 another SF/SI constant placed before it. */
4000 if (TARGET_SHCOMPACT
4001 && ! found_di
4002 && (mode == DFmode || mode == DImode))
4004 found_di = 1;
4005 si_limit -= 8;
4007 while (si_align > 2 && found_si + si_align - 2 > count_si)
4008 si_align >>= 1;
4009 if (found_si > count_si)
4010 count_si = found_si;
4011 found_si += GET_MODE_SIZE (mode);
4012 if (num_mova)
4013 si_limit -= GET_MODE_SIZE (mode);
4017 if (mova_p (from))
4019 switch (untangle_mova (&num_mova, &mova, from))
4021 case 0: return find_barrier (0, 0, mova);
4022 case 2:
4024 leading_mova = 0;
4025 barrier_before_mova
4026 = good_barrier ? good_barrier : found_barrier;
4028 default: break;
4030 if (found_si > count_si)
4031 count_si = found_si;
4033 else if (GET_CODE (from) == JUMP_INSN
4034 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4035 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4037 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4038 || (num_mova
4039 && (prev_nonnote_insn (from)
4040 == XEXP (MOVA_LABELREF (mova), 0))))
4041 num_mova--;
4042 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4044 /* We have just passed the barrier in front of the
4045 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4046 the ADDR_DIFF_VEC is accessed as data, just like our pool
4047 constants, this is a good opportunity to accommodate what
4048 we have gathered so far.
4049 If we waited any longer, we could end up at a barrier in
4050 front of code, which gives worse cache usage for separated
4051 instruction / data caches. */
4052 good_barrier = found_barrier;
4053 break;
4055 else
4057 rtx body = PATTERN (from);
4058 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4061 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4062 else if (GET_CODE (from) == JUMP_INSN
4063 && ! TARGET_SH2
4064 && ! TARGET_SMALLCODE)
4065 new_align = 4;
4067 if (found_si)
4069 count_si += inc;
4070 if (new_align > si_align)
4072 si_limit -= (count_si - 1) & (new_align - si_align);
4073 si_align = new_align;
4075 count_si = (count_si + new_align - 1) & -new_align;
4077 if (found_hi)
4079 count_hi += inc;
4080 if (new_align > hi_align)
4082 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4083 hi_align = new_align;
4085 count_hi = (count_hi + new_align - 1) & -new_align;
4087 from = NEXT_INSN (from);
4090 if (num_mova)
4092 if (leading_mova)
4094 /* Try as we might, the leading mova is out of range. Change
4095 it into a load (which will become a pcload) and retry. */
4096 fixup_mova (mova);
4097 return find_barrier (0, 0, mova);
4099 else
4101 /* Insert the constant pool table before the mova instruction,
4102 to prevent the mova label reference from going out of range. */
4103 from = mova;
4104 good_barrier = found_barrier = barrier_before_mova;
4108 if (found_barrier)
4110 if (good_barrier && next_real_insn (found_barrier))
4111 found_barrier = good_barrier;
4113 else
4115 /* We didn't find a barrier in time to dump our stuff,
4116 so we'll make one. */
4117 rtx label = gen_label_rtx ();
4119 /* If we exceeded the range, then we must back up over the last
4120 instruction we looked at. Otherwise, we just need to undo the
4121 NEXT_INSN at the end of the loop. */
4122 if (PREV_INSN (from) != orig
4123 && (count_hi > hi_limit || count_si > si_limit))
4124 from = PREV_INSN (PREV_INSN (from));
4125 else
4126 from = PREV_INSN (from);
4128 /* Walk back to be just before any jump or label.
4129 Putting it before a label reduces the number of times the branch
4130 around the constant pool table will be hit. Putting it before
4131 a jump makes it more likely that the bra delay slot will be
4132 filled. */
4133 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4134 || GET_CODE (from) == CODE_LABEL)
4135 from = PREV_INSN (from);
4137 from = emit_jump_insn_after (gen_jump (label), from);
4138 JUMP_LABEL (from) = label;
4139 LABEL_NUSES (label) = 1;
4140 found_barrier = emit_barrier_after (from);
4141 emit_label_after (label, found_barrier);
4144 return found_barrier;
4147 /* If the instruction INSN is implemented by a special function, and we can
4148 positively find the register that is used to call the sfunc, and this
4149 register is not used anywhere else in this instruction except as the
4150 destination of a set, return this register; else, return 0. */
4152 sfunc_uses_reg (rtx insn)
4154 int i;
4155 rtx pattern, part, reg_part, reg;
4157 if (GET_CODE (insn) != INSN)
4158 return 0;
4159 pattern = PATTERN (insn);
4160 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4161 return 0;
4163 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4165 part = XVECEXP (pattern, 0, i);
4166 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4167 reg_part = part;
4169 if (! reg_part)
4170 return 0;
4171 reg = XEXP (reg_part, 0);
4172 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4174 part = XVECEXP (pattern, 0, i);
4175 if (part == reg_part || GET_CODE (part) == CLOBBER)
4176 continue;
4177 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4178 && GET_CODE (SET_DEST (part)) == REG)
4179 ? SET_SRC (part) : part)))
4180 return 0;
4182 return reg;
4185 /* See if the only way in which INSN uses REG is by calling it, or by
4186 setting it while calling it. Set *SET to a SET rtx if the register
4187 is set by INSN. */
4189 static int
4190 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4192 rtx pattern, reg2;
4194 *set = NULL_RTX;
4196 reg2 = sfunc_uses_reg (insn);
4197 if (reg2 && REGNO (reg2) == REGNO (reg))
4199 pattern = single_set (insn);
4200 if (pattern
4201 && GET_CODE (SET_DEST (pattern)) == REG
4202 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4203 *set = pattern;
4204 return 0;
4206 if (GET_CODE (insn) != CALL_INSN)
4208 /* We don't use rtx_equal_p because we don't care if the mode is
4209 different. */
4210 pattern = single_set (insn);
4211 if (pattern
4212 && GET_CODE (SET_DEST (pattern)) == REG
4213 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4215 rtx par, part;
4216 int i;
4218 *set = pattern;
4219 par = PATTERN (insn);
4220 if (GET_CODE (par) == PARALLEL)
4221 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4223 part = XVECEXP (par, 0, i);
4224 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4225 return 1;
4227 return reg_mentioned_p (reg, SET_SRC (pattern));
4230 return 1;
4233 pattern = PATTERN (insn);
4235 if (GET_CODE (pattern) == PARALLEL)
4237 int i;
4239 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4240 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4241 return 1;
4242 pattern = XVECEXP (pattern, 0, 0);
4245 if (GET_CODE (pattern) == SET)
4247 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4249 /* We don't use rtx_equal_p, because we don't care if the
4250 mode is different. */
4251 if (GET_CODE (SET_DEST (pattern)) != REG
4252 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4253 return 1;
4255 *set = pattern;
4258 pattern = SET_SRC (pattern);
4261 if (GET_CODE (pattern) != CALL
4262 || GET_CODE (XEXP (pattern, 0)) != MEM
4263 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4264 return 1;
4266 return 0;
4269 /* Given X, a pattern of an insn or a part of it, return a mask of the used
4270 general registers. Bits 0..15 mean that the respective registers
4271 are used as inputs in the instruction. Bits 16..31 mean that the
4272 registers 0..15, respectively, are used as outputs, or are clobbered.
4273 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4275 regs_used (rtx x, int is_dest)
4277 enum rtx_code code;
4278 const char *fmt;
4279 int i, used = 0;
4281 if (! x)
4282 return used;
4283 code = GET_CODE (x);
4284 switch (code)
4286 case REG:
4287 if (REGNO (x) < 16)
4288 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4289 << (REGNO (x) + is_dest));
4290 return 0;
4291 case SUBREG:
4293 rtx y = SUBREG_REG (x);
4295 if (GET_CODE (y) != REG)
4296 break;
4297 if (REGNO (y) < 16)
4298 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4299 << (REGNO (y) +
4300 subreg_regno_offset (REGNO (y),
4301 GET_MODE (y),
4302 SUBREG_BYTE (x),
4303 GET_MODE (x)) + is_dest));
4304 return 0;
4306 case SET:
4307 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4308 case RETURN:
4309 /* If there was a return value, it must have been indicated with USE. */
4310 return 0x00ffff00;
4311 case CLOBBER:
4312 is_dest = 1;
4313 break;
4314 case MEM:
4315 is_dest = 0;
4316 break;
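/* In the bit layout described above, 0x00ff00f0 marks r4-r7 as inputs and
   r0-r7 as outputs/clobbered; presumably the argument registers and the
   call-clobbered registers of the SH calling convention.  */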
4317 case CALL:
4318 used |= 0x00ff00f0;
4319 break;
4320 default:
4321 break;
4324 fmt = GET_RTX_FORMAT (code);
4326 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4328 if (fmt[i] == 'E')
4330 register int j;
4331 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4332 used |= regs_used (XVECEXP (x, i, j), is_dest);
4334 else if (fmt[i] == 'e')
4335 used |= regs_used (XEXP (x, i), is_dest);
4337 return used;
4340 /* Create an instruction that prevents redirection of a conditional branch
4341 to the destination of the JUMP with address ADDR.
4342 If the branch needs to be implemented as an indirect jump, try to find
4343 a scratch register for it.
4344 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4345 If any preceding insn that doesn't fit into a delay slot is good enough,
4346 pass 1. Pass 2 if a definite blocking insn is needed.
4347 -1 is used internally to avoid deep recursion.
4348 If a blocking instruction is made or recognized, return it. */
4350 static rtx
4351 gen_block_redirect (rtx jump, int addr, int need_block)
4353 int dead = 0;
4354 rtx prev = prev_nonnote_insn (jump);
4355 rtx dest;
4357 /* First, check if we already have an instruction that satisfies our need. */
4358 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4360 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4361 return prev;
4362 if (GET_CODE (PATTERN (prev)) == USE
4363 || GET_CODE (PATTERN (prev)) == CLOBBER
4364 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4365 prev = jump;
4366 else if ((need_block &= ~1) < 0)
4367 return prev;
4368 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4369 need_block = 0;
4371 if (GET_CODE (PATTERN (jump)) == RETURN)
4373 if (! need_block)
4374 return prev;
4375 /* Reorg even does nasty things with return insns that cause branches
4376 to go out of range - see find_end_label and callers. */
4377 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4379 /* We can't use JUMP_LABEL here because it might be undefined
4380 when not optimizing. */
4381 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4382 /* If the branch is out of range, try to find a scratch register for it. */
4383 if (optimize
4384 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4385 > 4092 + 4098))
4387 rtx scan;
4388 /* Don't look for the stack pointer as a scratch register;
4389 it would cause trouble if an interrupt occurred. */
4390 unsigned attempt = 0x7fff, used;
4391 int jump_left = flag_expensive_optimizations + 1;
4393 /* It is likely that the most recent eligible instruction is wanted for
4394 the delay slot. Therefore, find out which registers it uses, and
4395 try to avoid using them. */
4397 for (scan = jump; (scan = PREV_INSN (scan)); )
4399 enum rtx_code code;
4401 if (INSN_DELETED_P (scan))
4402 continue;
4403 code = GET_CODE (scan);
4404 if (code == CODE_LABEL || code == JUMP_INSN)
4405 break;
4406 if (code == INSN
4407 && GET_CODE (PATTERN (scan)) != USE
4408 && GET_CODE (PATTERN (scan)) != CLOBBER
4409 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4411 attempt &= ~regs_used (PATTERN (scan), 0);
4412 break;
4415 for (used = dead = 0, scan = JUMP_LABEL (jump);
4416 (scan = NEXT_INSN (scan)); )
4418 enum rtx_code code;
4420 if (INSN_DELETED_P (scan))
4421 continue;
4422 code = GET_CODE (scan);
4423 if (INSN_P (scan))
4425 used |= regs_used (PATTERN (scan), 0);
4426 if (code == CALL_INSN)
4427 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4428 dead |= (used >> 16) & ~used;
4429 if (dead & attempt)
4431 dead &= attempt;
4432 break;
4434 if (code == JUMP_INSN)
4436 if (jump_left-- && simplejump_p (scan))
4437 scan = JUMP_LABEL (scan);
4438 else
4439 break;
4443 /* Mask out the stack pointer again, in case it was
4444 the only 'free' register we have found. */
4445 dead &= 0x7fff;
4447 /* If the immediate destination is still in range, check for possible
4448 threading with a jump beyond the delay slot insn.
4449 Don't check if we are called recursively; the jump has been or will be
4450 checked in a different invocation in that case. */
4452 else if (optimize && need_block >= 0)
4454 rtx next = next_active_insn (next_active_insn (dest));
4455 if (next && GET_CODE (next) == JUMP_INSN
4456 && GET_CODE (PATTERN (next)) == SET
4457 && recog_memoized (next) == CODE_FOR_jump_compact)
4459 dest = JUMP_LABEL (next);
4460 if (dest
4461 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4462 > 4092 + 4098))
4463 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4467 if (dead)
4469 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4471 /* It would be nice if we could convert the jump into an indirect
4472 jump / far branch right now, and thus expose all constituent
4473 instructions to further optimization. However, reorg uses
4474 simplejump_p to determine if there is an unconditional jump where
4475 it should try to schedule instructions from the target of the
4476 branch; simplejump_p fails for indirect jumps even if they have
4477 a JUMP_LABEL. */
4478 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4479 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4480 , jump);
4481 /* ??? We would like this to have the scope of the jump, but that
4482 scope will change when a delay slot insn of an inner scope is added.
4483 Hence, after delay slot scheduling, we'll have to expect
4484 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4485 the jump. */
4487 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4488 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4489 return insn;
4491 else if (need_block)
4492 /* We can't use JUMP_LABEL here because it might be undefined
4493 when not optimizing. */
4494 return emit_insn_before (gen_block_branch_redirect
4495 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4496 , jump);
4497 return prev;
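/* Byte displacement window, relative to the address of a conditional branch,
   within which its target is assumed to be reachable; branches outside this
   window are split into a far branch sequence below.  */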
4500 #define CONDJUMP_MIN -252
4501 #define CONDJUMP_MAX 262
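/* Bookkeeping for a conditional branch whose ultimate destination may be out
   of range; split_branches keeps one such record per destination uid.  */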
4502 struct far_branch
4504 /* A label (to be placed) in front of the jump
4505 that jumps to our ultimate destination. */
4506 rtx near_label;
4507 /* Where we are going to insert it if we cannot move the jump any farther,
4508 or the jump itself if we have picked up an existing jump. */
4509 rtx insert_place;
4510 /* The ultimate destination. */
4511 rtx far_label;
4512 struct far_branch *prev;
4513 /* If the branch has already been created, its address;
4514 else the address of its first prospective user. */
4515 int address;
4518 static void gen_far_branch (struct far_branch *);
4519 enum mdep_reorg_phase_e mdep_reorg_phase;
4520 static void
4521 gen_far_branch (struct far_branch *bp)
4523 rtx insn = bp->insert_place;
4524 rtx jump;
4525 rtx label = gen_label_rtx ();
4526 int ok;
4528 emit_label_after (label, insn);
4529 if (bp->far_label)
4531 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4532 LABEL_NUSES (bp->far_label)++;
4534 else
4535 jump = emit_jump_insn_after (gen_return (), insn);
4536 /* Emit a barrier so that reorg knows that any following instructions
4537 are not reachable via a fall-through path.
4538 But don't do this when not optimizing, since we wouldn't suppress the
4539 alignment for the barrier then, and could end up with out-of-range
4540 pc-relative loads. */
4541 if (optimize)
4542 emit_barrier_after (jump);
4543 emit_label_after (bp->near_label, insn);
4544 JUMP_LABEL (jump) = bp->far_label;
4545 ok = invert_jump (insn, label, 1);
4546 gcc_assert (ok);
4548 /* If we are branching around a jump (rather than a return), prevent
4549 reorg from using an insn from the jump target as the delay slot insn -
4550 when reorg did this, it pessimized code (we would rather hide the delay slot)
4551 and it could cause branches to go out of range. */
4552 if (bp->far_label)
4553 (emit_insn_after
4554 (gen_stuff_delay_slot
4555 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4556 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4557 insn));
4558 /* Prevent reorg from undoing our splits. */
4559 gen_block_redirect (jump, bp->address += 2, 2);
4562 /* Fix up ADDR_DIFF_VECs. */
4563 void
4564 fixup_addr_diff_vecs (rtx first)
4566 rtx insn;
4568 for (insn = first; insn; insn = NEXT_INSN (insn))
4570 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4572 if (GET_CODE (insn) != JUMP_INSN
4573 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4574 continue;
4575 pat = PATTERN (insn);
4576 vec_lab = XEXP (XEXP (pat, 0), 0);
4578 /* Search the matching casesi_jump_2. */
4579 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4581 if (GET_CODE (prev) != JUMP_INSN)
4582 continue;
4583 prevpat = PATTERN (prev);
4584 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4585 continue;
4586 x = XVECEXP (prevpat, 0, 1);
4587 if (GET_CODE (x) != USE)
4588 continue;
4589 x = XEXP (x, 0);
4590 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4591 break;
4593 /* FIXME: This is a bug in the optimizer, but it seems harmless
4594 to just avoid panicking. */
4595 if (!prev)
4596 continue;
4598 /* Emit the reference label of the braf where it belongs, right after
4599 the casesi_jump_2 (i.e. braf). */
4600 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4601 emit_label_after (braf_label, prev);
4603 /* Fix up the ADDR_DIFF_VEC to be relative
4604 to the reference address of the braf. */
4605 XEXP (XEXP (pat, 0), 0) = braf_label;
4609 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4610 a barrier. Return the base 2 logarithm of the desired alignment. */
4612 barrier_align (rtx barrier_or_label)
4614 rtx next = next_real_insn (barrier_or_label), pat, prev;
4615 int slot, credit, jump_to_next = 0;
4617 if (! next)
4618 return 0;
4620 pat = PATTERN (next);
4622 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4623 return 2;
4625 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4626 /* This is a barrier in front of a constant table. */
4627 return 0;
4629 prev = prev_real_insn (barrier_or_label);
4630 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4632 pat = PATTERN (prev);
4633 /* If this is a very small table, we want to keep the alignment after
4634 the table to the minimum for proper code alignment. */
4635 return ((TARGET_SMALLCODE
4636 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4637 <= (unsigned) 1 << (CACHE_LOG - 2)))
4638 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4641 if (TARGET_SMALLCODE)
4642 return 0;
4644 if (! TARGET_SH2 || ! optimize)
4645 return align_jumps_log;
4647 /* When fixing up pcloads, a constant table might be inserted just before
4648 the basic block that ends with the barrier. Thus, we can't trust the
4649 instruction lengths before that. */
4650 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4652 /* Check if there is an immediately preceding branch to the insn beyond
4653 the barrier. We must weigh the cost of discarding useful information
4654 from the current cache line when executing this branch and there is
4655 an alignment, against that of fetching unneeded insns in front of the
4656 branch target when there is no alignment. */
4658 /* There are two delay_slot cases to consider. One is the simple case
4659 where the preceding branch is to the insn beyond the barrier (simple
4660 delay slot filling), and the other is where the preceding branch has
4661 a delay slot that is a duplicate of the insn after the barrier
4662 (fill_eager_delay_slots) and the branch is to the insn after the insn
4663 after the barrier. */
4665 /* PREV is presumed to be the JUMP_INSN for the barrier under
4666 investigation. Skip to the insn before it. */
4667 prev = prev_real_insn (prev);
4669 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4670 credit >= 0 && prev && GET_CODE (prev) == INSN;
4671 prev = prev_real_insn (prev))
4673 jump_to_next = 0;
4674 if (GET_CODE (PATTERN (prev)) == USE
4675 || GET_CODE (PATTERN (prev)) == CLOBBER)
4676 continue;
4677 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4679 prev = XVECEXP (PATTERN (prev), 0, 1);
4680 if (INSN_UID (prev) == INSN_UID (next))
4682 /* Delay slot was filled with insn at jump target. */
4683 jump_to_next = 1;
4684 continue;
4688 if (slot &&
4689 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4690 slot = 0;
4691 credit -= get_attr_length (prev);
4693 if (prev
4694 && GET_CODE (prev) == JUMP_INSN
4695 && JUMP_LABEL (prev))
4697 rtx x;
4698 if (jump_to_next
4699 || next_real_insn (JUMP_LABEL (prev)) == next
4700 /* If relax_delay_slots() decides NEXT was redundant
4701 with some previous instruction, it will have
4702 redirected PREV's jump to the following insn. */
4703 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4704 /* There is no upper bound on redundant instructions
4705 that might have been skipped, but we must not put an
4706 alignment where none had been before. */
4707 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4708 (INSN_P (x)
4709 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4710 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4711 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4713 rtx pat = PATTERN (prev);
4714 if (GET_CODE (pat) == PARALLEL)
4715 pat = XVECEXP (pat, 0, 0);
4716 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4717 return 0;
4722 return align_jumps_log;
4725 /* If we are inside a phony loop, almost any kind of label can turn up as the
4726 first one in the loop. Aligning a braf label causes incorrect switch
4727 destination addresses; we can detect braf labels because they are
4728 followed by a BARRIER.
4729 Applying loop alignment to small constant or switch tables is a waste
4730 of space, so we suppress this too. */
4732 sh_loop_align (rtx label)
4734 rtx next = label;
4737 next = next_nonnote_insn (next);
4738 while (next && GET_CODE (next) == CODE_LABEL);
4740 if (! next
4741 || ! INSN_P (next)
4742 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4743 || recog_memoized (next) == CODE_FOR_consttable_2)
4744 return 0;
4746 return align_loops_log;
4749 /* Do a final pass over the function, just before delayed branch
4750 scheduling. */
4752 static void
4753 sh_reorg (void)
4755 rtx first, insn, mova = NULL_RTX;
4756 int num_mova;
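/* r0 and a post-increment of r0 are used further down when broken
   floating-point constant loads are rewritten as loads through r0.  */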
4757 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4758 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4760 first = get_insns ();
4761 max_labelno_before_reorg = max_label_num ();
4763 /* We must split call insns before introducing `mova's. If we're
4764 optimizing, they'll have already been split. Otherwise, make
4765 sure we don't split them too late. */
4766 if (! optimize)
4767 split_all_insns_noflow ();
4769 if (TARGET_SHMEDIA)
4770 return;
4772 /* If relaxing, generate pseudo-ops to associate function calls with
4773 the symbols they call. It does no harm to not generate these
4774 pseudo-ops. However, when we can generate them, it enables the
4775 linker to potentially relax the jsr to a bsr, and eliminate the
4776 register load and, possibly, the constant pool entry. */
4778 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4779 if (TARGET_RELAX)
4781 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4782 own purposes. This works because none of the remaining passes
4783 need to look at them.
4785 ??? But it may break in the future. We should use a machine
4786 dependent REG_NOTE, or some other approach entirely. */
4787 for (insn = first; insn; insn = NEXT_INSN (insn))
4789 if (INSN_P (insn))
4791 rtx note;
4793 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4794 NULL_RTX)) != 0)
4795 remove_note (insn, note);
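/* For each call (or sfunc use), try to find the single insn that loads the
   register holding the call target; the pair is then tagged with matching
   REG_LABEL_OPERAND notes so that final_prescan_insn can emit the label and
   the .uses pseudo-op the linker needs for relaxation.  */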
4799 for (insn = first; insn; insn = NEXT_INSN (insn))
4801 rtx pattern, reg, link, set, scan, dies, label;
4802 int rescan = 0, foundinsn = 0;
4804 if (GET_CODE (insn) == CALL_INSN)
4806 pattern = PATTERN (insn);
4808 if (GET_CODE (pattern) == PARALLEL)
4809 pattern = XVECEXP (pattern, 0, 0);
4810 if (GET_CODE (pattern) == SET)
4811 pattern = SET_SRC (pattern);
4813 if (GET_CODE (pattern) != CALL
4814 || GET_CODE (XEXP (pattern, 0)) != MEM)
4815 continue;
4817 reg = XEXP (XEXP (pattern, 0), 0);
4819 else
4821 reg = sfunc_uses_reg (insn);
4822 if (! reg)
4823 continue;
4826 if (GET_CODE (reg) != REG)
4827 continue;
4829 /* Try scanning backward to find where the register is set. */
4830 link = NULL;
4831 for (scan = PREV_INSN (insn);
4832 scan && GET_CODE (scan) != CODE_LABEL;
4833 scan = PREV_INSN (scan))
4835 if (! INSN_P (scan))
4836 continue;
4838 if (! reg_mentioned_p (reg, scan))
4839 continue;
4841 if (noncall_uses_reg (reg, scan, &set))
4842 break;
4844 if (set)
4846 link = scan;
4847 break;
4851 if (! link)
4852 continue;
4854 /* The register is set at LINK. */
4856 /* We can only optimize the function call if the register is
4857 being set to a symbol. In theory, we could sometimes
4858 optimize calls to a constant location, but the assembler
4859 and linker do not support that at present. */
4860 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4861 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4862 continue;
4864 /* Scan forward from LINK to the place where REG dies, and
4865 make sure that the only insns which use REG are
4866 themselves function calls. */
4868 /* ??? This doesn't work for call targets that were allocated
4869 by reload, since there may not be a REG_DEAD note for the
4870 register. */
4872 dies = NULL_RTX;
4873 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4875 rtx scanset;
4877 /* Don't try to trace forward past a CODE_LABEL if we haven't
4878 seen INSN yet. Ordinarily, we will only find the setting insn
4879 if it is in the same basic block. However,
4880 cross-jumping can insert code labels in between the load and
4881 the call, and can result in situations where a single call
4882 insn may have two targets depending on where we came from. */
4884 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4885 break;
4887 if (! INSN_P (scan))
4888 continue;
4890 /* Don't try to trace forward past a JUMP. To optimize
4891 safely, we would have to check that all the
4892 instructions at the jump destination did not use REG. */
4894 if (GET_CODE (scan) == JUMP_INSN)
4895 break;
4897 if (! reg_mentioned_p (reg, scan))
4898 continue;
4900 if (noncall_uses_reg (reg, scan, &scanset))
4901 break;
4903 if (scan == insn)
4904 foundinsn = 1;
4906 if (scan != insn
4907 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4909 /* There is a function call to this register other
4910 than the one we are checking. If we optimize
4911 this call, we need to rescan again below. */
4912 rescan = 1;
4915 /* ??? We shouldn't have to worry about SCANSET here.
4916 We should just be able to check for a REG_DEAD note
4917 on a function call. However, the REG_DEAD notes are
4918 apparently not dependable around libcalls; c-torture
4919 execute/920501-2 is a test case. If SCANSET is set,
4920 then this insn sets the register, so it must have
4921 died earlier. Unfortunately, this will only handle
4922 the cases in which the register is, in fact, set in a
4923 later insn. */
4925 /* ??? We shouldn't have to use FOUNDINSN here.
4926 This dates back to when we used LOG_LINKS to find
4927 the most recent insn which sets the register. */
4929 if (foundinsn
4930 && (scanset
4931 || find_reg_note (scan, REG_DEAD, reg)))
4933 dies = scan;
4934 break;
4938 if (! dies)
4940 /* Either there was a branch, or some insn used REG
4941 other than as a function call address. */
4942 continue;
4945 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4946 on the insn which sets the register, and on each call insn
4947 which uses the register. In final_prescan_insn we look for
4948 the REG_LABEL_OPERAND notes, and output the appropriate label
4949 or pseudo-op. */
4951 label = gen_label_rtx ();
4952 add_reg_note (link, REG_LABEL_OPERAND, label);
4953 add_reg_note (insn, REG_LABEL_OPERAND, label);
4954 if (rescan)
4956 scan = link;
4959 rtx reg2;
4961 scan = NEXT_INSN (scan);
4962 if (scan != insn
4963 && ((GET_CODE (scan) == CALL_INSN
4964 && reg_mentioned_p (reg, scan))
4965 || ((reg2 = sfunc_uses_reg (scan))
4966 && REGNO (reg2) == REGNO (reg))))
4967 add_reg_note (scan, REG_LABEL_OPERAND, label);
4969 while (scan != dies);
4974 if (TARGET_SH2)
4975 fixup_addr_diff_vecs (first);
4977 if (optimize)
4979 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4980 shorten_branches (first);
4983 /* Scan the function looking for move instructions which have to be
4984 changed to pc-relative loads and insert the literal tables. */
4985 label_ref_list_pool = create_alloc_pool ("label references list",
4986 sizeof (struct label_ref_list_d),
4987 30);
4988 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4989 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4991 if (mova_p (insn))
4993 /* ??? basic block reordering can move a switch table dispatch
4994 below the switch table. Check if that has happened.
4995 We only have the addresses available when optimizing; but then,
4996 this check shouldn't be needed when not optimizing. */
4997 if (!untangle_mova (&num_mova, &mova, insn))
4999 insn = mova;
5000 num_mova = 0;
5003 else if (GET_CODE (insn) == JUMP_INSN
5004 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5005 && num_mova
5006 /* ??? loop invariant motion can also move a mova out of a
5007 loop. Since loop does this code motion anyway, maybe we
5008 should wrap UNSPEC_MOVA into a CONST, so that reload can
5009 move it back. */
5010 && ((num_mova > 1
5011 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5012 || (prev_nonnote_insn (insn)
5013 == XEXP (MOVA_LABELREF (mova), 0))))
5015 rtx scan;
5016 int total;
5018 num_mova--;
5020 /* Some code might have been inserted between the mova and
5021 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5022 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5023 total += get_attr_length (scan);
5025 /* Range of mova is 1020; add 4 because pc counts from address of
5026 second instruction after this one, subtract 2 in case pc is 2
5027 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5028 cancels out with alignment effects of the mova itself. */
5029 if (total > 1022)
5031 /* Change the mova into a load, and restart scanning
5032 there. broken_move will then return true for mova. */
5033 fixup_mova (mova);
5034 insn = mova;
5037 if (broken_move (insn)
5038 || (GET_CODE (insn) == INSN
5039 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5041 rtx scan;
5042 /* Scan ahead looking for a barrier to stick the constant table
5043 behind. */
5044 rtx barrier = find_barrier (num_mova, mova, insn);
5045 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5046 int need_aligned_label = 0;
5048 if (num_mova && ! mova_p (mova))
5050 /* find_barrier had to change the first mova into a
5051 pcload; thus, we have to start with this new pcload. */
5052 insn = mova;
5053 num_mova = 0;
5055 /* Now find all the moves between the points and modify them. */
5056 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5058 if (GET_CODE (scan) == CODE_LABEL)
5059 last_float = 0;
5060 if (GET_CODE (scan) == INSN
5061 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5062 need_aligned_label = 1;
5063 if (broken_move (scan))
5065 rtx *patp = &PATTERN (scan), pat = *patp;
5066 rtx src, dst;
5067 rtx lab;
5068 rtx newsrc;
5069 enum machine_mode mode;
5071 if (GET_CODE (pat) == PARALLEL)
5072 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5073 src = SET_SRC (pat);
5074 dst = SET_DEST (pat);
5075 mode = GET_MODE (dst);
5077 if (mode == SImode && hi_const (src)
5078 && REGNO (dst) != FPUL_REG)
5080 int offset = 0;
5082 mode = HImode;
5083 while (GET_CODE (dst) == SUBREG)
5085 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5086 GET_MODE (SUBREG_REG (dst)),
5087 SUBREG_BYTE (dst),
5088 GET_MODE (dst));
5089 dst = SUBREG_REG (dst);
5091 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5093 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5095 /* This must be an insn that clobbers r0. */
5096 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5097 XVECLEN (PATTERN (scan), 0)
5098 - 1);
5099 rtx clobber = *clobberp;
5101 gcc_assert (GET_CODE (clobber) == CLOBBER
5102 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5104 if (last_float
5105 && reg_set_between_p (r0_rtx, last_float_move, scan))
5106 last_float = 0;
5107 if (last_float
5108 && TARGET_SHCOMPACT
5109 && GET_MODE_SIZE (mode) != 4
5110 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5111 last_float = 0;
5112 lab = add_constant (src, mode, last_float);
5113 if (lab)
5114 emit_insn_before (gen_mova (lab), scan);
5115 else
5117 /* There will be a REG_UNUSED note for r0 on
5118 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5119 otherwise reorg's mark_target_live_regs will not
5120 consider r0 to be used, and we would end up with a delay
5121 slot insn in front of SCAN that clobbers r0. */
5122 rtx note
5123 = find_regno_note (last_float_move, REG_UNUSED, 0);
5125 /* If we are not optimizing, then there may not be
5126 a note. */
5127 if (note)
5128 PUT_MODE (note, REG_INC);
5130 *last_float_addr = r0_inc_rtx;
5132 last_float_move = scan;
5133 last_float = src;
5134 newsrc = gen_const_mem (mode,
5135 (((TARGET_SH4 && ! TARGET_FMOVD)
5136 || REGNO (dst) == FPUL_REG)
5137 ? r0_inc_rtx
5138 : r0_rtx));
5139 last_float_addr = &XEXP (newsrc, 0);
5141 /* Remove the clobber of r0. */
5142 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5143 gen_rtx_SCRATCH (Pmode));
5145 /* This is a mova needing a label. Create it. */
5146 else if (GET_CODE (src) == UNSPEC
5147 && XINT (src, 1) == UNSPEC_MOVA
5148 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5150 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5151 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5152 newsrc = gen_rtx_UNSPEC (SImode,
5153 gen_rtvec (1, newsrc),
5154 UNSPEC_MOVA);
5156 else
5158 lab = add_constant (src, mode, 0);
5159 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5160 newsrc = gen_const_mem (mode, newsrc);
5162 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5163 INSN_CODE (scan) = -1;
5166 dump_table (need_aligned_label ? insn : 0, barrier);
5167 insn = barrier;
5170 free_alloc_pool (label_ref_list_pool);
5171 for (insn = first; insn; insn = NEXT_INSN (insn))
5172 PUT_MODE (insn, VOIDmode);
5174 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5175 INSN_ADDRESSES_FREE ();
5176 split_branches (first);
5178 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5179 also has an effect on the register that holds the address of the sfunc.
5180 Insert an extra dummy insn in front of each sfunc that pretends to
5181 use this register. */
5182 if (flag_delayed_branch)
5184 for (insn = first; insn; insn = NEXT_INSN (insn))
5186 rtx reg = sfunc_uses_reg (insn);
5188 if (! reg)
5189 continue;
5190 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5193 #if 0
5194 /* fpscr is not actually a user variable, but we pretend it is for the
5195 sake of the previous optimization passes, since we want it handled like
5196 one. However, we don't have any debugging information for it, so turn
5197 it into a non-user variable now. */
5198 if (TARGET_SH4)
5199 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5200 #endif
5201 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
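/* Return the uid of the insn that is the actual destination of LABEL,
   skipping past newly created redirection insns whose uids are >= MAX_UID
   and therefore cannot index our arrays; return 0 for an undefined label or
   when the destination is a return.  */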
5205 get_dest_uid (rtx label, int max_uid)
5207 rtx dest = next_real_insn (label);
5208 int dest_uid;
5209 if (! dest)
5210 /* This can happen for an undefined label. */
5211 return 0;
5212 dest_uid = INSN_UID (dest);
5213 /* If this is a newly created branch redirection blocking instruction,
5214 we cannot index the branch_uid or insn_addresses arrays with its
5215 uid. But then, we won't need to, because the actual destination is
5216 the following branch. */
5217 while (dest_uid >= max_uid)
5219 dest = NEXT_INSN (dest);
5220 dest_uid = INSN_UID (dest);
5222 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5223 return 0;
5224 return dest_uid;
5227 /* Split condbranches that are out of range. Also add clobbers for
5228 scratch registers that are needed in far jumps.
5229 We do this before delay slot scheduling, so that it can take our
5230 newly created instructions into account. It also allows us to
5231 find branches with common targets more easily. */
5233 static void
5234 split_branches (rtx first)
5236 rtx insn;
5237 struct far_branch **uid_branch, *far_branch_list = 0;
5238 int max_uid = get_max_uid ();
5239 int ok;
5241 /* Find out which branches are out of range. */
5242 shorten_branches (first);
5244 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5245 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5247 for (insn = first; insn; insn = NEXT_INSN (insn))
5248 if (! INSN_P (insn))
5249 continue;
5250 else if (INSN_DELETED_P (insn))
5252 /* Shorten_branches would split this instruction again,
5253 so transform it into a note. */
5254 SET_INSN_DELETED (insn);
5256 else if (GET_CODE (insn) == JUMP_INSN
5257 /* Don't mess with ADDR_DIFF_VEC */
5258 && (GET_CODE (PATTERN (insn)) == SET
5259 || GET_CODE (PATTERN (insn)) == RETURN))
5261 enum attr_type type = get_attr_type (insn);
5262 if (type == TYPE_CBRANCH)
5264 rtx next, beyond;
5266 if (get_attr_length (insn) > 4)
5268 rtx src = SET_SRC (PATTERN (insn));
5269 rtx olabel = XEXP (XEXP (src, 1), 0);
5270 int addr = INSN_ADDRESSES (INSN_UID (insn));
5271 rtx label = 0;
5272 int dest_uid = get_dest_uid (olabel, max_uid);
5273 struct far_branch *bp = uid_branch[dest_uid];
5275 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5276 the label if the LABEL_NUSES count drops to zero. There is
5277 always a jump_optimize pass that sets these values, but it
5278 proceeds to delete unreferenced code, and then if not
5279 optimizing, to un-delete the deleted instructions, thus
5280 leaving labels with too low uses counts. */
5281 if (! optimize)
5283 JUMP_LABEL (insn) = olabel;
5284 LABEL_NUSES (olabel)++;
5286 if (! bp)
5288 bp = (struct far_branch *) alloca (sizeof *bp);
5289 uid_branch[dest_uid] = bp;
5290 bp->prev = far_branch_list;
5291 far_branch_list = bp;
5292 bp->far_label
5293 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5294 LABEL_NUSES (bp->far_label)++;
5296 else
5298 label = bp->near_label;
5299 if (! label && bp->address - addr >= CONDJUMP_MIN)
5301 rtx block = bp->insert_place;
5303 if (GET_CODE (PATTERN (block)) == RETURN)
5304 block = PREV_INSN (block);
5305 else
5306 block = gen_block_redirect (block,
5307 bp->address, 2);
5308 label = emit_label_after (gen_label_rtx (),
5309 PREV_INSN (block));
5310 bp->near_label = label;
5312 else if (label && ! NEXT_INSN (label))
5314 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5315 bp->insert_place = insn;
5316 else
5317 gen_far_branch (bp);
5320 if (! label
5321 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5323 bp->near_label = label = gen_label_rtx ();
5324 bp->insert_place = insn;
5325 bp->address = addr;
5327 ok = redirect_jump (insn, label, 0);
5328 gcc_assert (ok);
5330 else
5332 /* get_attr_length (insn) == 2 */
5333 /* Check if we have a pattern where reorg wants to redirect
5334 the branch to a label from an unconditional branch that
5335 is too far away. */
5336 /* We can't use JUMP_LABEL here because it might be undefined
5337 when not optimizing. */
5338 /* A syntax error might cause beyond to be NULL_RTX. */
5339 beyond
5340 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5341 0));
5343 if (beyond
5344 && (GET_CODE (beyond) == JUMP_INSN
5345 || ((beyond = next_active_insn (beyond))
5346 && GET_CODE (beyond) == JUMP_INSN))
5347 && GET_CODE (PATTERN (beyond)) == SET
5348 && recog_memoized (beyond) == CODE_FOR_jump_compact
5349 && ((INSN_ADDRESSES
5350 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5351 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5352 > 252 + 258 + 2))
5353 gen_block_redirect (beyond,
5354 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5357 next = next_active_insn (insn);
5359 if ((GET_CODE (next) == JUMP_INSN
5360 || ((next = next_active_insn (next))
5361 && GET_CODE (next) == JUMP_INSN))
5362 && GET_CODE (PATTERN (next)) == SET
5363 && recog_memoized (next) == CODE_FOR_jump_compact
5364 && ((INSN_ADDRESSES
5365 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5366 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5367 > 252 + 258 + 2))
5368 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5370 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5372 int addr = INSN_ADDRESSES (INSN_UID (insn));
5373 rtx far_label = 0;
5374 int dest_uid = 0;
5375 struct far_branch *bp;
5377 if (type == TYPE_JUMP)
5379 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5380 dest_uid = get_dest_uid (far_label, max_uid);
5381 if (! dest_uid)
5383 /* Parse errors can lead to labels outside
5384 the insn stream. */
5385 if (! NEXT_INSN (far_label))
5386 continue;
5388 if (! optimize)
5390 JUMP_LABEL (insn) = far_label;
5391 LABEL_NUSES (far_label)++;
5393 redirect_jump (insn, NULL_RTX, 1);
5394 far_label = 0;
5397 bp = uid_branch[dest_uid];
5398 if (! bp)
5400 bp = (struct far_branch *) alloca (sizeof *bp);
5401 uid_branch[dest_uid] = bp;
5402 bp->prev = far_branch_list;
5403 far_branch_list = bp;
5404 bp->near_label = 0;
5405 bp->far_label = far_label;
5406 if (far_label)
5407 LABEL_NUSES (far_label)++;
5409 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5410 if (addr - bp->address <= CONDJUMP_MAX)
5411 emit_label_after (bp->near_label, PREV_INSN (insn));
5412 else
5414 gen_far_branch (bp);
5415 bp->near_label = 0;
5417 else
5418 bp->near_label = 0;
5419 bp->address = addr;
5420 bp->insert_place = insn;
5421 if (! far_label)
5422 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5423 else
5424 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5427 /* Generate all pending far branches,
5428 and free our references to the far labels. */
5429 while (far_branch_list)
5431 if (far_branch_list->near_label
5432 && ! NEXT_INSN (far_branch_list->near_label))
5433 gen_far_branch (far_branch_list);
5434 if (optimize
5435 && far_branch_list->far_label
5436 && ! --LABEL_NUSES (far_branch_list->far_label))
5437 delete_insn (far_branch_list->far_label);
5438 far_branch_list = far_branch_list->prev;
5441 /* Instruction length information is no longer valid due to the new
5442 instructions that have been generated. */
5443 init_insn_lengths ();
5446 /* Dump out instruction addresses, which is useful for debugging the
5447 constant pool table stuff.
5449 If relaxing, output the label and pseudo-ops used to link together
5450 calls and the instruction which set the registers. */
5452 /* ??? The addresses printed by this routine for insns are nonsense for
5453 insns which are inside of a sequence where none of the inner insns have
5454 variable length. This is because the second pass of shorten_branches
5455 does not bother to update them. */
5457 void
5458 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5459 int noperands ATTRIBUTE_UNUSED)
5461 if (TARGET_DUMPISIZE)
5462 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5464 if (TARGET_RELAX)
5466 rtx note;
5468 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5469 if (note)
5471 rtx pattern;
5473 pattern = PATTERN (insn);
5474 if (GET_CODE (pattern) == PARALLEL)
5475 pattern = XVECEXP (pattern, 0, 0);
5476 switch (GET_CODE (pattern))
5478 case SET:
5479 if (GET_CODE (SET_SRC (pattern)) != CALL
5480 && get_attr_type (insn) != TYPE_SFUNC)
5482 targetm.asm_out.internal_label
5483 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5484 break;
5486 /* else FALLTHROUGH */
5487 case CALL:
5488 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5489 CODE_LABEL_NUMBER (XEXP (note, 0)));
5490 break;
5492 default:
5493 gcc_unreachable ();
5499 /* Dump out any constants accumulated in the final pass. These will
5500 only be labels. */
5502 const char *
5503 output_jump_label_table (void)
5505 int i;
5507 if (pool_size)
5509 fprintf (asm_out_file, "\t.align 2\n");
5510 for (i = 0; i < pool_size; i++)
5512 pool_node *p = &pool_vector[i];
5514 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5515 CODE_LABEL_NUMBER (p->label));
5516 output_asm_insn (".long %O0", &p->value);
5518 pool_size = 0;
5521 return "";
5524 /* A full frame looks like:
5526 arg-5
5527 arg-4
5528 [ if current_function_anonymous_args
5529 arg-3
5530 arg-2
5531 arg-1
5532 arg-0 ]
5533 saved-fp
5534 saved-r10
5535 saved-r11
5536 saved-r12
5537 saved-pr
5538 local-n
5540 local-1
5541 local-0 <- fp points here. */
5543 /* Number of bytes pushed for anonymous args, used to pass information
5544 between expand_prologue and expand_epilogue. */
5546 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5547 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5548 for an epilogue and a negative value means that it's for a sibcall
5549 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5550 all the registers that are about to be restored, and hence dead. */
5552 static void
5553 output_stack_adjust (int size, rtx reg, int epilogue_p,
5554 HARD_REG_SET *live_regs_mask)
5556 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5557 if (size)
5559 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5561 /* This test is bogus, as output_stack_adjust is used to re-align the
5562 stack. */
5563 #if 0
5564 gcc_assert (!(size % align));
5565 #endif
5567 if (CONST_OK_FOR_ADD (size))
5568 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5569 /* Try to do it with two partial adjustments; however, we must make
5570 sure that the stack is properly aligned at all times, in case
5571 an interrupt occurs between the two partial adjustments. */
5572 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5573 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5575 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5576 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5578 else
5580 rtx const_reg;
5581 rtx insn;
5582 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5583 int i;
5585 /* If TEMP is invalid, we could temporarily save a general
5586 register to MACL. However, there is currently no need
5587 to handle this case, so just die when we see it. */
5588 if (epilogue_p < 0
5589 || current_function_interrupt
5590 || ! call_really_used_regs[temp] || fixed_regs[temp])
5591 temp = -1;
5592 if (temp < 0 && ! current_function_interrupt
5593 && (TARGET_SHMEDIA || epilogue_p >= 0))
5595 HARD_REG_SET temps;
5596 COPY_HARD_REG_SET (temps, call_used_reg_set);
5597 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5598 if (epilogue_p > 0)
5600 int nreg = 0;
5601 if (crtl->return_rtx)
5603 enum machine_mode mode;
5604 mode = GET_MODE (crtl->return_rtx);
5605 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5606 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5608 for (i = 0; i < nreg; i++)
5609 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5610 if (crtl->calls_eh_return)
5612 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5613 for (i = 0; i <= 3; i++)
5614 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5617 if (TARGET_SHMEDIA && epilogue_p < 0)
5618 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5619 CLEAR_HARD_REG_BIT (temps, i);
5620 if (epilogue_p <= 0)
5622 for (i = FIRST_PARM_REG;
5623 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5624 CLEAR_HARD_REG_BIT (temps, i);
5625 if (cfun->static_chain_decl != NULL)
5626 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5628 temp = scavenge_reg (&temps);
5630 if (temp < 0 && live_regs_mask)
5632 HARD_REG_SET temps;
5634 COPY_HARD_REG_SET (temps, *live_regs_mask);
5635 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5636 temp = scavenge_reg (&temps);
5638 if (temp < 0)
5640 rtx adj_reg, tmp_reg, mem;
5642 /* If we reached here, the most likely case is the (sibcall)
5643 epilogue for non-SHmedia. Put a special push/pop sequence
5644 for such a case as the last resort. This looks lengthy but
5645 should not be a problem because it seems to be very
5646 rare. */
5648 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5651 /* ??? There is still the slight possibility that r4 or
5652 r5 have been reserved as fixed registers or assigned
5653 as global registers, and they change during an
5654 interrupt. There are possible ways to handle this:
5656 - If we are adjusting the frame pointer (r14), we can do
5657 with a single temp register and an ordinary push / pop
5658 on the stack.
5659 - Grab any call-used or call-saved registers (i.e. not
5660 fixed or globals) for the temps we need. We might
5661 also grab r14 if we are adjusting the stack pointer.
5662 If we can't find enough available registers, issue
5663 a diagnostic and die - the user must have reserved
5664 way too many registers.
5665 But since all this is rather unlikely to happen and
5666 would require extra testing, we just die if r4 / r5
5667 are not available. */
5668 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5669 && !global_regs[4] && !global_regs[5]);
5671 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5672 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5673 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5674 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5675 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5676 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5677 emit_move_insn (mem, tmp_reg);
5678 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5679 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5680 emit_move_insn (mem, tmp_reg);
5681 emit_move_insn (reg, adj_reg);
5682 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5683 emit_move_insn (adj_reg, mem);
5684 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5685 emit_move_insn (tmp_reg, mem);
5686 /* Tell flow the insns that pop r4/r5 aren't dead. */
5687 emit_use (tmp_reg);
5688 emit_use (adj_reg);
5689 return;
5691 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5693 /* If SIZE is negative, subtract the positive value.
5694 This sometimes allows a constant pool entry to be shared
5695 between prologue and epilogue code. */
5696 if (size < 0)
5698 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5699 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5701 else
5703 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5704 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5706 if (! epilogue_p)
5707 REG_NOTES (insn)
5708 = (gen_rtx_EXPR_LIST
5709 (REG_FRAME_RELATED_EXPR,
5710 gen_rtx_SET (VOIDmode, reg,
5711 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5712 REG_NOTES (insn)));
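/* Emit X as an insn and mark it frame related, so that the prologue's stack
   adjustments and register saves show up in the DWARF unwind information.  */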
5717 static rtx
5718 frame_insn (rtx x)
5720 x = emit_insn (x);
5721 RTX_FRAME_RELATED_P (x) = 1;
5722 return x;
5725 /* Output RTL to push register RN onto the stack. */
5727 static rtx
5728 push (int rn)
5730 rtx x;
5731 if (rn == FPUL_REG)
5732 x = gen_push_fpul ();
5733 else if (rn == FPSCR_REG)
5734 x = gen_push_fpscr ();
5735 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5736 && FP_OR_XD_REGISTER_P (rn))
5738 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5739 return NULL_RTX;
5740 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5742 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5743 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5744 else
5745 x = gen_push (gen_rtx_REG (SImode, rn));
5747 x = frame_insn (x);
5748 REG_NOTES (x)
5749 = gen_rtx_EXPR_LIST (REG_INC,
5750 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5751 return x;
5754 /* Output RTL to pop register RN from the stack. */
5756 static void
5757 pop (int rn)
5759 rtx x;
5760 if (rn == FPUL_REG)
5761 x = gen_pop_fpul ();
5762 else if (rn == FPSCR_REG)
5763 x = gen_pop_fpscr ();
5764 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5765 && FP_OR_XD_REGISTER_P (rn))
5767 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5768 return;
5769 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5771 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5772 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5773 else
5774 x = gen_pop (gen_rtx_REG (SImode, rn));
5776 x = emit_insn (x);
5777 REG_NOTES (x)
5778 = gen_rtx_EXPR_LIST (REG_INC,
5779 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5782 /* Generate code to push the regs specified in the mask. */
5784 static void
5785 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5787 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5788 int skip_fpscr = 0;
5790 /* Push PR last; this gives better latencies after the prologue, and
5791 candidates for the return delay slot when there are no general
5792 registers pushed. */
5793 for (; i < FIRST_PSEUDO_REGISTER; i++)
5795 /* If this is an interrupt handler, and the SZ bit varies,
5796 and we have to push any floating point register, we need
5797 to switch to the correct precision first. */
5798 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5799 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5801 HARD_REG_SET unsaved;
5803 push (FPSCR_REG);
5804 COMPL_HARD_REG_SET (unsaved, *mask);
5805 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5806 skip_fpscr = 1;
5808 if (i != PR_REG
5809 && (i != FPSCR_REG || ! skip_fpscr)
5810 && TEST_HARD_REG_BIT (*mask, i))
5812 /* If the ISR has the RESBANK attribute assigned, don't push any of
5813 the following registers: R0-R14, MACH, MACL and GBR. */
5814 if (! (sh_cfun_resbank_handler_p ()
5815 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5816 || i == MACH_REG
5817 || i == MACL_REG
5818 || i == GBR_REG)))
5819 push (i);
5823 /* Push banked registers last to improve delay slot opportunities. */
5824 if (interrupt_handler)
5825 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5826 if (TEST_HARD_REG_BIT (*mask, i))
5827 push (i);
5829 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5830 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5831 push (PR_REG);
5834 /* Calculate how much extra space is needed to save all callee-saved
5835 target registers.
5836 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5838 static int
5839 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5841 int reg;
5842 int stack_space = 0;
5843 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5845 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5846 if ((! call_really_used_regs[reg] || interrupt_handler)
5847 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5848 /* Leave space to save this target register on the stack,
5849 in case target register allocation wants to use it. */
5850 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5851 return stack_space;
5854 /* Decide whether we should reserve space for callee-save target registers,
5855 in case target register allocation wants to use them. REGS_SAVED is
5856 the space, in bytes, that is already required for register saves.
5857 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5859 static int
5860 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5861 HARD_REG_SET *live_regs_mask)
5863 if (optimize_size)
5864 return 0;
5865 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5868 /* Decide how much space to reserve for callee-save target registers
5869 in case target register allocation wants to use them.
5870 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5872 static int
5873 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5875 if (shmedia_space_reserved_for_target_registers)
5876 return shmedia_target_regs_stack_space (live_regs_mask);
5877 else
5878 return 0;
5881 /* Work out the registers which need to be saved, both as a mask and a
5882 count of saved words. Return the count.
5884 If doing a pragma interrupt function, then push all regs used by the
5885 function, and if we call another function (we can tell by looking at PR),
5886 make sure that all the regs it clobbers are safe too. */
5888 static int
5889 calc_live_regs (HARD_REG_SET *live_regs_mask)
5891 unsigned int reg;
5892 int count;
5893 tree attrs;
5894 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5895 bool nosave_low_regs;
5896 int pr_live, has_call;
5898 attrs = DECL_ATTRIBUTES (current_function_decl);
5899 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5900 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5901 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5902 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5904 CLEAR_HARD_REG_SET (*live_regs_mask);
5905 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5906 && df_regs_ever_live_p (FPSCR_REG))
5907 target_flags &= ~MASK_FPU_SINGLE;
5908 /* If we can avoid a lot of saves by switching to double mode, do that. */
5909 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5910 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5911 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5912 && (! call_really_used_regs[reg]
5913 || interrupt_handler)
5914 && ++count > 2)
5916 target_flags &= ~MASK_FPU_SINGLE;
5917 break;
5919 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5920 knows how to use it. That means the pseudo originally allocated for
5921 the initial value can become the PR_MEDIA_REG hard register, as seen for
5922 execute/20010122-1.c:test9. */
5923 if (TARGET_SHMEDIA)
5924 /* ??? this function is called from initial_elimination_offset, hence we
5925 can't use the result of sh_media_register_for_return here. */
5926 pr_live = sh_pr_n_sets ();
5927 else
5929 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5930 pr_live = (pr_initial
5931 ? (GET_CODE (pr_initial) != REG
5932 || REGNO (pr_initial) != (PR_REG))
5933 : df_regs_ever_live_p (PR_REG));
5934 /* For SHcompact, if not optimizing, we end up with a memory reference
5935 using the return address pointer for __builtin_return_address even
5936 though there is no actual need to put the PR register on the stack. */
5937 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5939 /* Force PR to be live if the prologue has to call the SHmedia
5940 argument decoder or register saver. */
5941 if (TARGET_SHCOMPACT
5942 && ((crtl->args.info.call_cookie
5943 & ~ CALL_COOKIE_RET_TRAMP (1))
5944 || crtl->saves_all_registers))
5945 pr_live = 1;
5946 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5947 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5949 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5950 ? pr_live
5951 : interrupt_handler
5952 ? (/* Need to save all the regs ever live. */
5953 (df_regs_ever_live_p (reg)
5954 || (call_really_used_regs[reg]
5955 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5956 || reg == PIC_OFFSET_TABLE_REGNUM)
5957 && has_call)
5958 || (TARGET_SHMEDIA && has_call
5959 && REGISTER_NATURAL_MODE (reg) == SImode
5960 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5961 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5962 && reg != RETURN_ADDRESS_POINTER_REGNUM
5963 && reg != T_REG && reg != GBR_REG
5964 /* Push fpscr only on targets which have an FPU. */
5965 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5966 : (/* Only push those regs which are used and need to be saved. */
5967 (TARGET_SHCOMPACT
5968 && flag_pic
5969 && crtl->args.info.call_cookie
5970 && reg == PIC_OFFSET_TABLE_REGNUM)
5971 || (df_regs_ever_live_p (reg)
5972 && ((!call_really_used_regs[reg]
5973 && !(reg != PIC_OFFSET_TABLE_REGNUM
5974 && fixed_regs[reg] && call_used_regs[reg]))
5975 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5976 || (crtl->calls_eh_return
5977 && (reg == EH_RETURN_DATA_REGNO (0)
5978 || reg == EH_RETURN_DATA_REGNO (1)
5979 || reg == EH_RETURN_DATA_REGNO (2)
5980 || reg == EH_RETURN_DATA_REGNO (3)))
5981 || ((reg == MACL_REG || reg == MACH_REG)
5982 && df_regs_ever_live_p (reg)
5983 && sh_cfun_attr_renesas_p ())
5986 SET_HARD_REG_BIT (*live_regs_mask, reg);
5987 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5989 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5990 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5992 if (FP_REGISTER_P (reg))
5994 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5996 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5997 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6000 else if (XD_REGISTER_P (reg))
6002 /* Must switch to double mode to access these registers. */
6003 target_flags &= ~MASK_FPU_SINGLE;
6007 if (nosave_low_regs && reg == R8_REG)
6008 break;
6010 /* If we have a target register optimization pass after prologue / epilogue
6011 threading, we need to assume all target registers will be live even if
6012 they aren't now. */
6013 if (flag_branch_target_load_optimize2
6014 && TARGET_SAVE_ALL_TARGET_REGS
6015 && shmedia_space_reserved_for_target_registers)
6016 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6017 if ((! call_really_used_regs[reg] || interrupt_handler)
6018 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6020 SET_HARD_REG_BIT (*live_regs_mask, reg);
6021 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6023 /* If this is an interrupt handler, we don't have any call-clobbered
6024 registers we can conveniently use for target register save/restore.
6025 Make sure we save at least one general purpose register when we need
6026 to save target registers. */
6027 if (interrupt_handler
6028 && hard_reg_set_intersect_p (*live_regs_mask,
6029 reg_class_contents[TARGET_REGS])
6030 && ! hard_reg_set_intersect_p (*live_regs_mask,
6031 reg_class_contents[GENERAL_REGS]))
6033 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6034 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6037 return count;
6040 /* Code to generate prologue and epilogue sequences */
6042 /* PUSHED is the number of bytes that are being pushed on the
6043 stack for register saves. Return the frame size, padded
6044 appropriately so that the stack stays properly aligned. */
6045 static HOST_WIDE_INT
6046 rounded_frame_size (int pushed)
6048 HOST_WIDE_INT size = get_frame_size ();
6049 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6051 return ((size + pushed + align - 1) & -align) - pushed;
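/* A worked example of the rounding above (illustrative values,
   assuming STACK_BOUNDARY / BITS_PER_UNIT == 8): with
   get_frame_size () == 20 and PUSHED == 12, the result is
   ((20 + 12 + 7) & -8) - 12 == 20, so the register saves plus the
   frame occupy 32 bytes in total and the stack stays 8-byte
   aligned.  */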
6054 /* Choose a call-clobbered target-branch register that remains
6055 unchanged along the whole function. We set it up as the return
6056 value in the prologue. */
6058 sh_media_register_for_return (void)
6060 int regno;
6061 int tr0_used;
6063 if (! current_function_is_leaf)
6064 return -1;
6065 if (lookup_attribute ("interrupt_handler",
6066 DECL_ATTRIBUTES (current_function_decl)))
6067 return -1;
6068 if (sh_cfun_interrupt_handler_p ())
6069 return -1;
6071 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6073 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6074 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6075 return regno;
6077 return -1;
6080 /* The maximum number of registers we may need to save is:
6081 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6082 - 32 floating point registers (for each pair, we save none,
6083 one single precision value, or a double precision value).
6084 - 8 target registers
6085 - add 1 entry for a delimiter. */
6086 #define MAX_SAVED_REGS (62+32+8)
6088 typedef struct save_entry_s
6090 unsigned char reg;
6091 unsigned char mode;
6092 short offset;
6093 } save_entry;
6095 #define MAX_TEMPS 4
6097 /* There will be a delimiter entry with VOIDmode both at the start and the
6098 end of a filled in schedule. The end delimiter has the offset of the
6099 save with the smallest (i.e. most negative) offset. */
6100 typedef struct save_schedule_s
6102 save_entry entries[MAX_SAVED_REGS + 2];
6103 int temps[MAX_TEMPS+1];
6104 } save_schedule;
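/* Illustration of a filled-in schedule (derived from the code below,
   not an actual dump): entries[0] is a VOIDmode delimiter whose offset
   is OFFSET_BASE, followed by one entry per saved register at strictly
   decreasing offsets, and finally another VOIDmode delimiter carrying
   the smallest (most negative) offset; temps[] holds the general
   registers usable as scratch registers and is terminated by -1.  */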
6106 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6107 use reverse order. Returns the last entry written to (not counting
6108 the delimiter). OFFSET_BASE is a number to be added to all offset
6109 entries. */
6111 static save_entry *
6112 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6113 int offset_base)
6115 int align, i;
6116 save_entry *entry = schedule->entries;
6117 int tmpx = 0;
6118 int offset;
6120 if (! current_function_interrupt)
6121 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6122 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6123 && ! FUNCTION_ARG_REGNO_P (i)
6124 && i != FIRST_RET_REG
6125 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6126 && ! (crtl->calls_eh_return
6127 && (i == EH_RETURN_STACKADJ_REGNO
6128 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6129 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6130 schedule->temps[tmpx++] = i;
6131 entry->reg = -1;
6132 entry->mode = VOIDmode;
6133 entry->offset = offset_base;
6134 entry++;
6135 /* We loop twice: first, we save 8-byte aligned registers at the
6136 higher addresses, which are known to be aligned. Then, we
6137 proceed to saving 32-bit registers that don't need 8-byte
6138 alignment.
6139 If this is an interrupt function, all registers that need saving
6140 need to be saved in full. Moreover, we need to postpone saving
6141 target registers until we have saved some general purpose registers
6142 we can then use as scratch registers. */
6143 offset = offset_base;
6144 for (align = 1; align >= 0; align--)
6146 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6147 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6149 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6150 int reg = i;
6152 if (current_function_interrupt)
6154 if (TARGET_REGISTER_P (i))
6155 continue;
6156 if (GENERAL_REGISTER_P (i))
6157 mode = DImode;
6159 if (mode == SFmode && (i % 2) == 1
6160 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6161 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6163 mode = DFmode;
6164 i--;
6165 reg--;
6168 /* If we're doing the aligned pass and this is not aligned,
6169 or we're doing the unaligned pass and this is aligned,
6170 skip it. */
6171 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6172 != align)
6173 continue;
6175 if (current_function_interrupt
6176 && GENERAL_REGISTER_P (i)
6177 && tmpx < MAX_TEMPS)
6178 schedule->temps[tmpx++] = i;
6180 offset -= GET_MODE_SIZE (mode);
6181 entry->reg = i;
6182 entry->mode = mode;
6183 entry->offset = offset;
6184 entry++;
6186 if (align && current_function_interrupt)
6187 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6188 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6190 offset -= GET_MODE_SIZE (DImode);
6191 entry->reg = i;
6192 entry->mode = DImode;
6193 entry->offset = offset;
6194 entry++;
6197 entry->reg = -1;
6198 entry->mode = VOIDmode;
6199 entry->offset = offset;
6200 schedule->temps[tmpx] = -1;
6201 return entry - 1;
6204 void
6205 sh_expand_prologue (void)
6207 HARD_REG_SET live_regs_mask;
6208 int d, i;
6209 int d_rounding = 0;
6210 int save_flags = target_flags;
6211 int pretend_args;
6212 tree sp_switch_attr
6213 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6215 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6217 /* We have pretend args if we had an object sent partially in registers
6218 and partially on the stack, e.g. a large structure. */
6219 pretend_args = crtl->args.pretend_args_size;
6220 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6221 && (NPARM_REGS(SImode)
6222 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6223 pretend_args = 0;
6224 output_stack_adjust (-pretend_args
6225 - crtl->args.info.stack_regs * 8,
6226 stack_pointer_rtx, 0, NULL);
6228 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6229 /* We're going to use the PIC register to load the address of the
6230 incoming-argument decoder and/or of the return trampoline from
6231 the GOT, so make sure the PIC register is preserved and
6232 initialized. */
6233 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6235 if (TARGET_SHCOMPACT
6236 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6238 int reg;
6240 /* First, make all registers with incoming arguments that will
6241 be pushed onto the stack live, so that register renaming
6242 doesn't overwrite them. */
6243 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6244 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6245 >= NPARM_REGS (SImode) - reg)
6246 for (; reg < NPARM_REGS (SImode); reg++)
6247 emit_insn (gen_shcompact_preserve_incoming_args
6248 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6249 else if (CALL_COOKIE_INT_REG_GET
6250 (crtl->args.info.call_cookie, reg) == 1)
6251 emit_insn (gen_shcompact_preserve_incoming_args
6252 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6254 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6255 stack_pointer_rtx);
6256 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6257 GEN_INT (crtl->args.info.call_cookie));
6258 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6259 gen_rtx_REG (SImode, R0_REG));
6261 else if (TARGET_SHMEDIA)
6263 int tr = sh_media_register_for_return ();
6265 if (tr >= 0)
6266 emit_move_insn (gen_rtx_REG (DImode, tr),
6267 gen_rtx_REG (DImode, PR_MEDIA_REG));
6270 /* Emit the code for SETUP_VARARGS. */
6271 if (cfun->stdarg)
6273 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6275 /* Push arg regs as if they'd been provided by the caller on the stack. */
6276 for (i = 0; i < NPARM_REGS(SImode); i++)
6278 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6279 rtx insn;
6281 if (i >= (NPARM_REGS(SImode)
6282 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6284 break;
6285 insn = push (rn);
6290 /* If we're supposed to switch stacks at function entry, do so now. */
6291 if (sp_switch_attr)
6293 /* The argument specifies a variable holding the address of the
6294 stack the interrupt function should switch to/from at entry/exit. */
6295 const char *s
6296 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6297 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6299 emit_insn (gen_sp_switch_1 (sp_switch));
6302 d = calc_live_regs (&live_regs_mask);
6303 /* ??? Maybe we could save some switching if we can move a mode switch
6304 that already happens to be at the function start into the prologue. */
6305 if (target_flags != save_flags && ! current_function_interrupt)
6306 emit_insn (gen_toggle_sz ());
6308 if (TARGET_SH5)
6310 int offset_base, offset;
6311 rtx r0 = NULL_RTX;
6312 int offset_in_r0 = -1;
6313 int sp_in_r0 = 0;
6314 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6315 int total_size, save_size;
6316 save_schedule schedule;
6317 save_entry *entry;
6318 int *tmp_pnt;
6320 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6321 && ! current_function_interrupt)
6322 r0 = gen_rtx_REG (Pmode, R0_REG);
6324 /* D is the actual number of bytes that we need for saving registers;
6325 however, in initial_elimination_offset we have committed to using
6326 an additional TREGS_SPACE bytes. In order to keep both the
6327 addresses of arguments supplied by the caller and local variables
6328 valid, we must keep this gap. Place it between the incoming
6329 arguments and the actually saved registers in a bid to optimize
6330 locality of reference. */
6331 total_size = d + tregs_space;
6332 total_size += rounded_frame_size (total_size);
6333 save_size = total_size - rounded_frame_size (d);
6334 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6335 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6336 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6338 /* If adjusting the stack in a single step costs nothing extra, do so.
6339 I.e. either if a single addi is enough, or we need a movi anyway,
6340 and we don't exceed the maximum offset range (the test for the
6341 latter is conservative for simplicity). */
6342 if (TARGET_SHMEDIA
6343 && (CONST_OK_FOR_I10 (-total_size)
6344 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6345 && total_size <= 2044)))
6346 d_rounding = total_size - save_size;
6348 offset_base = d + d_rounding;
6350 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6351 0, NULL);
6353 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6354 tmp_pnt = schedule.temps;
6355 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6357 enum machine_mode mode = entry->mode;
6358 unsigned int reg = entry->reg;
6359 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6360 rtx orig_reg_rtx;
6362 offset = entry->offset;
6364 reg_rtx = gen_rtx_REG (mode, reg);
6366 mem_rtx = gen_frame_mem (mode,
6367 gen_rtx_PLUS (Pmode,
6368 stack_pointer_rtx,
6369 GEN_INT (offset)));
6371 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6373 gcc_assert (r0);
6374 mem_rtx = NULL_RTX;
6376 try_pre_dec:
6378 if (HAVE_PRE_DECREMENT
6379 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6380 || mem_rtx == NULL_RTX
6381 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6383 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6385 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6386 pre_dec_ok);
6388 pre_dec = NULL_RTX;
6390 break;
6392 pre_dec_ok:
6393 mem_rtx = NULL_RTX;
6394 offset += GET_MODE_SIZE (mode);
6396 while (0);
6398 if (mem_rtx != NULL_RTX)
6399 goto addr_ok;
6401 if (offset_in_r0 == -1)
6403 emit_move_insn (r0, GEN_INT (offset));
6404 offset_in_r0 = offset;
6406 else if (offset != offset_in_r0)
6408 emit_move_insn (r0,
6409 gen_rtx_PLUS
6410 (Pmode, r0,
6411 GEN_INT (offset - offset_in_r0)));
6412 offset_in_r0 += offset - offset_in_r0;
6415 if (pre_dec != NULL_RTX)
6417 if (! sp_in_r0)
6419 emit_move_insn (r0,
6420 gen_rtx_PLUS
6421 (Pmode, r0, stack_pointer_rtx));
6422 sp_in_r0 = 1;
6425 offset -= GET_MODE_SIZE (mode);
6426 offset_in_r0 -= GET_MODE_SIZE (mode);
6428 mem_rtx = pre_dec;
6430 else if (sp_in_r0)
6431 mem_rtx = gen_frame_mem (mode, r0);
6432 else
6433 mem_rtx = gen_frame_mem (mode,
6434 gen_rtx_PLUS (Pmode,
6435 stack_pointer_rtx,
6436 r0));
6438 /* We must not use an r0-based address for target-branch
6439 registers or for special registers without pre-dec
6440 memory addresses, since we store their values in r0
6441 first. */
6442 gcc_assert (!TARGET_REGISTER_P (reg)
6443 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6444 || mem_rtx == pre_dec));
6446 addr_ok:
6447 orig_reg_rtx = reg_rtx;
6448 if (TARGET_REGISTER_P (reg)
6449 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6450 && mem_rtx != pre_dec))
6452 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6454 emit_move_insn (tmp_reg, reg_rtx);
6456 if (REGNO (tmp_reg) == R0_REG)
6458 offset_in_r0 = -1;
6459 sp_in_r0 = 0;
6460 gcc_assert (!refers_to_regno_p
6461 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6464 if (*++tmp_pnt <= 0)
6465 tmp_pnt = schedule.temps;
6467 reg_rtx = tmp_reg;
6470 rtx insn;
6472 /* Mark as interesting for the DWARF CFI generator. */
6473 insn = emit_move_insn (mem_rtx, reg_rtx);
6474 RTX_FRAME_RELATED_P (insn) = 1;
6475 /* If we use an intermediate register for the save, we can't
6476 describe this exactly in CFI as a copy of the to-be-saved
6477 register into the temporary register followed by a store of the
6478 temporary register to the stack, because the temporary register can
6479 have a different natural size than the to-be-saved register.
6480 Thus, we gloss over the intermediate copy and pretend we do
6481 a direct save from the to-be-saved register. */
6482 if (REGNO (reg_rtx) != reg)
6484 rtx set, note_rtx;
6486 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6487 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6488 REG_NOTES (insn));
6489 REG_NOTES (insn) = note_rtx;
6492 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6494 rtx reg_rtx = gen_rtx_REG (mode, reg);
6495 rtx set, note_rtx;
6496 rtx mem_rtx = gen_frame_mem (mode,
6497 gen_rtx_PLUS (Pmode,
6498 stack_pointer_rtx,
6499 GEN_INT (offset)));
6501 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6502 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6503 REG_NOTES (insn));
6504 REG_NOTES (insn) = note_rtx;
6509 gcc_assert (entry->offset == d_rounding);
6511 else
6512 push_regs (&live_regs_mask, current_function_interrupt);
6514 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6515 emit_insn (gen_GOTaddr2picreg ());
6517 if (SHMEDIA_REGS_STACK_ADJUST ())
6519 /* This must NOT go through the PLT, otherwise mach and macl
6520 may be clobbered. */
6521 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6522 (TARGET_FPU_ANY
6523 ? "__GCC_push_shmedia_regs"
6524 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6525 emit_insn (gen_shmedia_save_restore_regs_compact
6526 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6529 if (target_flags != save_flags && ! current_function_interrupt)
6530 emit_insn (gen_toggle_sz ());
6532 target_flags = save_flags;
6534 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6535 stack_pointer_rtx, 0, NULL);
6537 if (frame_pointer_needed)
6538 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6540 if (TARGET_SHCOMPACT
6541 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6543 /* This must NOT go through the PLT, otherwise mach and macl
6544 may be clobbered. */
6545 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6546 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6547 emit_insn (gen_shcompact_incoming_args ());
6551 void
6552 sh_expand_epilogue (bool sibcall_p)
6554 HARD_REG_SET live_regs_mask;
6555 int d, i;
6556 int d_rounding = 0;
6558 int save_flags = target_flags;
6559 int frame_size, save_size;
6560 int fpscr_deferred = 0;
6561 int e = sibcall_p ? -1 : 1;
6563 d = calc_live_regs (&live_regs_mask);
6565 save_size = d;
6566 frame_size = rounded_frame_size (d);
6568 if (TARGET_SH5)
6570 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6571 int total_size;
6572 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6573 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6574 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6576 total_size = d + tregs_space;
6577 total_size += rounded_frame_size (total_size);
6578 save_size = total_size - frame_size;
6580 /* If adjusting the stack in a single step costs nothing extra, do so.
6581 I.e. either if a single addi is enough, or we need a movi anyway,
6582 and we don't exceed the maximum offset range (the test for the
6583 latter is conservative for simplicity). */
6584 if (TARGET_SHMEDIA
6585 && ! frame_pointer_needed
6586 && (CONST_OK_FOR_I10 (total_size)
6587 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6588 && total_size <= 2044)))
6589 d_rounding = frame_size;
6591 frame_size -= d_rounding;
6594 if (frame_pointer_needed)
6596 /* We must avoid scheduling the epilogue with previous basic blocks
6597 when exception handling is enabled. See PR/18032. */
6598 if (flag_exceptions)
6599 emit_insn (gen_blockage ());
6600 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6601 &live_regs_mask);
6603 /* We must avoid moving the stack pointer adjustment past code
6604 which reads from the local frame, else an interrupt could
6605 occur after the SP adjustment and clobber data in the local
6606 frame. */
6607 emit_insn (gen_blockage ());
6608 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6610 else if (frame_size)
6612 /* We must avoid moving the stack pointer adjustment past code
6613 which reads from the local frame, else an interrupt could
6614 occur after the SP adjustment and clobber data in the local
6615 frame. */
6616 emit_insn (gen_blockage ());
6617 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6620 if (SHMEDIA_REGS_STACK_ADJUST ())
6622 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6623 (TARGET_FPU_ANY
6624 ? "__GCC_pop_shmedia_regs"
6625 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6626 /* This must NOT go through the PLT, otherwise mach and macl
6627 may be clobbered. */
6628 emit_insn (gen_shmedia_save_restore_regs_compact
6629 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6632 /* Pop all the registers. */
6634 if (target_flags != save_flags && ! current_function_interrupt)
6635 emit_insn (gen_toggle_sz ());
6636 if (TARGET_SH5)
6638 int offset_base, offset;
6639 int offset_in_r0 = -1;
6640 int sp_in_r0 = 0;
6641 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6642 save_schedule schedule;
6643 save_entry *entry;
6644 int *tmp_pnt;
6646 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6647 offset_base = -entry[1].offset + d_rounding;
6648 tmp_pnt = schedule.temps;
6649 for (; entry->mode != VOIDmode; entry--)
6651 enum machine_mode mode = entry->mode;
6652 int reg = entry->reg;
6653 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6655 offset = offset_base + entry->offset;
6656 reg_rtx = gen_rtx_REG (mode, reg);
6658 mem_rtx = gen_frame_mem (mode,
6659 gen_rtx_PLUS (Pmode,
6660 stack_pointer_rtx,
6661 GEN_INT (offset)));
6663 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6665 mem_rtx = NULL_RTX;
6667 try_post_inc:
6669 if (HAVE_POST_INCREMENT
6670 && (offset == offset_in_r0
6671 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6672 && mem_rtx == NULL_RTX)
6673 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6675 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6677 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6678 post_inc_ok);
6680 post_inc = NULL_RTX;
6682 break;
6684 post_inc_ok:
6685 mem_rtx = NULL_RTX;
6687 while (0);
6689 if (mem_rtx != NULL_RTX)
6690 goto addr_ok;
6692 if (offset_in_r0 == -1)
6694 emit_move_insn (r0, GEN_INT (offset));
6695 offset_in_r0 = offset;
6697 else if (offset != offset_in_r0)
6699 emit_move_insn (r0,
6700 gen_rtx_PLUS
6701 (Pmode, r0,
6702 GEN_INT (offset - offset_in_r0)));
6703 offset_in_r0 += offset - offset_in_r0;
6706 if (post_inc != NULL_RTX)
6708 if (! sp_in_r0)
6710 emit_move_insn (r0,
6711 gen_rtx_PLUS
6712 (Pmode, r0, stack_pointer_rtx));
6713 sp_in_r0 = 1;
6716 mem_rtx = post_inc;
6718 offset_in_r0 += GET_MODE_SIZE (mode);
6720 else if (sp_in_r0)
6721 mem_rtx = gen_frame_mem (mode, r0);
6722 else
6723 mem_rtx = gen_frame_mem (mode,
6724 gen_rtx_PLUS (Pmode,
6725 stack_pointer_rtx,
6726 r0));
6728 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6729 || mem_rtx == post_inc);
6731 addr_ok:
6732 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6733 && mem_rtx != post_inc)
6735 insn = emit_move_insn (r0, mem_rtx);
6736 mem_rtx = r0;
6738 else if (TARGET_REGISTER_P (reg))
6740 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6742 /* Give the scheduler a bit of freedom by using up to
6743 MAX_TEMPS registers in a round-robin fashion. */
6744 insn = emit_move_insn (tmp_reg, mem_rtx);
6745 mem_rtx = tmp_reg;
6746 if (*++tmp_pnt < 0)
6747 tmp_pnt = schedule.temps;
6750 insn = emit_move_insn (reg_rtx, mem_rtx);
6753 gcc_assert (entry->offset + offset_base == d + d_rounding);
6755 else /* ! TARGET_SH5 */
6757 int last_reg;
6759 save_size = 0;
6760 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
6761 register. */
6762 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6763 && !sh_cfun_resbank_handler_p ())
6765 if (!frame_pointer_needed)
6766 emit_insn (gen_blockage ());
6767 pop (PR_REG);
6770 /* Banked registers are popped first to avoid being scheduled in the
6771 delay slot. RTE switches banks before the delay-slot instruction. */
6772 if (current_function_interrupt)
6774 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6775 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6776 pop (LAST_BANKED_REG - i);
6778 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6780 else
6781 last_reg = FIRST_PSEUDO_REGISTER;
6783 for (i = 0; i < last_reg; i++)
6785 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6787 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6788 && hard_reg_set_intersect_p (live_regs_mask,
6789 reg_class_contents[DF_REGS]))
6790 fpscr_deferred = 1;
6791 /* For an ISR with the RESBANK attribute assigned, don't pop the
6792 following registers: R0-R14, MACH, MACL and GBR. */
6793 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6794 && ! (sh_cfun_resbank_handler_p ()
6795 && ((j >= FIRST_GENERAL_REG
6796 && j < LAST_GENERAL_REG)
6797 || j == MACH_REG
6798 || j == MACL_REG
6799 || j == GBR_REG)))
6800 pop (j);
6802 if (j == FIRST_FP_REG && fpscr_deferred)
6803 pop (FPSCR_REG);
6806 if (target_flags != save_flags && ! current_function_interrupt)
6807 emit_insn (gen_toggle_sz ());
6808 target_flags = save_flags;
6810 output_stack_adjust (crtl->args.pretend_args_size
6811 + save_size + d_rounding
6812 + crtl->args.info.stack_regs * 8,
6813 stack_pointer_rtx, e, NULL);
6815 if (crtl->calls_eh_return)
6816 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6817 EH_RETURN_STACKADJ_RTX));
6819 /* Switch back to the normal stack if necessary. */
6820 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6821 emit_insn (gen_sp_switch_2 ());
6823 /* Tell flow the insn that pops PR isn't dead. */
6824 /* PR_REG will never be live in SHmedia mode, and we don't need to
6825 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6826 by the return pattern. */
6827 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6828 emit_use (gen_rtx_REG (SImode, PR_REG));
6831 static int sh_need_epilogue_known = 0;
6834 sh_need_epilogue (void)
6836 if (! sh_need_epilogue_known)
6838 rtx epilogue;
6840 start_sequence ();
6841 sh_expand_epilogue (0);
6842 epilogue = get_insns ();
6843 end_sequence ();
6844 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6846 return sh_need_epilogue_known > 0;
6849 /* Emit code to change the current function's return address to RA.
6850 TEMP is available as a scratch register, if needed. */
6852 void
6853 sh_set_return_address (rtx ra, rtx tmp)
6855 HARD_REG_SET live_regs_mask;
6856 int d;
6857 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6858 int pr_offset;
6860 d = calc_live_regs (&live_regs_mask);
6862 /* If pr_reg isn't live, we can set it (or the register given in
6863 sh_media_register_for_return) directly. */
6864 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6866 rtx rr;
6868 if (TARGET_SHMEDIA)
6870 int rr_regno = sh_media_register_for_return ();
6872 if (rr_regno < 0)
6873 rr_regno = pr_reg;
6875 rr = gen_rtx_REG (DImode, rr_regno);
6877 else
6878 rr = gen_rtx_REG (SImode, pr_reg);
6880 emit_insn (GEN_MOV (rr, ra));
6881 /* Tell flow the register for return isn't dead. */
6882 emit_use (rr);
6883 return;
6886 if (TARGET_SH5)
6888 int offset;
6889 save_schedule schedule;
6890 save_entry *entry;
6892 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6893 offset = entry[1].offset;
6894 for (; entry->mode != VOIDmode; entry--)
6895 if (entry->reg == pr_reg)
6896 goto found;
6898 /* We can't find the PR register. */
6899 gcc_unreachable ();
6901 found:
6902 offset = entry->offset - offset;
6903 pr_offset = (rounded_frame_size (d) + offset
6904 + SHMEDIA_REGS_STACK_ADJUST ());
6906 else
6907 pr_offset = rounded_frame_size (d);
6909 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6910 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6912 tmp = gen_frame_mem (Pmode, tmp);
6913 emit_insn (GEN_MOV (tmp, ra));
6916 /* Clear variables at function end. */
6918 static void
6919 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6920 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6922 sh_need_epilogue_known = 0;
6925 static rtx
6926 sh_builtin_saveregs (void)
6928 /* First unnamed integer register. */
6929 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6930 /* Number of integer registers we need to save. */
6931 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6932 /* First unnamed SFmode float reg */
6933 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6934 /* Number of SFmode float regs to save. */
6935 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6936 rtx regbuf, fpregs;
6937 int bufsize, regno;
6938 alias_set_type alias_set;
6940 if (TARGET_SH5)
6942 if (n_intregs)
6944 int pushregs = n_intregs;
6946 while (pushregs < NPARM_REGS (SImode) - 1
6947 && (CALL_COOKIE_INT_REG_GET
6948 (crtl->args.info.call_cookie,
6949 NPARM_REGS (SImode) - pushregs)
6950 == 1))
6952 crtl->args.info.call_cookie
6953 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6954 - pushregs, 1);
6955 pushregs++;
6958 if (pushregs == NPARM_REGS (SImode))
6959 crtl->args.info.call_cookie
6960 |= (CALL_COOKIE_INT_REG (0, 1)
6961 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6962 else
6963 crtl->args.info.call_cookie
6964 |= CALL_COOKIE_STACKSEQ (pushregs);
6966 crtl->args.pretend_args_size += 8 * n_intregs;
6968 if (TARGET_SHCOMPACT)
6969 return const0_rtx;
6972 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6974 error ("__builtin_saveregs not supported by this subtarget");
6975 return const0_rtx;
6978 if (TARGET_SHMEDIA)
6979 n_floatregs = 0;
6981 /* Allocate a block of memory for the regs. */
6982 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6983 Or can assign_stack_local accept a 0 SIZE argument? */
6984 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6986 if (TARGET_SHMEDIA)
6987 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6988 else if (n_floatregs & 1)
6990 rtx addr;
6992 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6993 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6994 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6995 regbuf = change_address (regbuf, BLKmode, addr);
6997 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6999 rtx addr, mask;
7001 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7002 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7003 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7004 emit_insn (gen_andsi3 (addr, addr, mask));
7005 regbuf = change_address (regbuf, BLKmode, addr);
7007 else
7008 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7009 alias_set = get_varargs_alias_set ();
7010 set_mem_alias_set (regbuf, alias_set);
7012 /* Save int args.
7013 This is optimized to only save the regs that are necessary. Explicitly
7014 named args need not be saved. */
7015 if (n_intregs > 0)
7016 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7017 adjust_address (regbuf, BLKmode,
7018 n_floatregs * UNITS_PER_WORD),
7019 n_intregs);
7021 if (TARGET_SHMEDIA)
7022 /* Return the address of the regbuf. */
7023 return XEXP (regbuf, 0);
7025 /* Save float args.
7026 This is optimized to only save the regs that are necessary. Explicitly
7027 named args need not be saved.
7028 We explicitly build a pointer to the buffer because it halves the insn
7029 count when not optimizing (otherwise the pointer is built for each reg
7030 saved).
7031 We emit the moves in reverse order so that we can use predecrement. */
7033 fpregs = copy_to_mode_reg (Pmode,
7034 plus_constant (XEXP (regbuf, 0),
7035 n_floatregs * UNITS_PER_WORD));
7036 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7038 rtx mem;
7039 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7041 emit_insn (gen_addsi3 (fpregs, fpregs,
7042 GEN_INT (-2 * UNITS_PER_WORD)));
7043 mem = change_address (regbuf, DFmode, fpregs);
7044 emit_move_insn (mem,
7045 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7047 regno = first_floatreg;
7048 if (regno & 1)
7050 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7051 mem = change_address (regbuf, SFmode, fpregs);
7052 emit_move_insn (mem,
7053 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7054 - (TARGET_LITTLE_ENDIAN != 0)));
7057 else
7058 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7060 rtx mem;
7062 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7063 mem = change_address (regbuf, SFmode, fpregs);
7064 emit_move_insn (mem,
7065 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7068 /* Return the address of the regbuf. */
7069 return XEXP (regbuf, 0);
7072 /* Define the `__builtin_va_list' type for the ABI. */
7074 static tree
7075 sh_build_builtin_va_list (void)
7077 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7078 tree record;
7080 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7081 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7082 return ptr_type_node;
7084 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7086 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7087 ptr_type_node);
7088 f_next_o_limit = build_decl (FIELD_DECL,
7089 get_identifier ("__va_next_o_limit"),
7090 ptr_type_node);
7091 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7092 ptr_type_node);
7093 f_next_fp_limit = build_decl (FIELD_DECL,
7094 get_identifier ("__va_next_fp_limit"),
7095 ptr_type_node);
7096 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7097 ptr_type_node);
7099 DECL_FIELD_CONTEXT (f_next_o) = record;
7100 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7101 DECL_FIELD_CONTEXT (f_next_fp) = record;
7102 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7103 DECL_FIELD_CONTEXT (f_next_stack) = record;
7105 TYPE_FIELDS (record) = f_next_o;
7106 TREE_CHAIN (f_next_o) = f_next_o_limit;
7107 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7108 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7109 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7111 layout_type (record);
7113 return record;
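/* Roughly equivalent C for the record built above (a sketch for
   illustration only; the field meanings are inferred from sh_va_start
   and sh_gimplify_va_arg_expr below):

     struct __va_list {
       void *__va_next_o;        -- next unnamed integer arg slot
       void *__va_next_o_limit;  -- end of the integer-arg save area
       void *__va_next_fp;       -- next unnamed FP arg slot
       void *__va_next_fp_limit; -- end of the FP-arg save area
       void *__va_next_stack;    -- remaining args passed on the stack
     };  */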
7116 /* Implement `va_start' for varargs and stdarg. */
7118 static void
7119 sh_va_start (tree valist, rtx nextarg)
7121 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7122 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7123 tree t, u;
7124 int nfp, nint;
7126 if (TARGET_SH5)
7128 expand_builtin_saveregs ();
7129 std_expand_builtin_va_start (valist, nextarg);
7130 return;
7133 if ((! TARGET_SH2E && ! TARGET_SH4)
7134 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7136 std_expand_builtin_va_start (valist, nextarg);
7137 return;
7140 f_next_o = TYPE_FIELDS (va_list_type_node);
7141 f_next_o_limit = TREE_CHAIN (f_next_o);
7142 f_next_fp = TREE_CHAIN (f_next_o_limit);
7143 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7144 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7146 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7147 NULL_TREE);
7148 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7149 valist, f_next_o_limit, NULL_TREE);
7150 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7151 NULL_TREE);
7152 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7153 valist, f_next_fp_limit, NULL_TREE);
7154 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7155 valist, f_next_stack, NULL_TREE);
7157 /* Call __builtin_saveregs. */
7158 u = make_tree (sizetype, expand_builtin_saveregs ());
7159 u = fold_convert (ptr_type_node, u);
7160 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7161 TREE_SIDE_EFFECTS (t) = 1;
7162 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7164 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7165 if (nfp < 8)
7166 nfp = 8 - nfp;
7167 else
7168 nfp = 0;
7169 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7170 size_int (UNITS_PER_WORD * nfp));
7171 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7172 TREE_SIDE_EFFECTS (t) = 1;
7173 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7175 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7176 TREE_SIDE_EFFECTS (t) = 1;
7177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7179 nint = crtl->args.info.arg_count[SH_ARG_INT];
7180 if (nint < 4)
7181 nint = 4 - nint;
7182 else
7183 nint = 0;
7184 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7185 size_int (UNITS_PER_WORD * nint));
7186 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7187 TREE_SIDE_EFFECTS (t) = 1;
7188 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7190 u = make_tree (ptr_type_node, nextarg);
7191 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7192 TREE_SIDE_EFFECTS (t) = 1;
7193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
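/* Worked example for the pointer setup above (hypothetical counts,
   assuming UNITS_PER_WORD == 4): if two FP args and one integer arg
   are named, then nfp == 6 and nint == 3, so next_fp points at the
   start of the register save buffer, next_fp_limit and next_o point
   24 bytes in, next_o_limit a further 12 bytes in, and next_stack is
   set to NEXTARG.  */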
7196 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7197 member, return it. */
7198 static tree
7199 find_sole_member (tree type)
7201 tree field, member = NULL_TREE;
7203 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7205 if (TREE_CODE (field) != FIELD_DECL)
7206 continue;
7207 if (!DECL_SIZE (field))
7208 return NULL_TREE;
7209 if (integer_zerop (DECL_SIZE (field)))
7210 continue;
7211 if (member)
7212 return NULL_TREE;
7213 member = field;
7215 return member;
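/* For example, given "struct wrapper { double d; }", find_sole_member
   returns the FIELD_DECL for "d"; sh_gimplify_va_arg_expr below then
   treats the struct like its sole member when choosing between the FP
   and integer argument areas (a sketch of the intent; see the comment
   on single-member structures below).  */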
7217 /* Implement `va_arg'. */
7219 static tree
7220 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7221 gimple_seq *post_p ATTRIBUTE_UNUSED)
7223 HOST_WIDE_INT size, rsize;
7224 tree tmp, pptr_type_node;
7225 tree addr, lab_over = NULL, result = NULL;
7226 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7227 tree eff_type;
7229 if (pass_by_ref)
7230 type = build_pointer_type (type);
7232 size = int_size_in_bytes (type);
7233 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7234 pptr_type_node = build_pointer_type (ptr_type_node);
7236 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7237 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7239 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7240 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7241 int pass_as_float;
7242 tree lab_false;
7243 tree member;
7245 f_next_o = TYPE_FIELDS (va_list_type_node);
7246 f_next_o_limit = TREE_CHAIN (f_next_o);
7247 f_next_fp = TREE_CHAIN (f_next_o_limit);
7248 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7249 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7251 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7252 NULL_TREE);
7253 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7254 valist, f_next_o_limit, NULL_TREE);
7255 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7256 valist, f_next_fp, NULL_TREE);
7257 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7258 valist, f_next_fp_limit, NULL_TREE);
7259 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7260 valist, f_next_stack, NULL_TREE);
7262 /* Structures with a single member with a distinct mode are passed
7263 like their member. This is relevant if the latter has a REAL_TYPE
7264 or COMPLEX_TYPE type. */
7265 eff_type = type;
7266 while (TREE_CODE (eff_type) == RECORD_TYPE
7267 && (member = find_sole_member (eff_type))
7268 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7269 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7270 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7272 tree field_type = TREE_TYPE (member);
7274 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7275 eff_type = field_type;
7276 else
7278 gcc_assert ((TYPE_ALIGN (eff_type)
7279 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7280 || (TYPE_ALIGN (eff_type)
7281 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7282 break;
7286 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7288 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7289 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7290 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7291 && size <= 16));
7293 else
7295 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7298 addr = create_tmp_var (pptr_type_node, NULL);
7299 lab_false = create_artificial_label ();
7300 lab_over = create_artificial_label ();
7302 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7304 if (pass_as_float)
7306 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7307 tree cmp;
7308 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7310 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7311 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7313 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7314 tmp = next_fp_limit;
7315 if (size > 4 && !is_double)
7316 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7317 unshare_expr (tmp), size_int (4 - size));
7318 tmp = build2 (GE_EXPR, boolean_type_node,
7319 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7320 cmp = build3 (COND_EXPR, void_type_node, tmp,
7321 build1 (GOTO_EXPR, void_type_node,
7322 unshare_expr (lab_false)), NULL_TREE);
7323 if (!is_double)
7324 gimplify_and_add (cmp, pre_p);
7326 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7327 || (is_double || size == 16))
7329 tmp = fold_convert (sizetype, next_fp_tmp);
7330 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7331 size_int (UNITS_PER_WORD));
7332 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7333 unshare_expr (next_fp_tmp), tmp);
7334 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7336 if (is_double)
7337 gimplify_and_add (cmp, pre_p);
7339 #ifdef FUNCTION_ARG_SCmode_WART
7340 if (TYPE_MODE (eff_type) == SCmode
7341 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7343 tree subtype = TREE_TYPE (eff_type);
7344 tree real, imag;
7346 imag
7347 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7348 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7350 real
7351 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7352 real = get_initialized_tmp_var (real, pre_p, NULL);
7354 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7355 if (type != eff_type)
7356 result = build1 (VIEW_CONVERT_EXPR, type, result);
7357 result = get_initialized_tmp_var (result, pre_p, NULL);
7359 #endif /* FUNCTION_ARG_SCmode_WART */
7361 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7362 gimplify_and_add (tmp, pre_p);
7364 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7365 gimplify_and_add (tmp, pre_p);
7367 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7368 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7369 gimplify_assign (unshare_expr (next_fp_tmp),
7370 unshare_expr (valist), pre_p);
7372 gimplify_assign (unshare_expr (valist),
7373 unshare_expr (next_fp_tmp), post_p);
7374 valist = next_fp_tmp;
7376 else
7378 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7379 unshare_expr (next_o), size_int (rsize));
7380 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7381 unshare_expr (next_o_limit));
7382 tmp = build3 (COND_EXPR, void_type_node, tmp,
7383 build1 (GOTO_EXPR, void_type_node,
7384 unshare_expr (lab_false)),
7385 NULL_TREE);
7386 gimplify_and_add (tmp, pre_p);
7388 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7389 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7391 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7392 gimplify_and_add (tmp, pre_p);
7394 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7395 gimplify_and_add (tmp, pre_p);
7397 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7398 gimplify_assign (unshare_expr (next_o),
7399 unshare_expr (next_o_limit), pre_p);
7401 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7402 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7405 if (!result)
7407 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7408 gimplify_and_add (tmp, pre_p);
7412 /* ??? In va-sh.h, there had been code to make values larger than
7413 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7415 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7416 if (result)
7418 gimplify_assign (result, tmp, pre_p);
7420 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7421 gimplify_and_add (tmp, pre_p);
7423 else
7424 result = tmp;
7426 if (pass_by_ref)
7427 result = build_va_arg_indirect_ref (result);
7429 return result;
7432 /* 64-bit floating point memory transfers are paired single precision loads
7433 or stores. So the DWARF information needs fixing in little endian (unless
7434 PR=SZ=1 in FPSCR). */
7436 sh_dwarf_register_span (rtx reg)
7438 unsigned regno = REGNO (reg);
7440 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7441 return NULL_RTX;
7443 return
7444 gen_rtx_PARALLEL (VOIDmode,
7445 gen_rtvec (2,
7446 gen_rtx_REG (SFmode,
7447 DBX_REGISTER_NUMBER (regno+1)),
7448 gen_rtx_REG (SFmode,
7449 DBX_REGISTER_NUMBER (regno))));
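/* Illustration of the span returned above: for a DFmode value in the
   register pair starting at hard register REGNO on a little endian
   target, the PARALLEL describes the value to the debugger as two
   SFmode pieces, DBX_REGISTER_NUMBER (REGNO + 1) first and
   DBX_REGISTER_NUMBER (REGNO) second, which is what the paired single
   precision loads and stores mentioned above imply for the in-memory
   layout.  */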
7452 bool
7453 sh_promote_prototypes (const_tree type)
7455 if (TARGET_HITACHI)
7456 return 0;
7457 if (! type)
7458 return 1;
7459 return ! sh_attr_renesas_p (type);
7462 /* Whether an argument must be passed by reference. On SHcompact, we
7463 pretend arguments wider than 32 bits that would have been passed in
7464 registers are passed by reference, so that an SHmedia trampoline
7465 loads them into the full 64-bit registers. */
7467 static int
7468 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7469 const_tree type, bool named)
7471 unsigned HOST_WIDE_INT size;
7473 if (type)
7474 size = int_size_in_bytes (type);
7475 else
7476 size = GET_MODE_SIZE (mode);
7478 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7479 && (!named
7480 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7481 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7482 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7483 && size > 4
7484 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7485 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7486 return size;
7487 else
7488 return 0;
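/* For instance (a sketch of the rule above): a DImode argument -- 8
   bytes, so wider than 32 bits -- that would otherwise land in the
   remaining integer argument registers is reported here as passed by
   reference, so the SHmedia trampoline can pick it up as a full
   64-bit value, as described in the comment above.  */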
7491 static bool
7492 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7493 const_tree type, bool named)
7495 if (targetm.calls.must_pass_in_stack (mode, type))
7496 return true;
7498 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7499 wants to know about pass-by-reference semantics for incoming
7500 arguments. */
7501 if (! cum)
7502 return false;
7504 if (TARGET_SHCOMPACT)
7506 cum->byref = shcompact_byref (cum, mode, type, named);
7507 return cum->byref != 0;
7510 return false;
7513 static bool
7514 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7515 const_tree type, bool named ATTRIBUTE_UNUSED)
7517 /* ??? How can it possibly be correct to return true only on the
7518 caller side of the equation? Is there someplace else in the
7519 sh backend that's magically producing the copies? */
7520 return (cum->outgoing
7521 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7522 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7525 static int
7526 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7527 tree type, bool named ATTRIBUTE_UNUSED)
7529 int words = 0;
7531 if (!TARGET_SH5
7532 && PASS_IN_REG_P (*cum, mode, type)
7533 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7534 && (ROUND_REG (*cum, mode)
7535 + (mode != BLKmode
7536 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7537 : ROUND_ADVANCE (int_size_in_bytes (type)))
7538 > NPARM_REGS (mode)))
7539 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7541 else if (!TARGET_SHCOMPACT
7542 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7543 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7545 return words * UNITS_PER_WORD;
7549 /* Define where to put the arguments to a function.
7550 Value is zero to push the argument on the stack,
7551 or a hard register in which to store the argument.
7553 MODE is the argument's machine mode.
7554 TYPE is the data type of the argument (as a tree).
7555 This is null for libcalls where that information may
7556 not be available.
7557 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7558 the preceding args and about the function being called.
7559 NAMED is nonzero if this argument is a named parameter
7560 (otherwise it is an extra parameter matching an ellipsis).
7562 On SH the first args are normally in registers
7563 and the rest are pushed. Any arg that starts within the first
7564 NPARM_REGS words is at least partially passed in a register unless
7565 its data type forbids. */
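/* Illustrative sketch (added for clarity; not part of the original sources):
   with the ordinary SH1..SH4 ABI, where the first integer argument registers
   are r4..r7, the first four named SImode arguments of a prototyped call come
   back from this hook as (reg:SI 4) .. (reg:SI 7), and a fifth SImode
   argument yields 0, i.e. it is passed on the stack. */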
7569 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7570 tree type, int named)
7572 if (! TARGET_SH5 && mode == VOIDmode)
7573 return GEN_INT (ca->renesas_abi ? 1 : 0);
7575 if (! TARGET_SH5
7576 && PASS_IN_REG_P (*ca, mode, type)
7577 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7579 int regno;
7581 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7582 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7584 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7585 gen_rtx_REG (SFmode,
7586 BASE_ARG_REG (mode)
7587 + (ROUND_REG (*ca, mode) ^ 1)),
7588 const0_rtx);
7589 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7590 gen_rtx_REG (SFmode,
7591 BASE_ARG_REG (mode)
7592 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7593 GEN_INT (4));
7594 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7597 /* If the alignment of a DF value causes an SF register to be
7598 skipped, we will use that skipped register for the next SF
7599 value. */
7600 if ((TARGET_HITACHI || ca->renesas_abi)
7601 && ca->free_single_fp_reg
7602 && mode == SFmode)
7603 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7605 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7606 ^ (mode == SFmode && TARGET_SH4
7607 && TARGET_LITTLE_ENDIAN != 0
7608 && ! TARGET_HITACHI && ! ca->renesas_abi);
7609 return gen_rtx_REG (mode, regno);
7613 if (TARGET_SH5)
7615 if (mode == VOIDmode && TARGET_SHCOMPACT)
7616 return GEN_INT (ca->call_cookie);
7618 /* The following test assumes unnamed arguments are promoted to
7619 DFmode. */
7620 if (mode == SFmode && ca->free_single_fp_reg)
7621 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7623 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7624 && (named || ! ca->prototype_p)
7625 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7627 if (! ca->prototype_p && TARGET_SHMEDIA)
7628 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7630 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7631 FIRST_FP_PARM_REG
7632 + ca->arg_count[(int) SH_ARG_FLOAT]);
7635 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7636 && (! TARGET_SHCOMPACT
7637 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7638 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7639 type, named))))
7641 return gen_rtx_REG (mode, (FIRST_PARM_REG
7642 + ca->arg_count[(int) SH_ARG_INT]));
7645 return 0;
7648 return 0;
7651 /* Update the data in CUM to advance over an argument
7652 of mode MODE and data type TYPE.
7653 (TYPE is null for libcalls where that information may not be
7654 available.) */
7656 void
7657 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7658 tree type, int named)
7660 if (ca->force_mem)
7661 ca->force_mem = 0;
7662 else if (TARGET_SH5)
7664 tree type2 = (ca->byref && type
7665 ? TREE_TYPE (type)
7666 : type);
7667 enum machine_mode mode2 = (ca->byref && type
7668 ? TYPE_MODE (type2)
7669 : mode);
7670 int dwords = ((ca->byref
7671 ? ca->byref
7672 : mode2 == BLKmode
7673 ? int_size_in_bytes (type2)
7674 : GET_MODE_SIZE (mode2)) + 7) / 8;
7675 int numregs = MIN (dwords, NPARM_REGS (SImode)
7676 - ca->arg_count[(int) SH_ARG_INT]);
7678 if (numregs)
7680 ca->arg_count[(int) SH_ARG_INT] += numregs;
7681 if (TARGET_SHCOMPACT
7682 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7684 ca->call_cookie
7685 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7686 - numregs, 1);
7687 /* N.B. We want this also for outgoing. */
7688 ca->stack_regs += numregs;
7690 else if (ca->byref)
7692 if (! ca->outgoing)
7693 ca->stack_regs += numregs;
7694 ca->byref_regs += numregs;
7695 ca->byref = 0;
7697 ca->call_cookie
7698 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7699 - numregs, 2);
7700 while (--numregs);
7701 ca->call_cookie
7702 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7703 - 1, 1);
7705 else if (dwords > numregs)
7707 int pushregs = numregs;
7709 if (TARGET_SHCOMPACT)
7710 ca->stack_regs += numregs;
7711 while (pushregs < NPARM_REGS (SImode) - 1
7712 && (CALL_COOKIE_INT_REG_GET
7713 (ca->call_cookie,
7714 NPARM_REGS (SImode) - pushregs)
7715 == 1))
7717 ca->call_cookie
7718 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7719 - pushregs, 1);
7720 pushregs++;
7722 if (numregs == NPARM_REGS (SImode))
7723 ca->call_cookie
7724 |= CALL_COOKIE_INT_REG (0, 1)
7725 | CALL_COOKIE_STACKSEQ (numregs - 1);
7726 else
7727 ca->call_cookie
7728 |= CALL_COOKIE_STACKSEQ (numregs);
7731 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7732 && (named || ! ca->prototype_p))
7734 if (mode2 == SFmode && ca->free_single_fp_reg)
7735 ca->free_single_fp_reg = 0;
7736 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7737 < NPARM_REGS (SFmode))
7739 int numfpregs
7740 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7741 NPARM_REGS (SFmode)
7742 - ca->arg_count[(int) SH_ARG_FLOAT]);
7744 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7746 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7748 if (ca->outgoing && numregs > 0)
7751 ca->call_cookie
7752 |= (CALL_COOKIE_INT_REG
7753 (ca->arg_count[(int) SH_ARG_INT]
7754 - numregs + ((numfpregs - 2) / 2),
7755 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7756 - numfpregs) / 2));
7758 while (numfpregs -= 2);
7760 else if (mode2 == SFmode && (named)
7761 && (ca->arg_count[(int) SH_ARG_FLOAT]
7762 < NPARM_REGS (SFmode)))
7763 ca->free_single_fp_reg
7764 = FIRST_FP_PARM_REG - numfpregs
7765 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7768 return;
7771 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7773 /* Note that we've used the skipped register. */
7774 if (mode == SFmode && ca->free_single_fp_reg)
7776 ca->free_single_fp_reg = 0;
7777 return;
7779 /* When we have a DF after an SF, there's an SF register that gets
7780 skipped in order to align the DF value. We note this skipped
7781 register, because the next SF value will use it, and not the
7782 SF that follows the DF. */
7783 if (mode == DFmode
7784 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7786 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7787 + BASE_ARG_REG (mode));
7791 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7792 || PASS_IN_REG_P (*ca, mode, type))
7793 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7794 = (ROUND_REG (*ca, mode)
7795 + (mode == BLKmode
7796 ? ROUND_ADVANCE (int_size_in_bytes (type))
7797 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7800 /* The Renesas calling convention doesn't quite fit into this scheme since
7801 the address is passed like an invisible argument, but one that is always
7802 passed in memory. */
7803 static rtx
7804 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7806 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7807 return 0;
7808 return gen_rtx_REG (Pmode, 2);
7811 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7813 static bool
7814 sh_return_in_memory (const_tree type, const_tree fndecl)
7816 if (TARGET_SH5)
7818 if (TYPE_MODE (type) == BLKmode)
7819 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7820 else
7821 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7823 else
7825 return (TYPE_MODE (type) == BLKmode
7826 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7827 && TREE_CODE (type) == RECORD_TYPE));
7831 /* We actually emit the code in sh_expand_prologue. We used to use
7832 a static variable to flag that we need to emit this code, but that
7833 doesn't work when inlining, when functions are deferred and then emitted
7834 later. Fortunately, we already have two flags that are part of struct
7835 function that tell if a function uses varargs or stdarg. */
7836 static void
7837 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7838 enum machine_mode mode,
7839 tree type,
7840 int *pretend_arg_size,
7841 int second_time ATTRIBUTE_UNUSED)
7843 gcc_assert (cfun->stdarg);
7844 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7846 int named_parm_regs, anon_parm_regs;
7848 named_parm_regs = (ROUND_REG (*ca, mode)
7849 + (mode == BLKmode
7850 ? ROUND_ADVANCE (int_size_in_bytes (type))
7851 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7852 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7853 if (anon_parm_regs > 0)
7854 *pretend_arg_size = anon_parm_regs * 4;
7858 static bool
7859 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7861 return TARGET_SH5;
7864 static bool
7865 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7867 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7871 /* Define the offset between two registers, one to be eliminated, and
7872 the other its replacement, at the start of a routine. */
7875 initial_elimination_offset (int from, int to)
7877 int regs_saved;
7878 int regs_saved_rounding = 0;
7879 int total_saved_regs_space;
7880 int total_auto_space;
7881 int save_flags = target_flags;
7882 int copy_flags;
7883 HARD_REG_SET live_regs_mask;
7885 shmedia_space_reserved_for_target_registers = false;
7886 regs_saved = calc_live_regs (&live_regs_mask);
7887 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7889 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7891 shmedia_space_reserved_for_target_registers = true;
7892 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7895 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7896 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7897 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7899 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7900 copy_flags = target_flags;
7901 target_flags = save_flags;
7903 total_saved_regs_space = regs_saved + regs_saved_rounding;
7905 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7906 return total_saved_regs_space + total_auto_space
7907 + crtl->args.info.byref_regs * 8;
7909 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7910 return total_saved_regs_space + total_auto_space
7911 + crtl->args.info.byref_regs * 8;
7913 /* Initial gap between fp and sp is 0. */
7914 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7915 return 0;
7917 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7918 return rounded_frame_size (0);
7920 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7921 return rounded_frame_size (0);
7923 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7924 && (to == HARD_FRAME_POINTER_REGNUM
7925 || to == STACK_POINTER_REGNUM));
7926 if (TARGET_SH5)
7928 int n = total_saved_regs_space;
7929 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7930 save_schedule schedule;
7931 save_entry *entry;
7933 n += total_auto_space;
7935 /* If it wasn't saved, there's not much we can do. */
7936 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7937 return n;
7939 target_flags = copy_flags;
7941 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7942 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7943 if (entry->reg == pr_reg)
7945 target_flags = save_flags;
7946 return entry->offset;
7948 gcc_unreachable ();
7950 else
7951 return total_auto_space;
7954 /* Parse the -mfixed-range= option string. */
7955 void
7956 sh_fix_range (const char *const_str)
7958 int i, first, last;
7959 char *str, *dash, *comma;
7961 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
7962 REG2 are either register names or register numbers. The effect
7963 of this option is to mark the registers in the range from REG1 to
7964 REG2 as ``fixed'' so they won't be used by the compiler. */
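/* For example (an illustrative addition, not taken from the original comment),
   -mfixed-range=r10-r13 marks r10 through r13 as fixed, and several ranges
   may be combined, e.g. -mfixed-range=r10-r11,r14-r14; any names accepted by
   decode_reg_name may be used. */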
7966 i = strlen (const_str);
7967 str = (char *) alloca (i + 1);
7968 memcpy (str, const_str, i + 1);
7970 while (1)
7972 dash = strchr (str, '-');
7973 if (!dash)
7975 warning (0, "value of -mfixed-range must have form REG1-REG2");
7976 return;
7978 *dash = '\0';
7979 comma = strchr (dash + 1, ',');
7980 if (comma)
7981 *comma = '\0';
7983 first = decode_reg_name (str);
7984 if (first < 0)
7986 warning (0, "unknown register name: %s", str);
7987 return;
7990 last = decode_reg_name (dash + 1);
7991 if (last < 0)
7993 warning (0, "unknown register name: %s", dash + 1);
7994 return;
7997 *dash = '-';
7999 if (first > last)
8001 warning (0, "%s-%s is an empty range", str, dash + 1);
8002 return;
8005 for (i = first; i <= last; ++i)
8006 fixed_regs[i] = call_used_regs[i] = 1;
8008 if (!comma)
8009 break;
8011 *comma = ',';
8012 str = comma + 1;
8016 /* Insert any deferred function attributes from earlier pragmas. */
8017 static void
8018 sh_insert_attributes (tree node, tree *attributes)
8020 tree attrs;
8022 if (TREE_CODE (node) != FUNCTION_DECL)
8023 return;
8025 /* We are only interested in fields. */
8026 if (!DECL_P (node))
8027 return;
8029 /* Append the attributes to the deferred attributes. */
8030 *sh_deferred_function_attributes_tail = *attributes;
8031 attrs = sh_deferred_function_attributes;
8032 if (!attrs)
8033 return;
8035 /* Some attributes imply or require the interrupt attribute. */
8036 if (!lookup_attribute ("interrupt_handler", attrs)
8037 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8039 /* If we have a trapa_handler, but no interrupt_handler attribute,
8040 insert an interrupt_handler attribute. */
8041 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8042 /* We can't use sh_pr_interrupt here because that's not in the
8043 java frontend. */
8044 attrs
8045 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8046 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8047 if the interrupt attribute is missing, we ignore the attribute
8048 and warn. */
8049 else if (lookup_attribute ("sp_switch", attrs)
8050 || lookup_attribute ("trap_exit", attrs)
8051 || lookup_attribute ("nosave_low_regs", attrs)
8052 || lookup_attribute ("resbank", attrs))
8054 tree *tail;
8056 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8058 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8059 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8060 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8061 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8062 warning (OPT_Wattributes,
8063 "%qs attribute only applies to interrupt functions",
8064 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8065 else
8067 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8068 NULL_TREE);
8069 tail = &TREE_CHAIN (*tail);
8072 attrs = *attributes;
8076 /* Install the processed list. */
8077 *attributes = attrs;
8079 /* Clear deferred attributes. */
8080 sh_deferred_function_attributes = NULL_TREE;
8081 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8083 return;
8086 /* Supported attributes:
8088 interrupt_handler -- specifies this function is an interrupt handler.
8090 trapa_handler -- like above, but don't save all registers.
8092 sp_switch -- specifies an alternate stack for an interrupt handler
8093 to run on.
8095 trap_exit -- use a trapa to exit an interrupt function instead of
8096 an rte instruction.
8098 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8099 This is useful on the SH3 and upwards,
8100 which has a separate set of low regs for User and Supervisor modes.
8101 This should only be used for the lowest level of interrupts. Higher levels
8102 of interrupts must save the registers in case they themselves are
8103 interrupted.
8105 renesas -- use Renesas calling/layout conventions (functions and
8106 structures).
8108 resbank -- In case of an ISR, use a register bank to save registers
8109 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
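   An illustrative sketch of source-level usage (added here for clarity; the
   identifiers and argument values are assumptions, not taken from this file):

     void *alt_stack;
     void isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));
     void bank_isr (void) __attribute__ ((interrupt_handler, resbank));
     void vect_fn (void) __attribute__ ((function_vector (18)));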
8112 const struct attribute_spec sh_attribute_table[] =
8114 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8115 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8116 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8117 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8118 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8119 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8120 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8121 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8122 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8123 #ifdef SYMBIAN
8124 /* Symbian support adds two new attributes:
8125 dllexport - for exporting a function/variable that will live in a dll
8126 dllimport - for importing a function/variable from a dll
8128 Microsoft allows multiple declspecs in one __declspec, separating
8129 them with spaces. We do NOT support this. Instead, use __declspec
8130 multiple times. */
8131 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8132 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8133 #endif
8134 { NULL, 0, 0, false, false, false, NULL }
8137 /* Handle a 'resbank' attribute. */
8138 static tree
8139 sh_handle_resbank_handler_attribute (tree * node, tree name,
8140 tree args ATTRIBUTE_UNUSED,
8141 int flags ATTRIBUTE_UNUSED,
8142 bool * no_add_attrs)
8144 if (!TARGET_SH2A)
8146 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8147 IDENTIFIER_POINTER (name));
8148 *no_add_attrs = true;
8150 if (TREE_CODE (*node) != FUNCTION_DECL)
8152 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8153 IDENTIFIER_POINTER (name));
8154 *no_add_attrs = true;
8157 return NULL_TREE;
8160 /* Handle an "interrupt_handler" attribute; arguments as in
8161 struct attribute_spec.handler. */
8162 static tree
8163 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8164 tree args ATTRIBUTE_UNUSED,
8165 int flags ATTRIBUTE_UNUSED,
8166 bool *no_add_attrs)
8168 if (TREE_CODE (*node) != FUNCTION_DECL)
8170 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8171 IDENTIFIER_POINTER (name));
8172 *no_add_attrs = true;
8174 else if (TARGET_SHCOMPACT)
8176 error ("attribute interrupt_handler is not compatible with -m5-compact");
8177 *no_add_attrs = true;
8180 return NULL_TREE;
8183 /* Handle a 'function_vector' attribute; arguments as in
8184 struct attribute_spec.handler. */
8185 static tree
8186 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8187 tree args ATTRIBUTE_UNUSED,
8188 int flags ATTRIBUTE_UNUSED,
8189 bool * no_add_attrs)
8191 if (!TARGET_SH2A)
8193 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8194 IDENTIFIER_POINTER (name));
8195 *no_add_attrs = true;
8197 else if (TREE_CODE (*node) != FUNCTION_DECL)
8199 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8200 IDENTIFIER_POINTER (name));
8201 *no_add_attrs = true;
8203 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8205 /* The argument must be a constant integer. */
8206 warning (OPT_Wattributes,
8207 "`%s' attribute argument not an integer constant",
8208 IDENTIFIER_POINTER (name));
8209 *no_add_attrs = true;
8211 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8213 /* The argument value must be between 0 and 255. */
8214 warning (OPT_Wattributes,
8215 "`%s' attribute argument should be between 0 and 255",
8216 IDENTIFIER_POINTER (name));
8217 *no_add_attrs = true;
8219 return NULL_TREE;
8222 /* Returns 1 if the rtx X is a SYMBOL_REF for a function that has been
8223 assigned the attribute 'function_vector'. */
8225 sh2a_is_function_vector_call (rtx x)
8227 if (GET_CODE (x) == SYMBOL_REF
8228 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8230 tree tr = SYMBOL_REF_DECL (x);
8232 if (sh2a_function_vector_p (tr))
8233 return 1;
8236 return 0;
8239 /* Returns the function vector number, if the attribute
8240 'function_vector' is assigned, otherwise returns zero. */
8242 sh2a_get_function_vector_number (rtx x)
8244 int num;
8245 tree list, t;
8247 if ((GET_CODE (x) == SYMBOL_REF)
8248 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8250 t = SYMBOL_REF_DECL (x);
8252 if (TREE_CODE (t) != FUNCTION_DECL)
8253 return 0;
8255 list = SH_ATTRIBUTES (t);
8256 while (list)
8258 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8260 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8261 return num;
8264 list = TREE_CHAIN (list);
8267 return 0;
8269 else
8270 return 0;
8273 /* Handle an "sp_switch" attribute; arguments as in
8274 struct attribute_spec.handler. */
8275 static tree
8276 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8277 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8279 if (TREE_CODE (*node) != FUNCTION_DECL)
8281 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8282 IDENTIFIER_POINTER (name));
8283 *no_add_attrs = true;
8285 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8287 /* The argument must be a constant string. */
8288 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8289 IDENTIFIER_POINTER (name));
8290 *no_add_attrs = true;
8293 return NULL_TREE;
8296 /* Handle a "trap_exit" attribute; arguments as in
8297 struct attribute_spec.handler. */
8298 static tree
8299 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8300 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8302 if (TREE_CODE (*node) != FUNCTION_DECL)
8304 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8305 IDENTIFIER_POINTER (name));
8306 *no_add_attrs = true;
8308 /* The argument specifies a trap number to be used in a trapa instruction
8309 at function exit (instead of an rte instruction). */
8310 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8312 /* The argument must be a constant integer. */
8313 warning (OPT_Wattributes, "%qs attribute argument not an "
8314 "integer constant", IDENTIFIER_POINTER (name));
8315 *no_add_attrs = true;
8318 return NULL_TREE;
8321 static tree
8322 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8323 tree name ATTRIBUTE_UNUSED,
8324 tree args ATTRIBUTE_UNUSED,
8325 int flags ATTRIBUTE_UNUSED,
8326 bool *no_add_attrs ATTRIBUTE_UNUSED)
8328 return NULL_TREE;
8331 /* True if __attribute__((renesas)) or -mrenesas. */
8333 sh_attr_renesas_p (const_tree td)
8335 if (TARGET_HITACHI)
8336 return 1;
8337 if (td == 0)
8338 return 0;
8339 if (DECL_P (td))
8340 td = TREE_TYPE (td);
8341 if (td == error_mark_node)
8342 return 0;
8343 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8344 != NULL_TREE);
8347 /* True if __attribute__((renesas)) or -mrenesas, for the current
8348 function. */
8350 sh_cfun_attr_renesas_p (void)
8352 return sh_attr_renesas_p (current_function_decl);
8356 sh_cfun_interrupt_handler_p (void)
8358 return (lookup_attribute ("interrupt_handler",
8359 DECL_ATTRIBUTES (current_function_decl))
8360 != NULL_TREE);
8363 /* Returns 1 if FUNC has been assigned the attribute
8364 "function_vector". */
8366 sh2a_function_vector_p (tree func)
8368 tree list;
8369 if (TREE_CODE (func) != FUNCTION_DECL)
8370 return 0;
8372 list = SH_ATTRIBUTES (func);
8373 while (list)
8375 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8376 return 1;
8378 list = TREE_CHAIN (list);
8380 return 0;
8383 /* Returns TRUE if the current function has the "resbank" attribute. */
8386 sh_cfun_resbank_handler_p (void)
8388 return ((lookup_attribute ("resbank",
8389 DECL_ATTRIBUTES (current_function_decl))
8390 != NULL_TREE)
8391 && (lookup_attribute ("interrupt_handler",
8392 DECL_ATTRIBUTES (current_function_decl))
8393 != NULL_TREE) && TARGET_SH2A);
8396 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8398 static const char *
8399 sh_check_pch_target_flags (int old_flags)
8401 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8402 | MASK_SH_E | MASK_HARD_SH4
8403 | MASK_FPU_SINGLE | MASK_SH4))
8404 return _("created and used with different architectures / ABIs");
8405 if ((old_flags ^ target_flags) & MASK_HITACHI)
8406 return _("created and used with different ABIs");
8407 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8408 return _("created and used with different endianness");
8409 return NULL;
8412 /* Predicates used by the templates. */
8414 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8415 Used only in general_movsrc_operand. */
8418 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8420 switch (REGNO (op))
8422 case PR_REG:
8423 case MACL_REG:
8424 case MACH_REG:
8425 return 1;
8427 return 0;
8430 /* Nonzero if OP is a floating point value with value 0.0. */
8433 fp_zero_operand (rtx op)
8435 REAL_VALUE_TYPE r;
8437 if (GET_MODE (op) != SFmode)
8438 return 0;
8440 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8441 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8444 /* Nonzero if OP is a floating point value with value 1.0. */
8447 fp_one_operand (rtx op)
8449 REAL_VALUE_TYPE r;
8451 if (GET_MODE (op) != SFmode)
8452 return 0;
8454 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8455 return REAL_VALUES_EQUAL (r, dconst1);
8458 /* For -m4 and -m4-single-only, mode switching is used. If we are
8459 compiling without -mfmovd, movsf_ie isn't taken into account for
8460 mode switching. We could check in machine_dependent_reorg for
8461 cases where we know we are in single precision mode, but there is
8462 no interface to find that out during reload, so we must avoid
8463 choosing an fldi alternative during reload and thus failing to
8464 allocate a scratch register for the constant loading. */
8466 fldi_ok (void)
8468 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8472 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8474 enum rtx_code code = GET_CODE (op);
8475 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8478 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8480 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8482 if (GET_CODE (op) != SYMBOL_REF)
8483 return 0;
8484 return SYMBOL_REF_TLS_MODEL (op);
8487 /* Return the destination address of a branch. */
8489 static int
8490 branch_dest (rtx branch)
8492 rtx dest = SET_SRC (PATTERN (branch));
8493 int dest_uid;
8495 if (GET_CODE (dest) == IF_THEN_ELSE)
8496 dest = XEXP (dest, 1);
8497 dest = XEXP (dest, 0);
8498 dest_uid = INSN_UID (dest);
8499 return INSN_ADDRESSES (dest_uid);
8502 /* Return nonzero if REG is not used after INSN.
8503 We assume REG is a reload reg, and therefore does
8504 not live past labels. It may live past calls or jumps though. */
8506 reg_unused_after (rtx reg, rtx insn)
8508 enum rtx_code code;
8509 rtx set;
8511 /* If the reg is set by this instruction, then it is safe for our
8512 case. Disregard the case where this is a store to memory, since
8513 we are checking a register used in the store address. */
8514 set = single_set (insn);
8515 if (set && GET_CODE (SET_DEST (set)) != MEM
8516 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8517 return 1;
8519 while ((insn = NEXT_INSN (insn)))
8521 rtx set;
8522 if (!INSN_P (insn))
8523 continue;
8525 code = GET_CODE (insn);
8527 #if 0
8528 /* If this is a label that existed before reload, then the register
8529 is dead here. However, if this is a label added by reorg, then
8530 the register may still be live here. We can't tell the difference,
8531 so we just ignore labels completely. */
8532 if (code == CODE_LABEL)
8533 return 1;
8534 /* else */
8535 #endif
8537 if (code == JUMP_INSN)
8538 return 0;
8540 /* If this is a sequence, we must handle them all at once.
8541 We could have for instance a call that sets the target register,
8542 and an insn in a delay slot that uses the register. In this case,
8543 we must return 0. */
8544 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8546 int i;
8547 int retval = 0;
8549 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8551 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8552 rtx set = single_set (this_insn);
8554 if (GET_CODE (this_insn) == CALL_INSN)
8555 code = CALL_INSN;
8556 else if (GET_CODE (this_insn) == JUMP_INSN)
8558 if (INSN_ANNULLED_BRANCH_P (this_insn))
8559 return 0;
8560 code = JUMP_INSN;
8563 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8564 return 0;
8565 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8567 if (GET_CODE (SET_DEST (set)) != MEM)
8568 retval = 1;
8569 else
8570 return 0;
8572 if (set == 0
8573 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8574 return 0;
8576 if (retval == 1)
8577 return 1;
8578 else if (code == JUMP_INSN)
8579 return 0;
8582 set = single_set (insn);
8583 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8584 return 0;
8585 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8586 return GET_CODE (SET_DEST (set)) != MEM;
8587 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8588 return 0;
8590 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8591 return 1;
8593 return 1;
8596 #include "ggc.h"
8598 static GTY(()) rtx fpscr_rtx;
8600 get_fpscr_rtx (void)
8602 if (! fpscr_rtx)
8604 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8605 REG_USERVAR_P (fpscr_rtx) = 1;
8606 mark_user_reg (fpscr_rtx);
8608 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8609 mark_user_reg (fpscr_rtx);
8610 return fpscr_rtx;
8613 static GTY(()) tree fpscr_values;
8615 static void
8616 emit_fpu_switch (rtx scratch, int index)
8618 rtx dst, src;
8620 if (fpscr_values == NULL)
8622 tree t;
8624 t = build_index_type (integer_one_node);
8625 t = build_array_type (integer_type_node, t);
8626 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8627 DECL_ARTIFICIAL (t) = 1;
8628 DECL_IGNORED_P (t) = 1;
8629 DECL_EXTERNAL (t) = 1;
8630 TREE_STATIC (t) = 1;
8631 TREE_PUBLIC (t) = 1;
8632 TREE_USED (t) = 1;
8634 fpscr_values = t;
8637 src = DECL_RTL (fpscr_values);
8638 if (!can_create_pseudo_p ())
8640 emit_move_insn (scratch, XEXP (src, 0));
8641 if (index != 0)
8642 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8643 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8645 else
8646 src = adjust_address (src, PSImode, index * 4);
8648 dst = get_fpscr_rtx ();
8649 emit_move_insn (dst, src);
8652 void
8653 emit_sf_insn (rtx pat)
8655 emit_insn (pat);
8658 void
8659 emit_df_insn (rtx pat)
8661 emit_insn (pat);
8664 void
8665 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8667 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8670 void
8671 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8673 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8674 get_fpscr_rtx ()));
8677 void
8678 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8680 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8683 void
8684 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8686 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8687 get_fpscr_rtx ()));
8690 static rtx get_free_reg (HARD_REG_SET);
8692 /* This function returns a register to use to load the address from which
8693 to load the fpscr. Currently it always returns r1 or r7, but when we are
8694 able to use pseudo registers after combine, or have a better mechanism
8695 for choosing a register, it should be done here. */
8696 /* REGS_LIVE is the liveness information for the point for which we
8697 need this allocation. In some bare-bones exit blocks, r1 is live at the
8698 start. We can even have all of r0..r3 being live:
8699 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8700 The INSN before which new insns are placed will clobber the register
8701 we return. If a basic block consists only of setting the return value
8702 register to a pseudo and using that register, the return value is not
8703 live before or after this block, yet we'll insert our insns right in
8704 the middle. */
8706 static rtx
8707 get_free_reg (HARD_REG_SET regs_live)
8709 if (! TEST_HARD_REG_BIT (regs_live, 1))
8710 return gen_rtx_REG (Pmode, 1);
8712 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8713 there shouldn't be anything but a jump before the function end. */
8714 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8715 return gen_rtx_REG (Pmode, 7);
8718 /* This function will set the fpscr from memory.
8719 MODE is the mode we are setting it to. */
8720 void
8721 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8723 enum attr_fp_mode fp_mode = mode;
8724 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8725 rtx addr_reg;
8727 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8728 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8731 /* Is the given character a logical line separator for the assembler? */
8732 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8733 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8734 #endif
8737 sh_insn_length_adjustment (rtx insn)
8739 /* Instructions with unfilled delay slots take up an extra two bytes for
8740 the nop in the delay slot. */
8741 if (((GET_CODE (insn) == INSN
8742 && GET_CODE (PATTERN (insn)) != USE
8743 && GET_CODE (PATTERN (insn)) != CLOBBER)
8744 || GET_CODE (insn) == CALL_INSN
8745 || (GET_CODE (insn) == JUMP_INSN
8746 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8747 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8748 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8749 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8750 return 2;
8752 /* SH2e has a bug that prevents the use of annulled branches, so if
8753 the delay slot is not filled, we'll have to put a NOP in it. */
8754 if (sh_cpu == CPU_SH2E
8755 && GET_CODE (insn) == JUMP_INSN
8756 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8757 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8758 && get_attr_type (insn) == TYPE_CBRANCH
8759 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8760 return 2;
8762 /* sh-dsp parallel processing insn take four bytes instead of two. */
8764 if (GET_CODE (insn) == INSN)
8766 int sum = 0;
8767 rtx body = PATTERN (insn);
8768 const char *templ;
8769 char c;
8770 int maybe_label = 1;
8772 if (GET_CODE (body) == ASM_INPUT)
8773 templ = XSTR (body, 0);
8774 else if (asm_noperands (body) >= 0)
8775 templ
8776 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8777 else
8778 return 0;
8781 int ppi_adjust = 0;
8784 c = *templ++;
8785 while (c == ' ' || c == '\t');
8786 /* all sh-dsp parallel-processing insns start with p.
8787 The only non-ppi sh insn starting with p is pref.
8788 The only ppi starting with pr is prnd. */
8789 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8790 ppi_adjust = 2;
8791 /* The repeat pseudo-insn expands to three insns, a total of
8792 six bytes in size. */
8793 else if ((c == 'r' || c == 'R')
8794 && ! strncasecmp ("epeat", templ, 5))
8795 ppi_adjust = 4;
8796 while (c && c != '\n'
8797 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8799 /* If this is a label, it is obviously not a ppi insn. */
8800 if (c == ':' && maybe_label)
8802 ppi_adjust = 0;
8803 break;
8805 else if (c == '\'' || c == '"')
8806 maybe_label = 0;
8807 c = *templ++;
8809 sum += ppi_adjust;
8810 maybe_label = c != ':';
8812 while (c);
8813 return sum;
8815 return 0;
8818 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8819 isn't protected by a PIC unspec. */
8821 nonpic_symbol_mentioned_p (rtx x)
8823 register const char *fmt;
8824 register int i;
8826 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8827 || GET_CODE (x) == PC)
8828 return 1;
8830 /* We don't want to look into the possible MEM location of a
8831 CONST_DOUBLE, since we're not going to use it, in general. */
8832 if (GET_CODE (x) == CONST_DOUBLE)
8833 return 0;
8835 if (GET_CODE (x) == UNSPEC
8836 && (XINT (x, 1) == UNSPEC_PIC
8837 || XINT (x, 1) == UNSPEC_GOT
8838 || XINT (x, 1) == UNSPEC_GOTOFF
8839 || XINT (x, 1) == UNSPEC_GOTPLT
8840 || XINT (x, 1) == UNSPEC_GOTTPOFF
8841 || XINT (x, 1) == UNSPEC_DTPOFF
8842 || XINT (x, 1) == UNSPEC_PLT
8843 || XINT (x, 1) == UNSPEC_SYMOFF
8844 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
8845 return 0;
8847 fmt = GET_RTX_FORMAT (GET_CODE (x));
8848 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8850 if (fmt[i] == 'E')
8852 register int j;
8854 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8855 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8856 return 1;
8858 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8859 return 1;
8862 return 0;
8865 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8866 @GOTOFF in `reg'. */
8868 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8869 rtx reg)
8871 if (tls_symbolic_operand (orig, Pmode))
8872 return orig;
8874 if (GET_CODE (orig) == LABEL_REF
8875 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8877 if (reg == 0)
8878 reg = gen_reg_rtx (Pmode);
8880 emit_insn (gen_symGOTOFF2reg (reg, orig));
8881 return reg;
8883 else if (GET_CODE (orig) == SYMBOL_REF)
8885 if (reg == 0)
8886 reg = gen_reg_rtx (Pmode);
8888 emit_insn (gen_symGOT2reg (reg, orig));
8889 return reg;
8891 return orig;
8894 /* Mark the use of a constant in the literal table. If the constant
8895 has multiple labels, make it unique. */
8896 static rtx
8897 mark_constant_pool_use (rtx x)
8899 rtx insn, lab, pattern;
8901 if (x == NULL)
8902 return x;
8904 switch (GET_CODE (x))
8906 case LABEL_REF:
8907 x = XEXP (x, 0);
8908 case CODE_LABEL:
8909 break;
8910 default:
8911 return x;
8914 /* Get the first label in the list of labels for the same constant
8915 and delete other labels in the list. */
8916 lab = x;
8917 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8919 if (GET_CODE (insn) != CODE_LABEL
8920 || LABEL_REFS (insn) != NEXT_INSN (insn))
8921 break;
8922 lab = insn;
8925 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8926 INSN_DELETED_P (insn) = 1;
8928 /* Mark constants in a window. */
8929 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8931 if (GET_CODE (insn) != INSN)
8932 continue;
8934 pattern = PATTERN (insn);
8935 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8936 continue;
8938 switch (XINT (pattern, 1))
8940 case UNSPECV_CONST2:
8941 case UNSPECV_CONST4:
8942 case UNSPECV_CONST8:
8943 XVECEXP (pattern, 0, 1) = const1_rtx;
8944 break;
8945 case UNSPECV_WINDOW_END:
8946 if (XVECEXP (pattern, 0, 0) == x)
8947 return lab;
8948 break;
8949 case UNSPECV_CONST_END:
8950 return lab;
8951 default:
8952 break;
8956 return lab;
8959 /* Return true if it's possible to redirect BRANCH1 to the destination
8960 of an unconditional jump BRANCH2. We only want to do this if the
8961 resulting branch will have a short displacement. */
8963 sh_can_redirect_branch (rtx branch1, rtx branch2)
8965 if (flag_expensive_optimizations && simplejump_p (branch2))
8967 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8968 rtx insn;
8969 int distance;
8971 for (distance = 0, insn = NEXT_INSN (branch1);
8972 insn && distance < 256;
8973 insn = PREV_INSN (insn))
8975 if (insn == dest)
8976 return 1;
8977 else
8978 distance += get_attr_length (insn);
8980 for (distance = 0, insn = NEXT_INSN (branch1);
8981 insn && distance < 256;
8982 insn = NEXT_INSN (insn))
8984 if (insn == dest)
8985 return 1;
8986 else
8987 distance += get_attr_length (insn);
8990 return 0;
8993 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8995 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8996 unsigned int new_reg)
8998 /* Interrupt functions can only use registers that have already been
8999 saved by the prologue, even if they would normally be
9000 call-clobbered. */
9002 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9003 return 0;
9005 return 1;
9008 /* Function to update the integer COST
9009 based on the relationship between INSN that is dependent on
9010 DEP_INSN through the dependence LINK. The default is to make no
9011 adjustment to COST. This can be used for example to specify to
9012 the scheduler that an output- or anti-dependence does not incur
9013 the same cost as a data-dependence. The return value should be
9014 the new value for COST. */
9015 static int
9016 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9018 rtx reg, use_pat;
9020 if (TARGET_SHMEDIA)
9022 /* On SHmedia, if the dependence is an anti-dependence or
9023 output-dependence, there is no cost. */
9024 if (REG_NOTE_KIND (link) != 0)
9026 /* However, dependencies between target register loads and
9027 uses of the register in a subsequent block that are separated
9028 by a conditional branch are not modelled - we have to make do with
9029 the anti-dependency between the target register load and the
9030 conditional branch that ends the current block. */
9031 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9032 && GET_CODE (PATTERN (dep_insn)) == SET
9033 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9034 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9035 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9037 int orig_cost = cost;
9038 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9039 rtx target = ((! note
9040 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9041 ? insn : JUMP_LABEL (insn));
9042 /* On the likely path, the branch costs 1; on the unlikely path,
9043 it costs 3. */
9044 cost--;
9046 target = next_active_insn (target);
9047 while (target && ! flow_dependent_p (target, dep_insn)
9048 && --cost > 0);
9049 /* If two branches are executed in immediate succession, with the
9050 first branch properly predicted, this causes a stall at the
9051 second branch, hence we won't need the target for the
9052 second branch for two cycles after the launch of the first
9053 branch. */
9054 if (cost > orig_cost - 2)
9055 cost = orig_cost - 2;
9057 else
9058 cost = 0;
9061 else if (get_attr_is_mac_media (insn)
9062 && get_attr_is_mac_media (dep_insn))
9063 cost = 1;
9065 else if (! reload_completed
9066 && GET_CODE (PATTERN (insn)) == SET
9067 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9068 && GET_CODE (PATTERN (dep_insn)) == SET
9069 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9070 && cost < 4)
9071 cost = 4;
9072 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9073 that is needed at the target. */
9074 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9075 && ! flow_dependent_p (insn, dep_insn))
9076 cost--;
9078 else if (REG_NOTE_KIND (link) == 0)
9080 enum attr_type type;
9081 rtx dep_set;
9083 if (recog_memoized (insn) < 0
9084 || recog_memoized (dep_insn) < 0)
9085 return cost;
9087 dep_set = single_set (dep_insn);
9089 /* The latency that we specify in the scheduling description refers
9090 to the actual output, not to an auto-increment register; for that,
9091 the latency is one. */
9092 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9094 rtx set = single_set (insn);
9096 if (set
9097 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9098 && (!MEM_P (SET_DEST (set))
9099 || !reg_mentioned_p (SET_DEST (dep_set),
9100 XEXP (SET_DEST (set), 0))))
9101 cost = 1;
9103 /* The only input for a call that is timing-critical is the
9104 function's address. */
9105 if (GET_CODE (insn) == CALL_INSN)
9107 rtx call = PATTERN (insn);
9109 if (GET_CODE (call) == PARALLEL)
9110 call = XVECEXP (call, 0 ,0);
9111 if (GET_CODE (call) == SET)
9112 call = SET_SRC (call);
9113 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9114 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9115 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9116 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9117 cost -= TARGET_SH4_300 ? 3 : 6;
9119 /* Likewise, the most timing-critical input for an sfunc call
9120 is the function address. However, sfuncs typically start
9121 using their arguments pretty quickly.
9122 Assume a four cycle delay for SH4 before they are needed.
9123 Cached ST40-300 calls are quicker, so assume only a one
9124 cycle delay there.
9125 ??? Maybe we should encode the delays till input registers
9126 are needed by sfuncs into the sfunc call insn. */
9127 /* All sfunc calls are parallels with at least four components.
9128 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9129 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9130 && XVECLEN (PATTERN (insn), 0) >= 4
9131 && (reg = sfunc_uses_reg (insn)))
9133 if (! reg_set_p (reg, dep_insn))
9134 cost -= TARGET_SH4_300 ? 1 : 4;
9136 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9138 enum attr_type dep_type = get_attr_type (dep_insn);
9140 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9141 cost--;
9142 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9143 && (type = get_attr_type (insn)) != TYPE_CALL
9144 && type != TYPE_SFUNC)
9145 cost--;
9146 /* When the preceding instruction loads the shift amount of
9147 the following SHAD/SHLD, the latency of the load is increased
9148 by 1 cycle. */
9149 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9150 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9151 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9152 XEXP (SET_SRC (single_set (insn)),
9153 1)))
9154 cost++;
9155 /* When an LS group instruction with a latency of less than
9156 3 cycles is followed by a double-precision floating-point
9157 instruction, FIPR, or FTRV, the latency of the first
9158 instruction is increased to 3 cycles. */
9159 else if (cost < 3
9160 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9161 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9162 cost = 3;
9163 /* The lsw register of a double-precision computation is ready one
9164 cycle earlier. */
9165 else if (reload_completed
9166 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9167 && (use_pat = single_set (insn))
9168 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9169 SET_SRC (use_pat)))
9170 cost -= 1;
9172 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9173 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9174 cost -= 1;
9176 else if (TARGET_SH4_300)
9178 /* Stores need their input register two cycles later. */
9179 if (dep_set && cost >= 1
9180 && ((type = get_attr_type (insn)) == TYPE_STORE
9181 || type == TYPE_PSTORE
9182 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9184 rtx set = single_set (insn);
9186 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9187 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9189 cost -= 2;
9190 /* But don't reduce the cost below 1 if the address depends
9191 on a side effect of dep_insn. */
9192 if (cost < 1
9193 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9194 cost = 1;
9199 /* An anti-dependence penalty of two applies if the first insn is a double
9200 precision fadd / fsub / fmul. */
9201 else if (!TARGET_SH4_300
9202 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9203 && recog_memoized (dep_insn) >= 0
9204 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9205 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9206 /* A lot of alleged anti-flow dependences are fake,
9207 so check this one is real. */
9208 && flow_dependent_p (dep_insn, insn))
9209 cost = 2;
9211 return cost;
9214 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9215 if DEP_INSN is anti-flow dependent on INSN. */
9216 static int
9217 flow_dependent_p (rtx insn, rtx dep_insn)
9219 rtx tmp = PATTERN (insn);
9221 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9222 return tmp == NULL_RTX;
9225 /* A helper function for flow_dependent_p called through note_stores. */
9226 static void
9227 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9229 rtx * pinsn = (rtx *) data;
9231 if (*pinsn && reg_referenced_p (x, *pinsn))
9232 *pinsn = NULL_RTX;
9235 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9236 'special function' patterns (type sfunc) that clobber pr, but that
9237 do not look like function calls to leaf_function_p. Hence we must
9238 do this extra check. */
9239 static int
9240 sh_pr_n_sets (void)
9242 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9245 /* Return where to allocate pseudo for a given hard register initial
9246 value. */
9247 static rtx
9248 sh_allocate_initial_value (rtx hard_reg)
9250 rtx x;
9252 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9254 if (current_function_is_leaf
9255 && ! sh_pr_n_sets ()
9256 && ! (TARGET_SHCOMPACT
9257 && ((crtl->args.info.call_cookie
9258 & ~ CALL_COOKIE_RET_TRAMP (1))
9259 || crtl->saves_all_registers)))
9260 x = hard_reg;
9261 else
9262 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9264 else
9265 x = NULL_RTX;
9267 return x;
9270 /* This function returns "2" to indicate dual issue for the SH4
9271 processor. To be used by the DFA pipeline description. */
9272 static int
9273 sh_issue_rate (void)
9275 if (TARGET_SUPERSCALAR)
9276 return 2;
9277 else
9278 return 1;
9281 /* Functions for ready queue reordering for sched1. */
9283 /* Get weight for mode for a set x. */
9284 static short
9285 find_set_regmode_weight (rtx x, enum machine_mode mode)
9287 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9288 return 1;
9289 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9291 if (GET_CODE (SET_DEST (x)) == REG)
9293 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9294 return 1;
9295 else
9296 return 0;
9298 return 1;
9300 return 0;
9303 /* Get regmode weight for insn. */
9304 static short
9305 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9307 short reg_weight = 0;
9308 rtx x;
9310 /* Increment weight for each register born here. */
9311 x = PATTERN (insn);
9312 reg_weight += find_set_regmode_weight (x, mode);
9313 if (GET_CODE (x) == PARALLEL)
9315 int j;
9316 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9318 x = XVECEXP (PATTERN (insn), 0, j);
9319 reg_weight += find_set_regmode_weight (x, mode);
9322 /* Decrement weight for each register that dies here. */
9323 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9325 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9327 rtx note = XEXP (x, 0);
9328 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9329 reg_weight--;
9332 return reg_weight;
9335 /* Calculate regmode weights for all insns of a basic block. */
9336 static void
9337 find_regmode_weight (basic_block b, enum machine_mode mode)
9339 rtx insn, next_tail, head, tail;
9341 get_ebb_head_tail (b, b, &head, &tail);
9342 next_tail = NEXT_INSN (tail);
9344 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9346 /* Handle register life information. */
9347 if (!INSN_P (insn))
9348 continue;
9350 if (mode == SFmode)
9351 INSN_REGMODE_WEIGHT (insn, mode) =
9352 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9353 else if (mode == SImode)
9354 INSN_REGMODE_WEIGHT (insn, mode) =
9355 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9359 /* Comparison function for ready queue sorting. */
9360 static int
9361 rank_for_reorder (const void *x, const void *y)
9363 rtx tmp = *(const rtx *) y;
9364 rtx tmp2 = *(const rtx *) x;
9366 /* The insn in a schedule group should be issued first. */
9367 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9368 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9370 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9371 minimizes instruction movement, thus minimizing sched's effect on
9372 register pressure. */
9373 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9376 /* Resort the array A in which only the element at index N may be out of order. */
9377 static void
9378 swap_reorder (rtx *a, int n)
9380 rtx insn = a[n - 1];
9381 int i = n - 2;
9383 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9385 a[i + 1] = a[i];
9386 i -= 1;
9388 a[i + 1] = insn;
9391 #define SCHED_REORDER(READY, N_READY) \
9392 do \
9394 if ((N_READY) == 2) \
9395 swap_reorder (READY, N_READY); \
9396 else if ((N_READY) > 2) \
9397 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9399 while (0)
9401 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9402 macro. */
9403 static void
9404 ready_reorder (rtx *ready, int nready)
9406 SCHED_REORDER (ready, nready);
9409 /* Count life regions of r0 for a block. */
9410 static int
9411 find_r0_life_regions (basic_block b)
9413 rtx end, insn;
9414 rtx pset;
9415 rtx r0_reg;
9416 int live;
9417 int set;
9418 int death = 0;
9420 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9422 set = 1;
9423 live = 1;
9425 else
9427 set = 0;
9428 live = 0;
9431 insn = BB_HEAD (b);
9432 end = BB_END (b);
9433 r0_reg = gen_rtx_REG (SImode, R0_REG);
9434 while (1)
9436 if (INSN_P (insn))
9438 if (find_regno_note (insn, REG_DEAD, R0_REG))
9440 death++;
9441 live = 0;
9443 if (!live
9444 && (pset = single_set (insn))
9445 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9446 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9448 set++;
9449 live = 1;
9452 if (insn == end)
9453 break;
9454 insn = NEXT_INSN (insn);
9456 return set - death;
9459 /* Calculate regmode weights for all insns of all basic blocks. */
9460 static void
9461 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9462 int verbose ATTRIBUTE_UNUSED,
9463 int old_max_uid)
9465 basic_block b;
9467 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9468 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9469 r0_life_regions = 0;
9471 FOR_EACH_BB_REVERSE (b)
9473 find_regmode_weight (b, SImode);
9474 find_regmode_weight (b, SFmode);
9475 if (!reload_completed)
9476 r0_life_regions += find_r0_life_regions (b);
9479 CURR_REGMODE_PRESSURE (SImode) = 0;
9480 CURR_REGMODE_PRESSURE (SFmode) = 0;
9484 /* Cleanup. */
9485 static void
9486 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9487 int verbose ATTRIBUTE_UNUSED)
9489 if (regmode_weight[0])
9491 free (regmode_weight[0]);
9492 regmode_weight[0] = NULL;
9494 if (regmode_weight[1])
9496 free (regmode_weight[1]);
9497 regmode_weight[1] = NULL;
9501 /* The scalar modes supported differ from the default version in TImode
9502 for 32-bit SHMEDIA. */
9503 static bool
9504 sh_scalar_mode_supported_p (enum machine_mode mode)
9506 if (TARGET_SHMEDIA32 && mode == TImode)
9507 return false;
9509 return default_scalar_mode_supported_p (mode);
9512 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9513 keep count of register pressures on SImode and SFmode. */
9514 static int
9515 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9516 int sched_verbose ATTRIBUTE_UNUSED,
9517 rtx insn,
9518 int can_issue_more)
9520 if (GET_CODE (PATTERN (insn)) != USE
9521 && GET_CODE (PATTERN (insn)) != CLOBBER)
9522 cached_can_issue_more = can_issue_more - 1;
9523 else
9524 cached_can_issue_more = can_issue_more;
9526 if (reload_completed)
9527 return cached_can_issue_more;
9529 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9530 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9532 return cached_can_issue_more;
9535 static void
9536 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9537 int verbose ATTRIBUTE_UNUSED,
9538 int veclen ATTRIBUTE_UNUSED)
9540 CURR_REGMODE_PRESSURE (SImode) = 0;
9541 CURR_REGMODE_PRESSURE (SFmode) = 0;
9544 /* Some magic numbers. */
9545 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9546 functions that already have high pressure on r0. */
9547 #define R0_MAX_LIFE_REGIONS 2
9548 /* Register Pressure thresholds for SImode and SFmode registers. */
9549 #define SIMODE_MAX_WEIGHT 5
9550 #define SFMODE_MAX_WEIGHT 10
9552 /* Return true if the pressure is high for MODE. */
9553 static short
9554 high_pressure (enum machine_mode mode)
9556 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9557 functions that already have high pressure on r0. */
9558 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9559 return 1;
9561 if (mode == SFmode)
9562 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9563 else
9564 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9567 /* Reorder ready queue if register pressure is high. */
9568 static int
9569 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9570 int sched_verbose ATTRIBUTE_UNUSED,
9571 rtx *ready,
9572 int *n_readyp,
9573 int clock_var ATTRIBUTE_UNUSED)
9575 if (reload_completed)
9576 return sh_issue_rate ();
9578 if (high_pressure (SFmode) || high_pressure (SImode))
9580 ready_reorder (ready, *n_readyp);
9583 return sh_issue_rate ();
9586 /* Skip cycles if the current register pressure is high. */
9587 static int
9588 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9589 int sched_verbose ATTRIBUTE_UNUSED,
9590 rtx *ready ATTRIBUTE_UNUSED,
9591 int *n_readyp ATTRIBUTE_UNUSED,
9592 int clock_var ATTRIBUTE_UNUSED)
9594 if (reload_completed)
9595 return cached_can_issue_more;
9597 if (high_pressure(SFmode) || high_pressure (SImode))
9598 skip_cycles = 1;
9600 return cached_can_issue_more;
9603 /* Skip cycles without sorting the ready queue. This will move insns from
9604 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9605 queue by sh_reorder. */
9607 /* Generally, skipping this many cycles is sufficient for all insns to move
9608 from Q -> R. */
9609 #define MAX_SKIPS 8
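/* A rough illustration (not from the original source) of how skip_cycles,
   MAX_SKIPS and the hook below interact on a pre-reload scheduling pass:
   once sh_reorder2 sees high pressure and sets skip_cycles, sh_dfa_new_cycle
   keeps returning 1 with *sort_p = 0 while (clock_var - last_clock_var) is
   still below MAX_SKIPS, so the scheduler advances cycles without resorting
   the ready list and queued insns can migrate from Q to R; on the cycle
   where the difference equals MAX_SKIPS, sorting is allowed again, and once
   it is exceeded skip_cycles is cleared and normal scheduling resumes.  */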
9611 static int
9612 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9613 int sched_verbose ATTRIBUTE_UNUSED,
9614 rtx insn ATTRIBUTE_UNUSED,
9615 int last_clock_var,
9616 int clock_var,
9617 int *sort_p)
9619 if (reload_completed)
9620 return 0;
9622 if (skip_cycles)
9624 if ((clock_var - last_clock_var) < MAX_SKIPS)
9626 *sort_p = 0;
9627 return 1;
9629 /* If this is the last cycle we are skipping, allow reordering of R. */
9630 if ((clock_var - last_clock_var) == MAX_SKIPS)
9632 *sort_p = 1;
9633 return 1;
9637 skip_cycles = 0;
9639 return 0;
9642 /* SHmedia requires registers for branches, so we can't generate new
9643 branches past reload. */
9644 static bool
9645 sh_cannot_modify_jumps_p (void)
9647 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9650 static int
9651 sh_target_reg_class (void)
9653 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9656 static bool
9657 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9659 HARD_REG_SET dummy;
9660 #if 0
9661 rtx insn;
9662 #endif
9664 if (! shmedia_space_reserved_for_target_registers)
9665 return 0;
9666 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9667 return 0;
9668 if (calc_live_regs (&dummy) >= 6 * 8)
9669 return 1;
9670 return 0;
9673 static bool
9674 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9676 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9680 On the SH1..SH4, the trampoline looks like
9681 2 0002 D202 mov.l l2,r2
9682 1 0000 D301 mov.l l1,r3
9683 3 0004 422B jmp @r2
9684 4 0006 0009 nop
9685 5 0008 00000000 l1: .long area
9686 6 000c 00000000 l2: .long function
9688 SH5 (compact) uses r1 instead of r3 for the static chain. */
9691 /* Emit RTL insns to initialize the variable parts of a trampoline.
9692 FNADDR is an RTX for the address of the function's pure code.
9693 CXT is an RTX for the static chain value for the function. */
9695 void
9696 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9698 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9700 if (TARGET_SHMEDIA64)
9702 rtx tramp_templ;
9703 int fixed_len;
9705 rtx movi1 = GEN_INT (0xcc000010);
9706 rtx shori1 = GEN_INT (0xc8000010);
9707 rtx src, dst;
9709 /* The following trampoline works within a +- 128 KB range for cxt:
9710 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9711 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9712 gettr tr1,r1; blink tr0,r63 */
9713 /* Address rounding makes it hard to compute the exact bounds of the
9714 offset for this trampoline, but we have a rather generous offset
9715 range, so frame_offset should do fine as an upper bound. */
9716 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9718 /* ??? could optimize this trampoline initialization
9719 by writing DImode words with two insns each. */
9720 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9721 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9722 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9723 insn = gen_rtx_AND (DImode, insn, mask);
9724 /* Or in ptb/u .,tr1 pattern */
9725 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9726 insn = force_operand (insn, NULL_RTX);
9727 insn = gen_lowpart (SImode, insn);
9728 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9729 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9730 insn = gen_rtx_AND (DImode, insn, mask);
9731 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9732 insn = gen_lowpart (SImode, insn);
9733 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9734 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9735 insn = gen_rtx_AND (DImode, insn, mask);
9736 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9737 insn = gen_lowpart (SImode, insn);
9738 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9739 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9740 insn = gen_rtx_AND (DImode, insn, mask);
9741 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9742 insn = gen_lowpart (SImode, insn);
9743 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9744 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9745 insn = gen_rtx_AND (DImode, insn, mask);
9746 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9747 insn = gen_lowpart (SImode, insn);
9748 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9749 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9750 GEN_INT (0x6bf10600));
9751 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9752 GEN_INT (0x4415fc10));
9753 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9754 GEN_INT (0x4401fff0));
9755 emit_insn (gen_ic_invalidate_line (tramp));
9756 return;
9758 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9759 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9761 tramp_templ = gen_datalabel_ref (tramp_templ);
9762 dst = tramp_mem;
9763 src = gen_const_mem (BLKmode, tramp_templ);
9764 set_mem_align (dst, 256);
9765 set_mem_align (src, 64);
9766 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9768 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9769 emit_move_insn (adjust_address (tramp_mem, Pmode,
9770 fixed_len + GET_MODE_SIZE (Pmode)),
9771 cxt);
9772 emit_insn (gen_ic_invalidate_line (tramp));
9773 return;
9775 else if (TARGET_SHMEDIA)
9777 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9778 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9779 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9780 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9781 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9782 rotated 10 right, and the higher 16 bits of every 32 selected. */
9783 rtx movishori
9784 = force_reg (V2HImode, (simplify_gen_subreg
9785 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9786 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9787 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9789 tramp = force_reg (Pmode, tramp);
9790 fnaddr = force_reg (SImode, fnaddr);
9791 cxt = force_reg (SImode, cxt);
9792 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9793 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9794 movishori));
9795 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9796 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9797 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9798 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9799 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9800 gen_rtx_SUBREG (V2HImode, cxt, 0),
9801 movishori));
9802 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9803 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9804 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9805 if (TARGET_LITTLE_ENDIAN)
9807 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9808 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9810 else
9812 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9813 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9815 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9816 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9817 emit_insn (gen_ic_invalidate_line (tramp));
9818 return;
9820 else if (TARGET_SHCOMPACT)
9822 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9823 return;
9825 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9826 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9827 SImode));
9828 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9829 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9830 SImode));
9831 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9832 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9833 if (TARGET_HARVARD)
9835 if (!TARGET_INLINE_IC_INVALIDATE
9836 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9837 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9838 FUNCTION_ORDINARY),
9839 0, VOIDmode, 1, tramp, SImode);
9840 else
9841 emit_insn (gen_ic_invalidate_line (tramp));
9845 /* FIXME: This is overly conservative. A SHcompact function that
9846 receives arguments ``by reference'' will have them stored in its
9847 own stack frame, so it must not pass pointers or references to
9848 these arguments to other functions by means of sibling calls. */
9849 /* If PIC, we cannot make sibling calls to global functions
9850 because the PLT requires r12 to be live. */
9851 static bool
9852 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9854 return (1
9855 && (! TARGET_SHCOMPACT
9856 || crtl->args.info.stack_regs == 0)
9857 && ! sh_cfun_interrupt_handler_p ()
9858 && (! flag_pic
9859 || (decl && ! TREE_PUBLIC (decl))
9860 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9863 /* Machine specific built-in functions. */
9865 struct builtin_description
9867 const enum insn_code icode;
9868 const char *const name;
9869 int signature;
9872 /* Describe number and signedness of arguments; arg[0] == result
9873 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9874 /* 9: 64-bit pointer, 10: 32-bit pointer */
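/* A worked reading of one entry, for illustration: SH_BLTIN_SH_HI below is
   { 4, 4, 1 }, meaning the result and the first argument use whatever mode
   the insn pattern dictates, with signedness "don't care" (4); the second
   argument (the shift count) is unsigned (1); and the remaining slot is 0,
   i.e. there is no third argument.  */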
9875 static const char signature_args[][4] =
9877 #define SH_BLTIN_V2SI2 0
9878 { 4, 4 },
9879 #define SH_BLTIN_V4HI2 1
9880 { 4, 4 },
9881 #define SH_BLTIN_V2SI3 2
9882 { 4, 4, 4 },
9883 #define SH_BLTIN_V4HI3 3
9884 { 4, 4, 4 },
9885 #define SH_BLTIN_V8QI3 4
9886 { 4, 4, 4 },
9887 #define SH_BLTIN_MAC_HISI 5
9888 { 1, 4, 4, 1 },
9889 #define SH_BLTIN_SH_HI 6
9890 { 4, 4, 1 },
9891 #define SH_BLTIN_SH_SI 7
9892 { 4, 4, 1 },
9893 #define SH_BLTIN_V4HI2V2SI 8
9894 { 4, 4, 4 },
9895 #define SH_BLTIN_V4HI2V8QI 9
9896 { 4, 4, 4 },
9897 #define SH_BLTIN_SISF 10
9898 { 4, 2 },
9899 #define SH_BLTIN_LDUA_L 11
9900 { 2, 10 },
9901 #define SH_BLTIN_LDUA_Q 12
9902 { 1, 10 },
9903 #define SH_BLTIN_STUA_L 13
9904 { 0, 10, 2 },
9905 #define SH_BLTIN_STUA_Q 14
9906 { 0, 10, 1 },
9907 #define SH_BLTIN_LDUA_L64 15
9908 { 2, 9 },
9909 #define SH_BLTIN_LDUA_Q64 16
9910 { 1, 9 },
9911 #define SH_BLTIN_STUA_L64 17
9912 { 0, 9, 2 },
9913 #define SH_BLTIN_STUA_Q64 18
9914 { 0, 9, 1 },
9915 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9916 #define SH_BLTIN_2 19
9917 #define SH_BLTIN_SU 19
9918 { 1, 2 },
9919 #define SH_BLTIN_3 20
9920 #define SH_BLTIN_SUS 20
9921 { 2, 2, 1 },
9922 #define SH_BLTIN_PSSV 21
9923 { 0, 8, 2, 2 },
9924 #define SH_BLTIN_XXUU 22
9925 #define SH_BLTIN_UUUU 22
9926 { 1, 1, 1, 1 },
9927 #define SH_BLTIN_PV 23
9928 { 0, 8 },
9930 /* mcmv: operands considered unsigned. */
9931 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9932 /* mperm: control value considered unsigned int. */
9933 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9934 /* mshards_q: returns signed short. */
9935 /* nsb: takes long long arg, returns unsigned char. */
9936 static const struct builtin_description bdesc[] =
9938 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9939 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9940 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9941 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9942 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9943 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9944 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9945 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9946 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9947 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9948 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9949 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9950 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9951 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9952 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9953 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9954 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9955 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9956 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9957 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9958 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9959 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9960 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9961 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9962 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9963 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9964 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9965 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9966 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9967 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9968 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9969 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9970 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9971 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9972 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9973 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9974 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9975 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9976 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9977 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9978 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9979 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9980 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9981 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9982 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9983 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9984 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9985 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9986 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9987 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9988 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9989 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9990 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9991 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9992 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9993 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9994 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9995 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9996 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9997 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9998 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9999 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10000 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10001 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10002 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10003 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10004 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10005 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10006 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10007 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10008 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10009 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10010 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10011 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10012 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10013 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10014 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10015 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10016 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10017 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10018 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10019 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10020 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10021 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10024 static void
10025 sh_media_init_builtins (void)
10027 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10028 const struct builtin_description *d;
10030 memset (shared, 0, sizeof shared);
10031 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10033 tree type, arg_type = 0;
10034 int signature = d->signature;
10035 int i;
10037 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10038 type = shared[signature];
10039 else
10041 int has_result = signature_args[signature][0] != 0;
10043 if ((signature_args[signature][1] & 8)
10044 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10045 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10046 continue;
10047 if (! TARGET_FPU_ANY
10048 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10049 continue;
10050 type = void_list_node;
10051 for (i = 3; ; i--)
10053 int arg = signature_args[signature][i];
10054 int opno = i - 1 + has_result;
10056 if (arg & 8)
10057 arg_type = ptr_type_node;
10058 else if (arg)
10059 arg_type = (*lang_hooks.types.type_for_mode)
10060 (insn_data[d->icode].operand[opno].mode,
10061 (arg & 1));
10062 else if (i)
10063 continue;
10064 else
10065 arg_type = void_type_node;
10066 if (i == 0)
10067 break;
10068 type = tree_cons (NULL_TREE, arg_type, type);
10070 type = build_function_type (arg_type, type);
10071 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10072 shared[signature] = type;
10074 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10075 NULL, NULL_TREE);
10079 /* Implements target hook vector_mode_supported_p. */
10080 bool
10081 sh_vector_mode_supported_p (enum machine_mode mode)
10083 if (TARGET_FPU_ANY
10084 && ((mode == V2SFmode)
10085 || (mode == V4SFmode)
10086 || (mode == V16SFmode)))
10087 return true;
10089 else if (TARGET_SHMEDIA
10090 && ((mode == V8QImode)
10091 || (mode == V2HImode)
10092 || (mode == V4HImode)
10093 || (mode == V2SImode)))
10094 return true;
10096 return false;
10099 /* Implements target hook dwarf_calling_convention. Return an enum
10100 of dwarf_calling_convention. */
10102 sh_dwarf_calling_convention (const_tree func)
10104 if (sh_attr_renesas_p (func))
10105 return DW_CC_GNU_renesas_sh;
10107 return DW_CC_normal;
10110 static void
10111 sh_init_builtins (void)
10113 if (TARGET_SHMEDIA)
10114 sh_media_init_builtins ();
10117 /* Expand an expression EXP that calls a built-in function,
10118 with result going to TARGET if that's convenient
10119 (and in mode MODE if that's convenient).
10120 SUBTARGET may be used as the target for computing one of EXP's operands.
10121 IGNORE is nonzero if the value is to be ignored. */
10123 static rtx
10124 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10125 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10127 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10128 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10129 const struct builtin_description *d = &bdesc[fcode];
10130 enum insn_code icode = d->icode;
10131 int signature = d->signature;
10132 enum machine_mode tmode = VOIDmode;
10133 int nop = 0, i;
10134 rtx op[4];
10135 rtx pat = 0;
10137 if (signature_args[signature][0])
10139 if (ignore)
10140 return 0;
10142 tmode = insn_data[icode].operand[0].mode;
10143 if (! target
10144 || GET_MODE (target) != tmode
10145 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10146 target = gen_reg_rtx (tmode);
10147 op[nop++] = target;
10149 else
10150 target = 0;
10152 for (i = 1; i <= 3; i++, nop++)
10154 tree arg;
10155 enum machine_mode opmode, argmode;
10156 tree optype;
10158 if (! signature_args[signature][i])
10159 break;
10160 arg = CALL_EXPR_ARG (exp, i - 1);
10161 if (arg == error_mark_node)
10162 return const0_rtx;
10163 if (signature_args[signature][i] & 8)
10165 opmode = ptr_mode;
10166 optype = ptr_type_node;
10168 else
10170 opmode = insn_data[icode].operand[nop].mode;
10171 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10173 argmode = TYPE_MODE (TREE_TYPE (arg));
10174 if (argmode != opmode)
10175 arg = build1 (NOP_EXPR, optype, arg);
10176 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
10177 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10178 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10181 switch (nop)
10183 case 1:
10184 pat = (*insn_data[d->icode].genfun) (op[0]);
10185 break;
10186 case 2:
10187 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10188 break;
10189 case 3:
10190 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10191 break;
10192 case 4:
10193 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10194 break;
10195 default:
10196 gcc_unreachable ();
10198 if (! pat)
10199 return 0;
10200 emit_insn (pat);
10201 return target;
10204 void
10205 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10207 rtx sel0 = const0_rtx;
10208 rtx sel1 = const1_rtx;
10209 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10210 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10212 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10213 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10216 void
10217 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10219 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10221 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10222 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
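/* A minimal usage sketch (illustrative only; the wrapper name below is
   hypothetical, not something defined in this port):  */
#if 0
static void
example_expand_v2sf_add (rtx dst, rtx a, rtx b)
{
  /* Emit dst = a + b for V2SFmode operands as two SFmode adds,
     one per vector lane, via gen_binary_sf_op0 / gen_binary_sf_op1.  */
  sh_expand_binop_v2sf (PLUS, dst, a, b);
}
#endif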
10225 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10226 We can allow any mode in any general register. The special registers
10227 only allow SImode. Don't allow any mode in the PR.
10229 We cannot hold DCmode values in the XD registers because alter_reg
10230 handles subregs of them incorrectly. We could work around this by
10231 spacing the XD registers like the DR registers, but this would require
10232 additional memory in every compilation to hold larger register vectors.
10233 We could hold SFmode / SCmode values in XD registers, but that
10234 would require a tertiary reload when reloading from / to memory,
10235 and a secondary reload to reload from / to general regs; that
10236 seems to be a losing proposition.
10238 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10239 it won't be ferried through GP registers first. */
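/* Two concrete consequences of the checks below, spelled out as examples:
   a DFmode value in the FP bank must start at an even-numbered register,
   since ((regno - FIRST_FP_REG) & 1) == 0 is required; and PR_REG only
   accepts SImode, so wider pseudos can never be allocated to it.  */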
10241 bool
10242 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10244 if (SPECIAL_REGISTER_P (regno))
10245 return mode == SImode;
10247 if (regno == FPUL_REG)
10248 return (mode == SImode || mode == SFmode);
10250 if (FP_REGISTER_P (regno) && mode == SFmode)
10251 return true;
10253 if (mode == V2SFmode)
10255 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10256 || GENERAL_REGISTER_P (regno)))
10257 return true;
10258 else
10259 return false;
10262 if (mode == V4SFmode)
10264 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10265 || GENERAL_REGISTER_P (regno))
10266 return true;
10267 else
10268 return false;
10271 if (mode == V16SFmode)
10273 if (TARGET_SHMEDIA)
10275 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10276 return true;
10277 else
10278 return false;
10280 else
10281 return regno == FIRST_XD_REG;
10284 if (FP_REGISTER_P (regno))
10286 if (mode == SFmode
10287 || mode == SImode
10288 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10289 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10290 || mode == DCmode
10291 || (TARGET_SHMEDIA
10292 && (mode == DFmode || mode == DImode
10293 || mode == V2SFmode || mode == TImode)))
10294 && ((regno - FIRST_FP_REG) & 1) == 0)
10295 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10296 && ((regno - FIRST_FP_REG) & 3) == 0))
10297 return true;
10298 else
10299 return false;
10302 if (XD_REGISTER_P (regno))
10303 return mode == DFmode;
10305 if (TARGET_REGISTER_P (regno))
10306 return (mode == DImode || mode == SImode || mode == PDImode);
10308 if (regno == PR_REG)
10309 return mode == SImode;
10311 if (regno == FPSCR_REG)
10312 return mode == PSImode;
10314 /* FIXME. This works around PR target/37633 for -O0. */
10315 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10317 unsigned int n = GET_MODE_SIZE (mode) / 8;
10319 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10320 && regno <= FIRST_GENERAL_REG + 14)
10321 return false;
10324 return true;
10327 /* Return true if a mode change from FROM to TO is invalid for registers
10328 in class RCLASS. */
10329 bool
10330 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10331 enum reg_class rclass)
10333 /* We want to enable the use of SUBREGs as a means to
10334 VEC_SELECT a single element of a vector. */
10335 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10336 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10338 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10340 if (TARGET_LITTLE_ENDIAN)
10342 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10343 return reg_classes_intersect_p (DF_REGS, rclass);
10345 else
10347 if (GET_MODE_SIZE (from) < 8)
10348 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10351 return 0;
10355 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10356 that label is used. */
10358 void
10359 sh_mark_label (rtx address, int nuses)
10361 if (GOTOFF_P (address))
10363 /* Extract the label or symbol. */
10364 address = XEXP (address, 0);
10365 if (GET_CODE (address) == PLUS)
10366 address = XEXP (address, 0);
10367 address = XVECEXP (address, 0, 0);
10369 if (GET_CODE (address) == LABEL_REF
10370 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10371 LABEL_NUSES (XEXP (address, 0)) += nuses;
10374 /* Compute extra cost of moving data between one register class
10375 and another. */
10377 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10378 uses this information. Hence, the general register <-> floating point
10379 register information here is not used for SFmode. */
10382 sh_register_move_cost (enum machine_mode mode,
10383 enum reg_class srcclass, enum reg_class dstclass)
10385 if (dstclass == T_REGS || dstclass == PR_REGS)
10386 return 10;
10388 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10389 return 4;
10391 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10392 && REGCLASS_HAS_FP_REG (srcclass)
10393 && REGCLASS_HAS_FP_REG (dstclass))
10394 return 4;
10396 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10397 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10399 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10400 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10401 return 9;
10403 if ((REGCLASS_HAS_FP_REG (dstclass)
10404 && REGCLASS_HAS_GENERAL_REG (srcclass))
10405 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10406 && REGCLASS_HAS_FP_REG (srcclass)))
10407 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10408 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10410 if ((dstclass == FPUL_REGS
10411 && REGCLASS_HAS_GENERAL_REG (srcclass))
10412 || (srcclass == FPUL_REGS
10413 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10414 return 5;
10416 if ((dstclass == FPUL_REGS
10417 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10418 || (srcclass == FPUL_REGS
10419 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10420 return 7;
10422 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10423 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10424 return 20;
10426 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10427 if (TARGET_SHMEDIA
10428 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10430 if (sh_gettrcost >= 0)
10431 return sh_gettrcost;
10432 else if (!TARGET_PT_FIXED)
10433 return 100;
10436 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10437 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10438 return 4;
10440 if (TARGET_SHMEDIA
10441 || (TARGET_FMOVD
10442 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10443 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10444 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10446 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
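/* A worked example of the cost formulas above, assuming a non-SHmedia,
   non-FMOVD target: a DFmode move between a general register class and an
   FP register class costs 12 * ((8 + 7) / 8) = 12, while an SImode move
   that reaches the final fallback costs 2 * ((4 + 3) / 4) = 2.  */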
10449 static rtx emit_load_ptr (rtx, rtx);
10451 static rtx
10452 emit_load_ptr (rtx reg, rtx addr)
10454 rtx mem = gen_const_mem (ptr_mode, addr);
10456 if (Pmode != ptr_mode)
10457 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10458 return emit_move_insn (reg, mem);
10461 static void
10462 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10463 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10464 tree function)
10466 CUMULATIVE_ARGS cum;
10467 int structure_value_byref = 0;
10468 rtx this_rtx, this_value, sibcall, insns, funexp;
10469 tree funtype = TREE_TYPE (function);
10470 int simple_add = CONST_OK_FOR_ADD (delta);
10471 int did_load = 0;
10472 rtx scratch0, scratch1, scratch2;
10473 unsigned i;
10475 reload_completed = 1;
10476 epilogue_completed = 1;
10477 current_function_uses_only_leaf_regs = 1;
10479 emit_note (NOTE_INSN_PROLOGUE_END);
10481 /* Find the "this" pointer. We have such a wide range of ABIs for the
10482 SH that it's best to do this completely machine independently.
10483 "this" is passed as first argument, unless a structure return pointer
10484 comes first, in which case "this" comes second. */
10485 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10486 #ifndef PCC_STATIC_STRUCT_RETURN
10487 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10488 structure_value_byref = 1;
10489 #endif /* not PCC_STATIC_STRUCT_RETURN */
10490 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10492 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10494 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10496 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10498 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10499 static chain pointer (even if you can't have nested virtual functions
10500 right now, someone might implement them sometime), and the rest of the
10501 registers are used for argument passing, are callee-saved, or reserved. */
10502 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10503 -ffixed-reg has been used. */
10504 if (! call_used_regs[0] || fixed_regs[0])
10505 error ("r0 needs to be available as a call-clobbered register");
10506 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10507 if (! TARGET_SH5)
10509 if (call_used_regs[1] && ! fixed_regs[1])
10510 scratch1 = gen_rtx_REG (ptr_mode, 1);
10511 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10512 to the location where struct values are returned. */
10513 if (call_used_regs[3] && ! fixed_regs[3])
10514 scratch2 = gen_rtx_REG (Pmode, 3);
10516 else if (TARGET_SHMEDIA)
10518 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10519 if (i != REGNO (scratch0) &&
10520 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10522 scratch1 = gen_rtx_REG (ptr_mode, i);
10523 break;
10525 if (scratch1 == scratch0)
10526 error ("Need a second call-clobbered general purpose register");
10527 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10528 if (call_used_regs[i] && ! fixed_regs[i])
10530 scratch2 = gen_rtx_REG (Pmode, i);
10531 break;
10533 if (scratch2 == scratch0)
10534 error ("Need a call-clobbered target register");
10537 this_value = plus_constant (this_rtx, delta);
10538 if (vcall_offset
10539 && (simple_add || scratch0 != scratch1)
10540 && strict_memory_address_p (ptr_mode, this_value))
10542 emit_load_ptr (scratch0, this_value);
10543 did_load = 1;
10546 if (!delta)
10547 ; /* Do nothing. */
10548 else if (simple_add)
10549 emit_move_insn (this_rtx, this_value);
10550 else
10552 emit_move_insn (scratch1, GEN_INT (delta));
10553 emit_insn (gen_add2_insn (this_rtx, scratch1));
10556 if (vcall_offset)
10558 rtx offset_addr;
10560 if (!did_load)
10561 emit_load_ptr (scratch0, this_rtx);
10563 offset_addr = plus_constant (scratch0, vcall_offset);
10564 if (strict_memory_address_p (ptr_mode, offset_addr))
10565 ; /* Do nothing. */
10566 else if (! TARGET_SH5 && scratch0 != scratch1)
10568 /* scratch0 != scratch1, and we have indexed loads. Get better
10569 schedule by loading the offset into r1 and using an indexed
10570 load - then the load of r1 can issue before the load from
10571 (this_rtx + delta) finishes. */
10572 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10573 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10575 else if (CONST_OK_FOR_ADD (vcall_offset))
10577 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10578 offset_addr = scratch0;
10580 else if (scratch0 != scratch1)
10582 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10583 emit_insn (gen_add2_insn (scratch0, scratch1));
10584 offset_addr = scratch0;
10586 else
10587 gcc_unreachable (); /* FIXME */
10588 emit_load_ptr (scratch0, offset_addr);
10590 if (Pmode != ptr_mode)
10591 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10592 emit_insn (gen_add2_insn (this_rtx, scratch0));
10595 /* Generate a tail call to the target function. */
10596 if (! TREE_USED (function))
10598 assemble_external (function);
10599 TREE_USED (function) = 1;
10601 funexp = XEXP (DECL_RTL (function), 0);
10602 /* If the function is overridden, so is the thunk, hence we don't
10603 need GOT addressing even if this is a public symbol. */
10604 #if 0
10605 if (TARGET_SH1 && ! flag_weak)
10606 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10607 else
10608 #endif
10609 if (TARGET_SH2 && flag_pic)
10611 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10612 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10614 else
10616 if (TARGET_SHMEDIA && flag_pic)
10618 funexp = gen_sym2PIC (funexp);
10619 PUT_MODE (funexp, Pmode);
10621 emit_move_insn (scratch2, funexp);
10622 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10623 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10625 sibcall = emit_call_insn (sibcall);
10626 SIBLING_CALL_P (sibcall) = 1;
10627 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10628 emit_barrier ();
10630 /* Run just enough of rest_of_compilation to do scheduling and get
10631 the insns emitted. Note that use_thunk calls
10632 assemble_start_function and assemble_end_function. */
10634 insn_locators_alloc ();
10635 insns = get_insns ();
10637 #if 0
10638 if (optimize > 0)
10640 /* Initialize the bitmap obstacks. */
10641 bitmap_obstack_initialize (NULL);
10642 bitmap_obstack_initialize (&reg_obstack);
10643 if (! cfun->cfg)
10644 init_flow ();
10645 rtl_register_cfg_hooks ();
10646 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10647 init_rtl_bb_info (EXIT_BLOCK_PTR);
10648 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10649 EXIT_BLOCK_PTR->flags |= BB_RTL;
10650 find_basic_blocks (insns);
10652 if (flag_schedule_insns_after_reload)
10654 life_analysis (PROP_FINAL);
10656 split_all_insns (1);
10658 schedule_insns ();
10660 /* We must split jmp insn in PIC case. */
10661 else if (flag_pic)
10662 split_all_insns_noflow ();
10664 #else
10665 if (optimize > 0)
10667 if (! cfun->cfg)
10668 init_flow (cfun);
10669 split_all_insns_noflow ();
10671 #endif
10673 sh_reorg ();
10675 if (optimize > 0 && flag_delayed_branch)
10676 dbr_schedule (insns);
10678 shorten_branches (insns);
10679 final_start_function (insns, file, 1);
10680 final (insns, file, 1);
10681 final_end_function ();
10682 free_after_compilation (cfun);
10684 reload_completed = 0;
10685 epilogue_completed = 0;
10689 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10691 rtx sym;
10693 /* If this is not an ordinary function, the name usually comes from a
10694 string literal or an sprintf buffer. Make sure we use the same
10695 string consistently, so that cse will be able to unify address loads. */
10696 if (kind != FUNCTION_ORDINARY)
10697 name = IDENTIFIER_POINTER (get_identifier (name));
10698 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10699 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10700 if (flag_pic)
10701 switch (kind)
10703 case FUNCTION_ORDINARY:
10704 break;
10705 case SFUNC_GOT:
10707 rtx reg = target ? target : gen_reg_rtx (Pmode);
10709 emit_insn (gen_symGOT2reg (reg, sym));
10710 sym = reg;
10711 break;
10713 case SFUNC_STATIC:
10715 /* ??? To allow cse to work, we use GOTOFF relocations.
10716 We could add combiner patterns to transform this into
10717 straight pc-relative calls with sym2PIC / bsrf when
10718 label load and function call are still 1:1 and in the
10719 same basic block during combine. */
10720 rtx reg = target ? target : gen_reg_rtx (Pmode);
10722 emit_insn (gen_symGOTOFF2reg (reg, sym));
10723 sym = reg;
10724 break;
10727 if (target && sym != target)
10729 emit_move_insn (target, sym);
10730 return target;
10732 return sym;
10735 /* Find the number of a general purpose register in S. */
10736 static int
10737 scavenge_reg (HARD_REG_SET *s)
10739 int r;
10740 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10741 if (TEST_HARD_REG_BIT (*s, r))
10742 return r;
10743 return -1;
10747 sh_get_pr_initial_val (void)
10749 rtx val;
10751 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10752 PR register on SHcompact, because it might be clobbered by the prologue.
10753 We check first if that is known to be the case. */
10754 if (TARGET_SHCOMPACT
10755 && ((crtl->args.info.call_cookie
10756 & ~ CALL_COOKIE_RET_TRAMP (1))
10757 || crtl->saves_all_registers))
10758 return gen_frame_mem (SImode, return_address_pointer_rtx);
10760 /* If we haven't finished rtl generation, there might be a nonlocal label
10761 that we haven't seen yet.
10762 ??? get_hard_reg_initial_val fails if it is called after register
10763 allocation has started, unless it has been called before for the
10764 same register. And even then, we end up in trouble if we didn't use
10765 the register in the same basic block before. So call
10766 get_hard_reg_initial_val now and wrap it in an unspec if we might
10767 need to replace it. */
10768 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10769 combine can put the pseudo returned by get_hard_reg_initial_val into
10770 instructions that need a general purpose register, which will fail to
10771 be recognized when the pseudo becomes allocated to PR. */
10773 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10774 if (TARGET_SH1)
10775 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10776 return val;
10780 sh_expand_t_scc (enum rtx_code code, rtx target)
10782 rtx result = target;
10783 HOST_WIDE_INT val;
10785 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10786 || GET_CODE (sh_compare_op1) != CONST_INT)
10787 return 0;
10788 if (GET_CODE (result) != REG)
10789 result = gen_reg_rtx (SImode);
10790 val = INTVAL (sh_compare_op1);
10791 if ((code == EQ && val == 1) || (code == NE && val == 0))
10792 emit_insn (gen_movt (result));
10793 else if (TARGET_SH2A && ((code == EQ && val == 0)
10794 || (code == NE && val == 1)))
10795 emit_insn (gen_movrt (result));
10796 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10798 emit_clobber (result);
10799 emit_insn (gen_subc (result, result, result));
10800 emit_insn (gen_addsi3 (result, result, const1_rtx));
10802 else if (code == EQ || code == NE)
10803 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10804 else
10805 return 0;
10806 if (result != target)
10807 emit_move_insn (target, result);
10808 return 1;
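/* Why the subc/add sequence above computes T == 0: subc leaves
   RESULT = RESULT - RESULT - T = -T, i.e. 0 when T is clear and 0xffffffff
   when T is set; adding 1 then gives 1 for T == 0 and 0 for T == 1, which
   is the value needed for the (EQ, 0) and (NE, 1) cases.  */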
10811 /* INSN is an sfunc; return the rtx that describes the address used. */
10812 static rtx
10813 extract_sfunc_addr (rtx insn)
10815 rtx pattern, part = NULL_RTX;
10816 int len, i;
10818 pattern = PATTERN (insn);
10819 len = XVECLEN (pattern, 0);
10820 for (i = 0; i < len; i++)
10822 part = XVECEXP (pattern, 0, i);
10823 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10824 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10825 return XEXP (part, 0);
10827 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10828 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10831 /* Verify that the register in use_sfunc_addr still agrees with the address
10832 used in the sfunc. This prevents fill_slots_from_thread from changing
10833 use_sfunc_addr.
10834 INSN is the use_sfunc_addr instruction, and REG is the register it
10835 guards. */
10837 check_use_sfunc_addr (rtx insn, rtx reg)
10839 /* Search for the sfunc. It should really come right after INSN. */
10840 while ((insn = NEXT_INSN (insn)))
10842 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10843 break;
10844 if (! INSN_P (insn))
10845 continue;
10847 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10848 insn = XVECEXP (PATTERN (insn), 0, 0);
10849 if (GET_CODE (PATTERN (insn)) != PARALLEL
10850 || get_attr_type (insn) != TYPE_SFUNC)
10851 continue;
10852 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10854 gcc_unreachable ();
10857 /* This function returns a constant rtx that represents 2**15 / pi in
10858 SFmode. It's used to scale SFmode angles, in radians, to a
10859 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10860 maps to 0x10000). */
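/* A quick arithmetic check of the constant used below:
   2**15 / pi = 32768 / 3.14159... = 10430.378350470453..., so multiplying
   an angle given in radians by it maps 2*pi radians to
   2*pi * 2**15 / pi = 2**16 = 0x10000, the full-circle fixed-point value.  */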
10862 static GTY(()) rtx sh_fsca_sf2int_rtx;
10865 sh_fsca_sf2int (void)
10867 if (! sh_fsca_sf2int_rtx)
10869 REAL_VALUE_TYPE rv;
10871 real_from_string (&rv, "10430.378350470453");
10872 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10875 return sh_fsca_sf2int_rtx;
10878 /* This function returns a constant rtx that represents 2**15 / pi in
10879 DFmode. It's used to scale DFmode angles, in radians, to a
10880 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10881 maps to 0x10000). */
10883 static GTY(()) rtx sh_fsca_df2int_rtx;
10886 sh_fsca_df2int (void)
10888 if (! sh_fsca_df2int_rtx)
10890 REAL_VALUE_TYPE rv;
10892 real_from_string (&rv, "10430.378350470453");
10893 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10896 return sh_fsca_df2int_rtx;
10899 /* This function returns a constant rtx that represents pi / 2**15 in
10900 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10901 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10902 2*pi). */
10904 static GTY(()) rtx sh_fsca_int2sf_rtx;
10907 sh_fsca_int2sf (void)
10909 if (! sh_fsca_int2sf_rtx)
10911 REAL_VALUE_TYPE rv;
10913 real_from_string (&rv, "9.587379924285257e-5");
10914 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10917 return sh_fsca_int2sf_rtx;
10920 /* Initialize the CUMULATIVE_ARGS structure. */
10922 void
10923 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10924 tree fntype,
10925 rtx libname ATTRIBUTE_UNUSED,
10926 tree fndecl,
10927 signed int n_named_args,
10928 enum machine_mode mode)
10930 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10931 pcum->free_single_fp_reg = 0;
10932 pcum->stack_regs = 0;
10933 pcum->byref_regs = 0;
10934 pcum->byref = 0;
10935 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10937 /* XXX - Should we check TARGET_HITACHI here ??? */
10938 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10940 if (fntype)
10942 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10943 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10944 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10945 pcum->arg_count [(int) SH_ARG_INT]
10946 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10948 pcum->call_cookie
10949 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10950 && pcum->arg_count [(int) SH_ARG_INT] == 0
10951 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10952 ? int_size_in_bytes (TREE_TYPE (fntype))
10953 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10954 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10955 == FIRST_RET_REG));
10957 else
10959 pcum->arg_count [(int) SH_ARG_INT] = 0;
10960 pcum->prototype_p = FALSE;
10961 if (mode != VOIDmode)
10963 pcum->call_cookie =
10964 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10965 && GET_MODE_SIZE (mode) > 4
10966 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10968 /* If the default ABI is the Renesas ABI then all library
10969 calls must assume that the library will be using the
10970 Renesas ABI. So if the function would return its result
10971 in memory then we must force the address of this memory
10972 block onto the stack. Ideally we would like to call
10973 targetm.calls.return_in_memory() here but we do not have
10974 the TYPE or the FNDECL available so we synthesize the
10975 contents of that function as best we can. */
10976 pcum->force_mem =
10977 (TARGET_DEFAULT & MASK_HITACHI)
10978 && (mode == BLKmode
10979 || (GET_MODE_SIZE (mode) > 4
10980 && !(mode == DFmode
10981 && TARGET_FPU_DOUBLE)));
10983 else
10985 pcum->call_cookie = 0;
10986 pcum->force_mem = FALSE;
10991 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10992 not enter into CONST_DOUBLE for the replace.
10994 Note that copying is not done so X must not be shared unless all copies
10995 are to be modified.
10997 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10998 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10999 replacements[n*2+1] - and that we take mode changes into account.
11001 If a replacement is ambiguous, return NULL_RTX.
11003 If MODIFY is zero, don't modify any rtl in place,
11004 just return zero or nonzero for failure / success. */
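/* An illustrative call sequence (not taken from this file; the register
   rtxes are placeholders), showing the REPLACEMENTS layout and the
   dry-run-then-modify use of the MODIFY flag:  */
#if 0
  rtx repl[4] = { from0, to0, from1, to1 };  /* FROM(0), TO(0), FROM(1), TO(1) */
  if (replace_n_hard_rtx (PATTERN (insn), repl, 2, 0))  /* check only */
    replace_n_hard_rtx (PATTERN (insn), repl, 2, 1);    /* modify in place */
#endif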
11007 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11009 int i, j;
11010 const char *fmt;
11012 /* The following prevents infinite loops when we change a MEM inside a
11013 CONST_DOUBLE into the same CONST_DOUBLE. */
11014 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11015 return x;
11017 for (i = n_replacements - 1; i >= 0 ; i--)
11018 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11019 return replacements[i*2+1];
11021 /* Allow this function to make replacements in EXPR_LISTs. */
11022 if (x == 0)
11023 return 0;
11025 if (GET_CODE (x) == SUBREG)
11027 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11028 n_replacements, modify);
11030 if (GET_CODE (new_rtx) == CONST_INT)
11032 x = simplify_subreg (GET_MODE (x), new_rtx,
11033 GET_MODE (SUBREG_REG (x)),
11034 SUBREG_BYTE (x));
11035 if (! x)
11036 abort ();
11038 else if (modify)
11039 SUBREG_REG (x) = new_rtx;
11041 return x;
11043 else if (GET_CODE (x) == REG)
11045 unsigned regno = REGNO (x);
11046 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11047 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11048 rtx result = NULL_RTX;
11050 for (i = n_replacements - 1; i >= 0; i--)
11052 rtx from = replacements[i*2];
11053 rtx to = replacements[i*2+1];
11054 unsigned from_regno, from_nregs, to_regno, new_regno;
11056 if (GET_CODE (from) != REG)
11057 continue;
11058 from_regno = REGNO (from);
11059 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11060 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11061 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11063 if (regno < from_regno
11064 || regno + nregs > from_regno + nregs
11065 || GET_CODE (to) != REG
11066 || result)
11067 return NULL_RTX;
11068 to_regno = REGNO (to);
11069 if (to_regno < FIRST_PSEUDO_REGISTER)
11071 new_regno = regno + to_regno - from_regno;
11072 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11073 != nregs)
11074 return NULL_RTX;
11075 result = gen_rtx_REG (GET_MODE (x), new_regno);
11077 else if (GET_MODE (x) <= GET_MODE (to))
11078 result = gen_lowpart_common (GET_MODE (x), to);
11079 else
11080 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11083 return result ? result : x;
11085 else if (GET_CODE (x) == ZERO_EXTEND)
11087 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11088 n_replacements, modify);
11090 if (GET_CODE (new_rtx) == CONST_INT)
11092 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11093 new_rtx, GET_MODE (XEXP (x, 0)));
11094 if (! x)
11095 abort ();
11097 else if (modify)
11098 XEXP (x, 0) = new_rtx;
11100 return x;
11103 fmt = GET_RTX_FORMAT (GET_CODE (x));
11104 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11106 rtx new_rtx;
11108 if (fmt[i] == 'e')
11110 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11111 n_replacements, modify);
11112 if (!new_rtx)
11113 return NULL_RTX;
11114 if (modify)
11115 XEXP (x, i) = new_rtx;
11117 else if (fmt[i] == 'E')
11118 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11120 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11121 n_replacements, modify);
11122 if (!new_rtx)
11123 return NULL_RTX;
11124 if (modify)
11125 XVECEXP (x, i, j) = new_rtx;
11129 return x;
11133 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11135 enum rtx_code code = TRUNCATE;
11137 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11139 rtx inner = XEXP (x, 0);
11140 enum machine_mode inner_mode = GET_MODE (inner);
11142 if (inner_mode == mode)
11143 return inner;
11144 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11145 x = inner;
11146 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11147 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11149 code = GET_CODE (x);
11150 x = inner;
11153 return gen_rtx_fmt_e (code, mode, x);
11156 /* Called via for_each_rtx after reload, to clean up truncates of
11157 registers that span multiple actual hard registers. */
11159 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11161 rtx x = *p, reg;
11163 if (GET_CODE (x) != TRUNCATE)
11164 return 0;
11165 reg = XEXP (x, 0);
11166 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11168 enum machine_mode reg_mode = GET_MODE (reg);
11169 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11170 subreg_lowpart_offset (DImode, reg_mode));
11171 *(int*) n_changes += 1;
11172 return -1;
11174 return 0;
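/* A sketch of how a walker like the one above is typically driven (the
   actual call site elsewhere in this file is not shown here); for_each_rtx
   visits every sub-rtx and the void * argument carries the change count:  */
#if 0
  int n_changes = 0;
  for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);
  if (n_changes)
    ;  /* e.g. re-split or re-recognize the modified insn.  */
#endif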
11177 /* Loads and stores depend on the high part of the address. However,
11178 set_attr_alternative does not give well-defined results before reload,
11179 so we must look at the rtl ourselves to see if any of the feeding
11180 registers is used in a memref. */
11182 /* Called by sh_contains_memref_p via for_each_rtx. */
11183 static int
11184 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11186 return (GET_CODE (*loc) == MEM);
11189 /* Return nonzero iff INSN contains a MEM. */
11191 sh_contains_memref_p (rtx insn)
11193 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11196 /* Return nonzero iff INSN loads a banked register. */
11198 sh_loads_bankedreg_p (rtx insn)
11200 if (GET_CODE (PATTERN (insn)) == SET)
11202 rtx op = SET_DEST (PATTERN(insn));
11203 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11204 return 1;
11207 return 0;
11210 /* FNADDR is the MEM expression from a call expander. Return an address
11211 to use in an SHmedia insn pattern. */
11213 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11215 int is_sym;
11217 fnaddr = XEXP (fnaddr, 0);
11218 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11219 if (flag_pic && is_sym)
11221 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11223 rtx reg = gen_reg_rtx (Pmode);
11225 /* We must not use GOTPLT for sibcalls, because PIC_REG
11226 must be restored before the PLT code gets to run. */
11227 if (is_sibcall)
11228 emit_insn (gen_symGOT2reg (reg, fnaddr));
11229 else
11230 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11231 fnaddr = reg;
11233 else
11235 fnaddr = gen_sym2PIC (fnaddr);
11236 PUT_MODE (fnaddr, Pmode);
11239 /* If ptabs might trap, make this visible to the rest of the compiler.
11240 We generally assume that symbols pertain to valid locations, but
11241 it is possible to generate invalid symbols with asm or linker tricks.
11242 In a list of functions where each returns its successor, an invalid
11243 symbol might denote an empty list. */
11244 if (!TARGET_PT_FIXED
11245 && (!is_sym || TARGET_INVALID_SYMBOLS)
11246 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11248 rtx tr = gen_reg_rtx (PDImode);
11250 emit_insn (gen_ptabs (tr, fnaddr));
11251 fnaddr = tr;
11253 else if (! target_reg_operand (fnaddr, Pmode))
11254 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11255 return fnaddr;
11258 enum reg_class
11259 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11260 enum machine_mode mode, secondary_reload_info *sri)
11262 if (in_p)
11264 if (REGCLASS_HAS_FP_REG (rclass)
11265 && ! TARGET_SHMEDIA
11266 && immediate_operand ((x), mode)
11267 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11268 && mode == SFmode && fldi_ok ()))
11269 switch (mode)
11271 case SFmode:
11272 sri->icode = CODE_FOR_reload_insf__frn;
11273 return NO_REGS;
11274 case DFmode:
11275 sri->icode = CODE_FOR_reload_indf__frn;
11276 return NO_REGS;
11277 case SImode:
11278 /* ??? If we knew that we are in the appropriate mode -
11279 single precision - we could use a reload pattern directly. */
11280 return FPUL_REGS;
11281 default:
11282 abort ();
11284 if (rclass == FPUL_REGS
11285 && ((GET_CODE (x) == REG
11286 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11287 || REGNO (x) == T_REG))
11288 || GET_CODE (x) == PLUS))
11289 return GENERAL_REGS;
11290 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11292 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11293 return GENERAL_REGS;
11294 else if (mode == SFmode)
11295 return FP_REGS;
11296 sri->icode = CODE_FOR_reload_insi__i_fpul;
11297 return NO_REGS;
11299 if (rclass == FPSCR_REGS
11300 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11301 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11302 return GENERAL_REGS;
11303 if (REGCLASS_HAS_FP_REG (rclass)
11304 && TARGET_SHMEDIA
11305 && immediate_operand (x, mode)
11306 && x != CONST0_RTX (GET_MODE (x))
11307 && GET_MODE (x) != V4SFmode)
11308 return GENERAL_REGS;
11309 if ((mode == QImode || mode == HImode)
11310 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11312 sri->icode = ((mode == QImode)
11313 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11314 return NO_REGS;
11316 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11317 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11318 return TARGET_REGS;
11319 } /* end of input-only processing. */
11321 if (((REGCLASS_HAS_FP_REG (rclass)
11322 && (GET_CODE (x) == REG
11323 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11324 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11325 && TARGET_FMOVD))))
11326 || (REGCLASS_HAS_GENERAL_REG (rclass)
11327 && GET_CODE (x) == REG
11328 && FP_REGISTER_P (REGNO (x))))
11329 && ! TARGET_SHMEDIA
11330 && (mode == SFmode || mode == SImode))
11331 return FPUL_REGS;
11332 if ((rclass == FPUL_REGS
11333 || (REGCLASS_HAS_FP_REG (rclass)
11334 && ! TARGET_SHMEDIA && mode == SImode))
11335 && (GET_CODE (x) == MEM
11336 || (GET_CODE (x) == REG
11337 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11338 || REGNO (x) == T_REG
11339 || system_reg_operand (x, VOIDmode)))))
11341 if (rclass == FPUL_REGS)
11342 return GENERAL_REGS;
11343 return FPUL_REGS;
11345 if ((rclass == TARGET_REGS
11346 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11347 && !satisfies_constraint_Csy (x)
11348 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11349 return GENERAL_REGS;
11350 if ((rclass == MAC_REGS || rclass == PR_REGS)
11351 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11352 && rclass != REGNO_REG_CLASS (REGNO (x)))
11353 return GENERAL_REGS;
11354 if (rclass != GENERAL_REGS && GET_CODE (x) == REG
11355 && TARGET_REGISTER_P (REGNO (x)))
11356 return GENERAL_REGS;
11357 return NO_REGS;
11360 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11362 #include "gt-sh.h"