gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "insn-config.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "flags.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "c-pragma.h"
41 #include "integrate.h"
42 #include "dwarf2.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "df.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
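/* For illustration: a DImode value viewed as two SImode words has its
   least significant word at byte offset 4 * LSW and its most significant
   word at byte offset 4 * MSW, whichever the endianness.  On a
   little-endian target LSW == 0, so the low word sits at offset 0; the
   %R / %S cases in print_operand below rely on exactly this via
   adjust_address (x, SImode, 4 * LSW) and 4 * MSW respectively.  */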
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
73 int current_function_interrupt;
75 tree sh_deferred_function_attributes;
76 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
78 /* Global variables for machine-dependent things. */
80 /* Which cpu are we scheduling for. */
81 enum processor_type sh_cpu;
83 /* Definitions used in ready queue reordering for first scheduling pass. */
85 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
86 static short *regmode_weight[2];
88 /* Total SFmode and SImode weights of scheduled insns. */
89 static int curr_regmode_pressure[2];
91 /* Number of r0 life regions. */
92 static int r0_life_regions;
94 /* If true, skip cycles for Q -> R movement. */
95 static int skip_cycles = 0;
97 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
98 and returned from sh_reorder2. */
99 static short cached_can_issue_more;
101 /* Saved operands from the last compare to use when we generate an scc
102 or bcc insn. */
104 rtx sh_compare_op0;
105 rtx sh_compare_op1;
107 /* Provides the class number of the smallest class containing
108 reg number. */
110 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
112 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
149 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
150 GENERAL_REGS, GENERAL_REGS,
153 char sh_register_names[FIRST_PSEUDO_REGISTER] \
154 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
156 char sh_additional_register_names[ADDREGNAMES_SIZE] \
157 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
158 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
160 int assembler_dialect;
162 static bool shmedia_space_reserved_for_target_registers;
164 static bool sh_handle_option (size_t, const char *, int);
165 static void split_branches (rtx);
166 static int branch_dest (rtx);
167 static void force_into (rtx, rtx);
168 static void print_slot (rtx);
169 static rtx add_constant (rtx, enum machine_mode, rtx);
170 static void dump_table (rtx, rtx);
171 static int hi_const (rtx);
172 static int broken_move (rtx);
173 static int mova_p (rtx);
174 static rtx find_barrier (int, rtx, rtx);
175 static int noncall_uses_reg (rtx, rtx, rtx *);
176 static rtx gen_block_redirect (rtx, int, int);
177 static void sh_reorg (void);
178 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
179 static rtx frame_insn (rtx);
180 static rtx push (int);
181 static void pop (int);
182 static void push_regs (HARD_REG_SET *, int);
183 static int calc_live_regs (HARD_REG_SET *);
184 static HOST_WIDE_INT rounded_frame_size (int);
185 static rtx mark_constant_pool_use (rtx);
186 const struct attribute_spec sh_attribute_table[];
187 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
188 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
190 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
191 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
192 static void sh_insert_attributes (tree, tree *);
193 static const char *sh_check_pch_target_flags (int);
194 static int sh_adjust_cost (rtx, rtx, rtx, int);
195 static int sh_issue_rate (void);
196 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
197 static short find_set_regmode_weight (rtx, enum machine_mode);
198 static short find_insn_regmode_weight (rtx, enum machine_mode);
199 static void find_regmode_weight (basic_block, enum machine_mode);
200 static int find_r0_life_regions (basic_block);
201 static void sh_md_init_global (FILE *, int, int);
202 static void sh_md_finish_global (FILE *, int);
203 static int rank_for_reorder (const void *, const void *);
204 static void swap_reorder (rtx *, int);
205 static void ready_reorder (rtx *, int);
206 static short high_pressure (enum machine_mode);
207 static int sh_reorder (FILE *, int, rtx *, int *, int);
208 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
209 static void sh_md_init (FILE *, int, int);
210 static int sh_variable_issue (FILE *, int, rtx, int);
212 static bool sh_function_ok_for_sibcall (tree, tree);
214 static bool sh_cannot_modify_jumps_p (void);
215 static int sh_target_reg_class (void);
216 static bool sh_optimize_target_register_callee_saved (bool);
217 static bool sh_ms_bitfield_layout_p (const_tree);
219 static void sh_init_builtins (void);
220 static void sh_media_init_builtins (void);
221 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
222 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
223 static void sh_file_start (void);
224 static int flow_dependent_p (rtx, rtx);
225 static void flow_dependent_p_1 (rtx, const_rtx, void *);
226 static int shiftcosts (rtx);
227 static int andcosts (rtx);
228 static int addsubcosts (rtx);
229 static int multcosts (rtx);
230 static bool unspec_caller_rtx_p (rtx);
231 static bool sh_cannot_copy_insn_p (rtx);
232 static bool sh_rtx_costs (rtx, int, int, int *);
233 static int sh_address_cost (rtx);
234 static int sh_pr_n_sets (void);
235 static rtx sh_allocate_initial_value (rtx);
236 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
237 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
238 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
239 static int scavenge_reg (HARD_REG_SET *s);
240 struct save_schedule_s;
241 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
242 struct save_schedule_s *, int);
244 static rtx sh_struct_value_rtx (tree, int);
245 static bool sh_return_in_memory (const_tree, const_tree);
246 static rtx sh_builtin_saveregs (void);
247 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
248 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
249 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
250 static tree sh_build_builtin_va_list (void);
251 static void sh_va_start (tree, rtx);
252 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
253 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
254 const_tree, bool);
255 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
256 const_tree, bool);
257 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
258 tree, bool);
259 static int sh_dwarf_calling_convention (const_tree);
262 /* Initialize the GCC target structure. */
263 #undef TARGET_ATTRIBUTE_TABLE
264 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
266 /* The next two are used for debug info when compiling with -gdwarf. */
267 #undef TARGET_ASM_UNALIGNED_HI_OP
268 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
269 #undef TARGET_ASM_UNALIGNED_SI_OP
270 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
272 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
273 #undef TARGET_ASM_UNALIGNED_DI_OP
274 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
275 #undef TARGET_ASM_ALIGNED_DI_OP
276 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
278 #undef TARGET_ASM_FUNCTION_EPILOGUE
279 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
281 #undef TARGET_ASM_OUTPUT_MI_THUNK
282 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
284 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
285 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
287 #undef TARGET_ASM_FILE_START
288 #define TARGET_ASM_FILE_START sh_file_start
289 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
290 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
292 #undef TARGET_DEFAULT_TARGET_FLAGS
293 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
294 #undef TARGET_HANDLE_OPTION
295 #define TARGET_HANDLE_OPTION sh_handle_option
297 #undef TARGET_INSERT_ATTRIBUTES
298 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
300 #undef TARGET_SCHED_ADJUST_COST
301 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
303 #undef TARGET_SCHED_ISSUE_RATE
304 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
 306 /* The following hooks have been implemented to re-enable sched1.  With the
 307 help of these hooks we limit the movement of insns in sched1 to
 308 reduce the register pressure.  The overall idea is to keep count of SImode
 309 and SFmode regs required by already scheduled insns.  When these counts
 310 cross some threshold values, give priority to insns that free registers.
 311 The insn that frees registers is most likely to be the insn with the lowest
 312 LUID (original insn order); but such an insn might be sitting in the stalled
 313 queue (Q) instead of the ready queue (R).  To solve this, we skip cycles,
 314 up to a maximum of 8, so that such insns may move from Q -> R.
 316 The hooks are described below:
318 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
319 scheduler; it is called inside the sched_init function just after
320 find_insn_reg_weights function call. It is used to calculate the SImode
321 and SFmode weights of insns of basic blocks; much similar to what
322 find_insn_reg_weights does.
323 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
325 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
326 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
327 (Q)->(R).
329 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
330 high; reorder the ready queue so that the insn with lowest LUID will be
331 issued next.
333 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
334 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
336 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
337 can be returned from TARGET_SCHED_REORDER2.
339 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
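/* A rough sketch of the order in which the generic (haifa) scheduler is
   expected to invoke these hooks during the first scheduling pass; the
   exact call points belong to the scheduler proper and may vary between
   GCC versions:

     sh_md_init_global      once, from sched_init
       sh_md_init           at the start of each scheduling region
         sh_reorder         before issuing insns in a cycle
         sh_variable_issue  after each issued insn (caches can_issue_more)
         sh_reorder2        at the end of a cycle (may request cycle skips)
         sh_dfa_new_cycle   when advancing to a new cycle (performs skips)
     sh_md_finish_global    once, from sched_finish  */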
341 #undef TARGET_SCHED_DFA_NEW_CYCLE
342 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
344 #undef TARGET_SCHED_INIT_GLOBAL
345 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
347 #undef TARGET_SCHED_FINISH_GLOBAL
348 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
350 #undef TARGET_SCHED_VARIABLE_ISSUE
351 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
353 #undef TARGET_SCHED_REORDER
354 #define TARGET_SCHED_REORDER sh_reorder
356 #undef TARGET_SCHED_REORDER2
357 #define TARGET_SCHED_REORDER2 sh_reorder2
359 #undef TARGET_SCHED_INIT
360 #define TARGET_SCHED_INIT sh_md_init
362 #undef TARGET_CANNOT_MODIFY_JUMPS_P
363 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
364 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
365 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
366 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
367 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
368 sh_optimize_target_register_callee_saved
370 #undef TARGET_MS_BITFIELD_LAYOUT_P
371 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS sh_init_builtins
375 #undef TARGET_EXPAND_BUILTIN
376 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
378 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
379 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
381 #undef TARGET_CANNOT_COPY_INSN_P
382 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
383 #undef TARGET_RTX_COSTS
384 #define TARGET_RTX_COSTS sh_rtx_costs
385 #undef TARGET_ADDRESS_COST
386 #define TARGET_ADDRESS_COST sh_address_cost
387 #undef TARGET_ALLOCATE_INITIAL_VALUE
388 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
390 #undef TARGET_MACHINE_DEPENDENT_REORG
391 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
393 #ifdef HAVE_AS_TLS
394 #undef TARGET_HAVE_TLS
395 #define TARGET_HAVE_TLS true
396 #endif
398 #undef TARGET_PROMOTE_PROTOTYPES
399 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
400 #undef TARGET_PROMOTE_FUNCTION_ARGS
401 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
402 #undef TARGET_PROMOTE_FUNCTION_RETURN
403 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
405 #undef TARGET_STRUCT_VALUE_RTX
406 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
407 #undef TARGET_RETURN_IN_MEMORY
408 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
410 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
411 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
412 #undef TARGET_SETUP_INCOMING_VARARGS
413 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
414 #undef TARGET_STRICT_ARGUMENT_NAMING
415 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
416 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
417 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
418 #undef TARGET_MUST_PASS_IN_STACK
419 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
420 #undef TARGET_PASS_BY_REFERENCE
421 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
422 #undef TARGET_CALLEE_COPIES
423 #define TARGET_CALLEE_COPIES sh_callee_copies
424 #undef TARGET_ARG_PARTIAL_BYTES
425 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
427 #undef TARGET_BUILD_BUILTIN_VA_LIST
428 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
429 #undef TARGET_EXPAND_BUILTIN_VA_START
430 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
431 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
432 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
434 #undef TARGET_VECTOR_MODE_SUPPORTED_P
435 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
437 #undef TARGET_CHECK_PCH_TARGET_FLAGS
438 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
440 #undef TARGET_DWARF_CALLING_CONVENTION
441 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
443 /* Return regmode weight for insn. */
444 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
446 /* Return current register pressure for regmode. */
447 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
449 #ifdef SYMBIAN
451 #undef TARGET_ENCODE_SECTION_INFO
452 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
453 #undef TARGET_STRIP_NAME_ENCODING
454 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
455 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
456 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
458 #endif /* SYMBIAN */
460 #undef TARGET_SECONDARY_RELOAD
461 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
463 struct gcc_target targetm = TARGET_INITIALIZER;
465 /* Implement TARGET_HANDLE_OPTION. */
467 static bool
468 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
469 int value ATTRIBUTE_UNUSED)
471 switch (code)
473 case OPT_m1:
474 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
475 return true;
477 case OPT_m2:
478 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
479 return true;
481 case OPT_m2a:
482 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
483 return true;
485 case OPT_m2a_nofpu:
486 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
487 return true;
489 case OPT_m2a_single:
490 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
491 return true;
493 case OPT_m2a_single_only:
494 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
495 return true;
497 case OPT_m2e:
498 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
499 return true;
501 case OPT_m3:
502 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
503 return true;
505 case OPT_m3e:
506 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
507 return true;
509 case OPT_m4:
510 case OPT_m4_100:
511 case OPT_m4_200:
512 case OPT_m4_300:
513 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
514 return true;
516 case OPT_m4_nofpu:
517 case OPT_m4_100_nofpu:
518 case OPT_m4_200_nofpu:
519 case OPT_m4_300_nofpu:
520 case OPT_m4_340:
521 case OPT_m4_400:
522 case OPT_m4_500:
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
524 return true;
526 case OPT_m4_single:
527 case OPT_m4_100_single:
528 case OPT_m4_200_single:
529 case OPT_m4_300_single:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
531 return true;
533 case OPT_m4_single_only:
534 case OPT_m4_100_single_only:
535 case OPT_m4_200_single_only:
536 case OPT_m4_300_single_only:
537 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
538 return true;
540 case OPT_m4a:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
542 return true;
544 case OPT_m4a_nofpu:
545 case OPT_m4al:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
547 return true;
549 case OPT_m4a_single:
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
551 return true;
553 case OPT_m4a_single_only:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
555 return true;
557 case OPT_m5_32media:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
559 return true;
561 case OPT_m5_32media_nofpu:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
563 return true;
565 case OPT_m5_64media:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
567 return true;
569 case OPT_m5_64media_nofpu:
570 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
571 return true;
573 case OPT_m5_compact:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
575 return true;
577 case OPT_m5_compact_nofpu:
578 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
579 return true;
581 default:
582 return true;
586 /* Print the operand address in x to the stream. */
588 void
589 print_operand_address (FILE *stream, rtx x)
591 switch (GET_CODE (x))
593 case REG:
594 case SUBREG:
595 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
596 break;
598 case PLUS:
600 rtx base = XEXP (x, 0);
601 rtx index = XEXP (x, 1);
603 switch (GET_CODE (index))
605 case CONST_INT:
606 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
607 reg_names[true_regnum (base)]);
608 break;
610 case REG:
611 case SUBREG:
613 int base_num = true_regnum (base);
614 int index_num = true_regnum (index);
616 fprintf (stream, "@(r0,%s)",
617 reg_names[MAX (base_num, index_num)]);
618 break;
621 default:
622 gcc_unreachable ();
625 break;
627 case PRE_DEC:
628 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
629 break;
631 case POST_INC:
632 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
633 break;
635 default:
636 x = mark_constant_pool_use (x);
637 output_addr_const (stream, x);
638 break;
642 /* Print operand x (an rtx) in assembler syntax to file stream
643 according to modifier code.
645 '.' print a .s if insn needs delay slot
646 ',' print LOCAL_LABEL_PREFIX
 647 '@' print trapa, rte or rts depending upon the trap_exit / interrupt handler attributes
648 '#' output a nop if there is nothing to put in the delay slot
649 ''' print likelihood suffix (/u for unlikely).
650 '>' print branch target if -fverbose-asm
651 'O' print a constant without the #
652 'R' print the LSW of a dp value - changes if in little endian
653 'S' print the MSW of a dp value - changes if in little endian
654 'T' print the next word of a dp value - same as 'R' in big endian mode.
655 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
656 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
657 'N' print 'r63' if the operand is (const_int 0).
658 'd' print a V2SF reg as dN instead of fpN.
659 'm' print a pair `base,offset' or `base,index', for LD and ST.
660 'U' Likewise for {LD,ST}{HI,LO}.
661 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
662 'o' output an operator. */
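/* Worked example, assuming a little-endian target: a DImode value held in
   the register pair r1/r2 has LSW == 0 and MSW == 1, so "%R0" prints "r1"
   (low word), "%S0" prints "r2" (high word) and "%T0" prints the following
   register, "r2".  For a MEM operand the same modifiers instead offset the
   address by 4 * LSW / 4 * MSW bytes, as handled in the 'R' / 'S' cases
   below.  */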
664 void
665 print_operand (FILE *stream, rtx x, int code)
667 int regno;
668 enum machine_mode mode;
670 switch (code)
672 tree trapa_attr;
674 case '.':
675 if (final_sequence
676 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
677 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
678 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
679 break;
680 case ',':
681 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
682 break;
683 case '@':
684 trapa_attr = lookup_attribute ("trap_exit",
685 DECL_ATTRIBUTES (current_function_decl));
686 if (trapa_attr)
687 fprintf (stream, "trapa #%ld",
688 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
689 else if (sh_cfun_interrupt_handler_p ())
690 fprintf (stream, "rte");
691 else
692 fprintf (stream, "rts");
693 break;
694 case '#':
695 /* Output a nop if there's nothing in the delay slot. */
696 if (dbr_sequence_length () == 0)
697 fprintf (stream, "\n\tnop");
698 break;
699 case '\'':
701 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
703 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
704 fputs ("/u", stream);
705 break;
707 case '>':
708 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
710 fputs ("\t! target: ", stream);
711 output_addr_const (stream, JUMP_LABEL (current_output_insn));
713 break;
714 case 'O':
715 x = mark_constant_pool_use (x);
716 output_addr_const (stream, x);
717 break;
718 /* N.B.: %R / %S / %T adjust memory addresses by four.
719 For SHMEDIA, that means they can be used to access the first and
720 second 32 bit part of a 64 bit (or larger) value that
721 might be held in floating point registers or memory.
722 While they can be used to access 64 bit parts of a larger value
723 held in general purpose registers, that won't work with memory -
724 neither for fp registers, since the frxx names are used. */
725 case 'R':
726 if (REG_P (x) || GET_CODE (x) == SUBREG)
728 regno = true_regnum (x);
729 regno += FP_REGISTER_P (regno) ? 1 : LSW;
730 fputs (reg_names[regno], (stream));
732 else if (MEM_P (x))
734 x = adjust_address (x, SImode, 4 * LSW);
735 print_operand_address (stream, XEXP (x, 0));
737 else
739 rtx sub = NULL_RTX;
741 mode = GET_MODE (x);
742 if (mode == VOIDmode)
743 mode = DImode;
744 if (GET_MODE_SIZE (mode) >= 8)
745 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
746 if (sub)
747 print_operand (stream, sub, 0);
748 else
749 output_operand_lossage ("invalid operand to %%R");
751 break;
752 case 'S':
753 if (REG_P (x) || GET_CODE (x) == SUBREG)
755 regno = true_regnum (x);
756 regno += FP_REGISTER_P (regno) ? 0 : MSW;
757 fputs (reg_names[regno], (stream));
759 else if (MEM_P (x))
761 x = adjust_address (x, SImode, 4 * MSW);
762 print_operand_address (stream, XEXP (x, 0));
764 else
766 rtx sub = NULL_RTX;
768 mode = GET_MODE (x);
769 if (mode == VOIDmode)
770 mode = DImode;
771 if (GET_MODE_SIZE (mode) >= 8)
772 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
773 if (sub)
774 print_operand (stream, sub, 0);
775 else
776 output_operand_lossage ("invalid operand to %%S");
778 break;
779 case 'T':
780 /* Next word of a double. */
781 switch (GET_CODE (x))
783 case REG:
784 fputs (reg_names[REGNO (x) + 1], (stream));
785 break;
786 case MEM:
787 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
788 && GET_CODE (XEXP (x, 0)) != POST_INC)
789 x = adjust_address (x, SImode, 4);
790 print_operand_address (stream, XEXP (x, 0));
791 break;
792 default:
793 break;
795 break;
796 case 'o':
797 switch (GET_CODE (x))
799 case PLUS: fputs ("add", stream); break;
800 case MINUS: fputs ("sub", stream); break;
801 case MULT: fputs ("mul", stream); break;
802 case DIV: fputs ("div", stream); break;
803 case EQ: fputs ("eq", stream); break;
804 case NE: fputs ("ne", stream); break;
805 case GT: case LT: fputs ("gt", stream); break;
806 case GE: case LE: fputs ("ge", stream); break;
807 case GTU: case LTU: fputs ("gtu", stream); break;
808 case GEU: case LEU: fputs ("geu", stream); break;
809 default:
810 break;
812 break;
813 case 'M':
814 if (TARGET_SHMEDIA)
816 if (GET_CODE (x) == MEM
817 && GET_CODE (XEXP (x, 0)) == PLUS
818 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
819 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
820 fputc ('x', stream);
822 else
824 if (GET_CODE (x) == MEM)
826 switch (GET_MODE (x))
828 case QImode: fputs (".b", stream); break;
829 case HImode: fputs (".w", stream); break;
830 case SImode: fputs (".l", stream); break;
831 case SFmode: fputs (".s", stream); break;
832 case DFmode: fputs (".d", stream); break;
833 default: gcc_unreachable ();
837 break;
839 case 'm':
840 gcc_assert (GET_CODE (x) == MEM);
841 x = XEXP (x, 0);
842 /* Fall through. */
843 case 'U':
844 switch (GET_CODE (x))
846 case REG:
847 case SUBREG:
848 print_operand (stream, x, 0);
849 fputs (", 0", stream);
850 break;
852 case PLUS:
853 print_operand (stream, XEXP (x, 0), 0);
854 fputs (", ", stream);
855 print_operand (stream, XEXP (x, 1), 0);
856 break;
858 default:
859 gcc_unreachable ();
861 break;
863 case 'd':
864 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
866 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
867 break;
869 case 'N':
870 if (x == CONST0_RTX (GET_MODE (x)))
872 fprintf ((stream), "r63");
873 break;
875 goto default_output;
876 case 'u':
877 if (GET_CODE (x) == CONST_INT)
879 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
880 break;
882 /* Fall through. */
884 default_output:
885 default:
886 regno = 0;
887 mode = GET_MODE (x);
889 switch (GET_CODE (x))
891 case TRUNCATE:
893 rtx inner = XEXP (x, 0);
894 int offset = 0;
895 enum machine_mode inner_mode;
897 /* We might see SUBREGs with vector mode registers inside. */
898 if (GET_CODE (inner) == SUBREG
899 && (GET_MODE_SIZE (GET_MODE (inner))
900 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
901 && subreg_lowpart_p (inner))
902 inner = SUBREG_REG (inner);
903 if (GET_CODE (inner) == CONST_INT)
905 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
906 goto default_output;
908 inner_mode = GET_MODE (inner);
909 if (GET_CODE (inner) == SUBREG
910 && (GET_MODE_SIZE (GET_MODE (inner))
911 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
912 && GET_CODE (SUBREG_REG (inner)) == REG)
914 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
915 GET_MODE (SUBREG_REG (inner)),
916 SUBREG_BYTE (inner),
917 GET_MODE (inner));
918 inner = SUBREG_REG (inner);
920 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
921 abort ();
922 /* Floating point register pairs are always big endian;
923 general purpose registers are 64 bit wide. */
924 regno = REGNO (inner);
925 regno = (HARD_REGNO_NREGS (regno, inner_mode)
926 - HARD_REGNO_NREGS (regno, mode))
927 + offset;
928 x = inner;
929 goto reg;
931 case SIGN_EXTEND:
932 x = XEXP (x, 0);
933 goto reg;
934 /* FIXME: We need this on SHmedia32 because reload generates
935 some sign-extended HI or QI loads into DImode registers
936 but, because Pmode is SImode, the address ends up with a
937 subreg:SI of the DImode register. Maybe reload should be
938 fixed so as to apply alter_subreg to such loads? */
939 case IF_THEN_ELSE:
940 gcc_assert (trapping_target_operand (x, VOIDmode));
941 x = XEXP (XEXP (x, 2), 0);
942 goto default_output;
943 case SUBREG:
944 gcc_assert (SUBREG_BYTE (x) == 0
945 && GET_CODE (SUBREG_REG (x)) == REG);
947 x = SUBREG_REG (x);
948 /* Fall through. */
950 reg:
951 case REG:
952 regno += REGNO (x);
953 if (FP_REGISTER_P (regno)
954 && mode == V16SFmode)
955 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
956 else if (FP_REGISTER_P (REGNO (x))
957 && mode == V4SFmode)
958 fprintf ((stream), "fv%s", reg_names[regno] + 2);
959 else if (GET_CODE (x) == REG
960 && mode == V2SFmode)
961 fprintf ((stream), "fp%s", reg_names[regno] + 2);
962 else if (FP_REGISTER_P (REGNO (x))
963 && GET_MODE_SIZE (mode) > 4)
964 fprintf ((stream), "d%s", reg_names[regno] + 1);
965 else
966 fputs (reg_names[regno], (stream));
967 break;
969 case MEM:
970 output_address (XEXP (x, 0));
971 break;
973 case CONST:
974 if (TARGET_SHMEDIA
975 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
976 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
977 && (GET_MODE (XEXP (x, 0)) == DImode
978 || GET_MODE (XEXP (x, 0)) == SImode)
979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
980 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
982 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
983 rtx val2 = val;
984 bool nested_expr = false;
986 fputc ('(', stream);
987 if (GET_CODE (val) == ASHIFTRT)
989 fputc ('(', stream);
990 val2 = XEXP (val, 0);
992 if (GET_CODE (val2) == CONST
993 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
995 fputc ('(', stream);
996 nested_expr = true;
998 output_addr_const (stream, val2);
999 if (nested_expr)
1000 fputc (')', stream);
1001 if (GET_CODE (val) == ASHIFTRT)
1003 fputs (" >> ", stream);
1004 output_addr_const (stream, XEXP (val, 1));
1005 fputc (')', stream);
1007 fputs (" & 65535)", stream);
1008 break;
1011 /* Fall through. */
1012 default:
1013 if (TARGET_SH1)
1014 fputc ('#', stream);
1015 output_addr_const (stream, x);
1016 break;
1018 break;
1022 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1023 static void
1024 force_into (rtx value, rtx target)
1026 value = force_operand (value, target);
1027 if (! rtx_equal_p (value, target))
1028 emit_insn (gen_move_insn (target, value));
1031 /* Emit code to perform a block move. Choose the best method.
1033 OPERANDS[0] is the destination.
1034 OPERANDS[1] is the source.
1035 OPERANDS[2] is the size.
1036 OPERANDS[3] is the alignment safe to use. */
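/* This expander is reached from the block-move pattern in sh.md
   (presumably movmemsi): a nonzero return below means the move has been
   expanded here, while a zero return makes the caller fall back to the
   generic copy code (move_by_pieces or a library call).  */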
 1038 int
 1039 expand_block_move (rtx *operands)
1041 int align = INTVAL (operands[3]);
1042 int constp = (GET_CODE (operands[2]) == CONST_INT);
1043 int bytes = (constp ? INTVAL (operands[2]) : 0);
1045 if (! constp)
1046 return 0;
1048 /* If we could use mov.l to move words and dest is word-aligned, we
1049 can use movua.l for loads and still generate a relatively short
1050 and efficient sequence. */
1051 if (TARGET_SH4A_ARCH && align < 4
1052 && MEM_ALIGN (operands[0]) >= 32
1053 && can_move_by_pieces (bytes, 32))
1055 rtx dest = copy_rtx (operands[0]);
1056 rtx src = copy_rtx (operands[1]);
1057 /* We could use different pseudos for each copied word, but
1058 since movua can only load into r0, it's kind of
1059 pointless. */
1060 rtx temp = gen_reg_rtx (SImode);
1061 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1062 int copied = 0;
1064 while (copied + 4 <= bytes)
1066 rtx to = adjust_address (dest, SImode, copied);
1067 rtx from = adjust_automodify_address (src, BLKmode,
1068 src_addr, copied);
1070 set_mem_size (from, GEN_INT (4));
1071 emit_insn (gen_movua (temp, from));
1072 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1073 emit_move_insn (to, temp);
1074 copied += 4;
1077 if (copied < bytes)
1078 move_by_pieces (adjust_address (dest, BLKmode, copied),
1079 adjust_automodify_address (src, BLKmode,
1080 src_addr, copied),
1081 bytes - copied, align, 0);
1083 return 1;
1086 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1087 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1088 if (align < 4 || (bytes % 4 != 0))
1089 return 0;
1091 if (TARGET_HARD_SH4)
1093 if (bytes < 12)
1094 return 0;
1095 else if (bytes == 12)
1097 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1098 rtx r4 = gen_rtx_REG (SImode, 4);
1099 rtx r5 = gen_rtx_REG (SImode, 5);
1101 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1102 force_into (XEXP (operands[0], 0), r4);
1103 force_into (XEXP (operands[1], 0), r5);
1104 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1105 return 1;
1107 else if (! TARGET_SMALLCODE)
1109 const char *entry_name;
1110 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1111 int dwords;
1112 rtx r4 = gen_rtx_REG (SImode, 4);
1113 rtx r5 = gen_rtx_REG (SImode, 5);
1114 rtx r6 = gen_rtx_REG (SImode, 6);
1116 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1117 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1118 force_into (XEXP (operands[0], 0), r4);
1119 force_into (XEXP (operands[1], 0), r5);
1121 dwords = bytes >> 3;
1122 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1123 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1124 return 1;
1126 else
1127 return 0;
1129 if (bytes < 64)
1131 char entry[30];
1132 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1133 rtx r4 = gen_rtx_REG (SImode, 4);
1134 rtx r5 = gen_rtx_REG (SImode, 5);
1136 sprintf (entry, "__movmemSI%d", bytes);
1137 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1138 force_into (XEXP (operands[0], 0), r4);
1139 force_into (XEXP (operands[1], 0), r5);
1140 emit_insn (gen_block_move_real (func_addr_rtx));
1141 return 1;
 1144 /* This is the same number of bytes as a memcpy call, but to a different,
 1145 less common function name, so this will occasionally use more space. */
1146 if (! TARGET_SMALLCODE)
1148 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1149 int final_switch, while_loop;
1150 rtx r4 = gen_rtx_REG (SImode, 4);
1151 rtx r5 = gen_rtx_REG (SImode, 5);
1152 rtx r6 = gen_rtx_REG (SImode, 6);
1154 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1155 force_into (XEXP (operands[0], 0), r4);
1156 force_into (XEXP (operands[1], 0), r5);
1158 /* r6 controls the size of the move. 16 is decremented from it
1159 for each 64 bytes moved. Then the negative bit left over is used
1160 as an index into a list of move instructions. e.g., a 72 byte move
1161 would be set up with size(r6) = 14, for one iteration through the
1162 big while loop, and a switch of -2 for the last part. */
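/* Worked example of the computation below: for bytes == 72 there are
   bytes / 4 == 18 words, so
     final_switch = 16 - (18 % 16) == 14
     while_loop   = (18 / 16 - 1) * 16 == 0
   giving r6 = 14; one pass through the 64-byte loop subtracts 16,
   leaving -2, which selects the tail moves as described above.  */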
1164 final_switch = 16 - ((bytes / 4) % 16);
1165 while_loop = ((bytes / 4) / 16 - 1) * 16;
1166 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1167 emit_insn (gen_block_lump_real (func_addr_rtx));
1168 return 1;
1171 return 0;
1174 /* Prepare operands for a move define_expand; specifically, one of the
1175 operands must be in a register. */
 1177 int
 1178 prepare_move_operands (rtx operands[], enum machine_mode mode)
1180 if ((mode == SImode || mode == DImode)
1181 && flag_pic
1182 && ! ((mode == Pmode || mode == ptr_mode)
1183 && tls_symbolic_operand (operands[1], Pmode) != 0))
1185 rtx temp;
1186 if (SYMBOLIC_CONST_P (operands[1]))
1188 if (GET_CODE (operands[0]) == MEM)
1189 operands[1] = force_reg (Pmode, operands[1]);
1190 else if (TARGET_SHMEDIA
1191 && GET_CODE (operands[1]) == LABEL_REF
1192 && target_reg_operand (operands[0], mode))
1193 /* It's ok. */;
1194 else
1196 temp = (!can_create_pseudo_p ()
1197 ? operands[0]
1198 : gen_reg_rtx (Pmode));
1199 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1202 else if (GET_CODE (operands[1]) == CONST
1203 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1204 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1206 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1207 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1208 mode, temp);
1209 operands[1] = expand_binop (mode, add_optab, temp,
1210 XEXP (XEXP (operands[1], 0), 1),
1211 (!can_create_pseudo_p ()
1212 ? temp
1213 : gen_reg_rtx (Pmode)),
1214 0, OPTAB_LIB_WIDEN);
1218 if (! reload_in_progress && ! reload_completed)
1220 /* Copy the source to a register if both operands aren't registers. */
1221 if (! register_operand (operands[0], mode)
1222 && ! sh_register_operand (operands[1], mode))
1223 operands[1] = copy_to_mode_reg (mode, operands[1]);
1225 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1227 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1228 except that we can't use that function because it is static. */
1229 rtx new = change_address (operands[0], mode, 0);
1230 MEM_COPY_ATTRIBUTES (new, operands[0]);
1231 operands[0] = new;
1234 /* This case can happen while generating code to move the result
1235 of a library call to the target. Reject `st r0,@(rX,rY)' because
1236 reload will fail to find a spill register for rX, since r0 is already
1237 being used for the source. */
1238 else if (TARGET_SH1
1239 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1240 && GET_CODE (operands[0]) == MEM
1241 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1242 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1243 operands[1] = copy_to_mode_reg (mode, operands[1]);
1246 if (mode == Pmode || mode == ptr_mode)
1248 rtx op0, op1, opc;
1249 enum tls_model tls_kind;
1251 op0 = operands[0];
1252 op1 = operands[1];
1253 if (GET_CODE (op1) == CONST
1254 && GET_CODE (XEXP (op1, 0)) == PLUS
1255 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1257 opc = XEXP (XEXP (op1, 0), 1);
1258 op1 = XEXP (XEXP (op1, 0), 0);
1260 else
1261 opc = NULL_RTX;
1263 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1265 rtx tga_op1, tga_ret, tmp, tmp2;
1267 switch (tls_kind)
1269 case TLS_MODEL_GLOBAL_DYNAMIC:
1270 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1271 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1272 op1 = tga_ret;
1273 break;
1275 case TLS_MODEL_LOCAL_DYNAMIC:
1276 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1277 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1279 tmp = gen_reg_rtx (Pmode);
1280 emit_move_insn (tmp, tga_ret);
1282 if (register_operand (op0, Pmode))
1283 tmp2 = op0;
1284 else
1285 tmp2 = gen_reg_rtx (Pmode);
1287 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1288 op1 = tmp2;
1289 break;
1291 case TLS_MODEL_INITIAL_EXEC:
1292 if (! flag_pic)
1294 /* Don't schedule insns for getting GOT address when
1295 the first scheduling is enabled, to avoid spill
1296 failures for R0. */
1297 if (flag_schedule_insns)
1298 emit_insn (gen_blockage ());
1299 emit_insn (gen_GOTaddr2picreg ());
1300 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1301 PIC_REG)));
1302 if (flag_schedule_insns)
1303 emit_insn (gen_blockage ());
1305 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1306 tmp = gen_sym2GOTTPOFF (op1);
1307 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1308 op1 = tga_op1;
1309 break;
1311 case TLS_MODEL_LOCAL_EXEC:
1312 tmp2 = gen_reg_rtx (Pmode);
1313 emit_insn (gen_load_gbr (tmp2));
1314 tmp = gen_reg_rtx (Pmode);
1315 emit_insn (gen_symTPOFF2reg (tmp, op1));
1317 if (register_operand (op0, Pmode))
1318 op1 = op0;
1319 else
1320 op1 = gen_reg_rtx (Pmode);
1322 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1323 break;
1325 default:
1326 gcc_unreachable ();
1328 if (opc)
1329 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1330 operands[1] = op1;
1334 return 0;
1337 enum rtx_code
1338 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1339 enum rtx_code comparison)
1341 rtx op1;
1342 rtx scratch = NULL_RTX;
1344 if (comparison == CODE_FOR_nothing)
1345 comparison = GET_CODE (operands[0]);
1346 else
1347 scratch = operands[4];
1348 if (GET_CODE (operands[1]) == CONST_INT
1349 && GET_CODE (operands[2]) != CONST_INT)
1351 rtx tmp = operands[1];
1353 operands[1] = operands[2];
1354 operands[2] = tmp;
1355 comparison = swap_condition (comparison);
1357 if (GET_CODE (operands[2]) == CONST_INT)
1359 HOST_WIDE_INT val = INTVAL (operands[2]);
1360 if ((val == -1 || val == -0x81)
1361 && (comparison == GT || comparison == LE))
1363 comparison = (comparison == GT) ? GE : LT;
1364 operands[2] = gen_int_mode (val + 1, mode);
1366 else if ((val == 1 || val == 0x80)
1367 && (comparison == GE || comparison == LT))
1369 comparison = (comparison == GE) ? GT : LE;
1370 operands[2] = gen_int_mode (val - 1, mode);
1372 else if (val == 1 && (comparison == GEU || comparison == LTU))
1374 comparison = (comparison == GEU) ? NE : EQ;
1375 operands[2] = CONST0_RTX (mode);
1377 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1379 comparison = (comparison == GEU) ? GTU : LEU;
1380 operands[2] = gen_int_mode (val - 1, mode);
1382 else if (val == 0 && (comparison == GTU || comparison == LEU))
1383 comparison = (comparison == GTU) ? NE : EQ;
1384 else if (mode == SImode
1385 && ((val == 0x7fffffff
1386 && (comparison == GTU || comparison == LEU))
1387 || ((unsigned HOST_WIDE_INT) val
1388 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1389 && (comparison == GEU || comparison == LTU))))
1391 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1392 operands[2] = CONST0_RTX (mode);
1395 op1 = operands[1];
1396 if (can_create_pseudo_p ())
1397 operands[1] = force_reg (mode, op1);
1398 /* When we are handling DImode comparisons, we want to keep constants so
1399 that we can optimize the component comparisons; however, memory loads
1400 are better issued as a whole so that they can be scheduled well.
1401 SImode equality comparisons allow I08 constants, but only when they
1402 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1403 into a register, that register might as well be r0, and we allow the
1404 constant. If it is already in a register, this is likely to be
1405 allocated to a different hard register, thus we load the constant into
1406 a register unless it is zero. */
1407 if (!REG_P (operands[2])
1408 && (GET_CODE (operands[2]) != CONST_INT
1409 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1410 && ((comparison != EQ && comparison != NE)
1411 || (REG_P (op1) && REGNO (op1) != R0_REG)
1412 || !satisfies_constraint_I08 (operands[2])))))
1414 if (scratch && GET_MODE (scratch) == mode)
1416 emit_move_insn (scratch, operands[2]);
1417 operands[2] = scratch;
1419 else if (can_create_pseudo_p ())
1420 operands[2] = force_reg (mode, operands[2]);
1422 return comparison;
1425 void
1426 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1428 rtx (*branch_expander) (rtx) = gen_branch_true;
1429 rtx jump;
1431 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1432 switch (comparison)
1434 case NE: case LT: case LE: case LTU: case LEU:
1435 comparison = reverse_condition (comparison);
1436 branch_expander = gen_branch_false;
1437 default: ;
1439 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1440 gen_rtx_fmt_ee (comparison, SImode,
1441 operands[1], operands[2])));
1442 jump = emit_jump_insn (branch_expander (operands[3]));
1443 if (probability >= 0)
1444 REG_NOTES (jump)
1445 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1446 REG_NOTES (jump));
 1450 /* ??? How should we distribute probabilities when more than one branch
 1451 is generated?  So far we only have some ad-hoc observations:
1452 - If the operands are random, they are likely to differ in both parts.
1453 - If comparing items in a hash chain, the operands are random or equal;
1454 operation should be EQ or NE.
1455 - If items are searched in an ordered tree from the root, we can expect
1456 the highpart to be unequal about half of the time; operation should be
1457 an inequality comparison, operands non-constant, and overall probability
1458 about 50%. Likewise for quicksort.
1459 - Range checks will be often made against constants. Even if we assume for
1460 simplicity an even distribution of the non-constant operand over a
1461 sub-range here, the same probability could be generated with differently
1462 wide sub-ranges - as long as the ratio of the part of the subrange that
1463 is before the threshold to the part that comes after the threshold stays
1464 the same. Thus, we can't really tell anything here;
 1465 assuming random distribution is at least simple. */
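/* A numeric sanity check of the probability split used below when the
   operands are non-constant and the branch is roughly even: with
   prob == REG_BR_PROB_BASE / 2 we get msw_taken_prob = prob / 2 (25%),
   msw_skip_prob = BASE * rev_prob / (BASE + rev_prob) = BASE / 3 (about 33%)
   and lsw_taken_prob = prob (50%); the overall probability of reaching the
   taken label then works out to 0.25 + 0.75 * (2/3) * 0.5 = 0.5, matching
   the original branch.  */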
1468 bool
1469 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1471 enum rtx_code msw_taken, msw_skip, lsw_taken;
1472 rtx skip_label = NULL_RTX;
1473 rtx op1h, op1l, op2h, op2l;
1474 int num_branches;
1475 int prob, rev_prob;
1476 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1477 rtx scratch = operands[4];
1479 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1480 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1481 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1482 op1l = gen_lowpart (SImode, operands[1]);
1483 op2l = gen_lowpart (SImode, operands[2]);
1484 msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1485 prob = split_branch_probability;
1486 rev_prob = REG_BR_PROB_BASE - prob;
1487 switch (comparison)
1489 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1490 That costs 1 cycle more when the first branch can be predicted taken,
1491 but saves us mispredicts because only one branch needs prediction.
1492 It also enables generating the cmpeqdi_t-1 pattern. */
1493 case EQ:
1494 if (TARGET_CMPEQDI_T)
1496 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1497 emit_jump_insn (gen_branch_true (operands[3]));
1498 return true;
1500 msw_skip = NE;
1501 lsw_taken = EQ;
1502 if (prob >= 0)
 1504 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32). */
1506 msw_skip_prob = rev_prob;
1507 if (REG_BR_PROB_BASE <= 65535)
1508 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1509 else
1511 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1512 lsw_taken_prob
1513 = (prob
1514 ? (REG_BR_PROB_BASE
1515 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1516 / ((HOST_WIDEST_INT) prob << 32)))
1517 : 0);
1520 break;
1521 case NE:
1522 if (TARGET_CMPEQDI_T)
1524 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1525 emit_jump_insn (gen_branch_false (operands[3]));
1526 return true;
1528 msw_taken = NE;
1529 msw_taken_prob = prob;
1530 lsw_taken = NE;
1531 lsw_taken_prob = 0;
1532 break;
1533 case GTU: case GT:
1534 msw_taken = comparison;
1535 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1536 break;
1537 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1538 msw_skip = swap_condition (msw_taken);
1539 lsw_taken = GTU;
1540 break;
1541 case GEU: case GE:
1542 if (op2l == CONST0_RTX (SImode))
1543 msw_taken = comparison;
1544 else
1546 msw_taken = comparison == GE ? GT : GTU;
1547 msw_skip = swap_condition (msw_taken);
1548 lsw_taken = GEU;
1550 break;
1551 case LTU: case LT:
1552 msw_taken = comparison;
1553 if (op2l == CONST0_RTX (SImode))
1554 break;
1555 msw_skip = swap_condition (msw_taken);
1556 lsw_taken = LTU;
1557 break;
1558 case LEU: case LE:
1559 if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1560 msw_taken = comparison;
1561 else
1563 lsw_taken = LEU;
1564 if (comparison == LE)
1565 msw_taken = LT;
1566 else if (op2h != CONST0_RTX (SImode))
1567 msw_taken = LTU;
1568 else
1569 break;
1570 msw_skip = swap_condition (msw_taken);
1572 break;
1573 default: return false;
1575 num_branches = ((msw_taken != CODE_FOR_nothing)
1576 + (msw_skip != CODE_FOR_nothing)
1577 + (lsw_taken != CODE_FOR_nothing));
1578 if (comparison != EQ && comparison != NE && num_branches > 1)
1580 if (!CONSTANT_P (operands[2])
1581 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1582 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1584 msw_taken_prob = prob / 2U;
1585 msw_skip_prob
1586 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1587 lsw_taken_prob = prob;
1589 else
1591 msw_taken_prob = prob;
1592 msw_skip_prob = REG_BR_PROB_BASE;
1593 /* ??? If we have a constant op2h, should we use that when
1594 calculating lsw_taken_prob? */
1595 lsw_taken_prob = prob;
1598 operands[1] = op1h;
1599 operands[2] = op2h;
1600 operands[4] = NULL_RTX;
1601 if (reload_completed
1602 && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
1603 && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
1605 emit_move_insn (scratch, operands[2]);
1606 operands[2] = scratch;
1608 if (msw_taken != CODE_FOR_nothing)
1609 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1610 if (msw_skip != CODE_FOR_nothing)
1612 rtx taken_label = operands[3];
1614 operands[3] = skip_label = gen_label_rtx ();
1615 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1616 operands[3] = taken_label;
1618 operands[1] = op1l;
1619 operands[2] = op2l;
1620 if (lsw_taken != CODE_FOR_nothing)
1622 if (reload_completed
1623 && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
1624 operands[4] = scratch;
1625 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1627 if (msw_skip != CODE_FOR_nothing)
1628 emit_label (skip_label);
1629 return true;
1632 /* Prepare the operands for an scc instruction; make sure that the
1633 compare has been done. */
 1634 rtx
 1635 prepare_scc_operands (enum rtx_code code)
1637 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1638 enum rtx_code oldcode = code;
1639 enum machine_mode mode;
1641 /* First need a compare insn. */
1642 switch (code)
1644 case NE:
1645 /* It isn't possible to handle this case. */
1646 gcc_unreachable ();
1647 case LT:
1648 code = GT;
1649 break;
1650 case LE:
1651 code = GE;
1652 break;
1653 case LTU:
1654 code = GTU;
1655 break;
1656 case LEU:
1657 code = GEU;
1658 break;
1659 default:
1660 break;
1662 if (code != oldcode)
1664 rtx tmp = sh_compare_op0;
1665 sh_compare_op0 = sh_compare_op1;
1666 sh_compare_op1 = tmp;
1669 mode = GET_MODE (sh_compare_op0);
1670 if (mode == VOIDmode)
1671 mode = GET_MODE (sh_compare_op1);
1673 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1674 if ((code != EQ && code != NE
1675 && (sh_compare_op1 != const0_rtx
1676 || code == GTU || code == GEU || code == LTU || code == LEU))
1677 || (mode == DImode && sh_compare_op1 != const0_rtx)
1678 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1679 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1681 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1682 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1683 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1684 gen_rtx_SET (VOIDmode, t_reg,
1685 gen_rtx_fmt_ee (code, SImode,
1686 sh_compare_op0, sh_compare_op1)),
1687 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1688 else
1689 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1690 gen_rtx_fmt_ee (code, SImode,
1691 sh_compare_op0, sh_compare_op1)));
1693 return t_reg;
1696 /* Called from the md file, set up the operands of a compare instruction. */
1698 void
1699 from_compare (rtx *operands, int code)
1701 enum machine_mode mode = GET_MODE (sh_compare_op0);
1702 rtx insn;
1703 if (mode == VOIDmode)
1704 mode = GET_MODE (sh_compare_op1);
1705 if (code != EQ
1706 || mode == DImode
1707 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1709 /* Force args into regs, since we can't use constants here. */
1710 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1711 if (sh_compare_op1 != const0_rtx
1712 || code == GTU || code == GEU
1713 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1714 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1716 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1718 from_compare (operands, GT);
1719 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1721 else
1722 insn = gen_rtx_SET (VOIDmode,
1723 gen_rtx_REG (SImode, T_REG),
1724 gen_rtx_fmt_ee (code, SImode,
1725 sh_compare_op0, sh_compare_op1));
1726 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1728 insn = gen_rtx_PARALLEL (VOIDmode,
1729 gen_rtvec (2, insn,
1730 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1731 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1733 else
1734 emit_insn (insn);
1737 /* Functions to output assembly code. */
1739 /* Return a sequence of instructions to perform DI or DF move.
1741 Since the SH cannot move a DI or DF in one instruction, we have
1742 to take care when we see overlapping source and dest registers. */
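/* For illustration of the overlap problem handled below: moving a DImode
   value from r1/r2 to r2/r3 must copy the second word of the pair first
   (r2 -> r3, then r1 -> r2); copying r1 -> r2 first would clobber the
   second source word.  The register-to-register case checks
   REGNO (src) + 1 == REGNO (dst) to pick the safe order.  */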
1744 const char *
1745 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1746 enum machine_mode mode)
1748 rtx dst = operands[0];
1749 rtx src = operands[1];
1751 if (GET_CODE (dst) == MEM
1752 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1753 return "mov.l %T1,%0\n\tmov.l %1,%0";
1755 if (register_operand (dst, mode)
1756 && register_operand (src, mode))
1758 if (REGNO (src) == MACH_REG)
1759 return "sts mach,%S0\n\tsts macl,%R0";
1761 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1762 when mov.d r1,r0 do r1->r0 then r2->r1. */
1764 if (REGNO (src) + 1 == REGNO (dst))
1765 return "mov %T1,%T0\n\tmov %1,%0";
1766 else
1767 return "mov %1,%0\n\tmov %T1,%T0";
1769 else if (GET_CODE (src) == CONST_INT)
1771 if (INTVAL (src) < 0)
1772 output_asm_insn ("mov #-1,%S0", operands);
1773 else
1774 output_asm_insn ("mov #0,%S0", operands);
1776 return "mov %1,%R0";
1778 else if (GET_CODE (src) == MEM)
1780 int ptrreg = -1;
1781 int dreg = REGNO (dst);
1782 rtx inside = XEXP (src, 0);
1784 switch (GET_CODE (inside))
1786 case REG:
1787 ptrreg = REGNO (inside);
1788 break;
1790 case SUBREG:
1791 ptrreg = subreg_regno (inside);
1792 break;
1794 case PLUS:
1795 ptrreg = REGNO (XEXP (inside, 0));
 1796 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1797 an offsettable address. Unfortunately, offsettable addresses use
1798 QImode to check the offset, and a QImode offsettable address
1799 requires r0 for the other operand, which is not currently
1800 supported, so we can't use the 'o' constraint.
1801 Thus we must check for and handle r0+REG addresses here.
1802 We punt for now, since this is likely very rare. */
1803 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1804 break;
1806 case LABEL_REF:
1807 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1808 case POST_INC:
1809 return "mov.l %1,%0\n\tmov.l %1,%T0";
1810 default:
1811 gcc_unreachable ();
1814 /* Work out the safe way to copy. Copy into the second half first. */
1815 if (dreg == ptrreg)
1816 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1819 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1822 /* Print an instruction which would have gone into a delay slot after
1823 another instruction, but couldn't because the other instruction expanded
1824 into a sequence where putting the slot insn at the end wouldn't work. */
1826 static void
1827 print_slot (rtx insn)
1829 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1831 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1834 const char *
1835 output_far_jump (rtx insn, rtx op)
1837 struct { rtx lab, reg, op; } this;
1838 rtx braf_base_lab = NULL_RTX;
1839 const char *jump;
1840 int far;
1841 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1842 rtx prev;
1844 this.lab = gen_label_rtx ();
1846 if (TARGET_SH2
1847 && offset >= -32764
1848 && offset - get_attr_length (insn) <= 32766)
1850 far = 0;
1851 jump = "mov.w %O0,%1; braf %1";
1853 else
1855 far = 1;
1856 if (flag_pic)
1858 if (TARGET_SH2)
1859 jump = "mov.l %O0,%1; braf %1";
1860 else
1861 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1863 else
1864 jump = "mov.l %O0,%1; jmp @%1";
1866 /* If we have a scratch register available, use it. */
1867 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1868 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1870 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1871 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1872 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1873 output_asm_insn (jump, &this.lab);
1874 if (dbr_sequence_length ())
1875 print_slot (final_sequence);
1876 else
1877 output_asm_insn ("nop", 0);
1879 else
1881 /* Output the delay slot insn first if any. */
1882 if (dbr_sequence_length ())
1883 print_slot (final_sequence);
1885 this.reg = gen_rtx_REG (SImode, 13);
1886 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1887 Fortunately, MACL is fixed and call-clobbered, and we never
1888 need its value across jumps, so save r13 in it instead of in
1889 the stack. */
1890 if (TARGET_SH5)
1891 output_asm_insn ("lds r13, macl", 0);
1892 else
1893 output_asm_insn ("mov.l r13,@-r15", 0);
1894 output_asm_insn (jump, &this.lab);
1895 if (TARGET_SH5)
1896 output_asm_insn ("sts macl, r13", 0);
1897 else
1898 output_asm_insn ("mov.l @r15+,r13", 0);
1900 if (far && flag_pic && TARGET_SH2)
1902 braf_base_lab = gen_label_rtx ();
1903 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1904 CODE_LABEL_NUMBER (braf_base_lab));
1906 if (far)
1907 output_asm_insn (".align 2", 0);
1908 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1909 this.op = op;
1910 if (far && flag_pic)
1912 if (TARGET_SH2)
1913 this.lab = braf_base_lab;
1914 output_asm_insn (".long %O2-%O0", &this.lab);
1916 else
1917 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1918 return "";
1921 /* Local label counter, used for constants in the pool and inside
1922 pattern branches. */
1924 static int lf = 100;
1926 /* Output code for ordinary branches. */
1928 const char *
1929 output_branch (int logic, rtx insn, rtx *operands)
1931 switch (get_attr_length (insn))
1933 case 6:
1934 /* This can happen if filling the delay slot has caused a forward
1935 branch to exceed its range (we could reverse it, but only
1936 when we know we won't overextend other branches; this should
1937 best be handled by relaxation).
1938 It can also happen when other condbranches hoist delay slot insns
1939 from their destinations, thus increasing code size.
1940 But the branch will still be in the range -4092..+4098 bytes. */
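/* Roughly, for an out-of-range "bt label" (logic != 0) the code below emits:
	bf.s	.LFnn	(or a plain bf when the delay slot can't be used)
	<delay slot insn, if any>
	bra	label
	nop
   .LFnn:
   i.e. the condition is inverted to skip over an unconditional bra that
   can reach the distant target.  */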
1942 if (! TARGET_RELAX)
1944 int label = lf++;
1945 /* The call to print_slot will clobber the operands. */
1946 rtx op0 = operands[0];
1948 /* If the instruction in the delay slot is annulled (true), then
1949 there is no delay slot where we can put it now. The only safe
1950 place for it is after the label. final will do that by default. */
1952 if (final_sequence
1953 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1954 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1956 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1957 ASSEMBLER_DIALECT ? "/" : ".", label);
1958 print_slot (final_sequence);
1960 else
1961 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1963 output_asm_insn ("bra\t%l0", &op0);
1964 fprintf (asm_out_file, "\tnop\n");
1965 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1967 return "";
1969 /* When relaxing, handle this like a short branch. The linker
1970 will fix it up if it still doesn't fit after relaxation. */
1971 case 2:
1972 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1974 /* These are for SH2e, in which we have to account for the
1975 extra nop because of the hardware bug in annulled branches. */
1976 case 8:
1977 if (! TARGET_RELAX)
1979 int label = lf++;
1981 gcc_assert (!final_sequence
1982 || !(INSN_ANNULLED_BRANCH_P
1983 (XVECEXP (final_sequence, 0, 0))));
1984 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1985 logic ? "f" : "t",
1986 ASSEMBLER_DIALECT ? "/" : ".", label);
1987 fprintf (asm_out_file, "\tnop\n");
1988 output_asm_insn ("bra\t%l0", operands);
1989 fprintf (asm_out_file, "\tnop\n");
1990 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1992 return "";
1994 /* When relaxing, fall through. */
1995 case 4:
1997 char buffer[10];
1999 sprintf (buffer, "b%s%ss\t%%l0",
2000 logic ? "t" : "f",
2001 ASSEMBLER_DIALECT ? "/" : ".");
2002 output_asm_insn (buffer, &operands[0]);
2003 return "nop";
2006 default:
2007 /* There should be no longer branches now - that would
2008 indicate that something has destroyed the branches set
2009 up in machine_dependent_reorg. */
2010 gcc_unreachable ();
2014 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2015 fill in operand 9 as a label to the successor insn.
2016 We try to use jump threading where possible.
2017 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2018 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2019 follow jmp and bt, if the address is in range. */
2020 const char *
2021 output_branchy_insn (enum rtx_code code, const char *template,
2022 rtx insn, rtx *operands)
2024 rtx next_insn = NEXT_INSN (insn);
2026 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2028 rtx src = SET_SRC (PATTERN (next_insn));
2029 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2031 /* Following branch not taken */
2032 operands[9] = gen_label_rtx ();
2033 emit_label_after (operands[9], next_insn);
2034 INSN_ADDRESSES_NEW (operands[9],
2035 INSN_ADDRESSES (INSN_UID (next_insn))
2036 + get_attr_length (next_insn));
2037 return template;
2039 else
2041 int offset = (branch_dest (next_insn)
2042 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2043 if (offset >= -252 && offset <= 258)
2045 if (GET_CODE (src) == IF_THEN_ELSE)
2046 /* branch_true */
2047 src = XEXP (src, 1);
2048 operands[9] = src;
2049 return template;
2053 operands[9] = gen_label_rtx ();
2054 emit_label_after (operands[9], insn);
2055 INSN_ADDRESSES_NEW (operands[9],
2056 INSN_ADDRESSES (INSN_UID (insn))
2057 + get_attr_length (insn));
2058 return template;
2061 const char *
2062 output_ieee_ccmpeq (rtx insn, rtx *operands)
2064 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2065 insn, operands);
2068 /* Output the start of the assembler file. */
2070 static void
2071 sh_file_start (void)
2073 default_file_start ();
2075 #ifdef SYMBIAN
2076 /* Declare the .directive section before it is used. */
2077 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2078 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2079 #endif
2081 if (TARGET_ELF)
2082 /* We need to show the text section with the proper
2083 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2084 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2085 will complain. We can teach GAS specifically about the
2086 default attributes for our choice of text section, but
2087 then we would have to change GAS again if/when we change
2088 the text section name. */
2089 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2090 else
2091 /* Switch to the data section so that the coffsem symbol
2092 isn't in the text section. */
2093 switch_to_section (data_section);
2095 if (TARGET_LITTLE_ENDIAN)
2096 fputs ("\t.little\n", asm_out_file);
2098 if (!TARGET_ELF)
2100 if (TARGET_SHCOMPACT)
2101 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2102 else if (TARGET_SHMEDIA)
2103 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2104 TARGET_SHMEDIA64 ? 64 : 32);
2108 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2110 static bool
2111 unspec_caller_rtx_p (rtx pat)
2113 switch (GET_CODE (pat))
2115 case CONST:
2116 return unspec_caller_rtx_p (XEXP (pat, 0));
2117 case PLUS:
2118 case MINUS:
2119 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2120 return true;
2121 return unspec_caller_rtx_p (XEXP (pat, 1));
2122 case UNSPEC:
2123 if (XINT (pat, 1) == UNSPEC_CALLER)
2124 return true;
2125 default:
2126 break;
2129 return false;
2132 /* Indicate that INSN cannot be duplicated. This is true for insn
2133 that generates a unique label. */
2135 static bool
2136 sh_cannot_copy_insn_p (rtx insn)
2138 rtx pat;
2140 if (!reload_completed || !flag_pic)
2141 return false;
2143 if (GET_CODE (insn) != INSN)
2144 return false;
2145 if (asm_noperands (insn) >= 0)
2146 return false;
2148 pat = PATTERN (insn);
2149 if (GET_CODE (pat) != SET)
2150 return false;
2151 pat = SET_SRC (pat);
2153 if (unspec_caller_rtx_p (pat))
2154 return true;
2156 return false;
2159 /* Actual number of instructions used to make a shift by N. */
2160 static const char ashiftrt_insns[] =
2161 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2163 /* Left shift and logical right shift are the same. */
2164 static const char shift_insns[] =
2165 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2167 /* Individual shift amounts needed to get the above length sequences.
2168 One bit right shifts clobber the T bit, so when possible, put one bit
2169 shifts in the middle of the sequence, so the ends are eligible for
2170 branch delay slots. */
2171 static const short shift_amounts[32][5] = {
2172 {0}, {1}, {2}, {2, 1},
2173 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2174 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2175 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2176 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2177 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2178 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2179 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
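/* Illustrative worked example (not in the original source): a constant
   left shift by 5 costs shift_insns[5] == 3 insns and uses
   shift_amounts[5] == {2, 1, 2}, i.e. shll2 / shll / shll2, whose
   amounts sum to 5; the single-bit shift sits in the middle so the
   first and last insns remain eligible for branch delay slots.  */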
2181 /* Likewise, but for shift amounts < 16, up to three highmost bits
2182 might be clobbered. This is typically used when combined with some
2183 kind of sign or zero extension. */
2185 static const char ext_shift_insns[] =
2186 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2188 static const short ext_shift_amounts[32][4] = {
2189 {0}, {1}, {2}, {2, 1},
2190 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2191 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2192 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2193 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2194 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2195 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2196 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
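/* Illustrative example: when the high bits may be clobbered, a left
   shift by 6 needs only ext_shift_insns[6] == 2 insns, using
   ext_shift_amounts[6] == {8, -2}, i.e. shll8 then shlr2 (8 - 2 == 6),
   versus shift_insns[6] == 3 for the exact sequence above.  */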
2198 /* Assuming we have a value that has been sign-extended by at least one bit,
2199 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2200 to shift it by N without data loss, and quicker than by other means? */
2201 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
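/* Note: ((n) | 8) == 15 holds only for n == 7 and n == 15; for those
   counts the ext_shift_amounts sequence ends in a one-bit right shift
   ({8, -1} and {16, -1}) that can safely be made arithmetic.  */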
2203 /* This is used in length attributes in sh.md to help compute the length
2204 of arbitrary constant shift instructions. */
2207 shift_insns_rtx (rtx insn)
2209 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2210 int shift_count = INTVAL (XEXP (set_src, 1));
2211 enum rtx_code shift_code = GET_CODE (set_src);
2213 switch (shift_code)
2215 case ASHIFTRT:
2216 return ashiftrt_insns[shift_count];
2217 case LSHIFTRT:
2218 case ASHIFT:
2219 return shift_insns[shift_count];
2220 default:
2221 gcc_unreachable ();
2225 /* Return the cost of a shift. */
2227 static inline int
2228 shiftcosts (rtx x)
2230 int value;
2232 if (TARGET_SHMEDIA)
2233 return 1;
2235 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2237 if (GET_MODE (x) == DImode
2238 && GET_CODE (XEXP (x, 1)) == CONST_INT
2239 && INTVAL (XEXP (x, 1)) == 1)
2240 return 2;
2242 /* Everything else is invalid, because there is no pattern for it. */
2243 return MAX_COST;
2245 /* If shift by a non constant, then this will be expensive. */
2246 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2247 return SH_DYNAMIC_SHIFT_COST;
2249 value = INTVAL (XEXP (x, 1));
2251 /* Otherwise, return the true cost in instructions. */
2252 if (GET_CODE (x) == ASHIFTRT)
2254 int cost = ashiftrt_insns[value];
2255 /* If SH3, then we put the constant in a reg and use shad. */
2256 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2257 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2258 return cost;
2260 else
2261 return shift_insns[value];
2264 /* Return the cost of an AND operation. */
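/* Rough examples of the costs computed below: "x & 0xffff" costs 1 (a
   single extu.w), "x & 0xf0" costs 2 (the and #imm,R0 form ties up r0),
   and a mask such as 0x12345 costs 3 (constant-pool load plus and).  */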
2266 static inline int
2267 andcosts (rtx x)
2269 int i;
2271 /* ANDing with a register is a single-cycle `and' instruction. */
2272 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2273 return 1;
2275 i = INTVAL (XEXP (x, 1));
2277 if (TARGET_SHMEDIA)
2279 if (satisfies_constraint_I10 (XEXP (x, 1))
2280 || satisfies_constraint_J16 (XEXP (x, 1)))
2281 return 1;
2282 else
2283 return 1 + rtx_cost (XEXP (x, 1), AND);
2286 /* These constants are single cycle extu.[bw] instructions. */
2287 if (i == 0xff || i == 0xffff)
2288 return 1;
2289 /* Constants that can be used in an and immediate instruction in a single
2290 cycle, but this requires r0, so make it a little more expensive. */
2291 if (CONST_OK_FOR_K08 (i))
2292 return 2;
2293 /* Constants that can be loaded with a mov immediate and an and.
2294 This case is probably unnecessary. */
2295 if (CONST_OK_FOR_I08 (i))
2296 return 2;
2297 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2298 This case is probably unnecessary. */
2299 return 3;
2302 /* Return the cost of an addition or a subtraction. */
2304 static inline int
2305 addsubcosts (rtx x)
2307 /* Adding a register is a single cycle insn. */
2308 if (GET_CODE (XEXP (x, 1)) == REG
2309 || GET_CODE (XEXP (x, 1)) == SUBREG)
2310 return 1;
2312 /* Likewise for small constants. */
2313 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2314 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2315 return 1;
2317 if (TARGET_SHMEDIA)
2318 switch (GET_CODE (XEXP (x, 1)))
2320 case CONST:
2321 case LABEL_REF:
2322 case SYMBOL_REF:
2323 return TARGET_SHMEDIA64 ? 5 : 3;
2325 case CONST_INT:
2326 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2327 return 2;
2328 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2329 return 3;
2330 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2331 return 4;
2333 /* Fall through. */
2334 default:
2335 return 5;
2338 /* Any other constant requires a 2 cycle pc-relative load plus an
2339 addition. */
2340 return 3;
2343 /* Return the cost of a multiply. */
2344 static inline int
2345 multcosts (rtx x ATTRIBUTE_UNUSED)
2347 if (sh_multcost >= 0)
2348 return sh_multcost;
2349 if (TARGET_SHMEDIA)
2350 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2351 accept constants. Ideally, we would use a cost of one or two and
2352 add the cost of the operand, but disregard the latter when inside loops
2353 and loop invariant code motion is still to follow.
2354 Using a multiply first and splitting it later if it's a loss
2355 doesn't work because of different sign / zero extension semantics
2356 of multiplies vs. shifts. */
2357 return TARGET_SMALLCODE ? 2 : 3;
2359 if (TARGET_SH2)
2361 /* We have a mul insn, so we can never take more than the mul and the
2362 read of the mac reg, but count more because of the latency and extra
2363 reg usage. */
2364 if (TARGET_SMALLCODE)
2365 return 2;
2366 return 3;
2369 /* If we're aiming at small code, then just count the number of
2370 insns in a multiply call sequence. */
2371 if (TARGET_SMALLCODE)
2372 return 5;
2374 /* Otherwise count all the insns in the routine we'd be calling too. */
2375 return 20;
2378 /* Compute a (partial) cost for rtx X. Return true if the complete
2379 cost has been computed, and false if subexpressions should be
2380 scanned. In either case, *TOTAL contains the cost result. */
2382 static bool
2383 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2385 switch (code)
2387 case CONST_INT:
2388 if (TARGET_SHMEDIA)
2390 if (INTVAL (x) == 0)
2391 *total = 0;
2392 else if (outer_code == AND && and_operand ((x), DImode))
2393 *total = 0;
2394 else if ((outer_code == IOR || outer_code == XOR
2395 || outer_code == PLUS)
2396 && CONST_OK_FOR_I10 (INTVAL (x)))
2397 *total = 0;
2398 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2399 *total = COSTS_N_INSNS (outer_code != SET);
2400 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2401 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2402 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2403 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2404 else
2405 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2406 return true;
2408 if (CONST_OK_FOR_I08 (INTVAL (x)))
2409 *total = 0;
2410 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2411 && CONST_OK_FOR_K08 (INTVAL (x)))
2412 *total = 1;
2413 /* prepare_cmp_insn will force costly constants into registers before
2414 the cbranch[sd]i4 patterns can see them, so preserve potentially
2415 interesting ones not covered by I08 above. */
2416 else if (outer_code == COMPARE
2417 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2418 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2419 || INTVAL (x) == 0x7fffffff
2420 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2421 *total = 1;
2422 else
2423 *total = 8;
2424 return true;
2426 case CONST:
2427 case LABEL_REF:
2428 case SYMBOL_REF:
2429 if (TARGET_SHMEDIA64)
2430 *total = COSTS_N_INSNS (4);
2431 else if (TARGET_SHMEDIA32)
2432 *total = COSTS_N_INSNS (2);
2433 else
2434 *total = 5;
2435 return true;
2437 case CONST_DOUBLE:
2438 if (TARGET_SHMEDIA)
2439 *total = COSTS_N_INSNS (4);
2440 /* prepare_cmp_insn will force costly constants into registers before
2441 the cbranchdi4 pattern can see them, so preserve potentially
2442 interesting ones. */
2443 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2444 *total = 1;
2445 else
2446 *total = 10;
2447 return true;
2448 case CONST_VECTOR:
2449 if (x == CONST0_RTX (GET_MODE (x)))
2450 *total = 0;
2451 else if (sh_1el_vec (x, VOIDmode))
2452 *total = outer_code != SET;
2453 if (sh_rep_vec (x, VOIDmode))
2454 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2455 + (outer_code != SET));
2456 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2457 return true;
2459 case PLUS:
2460 case MINUS:
2461 *total = COSTS_N_INSNS (addsubcosts (x));
2462 return true;
2464 case AND:
2465 *total = COSTS_N_INSNS (andcosts (x));
2466 return true;
2468 case MULT:
2469 *total = COSTS_N_INSNS (multcosts (x));
2470 return true;
2472 case ASHIFT:
2473 case ASHIFTRT:
2474 case LSHIFTRT:
2475 *total = COSTS_N_INSNS (shiftcosts (x));
2476 return true;
2478 case DIV:
2479 case UDIV:
2480 case MOD:
2481 case UMOD:
2482 *total = COSTS_N_INSNS (20);
2483 return true;
2485 case PARALLEL:
2486 if (sh_1el_vec (x, VOIDmode))
2487 *total = outer_code != SET;
2488 if (sh_rep_vec (x, VOIDmode))
2489 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2490 + (outer_code != SET));
2491 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2492 return true;
2494 case FLOAT:
2495 case FIX:
2496 *total = 100;
2497 return true;
2499 default:
2500 return false;
2504 /* Compute the cost of an address. For the SH, all valid addresses are
2505 the same cost. Use a slightly higher cost for reg + reg addressing,
2506 since it increases pressure on r0. */
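/* E.g. a reg + reg address like @(r0,Rn) is costed at 1 below, while
   @Rn and @(disp,Rn) cost 0; all SHmedia addresses cost 0.  */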
2508 static int
2509 sh_address_cost (rtx X)
2511 return (GET_CODE (X) == PLUS
2512 && ! CONSTANT_P (XEXP (X, 1))
2513 && ! TARGET_SHMEDIA ? 1 : 0);
2516 /* Code to expand a shift. */
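/* Note: gen_ashift emits one SH shift insn of the given type and count,
   modifying REG in place.  A negative count, as found in the
   shift_amounts tables, flips the direction; e.g. gen_ashift (ASHIFT,
   -2, reg) emits a two-bit logical right shift.  */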
2518 void
2519 gen_ashift (int type, int n, rtx reg)
2521 /* Negative values here come from the shift_amounts array. */
2522 if (n < 0)
2524 if (type == ASHIFT)
2525 type = LSHIFTRT;
2526 else
2527 type = ASHIFT;
2528 n = -n;
2531 switch (type)
2533 case ASHIFTRT:
2534 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2535 break;
2536 case LSHIFTRT:
2537 if (n == 1)
2538 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2539 else
2540 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2541 break;
2542 case ASHIFT:
2543 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2544 break;
2548 /* Same for HImode */
2550 void
2551 gen_ashift_hi (int type, int n, rtx reg)
2553 /* Negative values here come from the shift_amounts array. */
2554 if (n < 0)
2556 if (type == ASHIFT)
2557 type = LSHIFTRT;
2558 else
2559 type = ASHIFT;
2560 n = -n;
2563 switch (type)
2565 case ASHIFTRT:
2566 case LSHIFTRT:
2567 /* We don't have HImode right shift operations because using the
2568 ordinary 32 bit shift instructions for that doesn't generate proper
2569 zero/sign extension.
2570 gen_ashift_hi is only called in contexts where we know that the
2571 sign extension works out correctly. */
2573 int offset = 0;
2574 if (GET_CODE (reg) == SUBREG)
2576 offset = SUBREG_BYTE (reg);
2577 reg = SUBREG_REG (reg);
2579 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2580 break;
2582 case ASHIFT:
2583 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2584 break;
2588 /* Output RTL to split a constant shift into its component SH constant
2589 shift instructions. */
2591 void
2592 gen_shifty_op (int code, rtx *operands)
2594 int value = INTVAL (operands[2]);
2595 int max, i;
2597 /* Truncate the shift count in case it is out of bounds. */
2598 value = value & 0x1f;
2600 if (value == 31)
2602 if (code == LSHIFTRT)
2604 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2605 emit_insn (gen_movt (operands[0]));
2606 return;
2608 else if (code == ASHIFT)
2610 /* There is a two instruction sequence for 31 bit left shifts,
2611 but it requires r0. */
2612 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2614 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2615 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2616 return;
2620 else if (value == 0)
2622 /* This can happen even when optimizing, if there were subregs before
2623 reload. Don't output a nop here, as this is never optimized away;
2624 use a no-op move instead. */
2625 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2626 return;
2629 max = shift_insns[value];
2630 for (i = 0; i < max; i++)
2631 gen_ashift (code, shift_amounts[value][i], operands[0]);
2634 /* Same as above, but optimized for values where the topmost bits don't
2635 matter. */
2637 void
2638 gen_shifty_hi_op (int code, rtx *operands)
2640 int value = INTVAL (operands[2]);
2641 int max, i;
2642 void (*gen_fun) (int, int, rtx);
2644 /* This operation is used by and_shl for SImode values with a few
2645 high bits known to be cleared. */
2646 value &= 31;
2647 if (value == 0)
2649 emit_insn (gen_nop ());
2650 return;
2653 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2654 if (code == ASHIFT)
2656 max = ext_shift_insns[value];
2657 for (i = 0; i < max; i++)
2658 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2660 else
2661 /* When shifting right, emit the shifts in reverse order, so that
2662 solitary negative values come first. */
2663 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2664 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2667 /* Output RTL for an arithmetic right shift. */
2669 /* ??? Rewrite to use super-optimizer sequences. */
2672 expand_ashiftrt (rtx *operands)
2674 rtx wrk;
2675 char func[18];
2676 int value;
2678 if (TARGET_SH3)
2680 if (GET_CODE (operands[2]) != CONST_INT)
2682 rtx count = copy_to_mode_reg (SImode, operands[2]);
2683 emit_insn (gen_negsi2 (count, count));
2684 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2685 return 1;
2687 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2688 > 1 + SH_DYNAMIC_SHIFT_COST)
2690 rtx count
2691 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2692 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2693 return 1;
2696 if (GET_CODE (operands[2]) != CONST_INT)
2697 return 0;
2699 value = INTVAL (operands[2]) & 31;
2701 if (value == 31)
2703 /* If we are called from abs expansion, arrange things so that we
2704 can use a single MT instruction that doesn't clobber the source,
2705 if LICM can hoist out the load of the constant zero. */
2706 if (currently_expanding_to_rtl)
2708 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2709 operands[1]));
2710 emit_insn (gen_mov_neg_si_t (operands[0]));
2711 return 1;
2713 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2714 return 1;
2716 else if (value >= 16 && value <= 19)
2718 wrk = gen_reg_rtx (SImode);
2719 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2720 value -= 16;
2721 while (value--)
2722 gen_ashift (ASHIFTRT, 1, wrk);
2723 emit_move_insn (operands[0], wrk);
2724 return 1;
2726 /* Expand a short sequence inline, longer call a magic routine. */
2727 else if (value <= 5)
2729 wrk = gen_reg_rtx (SImode);
2730 emit_move_insn (wrk, operands[1]);
2731 while (value--)
2732 gen_ashift (ASHIFTRT, 1, wrk);
2733 emit_move_insn (operands[0], wrk);
2734 return 1;
2737 wrk = gen_reg_rtx (Pmode);
2739 /* Load the value into an arg reg and call a helper. */
2740 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2741 sprintf (func, "__ashiftrt_r4_%d", value);
2742 function_symbol (wrk, func, SFUNC_STATIC);
2743 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2744 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2745 return 1;
2749 sh_dynamicalize_shift_p (rtx count)
2751 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2754 /* Try to find a good way to implement the combiner pattern
2755 [(set (match_operand:SI 0 "register_operand" "r")
2756 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2757 (match_operand:SI 2 "const_int_operand" "n"))
2758 (match_operand:SI 3 "const_int_operand" "n"))) .
2759 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2760 return 0 for simple right / left or left/right shift combination.
2761 return 1 for a combination of shifts with zero_extend.
2762 return 2 for a combination of shifts with an AND that needs r0.
2763 return 3 for a combination of shifts with an AND that needs an extra
2764 scratch register, when the three highmost bits of the AND mask are clear.
2765 return 4 for a combination of shifts with an AND that needs an extra
2766 scratch register, when any of the three highmost bits of the AND mask
2767 is set.
2768 If ATTRP is set, store an initial right shift width in ATTRP[0],
2769 and the instruction length in ATTRP[1].  These values are not valid
2770 when returning 0.
2771 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2772 shift_amounts for the last shift value that is to be used before the
2773 sign extend. */
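/* Illustrative example: for (x << 2) & 0x3FC the mask shifted right by
   LEFT is 0xFF, and the search below settles on kind 1 with a cost of
   2 insns (extu.b to zero-extend the low byte, then shll2), beating
   the plain right-shift/left-shift combination, which costs 3.  */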
2775 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2777 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2778 int left = INTVAL (left_rtx), right;
2779 int best = 0;
2780 int cost, best_cost = 10000;
2781 int best_right = 0, best_len = 0;
2782 int i;
2783 int can_ext;
2785 if (left < 0 || left > 31)
2786 return 0;
2787 if (GET_CODE (mask_rtx) == CONST_INT)
2788 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2789 else
2790 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2791 /* Can this be expressed as a right shift / left shift pair? */
2792 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2793 right = exact_log2 (lsb);
2794 mask2 = ~(mask + lsb - 1);
2795 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2796 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2797 if (! mask2)
2798 best_cost = shift_insns[right] + shift_insns[right + left];
2799 /* mask has no trailing zeroes <==> ! right */
2800 else if (! right && mask2 == ~(lsb2 - 1))
2802 int late_right = exact_log2 (lsb2);
2803 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2805 /* Try to use zero extend. */
2806 if (mask2 == ~(lsb2 - 1))
2808 int width, first;
2810 for (width = 8; width <= 16; width += 8)
2812 /* Can we zero-extend right away? */
2813 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2815 cost
2816 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2817 if (cost < best_cost)
2819 best = 1;
2820 best_cost = cost;
2821 best_right = right;
2822 best_len = cost;
2823 if (attrp)
2824 attrp[2] = -1;
2826 continue;
2828 /* ??? Could try to put zero extend into initial right shift,
2829 or even shift a bit left before the right shift. */
2830 /* Determine value of first part of left shift, to get to the
2831 zero extend cut-off point. */
2832 first = width - exact_log2 (lsb2) + right;
2833 if (first >= 0 && right + left - first >= 0)
2835 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2836 + ext_shift_insns[right + left - first];
2837 if (cost < best_cost)
2839 best = 1;
2840 best_cost = cost;
2841 best_right = right;
2842 best_len = cost;
2843 if (attrp)
2844 attrp[2] = first;
2849 /* Try to use r0 AND pattern */
2850 for (i = 0; i <= 2; i++)
2852 if (i > right)
2853 break;
2854 if (! CONST_OK_FOR_K08 (mask >> i))
2855 continue;
2856 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2857 if (cost < best_cost)
2859 best = 2;
2860 best_cost = cost;
2861 best_right = i;
2862 best_len = cost - 1;
2865 /* Try to use a scratch register to hold the AND operand. */
2866 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2867 for (i = 0; i <= 2; i++)
2869 if (i > right)
2870 break;
2871 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2872 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2873 if (cost < best_cost)
2875 best = 4 - can_ext;
2876 best_cost = cost;
2877 best_right = i;
2878 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2882 if (attrp)
2884 attrp[0] = best_right;
2885 attrp[1] = best_len;
2887 return best;
2890 /* This is used in length attributes of the unnamed instructions
2891 corresponding to shl_and_kind return values of 1 and 2. */
2893 shl_and_length (rtx insn)
2895 rtx set_src, left_rtx, mask_rtx;
2896 int attributes[3];
2898 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2899 left_rtx = XEXP (XEXP (set_src, 0), 1);
2900 mask_rtx = XEXP (set_src, 1);
2901 shl_and_kind (left_rtx, mask_rtx, attributes);
2902 return attributes[1];
2905 /* This is used in length attribute of the and_shl_scratch instruction. */
2908 shl_and_scr_length (rtx insn)
2910 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2911 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2912 rtx op = XEXP (set_src, 0);
2913 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2914 op = XEXP (XEXP (op, 0), 0);
2915 return len + shift_insns[INTVAL (XEXP (op, 1))];
2918 /* Generate rtl for instructions for which shl_and_kind advised a particular
2919 method of generating them, i.e. returned nonzero. */
2922 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2924 int attributes[3];
2925 unsigned HOST_WIDE_INT mask;
2926 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2927 int right, total_shift;
2928 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2930 right = attributes[0];
2931 total_shift = INTVAL (left_rtx) + right;
2932 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2933 switch (kind)
2935 default:
2936 return -1;
2937 case 1:
2939 int first = attributes[2];
2940 rtx operands[3];
2942 if (first < 0)
2944 emit_insn ((mask << right) <= 0xff
2945 ? gen_zero_extendqisi2 (dest,
2946 gen_lowpart (QImode, source))
2947 : gen_zero_extendhisi2 (dest,
2948 gen_lowpart (HImode, source)));
2949 source = dest;
2951 if (source != dest)
2952 emit_insn (gen_movsi (dest, source));
2953 operands[0] = dest;
2954 if (right)
2956 operands[2] = GEN_INT (right);
2957 gen_shifty_hi_op (LSHIFTRT, operands);
2959 if (first > 0)
2961 operands[2] = GEN_INT (first);
2962 gen_shifty_hi_op (ASHIFT, operands);
2963 total_shift -= first;
2964 mask <<= first;
2966 if (first >= 0)
2967 emit_insn (mask <= 0xff
2968 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2969 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2970 if (total_shift > 0)
2972 operands[2] = GEN_INT (total_shift);
2973 gen_shifty_hi_op (ASHIFT, operands);
2975 break;
2977 case 4:
2978 shift_gen_fun = gen_shifty_op;
2979 case 3:
2980 /* If the topmost bit that matters is set, set the topmost bits
2981 that don't matter. This way, we might be able to get a shorter
2982 signed constant. */
2983 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2984 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2985 case 2:
2986 /* Don't expand fine-grained when combining, because that will
2987 make the pattern fail. */
2988 if (currently_expanding_to_rtl
2989 || reload_in_progress || reload_completed)
2991 rtx operands[3];
2993 /* Cases 3 and 4 should be handled by this split
2994 only while combining */
2995 gcc_assert (kind <= 2);
2996 if (right)
2998 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2999 source = dest;
3001 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3002 if (total_shift)
3004 operands[0] = dest;
3005 operands[1] = dest;
3006 operands[2] = GEN_INT (total_shift);
3007 shift_gen_fun (ASHIFT, operands);
3009 break;
3011 else
3013 int neg = 0;
3014 if (kind != 4 && total_shift < 16)
3016 neg = -ext_shift_amounts[total_shift][1];
3017 if (neg > 0)
3018 neg -= ext_shift_amounts[total_shift][2];
3019 else
3020 neg = 0;
3022 emit_insn (gen_and_shl_scratch (dest, source,
3023 GEN_INT (right),
3024 GEN_INT (mask),
3025 GEN_INT (total_shift + neg),
3026 GEN_INT (neg)));
3027 emit_insn (gen_movsi (dest, dest));
3028 break;
3031 return 0;
3034 /* Try to find a good way to implement the combiner pattern
3035 [(set (match_operand:SI 0 "register_operand" "=r")
3036 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3037 (match_operand:SI 2 "const_int_operand" "n")
3038 (match_operand:SI 3 "const_int_operand" "n")
3039 (const_int 0)))
3040 (clobber (reg:SI T_REG))]
3041 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3042 return 0 for simple left / right shift combination.
3043 return 1 for left shift / 8 bit sign extend / left shift.
3044 return 2 for left shift / 16 bit sign extend / left shift.
3045 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3046 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3047 return 5 for left shift / 16 bit sign extend / right shift
3048 return 6 for < 8 bit sign extend / left shift.
3049 return 7 for < 8 bit sign extend / left shift / single right shift.
3050 If COSTP is nonzero, assign the calculated cost to *COSTP. */
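/* Illustrative example: LEFT == 8, SIZE == 16 (the low byte of the
   source ends up in bits 8..15, sign-extended into the upper bits) is
   costed below as kind 5 with 2 insns: shll8 followed by exts.w.  */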
3053 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3055 int left, size, insize, ext;
3056 int cost = 0, best_cost;
3057 int kind;
3059 left = INTVAL (left_rtx);
3060 size = INTVAL (size_rtx);
3061 insize = size - left;
3062 gcc_assert (insize > 0);
3063 /* Default to left / right shift. */
3064 kind = 0;
3065 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3066 if (size <= 16)
3068 /* 16 bit shift / sign extend / 16 bit shift */
3069 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3070 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3071 below, by alternative 3 or something even better. */
3072 if (cost < best_cost)
3074 kind = 5;
3075 best_cost = cost;
3078 /* Try a plain sign extend between two shifts. */
3079 for (ext = 16; ext >= insize; ext -= 8)
3081 if (ext <= size)
3083 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3084 if (cost < best_cost)
3086 kind = ext / (unsigned) 8;
3087 best_cost = cost;
3090 /* Check if we can do a sloppy shift with a final signed shift
3091 restoring the sign. */
3092 if (EXT_SHIFT_SIGNED (size - ext))
3093 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3094 /* If not, maybe it's still cheaper to do the second shift sloppy,
3095 and do a final sign extend? */
3096 else if (size <= 16)
3097 cost = ext_shift_insns[ext - insize] + 1
3098 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3099 else
3100 continue;
3101 if (cost < best_cost)
3103 kind = ext / (unsigned) 8 + 2;
3104 best_cost = cost;
3107 /* Check if we can sign extend in r0 */
3108 if (insize < 8)
3110 cost = 3 + shift_insns[left];
3111 if (cost < best_cost)
3113 kind = 6;
3114 best_cost = cost;
3116 /* Try the same with a final signed shift. */
3117 if (left < 31)
3119 cost = 3 + ext_shift_insns[left + 1] + 1;
3120 if (cost < best_cost)
3122 kind = 7;
3123 best_cost = cost;
3127 if (TARGET_SH3)
3129 /* Try to use a dynamic shift. */
3130 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3131 if (cost < best_cost)
3133 kind = 0;
3134 best_cost = cost;
3137 if (costp)
3138 *costp = cost;
3139 return kind;
3142 /* Function to be used in the length attribute of the instructions
3143 implementing this pattern. */
3146 shl_sext_length (rtx insn)
3148 rtx set_src, left_rtx, size_rtx;
3149 int cost;
3151 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3152 left_rtx = XEXP (XEXP (set_src, 0), 1);
3153 size_rtx = XEXP (set_src, 1);
3154 shl_sext_kind (left_rtx, size_rtx, &cost);
3155 return cost;
3158 /* Generate rtl for this pattern */
3161 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3163 int kind;
3164 int left, size, insize, cost;
3165 rtx operands[3];
3167 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3168 left = INTVAL (left_rtx);
3169 size = INTVAL (size_rtx);
3170 insize = size - left;
3171 switch (kind)
3173 case 1:
3174 case 2:
3175 case 3:
3176 case 4:
3178 int ext = kind & 1 ? 8 : 16;
3179 int shift2 = size - ext;
3181 /* Don't expand fine-grained when combining, because that will
3182 make the pattern fail. */
3183 if (! currently_expanding_to_rtl
3184 && ! reload_in_progress && ! reload_completed)
3186 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3187 emit_insn (gen_movsi (dest, source));
3188 break;
3190 if (dest != source)
3191 emit_insn (gen_movsi (dest, source));
3192 operands[0] = dest;
3193 if (ext - insize)
3195 operands[2] = GEN_INT (ext - insize);
3196 gen_shifty_hi_op (ASHIFT, operands);
3198 emit_insn (kind & 1
3199 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3200 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3201 if (kind <= 2)
3203 if (shift2)
3205 operands[2] = GEN_INT (shift2);
3206 gen_shifty_op (ASHIFT, operands);
3209 else
3211 if (shift2 > 0)
3213 if (EXT_SHIFT_SIGNED (shift2))
3215 operands[2] = GEN_INT (shift2 + 1);
3216 gen_shifty_op (ASHIFT, operands);
3217 operands[2] = const1_rtx;
3218 gen_shifty_op (ASHIFTRT, operands);
3219 break;
3221 operands[2] = GEN_INT (shift2);
3222 gen_shifty_hi_op (ASHIFT, operands);
3224 else if (shift2)
3226 operands[2] = GEN_INT (-shift2);
3227 gen_shifty_hi_op (LSHIFTRT, operands);
3229 emit_insn (size <= 8
3230 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3231 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3233 break;
3235 case 5:
3237 int i = 16 - size;
3238 if (! currently_expanding_to_rtl
3239 && ! reload_in_progress && ! reload_completed)
3240 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3241 else
3243 operands[0] = dest;
3244 operands[2] = GEN_INT (16 - insize);
3245 gen_shifty_hi_op (ASHIFT, operands);
3246 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3248 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3249 while (--i >= 0)
3250 gen_ashift (ASHIFTRT, 1, dest);
3251 break;
3253 case 6:
3254 case 7:
3255 /* Don't expand fine-grained when combining, because that will
3256 make the pattern fail. */
3257 if (! currently_expanding_to_rtl
3258 && ! reload_in_progress && ! reload_completed)
3260 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3261 emit_insn (gen_movsi (dest, source));
3262 break;
3264 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3265 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3266 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3267 operands[0] = dest;
3268 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3269 gen_shifty_op (ASHIFT, operands);
3270 if (kind == 7)
3271 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3272 break;
3273 default:
3274 return -1;
3276 return 0;
3279 /* Prefix a symbol_ref name with "datalabel". */
3282 gen_datalabel_ref (rtx sym)
3284 const char *str;
3286 if (GET_CODE (sym) == LABEL_REF)
3287 return gen_rtx_CONST (GET_MODE (sym),
3288 gen_rtx_UNSPEC (GET_MODE (sym),
3289 gen_rtvec (1, sym),
3290 UNSPEC_DATALABEL));
3292 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3294 str = XSTR (sym, 0);
3295 /* Share all SYMBOL_REF strings with the same value - that is important
3296 for cse. */
3297 str = IDENTIFIER_POINTER (get_identifier (str));
3298 XSTR (sym, 0) = str;
3300 return sym;
3304 static alloc_pool label_ref_list_pool;
3306 typedef struct label_ref_list_d
3308 rtx label;
3309 struct label_ref_list_d *next;
3310 } *label_ref_list_t;
3312 /* The SH cannot load a large constant into a register, constants have to
3313 come from a pc relative load. The reference of a pc relative load
3314 instruction must be less than 1k in front of the instruction. This
3315 means that we often have to dump a constant inside a function, and
3316 generate code to branch around it.
3318 It is important to minimize this, since the branches will slow things
3319 down and make things bigger.
3321 Worst case code looks like:
3323 mov.l L1,rn
3324 bra L2
3326 align
3327 L1: .long value
3331 mov.l L3,rn
3332 bra L4
3334 align
3335 L3: .long value
3339 We fix this by performing a scan before scheduling, which notices which
3340 instructions need to have their operands fetched from the constant table
3341 and builds the table.
3343 The algorithm is:
3345 scan, find an instruction which needs a pcrel move. Look forward, find the
3346 last barrier which is within MAX_COUNT bytes of the requirement.
3347 If there isn't one, make one. Process all the instructions between
3348 the found insn and the barrier.
3350 In the above example, we can tell that L3 is within 1k of L1, so
3351 the first move can be shrunk from the 3 insn+constant sequence into
3352 just 1 insn, and the constant moved to L3 to make:
3354 mov.l L1,rn
3356 mov.l L3,rn
3357 bra L4
3359 align
3360 L3:.long value
3361 L4:.long value
3363 Then the second move becomes the target for the shortening process. */
3365 typedef struct
3367 rtx value; /* Value in table. */
3368 rtx label; /* Label of value. */
3369 label_ref_list_t wend; /* End of window. */
3370 enum machine_mode mode; /* Mode of value. */
3372 /* True if this constant is accessed as part of a post-increment
3373 sequence. Note that HImode constants are never accessed in this way. */
3374 bool part_of_sequence_p;
3375 } pool_node;
3377 /* The maximum number of constants that can fit into one pool, since
3378 constants in the range 0..510 are at least 2 bytes long, and in the
3379 range from there to 1018 at least 4 bytes. */
3381 #define MAX_POOL_SIZE 372
3382 static pool_node pool_vector[MAX_POOL_SIZE];
3383 static int pool_size;
3384 static rtx pool_window_label;
3385 static int pool_window_last;
3387 static int max_labelno_before_reorg;
3389 /* ??? If we need a constant in HImode which is the truncated value of a
3390 constant we need in SImode, we could combine the two entries thus saving
3391 two bytes. Is this common enough to be worth the effort of implementing
3392 it? */
3394 /* ??? This stuff should be done at the same time that we shorten branches.
3395 As it is now, we must assume that all branches are the maximum size, and
3396 this causes us to almost always output constant pools sooner than
3397 necessary. */
3399 /* Add a constant to the pool and return its label. */
3401 static rtx
3402 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3404 int i;
3405 rtx lab, new;
3406 label_ref_list_t ref, newref;
3408 /* First see if we've already got it. */
3409 for (i = 0; i < pool_size; i++)
3411 if (x->code == pool_vector[i].value->code
3412 && mode == pool_vector[i].mode)
3414 if (x->code == CODE_LABEL)
3416 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3417 continue;
3419 if (rtx_equal_p (x, pool_vector[i].value))
3421 lab = new = 0;
3422 if (! last_value
3423 || ! i
3424 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3426 new = gen_label_rtx ();
3427 LABEL_REFS (new) = pool_vector[i].label;
3428 pool_vector[i].label = lab = new;
3430 if (lab && pool_window_label)
3432 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3433 newref->label = pool_window_label;
3434 ref = pool_vector[pool_window_last].wend;
3435 newref->next = ref;
3436 pool_vector[pool_window_last].wend = newref;
3438 if (new)
3439 pool_window_label = new;
3440 pool_window_last = i;
3441 return lab;
3446 /* Need a new one. */
3447 pool_vector[pool_size].value = x;
3448 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3450 lab = 0;
3451 pool_vector[pool_size - 1].part_of_sequence_p = true;
3453 else
3454 lab = gen_label_rtx ();
3455 pool_vector[pool_size].mode = mode;
3456 pool_vector[pool_size].label = lab;
3457 pool_vector[pool_size].wend = NULL;
3458 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3459 if (lab && pool_window_label)
3461 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3462 newref->label = pool_window_label;
3463 ref = pool_vector[pool_window_last].wend;
3464 newref->next = ref;
3465 pool_vector[pool_window_last].wend = newref;
3467 if (lab)
3468 pool_window_label = lab;
3469 pool_window_last = pool_size;
3470 pool_size++;
3471 return lab;
3474 /* Output the literal table. START, if nonzero, is the first instruction
3475 this table is needed for, and also indicates that there is at least one
3476 casesi_worker_2 instruction; we have to emit the operand3 labels from
3477 these insns at a 4-byte aligned position. BARRIER is the barrier
3478 after which we are to place the table. */
3480 static void
3481 dump_table (rtx start, rtx barrier)
3483 rtx scan = barrier;
3484 int i;
3485 int need_align = 1;
3486 rtx lab;
3487 label_ref_list_t ref;
3488 int have_df = 0;
3490 /* Do two passes, first time dump out the HI sized constants. */
3492 for (i = 0; i < pool_size; i++)
3494 pool_node *p = &pool_vector[i];
3496 if (p->mode == HImode)
3498 if (need_align)
3500 scan = emit_insn_after (gen_align_2 (), scan);
3501 need_align = 0;
3503 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3504 scan = emit_label_after (lab, scan);
3505 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3506 scan);
3507 for (ref = p->wend; ref; ref = ref->next)
3509 lab = ref->label;
3510 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3513 else if (p->mode == DFmode)
3514 have_df = 1;
3517 need_align = 1;
3519 if (start)
3521 scan = emit_insn_after (gen_align_4 (), scan);
3522 need_align = 0;
3523 for (; start != barrier; start = NEXT_INSN (start))
3524 if (GET_CODE (start) == INSN
3525 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3527 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3528 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3530 scan = emit_label_after (lab, scan);
3533 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3535 rtx align_insn = NULL_RTX;
3537 scan = emit_label_after (gen_label_rtx (), scan);
3538 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3539 need_align = 0;
3541 for (i = 0; i < pool_size; i++)
3543 pool_node *p = &pool_vector[i];
3545 switch (p->mode)
3547 case HImode:
3548 break;
3549 case SImode:
3550 case SFmode:
3551 if (align_insn && !p->part_of_sequence_p)
3553 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3554 emit_label_before (lab, align_insn);
3555 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3556 align_insn);
3557 for (ref = p->wend; ref; ref = ref->next)
3559 lab = ref->label;
3560 emit_insn_before (gen_consttable_window_end (lab),
3561 align_insn);
3563 delete_insn (align_insn);
3564 align_insn = NULL_RTX;
3565 continue;
3567 else
3569 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3570 scan = emit_label_after (lab, scan);
3571 scan = emit_insn_after (gen_consttable_4 (p->value,
3572 const0_rtx), scan);
3573 need_align = ! need_align;
3575 break;
3576 case DFmode:
3577 if (need_align)
3579 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3580 align_insn = scan;
3581 need_align = 0;
3583 case DImode:
3584 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3585 scan = emit_label_after (lab, scan);
3586 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3587 scan);
3588 break;
3589 default:
3590 gcc_unreachable ();
3593 if (p->mode != HImode)
3595 for (ref = p->wend; ref; ref = ref->next)
3597 lab = ref->label;
3598 scan = emit_insn_after (gen_consttable_window_end (lab),
3599 scan);
3604 pool_size = 0;
3607 for (i = 0; i < pool_size; i++)
3609 pool_node *p = &pool_vector[i];
3611 switch (p->mode)
3613 case HImode:
3614 break;
3615 case SImode:
3616 case SFmode:
3617 if (need_align)
3619 need_align = 0;
3620 scan = emit_label_after (gen_label_rtx (), scan);
3621 scan = emit_insn_after (gen_align_4 (), scan);
3623 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3624 scan = emit_label_after (lab, scan);
3625 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3626 scan);
3627 break;
3628 case DFmode:
3629 case DImode:
3630 if (need_align)
3632 need_align = 0;
3633 scan = emit_label_after (gen_label_rtx (), scan);
3634 scan = emit_insn_after (gen_align_4 (), scan);
3636 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3637 scan = emit_label_after (lab, scan);
3638 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3639 scan);
3640 break;
3641 default:
3642 gcc_unreachable ();
3645 if (p->mode != HImode)
3647 for (ref = p->wend; ref; ref = ref->next)
3649 lab = ref->label;
3650 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3655 scan = emit_insn_after (gen_consttable_end (), scan);
3656 scan = emit_barrier_after (scan);
3657 pool_size = 0;
3658 pool_window_label = NULL_RTX;
3659 pool_window_last = 0;
3662 /* Return nonzero if constant would be an ok source for a
3663 mov.w instead of a mov.l. */
3665 static int
3666 hi_const (rtx src)
3668 return (GET_CODE (src) == CONST_INT
3669 && INTVAL (src) >= -32768
3670 && INTVAL (src) <= 32767);
3673 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3675 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3677 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3678 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3679 need to fix it if the input value is CONST_OK_FOR_I08. */
3681 static int
3682 broken_move (rtx insn)
3684 if (GET_CODE (insn) == INSN)
3686 rtx pat = PATTERN (insn);
3687 if (GET_CODE (pat) == PARALLEL)
3688 pat = XVECEXP (pat, 0, 0);
3689 if (GET_CODE (pat) == SET
3690 /* We can load any 8-bit value if we don't care what the high
3691 order bits end up as. */
3692 && GET_MODE (SET_DEST (pat)) != QImode
3693 && (CONSTANT_P (SET_SRC (pat))
3694 /* Match mova_const. */
3695 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3696 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3697 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3698 && ! (TARGET_SH2E
3699 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3700 && (fp_zero_operand (SET_SRC (pat))
3701 || fp_one_operand (SET_SRC (pat)))
3702 /* ??? If this is a -m4 or -m4-single compilation, in general
3703 we don't know the current setting of fpscr, so disable fldi.
3704 There is an exception if this was a register-register move
3705 before reload - and hence it was ascertained that we have
3706 single precision setting - and in a post-reload optimization
3707 we changed this to do a constant load. In that case
3708 we don't have an r0 clobber, hence we must use fldi. */
3709 && (! TARGET_SH4 || TARGET_FMOVD
3710 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3711 == SCRATCH))
3712 && GET_CODE (SET_DEST (pat)) == REG
3713 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3714 && ! (TARGET_SH2A
3715 && GET_MODE (SET_DEST (pat)) == SImode
3716 && satisfies_constraint_I20 (SET_SRC (pat)))
3717 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3718 return 1;
3721 return 0;
3724 static int
3725 mova_p (rtx insn)
3727 return (GET_CODE (insn) == INSN
3728 && GET_CODE (PATTERN (insn)) == SET
3729 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3730 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3731 /* Don't match mova_const. */
3732 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3735 /* Fix up a mova from a switch that went out of range. */
3736 static void
3737 fixup_mova (rtx mova)
3739 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3740 if (! flag_pic)
3742 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3743 INSN_CODE (mova) = -1;
3745 else
3747 rtx worker = mova;
3748 rtx lab = gen_label_rtx ();
3749 rtx wpat, wpat0, wpat1, wsrc, diff;
3753 worker = NEXT_INSN (worker);
3754 gcc_assert (worker
3755 && GET_CODE (worker) != CODE_LABEL
3756 && GET_CODE (worker) != JUMP_INSN);
3757 } while (GET_CODE (worker) == NOTE
3758 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3759 wpat = PATTERN (worker);
3760 wpat0 = XVECEXP (wpat, 0, 0);
3761 wpat1 = XVECEXP (wpat, 0, 1);
3762 wsrc = SET_SRC (wpat0);
3763 PATTERN (worker) = (gen_casesi_worker_2
3764 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3765 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3766 XEXP (wpat1, 0)));
3767 INSN_CODE (worker) = -1;
3768 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3769 gen_rtx_LABEL_REF (Pmode, lab));
3770 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3771 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3772 INSN_CODE (mova) = -1;
3776 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3777 *num_mova, and check if the new mova is not nested within the first one.
3778 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3779 2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
3780 static int
3781 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3783 int n_addr = 0; /* Initialization to shut up spurious warning. */
3784 int f_target, n_target = 0; /* Likewise. */
3786 if (optimize)
3788 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3789 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3790 if (n_addr > n_target || n_addr + 1022 < n_target)
3792 /* Change the mova into a load.
3793 broken_move will then return true for it. */
3794 fixup_mova (new_mova);
3795 return 1;
3798 if (!(*num_mova)++)
3800 *first_mova = new_mova;
3801 return 2;
3803 if (!optimize
3804 || ((f_target
3805 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3806 >= n_target))
3807 return -1;
3809 (*num_mova)--;
3810 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3811 > n_target - n_addr)
3813 fixup_mova (*first_mova);
3814 return 0;
3816 else
3818 fixup_mova (new_mova);
3819 return 1;
3823 /* Find the last barrier from insn FROM which is close enough to hold the
3824 constant pool. If we can't find one, then create one near the end of
3825 the range. */
3827 static rtx
3828 find_barrier (int num_mova, rtx mova, rtx from)
3830 int count_si = 0;
3831 int count_hi = 0;
3832 int found_hi = 0;
3833 int found_si = 0;
3834 int found_di = 0;
3835 int hi_align = 2;
3836 int si_align = 2;
3837 int leading_mova = num_mova;
3838 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3839 int si_limit;
3840 int hi_limit;
3842 /* For HImode: range is 510, add 4 because pc counts from address of
3843 second instruction after this one, subtract 2 for the jump instruction
3844 that we may need to emit before the table, subtract 2 for the instruction
3845 that fills the jump delay slot (in very rare cases, reorg will take an
3846 instruction from after the constant pool or will leave the delay slot
3847 empty). This gives 510.
3848 For SImode: range is 1020, add 4 because pc counts from address of
3849 second instruction after this one, subtract 2 in case pc is 2 byte
3850 aligned, subtract 2 for the jump instruction that we may need to emit
3851 before the table, subtract 2 for the instruction that fills the jump
3852 delay slot. This gives 1018. */
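/* Spelled out, the arithmetic above is: HImode 510 + 4 - 2 - 2 = 510,
   SImode 1020 + 4 - 2 - 2 - 2 = 1018, matching hi_limit and si_limit
   just below.  */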
3854 /* The branch will always be shortened now that the reference address for
3855 forward branches is the successor address, so we no longer need to make
3856 adjustments to the [sh]i_limit for -O0. */
3858 si_limit = 1018;
3859 hi_limit = 510;
3861 while (from && count_si < si_limit && count_hi < hi_limit)
3863 int inc = get_attr_length (from);
3864 int new_align = 1;
3866 /* If this is a label that existed at the time of the compute_alignments
3867 call, determine the alignment. N.B. When find_barrier recurses for
3868 an out-of-reach mova, we might see labels at the start of previously
3869 inserted constant tables. */
3870 if (GET_CODE (from) == CODE_LABEL
3871 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3873 if (optimize)
3874 new_align = 1 << label_to_alignment (from);
3875 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3876 new_align = 1 << barrier_align (from);
3877 else
3878 new_align = 1;
3879 inc = 0;
3881 /* In case we are scanning a constant table because of recursion, check
3882 for explicit alignments. If the table is long, we might be forced
3883 to emit the new table in front of it; the length of the alignment
3884 might be the last straw. */
3885 else if (GET_CODE (from) == INSN
3886 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3887 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3888 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3889 /* When we find the end of a constant table, paste the new constant
3890 at the end. That is better than putting it in front because
3891 this way, we don't need extra alignment for adding a 4-byte-aligned
3892 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3893 else if (GET_CODE (from) == INSN
3894 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3895 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3896 return from;
3898 if (GET_CODE (from) == BARRIER)
3901 found_barrier = from;
3903 /* If we are at the end of the function, or in front of an alignment
3904 instruction, we need not insert an extra alignment. We prefer
3905 this kind of barrier. */
3906 if (barrier_align (from) > 2)
3907 good_barrier = from;
3910 if (broken_move (from))
3912 rtx pat, src, dst;
3913 enum machine_mode mode;
3915 pat = PATTERN (from);
3916 if (GET_CODE (pat) == PARALLEL)
3917 pat = XVECEXP (pat, 0, 0);
3918 src = SET_SRC (pat);
3919 dst = SET_DEST (pat);
3920 mode = GET_MODE (dst);
3922 /* We must explicitly check the mode, because sometimes the
3923 front end will generate code to load unsigned constants into
3924 HImode targets without properly sign extending them. */
3925 if (mode == HImode
3926 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3928 found_hi += 2;
3929 /* We put the short constants before the long constants, so
3930 we must count the length of short constants in the range
3931 for the long constants. */
3932 /* ??? This isn't optimal, but is easy to do. */
3933 si_limit -= 2;
3935 else
3937 /* We dump DF/DI constants before SF/SI ones, because
3938 the limit is the same, but the alignment requirements
3939 are higher. We may waste up to 4 additional bytes
3940 for alignment, and the DF/DI constant may have
3941 another SF/SI constant placed before it. */
3942 if (TARGET_SHCOMPACT
3943 && ! found_di
3944 && (mode == DFmode || mode == DImode))
3946 found_di = 1;
3947 si_limit -= 8;
3949 while (si_align > 2 && found_si + si_align - 2 > count_si)
3950 si_align >>= 1;
3951 if (found_si > count_si)
3952 count_si = found_si;
3953 found_si += GET_MODE_SIZE (mode);
3954 if (num_mova)
3955 si_limit -= GET_MODE_SIZE (mode);
3959 if (mova_p (from))
3961 switch (untangle_mova (&num_mova, &mova, from))
3963 case 0: return find_barrier (0, 0, mova);
3964 case 2:
3966 leading_mova = 0;
3967 barrier_before_mova
3968 = good_barrier ? good_barrier : found_barrier;
3970 default: break;
3972 if (found_si > count_si)
3973 count_si = found_si;
3975 else if (GET_CODE (from) == JUMP_INSN
3976 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3977 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3979 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3980 || (num_mova
3981 && (prev_nonnote_insn (from)
3982 == XEXP (MOVA_LABELREF (mova), 0))))
3983 num_mova--;
3984 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3986 /* We have just passed the barrier in front of the
3987 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3988 the ADDR_DIFF_VEC is accessed as data, just like our pool
3989 constants, this is a good opportunity to accommodate what
3990 we have gathered so far.
3991 If we waited any longer, we could end up at a barrier in
3992 front of code, which gives worse cache usage for separated
3993 instruction / data caches. */
3994 good_barrier = found_barrier;
3995 break;
3997 else
3999 rtx body = PATTERN (from);
4000 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4003 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4004 else if (GET_CODE (from) == JUMP_INSN
4005 && ! TARGET_SH2
4006 && ! TARGET_SMALLCODE)
4007 new_align = 4;
4009 if (found_si)
4011 count_si += inc;
4012 if (new_align > si_align)
4014 si_limit -= (count_si - 1) & (new_align - si_align);
4015 si_align = new_align;
4017 count_si = (count_si + new_align - 1) & -new_align;
4019 if (found_hi)
4021 count_hi += inc;
4022 if (new_align > hi_align)
4024 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4025 hi_align = new_align;
4027 count_hi = (count_hi + new_align - 1) & -new_align;
4029 from = NEXT_INSN (from);
4032 if (num_mova)
4034 if (leading_mova)
4036 /* Try as we might, the leading mova is out of range. Change
4037 it into a load (which will become a pcload) and retry. */
4038 fixup_mova (mova);
4039 return find_barrier (0, 0, mova);
4041 else
4043 /* Insert the constant pool table before the mova instruction,
4044 to prevent the mova label reference from going out of range. */
4045 from = mova;
4046 good_barrier = found_barrier = barrier_before_mova;
4050 if (found_barrier)
4052 if (good_barrier && next_real_insn (found_barrier))
4053 found_barrier = good_barrier;
4055 else
4057 /* We didn't find a barrier in time to dump our stuff,
4058 so we'll make one. */
4059 rtx label = gen_label_rtx ();
4061 /* If we exceeded the range, then we must back up over the last
4062 instruction we looked at. Otherwise, we just need to undo the
4063 NEXT_INSN at the end of the loop. */
4064 if (count_hi > hi_limit || count_si > si_limit)
4065 from = PREV_INSN (PREV_INSN (from));
4066 else
4067 from = PREV_INSN (from);
4069 /* Walk back to be just before any jump or label.
4070 Putting it before a label reduces the number of times the branch
4071 around the constant pool table will be hit. Putting it before
4072 a jump makes it more likely that the bra delay slot will be
4073 filled. */
4074 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4075 || GET_CODE (from) == CODE_LABEL)
4076 from = PREV_INSN (from);
4078 from = emit_jump_insn_after (gen_jump (label), from);
4079 JUMP_LABEL (from) = label;
4080 LABEL_NUSES (label) = 1;
4081 found_barrier = emit_barrier_after (from);
4082 emit_label_after (label, found_barrier);
4085 return found_barrier;
4088 /* If the instruction INSN is implemented by a special function, and we can
4089 positively find the register that is used to call the sfunc, and this
4090 register is not used anywhere else in this instruction - except as the
4091 destination of a set, return this register; else, return 0. */
4093 sfunc_uses_reg (rtx insn)
4095 int i;
4096 rtx pattern, part, reg_part, reg;
4098 if (GET_CODE (insn) != INSN)
4099 return 0;
4100 pattern = PATTERN (insn);
4101 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4102 return 0;
4104 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4106 part = XVECEXP (pattern, 0, i);
4107 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4108 reg_part = part;
4110 if (! reg_part)
4111 return 0;
4112 reg = XEXP (reg_part, 0);
4113 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4115 part = XVECEXP (pattern, 0, i);
4116 if (part == reg_part || GET_CODE (part) == CLOBBER)
4117 continue;
4118 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4119 && GET_CODE (SET_DEST (part)) == REG)
4120 ? SET_SRC (part) : part)))
4121 return 0;
4123 return reg;
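/* Note, paraphrasing the surrounding code rather than any original
   comment: an "sfunc" is a special library helper called through a
   register; its call pattern is a PARALLEL of type TYPE_SFUNC that
   contains a (use (reg:SI N)) naming the call register, which is what
   the first loop above searches for.  */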
4126 /* See if the only way in which INSN uses REG is by calling it, or by
4127 setting it while calling it. Set *SET to a SET rtx if the register
4128 is set by INSN. */
4130 static int
4131 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4133 rtx pattern, reg2;
4135 *set = NULL_RTX;
4137 reg2 = sfunc_uses_reg (insn);
4138 if (reg2 && REGNO (reg2) == REGNO (reg))
4140 pattern = single_set (insn);
4141 if (pattern
4142 && GET_CODE (SET_DEST (pattern)) == REG
4143 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4144 *set = pattern;
4145 return 0;
4147 if (GET_CODE (insn) != CALL_INSN)
4149 /* We don't use rtx_equal_p because we don't care if the mode is
4150 different. */
4151 pattern = single_set (insn);
4152 if (pattern
4153 && GET_CODE (SET_DEST (pattern)) == REG
4154 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4156 rtx par, part;
4157 int i;
4159 *set = pattern;
4160 par = PATTERN (insn);
4161 if (GET_CODE (par) == PARALLEL)
4162 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4164 part = XVECEXP (par, 0, i);
4165 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4166 return 1;
4168 return reg_mentioned_p (reg, SET_SRC (pattern));
4171 return 1;
4174 pattern = PATTERN (insn);
4176 if (GET_CODE (pattern) == PARALLEL)
4178 int i;
4180 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4181 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4182 return 1;
4183 pattern = XVECEXP (pattern, 0, 0);
4186 if (GET_CODE (pattern) == SET)
4188 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4190 /* We don't use rtx_equal_p, because we don't care if the
4191 mode is different. */
4192 if (GET_CODE (SET_DEST (pattern)) != REG
4193 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4194 return 1;
4196 *set = pattern;
4199 pattern = SET_SRC (pattern);
4202 if (GET_CODE (pattern) != CALL
4203 || GET_CODE (XEXP (pattern, 0)) != MEM
4204 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4205 return 1;
4207 return 0;
4210 /* Given X, a pattern of an insn or a part of it, return a mask of used
4211 general registers. Bits 0..15 mean that the respective registers
4212 are used as inputs in the instruction. Bits 16..31 mean that the
4213 registers 0..15, respectively, are used as outputs, or are clobbered.
4214 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4216 regs_used (rtx x, int is_dest)
4218 enum rtx_code code;
4219 const char *fmt;
4220 int i, used = 0;
4222 if (! x)
4223 return used;
4224 code = GET_CODE (x);
4225 switch (code)
4227 case REG:
4228 if (REGNO (x) < 16)
4229 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4230 << (REGNO (x) + is_dest));
4231 return 0;
4232 case SUBREG:
4234 rtx y = SUBREG_REG (x);
4236 if (GET_CODE (y) != REG)
4237 break;
4238 if (REGNO (y) < 16)
4239 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4240 << (REGNO (y) +
4241 subreg_regno_offset (REGNO (y),
4242 GET_MODE (y),
4243 SUBREG_BYTE (x),
4244 GET_MODE (x)) + is_dest));
4245 return 0;
4247 case SET:
4248 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4249 case RETURN:
4250 /* If there was a return value, it must have been indicated with USE. */
4251 return 0x00ffff00;
4252 case CLOBBER:
4253 is_dest = 1;
4254 break;
4255 case MEM:
4256 is_dest = 0;
4257 break;
4258 case CALL:
4259 used |= 0x00ff00f0;
4260 break;
4261 default:
4262 break;
4265 fmt = GET_RTX_FORMAT (code);
4267 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4269 if (fmt[i] == 'E')
4271 register int j;
4272 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4273 used |= regs_used (XVECEXP (x, i, j), is_dest);
4275 else if (fmt[i] == 'e')
4276 used |= regs_used (XEXP (x, i), is_dest);
4278 return used;
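/* Illustrative example (invented, and assuming HARD_REGNO_NREGS gives 1
   for an SImode general register): for
     (set (reg:SI 2) (plus:SI (reg:SI 1) (reg:SI 3)))
   regs_used returns (1 << 1) | (1 << 3) for the inputs plus
   (1 << (16 + 2)) for the output, i.e. 0x0004000a.  */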
4281 /* Create an instruction that prevents redirection of a conditional branch
4282 to the destination of the JUMP with address ADDR.
4283 If the branch needs to be implemented as an indirect jump, try to find
4284 a scratch register for it.
4285 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4286 If any preceding insn that doesn't fit into a delay slot is good enough,
4287 pass 1. Pass 2 if a definite blocking insn is needed.
4288 -1 is used internally to avoid deep recursion.
4289 If a blocking instruction is made or recognized, return it. */
4291 static rtx
4292 gen_block_redirect (rtx jump, int addr, int need_block)
4294 int dead = 0;
4295 rtx prev = prev_nonnote_insn (jump);
4296 rtx dest;
4298 /* First, check if we already have an instruction that satisfies our need. */
4299 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4301 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4302 return prev;
4303 if (GET_CODE (PATTERN (prev)) == USE
4304 || GET_CODE (PATTERN (prev)) == CLOBBER
4305 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4306 prev = jump;
4307 else if ((need_block &= ~1) < 0)
4308 return prev;
4309 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4310 need_block = 0;
4312 if (GET_CODE (PATTERN (jump)) == RETURN)
4314 if (! need_block)
4315 return prev;
4316 /* Reorg even does nasty things with return insns that cause branches
4317 to go out of range - see find_end_label and callers. */
4318 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4320 /* We can't use JUMP_LABEL here because it might be undefined
4321 when not optimizing. */
4322 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4323 /* If the branch is out of range, try to find a scratch register for it. */
4324 if (optimize
4325 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4326 > 4092 + 4098))
4328 rtx scan;
4329 /* Don't look for the stack pointer as a scratch register;
4330 it would cause trouble if an interrupt occurred. */
4331 unsigned try = 0x7fff, used;
4332 int jump_left = flag_expensive_optimizations + 1;
4334 /* It is likely that the most recent eligible instruction is wanted for
4335 the delay slot. Therefore, find out which registers it uses, and
4336 try to avoid using them. */
4338 for (scan = jump; (scan = PREV_INSN (scan)); )
4340 enum rtx_code code;
4342 if (INSN_DELETED_P (scan))
4343 continue;
4344 code = GET_CODE (scan);
4345 if (code == CODE_LABEL || code == JUMP_INSN)
4346 break;
4347 if (code == INSN
4348 && GET_CODE (PATTERN (scan)) != USE
4349 && GET_CODE (PATTERN (scan)) != CLOBBER
4350 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4352 try &= ~regs_used (PATTERN (scan), 0);
4353 break;
4356 for (used = dead = 0, scan = JUMP_LABEL (jump);
4357 (scan = NEXT_INSN (scan)); )
4359 enum rtx_code code;
4361 if (INSN_DELETED_P (scan))
4362 continue;
4363 code = GET_CODE (scan);
4364 if (INSN_P (scan))
4366 used |= regs_used (PATTERN (scan), 0);
4367 if (code == CALL_INSN)
4368 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4369 dead |= (used >> 16) & ~used;
4370 if (dead & try)
4372 dead &= try;
4373 break;
4375 if (code == JUMP_INSN)
4377 if (jump_left-- && simplejump_p (scan))
4378 scan = JUMP_LABEL (scan);
4379 else
4380 break;
4384 /* Mask out the stack pointer again, in case it was
4385 the only 'free' register we have found. */
4386 dead &= 0x7fff;
4388 /* If the immediate destination is still in range, check for possible
4389 threading with a jump beyond the delay slot insn.
4390 Don't check if we are called recursively; the jump has been or will be
4391 checked in that other invocation. */
4393 else if (optimize && need_block >= 0)
4395 rtx next = next_active_insn (next_active_insn (dest));
4396 if (next && GET_CODE (next) == JUMP_INSN
4397 && GET_CODE (PATTERN (next)) == SET
4398 && recog_memoized (next) == CODE_FOR_jump_compact)
4400 dest = JUMP_LABEL (next);
4401 if (dest
4402 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4403 > 4092 + 4098))
4404 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4408 if (dead)
4410 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4412 /* It would be nice if we could convert the jump into an indirect
4413 jump / far branch right now, and thus expose all constituent
4414 instructions to further optimization. However, reorg uses
4415 simplejump_p to determine if there is an unconditional jump where
4416 it should try to schedule instructions from the target of the
4417 branch; simplejump_p fails for indirect jumps even if they have
4418 a JUMP_LABEL. */
4419 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4420 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4421 , jump);
4422 /* ??? We would like this to have the scope of the jump, but that
4423 scope will change when a delay slot insn of an inner scope is added.
4424 Hence, after delay slot scheduling, we'll have to expect
4425 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4426 the jump. */
4428 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4429 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4430 return insn;
4432 else if (need_block)
4433 /* We can't use JUMP_LABEL here because it might be undefined
4434 when not optimizing. */
4435 return emit_insn_before (gen_block_branch_redirect
4436 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4437 , jump);
4438 return prev;
4441 #define CONDJUMP_MIN -252
4442 #define CONDJUMP_MAX 262
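/* Assumption, not stated in the original sources: these bounds appear to
   reflect the pc-relative reach of the SH conditional branches (bt/bf use
   an 8-bit signed displacement scaled by 2), with a few bytes of slack;
   split_branches below uses them to decide whether a prospective near
   label is still reachable from a given branch.  */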
4443 struct far_branch
4445 /* A label (to be placed) in front of the jump
4446 that jumps to our ultimate destination. */
4447 rtx near_label;
4448 /* Where we are going to insert it if we cannot move the jump any farther,
4449 or the jump itself if we have picked up an existing jump. */
4450 rtx insert_place;
4451 /* The ultimate destination. */
4452 rtx far_label;
4453 struct far_branch *prev;
4454 /* If the branch has already been created, its address;
4455 else the address of its first prospective user. */
4456 int address;
4459 static void gen_far_branch (struct far_branch *);
4460 enum mdep_reorg_phase_e mdep_reorg_phase;
4461 static void
4462 gen_far_branch (struct far_branch *bp)
4464 rtx insn = bp->insert_place;
4465 rtx jump;
4466 rtx label = gen_label_rtx ();
4467 int ok;
4469 emit_label_after (label, insn);
4470 if (bp->far_label)
4472 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4473 LABEL_NUSES (bp->far_label)++;
4475 else
4476 jump = emit_jump_insn_after (gen_return (), insn);
4477 /* Emit a barrier so that reorg knows that any following instructions
4478 are not reachable via a fall-through path.
4479 But don't do this when not optimizing, since we wouldn't suppress the
4480 alignment for the barrier then, and could end up with out-of-range
4481 pc-relative loads. */
4482 if (optimize)
4483 emit_barrier_after (jump);
4484 emit_label_after (bp->near_label, insn);
4485 JUMP_LABEL (jump) = bp->far_label;
4486 ok = invert_jump (insn, label, 1);
4487 gcc_assert (ok);
4489 /* If we are branching around a jump (rather than a return), prevent
4490 reorg from using an insn from the jump target as the delay slot insn -
4491 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4492 and it could cause branches to go out of range. */
4493 if (bp->far_label)
4494 (emit_insn_after
4495 (gen_stuff_delay_slot
4496 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4497 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4498 insn));
4499 /* Prevent reorg from undoing our splits. */
4500 gen_block_redirect (jump, bp->address += 2, 2);
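/* Roughly, for a conditional branch whose target is out of reach, the code
   above produces something like this (a sketch of the eventual assembly,
   not the exact RTL; label names invented):
        bf      .Lskip          ! original bt, condition inverted
   .Lnear:                      ! other far branches to the same target
        bra     .Lfar           ! land here (rts instead when far_label
        nop                     ! is null); bra has the longer reach
   .Lskip:
 */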
4503 /* Fix up ADDR_DIFF_VECs. */
4504 void
4505 fixup_addr_diff_vecs (rtx first)
4507 rtx insn;
4509 for (insn = first; insn; insn = NEXT_INSN (insn))
4511 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4513 if (GET_CODE (insn) != JUMP_INSN
4514 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4515 continue;
4516 pat = PATTERN (insn);
4517 vec_lab = XEXP (XEXP (pat, 0), 0);
4519 /* Search the matching casesi_jump_2. */
4520 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4522 if (GET_CODE (prev) != JUMP_INSN)
4523 continue;
4524 prevpat = PATTERN (prev);
4525 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4526 continue;
4527 x = XVECEXP (prevpat, 0, 1);
4528 if (GET_CODE (x) != USE)
4529 continue;
4530 x = XEXP (x, 0);
4531 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4532 break;
4534 /* FIXME: This is a bug in the optimizer, but it seems harmless
4535 to just avoid panicking. */
4536 if (!prev)
4537 continue;
4539 /* Emit the reference label of the braf where it belongs, right after
4540 the casesi_jump_2 (i.e. braf). */
4541 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4542 emit_label_after (braf_label, prev);
4544 /* Fix up the ADDR_DIFF_VEC to be relative
4545 to the reference address of the braf. */
4546 XEXP (XEXP (pat, 0), 0) = braf_label;
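/* Background, hedged: braf adds its operand register to the pc, so once
   the table base is the braf's own reference label, the ADDR_DIFF_VEC
   entries are exactly the offsets the dispatch sequence needs.  */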
4550 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4551 a barrier. Return the base 2 logarithm of the desired alignment. */
4553 barrier_align (rtx barrier_or_label)
4555 rtx next = next_real_insn (barrier_or_label), pat, prev;
4556 int slot, credit, jump_to_next = 0;
4558 if (! next)
4559 return 0;
4561 pat = PATTERN (next);
4563 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4564 return 2;
4566 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4567 /* This is a barrier in front of a constant table. */
4568 return 0;
4570 prev = prev_real_insn (barrier_or_label);
4571 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4573 pat = PATTERN (prev);
4574 /* If this is a very small table, we want to keep the alignment after
4575 the table to the minimum for proper code alignment. */
4576 return ((TARGET_SMALLCODE
4577 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4578 <= (unsigned) 1 << (CACHE_LOG - 2)))
4579 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4582 if (TARGET_SMALLCODE)
4583 return 0;
4585 if (! TARGET_SH2 || ! optimize)
4586 return align_jumps_log;
4588 /* When fixing up pcloads, a constant table might be inserted just before
4589 the basic block that ends with the barrier. Thus, we can't trust the
4590 instruction lengths before that. */
4591 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4593 /* Check if there is an immediately preceding branch to the insn beyond
4594 the barrier. We must weigh the cost of discarding useful information
4595 from the current cache line when executing this branch and there is
4596 an alignment, against that of fetching unneeded insns in front of the
4597 branch target when there is no alignment. */
4599 /* There are two delay_slot cases to consider. One is the simple case
4600 where the preceding branch is to the insn beyond the barrier (simple
4601 delay slot filling), and the other is where the preceding branch has
4602 a delay slot that is a duplicate of the insn after the barrier
4603 (fill_eager_delay_slots) and the branch is to the insn after the insn
4604 after the barrier. */
4606 /* PREV is presumed to be the JUMP_INSN for the barrier under
4607 investigation. Skip to the insn before it. */
4608 prev = prev_real_insn (prev);
4610 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4611 credit >= 0 && prev && GET_CODE (prev) == INSN;
4612 prev = prev_real_insn (prev))
4614 jump_to_next = 0;
4615 if (GET_CODE (PATTERN (prev)) == USE
4616 || GET_CODE (PATTERN (prev)) == CLOBBER)
4617 continue;
4618 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4620 prev = XVECEXP (PATTERN (prev), 0, 1);
4621 if (INSN_UID (prev) == INSN_UID (next))
4623 /* Delay slot was filled with insn at jump target. */
4624 jump_to_next = 1;
4625 continue;
4629 if (slot &&
4630 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4631 slot = 0;
4632 credit -= get_attr_length (prev);
4634 if (prev
4635 && GET_CODE (prev) == JUMP_INSN
4636 && JUMP_LABEL (prev))
4638 rtx x;
4639 if (jump_to_next
4640 || next_real_insn (JUMP_LABEL (prev)) == next
4641 /* If relax_delay_slots() decides NEXT was redundant
4642 with some previous instruction, it will have
4643 redirected PREV's jump to the following insn. */
4644 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4645 /* There is no upper bound on redundant instructions
4646 that might have been skipped, but we must not put an
4647 alignment where none had been before. */
4648 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4649 (INSN_P (x)
4650 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4651 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4652 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4654 rtx pat = PATTERN (prev);
4655 if (GET_CODE (pat) == PARALLEL)
4656 pat = XVECEXP (pat, 0, 0);
4657 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4658 return 0;
4663 return align_jumps_log;
4666 /* If we are inside a phony loop, almost any kind of label can turn up as the
4667 first one in the loop. Aligning a braf label causes incorrect switch
4668 destination addresses; we can detect braf labels because they are
4669 followed by a BARRIER.
4670 Applying loop alignment to small constant or switch tables is a waste
4671 of space, so we suppress this too. */
4673 sh_loop_align (rtx label)
4675 rtx next = label;
4678 next = next_nonnote_insn (next);
4679 while (next && GET_CODE (next) == CODE_LABEL);
4681 if (! next
4682 || ! INSN_P (next)
4683 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4684 || recog_memoized (next) == CODE_FOR_consttable_2)
4685 return 0;
4687 return align_loops_log;
4690 /* Do a final pass over the function, just before delayed branch
4691 scheduling. */
4693 static void
4694 sh_reorg (void)
4696 rtx first, insn, mova = NULL_RTX;
4697 int num_mova;
4698 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4699 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4701 first = get_insns ();
4702 max_labelno_before_reorg = max_label_num ();
4704 /* We must split call insns before introducing `mova's. If we're
4705 optimizing, they'll have already been split. Otherwise, make
4706 sure we don't split them too late. */
4707 if (! optimize)
4708 split_all_insns_noflow ();
4710 if (TARGET_SHMEDIA)
4711 return;
4713 /* If relaxing, generate pseudo-ops to associate function calls with
4714 the symbols they call. It does no harm to not generate these
4715 pseudo-ops. However, when we can generate them, it enables the
4716 linker to potentially relax the jsr to a bsr, and eliminate the
4717 register load and, possibly, the constant pool entry. */
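/* A made-up example of the effect when relaxing (label names invented;
   final_prescan_insn below emits the actual label and ".uses" pseudo-op):
   .LK4:
        mov.l   .Lpool,r1       ! load the callee's address
        ...
        .uses   .LK4
        jsr     @r1
        nop
   The linker may then turn the jsr into a bsr and drop the register load
   and, possibly, the constant pool entry.  */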
4719 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4720 if (TARGET_RELAX)
4722 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4723 own purposes. This works because none of the remaining passes
4724 need to look at them.
4726 ??? But it may break in the future. We should use a machine
4727 dependent REG_NOTE, or some other approach entirely. */
4728 for (insn = first; insn; insn = NEXT_INSN (insn))
4730 if (INSN_P (insn))
4732 rtx note;
4734 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4735 NULL_RTX)) != 0)
4736 remove_note (insn, note);
4740 for (insn = first; insn; insn = NEXT_INSN (insn))
4742 rtx pattern, reg, link, set, scan, dies, label;
4743 int rescan = 0, foundinsn = 0;
4745 if (GET_CODE (insn) == CALL_INSN)
4747 pattern = PATTERN (insn);
4749 if (GET_CODE (pattern) == PARALLEL)
4750 pattern = XVECEXP (pattern, 0, 0);
4751 if (GET_CODE (pattern) == SET)
4752 pattern = SET_SRC (pattern);
4754 if (GET_CODE (pattern) != CALL
4755 || GET_CODE (XEXP (pattern, 0)) != MEM)
4756 continue;
4758 reg = XEXP (XEXP (pattern, 0), 0);
4760 else
4762 reg = sfunc_uses_reg (insn);
4763 if (! reg)
4764 continue;
4767 if (GET_CODE (reg) != REG)
4768 continue;
4770 /* Try scanning backward to find where the register is set. */
4771 link = NULL;
4772 for (scan = PREV_INSN (insn);
4773 scan && GET_CODE (scan) != CODE_LABEL;
4774 scan = PREV_INSN (scan))
4776 if (! INSN_P (scan))
4777 continue;
4779 if (! reg_mentioned_p (reg, scan))
4780 continue;
4782 if (noncall_uses_reg (reg, scan, &set))
4783 break;
4785 if (set)
4787 link = scan;
4788 break;
4792 if (! link)
4793 continue;
4795 /* The register is set at LINK. */
4797 /* We can only optimize the function call if the register is
4798 being set to a symbol. In theory, we could sometimes
4799 optimize calls to a constant location, but the assembler
4800 and linker do not support that at present. */
4801 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4802 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4803 continue;
4805 /* Scan forward from LINK to the place where REG dies, and
4806 make sure that the only insns which use REG are
4807 themselves function calls. */
4809 /* ??? This doesn't work for call targets that were allocated
4810 by reload, since there may not be a REG_DEAD note for the
4811 register. */
4813 dies = NULL_RTX;
4814 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4816 rtx scanset;
4818 /* Don't try to trace forward past a CODE_LABEL if we haven't
4819 seen INSN yet. Ordinarily, we will only find the setting insn
4820 if it is in the same basic block. However,
4821 cross-jumping can insert code labels in between the load and
4822 the call, and can result in situations where a single call
4823 insn may have two targets depending on where we came from. */
4825 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4826 break;
4828 if (! INSN_P (scan))
4829 continue;
4831 /* Don't try to trace forward past a JUMP. To optimize
4832 safely, we would have to check that all the
4833 instructions at the jump destination did not use REG. */
4835 if (GET_CODE (scan) == JUMP_INSN)
4836 break;
4838 if (! reg_mentioned_p (reg, scan))
4839 continue;
4841 if (noncall_uses_reg (reg, scan, &scanset))
4842 break;
4844 if (scan == insn)
4845 foundinsn = 1;
4847 if (scan != insn
4848 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4850 /* There is a function call to this register other
4851 than the one we are checking. If we optimize
4852 this call, we need to rescan again below. */
4853 rescan = 1;
4856 /* ??? We shouldn't have to worry about SCANSET here.
4857 We should just be able to check for a REG_DEAD note
4858 on a function call. However, the REG_DEAD notes are
4859 apparently not dependable around libcalls; c-torture
4860 execute/920501-2 is a test case. If SCANSET is set,
4861 then this insn sets the register, so it must have
4862 died earlier. Unfortunately, this will only handle
4863 the cases in which the register is, in fact, set in a
4864 later insn. */
4866 /* ??? We shouldn't have to use FOUNDINSN here.
4867 This dates back to when we used LOG_LINKS to find
4868 the most recent insn which sets the register. */
4870 if (foundinsn
4871 && (scanset
4872 || find_reg_note (scan, REG_DEAD, reg)))
4874 dies = scan;
4875 break;
4879 if (! dies)
4881 /* Either there was a branch, or some insn used REG
4882 other than as a function call address. */
4883 continue;
4886 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4887 on the insn which sets the register, and on each call insn
4888 which uses the register. In final_prescan_insn we look for
4889 the REG_LABEL_OPERAND notes, and output the appropriate label
4890 or pseudo-op. */
4892 label = gen_label_rtx ();
4893 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4894 REG_NOTES (link));
4895 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4896 REG_NOTES (insn));
4897 if (rescan)
4899 scan = link;
4902 rtx reg2;
4904 scan = NEXT_INSN (scan);
4905 if (scan != insn
4906 && ((GET_CODE (scan) == CALL_INSN
4907 && reg_mentioned_p (reg, scan))
4908 || ((reg2 = sfunc_uses_reg (scan))
4909 && REGNO (reg2) == REGNO (reg))))
4910 REG_NOTES (scan)
4911 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4912 REG_NOTES (scan));
4914 while (scan != dies);
4919 if (TARGET_SH2)
4920 fixup_addr_diff_vecs (first);
4922 if (optimize)
4924 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4925 shorten_branches (first);
4928 /* Scan the function looking for move instructions which have to be
4929 changed to pc-relative loads and insert the literal tables. */
4930 label_ref_list_pool = create_alloc_pool ("label references list",
4931 sizeof (struct label_ref_list_d),
4932 30);
4933 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4934 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4936 if (mova_p (insn))
4938 /* ??? basic block reordering can move a switch table dispatch
4939 below the switch table. Check if that has happened.
4940 We only have the addresses available when optimizing; but then,
4941 this check shouldn't be needed when not optimizing. */
4942 if (!untangle_mova (&num_mova, &mova, insn))
4944 insn = mova;
4945 num_mova = 0;
4948 else if (GET_CODE (insn) == JUMP_INSN
4949 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4950 && num_mova
4951 /* ??? loop invariant motion can also move a mova out of a
4952 loop. Since loop does this code motion anyway, maybe we
4953 should wrap UNSPEC_MOVA into a CONST, so that reload can
4954 move it back. */
4955 && ((num_mova > 1
4956 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4957 || (prev_nonnote_insn (insn)
4958 == XEXP (MOVA_LABELREF (mova), 0))))
4960 rtx scan;
4961 int total;
4963 num_mova--;
4965 /* Some code might have been inserted between the mova and
4966 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4967 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4968 total += get_attr_length (scan);
4970 /* range of mova is 1020, add 4 because pc counts from address of
4971 second instruction after this one, subtract 2 in case pc is 2
4972 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4973 cancels out with alignment effects of the mova itself. */
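/* That is, 1020 + 4 - 2 = 1022, the bound tested just below.  */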
4974 if (total > 1022)
4976 /* Change the mova into a load, and restart scanning
4977 there. broken_move will then return true for mova. */
4978 fixup_mova (mova);
4979 insn = mova;
4982 if (broken_move (insn)
4983 || (GET_CODE (insn) == INSN
4984 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4986 rtx scan;
4987 /* Scan ahead looking for a barrier to stick the constant table
4988 behind. */
4989 rtx barrier = find_barrier (num_mova, mova, insn);
4990 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4991 int need_aligned_label = 0;
4993 if (num_mova && ! mova_p (mova))
4995 /* find_barrier had to change the first mova into a
4996 pcload; thus, we have to start with this new pcload. */
4997 insn = mova;
4998 num_mova = 0;
5000 /* Now find all the moves between the points and modify them. */
5001 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5003 if (GET_CODE (scan) == CODE_LABEL)
5004 last_float = 0;
5005 if (GET_CODE (scan) == INSN
5006 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5007 need_aligned_label = 1;
5008 if (broken_move (scan))
5010 rtx *patp = &PATTERN (scan), pat = *patp;
5011 rtx src, dst;
5012 rtx lab;
5013 rtx newsrc;
5014 enum machine_mode mode;
5016 if (GET_CODE (pat) == PARALLEL)
5017 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5018 src = SET_SRC (pat);
5019 dst = SET_DEST (pat);
5020 mode = GET_MODE (dst);
5022 if (mode == SImode && hi_const (src)
5023 && REGNO (dst) != FPUL_REG)
5025 int offset = 0;
5027 mode = HImode;
5028 while (GET_CODE (dst) == SUBREG)
5030 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5031 GET_MODE (SUBREG_REG (dst)),
5032 SUBREG_BYTE (dst),
5033 GET_MODE (dst));
5034 dst = SUBREG_REG (dst);
5036 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5038 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5040 /* This must be an insn that clobbers r0. */
5041 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5042 XVECLEN (PATTERN (scan), 0)
5043 - 1);
5044 rtx clobber = *clobberp;
5046 gcc_assert (GET_CODE (clobber) == CLOBBER
5047 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5049 if (last_float
5050 && reg_set_between_p (r0_rtx, last_float_move, scan))
5051 last_float = 0;
5052 if (last_float
5053 && TARGET_SHCOMPACT
5054 && GET_MODE_SIZE (mode) != 4
5055 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5056 last_float = 0;
5057 lab = add_constant (src, mode, last_float);
5058 if (lab)
5059 emit_insn_before (gen_mova (lab), scan);
5060 else
5062 /* There will be a REG_UNUSED note for r0 on
5063 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5064 otherwise reorg:mark_target_live_regs would not
5065 consider r0 to be used, and we could end up with a delay
5066 slot insn in front of SCAN that clobbers r0. */
5067 rtx note
5068 = find_regno_note (last_float_move, REG_UNUSED, 0);
5070 /* If we are not optimizing, then there may not be
5071 a note. */
5072 if (note)
5073 PUT_MODE (note, REG_INC);
5075 *last_float_addr = r0_inc_rtx;
5077 last_float_move = scan;
5078 last_float = src;
5079 newsrc = gen_const_mem (mode,
5080 (((TARGET_SH4 && ! TARGET_FMOVD)
5081 || REGNO (dst) == FPUL_REG)
5082 ? r0_inc_rtx
5083 : r0_rtx));
5084 last_float_addr = &XEXP (newsrc, 0);
5086 /* Remove the clobber of r0. */
5087 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5088 gen_rtx_SCRATCH (Pmode));
5090 /* This is a mova needing a label. Create it. */
5091 else if (GET_CODE (src) == UNSPEC
5092 && XINT (src, 1) == UNSPEC_MOVA
5093 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5095 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5096 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5097 newsrc = gen_rtx_UNSPEC (SImode,
5098 gen_rtvec (1, newsrc),
5099 UNSPEC_MOVA);
5101 else
5103 lab = add_constant (src, mode, 0);
5104 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5105 newsrc = gen_const_mem (mode, newsrc);
5107 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5108 INSN_CODE (scan) = -1;
5111 dump_table (need_aligned_label ? insn : 0, barrier);
5112 insn = barrier;
5115 free_alloc_pool (label_ref_list_pool);
5116 for (insn = first; insn; insn = NEXT_INSN (insn))
5117 PUT_MODE (insn, VOIDmode);
5119 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5120 INSN_ADDRESSES_FREE ();
5121 split_branches (first);
5123 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5124 also has an effect on the register that holds the address of the sfunc.
5125 Insert an extra dummy insn in front of each sfunc that pretends to
5126 use this register. */
5127 if (flag_delayed_branch)
5129 for (insn = first; insn; insn = NEXT_INSN (insn))
5131 rtx reg = sfunc_uses_reg (insn);
5133 if (! reg)
5134 continue;
5135 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5138 #if 0
5139 /* fpscr is not actually a user variable, but we pretend it is for the
5140 sake of the previous optimization passes, since we want it handled like
5141 one. However, we don't have any debugging information for it, so turn
5142 it into a non-user variable now. */
5143 if (TARGET_SH4)
5144 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5145 #endif
5146 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5150 get_dest_uid (rtx label, int max_uid)
5152 rtx dest = next_real_insn (label);
5153 int dest_uid;
5154 if (! dest)
5155 /* This can happen for an undefined label. */
5156 return 0;
5157 dest_uid = INSN_UID (dest);
5158 /* If this is a newly created branch redirection blocking instruction,
5159 we cannot index the branch_uid or insn_addresses arrays with its
5160 uid. But then, we won't need to, because the actual destination is
5161 the following branch. */
5162 while (dest_uid >= max_uid)
5164 dest = NEXT_INSN (dest);
5165 dest_uid = INSN_UID (dest);
5167 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5168 return 0;
5169 return dest_uid;
5172 /* Split condbranches that are out of range. Also add clobbers for
5173 scratch registers that are needed in far jumps.
5174 We do this before delay slot scheduling, so that it can take our
5175 newly created instructions into account. It also allows us to
5176 find branches with common targets more easily. */
5178 static void
5179 split_branches (rtx first)
5181 rtx insn;
5182 struct far_branch **uid_branch, *far_branch_list = 0;
5183 int max_uid = get_max_uid ();
5184 int ok;
5186 /* Find out which branches are out of range. */
5187 shorten_branches (first);
5189 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5190 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5192 for (insn = first; insn; insn = NEXT_INSN (insn))
5193 if (! INSN_P (insn))
5194 continue;
5195 else if (INSN_DELETED_P (insn))
5197 /* Shorten_branches would split this instruction again,
5198 so transform it into a note. */
5199 SET_INSN_DELETED (insn);
5201 else if (GET_CODE (insn) == JUMP_INSN
5202 /* Don't mess with ADDR_DIFF_VEC */
5203 && (GET_CODE (PATTERN (insn)) == SET
5204 || GET_CODE (PATTERN (insn)) == RETURN))
5206 enum attr_type type = get_attr_type (insn);
5207 if (type == TYPE_CBRANCH)
5209 rtx next, beyond;
5211 if (get_attr_length (insn) > 4)
5213 rtx src = SET_SRC (PATTERN (insn));
5214 rtx olabel = XEXP (XEXP (src, 1), 0);
5215 int addr = INSN_ADDRESSES (INSN_UID (insn));
5216 rtx label = 0;
5217 int dest_uid = get_dest_uid (olabel, max_uid);
5218 struct far_branch *bp = uid_branch[dest_uid];
5220 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5221 the label if the LABEL_NUSES count drops to zero. There is
5222 always a jump_optimize pass that sets these values, but it
5223 proceeds to delete unreferenced code, and then if not
5224 optimizing, to un-delete the deleted instructions, thus
5225 leaving labels with use counts that are too low. */
5226 if (! optimize)
5228 JUMP_LABEL (insn) = olabel;
5229 LABEL_NUSES (olabel)++;
5231 if (! bp)
5233 bp = (struct far_branch *) alloca (sizeof *bp);
5234 uid_branch[dest_uid] = bp;
5235 bp->prev = far_branch_list;
5236 far_branch_list = bp;
5237 bp->far_label
5238 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5239 LABEL_NUSES (bp->far_label)++;
5241 else
5243 label = bp->near_label;
5244 if (! label && bp->address - addr >= CONDJUMP_MIN)
5246 rtx block = bp->insert_place;
5248 if (GET_CODE (PATTERN (block)) == RETURN)
5249 block = PREV_INSN (block);
5250 else
5251 block = gen_block_redirect (block,
5252 bp->address, 2);
5253 label = emit_label_after (gen_label_rtx (),
5254 PREV_INSN (block));
5255 bp->near_label = label;
5257 else if (label && ! NEXT_INSN (label))
5259 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5260 bp->insert_place = insn;
5261 else
5262 gen_far_branch (bp);
5265 if (! label
5266 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5268 bp->near_label = label = gen_label_rtx ();
5269 bp->insert_place = insn;
5270 bp->address = addr;
5272 ok = redirect_jump (insn, label, 1);
5273 gcc_assert (ok);
5275 else
5277 /* get_attr_length (insn) == 2 */
5278 /* Check if we have a pattern where reorg wants to redirect
5279 the branch to a label from an unconditional branch that
5280 is too far away. */
5281 /* We can't use JUMP_LABEL here because it might be undefined
5282 when not optimizing. */
5283 /* A syntax error might cause beyond to be NULL_RTX. */
5284 beyond
5285 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5286 0));
5288 if (beyond
5289 && (GET_CODE (beyond) == JUMP_INSN
5290 || ((beyond = next_active_insn (beyond))
5291 && GET_CODE (beyond) == JUMP_INSN))
5292 && GET_CODE (PATTERN (beyond)) == SET
5293 && recog_memoized (beyond) == CODE_FOR_jump_compact
5294 && ((INSN_ADDRESSES
5295 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5296 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5297 > 252 + 258 + 2))
5298 gen_block_redirect (beyond,
5299 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5302 next = next_active_insn (insn);
5304 if ((GET_CODE (next) == JUMP_INSN
5305 || ((next = next_active_insn (next))
5306 && GET_CODE (next) == JUMP_INSN))
5307 && GET_CODE (PATTERN (next)) == SET
5308 && recog_memoized (next) == CODE_FOR_jump_compact
5309 && ((INSN_ADDRESSES
5310 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5311 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5312 > 252 + 258 + 2))
5313 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5315 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5317 int addr = INSN_ADDRESSES (INSN_UID (insn));
5318 rtx far_label = 0;
5319 int dest_uid = 0;
5320 struct far_branch *bp;
5322 if (type == TYPE_JUMP)
5324 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5325 dest_uid = get_dest_uid (far_label, max_uid);
5326 if (! dest_uid)
5328 /* Parse errors can lead to labels outside
5329 the insn stream. */
5330 if (! NEXT_INSN (far_label))
5331 continue;
5333 if (! optimize)
5335 JUMP_LABEL (insn) = far_label;
5336 LABEL_NUSES (far_label)++;
5338 redirect_jump (insn, NULL_RTX, 1);
5339 far_label = 0;
5342 bp = uid_branch[dest_uid];
5343 if (! bp)
5345 bp = (struct far_branch *) alloca (sizeof *bp);
5346 uid_branch[dest_uid] = bp;
5347 bp->prev = far_branch_list;
5348 far_branch_list = bp;
5349 bp->near_label = 0;
5350 bp->far_label = far_label;
5351 if (far_label)
5352 LABEL_NUSES (far_label)++;
5354 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5355 if (addr - bp->address <= CONDJUMP_MAX)
5356 emit_label_after (bp->near_label, PREV_INSN (insn));
5357 else
5359 gen_far_branch (bp);
5360 bp->near_label = 0;
5362 else
5363 bp->near_label = 0;
5364 bp->address = addr;
5365 bp->insert_place = insn;
5366 if (! far_label)
5367 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5368 else
5369 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5372 /* Generate all pending far branches,
5373 and free our references to the far labels. */
5374 while (far_branch_list)
5376 if (far_branch_list->near_label
5377 && ! NEXT_INSN (far_branch_list->near_label))
5378 gen_far_branch (far_branch_list);
5379 if (optimize
5380 && far_branch_list->far_label
5381 && ! --LABEL_NUSES (far_branch_list->far_label))
5382 delete_insn (far_branch_list->far_label);
5383 far_branch_list = far_branch_list->prev;
5386 /* Instruction length information is no longer valid due to the new
5387 instructions that have been generated. */
5388 init_insn_lengths ();
5391 /* Dump out instruction addresses, which is useful for debugging the
5392 constant pool table stuff.
5394 If relaxing, output the label and pseudo-ops used to link together
5395 calls and the instructions which set the registers. */
5397 /* ??? The addresses printed by this routine for insns are nonsense for
5398 insns which are inside of a sequence where none of the inner insns have
5399 variable length. This is because the second pass of shorten_branches
5400 does not bother to update them. */
5402 void
5403 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5404 int noperands ATTRIBUTE_UNUSED)
5406 if (TARGET_DUMPISIZE)
5407 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5409 if (TARGET_RELAX)
5411 rtx note;
5413 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5414 if (note)
5416 rtx pattern;
5418 pattern = PATTERN (insn);
5419 if (GET_CODE (pattern) == PARALLEL)
5420 pattern = XVECEXP (pattern, 0, 0);
5421 switch (GET_CODE (pattern))
5423 case SET:
5424 if (GET_CODE (SET_SRC (pattern)) != CALL
5425 && get_attr_type (insn) != TYPE_SFUNC)
5427 targetm.asm_out.internal_label
5428 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5429 break;
5431 /* else FALLTHROUGH */
5432 case CALL:
5433 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5434 CODE_LABEL_NUMBER (XEXP (note, 0)));
5435 break;
5437 default:
5438 gcc_unreachable ();
5444 /* Dump out any constants accumulated in the final pass. These will
5445 only be labels. */
5447 const char *
5448 output_jump_label_table (void)
5450 int i;
5452 if (pool_size)
5454 fprintf (asm_out_file, "\t.align 2\n");
5455 for (i = 0; i < pool_size; i++)
5457 pool_node *p = &pool_vector[i];
5459 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5460 CODE_LABEL_NUMBER (p->label));
5461 output_asm_insn (".long %O0", &p->value);
5463 pool_size = 0;
5466 return "";
5469 /* A full frame looks like:
5471 arg-5
5472 arg-4
5473 [ if current_function_anonymous_args
5474 arg-3
5475 arg-2
5476 arg-1
5477 arg-0 ]
5478 saved-fp
5479 saved-r10
5480 saved-r11
5481 saved-r12
5482 saved-pr
5483 local-n
5485 local-1
5486 local-0 <- fp points here. */
5488 /* Number of bytes pushed for anonymous args, used to pass information
5489 between expand_prologue and expand_epilogue. */
5491 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5492 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5493 for an epilogue and a negative value means that it's for a sibcall
5494 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5495 all the registers that are about to be restored, and hence dead. */
5497 static void
5498 output_stack_adjust (int size, rtx reg, int epilogue_p,
5499 HARD_REG_SET *live_regs_mask)
5501 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5502 if (size)
5504 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5506 /* This test is bogus, as output_stack_adjust is used to re-align the
5507 stack. */
5508 #if 0
5509 gcc_assert (!(size % align));
5510 #endif
5512 if (CONST_OK_FOR_ADD (size))
5513 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5514 /* Try to do it with two partial adjustments; however, we must make
5515 sure that the stack is properly aligned at all times, in case
5516 an interrupt occurs between the two partial adjustments. */
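/* For illustration (invented numbers, and assuming CONST_OK_FOR_I08 is a
   signed 8-bit range): with size == 160 and align == 8, a single add is
   out of range, so this emits "add #80" twice; 80 is a multiple of the
   alignment, so the stack stays aligned between the two adjustments.  */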
5517 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5518 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5520 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5521 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5523 else
5525 rtx const_reg;
5526 rtx insn;
5527 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5528 int i;
5530 /* If TEMP is invalid, we could temporarily save a general
5531 register to MACL. However, there is currently no need
5532 to handle this case, so just die when we see it. */
5533 if (epilogue_p < 0
5534 || current_function_interrupt
5535 || ! call_really_used_regs[temp] || fixed_regs[temp])
5536 temp = -1;
5537 if (temp < 0 && ! current_function_interrupt
5538 && (TARGET_SHMEDIA || epilogue_p >= 0))
5540 HARD_REG_SET temps;
5541 COPY_HARD_REG_SET (temps, call_used_reg_set);
5542 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5543 if (epilogue_p > 0)
5545 int nreg = 0;
5546 if (current_function_return_rtx)
5548 enum machine_mode mode;
5549 mode = GET_MODE (current_function_return_rtx);
5550 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5551 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5553 for (i = 0; i < nreg; i++)
5554 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5555 if (current_function_calls_eh_return)
5557 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5558 for (i = 0; i <= 3; i++)
5559 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5562 if (TARGET_SHMEDIA && epilogue_p < 0)
5563 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5564 CLEAR_HARD_REG_BIT (temps, i);
5565 if (epilogue_p <= 0)
5567 for (i = FIRST_PARM_REG;
5568 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5569 CLEAR_HARD_REG_BIT (temps, i);
5570 if (cfun->static_chain_decl != NULL)
5571 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5573 temp = scavenge_reg (&temps);
5575 if (temp < 0 && live_regs_mask)
5577 HARD_REG_SET temps;
5579 COPY_HARD_REG_SET (temps, *live_regs_mask);
5580 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5581 temp = scavenge_reg (&temps);
5583 if (temp < 0)
5585 rtx adj_reg, tmp_reg, mem;
5587 /* If we reached here, the most likely case is the (sibcall)
5588 epilogue for non-SHmedia. Put a special push/pop sequence
5589 in for such a case as a last resort. This looks lengthy but
5590 should not be a problem because it seems to be very
5591 rare. */
5593 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5596 /* ??? There is still the slight possibility that r4 or
5597 r5 have been reserved as fixed registers or assigned
5598 as global registers, and they change during an
5599 interrupt. There are possible ways to handle this:
5601 - If we are adjusting the frame pointer (r14), we can do
5602 with a single temp register and an ordinary push / pop
5603 on the stack.
5604 - Grab any call-used or call-saved registers (i.e. not
5605 fixed or globals) for the temps we need. We might
5606 also grab r14 if we are adjusting the stack pointer.
5607 If we can't find enough available registers, issue
5608 a diagnostic and die - the user must have reserved
5609 way too many registers.
5610 But since all this is rather unlikely to happen and
5611 would require extra testing, we just die if r4 / r5
5612 are not available. */
5613 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5614 && !global_regs[4] && !global_regs[5]);
5616 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5617 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5618 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5619 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5620 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5621 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5622 emit_move_insn (mem, tmp_reg);
5623 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5624 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5625 emit_move_insn (mem, tmp_reg);
5626 emit_move_insn (reg, adj_reg);
5627 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5628 emit_move_insn (adj_reg, mem);
5629 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5630 emit_move_insn (tmp_reg, mem);
5631 /* Tell flow the insns that pop r4/r5 aren't dead. */
5632 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5633 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5634 return;
5636 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5638 /* If SIZE is negative, subtract the positive value.
5639 This sometimes allows a constant pool entry to be shared
5640 between prologue and epilogue code. */
5641 if (size < 0)
5643 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5644 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5646 else
5648 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5649 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5651 if (! epilogue_p)
5652 REG_NOTES (insn)
5653 = (gen_rtx_EXPR_LIST
5654 (REG_FRAME_RELATED_EXPR,
5655 gen_rtx_SET (VOIDmode, reg,
5656 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5657 REG_NOTES (insn)));
5662 static rtx
5663 frame_insn (rtx x)
5665 x = emit_insn (x);
5666 RTX_FRAME_RELATED_P (x) = 1;
5667 return x;
5670 /* Output RTL to push register RN onto the stack. */
5672 static rtx
5673 push (int rn)
5675 rtx x;
5676 if (rn == FPUL_REG)
5677 x = gen_push_fpul ();
5678 else if (rn == FPSCR_REG)
5679 x = gen_push_fpscr ();
5680 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5681 && FP_OR_XD_REGISTER_P (rn))
5683 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5684 return NULL_RTX;
5685 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5687 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5688 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5689 else
5690 x = gen_push (gen_rtx_REG (SImode, rn));
5692 x = frame_insn (x);
5693 REG_NOTES (x)
5694 = gen_rtx_EXPR_LIST (REG_INC,
5695 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5696 return x;
5699 /* Output RTL to pop register RN from the stack. */
5701 static void
5702 pop (int rn)
5704 rtx x;
5705 if (rn == FPUL_REG)
5706 x = gen_pop_fpul ();
5707 else if (rn == FPSCR_REG)
5708 x = gen_pop_fpscr ();
5709 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5710 && FP_OR_XD_REGISTER_P (rn))
5712 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5713 return;
5714 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5716 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5717 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5718 else
5719 x = gen_pop (gen_rtx_REG (SImode, rn));
5721 x = emit_insn (x);
5722 REG_NOTES (x)
5723 = gen_rtx_EXPR_LIST (REG_INC,
5724 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5727 /* Generate code to push the regs specified in the mask. */
5729 static void
5730 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5732 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5733 int skip_fpscr = 0;
5735 /* Push PR last; this gives better latencies after the prologue, and
5736 provides candidates for the return delay slot when there are no general
5737 registers pushed. */
5738 for (; i < FIRST_PSEUDO_REGISTER; i++)
5740 /* If this is an interrupt handler, and the SZ bit varies,
5741 and we have to push any floating point register, we need
5742 to switch to the correct precision first. */
5743 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5744 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5746 HARD_REG_SET unsaved;
5748 push (FPSCR_REG);
5749 COMPL_HARD_REG_SET (unsaved, *mask);
5750 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5751 skip_fpscr = 1;
5753 if (i != PR_REG
5754 && (i != FPSCR_REG || ! skip_fpscr)
5755 && TEST_HARD_REG_BIT (*mask, i))
5756 push (i);
5759 /* Push banked registers last to improve delay slot opportunities. */
5760 if (interrupt_handler)
5761 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5762 if (TEST_HARD_REG_BIT (*mask, i))
5763 push (i);
5765 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5766 push (PR_REG);
5769 /* Calculate how much extra space is needed to save all callee-saved
5770 target registers.
5771 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5773 static int
5774 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5776 int reg;
5777 int stack_space = 0;
5778 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5780 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5781 if ((! call_really_used_regs[reg] || interrupt_handler)
5782 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5783 /* Leave space to save this target register on the stack,
5784 in case target register allocation wants to use it. */
5785 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5786 return stack_space;
5789 /* Decide whether we should reserve space for callee-save target registers,
5790 in case target register allocation wants to use them. REGS_SAVED is
5791 the space, in bytes, that is already required for register saves.
5792 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
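/* Informally: we never reserve this space when optimizing for size, and
   otherwise reserve it only when the extra target-register area would be
   no larger than the REGS_SAVED bytes already spent on register saves.  */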
5794 static int
5795 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5796 HARD_REG_SET *live_regs_mask)
5798 if (optimize_size)
5799 return 0;
5800 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5803 /* Decide how much space to reserve for callee-save target registers
5804 in case target register allocation wants to use them.
5805 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5807 static int
5808 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5810 if (shmedia_space_reserved_for_target_registers)
5811 return shmedia_target_regs_stack_space (live_regs_mask);
5812 else
5813 return 0;
5816 /* Work out the registers which need to be saved, both as a mask and a
5817 count of bytes needed to save them. Return the count.
5819 If doing a pragma interrupt function, then push all regs used by the
5820 function, and if we call another function (we can tell by looking at PR),
5821 make sure that all the regs it clobbers are safe too. */
5823 static int
5824 calc_live_regs (HARD_REG_SET *live_regs_mask)
5826 unsigned int reg;
5827 int count;
5828 tree attrs;
5829 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5830 bool nosave_low_regs;
5831 int pr_live, has_call;
5833 attrs = DECL_ATTRIBUTES (current_function_decl);
5834 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5835 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5836 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5837 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5839 CLEAR_HARD_REG_SET (*live_regs_mask);
5840 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5841 && df_regs_ever_live_p (FPSCR_REG))
5842 target_flags &= ~MASK_FPU_SINGLE;
5843 /* If switching to double mode saves a lot of save insns, do that. */
5844 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5845 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5846 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5847 && (! call_really_used_regs[reg]
5848 || interrupt_handler)
5849 && ++count > 2)
5851 target_flags &= ~MASK_FPU_SINGLE;
5852 break;
5854 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5855 knows how to use it. That means the pseudo originally allocated for
5856 the initial value can become the PR_MEDIA_REG hard register, as seen for
5857 execute/20010122-1.c:test9. */
5858 if (TARGET_SHMEDIA)
5859 /* ??? this function is called from initial_elimination_offset, hence we
5860 can't use the result of sh_media_register_for_return here. */
5861 pr_live = sh_pr_n_sets ();
5862 else
5864 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5865 pr_live = (pr_initial
5866 ? (GET_CODE (pr_initial) != REG
5867 || REGNO (pr_initial) != (PR_REG))
5868 : df_regs_ever_live_p (PR_REG));
5869 /* For SHcompact, if not optimizing, we end up with a memory reference
5870 using the return address pointer for __builtin_return_address even
5871 though there is no actual need to put the PR register on the stack. */
5872 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5874 /* Force PR to be live if the prologue has to call the SHmedia
5875 argument decoder or register saver. */
5876 if (TARGET_SHCOMPACT
5877 && ((current_function_args_info.call_cookie
5878 & ~ CALL_COOKIE_RET_TRAMP (1))
5879 || current_function_saves_all_registers))
5880 pr_live = 1;
5881 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5882 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5884 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5885 ? pr_live
5886 : interrupt_handler
5887 ? (/* Need to save all the regs ever live. */
5888 (df_regs_ever_live_p (reg)
5889 || (call_really_used_regs[reg]
5890 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5891 || reg == PIC_OFFSET_TABLE_REGNUM)
5892 && has_call)
5893 || (TARGET_SHMEDIA && has_call
5894 && REGISTER_NATURAL_MODE (reg) == SImode
5895 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5896 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5897 && reg != RETURN_ADDRESS_POINTER_REGNUM
5898 && reg != T_REG && reg != GBR_REG
5899 /* Push fpscr only on targets which have an FPU. */
5900 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5901 : (/* Only push those regs which are used and need to be saved. */
5902 (TARGET_SHCOMPACT
5903 && flag_pic
5904 && current_function_args_info.call_cookie
5905 && reg == PIC_OFFSET_TABLE_REGNUM)
5906 || (df_regs_ever_live_p (reg)
5907 && (!call_really_used_regs[reg]
5908 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5909 || (current_function_calls_eh_return
5910 && (reg == EH_RETURN_DATA_REGNO (0)
5911 || reg == EH_RETURN_DATA_REGNO (1)
5912 || reg == EH_RETURN_DATA_REGNO (2)
5913 || reg == EH_RETURN_DATA_REGNO (3)))
5914 || ((reg == MACL_REG || reg == MACH_REG)
5915 && df_regs_ever_live_p (reg)
5916 && sh_cfun_attr_renesas_p ())
5919 SET_HARD_REG_BIT (*live_regs_mask, reg);
5920 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5922 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5923 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5925 if (FP_REGISTER_P (reg))
5927 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5929 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5930 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5933 else if (XD_REGISTER_P (reg))
5935 /* Must switch to double mode to access these registers. */
5936 target_flags &= ~MASK_FPU_SINGLE;
5940 if (nosave_low_regs && reg == R8_REG)
5941 break;
5943 /* If we have a target register optimization pass after prologue / epilogue
5944 threading, we need to assume all target registers will be live even if
5945 they aren't now. */
5946 if (flag_branch_target_load_optimize2
5947 && TARGET_SAVE_ALL_TARGET_REGS
5948 && shmedia_space_reserved_for_target_registers)
5949 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5950 if ((! call_really_used_regs[reg] || interrupt_handler)
5951 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5953 SET_HARD_REG_BIT (*live_regs_mask, reg);
5954 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5956 /* If this is an interrupt handler, we don't have any call-clobbered
5957 registers we can conveniently use for target register save/restore.
5958 Make sure we save at least one general purpose register when we need
5959 to save target registers. */
5960 if (interrupt_handler
5961 && hard_reg_set_intersect_p (*live_regs_mask,
5962 reg_class_contents[TARGET_REGS])
5963 && ! hard_reg_set_intersect_p (*live_regs_mask,
5964 reg_class_contents[GENERAL_REGS]))
5966 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5967 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5970 return count;
5973 /* Code to generate prologue and epilogue sequences */
5975 /* PUSHED is the number of bytes that are being pushed on the
5976 stack for register saves. Return the frame size, padded
5977 appropriately so that the stack stays properly aligned. */
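/* Worked example with hypothetical values: assuming STACK_BOUNDARY is
   64 bits, ALIGN is 8; a 20-byte frame with PUSHED == 12 gives
   ((20 + 12 + 7) & -8) - 12 == 20, so PUSHED plus the returned frame
   size is 32 bytes and the stack stays 8-byte aligned.  */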
5978 static HOST_WIDE_INT
5979 rounded_frame_size (int pushed)
5981 HOST_WIDE_INT size = get_frame_size ();
5982 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5984 return ((size + pushed + align - 1) & -align) - pushed;
5987 /* Choose a call-clobbered target-branch register that remains
5988 unchanged along the whole function. We set it up as the return
5989 value in the prologue. */
5991 sh_media_register_for_return (void)
5993 int regno;
5994 int tr0_used;
5996 if (! current_function_is_leaf)
5997 return -1;
5998 if (lookup_attribute ("interrupt_handler",
5999 DECL_ATTRIBUTES (current_function_decl)))
6000 return -1;
6001 if (sh_cfun_interrupt_handler_p ())
6002 return -1;
6004 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6006 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6007 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6008 return regno;
6010 return -1;
6013 /* The maximum registers we need to save are:
6014 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6015 - 32 floating point registers (for each pair, we save none,
6016 one single precision value, or a double precision value).
6017 - 8 target registers
6018 - add 1 entry for a delimiter. */
6019 #define MAX_SAVED_REGS (62+32+8)
6021 typedef struct save_entry_s
6023 unsigned char reg;
6024 unsigned char mode;
6025 short offset;
6026 } save_entry;
6028 #define MAX_TEMPS 4
6030 /* There will be a delimiter entry with VOIDmode both at the start and the
6031 end of a filled-in schedule. The end delimiter has the offset of the
6032 save with the smallest (i.e. most negative) offset. */
6033 typedef struct save_schedule_s
6035 save_entry entries[MAX_SAVED_REGS + 2];
6036 int temps[MAX_TEMPS+1];
6037 } save_schedule;
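/* Usage sketch: the prologue walks a filled-in schedule forward from
   entries[1], emitting stores at decreasing offsets; the epilogue and
   sh_set_return_address walk it backward from the entry returned by
   sh5_schedule_saves.  TEMPS lists the scratch register numbers usable
   during the saves/restores and is terminated by -1.  */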
6039 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6040 use reverse order. Returns the last entry written to (not counting
6041 the delimiter). OFFSET_BASE is a number to be added to all offset
6042 entries. */
6044 static save_entry *
6045 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6046 int offset_base)
6048 int align, i;
6049 save_entry *entry = schedule->entries;
6050 int tmpx = 0;
6051 int offset;
6053 if (! current_function_interrupt)
6054 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6055 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6056 && ! FUNCTION_ARG_REGNO_P (i)
6057 && i != FIRST_RET_REG
6058 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6059 && ! (current_function_calls_eh_return
6060 && (i == EH_RETURN_STACKADJ_REGNO
6061 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6062 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6063 schedule->temps[tmpx++] = i;
6064 entry->reg = -1;
6065 entry->mode = VOIDmode;
6066 entry->offset = offset_base;
6067 entry++;
6068 /* We loop twice: first, we save 8-byte aligned registers in the
6069 higher addresses, that are known to be aligned. Then, we
6070 proceed to saving 32-bit registers that don't need 8-byte
6071 alignment.
6072 If this is an interrupt function, all registers that need saving
6073 need to be saved in full. Moreover, we need to postpone saving
6074 target registers till we have saved some general purpose registers
6075 we can then use as scratch registers. */
6076 offset = offset_base;
6077 for (align = 1; align >= 0; align--)
6079 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6080 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6082 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6083 int reg = i;
6085 if (current_function_interrupt)
6087 if (TARGET_REGISTER_P (i))
6088 continue;
6089 if (GENERAL_REGISTER_P (i))
6090 mode = DImode;
6092 if (mode == SFmode && (i % 2) == 1
6093 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6094 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6096 mode = DFmode;
6097 i--;
6098 reg--;
6101 /* If we're doing the aligned pass and this is not aligned,
6102 or we're doing the unaligned pass and this is aligned,
6103 skip it. */
6104 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6105 != align)
6106 continue;
6108 if (current_function_interrupt
6109 && GENERAL_REGISTER_P (i)
6110 && tmpx < MAX_TEMPS)
6111 schedule->temps[tmpx++] = i;
6113 offset -= GET_MODE_SIZE (mode);
6114 entry->reg = i;
6115 entry->mode = mode;
6116 entry->offset = offset;
6117 entry++;
6119 if (align && current_function_interrupt)
6120 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6121 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6123 offset -= GET_MODE_SIZE (DImode);
6124 entry->reg = i;
6125 entry->mode = DImode;
6126 entry->offset = offset;
6127 entry++;
6130 entry->reg = -1;
6131 entry->mode = VOIDmode;
6132 entry->offset = offset;
6133 schedule->temps[tmpx] = -1;
6134 return entry - 1;
6137 void
6138 sh_expand_prologue (void)
6140 HARD_REG_SET live_regs_mask;
6141 int d, i;
6142 int d_rounding = 0;
6143 int save_flags = target_flags;
6144 int pretend_args;
6145 tree sp_switch_attr
6146 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6148 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6150 /* We have pretend args if we had an object sent partially in registers
6151 and partially on the stack, e.g. a large structure. */
6152 pretend_args = current_function_pretend_args_size;
6153 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6154 && (NPARM_REGS(SImode)
6155 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6156 pretend_args = 0;
6157 output_stack_adjust (-pretend_args
6158 - current_function_args_info.stack_regs * 8,
6159 stack_pointer_rtx, 0, NULL);
6161 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6162 /* We're going to use the PIC register to load the address of the
6163 incoming-argument decoder and/or of the return trampoline from
6164 the GOT, so make sure the PIC register is preserved and
6165 initialized. */
6166 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6168 if (TARGET_SHCOMPACT
6169 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6171 int reg;
6173 /* First, make all registers with incoming arguments that will
6174 be pushed onto the stack live, so that register renaming
6175 doesn't overwrite them. */
6176 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6177 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6178 >= NPARM_REGS (SImode) - reg)
6179 for (; reg < NPARM_REGS (SImode); reg++)
6180 emit_insn (gen_shcompact_preserve_incoming_args
6181 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6182 else if (CALL_COOKIE_INT_REG_GET
6183 (current_function_args_info.call_cookie, reg) == 1)
6184 emit_insn (gen_shcompact_preserve_incoming_args
6185 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6187 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6188 stack_pointer_rtx);
6189 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6190 GEN_INT (current_function_args_info.call_cookie));
6191 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6192 gen_rtx_REG (SImode, R0_REG));
6194 else if (TARGET_SHMEDIA)
6196 int tr = sh_media_register_for_return ();
6198 if (tr >= 0)
6199 emit_move_insn (gen_rtx_REG (DImode, tr),
6200 gen_rtx_REG (DImode, PR_MEDIA_REG));
6203 /* Emit the code for SETUP_VARARGS. */
6204 if (current_function_stdarg)
6206 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6208 /* Push arg regs as if they'd been provided by the caller on the stack. */
6209 for (i = 0; i < NPARM_REGS(SImode); i++)
6211 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6212 rtx insn;
6214 if (i >= (NPARM_REGS(SImode)
6215 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6217 break;
6218 insn = push (rn);
6219 RTX_FRAME_RELATED_P (insn) = 0;
6224 /* If we're supposed to switch stacks at function entry, do so now. */
6225 if (sp_switch_attr)
6227 /* The argument specifies a variable holding the address of the
6228 stack the interrupt function should switch to/from at entry/exit. */
6229 const char *s
6230 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6231 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6233 emit_insn (gen_sp_switch_1 (sp_switch));
6236 d = calc_live_regs (&live_regs_mask);
6237 /* ??? Maybe we could save some switching if we can move a mode switch
6238 that already happens to be at the function start into the prologue. */
6239 if (target_flags != save_flags && ! current_function_interrupt)
6240 emit_insn (gen_toggle_sz ());
6242 if (TARGET_SH5)
6244 int offset_base, offset;
6245 rtx r0 = NULL_RTX;
6246 int offset_in_r0 = -1;
6247 int sp_in_r0 = 0;
6248 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6249 int total_size, save_size;
6250 save_schedule schedule;
6251 save_entry *entry;
6252 int *tmp_pnt;
6254 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6255 && ! current_function_interrupt)
6256 r0 = gen_rtx_REG (Pmode, R0_REG);
6258 /* D is the actual number of bytes that we need for saving registers;
6259 however, in initial_elimination_offset we have committed to using
6260 an additional TREGS_SPACE bytes - in order to keep both
6261 addresses to arguments supplied by the caller and local variables
6262 valid, we must keep this gap. Place it between the incoming
6263 arguments and the actually saved registers in a bid to optimize
6264 locality of reference. */
6265 total_size = d + tregs_space;
6266 total_size += rounded_frame_size (total_size);
6267 save_size = total_size - rounded_frame_size (d);
6268 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6269 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6270 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6272 /* If adjusting the stack in a single step costs nothing extra, do so.
6273 I.e. either if a single addi is enough, or we need a movi anyway,
6274 and we don't exceed the maximum offset range (the test for the
6275 latter is conservative for simplicity). */
6276 if (TARGET_SHMEDIA
6277 && (CONST_OK_FOR_I10 (-total_size)
6278 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6279 && total_size <= 2044)))
6280 d_rounding = total_size - save_size;
6282 offset_base = d + d_rounding;
6284 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6285 0, NULL);
6287 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6288 tmp_pnt = schedule.temps;
6289 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6291 enum machine_mode mode = entry->mode;
6292 unsigned int reg = entry->reg;
6293 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6294 rtx orig_reg_rtx;
6296 offset = entry->offset;
6298 reg_rtx = gen_rtx_REG (mode, reg);
6300 mem_rtx = gen_frame_mem (mode,
6301 gen_rtx_PLUS (Pmode,
6302 stack_pointer_rtx,
6303 GEN_INT (offset)));
6305 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6307 gcc_assert (r0);
6308 mem_rtx = NULL_RTX;
6310 try_pre_dec:
6312 if (HAVE_PRE_DECREMENT
6313 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6314 || mem_rtx == NULL_RTX
6315 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6317 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6319 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6320 pre_dec_ok);
6322 pre_dec = NULL_RTX;
6324 break;
6326 pre_dec_ok:
6327 mem_rtx = NULL_RTX;
6328 offset += GET_MODE_SIZE (mode);
6330 while (0);
6332 if (mem_rtx != NULL_RTX)
6333 goto addr_ok;
6335 if (offset_in_r0 == -1)
6337 emit_move_insn (r0, GEN_INT (offset));
6338 offset_in_r0 = offset;
6340 else if (offset != offset_in_r0)
6342 emit_move_insn (r0,
6343 gen_rtx_PLUS
6344 (Pmode, r0,
6345 GEN_INT (offset - offset_in_r0)));
6346 offset_in_r0 += offset - offset_in_r0;
6349 if (pre_dec != NULL_RTX)
6351 if (! sp_in_r0)
6353 emit_move_insn (r0,
6354 gen_rtx_PLUS
6355 (Pmode, r0, stack_pointer_rtx));
6356 sp_in_r0 = 1;
6359 offset -= GET_MODE_SIZE (mode);
6360 offset_in_r0 -= GET_MODE_SIZE (mode);
6362 mem_rtx = pre_dec;
6364 else if (sp_in_r0)
6365 mem_rtx = gen_frame_mem (mode, r0);
6366 else
6367 mem_rtx = gen_frame_mem (mode,
6368 gen_rtx_PLUS (Pmode,
6369 stack_pointer_rtx,
6370 r0));
6372 /* We must not use an r0-based address for target-branch
6373 registers or for special registers without pre-dec
6374 memory addresses, since we store their values in r0
6375 first. */
6376 gcc_assert (!TARGET_REGISTER_P (reg)
6377 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6378 || mem_rtx == pre_dec));
6380 addr_ok:
6381 orig_reg_rtx = reg_rtx;
6382 if (TARGET_REGISTER_P (reg)
6383 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6384 && mem_rtx != pre_dec))
6386 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6388 emit_move_insn (tmp_reg, reg_rtx);
6390 if (REGNO (tmp_reg) == R0_REG)
6392 offset_in_r0 = -1;
6393 sp_in_r0 = 0;
6394 gcc_assert (!refers_to_regno_p
6395 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6398 if (*++tmp_pnt <= 0)
6399 tmp_pnt = schedule.temps;
6401 reg_rtx = tmp_reg;
6404 rtx insn;
6406 /* Mark as interesting for the DWARF CFI generator. */
6407 insn = emit_move_insn (mem_rtx, reg_rtx);
6408 RTX_FRAME_RELATED_P (insn) = 1;
6409 /* If we use an intermediate register for the save, we can't
6410 describe this exactly in cfi as a copy of the to-be-saved
6411 register into the temporary register and then the temporary
6412 register on the stack, because the temporary register can
6413 have a different natural size than the to-be-saved register.
6414 Thus, we gloss over the intermediate copy and pretend we do
6415 a direct save from the to-be-saved register. */
6416 if (REGNO (reg_rtx) != reg)
6418 rtx set, note_rtx;
6420 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6421 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6422 REG_NOTES (insn));
6423 REG_NOTES (insn) = note_rtx;
6426 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6428 rtx reg_rtx = gen_rtx_REG (mode, reg);
6429 rtx set, note_rtx;
6430 rtx mem_rtx = gen_frame_mem (mode,
6431 gen_rtx_PLUS (Pmode,
6432 stack_pointer_rtx,
6433 GEN_INT (offset)));
6435 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6436 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6437 REG_NOTES (insn));
6438 REG_NOTES (insn) = note_rtx;
6443 gcc_assert (entry->offset == d_rounding);
6445 else
6446 push_regs (&live_regs_mask, current_function_interrupt);
6448 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6449 emit_insn (gen_GOTaddr2picreg ());
6451 if (SHMEDIA_REGS_STACK_ADJUST ())
6453 /* This must NOT go through the PLT, otherwise mach and macl
6454 may be clobbered. */
6455 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6456 (TARGET_FPU_ANY
6457 ? "__GCC_push_shmedia_regs"
6458 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6459 emit_insn (gen_shmedia_save_restore_regs_compact
6460 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6463 if (target_flags != save_flags && ! current_function_interrupt)
6464 emit_insn (gen_toggle_sz ());
6466 target_flags = save_flags;
6468 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6469 stack_pointer_rtx, 0, NULL);
6471 if (frame_pointer_needed)
6472 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6474 if (TARGET_SHCOMPACT
6475 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6477 /* This must NOT go through the PLT, otherwise mach and macl
6478 may be clobbered. */
6479 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6480 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6481 emit_insn (gen_shcompact_incoming_args ());
6485 void
6486 sh_expand_epilogue (bool sibcall_p)
6488 HARD_REG_SET live_regs_mask;
6489 int d, i;
6490 int d_rounding = 0;
6492 int save_flags = target_flags;
6493 int frame_size, save_size;
6494 int fpscr_deferred = 0;
6495 int e = sibcall_p ? -1 : 1;
6497 d = calc_live_regs (&live_regs_mask);
6499 save_size = d;
6500 frame_size = rounded_frame_size (d);
6502 if (TARGET_SH5)
6504 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6505 int total_size;
6506 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6507 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6508 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6510 total_size = d + tregs_space;
6511 total_size += rounded_frame_size (total_size);
6512 save_size = total_size - frame_size;
6514 /* If adjusting the stack in a single step costs nothing extra, do so.
6515 I.e. either if a single addi is enough, or we need a movi anyway,
6516 and we don't exceed the maximum offset range (the test for the
6517 latter is conservative for simplicity). */
6518 if (TARGET_SHMEDIA
6519 && ! frame_pointer_needed
6520 && (CONST_OK_FOR_I10 (total_size)
6521 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6522 && total_size <= 2044)))
6523 d_rounding = frame_size;
6525 frame_size -= d_rounding;
6528 if (frame_pointer_needed)
6530 /* We must avoid scheduling the epilogue with previous basic blocks
6531 when exception handling is enabled. See PR/18032. */
6532 if (flag_exceptions)
6533 emit_insn (gen_blockage ());
6534 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6535 &live_regs_mask);
6537 /* We must avoid moving the stack pointer adjustment past code
6538 which reads from the local frame, else an interrupt could
6539 occur after the SP adjustment and clobber data in the local
6540 frame. */
6541 emit_insn (gen_blockage ());
6542 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6544 else if (frame_size)
6546 /* We must avoid moving the stack pointer adjustment past code
6547 which reads from the local frame, else an interrupt could
6548 occur after the SP adjustment and clobber data in the local
6549 frame. */
6550 emit_insn (gen_blockage ());
6551 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6554 if (SHMEDIA_REGS_STACK_ADJUST ())
6556 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6557 (TARGET_FPU_ANY
6558 ? "__GCC_pop_shmedia_regs"
6559 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6560 /* This must NOT go through the PLT, otherwise mach and macl
6561 may be clobbered. */
6562 emit_insn (gen_shmedia_save_restore_regs_compact
6563 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6566 /* Pop all the registers. */
6568 if (target_flags != save_flags && ! current_function_interrupt)
6569 emit_insn (gen_toggle_sz ());
6570 if (TARGET_SH5)
6572 int offset_base, offset;
6573 int offset_in_r0 = -1;
6574 int sp_in_r0 = 0;
6575 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6576 save_schedule schedule;
6577 save_entry *entry;
6578 int *tmp_pnt;
6580 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6581 offset_base = -entry[1].offset + d_rounding;
6582 tmp_pnt = schedule.temps;
6583 for (; entry->mode != VOIDmode; entry--)
6585 enum machine_mode mode = entry->mode;
6586 int reg = entry->reg;
6587 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6589 offset = offset_base + entry->offset;
6590 reg_rtx = gen_rtx_REG (mode, reg);
6592 mem_rtx = gen_frame_mem (mode,
6593 gen_rtx_PLUS (Pmode,
6594 stack_pointer_rtx,
6595 GEN_INT (offset)));
6597 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6599 mem_rtx = NULL_RTX;
6601 try_post_inc:
6603 if (HAVE_POST_INCREMENT
6604 && (offset == offset_in_r0
6605 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6606 && mem_rtx == NULL_RTX)
6607 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6609 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6611 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6612 post_inc_ok);
6614 post_inc = NULL_RTX;
6616 break;
6618 post_inc_ok:
6619 mem_rtx = NULL_RTX;
6621 while (0);
6623 if (mem_rtx != NULL_RTX)
6624 goto addr_ok;
6626 if (offset_in_r0 == -1)
6628 emit_move_insn (r0, GEN_INT (offset));
6629 offset_in_r0 = offset;
6631 else if (offset != offset_in_r0)
6633 emit_move_insn (r0,
6634 gen_rtx_PLUS
6635 (Pmode, r0,
6636 GEN_INT (offset - offset_in_r0)));
6637 offset_in_r0 += offset - offset_in_r0;
6640 if (post_inc != NULL_RTX)
6642 if (! sp_in_r0)
6644 emit_move_insn (r0,
6645 gen_rtx_PLUS
6646 (Pmode, r0, stack_pointer_rtx));
6647 sp_in_r0 = 1;
6650 mem_rtx = post_inc;
6652 offset_in_r0 += GET_MODE_SIZE (mode);
6654 else if (sp_in_r0)
6655 mem_rtx = gen_frame_mem (mode, r0);
6656 else
6657 mem_rtx = gen_frame_mem (mode,
6658 gen_rtx_PLUS (Pmode,
6659 stack_pointer_rtx,
6660 r0));
6662 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6663 || mem_rtx == post_inc);
6665 addr_ok:
6666 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6667 && mem_rtx != post_inc)
6669 insn = emit_move_insn (r0, mem_rtx);
6670 mem_rtx = r0;
6672 else if (TARGET_REGISTER_P (reg))
6674 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6676 /* Give the scheduler a bit of freedom by using up to
6677 MAX_TEMPS registers in a round-robin fashion. */
6678 insn = emit_move_insn (tmp_reg, mem_rtx);
6679 mem_rtx = tmp_reg;
6680 if (*++tmp_pnt < 0)
6681 tmp_pnt = schedule.temps;
6684 insn = emit_move_insn (reg_rtx, mem_rtx);
6687 gcc_assert (entry->offset + offset_base == d + d_rounding);
6689 else /* ! TARGET_SH5 */
6691 int last_reg;
6693 save_size = 0;
6694 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6696 if (!frame_pointer_needed)
6697 emit_insn (gen_blockage ());
6698 pop (PR_REG);
6701 /* Banked registers are popped first to avoid being scheduled in the
6702 delay slot. RTE switches banks before the ds instruction. */
6703 if (current_function_interrupt)
6705 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6706 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6707 pop (LAST_BANKED_REG - i);
6709 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6711 else
6712 last_reg = FIRST_PSEUDO_REGISTER;
6714 for (i = 0; i < last_reg; i++)
6716 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6718 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6719 && hard_reg_set_intersect_p (live_regs_mask,
6720 reg_class_contents[DF_REGS]))
6721 fpscr_deferred = 1;
6722 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6723 pop (j);
6725 if (j == FIRST_FP_REG && fpscr_deferred)
6726 pop (FPSCR_REG);
6729 if (target_flags != save_flags && ! current_function_interrupt)
6730 emit_insn (gen_toggle_sz ());
6731 target_flags = save_flags;
6733 output_stack_adjust (current_function_pretend_args_size
6734 + save_size + d_rounding
6735 + current_function_args_info.stack_regs * 8,
6736 stack_pointer_rtx, e, NULL);
6738 if (current_function_calls_eh_return)
6739 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6740 EH_RETURN_STACKADJ_RTX));
6742 /* Switch back to the normal stack if necessary. */
6743 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6744 emit_insn (gen_sp_switch_2 ());
6746 /* Tell flow the insn that pops PR isn't dead. */
6747 /* PR_REG will never be live in SHmedia mode, and we don't need to
6748 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6749 by the return pattern. */
6750 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6751 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6754 static int sh_need_epilogue_known = 0;
6757 sh_need_epilogue (void)
6759 if (! sh_need_epilogue_known)
6761 rtx epilogue;
6763 start_sequence ();
6764 sh_expand_epilogue (0);
6765 epilogue = get_insns ();
6766 end_sequence ();
6767 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6769 return sh_need_epilogue_known > 0;
6772 /* Emit code to change the current function's return address to RA.
6773 TEMP is available as a scratch register, if needed. */
6775 void
6776 sh_set_return_address (rtx ra, rtx tmp)
6778 HARD_REG_SET live_regs_mask;
6779 int d;
6780 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6781 int pr_offset;
6783 d = calc_live_regs (&live_regs_mask);
6785 /* If pr_reg isn't live, we can set it (or the register given in
6786 sh_media_register_for_return) directly. */
6787 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6789 rtx rr;
6791 if (TARGET_SHMEDIA)
6793 int rr_regno = sh_media_register_for_return ();
6795 if (rr_regno < 0)
6796 rr_regno = pr_reg;
6798 rr = gen_rtx_REG (DImode, rr_regno);
6800 else
6801 rr = gen_rtx_REG (SImode, pr_reg);
6803 emit_insn (GEN_MOV (rr, ra));
6804 /* Tell flow the register for return isn't dead. */
6805 emit_insn (gen_rtx_USE (VOIDmode, rr));
6806 return;
6809 if (TARGET_SH5)
6811 int offset;
6812 save_schedule schedule;
6813 save_entry *entry;
6815 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6816 offset = entry[1].offset;
6817 for (; entry->mode != VOIDmode; entry--)
6818 if (entry->reg == pr_reg)
6819 goto found;
6821 /* We can't find the PR register. */
6822 gcc_unreachable ();
6824 found:
6825 offset = entry->offset - offset;
6826 pr_offset = (rounded_frame_size (d) + offset
6827 + SHMEDIA_REGS_STACK_ADJUST ());
6829 else
6830 pr_offset = rounded_frame_size (d);
6832 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6833 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6835 tmp = gen_frame_mem (Pmode, tmp);
6836 emit_insn (GEN_MOV (tmp, ra));
6839 /* Clear variables at function end. */
6841 static void
6842 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6843 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6845 sh_need_epilogue_known = 0;
6848 static rtx
6849 sh_builtin_saveregs (void)
6851 /* First unnamed integer register. */
6852 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6853 /* Number of integer registers we need to save. */
6854 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6855 /* First unnamed SFmode float reg. */
6856 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6857 /* Number of SFmode float regs to save. */
6858 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6859 rtx regbuf, fpregs;
6860 int bufsize, regno;
6861 alias_set_type alias_set;
6863 if (TARGET_SH5)
6865 if (n_intregs)
6867 int pushregs = n_intregs;
6869 while (pushregs < NPARM_REGS (SImode) - 1
6870 && (CALL_COOKIE_INT_REG_GET
6871 (current_function_args_info.call_cookie,
6872 NPARM_REGS (SImode) - pushregs)
6873 == 1))
6875 current_function_args_info.call_cookie
6876 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6877 - pushregs, 1);
6878 pushregs++;
6881 if (pushregs == NPARM_REGS (SImode))
6882 current_function_args_info.call_cookie
6883 |= (CALL_COOKIE_INT_REG (0, 1)
6884 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6885 else
6886 current_function_args_info.call_cookie
6887 |= CALL_COOKIE_STACKSEQ (pushregs);
6889 current_function_pretend_args_size += 8 * n_intregs;
6891 if (TARGET_SHCOMPACT)
6892 return const0_rtx;
6895 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6897 error ("__builtin_saveregs not supported by this subtarget");
6898 return const0_rtx;
6901 if (TARGET_SHMEDIA)
6902 n_floatregs = 0;
6904 /* Allocate block of memory for the regs. */
6905 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6906 Or can assign_stack_local accept a 0 SIZE argument? */
6907 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6909 if (TARGET_SHMEDIA)
6910 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6911 else if (n_floatregs & 1)
6913 rtx addr;
6915 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6916 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6917 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6918 regbuf = change_address (regbuf, BLKmode, addr);
6920 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6922 rtx addr, mask;
6924 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6925 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6926 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6927 emit_insn (gen_andsi3 (addr, addr, mask));
6928 regbuf = change_address (regbuf, BLKmode, addr);
6930 else
6931 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6932 alias_set = get_varargs_alias_set ();
6933 set_mem_alias_set (regbuf, alias_set);
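/* Layout sketch of REGBUF in the non-SHmedia case: the first
   n_floatregs * UNITS_PER_WORD bytes hold the unnamed FP argument
   registers (filled below in reverse order through FPREGS), and the
   following n_intregs * UNITS_PER_WORD bytes hold the unnamed integer
   argument registers.  */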
6935 /* Save int args.
6936 This is optimized to only save the regs that are necessary. Explicitly
6937 named args need not be saved. */
6938 if (n_intregs > 0)
6939 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6940 adjust_address (regbuf, BLKmode,
6941 n_floatregs * UNITS_PER_WORD),
6942 n_intregs);
6944 if (TARGET_SHMEDIA)
6945 /* Return the address of the regbuf. */
6946 return XEXP (regbuf, 0);
6948 /* Save float args.
6949 This is optimized to only save the regs that are necessary. Explicitly
6950 named args need not be saved.
6951 We explicitly build a pointer to the buffer because it halves the insn
6952 count when not optimizing (otherwise the pointer is built for each reg
6953 saved).
6954 We emit the moves in reverse order so that we can use predecrement. */
6956 fpregs = copy_to_mode_reg (Pmode,
6957 plus_constant (XEXP (regbuf, 0),
6958 n_floatregs * UNITS_PER_WORD));
6959 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6961 rtx mem;
6962 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6964 emit_insn (gen_addsi3 (fpregs, fpregs,
6965 GEN_INT (-2 * UNITS_PER_WORD)));
6966 mem = change_address (regbuf, DFmode, fpregs);
6967 emit_move_insn (mem,
6968 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6970 regno = first_floatreg;
6971 if (regno & 1)
6973 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6974 mem = change_address (regbuf, SFmode, fpregs);
6975 emit_move_insn (mem,
6976 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6977 - (TARGET_LITTLE_ENDIAN != 0)));
6980 else
6981 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6983 rtx mem;
6985 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6986 mem = change_address (regbuf, SFmode, fpregs);
6987 emit_move_insn (mem,
6988 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6991 /* Return the address of the regbuf. */
6992 return XEXP (regbuf, 0);
6995 /* Define the `__builtin_va_list' type for the ABI. */
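/* For the non-SH5, SH2E/SH4, non-Renesas case, the record built below
   corresponds roughly to this C sketch (field meanings inferred from
   sh_va_start and sh_gimplify_va_arg_expr):

       struct __va_list_tag
       {
         void *__va_next_o;         next integer arg in the reg-save area
         void *__va_next_o_limit;   end of the integer reg-save area
         void *__va_next_fp;        next FP arg in the reg-save area
         void *__va_next_fp_limit;  end of the FP reg-save area
         void *__va_next_stack;     next argument passed on the stack
       };

   All other configurations simply use a plain pointer.  */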
6997 static tree
6998 sh_build_builtin_va_list (void)
7000 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7001 tree record;
7003 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7004 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7005 return ptr_type_node;
7007 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7009 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7010 ptr_type_node);
7011 f_next_o_limit = build_decl (FIELD_DECL,
7012 get_identifier ("__va_next_o_limit"),
7013 ptr_type_node);
7014 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7015 ptr_type_node);
7016 f_next_fp_limit = build_decl (FIELD_DECL,
7017 get_identifier ("__va_next_fp_limit"),
7018 ptr_type_node);
7019 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7020 ptr_type_node);
7022 DECL_FIELD_CONTEXT (f_next_o) = record;
7023 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7024 DECL_FIELD_CONTEXT (f_next_fp) = record;
7025 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7026 DECL_FIELD_CONTEXT (f_next_stack) = record;
7028 TYPE_FIELDS (record) = f_next_o;
7029 TREE_CHAIN (f_next_o) = f_next_o_limit;
7030 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7031 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7032 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7034 layout_type (record);
7036 return record;
7039 /* Implement `va_start' for varargs and stdarg. */
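/* Sketch of what sh_va_start emits for the record form of va_list,
   using informal names for the named-argument counts taken from
   current_function_args_info.arg_count[]:

       __va_next_fp       = __builtin_saveregs ();
       __va_next_fp_limit = __va_next_fp
                            + MAX (0, 8 - named_float_args) * UNITS_PER_WORD;
       __va_next_o        = __va_next_fp_limit;
       __va_next_o_limit  = __va_next_o
                            + MAX (0, 4 - named_int_args) * UNITS_PER_WORD;
       __va_next_stack    = nextarg;

   SH5 and the other plain-pointer cases defer to
   std_expand_builtin_va_start instead.  */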
7041 static void
7042 sh_va_start (tree valist, rtx nextarg)
7044 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7045 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7046 tree t, u;
7047 int nfp, nint;
7049 if (TARGET_SH5)
7051 expand_builtin_saveregs ();
7052 std_expand_builtin_va_start (valist, nextarg);
7053 return;
7056 if ((! TARGET_SH2E && ! TARGET_SH4)
7057 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7059 std_expand_builtin_va_start (valist, nextarg);
7060 return;
7063 f_next_o = TYPE_FIELDS (va_list_type_node);
7064 f_next_o_limit = TREE_CHAIN (f_next_o);
7065 f_next_fp = TREE_CHAIN (f_next_o_limit);
7066 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7067 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7069 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7070 NULL_TREE);
7071 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7072 valist, f_next_o_limit, NULL_TREE);
7073 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7074 NULL_TREE);
7075 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7076 valist, f_next_fp_limit, NULL_TREE);
7077 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7078 valist, f_next_stack, NULL_TREE);
7080 /* Call __builtin_saveregs. */
7081 u = make_tree (sizetype, expand_builtin_saveregs ());
7082 u = fold_convert (ptr_type_node, u);
7083 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7084 TREE_SIDE_EFFECTS (t) = 1;
7085 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7087 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7088 if (nfp < 8)
7089 nfp = 8 - nfp;
7090 else
7091 nfp = 0;
7092 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7093 size_int (UNITS_PER_WORD * nfp));
7094 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7095 TREE_SIDE_EFFECTS (t) = 1;
7096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7098 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7099 TREE_SIDE_EFFECTS (t) = 1;
7100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7102 nint = current_function_args_info.arg_count[SH_ARG_INT];
7103 if (nint < 4)
7104 nint = 4 - nint;
7105 else
7106 nint = 0;
7107 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7108 size_int (UNITS_PER_WORD * nint));
7109 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7110 TREE_SIDE_EFFECTS (t) = 1;
7111 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7113 u = make_tree (ptr_type_node, nextarg);
7114 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7115 TREE_SIDE_EFFECTS (t) = 1;
7116 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7119 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7120 member, return it. */
7121 static tree
7122 find_sole_member (tree type)
7124 tree field, member = NULL_TREE;
7126 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7128 if (TREE_CODE (field) != FIELD_DECL)
7129 continue;
7130 if (!DECL_SIZE (field))
7131 return NULL_TREE;
7132 if (integer_zerop (DECL_SIZE (field)))
7133 continue;
7134 if (member)
7135 return NULL_TREE;
7136 member = field;
7138 return member;
7140 /* Implement `va_arg'. */
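/* Rough outline of the record-form case handled below: float-like
   values are fetched from the __va_next_fp region while it lasts,
   integer-like values from the __va_next_o region, and either kind
   falls back to __va_next_stack once its register-save region is
   exhausted.  The actual load is then performed by
   std_gimplify_va_arg_expr on whichever pointer was selected.  */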
7142 static tree
7143 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7144 tree *post_p ATTRIBUTE_UNUSED)
7146 HOST_WIDE_INT size, rsize;
7147 tree tmp, pptr_type_node;
7148 tree addr, lab_over = NULL, result = NULL;
7149 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7150 tree eff_type;
7152 if (pass_by_ref)
7153 type = build_pointer_type (type);
7155 size = int_size_in_bytes (type);
7156 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7157 pptr_type_node = build_pointer_type (ptr_type_node);
7159 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7160 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7162 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7163 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7164 int pass_as_float;
7165 tree lab_false;
7166 tree member;
7168 f_next_o = TYPE_FIELDS (va_list_type_node);
7169 f_next_o_limit = TREE_CHAIN (f_next_o);
7170 f_next_fp = TREE_CHAIN (f_next_o_limit);
7171 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7172 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7174 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7175 NULL_TREE);
7176 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7177 valist, f_next_o_limit, NULL_TREE);
7178 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7179 valist, f_next_fp, NULL_TREE);
7180 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7181 valist, f_next_fp_limit, NULL_TREE);
7182 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7183 valist, f_next_stack, NULL_TREE);
7185 /* Structures with a single member with a distinct mode are passed
7186 like their member. This is relevant if the latter has a REAL_TYPE
7187 or COMPLEX_TYPE type. */
7188 eff_type = type;
7189 while (TREE_CODE (eff_type) == RECORD_TYPE
7190 && (member = find_sole_member (eff_type))
7191 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7192 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7193 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7195 tree field_type = TREE_TYPE (member);
7197 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7198 eff_type = field_type;
7199 else
7201 gcc_assert ((TYPE_ALIGN (eff_type)
7202 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7203 || (TYPE_ALIGN (eff_type)
7204 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7205 break;
7209 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7211 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7212 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7213 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7214 && size <= 16));
7216 else
7218 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7221 addr = create_tmp_var (pptr_type_node, NULL);
7222 lab_false = create_artificial_label ();
7223 lab_over = create_artificial_label ();
7225 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7227 if (pass_as_float)
7229 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7230 tree cmp;
7231 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7233 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7234 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7235 gimplify_and_add (tmp, pre_p);
7237 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7238 gimplify_and_add (tmp, pre_p);
7239 tmp = next_fp_limit;
7240 if (size > 4 && !is_double)
7241 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7242 size_int (4 - size));
7243 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7244 cmp = build3 (COND_EXPR, void_type_node, tmp,
7245 build1 (GOTO_EXPR, void_type_node, lab_false),
7246 NULL_TREE);
7247 if (!is_double)
7248 gimplify_and_add (cmp, pre_p);
7250 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7251 || (is_double || size == 16))
7253 tmp = fold_convert (sizetype, next_fp_tmp);
7254 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7255 size_int (UNITS_PER_WORD));
7256 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7257 next_fp_tmp, tmp);
7258 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7259 next_fp_tmp, tmp);
7260 gimplify_and_add (tmp, pre_p);
7262 if (is_double)
7263 gimplify_and_add (cmp, pre_p);
7265 #ifdef FUNCTION_ARG_SCmode_WART
7266 if (TYPE_MODE (eff_type) == SCmode
7267 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7269 tree subtype = TREE_TYPE (eff_type);
7270 tree real, imag;
7272 imag
7273 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7274 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7276 real
7277 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7278 real = get_initialized_tmp_var (real, pre_p, NULL);
7280 result = build2 (COMPLEX_EXPR, type, real, imag);
7281 result = get_initialized_tmp_var (result, pre_p, NULL);
7283 #endif /* FUNCTION_ARG_SCmode_WART */
7285 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7286 gimplify_and_add (tmp, pre_p);
7288 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7289 gimplify_and_add (tmp, pre_p);
7291 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7292 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7293 gimplify_and_add (tmp, pre_p);
7294 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7295 gimplify_and_add (tmp, pre_p);
7297 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7298 gimplify_and_add (tmp, post_p);
7299 valist = next_fp_tmp;
7301 else
7303 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7304 size_int (rsize));
7305 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7306 tmp = build3 (COND_EXPR, void_type_node, tmp,
7307 build1 (GOTO_EXPR, void_type_node, lab_false),
7308 NULL_TREE);
7309 gimplify_and_add (tmp, pre_p);
7311 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7312 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7313 gimplify_and_add (tmp, pre_p);
7315 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7316 gimplify_and_add (tmp, pre_p);
7318 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7319 gimplify_and_add (tmp, pre_p);
7321 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7323 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7324 next_o, next_o_limit);
7325 gimplify_and_add (tmp, pre_p);
7328 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7329 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7330 gimplify_and_add (tmp, pre_p);
7333 if (!result)
7335 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7336 gimplify_and_add (tmp, pre_p);
7340 /* ??? In va-sh.h, there had been code to make values larger than
7341 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7343 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7344 if (result)
7346 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7347 gimplify_and_add (tmp, pre_p);
7349 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7350 gimplify_and_add (tmp, pre_p);
7352 else
7353 result = tmp;
7355 if (pass_by_ref)
7356 result = build_va_arg_indirect_ref (result);
7358 return result;
7361 bool
7362 sh_promote_prototypes (const_tree type)
7364 if (TARGET_HITACHI)
7365 return 0;
7366 if (! type)
7367 return 1;
7368 return ! sh_attr_renesas_p (type);
7371 /* Whether an argument must be passed by reference. On SHcompact, we
7372 pretend arguments wider than 32 bits that would have been passed in
7373 registers are passed by reference, so that an SHmedia trampoline
7374 loads them into the full 64-bit registers. */
7376 static int
7377 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7378 const_tree type, bool named)
7380 unsigned HOST_WIDE_INT size;
7382 if (type)
7383 size = int_size_in_bytes (type);
7384 else
7385 size = GET_MODE_SIZE (mode);
7387 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7388 && (!named
7389 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7390 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7391 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7392 && size > 4
7393 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7394 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7395 return size;
7396 else
7397 return 0;
7400 static bool
7401 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7402 const_tree type, bool named)
7404 if (targetm.calls.must_pass_in_stack (mode, type))
7405 return true;
7407 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7408 wants to know about pass-by-reference semantics for incoming
7409 arguments. */
7410 if (! cum)
7411 return false;
7413 if (TARGET_SHCOMPACT)
7415 cum->byref = shcompact_byref (cum, mode, type, named);
7416 return cum->byref != 0;
7419 return false;
7422 static bool
7423 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7424 const_tree type, bool named ATTRIBUTE_UNUSED)
7426 /* ??? How can it possibly be correct to return true only on the
7427 caller side of the equation? Is there someplace else in the
7428 sh backend that's magically producing the copies? */
7429 return (cum->outgoing
7430 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7431 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7434 static int
7435 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7436 tree type, bool named ATTRIBUTE_UNUSED)
7438 int words = 0;
7440 if (!TARGET_SH5
7441 && PASS_IN_REG_P (*cum, mode, type)
7442 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7443 && (ROUND_REG (*cum, mode)
7444 + (mode != BLKmode
7445 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7446 : ROUND_ADVANCE (int_size_in_bytes (type)))
7447 > NPARM_REGS (mode)))
7448 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7450 else if (!TARGET_SHCOMPACT
7451 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7452 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7454 return words * UNITS_PER_WORD;
7458 /* Define where to put the arguments to a function.
7459 Value is zero to push the argument on the stack,
7460 or a hard register in which to store the argument.
7462 MODE is the argument's machine mode.
7463 TYPE is the data type of the argument (as a tree).
7464 This is null for libcalls where that information may
7465 not be available.
7466 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7467 the preceding args and about the function being called.
7468 NAMED is nonzero if this argument is a named parameter
7469 (otherwise it is an extra parameter matching an ellipsis).
7471 On SH the first args are normally in registers
7472 and the rest are pushed. Any arg that starts within the first
7473 NPARM_REGS words is at least partially passed in a register unless
7474 its data type forbids. */
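/* For example, an SCmode argument on little-endian SH4 is handled
   below by returning a PARALLEL of two SFmode pieces at byte offsets
   0 and 4 whose register numbers are pairwise swapped (regno ^ 1);
   see the FUNCTION_ARG_SCmode_WART handling.  */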
7478 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7479 tree type, int named)
7481 if (! TARGET_SH5 && mode == VOIDmode)
7482 return GEN_INT (ca->renesas_abi ? 1 : 0);
7484 if (! TARGET_SH5
7485 && PASS_IN_REG_P (*ca, mode, type)
7486 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7488 int regno;
7490 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7491 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7493 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7494 gen_rtx_REG (SFmode,
7495 BASE_ARG_REG (mode)
7496 + (ROUND_REG (*ca, mode) ^ 1)),
7497 const0_rtx);
7498 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7499 gen_rtx_REG (SFmode,
7500 BASE_ARG_REG (mode)
7501 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7502 GEN_INT (4));
7503 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7506 /* If the alignment of a DF value causes an SF register to be
7507 skipped, we will use that skipped register for the next SF
7508 value. */
7509 if ((TARGET_HITACHI || ca->renesas_abi)
7510 && ca->free_single_fp_reg
7511 && mode == SFmode)
7512 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7514 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7515 ^ (mode == SFmode && TARGET_SH4
7516 && TARGET_LITTLE_ENDIAN != 0
7517 && ! TARGET_HITACHI && ! ca->renesas_abi);
7518 return gen_rtx_REG (mode, regno);
7522 if (TARGET_SH5)
7524 if (mode == VOIDmode && TARGET_SHCOMPACT)
7525 return GEN_INT (ca->call_cookie);
7527 /* The following test assumes unnamed arguments are promoted to
7528 DFmode. */
7529 if (mode == SFmode && ca->free_single_fp_reg)
7530 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7532 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7533 && (named || ! ca->prototype_p)
7534 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7536 if (! ca->prototype_p && TARGET_SHMEDIA)
7537 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7539 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7540 FIRST_FP_PARM_REG
7541 + ca->arg_count[(int) SH_ARG_FLOAT]);
7544 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7545 && (! TARGET_SHCOMPACT
7546 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7547 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7548 type, named))))
7550 return gen_rtx_REG (mode, (FIRST_PARM_REG
7551 + ca->arg_count[(int) SH_ARG_INT]));
7554 return 0;
7557 return 0;
7560 /* Update the data in CUM to advance over an argument
7561 of mode MODE and data type TYPE.
7562 (TYPE is null for libcalls where that information may not be
7563 available.) */
7565 void
7566 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7567 tree type, int named)
7569 if (ca->force_mem)
7570 ca->force_mem = 0;
7571 else if (TARGET_SH5)
7573 tree type2 = (ca->byref && type
7574 ? TREE_TYPE (type)
7575 : type);
7576 enum machine_mode mode2 = (ca->byref && type
7577 ? TYPE_MODE (type2)
7578 : mode);
7579 int dwords = ((ca->byref
7580 ? ca->byref
7581 : mode2 == BLKmode
7582 ? int_size_in_bytes (type2)
7583 : GET_MODE_SIZE (mode2)) + 7) / 8;
7584 int numregs = MIN (dwords, NPARM_REGS (SImode)
7585 - ca->arg_count[(int) SH_ARG_INT]);
7587 if (numregs)
7589 ca->arg_count[(int) SH_ARG_INT] += numregs;
7590 if (TARGET_SHCOMPACT
7591 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7593 ca->call_cookie
7594 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7595 - numregs, 1);
7596 /* N.B. We want this also for outgoing. */
7597 ca->stack_regs += numregs;
7599 else if (ca->byref)
7601 if (! ca->outgoing)
7602 ca->stack_regs += numregs;
7603 ca->byref_regs += numregs;
7604 ca->byref = 0;
7606 ca->call_cookie
7607 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7608 - numregs, 2);
7609 while (--numregs);
7610 ca->call_cookie
7611 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7612 - 1, 1);
7614 else if (dwords > numregs)
7616 int pushregs = numregs;
7618 if (TARGET_SHCOMPACT)
7619 ca->stack_regs += numregs;
7620 while (pushregs < NPARM_REGS (SImode) - 1
7621 && (CALL_COOKIE_INT_REG_GET
7622 (ca->call_cookie,
7623 NPARM_REGS (SImode) - pushregs)
7624 == 1))
7626 ca->call_cookie
7627 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7628 - pushregs, 1);
7629 pushregs++;
7631 if (numregs == NPARM_REGS (SImode))
7632 ca->call_cookie
7633 |= CALL_COOKIE_INT_REG (0, 1)
7634 | CALL_COOKIE_STACKSEQ (numregs - 1);
7635 else
7636 ca->call_cookie
7637 |= CALL_COOKIE_STACKSEQ (numregs);
7640 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7641 && (named || ! ca->prototype_p))
7643 if (mode2 == SFmode && ca->free_single_fp_reg)
7644 ca->free_single_fp_reg = 0;
7645 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7646 < NPARM_REGS (SFmode))
7648 int numfpregs
7649 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7650 NPARM_REGS (SFmode)
7651 - ca->arg_count[(int) SH_ARG_FLOAT]);
7653 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7655 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7657 if (ca->outgoing && numregs > 0)
7660 ca->call_cookie
7661 |= (CALL_COOKIE_INT_REG
7662 (ca->arg_count[(int) SH_ARG_INT]
7663 - numregs + ((numfpregs - 2) / 2),
7664 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7665 - numfpregs) / 2));
7667 while (numfpregs -= 2);
7669 else if (mode2 == SFmode && (named)
7670 && (ca->arg_count[(int) SH_ARG_FLOAT]
7671 < NPARM_REGS (SFmode)))
7672 ca->free_single_fp_reg
7673 = FIRST_FP_PARM_REG - numfpregs
7674 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7677 return;
7680 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7682 /* Note that we've used the skipped register. */
7683 if (mode == SFmode && ca->free_single_fp_reg)
7685 ca->free_single_fp_reg = 0;
7686 return;
7688 /* When we have a DF after an SF, there's an SF register that gets
7689 skipped in order to align the DF value. We note this skipped
7690 register, because the next SF value will use it, and not the
7691 SF that follows the DF. */
7692 if (mode == DFmode
7693 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7695 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7696 + BASE_ARG_REG (mode));
7700 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7701 || PASS_IN_REG_P (*ca, mode, type))
7702 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7703 = (ROUND_REG (*ca, mode)
7704 + (mode == BLKmode
7705 ? ROUND_ADVANCE (int_size_in_bytes (type))
7706 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7709 /* The Renesas calling convention doesn't quite fit into this scheme since
7710 the address is passed like an invisible argument, but one that is always
7711 passed in memory. */
7712 static rtx
7713 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7715 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7716 return 0;
7717 return gen_rtx_REG (Pmode, 2);
7720 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7722 static bool
7723 sh_return_in_memory (const_tree type, const_tree fndecl)
7725 if (TARGET_SH5)
7727 if (TYPE_MODE (type) == BLKmode)
7728 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7729 else
7730 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7732 else
7734 return (TYPE_MODE (type) == BLKmode
7735 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7736 && TREE_CODE (type) == RECORD_TYPE));
7740 /* We actually emit the code in sh_expand_prologue. We used to use
7741 a static variable to flag that we need to emit this code, but that
7742 doesn't work when inlining, when functions are deferred and then emitted
7743 later. Fortunately, we already have two flags that are part of struct
7744 function that tell if a function uses varargs or stdarg. */
7745 static void
7746 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7747 enum machine_mode mode,
7748 tree type,
7749 int *pretend_arg_size,
7750 int second_time ATTRIBUTE_UNUSED)
7752 gcc_assert (current_function_stdarg);
7753 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7755 int named_parm_regs, anon_parm_regs;
7757 named_parm_regs = (ROUND_REG (*ca, mode)
7758 + (mode == BLKmode
7759 ? ROUND_ADVANCE (int_size_in_bytes (type))
7760 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7761 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7762 if (anon_parm_regs > 0)
7763 *pretend_arg_size = anon_parm_regs * 4;
7767 static bool
7768 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7770 return TARGET_SH5;
7773 static bool
7774 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7776 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7780 /* Define the offset between two registers, one to be eliminated, and
7781 the other its replacement, at the start of a routine. */
7784 initial_elimination_offset (int from, int to)
7786 int regs_saved;
7787 int regs_saved_rounding = 0;
7788 int total_saved_regs_space;
7789 int total_auto_space;
7790 int save_flags = target_flags;
7791 int copy_flags;
7792 HARD_REG_SET live_regs_mask;
7794 shmedia_space_reserved_for_target_registers = false;
7795 regs_saved = calc_live_regs (&live_regs_mask);
7796 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7798 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7800 shmedia_space_reserved_for_target_registers = true;
7801 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7804 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7805 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7806 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7808 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7809 copy_flags = target_flags;
7810 target_flags = save_flags;
7812 total_saved_regs_space = regs_saved + regs_saved_rounding;
7814 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7815 return total_saved_regs_space + total_auto_space
7816 + current_function_args_info.byref_regs * 8;
7818 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7819 return total_saved_regs_space + total_auto_space
7820 + current_function_args_info.byref_regs * 8;
7822 /* Initial gap between fp and sp is 0. */
7823 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7824 return 0;
7826 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7827 return rounded_frame_size (0);
7829 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7830 return rounded_frame_size (0);
7832 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7833 && (to == HARD_FRAME_POINTER_REGNUM
7834 || to == STACK_POINTER_REGNUM));
7835 if (TARGET_SH5)
7837 int n = total_saved_regs_space;
7838 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7839 save_schedule schedule;
7840 save_entry *entry;
7842 n += total_auto_space;
7844 /* If it wasn't saved, there's not much we can do. */
7845 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7846 return n;
7848 target_flags = copy_flags;
7850 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7851 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7852 if (entry->reg == pr_reg)
7854 target_flags = save_flags;
7855 return entry->offset;
7857 gcc_unreachable ();
7859 else
7860 return total_auto_space;
7863 /* Insert any deferred function attributes from earlier pragmas. */
7864 static void
7865 sh_insert_attributes (tree node, tree *attributes)
7867 tree attrs;
7869 if (TREE_CODE (node) != FUNCTION_DECL)
7870 return;
7872 /* We are only interested in fields. */
7873 if (!DECL_P (node))
7874 return;
7876 /* Append the attributes to the deferred attributes. */
7877 *sh_deferred_function_attributes_tail = *attributes;
7878 attrs = sh_deferred_function_attributes;
7879 if (!attrs)
7880 return;
7882 /* Some attributes imply or require the interrupt attribute. */
7883 if (!lookup_attribute ("interrupt_handler", attrs)
7884 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7886 /* If we have a trapa_handler, but no interrupt_handler attribute,
7887 insert an interrupt_handler attribute. */
7888 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7889 /* We can't use sh_pr_interrupt here because that's not in the
7890 java frontend. */
7891 attrs
7892 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7893 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7894 interrupt attribute is missing, we ignore the attribute and warn. */
7895 else if (lookup_attribute ("sp_switch", attrs)
7896 || lookup_attribute ("trap_exit", attrs)
7897 || lookup_attribute ("nosave_low_regs", attrs))
7899 tree *tail;
7901 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7903 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7904 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7905 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7906 warning (OPT_Wattributes,
7907 "%qs attribute only applies to interrupt functions",
7908 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7909 else
7911 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7912 NULL_TREE);
7913 tail = &TREE_CHAIN (*tail);
7916 attrs = *attributes;
7920 /* Install the processed list. */
7921 *attributes = attrs;
7923 /* Clear deferred attributes. */
7924 sh_deferred_function_attributes = NULL_TREE;
7925 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7927 return;
7930 /* Supported attributes:
7932 interrupt_handler -- specifies this function is an interrupt handler.
7934 trapa_handler -- like above, but don't save all registers.
7936 sp_switch -- specifies an alternate stack for an interrupt handler
7937 to run on.
7939 trap_exit -- use a trapa to exit an interrupt function instead of
7940 an rte instruction.
7942 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7943 This is useful on the SH3 and upwards,
7944 which have a separate set of low regs for User and Supervisor modes.
7945 This should only be used for the lowest level of interrupts. Higher levels
7946 of interrupts must save the registers in case they themselves are
7947 interrupted.
7949 renesas -- use Renesas calling/layout conventions (functions and
7950 structures).
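/* As a purely illustrative example (hypothetical, not part of this file), a
   low-level handler combining several of these attributes might be declared
   as:

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11), nosave_low_regs))
     my_isr (void);

   where "alt_stack" names a user-provided stack area and 11 is an arbitrary
   trapa number chosen for the example.  */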
7954 const struct attribute_spec sh_attribute_table[] =
7956 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7957 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7958 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7959 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7960 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7961 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7962 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7963 #ifdef SYMBIAN
7964 /* Symbian support adds two new attributes:
7965 dllexport - for exporting a function/variable that will live in a dll
7966 dllimport - for importing a function/variable from a dll
7968 Microsoft allows multiple declspecs in one __declspec, separating
7969 them with spaces. We do NOT support this. Instead, use __declspec
7970 multiple times. */
7971 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7972 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7973 #endif
7974 { NULL, 0, 0, false, false, false, NULL }
7977 /* Handle an "interrupt_handler" attribute; arguments as in
7978 struct attribute_spec.handler. */
7979 static tree
7980 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7981 tree args ATTRIBUTE_UNUSED,
7982 int flags ATTRIBUTE_UNUSED,
7983 bool *no_add_attrs)
7985 if (TREE_CODE (*node) != FUNCTION_DECL)
7987 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7988 IDENTIFIER_POINTER (name));
7989 *no_add_attrs = true;
7991 else if (TARGET_SHCOMPACT)
7993 error ("attribute interrupt_handler is not compatible with -m5-compact");
7994 *no_add_attrs = true;
7997 return NULL_TREE;
8000 /* Handle an "sp_switch" attribute; arguments as in
8001 struct attribute_spec.handler. */
8002 static tree
8003 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8004 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8006 if (TREE_CODE (*node) != FUNCTION_DECL)
8008 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8009 IDENTIFIER_POINTER (name));
8010 *no_add_attrs = true;
8012 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8014 /* The argument must be a constant string. */
8015 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8016 IDENTIFIER_POINTER (name));
8017 *no_add_attrs = true;
8020 return NULL_TREE;
8023 /* Handle a "trap_exit" attribute; arguments as in
8024 struct attribute_spec.handler. */
8025 static tree
8026 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8027 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8029 if (TREE_CODE (*node) != FUNCTION_DECL)
8031 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8032 IDENTIFIER_POINTER (name));
8033 *no_add_attrs = true;
8035 /* The argument specifies a trap number to be used in a trapa instruction
8036 at function exit (instead of an rte instruction). */
8037 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8039 /* The argument must be a constant integer. */
8040 warning (OPT_Wattributes, "%qs attribute argument not an "
8041 "integer constant", IDENTIFIER_POINTER (name));
8042 *no_add_attrs = true;
8045 return NULL_TREE;
8048 static tree
8049 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8050 tree name ATTRIBUTE_UNUSED,
8051 tree args ATTRIBUTE_UNUSED,
8052 int flags ATTRIBUTE_UNUSED,
8053 bool *no_add_attrs ATTRIBUTE_UNUSED)
8055 return NULL_TREE;
8058 /* True if __attribute__((renesas)) or -mrenesas. */
8060 sh_attr_renesas_p (const_tree td)
8062 if (TARGET_HITACHI)
8063 return 1;
8064 if (td == 0)
8065 return 0;
8066 if (DECL_P (td))
8067 td = TREE_TYPE (td);
8068 if (td == error_mark_node)
8069 return 0;
8070 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8071 != NULL_TREE);
8074 /* True if __attribute__((renesas)) or -mrenesas, for the current
8075 function. */
8077 sh_cfun_attr_renesas_p (void)
8079 return sh_attr_renesas_p (current_function_decl);
8083 sh_cfun_interrupt_handler_p (void)
8085 return (lookup_attribute ("interrupt_handler",
8086 DECL_ATTRIBUTES (current_function_decl))
8087 != NULL_TREE);
8090 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8092 static const char *
8093 sh_check_pch_target_flags (int old_flags)
8095 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8096 | MASK_SH_E | MASK_HARD_SH4
8097 | MASK_FPU_SINGLE | MASK_SH4))
8098 return _("created and used with different architectures / ABIs");
8099 if ((old_flags ^ target_flags) & MASK_HITACHI)
8100 return _("created and used with different ABIs");
8101 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8102 return _("created and used with different endianness");
8103 return NULL;
8106 /* Predicates used by the templates. */
8108 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8109 Used only in general_movsrc_operand. */
8112 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8114 switch (REGNO (op))
8116 case PR_REG:
8117 case MACL_REG:
8118 case MACH_REG:
8119 return 1;
8121 return 0;
8124 /* Nonzero if OP is a floating point value with value 0.0. */
8127 fp_zero_operand (rtx op)
8129 REAL_VALUE_TYPE r;
8131 if (GET_MODE (op) != SFmode)
8132 return 0;
8134 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8135 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8138 /* Nonzero if OP is a floating point value with value 1.0. */
8141 fp_one_operand (rtx op)
8143 REAL_VALUE_TYPE r;
8145 if (GET_MODE (op) != SFmode)
8146 return 0;
8148 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8149 return REAL_VALUES_EQUAL (r, dconst1);
8152 /* For -m4 and -m4-single-only, mode switching is used. If we are
8153 compiling without -mfmovd, movsf_ie isn't taken into account for
8154 mode switching. We could check in machine_dependent_reorg for
8155 cases where we know we are in single precision mode, but there is
8156 no interface to find that out during reload, so we must avoid
8157 choosing an fldi alternative during reload and thus failing to
8158 allocate a scratch register for the constant loading. */
8160 fldi_ok (void)
8162 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8166 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8168 enum rtx_code code = GET_CODE (op);
8169 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8172 /* Return the TLS type for TLS symbols, 0 otherwise. */
8174 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8176 if (GET_CODE (op) != SYMBOL_REF)
8177 return 0;
8178 return SYMBOL_REF_TLS_MODEL (op);
8181 /* Return the destination address of a branch. */
8183 static int
8184 branch_dest (rtx branch)
8186 rtx dest = SET_SRC (PATTERN (branch));
8187 int dest_uid;
8189 if (GET_CODE (dest) == IF_THEN_ELSE)
8190 dest = XEXP (dest, 1);
8191 dest = XEXP (dest, 0);
8192 dest_uid = INSN_UID (dest);
8193 return INSN_ADDRESSES (dest_uid);
8196 /* Return nonzero if REG is not used after INSN.
8197 We assume REG is a reload reg, and therefore does
8198 not live past labels. It may live past calls or jumps though. */
8200 reg_unused_after (rtx reg, rtx insn)
8202 enum rtx_code code;
8203 rtx set;
8205 /* If the reg is set by this instruction, then it is safe for our
8206 case. Disregard the case where this is a store to memory, since
8207 we are checking a register used in the store address. */
8208 set = single_set (insn);
8209 if (set && GET_CODE (SET_DEST (set)) != MEM
8210 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8211 return 1;
8213 while ((insn = NEXT_INSN (insn)))
8215 rtx set;
8216 if (!INSN_P (insn))
8217 continue;
8219 code = GET_CODE (insn);
8221 #if 0
8222 /* If this is a label that existed before reload, then the register
8223 is dead here. However, if this is a label added by reorg, then
8224 the register may still be live here. We can't tell the difference,
8225 so we just ignore labels completely. */
8226 if (code == CODE_LABEL)
8227 return 1;
8228 /* else */
8229 #endif
8231 if (code == JUMP_INSN)
8232 return 0;
8234 /* If this is a sequence, we must handle them all at once.
8235 We could have for instance a call that sets the target register,
8236 and an insn in a delay slot that uses the register. In this case,
8237 we must return 0. */
8238 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8240 int i;
8241 int retval = 0;
8243 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8245 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8246 rtx set = single_set (this_insn);
8248 if (GET_CODE (this_insn) == CALL_INSN)
8249 code = CALL_INSN;
8250 else if (GET_CODE (this_insn) == JUMP_INSN)
8252 if (INSN_ANNULLED_BRANCH_P (this_insn))
8253 return 0;
8254 code = JUMP_INSN;
8257 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8258 return 0;
8259 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8261 if (GET_CODE (SET_DEST (set)) != MEM)
8262 retval = 1;
8263 else
8264 return 0;
8266 if (set == 0
8267 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8268 return 0;
8270 if (retval == 1)
8271 return 1;
8272 else if (code == JUMP_INSN)
8273 return 0;
8276 set = single_set (insn);
8277 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8278 return 0;
8279 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8280 return GET_CODE (SET_DEST (set)) != MEM;
8281 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8282 return 0;
8284 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8285 return 1;
8287 return 1;
8290 #include "ggc.h"
8292 static GTY(()) rtx fpscr_rtx;
8294 get_fpscr_rtx (void)
8296 if (! fpscr_rtx)
8298 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8299 REG_USERVAR_P (fpscr_rtx) = 1;
8300 mark_user_reg (fpscr_rtx);
8302 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8303 mark_user_reg (fpscr_rtx);
8304 return fpscr_rtx;
8307 static GTY(()) tree fpscr_values;
8309 static void
8310 emit_fpu_switch (rtx scratch, int index)
8312 rtx dst, src;
8314 if (fpscr_values == NULL)
8316 tree t;
8318 t = build_index_type (integer_one_node);
8319 t = build_array_type (integer_type_node, t);
8320 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8321 DECL_ARTIFICIAL (t) = 1;
8322 DECL_IGNORED_P (t) = 1;
8323 DECL_EXTERNAL (t) = 1;
8324 TREE_STATIC (t) = 1;
8325 TREE_PUBLIC (t) = 1;
8326 TREE_USED (t) = 1;
8328 fpscr_values = t;
8331 src = DECL_RTL (fpscr_values);
8332 if (!can_create_pseudo_p ())
8334 emit_move_insn (scratch, XEXP (src, 0));
8335 if (index != 0)
8336 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8337 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8339 else
8340 src = adjust_address (src, PSImode, index * 4);
8342 dst = get_fpscr_rtx ();
8343 emit_move_insn (dst, src);
8346 void
8347 emit_sf_insn (rtx pat)
8349 emit_insn (pat);
8352 void
8353 emit_df_insn (rtx pat)
8355 emit_insn (pat);
8358 void
8359 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8361 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8364 void
8365 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8367 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8368 get_fpscr_rtx ()));
8371 void
8372 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8374 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8377 void
8378 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8380 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8381 get_fpscr_rtx ()));
8384 static rtx get_free_reg (HARD_REG_SET);
8386 /* This function returns a register to use to load the address to load
8387 the fpscr from. Currently it always returns r1 or r7, but when we are
8388 able to use pseudo registers after combine, or have a better mechanism
8389 for choosing a register, it should be done here. */
8390 /* REGS_LIVE is the liveness information for the point for which we
8391 need this allocation. In some bare-bones exit blocks, r1 is live at the
8392 start. We can even have all of r0..r3 being live:
8393 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8394 The INSN before which new insns are placed will clobber the register
8395 we return. If a basic block consists only of setting the return value
8396 register to a pseudo and using that register, the return value is not
8397 live before or after this block, yet we'll insert our insns right in
8398 the middle. */
8400 static rtx
8401 get_free_reg (HARD_REG_SET regs_live)
8403 if (! TEST_HARD_REG_BIT (regs_live, 1))
8404 return gen_rtx_REG (Pmode, 1);
8406 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8407 there shouldn't be anything but a jump before the function end. */
8408 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8409 return gen_rtx_REG (Pmode, 7);
8412 /* This function will set the fpscr from memory.
8413 MODE is the mode we are setting it to. */
8414 void
8415 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8417 enum attr_fp_mode fp_mode = mode;
8418 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8419 rtx addr_reg;
8421 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8422 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8425 /* Is the given character a logical line separator for the assembler? */
8426 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8427 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8428 #endif
8431 sh_insn_length_adjustment (rtx insn)
8433 /* Instructions with unfilled delay slots take up an extra two bytes for
8434 the nop in the delay slot. */
8435 if (((GET_CODE (insn) == INSN
8436 && GET_CODE (PATTERN (insn)) != USE
8437 && GET_CODE (PATTERN (insn)) != CLOBBER)
8438 || GET_CODE (insn) == CALL_INSN
8439 || (GET_CODE (insn) == JUMP_INSN
8440 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8441 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8442 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8443 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8444 return 2;
8446 /* SH2e has a bug that prevents the use of annulled branches, so if
8447 the delay slot is not filled, we'll have to put a NOP in it. */
8448 if (sh_cpu == CPU_SH2E
8449 && GET_CODE (insn) == JUMP_INSN
8450 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8451 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8452 && get_attr_type (insn) == TYPE_CBRANCH
8453 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8454 return 2;
8456 /* sh-dsp parallel processing insns take four bytes instead of two. */
8458 if (GET_CODE (insn) == INSN)
8460 int sum = 0;
8461 rtx body = PATTERN (insn);
8462 const char *template;
8463 char c;
8464 int maybe_label = 1;
8466 if (GET_CODE (body) == ASM_INPUT)
8467 template = XSTR (body, 0);
8468 else if (asm_noperands (body) >= 0)
8469 template
8470 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8471 else
8472 return 0;
8475 int ppi_adjust = 0;
8478 c = *template++;
8479 while (c == ' ' || c == '\t');
8480 /* all sh-dsp parallel-processing insns start with p.
8481 The only non-ppi sh insn starting with p is pref.
8482 The only ppi starting with pr is prnd. */
8483 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8484 ppi_adjust = 2;
8485 /* The repeat pseudo-insn expands to three insns, a total of
8486 six bytes in size. */
8487 else if ((c == 'r' || c == 'R')
8488 && ! strncasecmp ("epeat", template, 5))
8489 ppi_adjust = 4;
8490 while (c && c != '\n'
8491 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8493 /* If this is a label, it is obviously not a ppi insn. */
8494 if (c == ':' && maybe_label)
8496 ppi_adjust = 0;
8497 break;
8499 else if (c == '\'' || c == '"')
8500 maybe_label = 0;
8501 c = *template++;
8503 sum += ppi_adjust;
8504 maybe_label = c != ':';
8506 while (c);
8507 return sum;
8509 return 0;
8512 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8513 isn't protected by a PIC unspec. */
8515 nonpic_symbol_mentioned_p (rtx x)
8517 register const char *fmt;
8518 register int i;
8520 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8521 || GET_CODE (x) == PC)
8522 return 1;
8524 /* We don't want to look into the possible MEM location of a
8525 CONST_DOUBLE, since we're not going to use it, in general. */
8526 if (GET_CODE (x) == CONST_DOUBLE)
8527 return 0;
8529 if (GET_CODE (x) == UNSPEC
8530 && (XINT (x, 1) == UNSPEC_PIC
8531 || XINT (x, 1) == UNSPEC_GOT
8532 || XINT (x, 1) == UNSPEC_GOTOFF
8533 || XINT (x, 1) == UNSPEC_GOTPLT
8534 || XINT (x, 1) == UNSPEC_GOTTPOFF
8535 || XINT (x, 1) == UNSPEC_DTPOFF
8536 || XINT (x, 1) == UNSPEC_PLT))
8537 return 0;
8539 fmt = GET_RTX_FORMAT (GET_CODE (x));
8540 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8542 if (fmt[i] == 'E')
8544 register int j;
8546 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8547 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8548 return 1;
8550 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8551 return 1;
8554 return 0;
8557 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8558 @GOTOFF in `reg'. */
8560 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8561 rtx reg)
8563 if (tls_symbolic_operand (orig, Pmode))
8564 return orig;
8566 if (GET_CODE (orig) == LABEL_REF
8567 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8569 if (reg == 0)
8570 reg = gen_reg_rtx (Pmode);
8572 emit_insn (gen_symGOTOFF2reg (reg, orig));
8573 return reg;
8575 else if (GET_CODE (orig) == SYMBOL_REF)
8577 if (reg == 0)
8578 reg = gen_reg_rtx (Pmode);
8580 emit_insn (gen_symGOT2reg (reg, orig));
8581 return reg;
8583 return orig;
8586 /* Mark the use of a constant in the literal table. If the constant
8587 has multiple labels, make it unique. */
8588 static rtx
8589 mark_constant_pool_use (rtx x)
8591 rtx insn, lab, pattern;
8593 if (x == NULL)
8594 return x;
8596 switch (GET_CODE (x))
8598 case LABEL_REF:
8599 x = XEXP (x, 0);
8600 case CODE_LABEL:
8601 break;
8602 default:
8603 return x;
8606 /* Get the first label in the list of labels for the same constant
8607 and delete the other labels in the list. */
8608 lab = x;
8609 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8611 if (GET_CODE (insn) != CODE_LABEL
8612 || LABEL_REFS (insn) != NEXT_INSN (insn))
8613 break;
8614 lab = insn;
8617 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8618 INSN_DELETED_P (insn) = 1;
8620 /* Mark constants in a window. */
8621 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8623 if (GET_CODE (insn) != INSN)
8624 continue;
8626 pattern = PATTERN (insn);
8627 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8628 continue;
8630 switch (XINT (pattern, 1))
8632 case UNSPECV_CONST2:
8633 case UNSPECV_CONST4:
8634 case UNSPECV_CONST8:
8635 XVECEXP (pattern, 0, 1) = const1_rtx;
8636 break;
8637 case UNSPECV_WINDOW_END:
8638 if (XVECEXP (pattern, 0, 0) == x)
8639 return lab;
8640 break;
8641 case UNSPECV_CONST_END:
8642 return lab;
8643 default:
8644 break;
8648 return lab;
8651 /* Return true if it's possible to redirect BRANCH1 to the destination
8652 of an unconditional jump BRANCH2. We only want to do this if the
8653 resulting branch will have a short displacement. */
8655 sh_can_redirect_branch (rtx branch1, rtx branch2)
8657 if (flag_expensive_optimizations && simplejump_p (branch2))
8659 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8660 rtx insn;
8661 int distance;
8663 for (distance = 0, insn = NEXT_INSN (branch1);
8664 insn && distance < 256;
8665 insn = PREV_INSN (insn))
8667 if (insn == dest)
8668 return 1;
8669 else
8670 distance += get_attr_length (insn);
8672 for (distance = 0, insn = NEXT_INSN (branch1);
8673 insn && distance < 256;
8674 insn = NEXT_INSN (insn))
8676 if (insn == dest)
8677 return 1;
8678 else
8679 distance += get_attr_length (insn);
8682 return 0;
8685 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8687 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8688 unsigned int new_reg)
8690 /* Interrupt functions can only use registers that have already been
8691 saved by the prologue, even if they would normally be
8692 call-clobbered. */
8694 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8695 return 0;
8697 return 1;
8700 /* Function to update the integer COST
8701 based on the relationship between INSN that is dependent on
8702 DEP_INSN through the dependence LINK. The default is to make no
8703 adjustment to COST. This can be used for example to specify to
8704 the scheduler that an output- or anti-dependence does not incur
8705 the same cost as a data-dependence. The return value should be
8706 the new value for COST. */
8707 static int
8708 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8710 rtx reg, use_pat;
8712 if (TARGET_SHMEDIA)
8714 /* On SHmedia, if the dependence is an anti-dependence or
8715 output-dependence, there is no cost. */
8716 if (REG_NOTE_KIND (link) != 0)
8718 /* However, dependencies between target register loads and
8719 uses of the register in a subsequent block that are separated
8720 by a conditional branch are not modelled - we have to make do with
8721 the anti-dependency between the target register load and the
8722 conditional branch that ends the current block. */
8723 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8724 && GET_CODE (PATTERN (dep_insn)) == SET
8725 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8726 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8727 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8729 int orig_cost = cost;
8730 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8731 rtx target = ((! note
8732 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8733 ? insn : JUMP_LABEL (insn));
8734 /* On the likely path, the branch costs 1, on the unlikely path,
8735 it costs 3. */
8736 cost--;
8738 target = next_active_insn (target);
8739 while (target && ! flow_dependent_p (target, dep_insn)
8740 && --cost > 0);
8741 /* If two branches are executed in immediate succession, with the
8742 first branch properly predicted, this causes a stall at the
8743 second branch, hence we won't need the target for the
8744 second branch for two cycles after the launch of the first
8745 branch. */
8746 if (cost > orig_cost - 2)
8747 cost = orig_cost - 2;
8749 else
8750 cost = 0;
8753 else if (get_attr_is_mac_media (insn)
8754 && get_attr_is_mac_media (dep_insn))
8755 cost = 1;
8757 else if (! reload_completed
8758 && GET_CODE (PATTERN (insn)) == SET
8759 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8760 && GET_CODE (PATTERN (dep_insn)) == SET
8761 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8762 && cost < 4)
8763 cost = 4;
8764 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8765 that is needed at the target. */
8766 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8767 && ! flow_dependent_p (insn, dep_insn))
8768 cost--;
8770 else if (REG_NOTE_KIND (link) == 0)
8772 enum attr_type type;
8773 rtx dep_set;
8775 if (recog_memoized (insn) < 0
8776 || recog_memoized (dep_insn) < 0)
8777 return cost;
8779 dep_set = single_set (dep_insn);
8781 /* The latency that we specify in the scheduling description refers
8782 to the actual output, not to an auto-increment register; for that,
8783 the latency is one. */
8784 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8786 rtx set = single_set (insn);
8788 if (set
8789 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8790 && (!MEM_P (SET_DEST (set))
8791 || !reg_mentioned_p (SET_DEST (dep_set),
8792 XEXP (SET_DEST (set), 0))))
8793 cost = 1;
8795 /* The only input for a call that is timing-critical is the
8796 function's address. */
8797 if (GET_CODE (insn) == CALL_INSN)
8799 rtx call = PATTERN (insn);
8801 if (GET_CODE (call) == PARALLEL)
8802 call = XVECEXP (call, 0 ,0);
8803 if (GET_CODE (call) == SET)
8804 call = SET_SRC (call);
8805 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8806 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8807 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8808 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8809 cost -= TARGET_SH4_300 ? 3 : 6;
8811 /* Likewise, the most timing critical input for an sfuncs call
8812 is the function address. However, sfuncs typically start
8813 using their arguments pretty quickly.
8814 Assume a four cycle delay for SH4 before they are needed.
8815 Cached ST40-300 calls are quicker, so assume only a one
8816 cycle delay there.
8817 ??? Maybe we should encode the delays till input registers
8818 are needed by sfuncs into the sfunc call insn. */
8819 /* All sfunc calls are parallels with at least four components.
8820 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8821 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8822 && XVECLEN (PATTERN (insn), 0) >= 4
8823 && (reg = sfunc_uses_reg (insn)))
8825 if (! reg_set_p (reg, dep_insn))
8826 cost -= TARGET_SH4_300 ? 1 : 4;
8828 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8830 enum attr_type dep_type = get_attr_type (dep_insn);
8832 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8833 cost--;
8834 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8835 && (type = get_attr_type (insn)) != TYPE_CALL
8836 && type != TYPE_SFUNC)
8837 cost--;
8838 /* When the preceding instruction loads the shift amount of
8839 the following SHAD/SHLD, the latency of the load is increased
8840 by 1 cycle. */
8841 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8842 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8843 && reg_overlap_mentioned_p (SET_DEST (dep_set),
8844 XEXP (SET_SRC (single_set (insn)),
8845 1)))
8846 cost++;
8847 /* When an LS group instruction with a latency of less than
8848 3 cycles is followed by a double-precision floating-point
8849 instruction, FIPR, or FTRV, the latency of the first
8850 instruction is increased to 3 cycles. */
8851 else if (cost < 3
8852 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8853 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8854 cost = 3;
8855 /* The lsw register of a double-precision computation is ready one
8856 cycle earlier. */
8857 else if (reload_completed
8858 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8859 && (use_pat = single_set (insn))
8860 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8861 SET_SRC (use_pat)))
8862 cost -= 1;
8864 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8865 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8866 cost -= 1;
8868 else if (TARGET_SH4_300)
8870 /* Stores need their input register two cycles later. */
8871 if (dep_set && cost >= 1
8872 && ((type = get_attr_type (insn)) == TYPE_STORE
8873 || type == TYPE_PSTORE
8874 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8876 rtx set = single_set (insn);
8878 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8879 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8881 cost -= 2;
8882 /* But don't reduce the cost below 1 if the address depends
8883 on a side effect of dep_insn. */
8884 if (cost < 1
8885 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
8886 cost = 1;
8891 /* An anti-dependence penalty of two applies if the first insn is a double
8892 precision fadd / fsub / fmul. */
8893 else if (!TARGET_SH4_300
8894 && REG_NOTE_KIND (link) == REG_DEP_ANTI
8895 && recog_memoized (dep_insn) >= 0
8896 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
8897 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
8898 /* A lot of alleged anti-flow dependences are fake,
8899 so check this one is real. */
8900 && flow_dependent_p (dep_insn, insn))
8901 cost = 2;
8903 return cost;
8906 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8907 if DEP_INSN is anti-flow dependent on INSN. */
8908 static int
8909 flow_dependent_p (rtx insn, rtx dep_insn)
8911 rtx tmp = PATTERN (insn);
8913 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8914 return tmp == NULL_RTX;
8917 /* A helper function for flow_dependent_p called through note_stores. */
8918 static void
8919 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
8921 rtx * pinsn = (rtx *) data;
8923 if (*pinsn && reg_referenced_p (x, *pinsn))
8924 *pinsn = NULL_RTX;
8927 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8928 'special function' patterns (type sfunc) that clobber pr, but that
8929 do not look like function calls to leaf_function_p. Hence we must
8930 do this extra check. */
8931 static int
8932 sh_pr_n_sets (void)
8934 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8937 /* Return where to allocate pseudo for a given hard register initial
8938 value. */
8939 static rtx
8940 sh_allocate_initial_value (rtx hard_reg)
8942 rtx x;
8944 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8946 if (current_function_is_leaf
8947 && ! sh_pr_n_sets ()
8948 && ! (TARGET_SHCOMPACT
8949 && ((current_function_args_info.call_cookie
8950 & ~ CALL_COOKIE_RET_TRAMP (1))
8951 || current_function_saves_all_registers)))
8952 x = hard_reg;
8953 else
8954 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8956 else
8957 x = NULL_RTX;
8959 return x;
8962 /* This function returns "2" to indicate dual issue for the SH4
8963 processor. To be used by the DFA pipeline description. */
8964 static int
8965 sh_issue_rate (void)
8967 if (TARGET_SUPERSCALAR)
8968 return 2;
8969 else
8970 return 1;
8973 /* Functions for ready queue reordering for sched1. */
8975 /* Get weight for mode for a set x. */
8976 static short
8977 find_set_regmode_weight (rtx x, enum machine_mode mode)
8979 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8980 return 1;
8981 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8983 if (GET_CODE (SET_DEST (x)) == REG)
8985 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8986 return 1;
8987 else
8988 return 0;
8990 return 1;
8992 return 0;
8995 /* Get regmode weight for insn. */
8996 static short
8997 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8999 short reg_weight = 0;
9000 rtx x;
9002 /* Increment weight for each register born here. */
9003 x = PATTERN (insn);
9004 reg_weight += find_set_regmode_weight (x, mode);
9005 if (GET_CODE (x) == PARALLEL)
9007 int j;
9008 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9010 x = XVECEXP (PATTERN (insn), 0, j);
9011 reg_weight += find_set_regmode_weight (x, mode);
9014 /* Decrement weight for each register that dies here. */
9015 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9017 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9019 rtx note = XEXP (x, 0);
9020 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9021 reg_weight--;
9024 return reg_weight;
9027 /* Calculate regmode weights for all insns of a basic block. */
9028 static void
9029 find_regmode_weight (basic_block b, enum machine_mode mode)
9031 rtx insn, next_tail, head, tail;
9033 get_ebb_head_tail (b, b, &head, &tail);
9034 next_tail = NEXT_INSN (tail);
9036 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9038 /* Handle register life information. */
9039 if (!INSN_P (insn))
9040 continue;
9042 if (mode == SFmode)
9043 INSN_REGMODE_WEIGHT (insn, mode) =
9044 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9045 else if (mode == SImode)
9046 INSN_REGMODE_WEIGHT (insn, mode) =
9047 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9051 /* Comparison function for ready queue sorting. */
9052 static int
9053 rank_for_reorder (const void *x, const void *y)
9055 rtx tmp = *(const rtx *) y;
9056 rtx tmp2 = *(const rtx *) x;
9058 /* The insn in a schedule group should be issued first. */
9059 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9060 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9062 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9063 minimizes instruction movement, thus minimizing sched's effect on
9064 register pressure. */
9065 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9068 /* Resort the array A in which only the element at index N may be out of order. */
9069 static void
9070 swap_reorder (rtx *a, int n)
9072 rtx insn = a[n - 1];
9073 int i = n - 2;
9075 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9077 a[i + 1] = a[i];
9078 i -= 1;
9080 a[i + 1] = insn;
9083 #define SCHED_REORDER(READY, N_READY) \
9084 do \
9086 if ((N_READY) == 2) \
9087 swap_reorder (READY, N_READY); \
9088 else if ((N_READY) > 2) \
9089 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9091 while (0)
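/* Design note (an observation on the code above, not original commentary):
   with exactly two ready insns only the newly added element can be out of
   place, so the single insertion done by swap_reorder is enough; larger
   queues fall back to a full qsort with rank_for_reorder.  */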
9093 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9094 macro. */
9095 static void
9096 ready_reorder (rtx *ready, int nready)
9098 SCHED_REORDER (ready, nready);
9101 /* Count life regions of r0 for a block. */
9102 static int
9103 find_r0_life_regions (basic_block b)
9105 rtx end, insn;
9106 rtx pset;
9107 rtx r0_reg;
9108 int live;
9109 int set;
9110 int death = 0;
9112 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9114 set = 1;
9115 live = 1;
9117 else
9119 set = 0;
9120 live = 0;
9123 insn = BB_HEAD (b);
9124 end = BB_END (b);
9125 r0_reg = gen_rtx_REG (SImode, R0_REG);
9126 while (1)
9128 if (INSN_P (insn))
9130 if (find_regno_note (insn, REG_DEAD, R0_REG))
9132 death++;
9133 live = 0;
9135 if (!live
9136 && (pset = single_set (insn))
9137 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9138 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9140 set++;
9141 live = 1;
9144 if (insn == end)
9145 break;
9146 insn = NEXT_INSN (insn);
9148 return set - death;
9151 /* Calculate regmode weights for all insns of all basic blocks. */
9152 static void
9153 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9154 int verbose ATTRIBUTE_UNUSED,
9155 int old_max_uid)
9157 basic_block b;
9159 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9160 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9161 r0_life_regions = 0;
9163 FOR_EACH_BB_REVERSE (b)
9165 find_regmode_weight (b, SImode);
9166 find_regmode_weight (b, SFmode);
9167 if (!reload_completed)
9168 r0_life_regions += find_r0_life_regions (b);
9171 CURR_REGMODE_PRESSURE (SImode) = 0;
9172 CURR_REGMODE_PRESSURE (SFmode) = 0;
9176 /* Cleanup. */
9177 static void
9178 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9179 int verbose ATTRIBUTE_UNUSED)
9181 if (regmode_weight[0])
9183 free (regmode_weight[0]);
9184 regmode_weight[0] = NULL;
9186 if (regmode_weight[1])
9188 free (regmode_weight[1]);
9189 regmode_weight[1] = NULL;
9193 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9194 keep count of register pressures on SImode and SFmode. */
9195 static int
9196 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9197 int sched_verbose ATTRIBUTE_UNUSED,
9198 rtx insn,
9199 int can_issue_more)
9201 if (GET_CODE (PATTERN (insn)) != USE
9202 && GET_CODE (PATTERN (insn)) != CLOBBER)
9203 cached_can_issue_more = can_issue_more - 1;
9204 else
9205 cached_can_issue_more = can_issue_more;
9207 if (reload_completed)
9208 return cached_can_issue_more;
9210 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9211 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9213 return cached_can_issue_more;
9216 static void
9217 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9218 int verbose ATTRIBUTE_UNUSED,
9219 int veclen ATTRIBUTE_UNUSED)
9221 CURR_REGMODE_PRESSURE (SImode) = 0;
9222 CURR_REGMODE_PRESSURE (SFmode) = 0;
9225 /* Some magic numbers. */
9226 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9227 functions that already have high pressure on r0. */
9228 #define R0_MAX_LIFE_REGIONS 2
9229 /* Register Pressure thresholds for SImode and SFmode registers. */
9230 #define SIMODE_MAX_WEIGHT 5
9231 #define SFMODE_MAX_WEIGHT 10
9233 /* Return true if the pressure is high for MODE. */
9234 static short
9235 high_pressure (enum machine_mode mode)
9237 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9238 functions that already have high pressure on r0. */
9239 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9240 return 1;
9242 if (mode == SFmode)
9243 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9244 else
9245 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9248 /* Reorder ready queue if register pressure is high. */
9249 static int
9250 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9251 int sched_verbose ATTRIBUTE_UNUSED,
9252 rtx *ready,
9253 int *n_readyp,
9254 int clock_var ATTRIBUTE_UNUSED)
9256 if (reload_completed)
9257 return sh_issue_rate ();
9259 if (high_pressure (SFmode) || high_pressure (SImode))
9261 ready_reorder (ready, *n_readyp);
9264 return sh_issue_rate ();
9267 /* Skip cycles if the current register pressure is high. */
9268 static int
9269 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9270 int sched_verbose ATTRIBUTE_UNUSED,
9271 rtx *ready ATTRIBUTE_UNUSED,
9272 int *n_readyp ATTRIBUTE_UNUSED,
9273 int clock_var ATTRIBUTE_UNUSED)
9275 if (reload_completed)
9276 return cached_can_issue_more;
9278 if (high_pressure(SFmode) || high_pressure (SImode))
9279 skip_cycles = 1;
9281 return cached_can_issue_more;
9284 /* Skip cycles without sorting the ready queue. This will move insns from
9285 Q -> R. If this is the last cycle we are skipping, allow sorting of the
9286 ready queue by sh_reorder. */
9288 /* Generally, skipping this many cycles is sufficient for all insns to move
9289 from Q -> R. */
9290 #define MAX_SKIPS 8
9292 static int
9293 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9294 int sched_verbose ATTRIBUTE_UNUSED,
9295 rtx insn ATTRIBUTE_UNUSED,
9296 int last_clock_var,
9297 int clock_var,
9298 int *sort_p)
9300 if (reload_completed)
9301 return 0;
9303 if (skip_cycles)
9305 if ((clock_var - last_clock_var) < MAX_SKIPS)
9307 *sort_p = 0;
9308 return 1;
9310 /* If this is the last cycle we are skipping, allow reordering of R. */
9311 if ((clock_var - last_clock_var) == MAX_SKIPS)
9313 *sort_p = 1;
9314 return 1;
9318 skip_cycles = 0;
9320 return 0;
9323 /* SHmedia requires registers for branches, so we can't generate new
9324 branches past reload. */
9325 static bool
9326 sh_cannot_modify_jumps_p (void)
9328 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9331 static int
9332 sh_target_reg_class (void)
9334 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9337 static bool
9338 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9340 HARD_REG_SET dummy;
9341 #if 0
9342 rtx insn;
9343 #endif
9345 if (! shmedia_space_reserved_for_target_registers)
9346 return 0;
9347 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9348 return 0;
9349 if (calc_live_regs (&dummy) >= 6 * 8)
9350 return 1;
9351 return 0;
9354 static bool
9355 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9357 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9361 On the SH1..SH4, the trampoline looks like
9362 2 0002 D202 mov.l l2,r2
9363 1 0000 D301 mov.l l1,r3
9364 3 0004 422B jmp @r2
9365 4 0006 0009 nop
9366 5 0008 00000000 l1: .long area
9367 6 000c 00000000 l2: .long function
9369 SH5 (compact) uses r1 instead of r3 for the static chain. */
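/* A quick sketch of the layout, derived from the stores in
   sh_initialize_trampoline below: the SH1..SH4 trampoline is 16 bytes -- the
   four opcodes above followed by the two 32-bit literals -- and is filled in
   with SImode stores at offsets 0, 4, 8 (static chain) and 12 (function
   address).  */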
9372 /* Emit RTL insns to initialize the variable parts of a trampoline.
9373 FNADDR is an RTX for the address of the function's pure code.
9374 CXT is an RTX for the static chain value for the function. */
9376 void
9377 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9379 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9381 if (TARGET_SHMEDIA64)
9383 rtx tramp_templ;
9384 int fixed_len;
9386 rtx movi1 = GEN_INT (0xcc000010);
9387 rtx shori1 = GEN_INT (0xc8000010);
9388 rtx src, dst;
9390 /* The following trampoline works within a +- 128 KB range for cxt:
9391 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9392 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9393 gettr tr1,r1; blink tr0,r63 */
9394 /* Address rounding makes it hard to compute the exact bounds of the
9395 offset for this trampoline, but we have a rather generous offset
9396 range, so frame_offset should do fine as an upper bound. */
9397 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9399 /* ??? could optimize this trampoline initialization
9400 by writing DImode words with two insns each. */
9401 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9402 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9403 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9404 insn = gen_rtx_AND (DImode, insn, mask);
9405 /* Or in ptb/u .,tr1 pattern */
9406 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9407 insn = force_operand (insn, NULL_RTX);
9408 insn = gen_lowpart (SImode, insn);
9409 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9410 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9411 insn = gen_rtx_AND (DImode, insn, mask);
9412 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9413 insn = gen_lowpart (SImode, insn);
9414 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9415 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9416 insn = gen_rtx_AND (DImode, insn, mask);
9417 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9418 insn = gen_lowpart (SImode, insn);
9419 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9420 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9421 insn = gen_rtx_AND (DImode, insn, mask);
9422 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9423 insn = gen_lowpart (SImode, insn);
9424 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9425 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9426 insn = gen_rtx_AND (DImode, insn, mask);
9427 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9428 insn = gen_lowpart (SImode, insn);
9429 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9430 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9431 GEN_INT (0x6bf10600));
9432 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9433 GEN_INT (0x4415fc10));
9434 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9435 GEN_INT (0x4401fff0));
9436 emit_insn (gen_ic_invalidate_line (tramp));
9437 return;
9439 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9440 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9442 tramp_templ = gen_datalabel_ref (tramp_templ);
9443 dst = tramp_mem;
9444 src = gen_const_mem (BLKmode, tramp_templ);
9445 set_mem_align (dst, 256);
9446 set_mem_align (src, 64);
9447 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9449 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9450 emit_move_insn (adjust_address (tramp_mem, Pmode,
9451 fixed_len + GET_MODE_SIZE (Pmode)),
9452 cxt);
9453 emit_insn (gen_ic_invalidate_line (tramp));
9454 return;
9456 else if (TARGET_SHMEDIA)
9458 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9459 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9460 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9461 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9462 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9463 rotated 10 right, and the higher 16 bits of every 32 selected. */
9464 rtx movishori
9465 = force_reg (V2HImode, (simplify_gen_subreg
9466 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9467 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9468 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9470 tramp = force_reg (Pmode, tramp);
9471 fnaddr = force_reg (SImode, fnaddr);
9472 cxt = force_reg (SImode, cxt);
9473 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9474 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9475 movishori));
9476 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9477 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9478 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9479 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9480 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9481 gen_rtx_SUBREG (V2HImode, cxt, 0),
9482 movishori));
9483 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9484 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9485 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9486 if (TARGET_LITTLE_ENDIAN)
9488 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9489 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9491 else
9493 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9494 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9496 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9497 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9498 emit_insn (gen_ic_invalidate_line (tramp));
9499 return;
9501 else if (TARGET_SHCOMPACT)
9503 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9504 return;
9506 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9507 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9508 SImode));
9509 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9510 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9511 SImode));
9512 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9513 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
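  /* Illustrative decoding of the words stored above (assuming the standard
     SH1..SH4 encodings; little-endian halfword order shown):

	d202	mov.l	@(2,pc),r2	! r2 <- fnaddr  (word at offset 12)
	d301	mov.l	@(1,pc),r3	! r3 <- cxt     (word at offset 8)
	422b	jmp	@r2
	0009	nop			! delay slot

     i.e. the trampoline loads the static chain value and the target
     address from the two words that follow it and jumps to the target.  */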
9514 if (TARGET_HARVARD)
9516 if (!TARGET_INLINE_IC_INVALIDATE
9517 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9518 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9519 FUNCTION_ORDINARY),
9520 0, VOIDmode, 1, tramp, SImode);
9521 else
9522 emit_insn (gen_ic_invalidate_line (tramp));
9526 /* FIXME: This is overly conservative. A SHcompact function that
9527 receives arguments ``by reference'' will have them stored in its
9528 own stack frame, so it must not pass pointers or references to
9529 these arguments to other functions by means of sibling calls. */
9530 /* If PIC, we cannot make sibling calls to global functions
9531 because the PLT requires r12 to be live. */
9532 static bool
9533 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9535 return (1
9536 && (! TARGET_SHCOMPACT
9537 || current_function_args_info.stack_regs == 0)
9538 && ! sh_cfun_interrupt_handler_p ()
9539 && (! flag_pic
9540 || (decl && ! TREE_PUBLIC (decl))
9541 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9544 /* Machine specific built-in functions. */
9546 struct builtin_description
9548 const enum insn_code icode;
9549 const char *const name;
9550 int signature;
9553 /* describe number and signedness of arguments; arg[0] == result
9554 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
9555 /* 9: 64-bit pointer, 10: 32-bit pointer */
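/* Example reading of the table below (illustrative): SH_BLTIN_MAC_HISI is
   { 1, 4, 4, 1 }, i.e. an unsigned result, two mode-typed ("don't care")
   operands and a final unsigned operand; SH_BLTIN_PV is { 0, 8 }, i.e. no
   result (void) and a single pointer argument.  */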
9556 static const char signature_args[][4] =
9558 #define SH_BLTIN_V2SI2 0
9559 { 4, 4 },
9560 #define SH_BLTIN_V4HI2 1
9561 { 4, 4 },
9562 #define SH_BLTIN_V2SI3 2
9563 { 4, 4, 4 },
9564 #define SH_BLTIN_V4HI3 3
9565 { 4, 4, 4 },
9566 #define SH_BLTIN_V8QI3 4
9567 { 4, 4, 4 },
9568 #define SH_BLTIN_MAC_HISI 5
9569 { 1, 4, 4, 1 },
9570 #define SH_BLTIN_SH_HI 6
9571 { 4, 4, 1 },
9572 #define SH_BLTIN_SH_SI 7
9573 { 4, 4, 1 },
9574 #define SH_BLTIN_V4HI2V2SI 8
9575 { 4, 4, 4 },
9576 #define SH_BLTIN_V4HI2V8QI 9
9577 { 4, 4, 4 },
9578 #define SH_BLTIN_SISF 10
9579 { 4, 2 },
9580 #define SH_BLTIN_LDUA_L 11
9581 { 2, 10 },
9582 #define SH_BLTIN_LDUA_Q 12
9583 { 1, 10 },
9584 #define SH_BLTIN_STUA_L 13
9585 { 0, 10, 2 },
9586 #define SH_BLTIN_STUA_Q 14
9587 { 0, 10, 1 },
9588 #define SH_BLTIN_LDUA_L64 15
9589 { 2, 9 },
9590 #define SH_BLTIN_LDUA_Q64 16
9591 { 1, 9 },
9592 #define SH_BLTIN_STUA_L64 17
9593 { 0, 9, 2 },
9594 #define SH_BLTIN_STUA_Q64 18
9595 { 0, 9, 1 },
9596 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9597 #define SH_BLTIN_2 19
9598 #define SH_BLTIN_SU 19
9599 { 1, 2 },
9600 #define SH_BLTIN_3 20
9601 #define SH_BLTIN_SUS 20
9602 { 2, 2, 1 },
9603 #define SH_BLTIN_PSSV 21
9604 { 0, 8, 2, 2 },
9605 #define SH_BLTIN_XXUU 22
9606 #define SH_BLTIN_UUUU 22
9607 { 1, 1, 1, 1 },
9608 #define SH_BLTIN_PV 23
9609 { 0, 8 },
9611 /* mcmv: operands considered unsigned. */
9612 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9613 /* mperm: control value considered unsigned int. */
9614 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9615 /* mshards_q: returns signed short. */
9616 /* nsb: takes long long arg, returns unsigned char. */
9617 static const struct builtin_description bdesc[] =
9619 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9620 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9621 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9622 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9623 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9624 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9625 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9626 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9627 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9628 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9629 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9630 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9631 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9632 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9633 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9634 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9635 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9636 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9637 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9638 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9639 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9640 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9641 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9642 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9643 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9644 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9645 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9646 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9647 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9648 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9649 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9650 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9651 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9652 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9653 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9654 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9655 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9656 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9657 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9658 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9659 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9660 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9661 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9662 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9663 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9664 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9665 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9666 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9667 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9668 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9669 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9670 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9671 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9672 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9673 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9674 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9675 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9676 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9677 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9678 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9679 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9680 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9681 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9682 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9683 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9684 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9685 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9686 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9687 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9688 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9689 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9690 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9691 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9692 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9693 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9694 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9695 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9696 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9697 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9698 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9699 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9700 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9701 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9702 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9705 static void
9706 sh_media_init_builtins (void)
9708 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9709 const struct builtin_description *d;
9711 memset (shared, 0, sizeof shared);
9712 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9714 tree type, arg_type = 0;
9715 int signature = d->signature;
9716 int i;
9718 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9719 type = shared[signature];
9720 else
9722 int has_result = signature_args[signature][0] != 0;
9724 if ((signature_args[signature][1] & 8)
9725 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9726 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9727 continue;
9728 if (! TARGET_FPU_ANY
9729 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9730 continue;
9731 type = void_list_node;
9732 for (i = 3; ; i--)
9734 int arg = signature_args[signature][i];
9735 int opno = i - 1 + has_result;
9737 if (arg & 8)
9738 arg_type = ptr_type_node;
9739 else if (arg)
9740 arg_type = (*lang_hooks.types.type_for_mode)
9741 (insn_data[d->icode].operand[opno].mode,
9742 (arg & 1));
9743 else if (i)
9744 continue;
9745 else
9746 arg_type = void_type_node;
9747 if (i == 0)
9748 break;
9749 type = tree_cons (NULL_TREE, arg_type, type);
9751 type = build_function_type (arg_type, type);
9752 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9753 shared[signature] = type;
9755 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9756 NULL, NULL_TREE);
9760 /* Implements target hook vector_mode_supported_p. */
9761 bool
9762 sh_vector_mode_supported_p (enum machine_mode mode)
9764 if (TARGET_FPU_ANY
9765 && ((mode == V2SFmode)
9766 || (mode == V4SFmode)
9767 || (mode == V16SFmode)))
9768 return true;
9770 else if (TARGET_SHMEDIA
9771 && ((mode == V8QImode)
9772 || (mode == V2HImode)
9773 || (mode == V4HImode)
9774 || (mode == V2SImode)))
9775 return true;
9777 return false;
9780 /* Implements target hook dwarf_calling_convention. Return an enum
9781 of dwarf_calling_convention. */
9783 sh_dwarf_calling_convention (const_tree func)
9785 if (sh_attr_renesas_p (func))
9786 return DW_CC_GNU_renesas_sh;
9788 return DW_CC_normal;
9791 static void
9792 sh_init_builtins (void)
9794 if (TARGET_SHMEDIA)
9795 sh_media_init_builtins ();
9798 /* Expand an expression EXP that calls a built-in function,
9799 with result going to TARGET if that's convenient
9800 (and in mode MODE if that's convenient).
9801 SUBTARGET may be used as the target for computing one of EXP's operands.
9802 IGNORE is nonzero if the value is to be ignored. */
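/* A minimal illustration (builtin name taken from bdesc above, exact
   source-level operand types elided): a call such as

       res = __builtin_sh_media_MSHFLO_W (a, b);

   reaches this expander with signature SH_BLTIN_V4HI3 = { 4, 4, 4 }, so a
   result operand plus two source operands are collected (nop == 3) and the
   three-operand genfun case below emits the mshflo_w pattern.  */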
9804 static rtx
9805 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9806 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9808 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9809 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9810 const struct builtin_description *d = &bdesc[fcode];
9811 enum insn_code icode = d->icode;
9812 int signature = d->signature;
9813 enum machine_mode tmode = VOIDmode;
9814 int nop = 0, i;
9815 rtx op[4];
9816 rtx pat = 0;
9818 if (signature_args[signature][0])
9820 if (ignore)
9821 return 0;
9823 tmode = insn_data[icode].operand[0].mode;
9824 if (! target
9825 || GET_MODE (target) != tmode
9826 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9827 target = gen_reg_rtx (tmode);
9828 op[nop++] = target;
9830 else
9831 target = 0;
9833 for (i = 1; i <= 3; i++, nop++)
9835 tree arg;
9836 enum machine_mode opmode, argmode;
9837 tree optype;
9839 if (! signature_args[signature][i])
9840 break;
9841 arg = CALL_EXPR_ARG (exp, i - 1);
9842 if (arg == error_mark_node)
9843 return const0_rtx;
9844 if (signature_args[signature][i] & 8)
9846 opmode = ptr_mode;
9847 optype = ptr_type_node;
9849 else
9851 opmode = insn_data[icode].operand[nop].mode;
9852 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9854 argmode = TYPE_MODE (TREE_TYPE (arg));
9855 if (argmode != opmode)
9856 arg = build1 (NOP_EXPR, optype, arg);
9857 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9858 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9859 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9862 switch (nop)
9864 case 1:
9865 pat = (*insn_data[d->icode].genfun) (op[0]);
9866 break;
9867 case 2:
9868 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9869 break;
9870 case 3:
9871 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9872 break;
9873 case 4:
9874 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9875 break;
9876 default:
9877 gcc_unreachable ();
9879 if (! pat)
9880 return 0;
9881 emit_insn (pat);
9882 return target;
9885 void
9886 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9888 rtx sel0 = const0_rtx;
9889 rtx sel1 = const1_rtx;
9890 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9891 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9893 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9894 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9897 void
9898 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9900 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9902 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
9903 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
9906 /* Return the class of registers for which a mode change from FROM to TO
9907 is invalid. */
9908 bool
9909 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9910 enum reg_class class)
9912 /* We want to enable the use of SUBREGs as a means to
9913 VEC_SELECT a single element of a vector. */
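  /* Roughly: an rtx like (subreg:SF (reg:V2SF ...) 4) is meant to behave
     like a vec_select of one element.  That works when the vector sits in
     FP registers (each SFmode element is one register) but not in general
     registers, so the mode change below is declared invalid for any class
     that intersects GENERAL_REGS.  */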
9914 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9915 return (reg_classes_intersect_p (GENERAL_REGS, class));
9917 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9919 if (TARGET_LITTLE_ENDIAN)
9921 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9922 return reg_classes_intersect_p (DF_REGS, class);
9924 else
9926 if (GET_MODE_SIZE (from) < 8)
9927 return reg_classes_intersect_p (DF_HI_REGS, class);
9930 return 0;
9934 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9935 that label is used. */
9937 void
9938 sh_mark_label (rtx address, int nuses)
9940 if (GOTOFF_P (address))
9942 /* Extract the label or symbol. */
9943 address = XEXP (address, 0);
9944 if (GET_CODE (address) == PLUS)
9945 address = XEXP (address, 0);
9946 address = XVECEXP (address, 0, 0);
9948 if (GET_CODE (address) == LABEL_REF
9949 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9950 LABEL_NUSES (XEXP (address, 0)) += nuses;
9953 /* Compute extra cost of moving data between one register class
9954 and another. */
9956 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9957 uses this information. Hence, the general register <-> floating point
9958 register information here is not used for SFmode. */
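/* For instance (illustrative numbers from the formulas below): moving a
   DFmode value between a general register and an FP register is charged
   (GET_MODE_SIZE (DFmode) + 7) / 8 = 1 times 12 without TARGET_FMOVD,
   8 with TARGET_FMOVD, or 4 on SHmedia; a 16-byte mode doubles each of
   those figures.  */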
9961 sh_register_move_cost (enum machine_mode mode,
9962 enum reg_class srcclass, enum reg_class dstclass)
9964 if (dstclass == T_REGS || dstclass == PR_REGS)
9965 return 10;
9967 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9968 return 4;
9970 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9971 && REGCLASS_HAS_FP_REG (srcclass)
9972 && REGCLASS_HAS_FP_REG (dstclass))
9973 return 4;
9975 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9976 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9978 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9979 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9980 return 9;
9982 if ((REGCLASS_HAS_FP_REG (dstclass)
9983 && REGCLASS_HAS_GENERAL_REG (srcclass))
9984 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9985 && REGCLASS_HAS_FP_REG (srcclass)))
9986 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9987 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9989 if ((dstclass == FPUL_REGS
9990 && REGCLASS_HAS_GENERAL_REG (srcclass))
9991 || (srcclass == FPUL_REGS
9992 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9993 return 5;
9995 if ((dstclass == FPUL_REGS
9996 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9997 || (srcclass == FPUL_REGS
9998 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9999 return 7;
10001 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10002 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10003 return 20;
10005 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10006 if (TARGET_SHMEDIA
10007 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10009 if (sh_gettrcost >= 0)
10010 return sh_gettrcost;
10011 else if (!TARGET_PT_FIXED)
10012 return 100;
10015 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10016 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10017 return 4;
10019 if (TARGET_SHMEDIA
10020 || (TARGET_FMOVD
10021 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10022 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10023 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10025 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10028 static rtx emit_load_ptr (rtx, rtx);
10030 static rtx
10031 emit_load_ptr (rtx reg, rtx addr)
10033 rtx mem = gen_const_mem (ptr_mode, addr);
10035 if (Pmode != ptr_mode)
10036 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10037 return emit_move_insn (reg, mem);
10040 static void
10041 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10042 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10043 tree function)
10045 CUMULATIVE_ARGS cum;
10046 int structure_value_byref = 0;
10047 rtx this, this_value, sibcall, insns, funexp;
10048 tree funtype = TREE_TYPE (function);
10049 int simple_add = CONST_OK_FOR_ADD (delta);
10050 int did_load = 0;
10051 rtx scratch0, scratch1, scratch2;
10052 unsigned i;
10054 reload_completed = 1;
10055 epilogue_completed = 1;
10056 current_function_uses_only_leaf_regs = 1;
10058 emit_note (NOTE_INSN_PROLOGUE_END);
10060 /* Find the "this" pointer. We have such a wide range of ABIs for the
10061 SH that it's best to do this completely machine independently.
10062 "this" is passed as first argument, unless a structure return pointer
10063 comes first, in which case "this" comes second. */
10064 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10065 #ifndef PCC_STATIC_STRUCT_RETURN
10066 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10067 structure_value_byref = 1;
10068 #endif /* not PCC_STATIC_STRUCT_RETURN */
10069 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10071 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10073 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10075 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10077 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10078 static chain pointer (even if you can't have nested virtual functions
10079 right now, someone might implement them sometime), and the rest of the
10080 registers are used for argument passing, are callee-saved, or reserved. */
10081 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10082 -ffixed-reg has been used. */
10083 if (! call_used_regs[0] || fixed_regs[0])
10084 error ("r0 needs to be available as a call-clobbered register");
10085 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10086 if (! TARGET_SH5)
10088 if (call_used_regs[1] && ! fixed_regs[1])
10089 scratch1 = gen_rtx_REG (ptr_mode, 1);
10090 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10091 to the location where struct values are to be returned. */
10092 if (call_used_regs[3] && ! fixed_regs[3])
10093 scratch2 = gen_rtx_REG (Pmode, 3);
10095 else if (TARGET_SHMEDIA)
10097 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10098 if (i != REGNO (scratch0) &&
10099 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10101 scratch1 = gen_rtx_REG (ptr_mode, i);
10102 break;
10104 if (scratch1 == scratch0)
10105 error ("Need a second call-clobbered general purpose register");
10106 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10107 if (call_used_regs[i] && ! fixed_regs[i])
10109 scratch2 = gen_rtx_REG (Pmode, i);
10110 break;
10112 if (scratch2 == scratch0)
10113 error ("Need a call-clobbered target register");
10116 this_value = plus_constant (this, delta);
10117 if (vcall_offset
10118 && (simple_add || scratch0 != scratch1)
10119 && strict_memory_address_p (ptr_mode, this_value))
10121 emit_load_ptr (scratch0, this_value);
10122 did_load = 1;
10125 if (!delta)
10126 ; /* Do nothing. */
10127 else if (simple_add)
10128 emit_move_insn (this, this_value);
10129 else
10131 emit_move_insn (scratch1, GEN_INT (delta));
10132 emit_insn (gen_add2_insn (this, scratch1));
10135 if (vcall_offset)
10137 rtx offset_addr;
10139 if (!did_load)
10140 emit_load_ptr (scratch0, this);
10142 offset_addr = plus_constant (scratch0, vcall_offset);
10143 if (strict_memory_address_p (ptr_mode, offset_addr))
10144 ; /* Do nothing. */
10145 else if (! TARGET_SH5 && scratch0 != scratch1)
10147 /* scratch0 != scratch1, and we have indexed loads. Get better
10148 schedule by loading the offset into r1 and using an indexed
10149 load - then the load of r1 can issue before the load from
10150 (this + delta) finishes. */
10151 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10152 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10154 else if (CONST_OK_FOR_ADD (vcall_offset))
10156 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10157 offset_addr = scratch0;
10159 else if (scratch0 != scratch1)
10161 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10162 emit_insn (gen_add2_insn (scratch0, scratch1));
10163 offset_addr = scratch0;
10165 else
10166 gcc_unreachable (); /* FIXME */
10167 emit_load_ptr (scratch0, offset_addr);
10169 if (Pmode != ptr_mode)
10170 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10171 emit_insn (gen_add2_insn (this, scratch0));
10174 /* Generate a tail call to the target function. */
10175 if (! TREE_USED (function))
10177 assemble_external (function);
10178 TREE_USED (function) = 1;
10180 funexp = XEXP (DECL_RTL (function), 0);
10181 /* If the function is overridden, so is the thunk, hence we don't
10182 need GOT addressing even if this is a public symbol. */
10183 #if 0
10184 if (TARGET_SH1 && ! flag_weak)
10185 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10186 else
10187 #endif
10188 if (TARGET_SH2 && flag_pic)
10190 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10191 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10193 else
10195 if (TARGET_SHMEDIA && flag_pic)
10197 funexp = gen_sym2PIC (funexp);
10198 PUT_MODE (funexp, Pmode);
10200 emit_move_insn (scratch2, funexp);
10201 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10202 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10204 sibcall = emit_call_insn (sibcall);
10205 SIBLING_CALL_P (sibcall) = 1;
10206 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10207 emit_barrier ();
10209 /* Run just enough of rest_of_compilation to do scheduling and get
10210 the insns emitted. Note that use_thunk calls
10211 assemble_start_function and assemble_end_function. */
10213 insn_locators_alloc ();
10214 insns = get_insns ();
10216 #if 0
10217 if (optimize > 0)
10219 /* Initialize the bitmap obstacks. */
10220 bitmap_obstack_initialize (NULL);
10221 bitmap_obstack_initialize (&reg_obstack);
10222 if (! cfun->cfg)
10223 init_flow ();
10224 rtl_register_cfg_hooks ();
10225 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10226 init_rtl_bb_info (EXIT_BLOCK_PTR);
10227 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10228 EXIT_BLOCK_PTR->flags |= BB_RTL;
10229 find_basic_blocks (insns);
10231 if (flag_schedule_insns_after_reload)
10233 life_analysis (PROP_FINAL);
10235 split_all_insns (1);
10237 schedule_insns ();
10239 /* We must split jmp insn in PIC case. */
10240 else if (flag_pic)
10241 split_all_insns_noflow ();
10243 #else
10244 if (optimize > 0)
10246 if (! cfun->cfg)
10247 init_flow ();
10248 split_all_insns_noflow ();
10250 #endif
10252 sh_reorg ();
10254 if (optimize > 0 && flag_delayed_branch)
10255 dbr_schedule (insns);
10257 shorten_branches (insns);
10258 final_start_function (insns, file, 1);
10259 final (insns, file, 1);
10260 final_end_function ();
10262 reload_completed = 0;
10263 epilogue_completed = 0;
10267 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10269 rtx sym;
10271 /* If this is not an ordinary function, the name usually comes from a
10272 string literal or an sprintf buffer. Make sure we use the same
10273 string consistently, so that cse will be able to unify address loads. */
10274 if (kind != FUNCTION_ORDINARY)
10275 name = IDENTIFIER_POINTER (get_identifier (name));
10276 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10277 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10278 if (flag_pic)
10279 switch (kind)
10281 case FUNCTION_ORDINARY:
10282 break;
10283 case SFUNC_GOT:
10285 rtx reg = target ? target : gen_reg_rtx (Pmode);
10287 emit_insn (gen_symGOT2reg (reg, sym));
10288 sym = reg;
10289 break;
10291 case SFUNC_STATIC:
10293 /* ??? To allow cse to work, we use GOTOFF relocations.
10294 We could add combiner patterns to transform this into
10295 straight pc-relative calls with sym2PIC / bsrf when
10296 label load and function call are still 1:1 and in the
10297 same basic block during combine. */
10298 rtx reg = target ? target : gen_reg_rtx (Pmode);
10300 emit_insn (gen_symGOTOFF2reg (reg, sym));
10301 sym = reg;
10302 break;
10305 if (target && sym != target)
10307 emit_move_insn (target, sym);
10308 return target;
10310 return sym;
10313 /* Find the number of a general purpose register in S. */
10314 static int
10315 scavenge_reg (HARD_REG_SET *s)
10317 int r;
10318 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10319 if (TEST_HARD_REG_BIT (*s, r))
10320 return r;
10321 return -1;
10325 sh_get_pr_initial_val (void)
10327 rtx val;
10329 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10330 PR register on SHcompact, because it might be clobbered by the prologue.
10331 We check first if that is known to be the case. */
10332 if (TARGET_SHCOMPACT
10333 && ((current_function_args_info.call_cookie
10334 & ~ CALL_COOKIE_RET_TRAMP (1))
10335 || current_function_saves_all_registers))
10336 return gen_frame_mem (SImode, return_address_pointer_rtx);
10338 /* If we haven't finished rtl generation, there might be a nonlocal label
10339 that we haven't seen yet.
10340 ??? get_hard_reg_initial_val fails if it is called after register
10341 allocation has started, unless it has been called before for the
10342 same register. And even then, we end up in trouble if we didn't use
10343 the register in the same basic block before. So call
10344 get_hard_reg_initial_val now and wrap it in an unspec if we might
10345 need to replace it. */
10346 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10347 combine can put the pseudo returned by get_hard_reg_initial_val into
10348 instructions that need a general purpose register, which will fail to
10349 be recognized when the pseudo becomes allocated to PR. */
10351 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10352 if (TARGET_SH1)
10353 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10354 return val;
10358 sh_expand_t_scc (enum rtx_code code, rtx target)
10360 rtx result = target;
10361 HOST_WIDE_INT val;
10363 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10364 || GET_CODE (sh_compare_op1) != CONST_INT)
10365 return 0;
10366 if (GET_CODE (result) != REG)
10367 result = gen_reg_rtx (SImode);
10368 val = INTVAL (sh_compare_op1);
10369 if ((code == EQ && val == 1) || (code == NE && val == 0))
10370 emit_insn (gen_movt (result));
10371 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10373 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10374 emit_insn (gen_subc (result, result, result));
10375 emit_insn (gen_addsi3 (result, result, const1_rtx));
10377 else if (code == EQ || code == NE)
10378 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10379 else
10380 return 0;
10381 if (result != target)
10382 emit_move_insn (target, result);
10383 return 1;
10386 /* INSN is an sfunc; return the rtx that describes the address used. */
10387 static rtx
10388 extract_sfunc_addr (rtx insn)
10390 rtx pattern, part = NULL_RTX;
10391 int len, i;
10393 pattern = PATTERN (insn);
10394 len = XVECLEN (pattern, 0);
10395 for (i = 0; i < len; i++)
10397 part = XVECEXP (pattern, 0, i);
10398 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10399 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10400 return XEXP (part, 0);
10402 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10403 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10406 /* Verify that the register in use_sfunc_addr still agrees with the address
10407 used in the sfunc. This prevents fill_slots_from_thread from changing
10408 use_sfunc_addr.
10409 INSN is the use_sfunc_addr instruction, and REG is the register it
10410 guards. */
10412 check_use_sfunc_addr (rtx insn, rtx reg)
10414 /* Search for the sfunc. It should really come right after INSN. */
10415 while ((insn = NEXT_INSN (insn)))
10417 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10418 break;
10419 if (! INSN_P (insn))
10420 continue;
10422 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10423 insn = XVECEXP (PATTERN (insn), 0, 0);
10424 if (GET_CODE (PATTERN (insn)) != PARALLEL
10425 || get_attr_type (insn) != TYPE_SFUNC)
10426 continue;
10427 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10429 gcc_unreachable ();
10432 /* This function returns a constant rtx that represents 2**15 / pi in
10433 SFmode. It's used to scale SFmode angles, in radians, to a
10434 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10435 maps to 0x10000). */
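/* Worked numbers (illustrative): fsca expects an angle where a full circle
   (2*pi radians) equals 2**16, so radians are multiplied by
   2**16 / (2*pi) = 2**15 / pi ~= 10430.378350470453; e.g. pi/2 radians
   becomes 0x4000.  The reciprocal, pi / 2**15 ~= 9.5874e-05, converts the
   fixed-point result back to radians (see sh_fsca_int2sf below).  */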
10437 static GTY(()) rtx sh_fsca_sf2int_rtx;
10440 sh_fsca_sf2int (void)
10442 if (! sh_fsca_sf2int_rtx)
10444 REAL_VALUE_TYPE rv;
10446 real_from_string (&rv, "10430.378350470453");
10447 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10450 return sh_fsca_sf2int_rtx;
10453 /* This function returns a constant rtx that represents 2**15 / pi in
10454 DFmode. It's used to scale DFmode angles, in radians, to a
10455 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10456 maps to 0x10000). */
10458 static GTY(()) rtx sh_fsca_df2int_rtx;
10461 sh_fsca_df2int (void)
10463 if (! sh_fsca_df2int_rtx)
10465 REAL_VALUE_TYPE rv;
10467 real_from_string (&rv, "10430.378350470453");
10468 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10471 return sh_fsca_df2int_rtx;
10474 /* This function returns a constant rtx that represents pi / 2**15 in
10475 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10476 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10477 2*pi). */
10479 static GTY(()) rtx sh_fsca_int2sf_rtx;
10482 sh_fsca_int2sf (void)
10484 if (! sh_fsca_int2sf_rtx)
10486 REAL_VALUE_TYPE rv;
10488 real_from_string (&rv, "9.587379924285257e-5");
10489 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10492 return sh_fsca_int2sf_rtx;
10495 /* Initialize the CUMULATIVE_ARGS structure. */
10497 void
10498 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10499 tree fntype,
10500 rtx libname ATTRIBUTE_UNUSED,
10501 tree fndecl,
10502 signed int n_named_args,
10503 enum machine_mode mode)
10505 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10506 pcum->free_single_fp_reg = 0;
10507 pcum->stack_regs = 0;
10508 pcum->byref_regs = 0;
10509 pcum->byref = 0;
10510 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10512 /* XXX - Should we check TARGET_HITACHI here ??? */
10513 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10515 if (fntype)
10517 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10518 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10519 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10520 pcum->arg_count [(int) SH_ARG_INT]
10521 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10523 pcum->call_cookie
10524 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10525 && pcum->arg_count [(int) SH_ARG_INT] == 0
10526 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10527 ? int_size_in_bytes (TREE_TYPE (fntype))
10528 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10529 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10530 == FIRST_RET_REG));
10532 else
10534 pcum->arg_count [(int) SH_ARG_INT] = 0;
10535 pcum->prototype_p = FALSE;
10536 if (mode != VOIDmode)
10538 pcum->call_cookie =
10539 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10540 && GET_MODE_SIZE (mode) > 4
10541 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10543 /* If the default ABI is the Renesas ABI then all library
10544 calls must assume that the library will be using the
10545 Renesas ABI. So if the function would return its result
10546 in memory then we must force the address of this memory
10547 block onto the stack. Ideally we would like to call
10548 targetm.calls.return_in_memory() here but we do not have
10549 the TYPE or the FNDECL available so we synthesize the
10550 contents of that function as best we can. */
10551 pcum->force_mem =
10552 (TARGET_DEFAULT & MASK_HITACHI)
10553 && (mode == BLKmode
10554 || (GET_MODE_SIZE (mode) > 4
10555 && !(mode == DFmode
10556 && TARGET_FPU_DOUBLE)));
10558 else
10560 pcum->call_cookie = 0;
10561 pcum->force_mem = FALSE;
10566 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10567 not descend into CONST_DOUBLE expressions when replacing.
10569 Note that copying is not done so X must not be shared unless all copies
10570 are to be modified.
10572 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10573 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10574 replacements[n*2+1] - and that we take mode changes into account.
10576 If a replacement is ambiguous, return NULL_RTX.
10578 If MODIFY is zero, don't modify any rtl in place,
10579 just return zero or nonzero for failure / success. */
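/* Illustrative use (register choices hypothetical): with
   replacements[] = { FROM0, TO0, FROM1, TO1 } and n_replacements == 2, one
   call rewrites every FROM0 as TO0 and every FROM1 as TO1 in a single
   pass.  Mode differences are handled where unambiguous - e.g. an SImode
   use of the first hard register of a DImode FROM0 is rewritten to an
   SImode reg at the corresponding TO0 register - while ambiguous partial
   overlaps make the whole call return NULL_RTX.  */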
10582 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10584 int i, j;
10585 const char *fmt;
10587 /* The following prevents loops when we change a MEM inside a
10588 CONST_DOUBLE into the same CONST_DOUBLE. */
10589 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10590 return x;
10592 for (i = n_replacements - 1; i >= 0 ; i--)
10593 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10594 return replacements[i*2+1];
10596 /* Allow this function to make replacements in EXPR_LISTs. */
10597 if (x == 0)
10598 return 0;
10600 if (GET_CODE (x) == SUBREG)
10602 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10603 n_replacements, modify);
10605 if (GET_CODE (new) == CONST_INT)
10607 x = simplify_subreg (GET_MODE (x), new,
10608 GET_MODE (SUBREG_REG (x)),
10609 SUBREG_BYTE (x));
10610 if (! x)
10611 abort ();
10613 else if (modify)
10614 SUBREG_REG (x) = new;
10616 return x;
10618 else if (GET_CODE (x) == REG)
10620 unsigned regno = REGNO (x);
10621 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10622 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10623 rtx result = NULL_RTX;
10625 for (i = n_replacements - 1; i >= 0; i--)
10627 rtx from = replacements[i*2];
10628 rtx to = replacements[i*2+1];
10629 unsigned from_regno, from_nregs, to_regno, new_regno;
10631 if (GET_CODE (from) != REG)
10632 continue;
10633 from_regno = REGNO (from);
10634 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10635 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10636 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10638 if (regno < from_regno
10639 || regno + nregs > from_regno + nregs
10640 || GET_CODE (to) != REG
10641 || result)
10642 return NULL_RTX;
10643 to_regno = REGNO (to);
10644 if (to_regno < FIRST_PSEUDO_REGISTER)
10646 new_regno = regno + to_regno - from_regno;
10647 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10648 != nregs)
10649 return NULL_RTX;
10650 result = gen_rtx_REG (GET_MODE (x), new_regno);
10652 else if (GET_MODE (x) <= GET_MODE (to))
10653 result = gen_lowpart_common (GET_MODE (x), to);
10654 else
10655 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10658 return result ? result : x;
10660 else if (GET_CODE (x) == ZERO_EXTEND)
10662 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10663 n_replacements, modify);
10665 if (GET_CODE (new) == CONST_INT)
10667 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10668 new, GET_MODE (XEXP (x, 0)));
10669 if (! x)
10670 abort ();
10672 else if (modify)
10673 XEXP (x, 0) = new;
10675 return x;
10678 fmt = GET_RTX_FORMAT (GET_CODE (x));
10679 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10681 rtx new;
10683 if (fmt[i] == 'e')
10685 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10686 n_replacements, modify);
10687 if (!new)
10688 return NULL_RTX;
10689 if (modify)
10690 XEXP (x, i) = new;
10692 else if (fmt[i] == 'E')
10693 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10695 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10696 n_replacements, modify);
10697 if (!new)
10698 return NULL_RTX;
10699 if (modify)
10700 XVECEXP (x, i, j) = new;
10704 return x;
10708 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10710 enum rtx_code code = TRUNCATE;
10712 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10714 rtx inner = XEXP (x, 0);
10715 enum machine_mode inner_mode = GET_MODE (inner);
10717 if (inner_mode == mode)
10718 return inner;
10719 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10720 x = inner;
10721 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10722 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10724 code = GET_CODE (x);
10725 x = inner;
10728 return gen_rtx_fmt_e (code, mode, x);
10731 /* Called via for_each_rtx after reload, to clean up truncates of
10732 registers that span multiple actual hard registers. */
10734 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10736 rtx x = *p, reg;
10738 if (GET_CODE (x) != TRUNCATE)
10739 return 0;
10740 reg = XEXP (x, 0);
10741 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10743 enum machine_mode reg_mode = GET_MODE (reg);
10744 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10745 subreg_lowpart_offset (DImode, reg_mode));
10746 *(int*) n_changes += 1;
10747 return -1;
10749 return 0;
10752 /* Load and store depend on the highpart of the address. However,
10753 set_attr_alternative does not give well-defined results before reload,
10754 so we must look at the rtl ourselves to see if any of the feeding
10755 registers is used in a memref. */
10757 /* Called by sh_contains_memref_p via for_each_rtx. */
10758 static int
10759 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10761 return (GET_CODE (*loc) == MEM);
10764 /* Return nonzero iff INSN contains a MEM. */
10766 sh_contains_memref_p (rtx insn)
10768 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10771 /* Return nonzero iff INSN loads a banked register. */
10773 sh_loads_bankedreg_p (rtx insn)
10775 if (GET_CODE (PATTERN (insn)) == SET)
10777 rtx op = SET_DEST (PATTERN(insn));
10778 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
10779 return 1;
10782 return 0;
10785 /* FNADDR is the MEM expression from a call expander. Return an address
10786 to use in an SHmedia insn pattern. */
10788 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10790 int is_sym;
10792 fnaddr = XEXP (fnaddr, 0);
10793 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10794 if (flag_pic && is_sym)
10796 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10798 rtx reg = gen_reg_rtx (Pmode);
10800 /* We must not use GOTPLT for sibcalls, because PIC_REG
10801 must be restored before the PLT code gets to run. */
10802 if (is_sibcall)
10803 emit_insn (gen_symGOT2reg (reg, fnaddr));
10804 else
10805 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10806 fnaddr = reg;
10808 else
10810 fnaddr = gen_sym2PIC (fnaddr);
10811 PUT_MODE (fnaddr, Pmode);
10814 /* If ptabs might trap, make this visible to the rest of the compiler.
10815 We generally assume that symbols pertain to valid locations, but
10816 it is possible to generate invalid symbols with asm or linker tricks.
10817 In a list of functions where each returns its successor, an invalid
10818 symbol might denote an empty list. */
10819 if (!TARGET_PT_FIXED
10820 && (!is_sym || TARGET_INVALID_SYMBOLS)
10821 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10823 rtx tr = gen_reg_rtx (PDImode);
10825 emit_insn (gen_ptabs (tr, fnaddr));
10826 fnaddr = tr;
10828 else if (! target_reg_operand (fnaddr, Pmode))
10829 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10830 return fnaddr;
10833 enum reg_class
10834 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10835 enum machine_mode mode, secondary_reload_info *sri)
10837 if (in_p)
10839 if (REGCLASS_HAS_FP_REG (class)
10840 && ! TARGET_SHMEDIA
10841 && immediate_operand ((x), mode)
10842 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10843 && mode == SFmode && fldi_ok ()))
10844 switch (mode)
10846 case SFmode:
10847 sri->icode = CODE_FOR_reload_insf__frn;
10848 return NO_REGS;
10849 case DFmode:
10850 sri->icode = CODE_FOR_reload_indf__frn;
10851 return NO_REGS;
10852 case SImode:
10853 /* ??? If we knew that we are in the appropriate mode -
10854 single precision - we could use a reload pattern directly. */
10855 return FPUL_REGS;
10856 default:
10857 abort ();
10859 if (class == FPUL_REGS
10860 && ((GET_CODE (x) == REG
10861 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10862 || REGNO (x) == T_REG))
10863 || GET_CODE (x) == PLUS))
10864 return GENERAL_REGS;
10865 if (class == FPUL_REGS && immediate_operand (x, mode))
10867 if (satisfies_constraint_I08 (x))
10868 return GENERAL_REGS;
10869 sri->icode = CODE_FOR_reload_insi__i_fpul;
10870 return NO_REGS;
10872 if (class == FPSCR_REGS
10873 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10874 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10875 return GENERAL_REGS;
10876 if (REGCLASS_HAS_FP_REG (class)
10877 && TARGET_SHMEDIA
10878 && immediate_operand (x, mode)
10879 && x != CONST0_RTX (GET_MODE (x))
10880 && GET_MODE (x) != V4SFmode)
10881 return GENERAL_REGS;
10882 if ((mode == QImode || mode == HImode)
10883 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10885 sri->icode = ((mode == QImode)
10886 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10887 return NO_REGS;
10889 if (TARGET_SHMEDIA && class == GENERAL_REGS
10890 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10891 return TARGET_REGS;
10892 } /* end of input-only processing. */
10894 if (((REGCLASS_HAS_FP_REG (class)
10895 && (GET_CODE (x) == REG
10896 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10897 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10898 && TARGET_FMOVD))))
10899 || (REGCLASS_HAS_GENERAL_REG (class)
10900 && GET_CODE (x) == REG
10901 && FP_REGISTER_P (REGNO (x))))
10902 && ! TARGET_SHMEDIA
10903 && (mode == SFmode || mode == SImode))
10904 return FPUL_REGS;
10905 if ((class == FPUL_REGS
10906 || (REGCLASS_HAS_FP_REG (class)
10907 && ! TARGET_SHMEDIA && mode == SImode))
10908 && (GET_CODE (x) == MEM
10909 || (GET_CODE (x) == REG
10910 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10911 || REGNO (x) == T_REG
10912 || system_reg_operand (x, VOIDmode)))))
10914 if (class == FPUL_REGS)
10915 return GENERAL_REGS;
10916 return FPUL_REGS;
10918 if ((class == TARGET_REGS
10919 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10920 && !satisfies_constraint_Csy (x)
10921 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10922 return GENERAL_REGS;
10923 if ((class == MAC_REGS || class == PR_REGS)
10924 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10925 && class != REGNO_REG_CLASS (REGNO (x)))
10926 return GENERAL_REGS;
10927 if (class != GENERAL_REGS && GET_CODE (x) == REG
10928 && TARGET_REGISTER_P (REGNO (x)))
10929 return GENERAL_REGS;
10930 return NO_REGS;
10933 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10935 #include "gt-sh.h"