PR target/66591
[official-gcc.git] / gcc / config / sh / sh.c
blob 6f03206ccb907ff232c5594cce7fc5b18d81320f
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stringpool.h"
36 #include "stor-layout.h"
37 #include "calls.h"
38 #include "varasm.h"
39 #include "flags.h"
40 #include "hard-reg-set.h"
41 #include "function.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "emit-rtl.h"
46 #include "stmt.h"
47 #include "expr.h"
48 #include "insn-codes.h"
49 #include "optabs.h"
50 #include "reload.h"
51 #include "regs.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "dwarf2.h"
57 #include "tm_p.h"
58 #include "target.h"
59 #include "target-def.h"
60 #include "langhooks.h"
61 #include "predict.h"
62 #include "dominance.h"
63 #include "cfg.h"
64 #include "cfgrtl.h"
65 #include "cfganal.h"
66 #include "lcm.h"
67 #include "cfgbuild.h"
68 #include "cfgcleanup.h"
69 #include "basic-block.h"
70 #include "df.h"
71 #include "intl.h"
72 #include "sched-int.h"
73 #include "params.h"
74 #include "tree-ssa-alias.h"
75 #include "internal-fn.h"
76 #include "gimple-fold.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "gimple.h"
80 #include "gimplify.h"
81 #include "cfgloop.h"
82 #include "alloc-pool.h"
83 #include "tm-constrs.h"
84 #include "opts.h"
85 #include "tree-pass.h"
86 #include "pass_manager.h"
87 #include "context.h"
88 #include "builtins.h"
89 #include "rtl-iter.h"
91 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
93 /* These are some macros to abstract register modes. */
94 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
95 && ((HOST_WIDE_INT)(VALUE)) <= 511)
97 #define CONST_OK_FOR_ADD(size) \
98 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
99 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
100 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
101 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
103 /* Used to simplify the logic below. Find the attributes wherever
104 they may be. */
105 #define SH_ATTRIBUTES(decl) \
106 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
107 : DECL_ATTRIBUTES (decl) \
108 ? (DECL_ATTRIBUTES (decl)) \
109 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
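/* A minimal usage sketch (editorial illustration, not part of the original
   file): the macro accepts either a type or a decl and falls back to the
   decl's type when the decl itself carries no attributes.  Assuming a
   FUNCTION_DECL `fndecl':

     tree attrs = SH_ATTRIBUTES (fndecl);
     bool is_handler
       = lookup_attribute ("interrupt_handler", attrs) != NULL_TREE;  */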
111 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
112 int current_function_interrupt;
114 tree sh_deferred_function_attributes;
115 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
117 /* Global variables for machine-dependent things. */
119 /* Which cpu are we scheduling for. */
120 enum processor_type sh_cpu;
122 /* Definitions used in ready queue reordering for first scheduling pass. */
124 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
125 static short *regmode_weight[2];
127 /* Total SFmode and SImode weights of scheduled insns. */
128 static int curr_regmode_pressure[2];
130 /* Number of r0 life regions. */
131 static int r0_life_regions;
133 /* If true, skip cycles for Q -> R movement. */
134 static int skip_cycles = 0;
136 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
137 and returned from sh_reorder2. */
138 static short cached_can_issue_more;
140 /* Unique number for UNSPEC_BBR pattern. */
141 static unsigned int unspec_bbr_uid = 1;
143 /* Provides the class number of the smallest class containing
144 reg number. */
145 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
147 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
174 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
175 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
176 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
177 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
180 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
181 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
182 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
183 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
184 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
185 GENERAL_REGS, GENERAL_REGS,
188 char sh_register_names[FIRST_PSEUDO_REGISTER] \
189 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
191 char sh_additional_register_names[ADDREGNAMES_SIZE] \
192 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
193 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
195 int assembler_dialect;
197 static bool shmedia_space_reserved_for_target_registers;
199 static void split_branches (rtx_insn *);
200 static int branch_dest (rtx);
201 static void print_slot (rtx_sequence *);
202 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
203 static void dump_table (rtx_insn *, rtx_insn *);
204 static bool broken_move (rtx_insn *);
205 static bool mova_p (rtx_insn *);
206 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
207 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
208 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
209 static void sh_reorg (void);
210 static void sh_option_override (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
212 static rtx_insn *frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static HOST_WIDE_INT rounded_frame_size (int);
218 static bool sh_frame_pointer_required (void);
219 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
220 static int sh_mode_needed (int, rtx_insn *);
221 static int sh_mode_after (int, int, rtx_insn *);
222 static int sh_mode_entry (int);
223 static int sh_mode_exit (int);
224 static int sh_mode_priority (int entity, int n);
225 static bool sh_lra_p (void);
227 static rtx mark_constant_pool_use (rtx);
228 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
229 int, bool *);
230 static tree sh_handle_resbank_handler_attribute (tree *, tree,
231 tree, int, bool *);
232 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
233 tree, int, bool *);
234 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
235 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
236 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
237 static void sh_print_operand (FILE *, rtx, int);
238 static void sh_print_operand_address (FILE *, rtx);
239 static bool sh_print_operand_punct_valid_p (unsigned char code);
240 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
241 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
242 static void sh_insert_attributes (tree, tree *);
243 static const char *sh_check_pch_target_flags (int);
244 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
245 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
246 static int sh_issue_rate (void);
247 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
248 static short find_set_regmode_weight (rtx, machine_mode);
249 static short find_insn_regmode_weight (rtx, machine_mode);
250 static void find_regmode_weight (basic_block, machine_mode);
251 static int find_r0_life_regions (basic_block);
252 static void sh_md_init_global (FILE *, int, int);
253 static void sh_md_finish_global (FILE *, int);
254 static int rank_for_reorder (const void *, const void *);
255 static void swap_reorder (rtx_insn **, int);
256 static void ready_reorder (rtx_insn **, int);
257 static bool high_pressure (machine_mode);
258 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
259 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
260 static void sh_md_init (FILE *, int, int);
261 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
263 static bool sh_function_ok_for_sibcall (tree, tree);
265 static bool sh_cannot_modify_jumps_p (void);
266 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
267 static reg_class_t sh_target_reg_class (void);
268 static bool sh_optimize_target_register_callee_saved (bool);
269 static bool sh_ms_bitfield_layout_p (const_tree);
271 static void sh_init_builtins (void);
272 static tree sh_builtin_decl (unsigned, bool);
273 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
274 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
275 HOST_WIDE_INT, tree);
276 static void sh_file_start (void);
277 static bool flow_dependent_p (rtx, rtx);
278 static void flow_dependent_p_1 (rtx, const_rtx, void *);
279 static int shiftcosts (rtx);
280 static int and_xor_ior_costs (rtx, int);
281 static int addsubcosts (rtx);
282 static int multcosts (rtx);
283 static bool unspec_caller_rtx_p (rtx);
284 static bool sh_cannot_copy_insn_p (rtx_insn *);
285 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
286 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
287 static int sh_pr_n_sets (void);
288 static rtx sh_allocate_initial_value (rtx);
289 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
290 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
291 machine_mode,
292 struct secondary_reload_info *);
293 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
294 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
295 static rtx sh_delegitimize_address (rtx);
296 static bool sh_cannot_substitute_mem_equiv_p (rtx);
297 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
298 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
299 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
300 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
301 static int scavenge_reg (HARD_REG_SET *s);
302 struct save_schedule_s;
303 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
304 struct save_schedule_s *, int);
306 static rtx sh_struct_value_rtx (tree, int);
307 static rtx sh_function_value (const_tree, const_tree, bool);
308 static bool sh_function_value_regno_p (const unsigned int);
309 static rtx sh_libcall_value (machine_mode, const_rtx);
310 static bool sh_return_in_memory (const_tree, const_tree);
311 static rtx sh_builtin_saveregs (void);
312 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
313 tree, int *, int);
314 static bool sh_strict_argument_naming (cumulative_args_t);
315 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
316 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
317 static tree sh_build_builtin_va_list (void);
318 static void sh_va_start (tree, rtx);
319 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
320 static bool sh_promote_prototypes (const_tree);
321 static machine_mode sh_promote_function_mode (const_tree type,
322 machine_mode,
323 int *punsignedp,
324 const_tree funtype,
325 int for_return);
326 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
327 const_tree, bool);
328 static bool sh_callee_copies (cumulative_args_t, machine_mode,
329 const_tree, bool);
330 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
331 tree, bool);
332 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
333 const_tree, bool);
334 static rtx sh_function_arg (cumulative_args_t, machine_mode,
335 const_tree, bool);
336 static bool sh_scalar_mode_supported_p (machine_mode);
337 static int sh_dwarf_calling_convention (const_tree);
338 static void sh_encode_section_info (tree, rtx, int);
339 static bool sh2a_function_vector_p (tree);
340 static void sh_trampoline_init (rtx, tree, rtx);
341 static rtx sh_trampoline_adjust_address (rtx);
342 static void sh_conditional_register_usage (void);
343 static bool sh_legitimate_constant_p (machine_mode, rtx);
344 static int mov_insn_size (machine_mode, bool);
345 static int mov_insn_alignment_mask (machine_mode, bool);
346 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
347 unsigned int,
348 enum by_pieces_operation,
349 bool);
350 static bool sequence_insn_p (rtx_insn *);
351 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
352 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
353 machine_mode, bool);
354 static bool sh_legitimate_combined_insn (rtx_insn* insn);
356 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
358 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
360 static const struct attribute_spec sh_attribute_table[] =
362 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
363 affects_type_identity } */
364 { "interrupt_handler", 0, 0, true, false, false,
365 sh_handle_interrupt_handler_attribute, false },
366 { "sp_switch", 1, 1, true, false, false,
367 sh_handle_sp_switch_attribute, false },
368 { "trap_exit", 1, 1, true, false, false,
369 sh_handle_trap_exit_attribute, false },
370 { "renesas", 0, 0, false, true, false,
371 sh_handle_renesas_attribute, false },
372 { "trapa_handler", 0, 0, true, false, false,
373 sh_handle_interrupt_handler_attribute, false },
374 { "nosave_low_regs", 0, 0, true, false, false,
375 sh_handle_interrupt_handler_attribute, false },
376 { "resbank", 0, 0, true, false, false,
377 sh_handle_resbank_handler_attribute, false },
378 { "function_vector", 1, 1, true, false, false,
379 sh2a_handle_function_vector_handler_attribute, false },
380 { NULL, 0, 0, false, false, false, NULL, false }
383 /* Initialize the GCC target structure. */
384 #undef TARGET_ATTRIBUTE_TABLE
385 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
387 /* The next two are used for debug info when compiling with -gdwarf. */
388 #undef TARGET_ASM_UNALIGNED_HI_OP
389 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
390 #undef TARGET_ASM_UNALIGNED_SI_OP
391 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
393 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
394 #undef TARGET_ASM_UNALIGNED_DI_OP
395 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
396 #undef TARGET_ASM_ALIGNED_DI_OP
397 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE sh_option_override
402 #undef TARGET_PRINT_OPERAND
403 #define TARGET_PRINT_OPERAND sh_print_operand
404 #undef TARGET_PRINT_OPERAND_ADDRESS
405 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
406 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
407 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
411 #undef TARGET_ASM_FUNCTION_EPILOGUE
412 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
414 #undef TARGET_ASM_OUTPUT_MI_THUNK
415 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
417 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
418 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
419 hook_bool_const_tree_hwi_hwi_const_tree_true
421 #undef TARGET_ASM_FILE_START
422 #define TARGET_ASM_FILE_START sh_file_start
423 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
424 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
426 #undef TARGET_REGISTER_MOVE_COST
427 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
429 #undef TARGET_INSERT_ATTRIBUTES
430 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
432 #undef TARGET_SCHED_ADJUST_COST
433 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
435 #undef TARGET_SCHED_ISSUE_RATE
436 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
438 /* The next 5 hooks have been implemented for reenabling sched1. With the
439 help of these macros we limit the movement of insns in sched1 to
440 reduce the register pressure. The overall idea is to keep count of SImode
441 and SFmode regs required by already scheduled insns. When these counts
442 cross some threshold values, we give priority to insns that free registers.
443 The insn that frees registers is most likely to be the insn with the lowest
444 LUID (original insn order), but such an insn might sit in the stalled
445 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
446 up to a max of 8 cycles so that such insns may move from Q -> R.
448 The description of the hooks is as below:
450 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
451 scheduler; it is called inside the sched_init function just after
452 find_insn_reg_weights function call. It is used to calculate the SImode
453 and SFmode weights of insns of basic blocks, similar to what
454 find_insn_reg_weights does.
455 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
457 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
458 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
459 (Q)->(R).
461 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
462 high; reorder the ready queue so that the insn with lowest LUID will be
463 issued next.
465 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
466 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
468 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
469 can be returned from TARGET_SCHED_REORDER2.
471 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
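/* Editorial sketch of the interaction described above (simplified, not the
   actual hook bodies that follow; `cycles_skipped' is a hypothetical counter
   used only for illustration):

     // In TARGET_SCHED_REORDER2: flag high register pressure.
     if (high_pressure (SImode) || high_pressure (SFmode))
       skip_cycles = 1;

     // In TARGET_SCHED_DFA_NEW_CYCLE: honor the flag for at most 8 cycles,
     // giving stalled insns a chance to move from Q to R.
     if (skip_cycles && cycles_skipped++ < 8)
       return 1;   // skip this cycle
  */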
473 #undef TARGET_SCHED_DFA_NEW_CYCLE
474 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
476 #undef TARGET_SCHED_INIT_GLOBAL
477 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
479 #undef TARGET_SCHED_FINISH_GLOBAL
480 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
482 #undef TARGET_SCHED_VARIABLE_ISSUE
483 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
485 #undef TARGET_SCHED_REORDER
486 #define TARGET_SCHED_REORDER sh_reorder
488 #undef TARGET_SCHED_REORDER2
489 #define TARGET_SCHED_REORDER2 sh_reorder2
491 #undef TARGET_SCHED_INIT
492 #define TARGET_SCHED_INIT sh_md_init
494 #undef TARGET_DELEGITIMIZE_ADDRESS
495 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
497 #undef TARGET_LEGITIMIZE_ADDRESS
498 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
500 #undef TARGET_CANNOT_MODIFY_JUMPS_P
501 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
502 #undef TARGET_CAN_FOLLOW_JUMP
503 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
504 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
505 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
506 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
507 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
508 sh_optimize_target_register_callee_saved
510 #undef TARGET_MS_BITFIELD_LAYOUT_P
511 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
513 #undef TARGET_INIT_BUILTINS
514 #define TARGET_INIT_BUILTINS sh_init_builtins
515 #undef TARGET_BUILTIN_DECL
516 #define TARGET_BUILTIN_DECL sh_builtin_decl
517 #undef TARGET_EXPAND_BUILTIN
518 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
520 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
521 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
523 #undef TARGET_CANNOT_COPY_INSN_P
524 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
525 #undef TARGET_RTX_COSTS
526 #define TARGET_RTX_COSTS sh_rtx_costs
527 #undef TARGET_ADDRESS_COST
528 #define TARGET_ADDRESS_COST sh_address_cost
529 #undef TARGET_ALLOCATE_INITIAL_VALUE
530 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
532 #undef TARGET_MACHINE_DEPENDENT_REORG
533 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
535 #undef TARGET_DWARF_REGISTER_SPAN
536 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
538 #ifdef HAVE_AS_TLS
539 #undef TARGET_HAVE_TLS
540 #define TARGET_HAVE_TLS true
541 #endif
543 #undef TARGET_PROMOTE_PROTOTYPES
544 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
545 #undef TARGET_PROMOTE_FUNCTION_MODE
546 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
548 #undef TARGET_FUNCTION_VALUE
549 #define TARGET_FUNCTION_VALUE sh_function_value
550 #undef TARGET_FUNCTION_VALUE_REGNO_P
551 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
552 #undef TARGET_LIBCALL_VALUE
553 #define TARGET_LIBCALL_VALUE sh_libcall_value
554 #undef TARGET_STRUCT_VALUE_RTX
555 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
556 #undef TARGET_RETURN_IN_MEMORY
557 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
559 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
560 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
563 #undef TARGET_STRICT_ARGUMENT_NAMING
564 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
565 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
566 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
567 #undef TARGET_MUST_PASS_IN_STACK
568 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
569 #undef TARGET_PASS_BY_REFERENCE
570 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
571 #undef TARGET_CALLEE_COPIES
572 #define TARGET_CALLEE_COPIES sh_callee_copies
573 #undef TARGET_ARG_PARTIAL_BYTES
574 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
575 #undef TARGET_FUNCTION_ARG
576 #define TARGET_FUNCTION_ARG sh_function_arg
577 #undef TARGET_FUNCTION_ARG_ADVANCE
578 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
580 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
581 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
583 #undef TARGET_BUILD_BUILTIN_VA_LIST
584 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
585 #undef TARGET_EXPAND_BUILTIN_VA_START
586 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
587 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
588 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
590 #undef TARGET_SCALAR_MODE_SUPPORTED_P
591 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
592 #undef TARGET_VECTOR_MODE_SUPPORTED_P
593 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
595 #undef TARGET_CHECK_PCH_TARGET_FLAGS
596 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
598 #undef TARGET_DWARF_CALLING_CONVENTION
599 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
601 #undef TARGET_FRAME_POINTER_REQUIRED
602 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
604 #undef TARGET_MODE_EMIT
605 #define TARGET_MODE_EMIT sh_emit_mode_set
607 #undef TARGET_MODE_NEEDED
608 #define TARGET_MODE_NEEDED sh_mode_needed
610 #undef TARGET_MODE_AFTER
611 #define TARGET_MODE_AFTER sh_mode_after
613 #undef TARGET_MODE_ENTRY
614 #define TARGET_MODE_ENTRY sh_mode_entry
616 #undef TARGET_MODE_EXIT
617 #define TARGET_MODE_EXIT sh_mode_exit
619 #undef TARGET_MODE_PRIORITY
620 #define TARGET_MODE_PRIORITY sh_mode_priority
622 /* Return regmode weight for insn. */
623 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
624 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
626 /* Return current register pressure for regmode. */
627 #define CURR_REGMODE_PRESSURE(MODE)\
628 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
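/* Usage sketch (editorial): the scheduling hooks below accumulate the
   per-insn weights into the pressure counters roughly like

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and compare the result against a mode-specific threshold defined
   elsewhere in this file.  */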
630 #undef TARGET_ENCODE_SECTION_INFO
631 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
633 #undef TARGET_LRA_P
634 #define TARGET_LRA_P sh_lra_p
636 #undef TARGET_SECONDARY_RELOAD
637 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
639 #undef TARGET_PREFERRED_RELOAD_CLASS
640 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
642 #undef TARGET_CONDITIONAL_REGISTER_USAGE
643 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
645 #undef TARGET_LEGITIMATE_ADDRESS_P
646 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
648 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
649 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
651 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
652 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
653 sh_legitimize_address_displacement
655 #undef TARGET_TRAMPOLINE_INIT
656 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
657 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
658 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
663 #undef TARGET_CANONICALIZE_COMPARISON
664 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
666 #undef TARGET_LEGITIMATE_COMBINED_INSN
667 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
669 #undef TARGET_FIXED_CONDITION_CODE_REGS
670 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
672 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
673 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
674 sh_use_by_pieces_infrastructure_p
676 /* Machine-specific symbol_ref flags. */
677 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
679 /* The tas.b instruction sets bit 7 of the byte, i.e. 0x80. This value
680 is used by optabs.c atomic op expansion code as well as in sync.md. */
681 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
682 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
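/* Editorial illustration (not part of the original comment): tas.b @Rn
   atomically performs

     T = (*Rn == 0);    // test: T is set if the byte was zero
     *Rn |= 0x80;       // set bit 7

   so a byte that has been "set" holds 0x80 rather than 1, which is why
   the trueval above is 0x80.  */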
684 struct gcc_target targetm = TARGET_INITIALIZER;
687 /* Information on the currently selected atomic model.
688 This is initialized in sh_option_override. */
689 static sh_atomic_model selected_atomic_model_;
691 const sh_atomic_model&
692 selected_atomic_model (void)
694 return selected_atomic_model_;
697 static sh_atomic_model
698 parse_validate_atomic_model_option (const char* str)
700 const char* model_names[sh_atomic_model::num_models];
701 model_names[sh_atomic_model::none] = "none";
702 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
703 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
704 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
705 model_names[sh_atomic_model::soft_imask] = "soft-imask";
707 const char* model_cdef_names[sh_atomic_model::num_models];
708 model_cdef_names[sh_atomic_model::none] = "NONE";
709 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
710 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
711 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
712 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
714 sh_atomic_model ret;
715 ret.type = sh_atomic_model::none;
716 ret.name = model_names[sh_atomic_model::none];
717 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
718 ret.strict = false;
719 ret.tcb_gbr_offset = -1;
721 /* Handle empty string as 'none'. */
722 if (str == NULL || *str == '\0')
723 return ret;
725 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
727 std::vector<std::string> tokens;
728 for (std::stringstream ss (str); ss.good (); )
730 tokens.push_back (std::string ());
731 std::getline (ss, tokens.back (), ',');
734 if (tokens.empty ())
735 err_ret ("invalid atomic model option");
737 /* The first token must be the atomic model name. */
739 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
740 if (tokens.front () == model_names[i])
742 ret.type = (sh_atomic_model::enum_type)i;
743 ret.name = model_names[i];
744 ret.cdef_name = model_cdef_names[i];
745 goto got_mode_name;
748 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
749 got_mode_name:;
752 /* Go through the remaining tokens. */
753 for (size_t i = 1; i < tokens.size (); ++i)
755 if (tokens[i] == "strict")
756 ret.strict = true;
757 else if (tokens[i].find ("gbr-offset=") == 0)
759 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
760 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
761 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
762 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
763 "option", offset_str.c_str ());
765 else
766 err_ret ("unknown parameter \"%s\" in atomic model option",
767 tokens[i].c_str ());
770 /* Check that the selection makes sense. */
771 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
772 err_ret ("atomic operations are not supported on SHmedia");
774 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
775 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
776 ret.name);
778 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
779 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
781 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
782 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
784 if (ret.type == sh_atomic_model::soft_tcb
785 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
786 || (ret.tcb_gbr_offset & 3) != 0))
787 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
788 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
789 ret.name);
791 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
792 err_ret ("cannot use atomic model %s in user mode", ret.name);
794 return ret;
796 #undef err_ret
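/* Example of the accepted syntax (editorial): an option string such as
   "soft-tcb,gbr-offset=128,strict" yields ret.type
   = sh_atomic_model::soft_tcb with ret.tcb_gbr_offset = 128 and
   ret.strict = true, provided the offset is a multiple of 4 in the range
   0-1020 as checked above; an empty string selects the "none" model.  */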
799 /* Register SH specific RTL passes. */
800 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
801 const char* name);
802 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
803 const char* name);
804 static void
805 register_sh_passes (void)
807 if (!TARGET_SH1)
808 return;
810 /* Running the sh_treg_combine pass after ce1 generates better code when
811 comparisons are combined and reg-reg moves are introduced, because
812 reg-reg moves will be eliminated afterwards. However, there are quite
813 a few cases where combine will be unable to fold comparison related insns,
814 thus for now don't do it.
815 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
816 PASS_POS_INSERT_AFTER, "ce1", 1);
817 */
819 /* Run sh_treg_combine pass after combine but before register allocation. */
820 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
821 PASS_POS_INSERT_AFTER, "split1", 1);
823 /* Run sh_treg_combine pass after register allocation and basic block
824 reordering as this sometimes creates new opportunities. */
825 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
826 PASS_POS_INSERT_AFTER, "split4", 1);
828 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
829 is known after a conditional branch.
830 This must be done after basic blocks and branch conditions have
831 stabilized and won't be changed by further passes. */
832 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
833 PASS_POS_INSERT_BEFORE, "sched2", 1);
836 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
837 various options, and do some machine dependent initialization. */
838 static void
839 sh_option_override (void)
841 int regno;
843 SUBTARGET_OVERRIDE_OPTIONS;
844 if (optimize > 1 && !optimize_size)
845 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
847 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
848 TARGET_CBRANCHDI4 = 1;
849 TARGET_CMPEQDI_T = 0;
851 sh_cpu = PROCESSOR_SH1;
852 assembler_dialect = 0;
853 if (TARGET_SH2)
854 sh_cpu = PROCESSOR_SH2;
855 if (TARGET_SH2E)
856 sh_cpu = PROCESSOR_SH2E;
857 if (TARGET_SH2A)
858 sh_cpu = PROCESSOR_SH2A;
859 if (TARGET_SH3)
860 sh_cpu = PROCESSOR_SH3;
861 if (TARGET_SH3E)
862 sh_cpu = PROCESSOR_SH3E;
863 if (TARGET_SH4)
865 assembler_dialect = 1;
866 sh_cpu = PROCESSOR_SH4;
868 if (TARGET_SH4A)
870 assembler_dialect = 1;
871 sh_cpu = PROCESSOR_SH4A;
873 if (TARGET_SH5)
875 sh_cpu = PROCESSOR_SH5;
876 target_flags |= MASK_ALIGN_DOUBLE;
877 if (TARGET_SHMEDIA_FPU)
878 target_flags |= MASK_FMOVD;
879 if (TARGET_SHMEDIA)
881 /* There are no delay slots on SHmedia. */
882 flag_delayed_branch = 0;
883 /* Relaxation isn't yet supported for SHmedia */
884 target_flags &= ~MASK_RELAX;
885 /* After reload, if conversion does little good but can cause
886 ICEs:
887 - find_if_block doesn't do anything for SH because we don't
888 have conditional execution patterns. (We use conditional
889 move patterns, which are handled differently, and only
890 before reload).
891 - find_cond_trap doesn't do anything for the SH because we
892 don't have conditional traps.
893 - find_if_case_1 uses redirect_edge_and_branch_force in
894 the only path that does an optimization, and this causes
895 an ICE when branch targets are in registers.
896 - find_if_case_2 doesn't do anything for the SHmedia after
897 reload except when it can redirect a tablejump - and
898 that's rather rare. */
899 flag_if_conversion2 = 0;
900 if (! strcmp (sh_div_str, "call"))
901 sh_div_strategy = SH_DIV_CALL;
902 else if (! strcmp (sh_div_str, "call2"))
903 sh_div_strategy = SH_DIV_CALL2;
904 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
905 sh_div_strategy = SH_DIV_FP;
906 else if (! strcmp (sh_div_str, "inv"))
907 sh_div_strategy = SH_DIV_INV;
908 else if (! strcmp (sh_div_str, "inv:minlat"))
909 sh_div_strategy = SH_DIV_INV_MINLAT;
910 else if (! strcmp (sh_div_str, "inv20u"))
911 sh_div_strategy = SH_DIV_INV20U;
912 else if (! strcmp (sh_div_str, "inv20l"))
913 sh_div_strategy = SH_DIV_INV20L;
914 else if (! strcmp (sh_div_str, "inv:call2"))
915 sh_div_strategy = SH_DIV_INV_CALL2;
916 else if (! strcmp (sh_div_str, "inv:call"))
917 sh_div_strategy = SH_DIV_INV_CALL;
918 else if (! strcmp (sh_div_str, "inv:fp"))
920 if (TARGET_FPU_ANY)
921 sh_div_strategy = SH_DIV_INV_FP;
922 else
923 sh_div_strategy = SH_DIV_INV;
925 TARGET_CBRANCHDI4 = 0;
926 /* Assembler CFI isn't yet fully supported for SHmedia. */
927 flag_dwarf2_cfi_asm = 0;
930 else
932 /* Only the sh64-elf assembler fully supports .quad properly. */
933 targetm.asm_out.aligned_op.di = NULL;
934 targetm.asm_out.unaligned_op.di = NULL;
937 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
938 Disable it for everything else. */
939 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
940 TARGET_USERMODE = false;
942 if (TARGET_SH1)
944 if (! strcmp (sh_div_str, "call-div1"))
945 sh_div_strategy = SH_DIV_CALL_DIV1;
946 else if (! strcmp (sh_div_str, "call-fp")
947 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
948 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
949 sh_div_strategy = SH_DIV_CALL_FP;
950 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
951 sh_div_strategy = SH_DIV_CALL_TABLE;
952 else
953 /* Pick one that makes most sense for the target in general.
954 It is not much good to use different functions depending
955 on -Os, since then we'll end up with two different functions
956 when some of the code is compiled for size, and some for
957 speed. */
959 /* SH4 tends to emphasize speed. */
960 if (TARGET_HARD_SH4)
961 sh_div_strategy = SH_DIV_CALL_TABLE;
962 /* These have their own way of doing things. */
963 else if (TARGET_SH2A)
964 sh_div_strategy = SH_DIV_INTRINSIC;
965 /* ??? Should we use the integer SHmedia function instead? */
966 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
967 sh_div_strategy = SH_DIV_CALL_FP;
968 /* SH1 .. SH3 cores often go into small-footprint systems, so
969 default to the smallest implementation available. */
970 else
971 sh_div_strategy = SH_DIV_CALL_DIV1;
973 if (!TARGET_SH1)
974 TARGET_PRETEND_CMOVE = 0;
975 if (sh_divsi3_libfunc[0])
976 ; /* User supplied - leave it alone. */
977 else if (TARGET_DIVIDE_CALL_FP)
978 sh_divsi3_libfunc = "__sdivsi3_i4";
979 else if (TARGET_DIVIDE_CALL_TABLE)
980 sh_divsi3_libfunc = "__sdivsi3_i4i";
981 else if (TARGET_SH5)
982 sh_divsi3_libfunc = "__sdivsi3_1";
983 else
984 sh_divsi3_libfunc = "__sdivsi3";
986 if (sh_branch_cost == -1)
988 /* The SH1 does not have delay slots, hence we get a pipeline stall
989 at every branch. The SH4 is superscalar, so the single delay slot
990 is not sufficient to keep both pipelines filled.
991 In any case, set the default branch cost to '2', as it results in
992 slightly overall smaller code and also enables some if conversions
993 that are required for matching special T bit related insns. */
994 sh_branch_cost = 2;
997 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
998 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
999 TARGET_ZDCBRANCH = 1;
1001 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1002 if (! VALID_REGISTER_P (regno))
1003 sh_register_names[regno][0] = '\0';
1005 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1006 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1007 sh_additional_register_names[regno][0] = '\0';
1009 if ((flag_pic && ! TARGET_PREFERGOT)
1010 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1011 flag_no_function_cse = 1;
1013 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1015 /* Never run scheduling before reload, since that can
1016 break global alloc, and generates slower code anyway due
1017 to the pressure on R0. */
1018 /* Enable sched1 for SH4 if the user explicitly requests it.
1019 When sched1 is enabled, the ready queue will be reordered by
1020 the target hooks if pressure is high. We cannot do this for
1021 PIC, SH3 and lower as they give spill failures for R0. */
1022 if (!TARGET_HARD_SH4 || flag_pic)
1023 flag_schedule_insns = 0;
1024 /* ??? Current exception handling places basic block boundaries
1025 after call_insns. This causes high pressure on R0 and gives
1026 spill failures for R0 in reload. See PR 22553 and the thread
1027 on gcc-patches
1028 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1029 else if (flag_exceptions)
1031 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1032 warning (0, "ignoring -fschedule-insns because of exception "
1033 "handling bug");
1034 flag_schedule_insns = 0;
1036 else if (flag_schedule_insns
1037 && !global_options_set.x_flag_schedule_insns)
1038 flag_schedule_insns = 0;
1041 /* Unwind info is not correct around the CFG unless either a frame
1042 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1043 unwind info generation to be aware of the CFG and propagating states
1044 around edges. */
1045 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1046 || flag_exceptions || flag_non_call_exceptions)
1047 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1049 warning (0, "unwind tables currently require either a frame pointer "
1050 "or -maccumulate-outgoing-args for correctness");
1051 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1054 /* Adjust loop, jump and function alignment values (in bytes), if those
1055 were not specified by the user using -falign-loops, -falign-jumps
1056 and -falign-functions options.
1057 32 bit alignment is better for speed, because instructions can be
1058 fetched as a pair from a longword boundary. For size use 16 bit
1059 alignment to get more compact code.
1060 Aligning all jumps increases the code size, even if it might
1061 result in slightly faster code. Thus, it is set to the smallest
1062 alignment possible if not specified by the user. */
1063 if (align_loops == 0)
1065 if (TARGET_SH5)
1066 align_loops = 8;
1067 else
1068 align_loops = optimize_size ? 2 : 4;
1071 if (align_jumps == 0)
1073 if (TARGET_SHMEDIA)
1074 align_jumps = 1 << CACHE_LOG;
1075 else
1076 align_jumps = 2;
1078 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1079 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1081 if (align_functions == 0)
1083 if (TARGET_SHMEDIA)
1084 align_functions = optimize_size
1085 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1086 else
1087 align_functions = optimize_size ? 2 : 4;
1090 /* The linker relaxation code breaks when a function contains
1091 alignments that are larger than that at the start of a
1092 compilation unit. */
1093 if (TARGET_RELAX)
1095 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1097 /* Also take possible .long constants / mova tables into account. */
1098 if (min_align < 4)
1099 min_align = 4;
1100 if (align_functions < min_align)
1101 align_functions = min_align;
1104 if (flag_unsafe_math_optimizations)
1106 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1107 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1108 TARGET_FSCA = 1;
1110 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1111 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1112 TARGET_FSRRA = 1;
1115 /* Allow fsrra insn only if -funsafe-math-optimizations and
1116 -ffinite-math-only are enabled. */
1117 TARGET_FSRRA = TARGET_FSRRA
1118 && flag_unsafe_math_optimizations
1119 && flag_finite_math_only;
1121 /* If the -mieee option was not explicitly set by the user, turn it on
1122 unless -ffinite-math-only was specified. See also PR 33135. */
1123 if (! global_options_set.x_TARGET_IEEE)
1124 TARGET_IEEE = ! flag_finite_math_only;
1126 if (sh_fixed_range_str)
1127 sh_fix_range (sh_fixed_range_str);
1129 /* This target defaults to strict volatile bitfields. */
1130 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1131 flag_strict_volatile_bitfields = 1;
1133 /* Parse atomic model option and make sure it is valid for the current
1134 target CPU. */
1135 selected_atomic_model_
1136 = parse_validate_atomic_model_option (sh_atomic_model_str);
1138 register_sh_passes ();
1141 /* Print the operand address in x to the stream. */
1142 static void
1143 sh_print_operand_address (FILE *stream, rtx x)
1145 switch (GET_CODE (x))
1147 case REG:
1148 case SUBREG:
1149 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1150 break;
1152 case PLUS:
1154 rtx base = XEXP (x, 0);
1155 rtx index = XEXP (x, 1);
1157 switch (GET_CODE (index))
1159 case CONST_INT:
1160 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1161 reg_names[true_regnum (base)]);
1162 break;
1164 case REG:
1165 case SUBREG:
1167 int base_num = true_regnum (base);
1168 int index_num = true_regnum (index);
1170 fprintf (stream, "@(r0,%s)",
1171 reg_names[MAX (base_num, index_num)]);
1172 break;
1175 default:
1176 gcc_unreachable ();
1179 break;
1181 case PRE_DEC:
1182 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1183 break;
1185 case POST_INC:
1186 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1187 break;
1189 default:
1190 x = mark_constant_pool_use (x);
1191 output_addr_const (stream, x);
1192 break;
1196 /* Print operand x (an rtx) in assembler syntax to file stream
1197 according to modifier code.
1199 '.' print a .s if insn needs delay slot
1200 ',' print LOCAL_LABEL_PREFIX
1201 '@' print trap, rte or rts depending upon pragma interruptness
1202 '#' output a nop if there is nothing to put in the delay slot
1203 ''' print likelihood suffix (/u for unlikely).
1204 '>' print branch target if -fverbose-asm
1205 'O' print a constant without the #
1206 'R' print the LSW of a dp value - changes if in little endian
1207 'S' print the MSW of a dp value - changes if in little endian
1208 'T' print the next word of a dp value - same as 'R' in big endian mode.
1209 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1210 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1211 'N' print 'r63' if the operand is (const_int 0).
1212 'd' print a V2SF reg as dN instead of fpN.
1213 'm' print a pair `base,offset' or `base,index', for LD and ST.
1214 'U' Likewise for {LD,ST}{HI,LO}.
1215 'V' print the position of a single bit set.
1216 'W' print the position of a single bit cleared.
1217 't' print a memory address which is a register.
1218 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1219 'o' output an operator. */
1220 static void
1221 sh_print_operand (FILE *stream, rtx x, int code)
1223 int regno;
1224 machine_mode mode;
1226 switch (code)
1228 tree trapa_attr;
1230 case '.':
1231 if (final_sequence
1232 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1233 && get_attr_length (final_sequence->insn (1)))
1234 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1235 break;
1236 case ',':
1237 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1238 break;
1239 case '@':
1240 trapa_attr = lookup_attribute ("trap_exit",
1241 DECL_ATTRIBUTES (current_function_decl));
1242 if (trapa_attr)
1243 fprintf (stream, "trapa #%ld",
1244 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1245 else if (sh_cfun_interrupt_handler_p ())
1247 if (sh_cfun_resbank_handler_p ())
1248 fprintf (stream, "resbank\n");
1249 fprintf (stream, "rte");
1251 else
1252 fprintf (stream, "rts");
1253 break;
1254 case '#':
1255 /* Output a nop if there's nothing in the delay slot. */
1256 if (dbr_sequence_length () == 0)
1257 fprintf (stream, "\n\tnop");
1258 break;
1259 case '\'':
1261 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1263 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1264 fputs ("/u", stream);
1265 break;
1267 case '>':
1268 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1270 fputs ("\t! target: ", stream);
1271 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1273 break;
1274 case 'O':
1275 x = mark_constant_pool_use (x);
1276 output_addr_const (stream, x);
1277 break;
1278 /* N.B.: %R / %S / %T adjust memory addresses by four.
1279 For SHMEDIA, that means they can be used to access the first and
1280 second 32 bit part of a 64 bit (or larger) value that
1281 might be held in floating point registers or memory.
1282 While they can be used to access 64 bit parts of a larger value
1283 held in general purpose registers, that won't work with memory -
1284 neither for fp registers, since the frxx names are used. */
1285 case 'R':
1286 if (REG_P (x) || GET_CODE (x) == SUBREG)
1288 regno = true_regnum (x);
1289 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1290 fputs (reg_names[regno], (stream));
1292 else if (MEM_P (x))
1294 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1295 sh_print_operand_address (stream, XEXP (x, 0));
1297 else
1299 rtx sub = NULL_RTX;
1301 mode = GET_MODE (x);
1302 if (mode == VOIDmode)
1303 mode = DImode;
1304 if (GET_MODE_SIZE (mode) >= 8)
1305 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1306 if (sub)
1307 sh_print_operand (stream, sub, 0);
1308 else
1309 output_operand_lossage ("invalid operand to %%R");
1311 break;
1312 case 'S':
1313 if (REG_P (x) || GET_CODE (x) == SUBREG)
1315 regno = true_regnum (x);
1316 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1317 fputs (reg_names[regno], (stream));
1319 else if (MEM_P (x))
1321 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1322 sh_print_operand_address (stream, XEXP (x, 0));
1324 else
1326 rtx sub = NULL_RTX;
1328 mode = GET_MODE (x);
1329 if (mode == VOIDmode)
1330 mode = DImode;
1331 if (GET_MODE_SIZE (mode) >= 8)
1332 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1333 if (sub)
1334 sh_print_operand (stream, sub, 0);
1335 else
1336 output_operand_lossage ("invalid operand to %%S");
1338 break;
1339 case 'T':
1340 /* Next word of a double. */
1341 switch (GET_CODE (x))
1343 case REG:
1344 fputs (reg_names[REGNO (x) + 1], (stream));
1345 break;
1346 case MEM:
1347 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1348 && GET_CODE (XEXP (x, 0)) != POST_INC)
1349 x = adjust_address (x, SImode, 4);
1350 sh_print_operand_address (stream, XEXP (x, 0));
1351 break;
1352 default:
1353 break;
1355 break;
1357 case 't':
1358 gcc_assert (MEM_P (x));
1359 x = XEXP (x, 0);
1360 switch (GET_CODE (x))
1362 case REG:
1363 case SUBREG:
1364 sh_print_operand (stream, x, 0);
1365 break;
1366 default:
1367 break;
1369 break;
1371 case 'o':
1372 switch (GET_CODE (x))
1374 case PLUS: fputs ("add", stream); break;
1375 case MINUS: fputs ("sub", stream); break;
1376 case MULT: fputs ("mul", stream); break;
1377 case DIV: fputs ("div", stream); break;
1378 case EQ: fputs ("eq", stream); break;
1379 case NE: fputs ("ne", stream); break;
1380 case GT: case LT: fputs ("gt", stream); break;
1381 case GE: case LE: fputs ("ge", stream); break;
1382 case GTU: case LTU: fputs ("gtu", stream); break;
1383 case GEU: case LEU: fputs ("geu", stream); break;
1384 default:
1385 break;
1387 break;
1388 case 'M':
1389 if (TARGET_SHMEDIA)
1391 if (MEM_P (x)
1392 && GET_CODE (XEXP (x, 0)) == PLUS
1393 && (REG_P (XEXP (XEXP (x, 0), 1))
1394 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1395 fputc ('x', stream);
1397 else
1399 if (MEM_P (x))
1401 switch (GET_MODE (x))
1403 case QImode: fputs (".b", stream); break;
1404 case HImode: fputs (".w", stream); break;
1405 case SImode: fputs (".l", stream); break;
1406 case SFmode: fputs (".s", stream); break;
1407 case DFmode: fputs (".d", stream); break;
1408 default: gcc_unreachable ();
1412 break;
1414 case 'm':
1415 gcc_assert (MEM_P (x));
1416 x = XEXP (x, 0);
1417 /* Fall through. */
1418 case 'U':
1419 switch (GET_CODE (x))
1421 case REG:
1422 case SUBREG:
1423 sh_print_operand (stream, x, 0);
1424 fputs (", 0", stream);
1425 break;
1427 case PLUS:
1428 sh_print_operand (stream, XEXP (x, 0), 0);
1429 fputs (", ", stream);
1430 sh_print_operand (stream, XEXP (x, 1), 0);
1431 break;
1433 default:
1434 gcc_unreachable ();
1436 break;
1438 case 'V':
1440 int num = exact_log2 (INTVAL (x));
1441 gcc_assert (num >= 0);
1442 fprintf (stream, "#%d", num);
1444 break;
1446 case 'W':
1448 int num = exact_log2 (~INTVAL (x));
1449 gcc_assert (num >= 0);
1450 fprintf (stream, "#%d", num);
1452 break;
1454 case 'd':
1455 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1457 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1458 break;
1460 case 'N':
1461 if (x == CONST0_RTX (GET_MODE (x)))
1463 fprintf ((stream), "r63");
1464 break;
1466 goto default_output;
1467 case 'u':
1468 if (CONST_INT_P (x))
1470 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1471 break;
1473 /* Fall through. */
1475 default_output:
1476 default:
1477 regno = 0;
1478 mode = GET_MODE (x);
1480 switch (GET_CODE (x))
1482 case TRUNCATE:
1484 rtx inner = XEXP (x, 0);
1485 int offset = 0;
1486 machine_mode inner_mode;
1488 /* We might see SUBREGs with vector mode registers inside. */
1489 if (GET_CODE (inner) == SUBREG
1490 && (GET_MODE_SIZE (GET_MODE (inner))
1491 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1492 && subreg_lowpart_p (inner))
1493 inner = SUBREG_REG (inner);
1494 if (CONST_INT_P (inner))
1496 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1497 goto default_output;
1499 inner_mode = GET_MODE (inner);
1500 if (GET_CODE (inner) == SUBREG
1501 && (GET_MODE_SIZE (GET_MODE (inner))
1502 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1503 && REG_P (SUBREG_REG (inner)))
1505 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1506 GET_MODE (SUBREG_REG (inner)),
1507 SUBREG_BYTE (inner),
1508 GET_MODE (inner));
1509 inner = SUBREG_REG (inner);
1511 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1512 abort ();
1513 /* Floating point register pairs are always big endian;
1514 general purpose registers are 64 bit wide. */
1515 regno = REGNO (inner);
1516 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1517 - HARD_REGNO_NREGS (regno, mode))
1518 + offset;
1519 x = inner;
1520 goto reg;
1522 case SIGN_EXTEND:
1523 x = XEXP (x, 0);
1524 goto reg;
1525 /* FIXME: We need this on SHmedia32 because reload generates
1526 some sign-extended HI or QI loads into DImode registers
1527 but, because Pmode is SImode, the address ends up with a
1528 subreg:SI of the DImode register. Maybe reload should be
1529 fixed so as to apply alter_subreg to such loads? */
1530 case IF_THEN_ELSE:
1531 gcc_assert (trapping_target_operand (x, VOIDmode));
1532 x = XEXP (XEXP (x, 2), 0);
1533 goto default_output;
1534 case SUBREG:
1535 gcc_assert (SUBREG_BYTE (x) == 0
1536 && REG_P (SUBREG_REG (x)));
1538 x = SUBREG_REG (x);
1539 /* Fall through. */
1541 reg:
1542 case REG:
1543 regno += REGNO (x);
1544 if (FP_REGISTER_P (regno)
1545 && mode == V16SFmode)
1546 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1547 else if (FP_REGISTER_P (REGNO (x))
1548 && mode == V4SFmode)
1549 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1550 else if (REG_P (x)
1551 && mode == V2SFmode)
1552 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1553 else if (FP_REGISTER_P (REGNO (x))
1554 && GET_MODE_SIZE (mode) > 4)
1555 fprintf ((stream), "d%s", reg_names[regno] + 1);
1556 else
1557 fputs (reg_names[regno], (stream));
1558 break;
1560 case MEM:
1561 output_address (XEXP (x, 0));
1562 break;
1564 default:
1565 if (TARGET_SH1)
1566 fputc ('#', stream);
1567 output_addr_const (stream, x);
1568 break;
1570 break;
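/* Editorial examples of some modifiers handled above, derived from the
   cases in sh_print_operand:

     %V on (const_int 8)           prints "#3"    since exact_log2 (8) == 3
     %W on (const_int -5)          prints "#2"    since ~(-5) == 4
     %u on (const_int 0x12345678)  prints "22136" i.e. the low 16 bits.  */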
1574 static bool
1575 sh_print_operand_punct_valid_p (unsigned char code)
1577 return (code == '.' || code == '#' || code == '@' || code == ','
1578 || code == '$' || code == '\'' || code == '>');
1581 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1582 static bool
1583 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1585 if (GET_CODE (x) == UNSPEC)
1587 switch (XINT (x, 1))
1589 case UNSPEC_DATALABEL:
1590 fputs ("datalabel ", file);
1591 output_addr_const (file, XVECEXP (x, 0, 0));
1592 break;
1593 case UNSPEC_PIC:
1594 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1595 output_addr_const (file, XVECEXP (x, 0, 0));
1596 break;
1597 case UNSPEC_GOT:
1598 output_addr_const (file, XVECEXP (x, 0, 0));
1599 fputs ("@GOT", file);
1600 break;
1601 case UNSPEC_GOTOFF:
1602 output_addr_const (file, XVECEXP (x, 0, 0));
1603 fputs ("@GOTOFF", file);
1604 break;
1605 case UNSPEC_PLT:
1606 output_addr_const (file, XVECEXP (x, 0, 0));
1607 fputs ("@PLT", file);
1608 break;
1609 case UNSPEC_GOTPLT:
1610 output_addr_const (file, XVECEXP (x, 0, 0));
1611 fputs ("@GOTPLT", file);
1612 break;
1613 case UNSPEC_DTPOFF:
1614 output_addr_const (file, XVECEXP (x, 0, 0));
1615 fputs ("@DTPOFF", file);
1616 break;
1617 case UNSPEC_GOTTPOFF:
1618 output_addr_const (file, XVECEXP (x, 0, 0));
1619 fputs ("@GOTTPOFF", file);
1620 break;
1621 case UNSPEC_TPOFF:
1622 output_addr_const (file, XVECEXP (x, 0, 0));
1623 fputs ("@TPOFF", file);
1624 break;
1625 case UNSPEC_CALLER:
1627 char name[32];
1628 /* LPCS stands for Label for PIC Call Site. */
1629 targetm.asm_out.generate_internal_label (name, "LPCS",
1630 INTVAL (XVECEXP (x, 0, 0)));
1631 assemble_name (file, name);
1633 break;
1634 case UNSPEC_EXTRACT_S16:
1635 case UNSPEC_EXTRACT_U16:
1637 rtx val, shift;
1639 val = XVECEXP (x, 0, 0);
1640 shift = XVECEXP (x, 0, 1);
1641 fputc ('(', file);
1642 if (shift != const0_rtx)
1643 fputc ('(', file);
1644 if (GET_CODE (val) == CONST
1645 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1647 fputc ('(', file);
1648 output_addr_const (file, val);
1649 fputc (')', file);
1651 else
1652 output_addr_const (file, val);
1653 if (shift != const0_rtx)
1655 fputs (" >> ", file);
1656 output_addr_const (file, shift);
1657 fputc (')', file);
1659 fputs (" & 65535)", file);
1661 break;
1662 case UNSPEC_SYMOFF:
1663 output_addr_const (file, XVECEXP (x, 0, 0));
1664 fputc ('-', file);
1665 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1667 fputc ('(', file);
1668 output_addr_const (file, XVECEXP (x, 0, 1));
1669 fputc (')', file);
1671 else
1672 output_addr_const (file, XVECEXP (x, 0, 1));
1673 break;
1674 case UNSPEC_PCREL_SYMOFF:
1675 output_addr_const (file, XVECEXP (x, 0, 0));
1676 fputs ("-(", file);
1677 output_addr_const (file, XVECEXP (x, 0, 1));
1678 fputs ("-.)", file);
1679 break;
1680 default:
1681 return false;
1683 return true;
1685 else
1686 return false;
1689 /* Encode symbol attributes of a SYMBOL_REF into its
1690 SYMBOL_REF_FLAGS. */
1691 static void
1692 sh_encode_section_info (tree decl, rtx rtl, int first)
1694 default_encode_section_info (decl, rtl, first);
1696 if (TREE_CODE (decl) == FUNCTION_DECL
1697 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1698 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1701 /* Prepare operands for a move define_expand; specifically, one of the
1702 operands must be in a register. */
1703 void
1704 prepare_move_operands (rtx operands[], machine_mode mode)
1706 if ((mode == SImode || mode == DImode)
1707 && flag_pic
1708 && ! ((mode == Pmode || mode == ptr_mode)
1709 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1711 rtx temp;
1712 if (SYMBOLIC_CONST_P (operands[1]))
1714 if (MEM_P (operands[0]))
1715 operands[1] = force_reg (Pmode, operands[1]);
1716 else if (TARGET_SHMEDIA
1717 && GET_CODE (operands[1]) == LABEL_REF
1718 && target_reg_operand (operands[0], mode))
1719 /* It's ok. */;
1720 else
1722 temp = (!can_create_pseudo_p ()
1723 ? operands[0]
1724 : gen_reg_rtx (Pmode));
1725 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1728 else if (GET_CODE (operands[1]) == CONST
1729 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1730 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1732 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1733 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1734 mode, temp);
1735 operands[1] = expand_binop (mode, add_optab, temp,
1736 XEXP (XEXP (operands[1], 0), 1),
1737 (!can_create_pseudo_p ()
1738 ? temp
1739 : gen_reg_rtx (Pmode)),
1740 0, OPTAB_LIB_WIDEN);
1744 if (! reload_in_progress && ! reload_completed)
1746 /* Copy the source to a register if neither operand is a register. */
1747 if (! register_operand (operands[0], mode)
1748 && ! sh_register_operand (operands[1], mode))
1749 operands[1] = copy_to_mode_reg (mode, operands[1]);
1751 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1753 /* This is like change_address_1 (operands[0], mode, 0, 1),
1754 except that we can't use that function because it is static. */
1755 rtx new_rtx = change_address (operands[0], mode, 0);
1756 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1757 operands[0] = new_rtx;
1760 /* This case can happen while generating code to move the result
1761 of a library call to the target. Reject `st r0,@(rX,rY)' because
1762 reload will fail to find a spill register for rX, since r0 is already
1763 being used for the source. */
1764 else if (TARGET_SH1
1765 && refers_to_regno_p (R0_REG, operands[1])
1766 && MEM_P (operands[0])
1767 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1768 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1769 operands[1] = copy_to_mode_reg (mode, operands[1]);
1771 /* When displacement addressing is used, RA will assign r0 to
1772 the pseudo register operand for the QI/HImode load/store.
1773 This tends to make a long live range for R0 and might cause
1774 anomalous register spills in some cases with LRA. See PR
1775 target/55212.
1776 We split such a load/store into two move insns via r0 so as to
1777 shorten the R0 live range. This makes some code worse but wins
1778 on average for LRA.
1779 Also, when base+index addressing is used and the index term is
1780 a subreg, LRA assumes that more hard registers can be available
1781 in some situations. That is not the case for SH in the problematic
1782 case. We can pre-allocate R0 for that index term to avoid
1783 the issue. See PR target/66591. */
1784 else if (sh_lra_p ()
1785 && TARGET_SH1 && ! TARGET_SH2A
1786 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1787 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1789 bool load_p = REG_P (operands[0]);
1790 rtx reg = operands[load_p ? 0 : 1];
1791 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1793 if ((mode == QImode || mode == HImode)
1794 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1795 && GET_CODE (adr) == PLUS
1796 && REG_P (XEXP (adr, 0))
1797 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1798 && CONST_INT_P (XEXP (adr, 1))
1799 && INTVAL (XEXP (adr, 1)) != 0
1800 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1802 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1803 emit_move_insn (r0_rtx, operands[1]);
1804 operands[1] = r0_rtx;
1806 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1807 && GET_CODE (adr) == PLUS
1808 && REG_P (XEXP (adr, 0))
1809 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1810 && SUBREG_P (XEXP (adr, 1))
1811 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1813 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1814 emit_move_insn (r0_rtx, XEXP (adr, 1));
1815 XEXP (adr, 1) = r0_rtx;
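/* Illustrative sketch of the effect of the code above (names and RTL shapes
   are only examples): a QImode load such as
       (set (reg:QI pseudo1) (mem:QI (plus:SI (reg:SI pseudo2) (const_int 4))))
   is emitted as a move of the memory operand into r0 followed by a copy of
   r0 into pseudo1, keeping the displacement access tied to r0 for as short
   a live range as possible.  Likewise, a subreg index term of a base+index
   address is copied into r0 before the access.  */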
1820 if (mode == Pmode || mode == ptr_mode)
1822 rtx op0, op1, opc;
1823 enum tls_model tls_kind;
1825 op0 = operands[0];
1826 op1 = operands[1];
1827 if (GET_CODE (op1) == CONST
1828 && GET_CODE (XEXP (op1, 0)) == PLUS
1829 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1830 != TLS_MODEL_NONE))
1832 opc = XEXP (XEXP (op1, 0), 1);
1833 op1 = XEXP (XEXP (op1, 0), 0);
1835 else
1836 opc = NULL_RTX;
1838 if (! reload_in_progress && ! reload_completed
1839 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1841 rtx tga_op1, tga_ret, tmp, tmp2;
1843 if (! flag_pic
1844 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1845 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1846 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1848 /* Don't schedule insns for getting GOT address when
1849 the first scheduling is enabled, to avoid spill
1850 failures for R0. */
1851 if (flag_schedule_insns)
1852 emit_insn (gen_blockage ());
1853 emit_insn (gen_GOTaddr2picreg ());
1854 emit_use (gen_rtx_REG (SImode, PIC_REG));
1855 if (flag_schedule_insns)
1856 emit_insn (gen_blockage ());
1859 switch (tls_kind)
1861 case TLS_MODEL_GLOBAL_DYNAMIC:
1862 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1863 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1864 tmp = gen_reg_rtx (Pmode);
1865 emit_move_insn (tmp, tga_ret);
1866 op1 = tmp;
1867 break;
1869 case TLS_MODEL_LOCAL_DYNAMIC:
1870 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1871 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1873 tmp = gen_reg_rtx (Pmode);
1874 emit_move_insn (tmp, tga_ret);
1876 if (register_operand (op0, Pmode))
1877 tmp2 = op0;
1878 else
1879 tmp2 = gen_reg_rtx (Pmode);
1881 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1882 op1 = tmp2;
1883 break;
1885 case TLS_MODEL_INITIAL_EXEC:
1886 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1887 tmp = gen_sym2GOTTPOFF (op1);
1888 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1889 op1 = tga_op1;
1890 break;
1892 case TLS_MODEL_LOCAL_EXEC:
1893 tmp2 = gen_reg_rtx (Pmode);
1894 emit_insn (gen_store_gbr (tmp2));
1895 tmp = gen_reg_rtx (Pmode);
1896 emit_insn (gen_symTPOFF2reg (tmp, op1));
1898 if (register_operand (op0, Pmode))
1899 op1 = op0;
1900 else
1901 op1 = gen_reg_rtx (Pmode);
1903 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1904 break;
1906 default:
1907 gcc_unreachable ();
1909 if (opc)
1910 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1911 operands[1] = op1;
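/* Rough sketch of the local-exec case above (not verbatim output):
       tmp2 := GBR                    (thread pointer, via store_gbr)
       tmp  := sym@TPOFF              (constant load via symTPOFF2reg)
       op1  := tmp + tmp2
   followed by the ordinary move of op1 into operands[0].  The other TLS
   models differ only in how the offset or address is obtained: a call to
   the global/local dynamic helper, or a @GOTTPOFF load for initial-exec.  */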
1916 /* Implement the canonicalize_comparison target hook for the combine
1917 pass. For the target hook this function is invoked via
1918 sh_canonicalize_comparison. This function is also re-used to
1919 canonicalize comparisons in cbranch pattern expanders. */
1920 static void
1921 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1922 machine_mode mode,
1923 bool op0_preserve_value)
1925 /* When invoked from within the combine pass the mode is not specified,
1926 so try to get it from one of the operands. */
1927 if (mode == VOIDmode)
1928 mode = GET_MODE (op0);
1929 if (mode == VOIDmode)
1930 mode = GET_MODE (op1);
1932 // We need to have a mode to do something useful here.
1933 if (mode == VOIDmode)
1934 return;
1936 // Currently, we don't deal with floats here.
1937 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1938 return;
1940 // Make sure that the constant operand is the second operand.
1941 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1943 if (op0_preserve_value)
1944 return;
1946 std::swap (op0, op1);
1947 cmp = swap_condition (cmp);
1950 if (CONST_INT_P (op1))
1952 /* Try to adjust the constant operand in such a way that available
1953 comparison insns can be utilized better and the constant can be
1954 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1955 constant pool. */
1956 const HOST_WIDE_INT val = INTVAL (op1);
1958 /* x > -1 --> x >= 0
1959 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1960 x <= -1 --> x < 0
1961 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1962 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1964 cmp = cmp == GT ? GE : LT;
1965 op1 = gen_int_mode (val + 1, mode);
1968 /* x >= 1 --> x > 0
1969 x >= 0x80 --> x > 0x7F
1970 x < 1 --> x <= 0
1971 x < 0x80 --> x <= 0x7F */
1972 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1974 cmp = cmp == GE ? GT : LE;
1975 op1 = gen_int_mode (val - 1, mode);
1978 /* unsigned x >= 1 --> x != 0
1979 unsigned x < 1 --> x == 0 */
1980 else if (val == 1 && (cmp == GEU || cmp == LTU))
1982 cmp = cmp == GEU ? NE : EQ;
1983 op1 = CONST0_RTX (mode);
1986 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1987 unsigned x < 0x80 --> unsigned x < 0x7F */
1988 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1990 cmp = cmp == GEU ? GTU : LEU;
1991 op1 = gen_int_mode (val - 1, mode);
1994 /* unsigned x > 0 --> x != 0
1995 unsigned x <= 0 --> x == 0 */
1996 else if (val == 0 && (cmp == GTU || cmp == LEU))
1997 cmp = cmp == GTU ? NE : EQ;
1999 /* unsigned x > 0x7FFFFFFF --> signed x < 0
2000 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
2001 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2002 && val == 0x7FFFFFFF)
2004 cmp = cmp == GTU ? LT : GE;
2005 op1 = const0_rtx;
2008 /* unsigned x >= 0x80000000 --> signed x < 0
2009 unsigned x < 0x80000000 --> signed x >= 0 */
2010 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2011 && (unsigned HOST_WIDE_INT)val
2012 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2014 cmp = cmp == GEU ? LT : GE;
2015 op1 = const0_rtx;
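/* Worked example (for illustration only): with SImode operands, 'x >= 128U'
   arrives here as (GEU, x, 0x80) and leaves as (GTU, x, 0x7F); 0x7F still
   fits the signed 8-bit immediate of 'mov #imm,Rm' while 0x80 does not, so
   a constant pool load is avoided.  'x >= 0x80000000U' is turned into the
   plain sign test (LT, x, 0).  */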
2020 /* This function implements the canonicalize_comparison target hook.
2021 This wrapper around the internally used sh_canonicalize_comparison
2022 function is needed to do the enum rtx_code <-> int conversion.
2023 Target hooks cannot use enum rtx_code in their definition. */
2024 static void
2025 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2026 bool op0_preserve_value)
2028 enum rtx_code tmp_code = (enum rtx_code)*code;
2029 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2030 VOIDmode, op0_preserve_value);
2031 *code = (int)tmp_code;
2034 /* This function implements the legitimate_combined_insn target hook,
2035 which the combine pass uses to early reject combined insns, before
2036 it tries to recog the insn and determine its cost. */
2037 static bool
2038 sh_legitimate_combined_insn (rtx_insn* insn)
2040 /* Reject combinations of memory loads and zero extensions, as these
2041 interfere with other combine patterns such as zero extracts and bit
2042 tests. The SH2A movu.{b|w} insns are formed later in the
2043 'sh_optimize_extu_exts' pass after combine/split1. */
2044 rtx p = PATTERN (insn);
2045 if (GET_CODE (p) == SET
2046 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2047 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2048 && MEM_P (XEXP (XEXP (p, 1), 0)))
2049 return false;
2051 return true;
2054 bool
2055 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2057 *p1 = T_REG;
2058 *p2 = INVALID_REGNUM;
2059 return true;
2062 enum rtx_code
2063 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2064 enum rtx_code comparison)
2066 /* The scratch reg is only available when this is invoked from within
2067 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2068 rtx scratch = NULL_RTX;
2070 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2071 comparison = GET_CODE (operands[0]);
2072 else
2073 scratch = operands[4];
2075 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2076 mode, false);
2078 /* Notice that this function is also invoked after reload by
2079 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2080 rtx op1 = operands[1];
2082 if (can_create_pseudo_p ())
2083 operands[1] = force_reg (mode, op1);
2084 /* When we are handling DImode comparisons, we want to keep constants so
2085 that we can optimize the component comparisons; however, memory loads
2086 are better issued as a whole so that they can be scheduled well.
2087 SImode equality comparisons allow I08 constants, but only when they
2088 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2089 into a register, that register might as well be r0, and we allow the
2090 constant. If it is already in a register, this is likely to be
2091 allocated to a different hard register, thus we load the constant into
2092 a register unless it is zero. */
2093 if (!REG_P (operands[2])
2094 && (!CONST_INT_P (operands[2])
2095 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2096 && ((comparison != EQ && comparison != NE)
2097 || (REG_P (op1) && REGNO (op1) != R0_REG)
2098 || !satisfies_constraint_I08 (operands[2])))))
2100 if (scratch && GET_MODE (scratch) == mode)
2102 emit_move_insn (scratch, operands[2]);
2103 operands[2] = scratch;
2105 else if (can_create_pseudo_p ())
2106 operands[2] = force_reg (mode, operands[2]);
2108 return comparison;
2111 void
2112 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2114 rtx (*branch_expander) (rtx) = gen_branch_true;
2115 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2116 switch (comparison)
2118 case NE: case LT: case LE: case LTU: case LEU:
2119 comparison = reverse_condition (comparison);
2120 branch_expander = gen_branch_false;
2121 default: ;
2123 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2124 gen_rtx_fmt_ee (comparison, SImode,
2125 operands[1], operands[2])));
2126 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2127 if (probability >= 0)
2128 add_int_reg_note (jump, REG_BR_PROB, probability);
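/* Sketch of the emitted code (approximate): for a signed 'less than' branch
   the comparison is reversed to GE above, so roughly
       cmp/ge  op2,op1      ! T := op1 >= op2
       bf      target       ! taken when T is clear, i.e. op1 < op2
   is generated, while EQ, GT, GE, GTU and GEU keep their condition and use
   bt instead.  */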
2131 /* ??? How should we distribute probabilities when more than one branch
2132 is generated. So far we only have some ad-hoc observations:
2133 - If the operands are random, they are likely to differ in both parts.
2134 - If comparing items in a hash chain, the operands are random or equal;
2135 operation should be EQ or NE.
2136 - If items are searched in an ordered tree from the root, we can expect
2137 the highpart to be unequal about half of the time; operation should be
2138 an inequality comparison, operands non-constant, and overall probability
2139 about 50%. Likewise for quicksort.
2140 - Range checks will often be made against constants. Even if we assume for
2141 simplicity an even distribution of the non-constant operand over a
2142 sub-range here, the same probability could be generated with differently
2143 wide sub-ranges - as long as the ratio of the part of the subrange that
2144 is before the threshold to the part that comes after the threshold stays
2145 the same. Thus, we can't really tell anything here;
2146 assuming a random distribution is at least simple.
2148 bool
2149 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2151 enum rtx_code msw_taken, msw_skip, lsw_taken;
2152 rtx_code_label *skip_label = NULL;
2153 rtx op1h, op1l, op2h, op2l;
2154 int num_branches;
2155 int prob, rev_prob;
2156 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2157 rtx scratch = operands[4];
2159 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2160 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2161 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2162 op1l = gen_lowpart (SImode, operands[1]);
2163 op2l = gen_lowpart (SImode, operands[2]);
2164 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2165 prob = split_branch_probability;
2166 rev_prob = REG_BR_PROB_BASE - prob;
2167 switch (comparison)
2169 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2170 That costs 1 cycle more when the first branch can be predicted taken,
2171 but saves us mispredicts because only one branch needs prediction.
2172 It also enables generating the cmpeqdi_t-1 pattern. */
2173 case EQ:
2174 if (TARGET_CMPEQDI_T)
2176 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2177 emit_jump_insn (gen_branch_true (operands[3]));
2178 return true;
2180 msw_skip = NE;
2181 lsw_taken = EQ;
2182 if (prob >= 0)
2184 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2185 msw_skip_prob = rev_prob;
2186 if (REG_BR_PROB_BASE <= 65535)
2187 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2188 else
2190 lsw_taken_prob
2191 = (prob
2192 ? (REG_BR_PROB_BASE
2193 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2194 / ((gcov_type) prob << 32)))
2195 : 0);
2198 break;
2199 case NE:
2200 if (TARGET_CMPEQDI_T)
2202 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2203 emit_jump_insn (gen_branch_false (operands[3]));
2204 return true;
2206 msw_taken = NE;
2207 msw_taken_prob = prob;
2208 lsw_taken = NE;
2209 lsw_taken_prob = 0;
2210 break;
2211 case GTU: case GT:
2212 msw_taken = comparison;
2213 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2214 break;
2215 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2216 msw_skip = swap_condition (msw_taken);
2217 lsw_taken = GTU;
2218 break;
2219 case GEU: case GE:
2220 if (op2l == CONST0_RTX (SImode))
2221 msw_taken = comparison;
2222 else
2224 msw_taken = comparison == GE ? GT : GTU;
2225 msw_skip = swap_condition (msw_taken);
2226 lsw_taken = GEU;
2228 break;
2229 case LTU: case LT:
2230 msw_taken = comparison;
2231 if (op2l == CONST0_RTX (SImode))
2232 break;
2233 msw_skip = swap_condition (msw_taken);
2234 lsw_taken = LTU;
2235 break;
2236 case LEU: case LE:
2237 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2238 msw_taken = comparison;
2239 else
2241 lsw_taken = LEU;
2242 if (comparison == LE)
2243 msw_taken = LT;
2244 else if (op2h != CONST0_RTX (SImode))
2245 msw_taken = LTU;
2246 else
2248 msw_skip = swap_condition (LTU);
2249 break;
2251 msw_skip = swap_condition (msw_taken);
2253 break;
2254 default: return false;
2256 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2257 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2258 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2259 if (comparison != EQ && comparison != NE && num_branches > 1)
2261 if (!CONSTANT_P (operands[2])
2262 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2263 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2265 msw_taken_prob = prob / 2U;
2266 msw_skip_prob
2267 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2268 lsw_taken_prob = prob;
2270 else
2272 msw_taken_prob = prob;
2273 msw_skip_prob = REG_BR_PROB_BASE;
2274 /* ??? If we have a constant op2h, should we use that when
2275 calculating lsw_taken_prob? */
2276 lsw_taken_prob = prob;
2279 operands[1] = op1h;
2280 operands[2] = op2h;
2281 operands[4] = NULL_RTX;
2282 if (reload_completed
2283 && ! arith_reg_or_0_operand (op2h, SImode)
2284 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2285 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2286 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2288 emit_move_insn (scratch, operands[2]);
2289 operands[2] = scratch;
2291 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2292 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2293 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2295 rtx taken_label = operands[3];
2297 /* Operands were possibly modified, but msw_skip doesn't expect this.
2298 Always use the original ones. */
2299 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2301 operands[1] = op1h;
2302 operands[2] = op2h;
2303 if (reload_completed
2304 && ! arith_reg_or_0_operand (op2h, SImode)
2305 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2307 emit_move_insn (scratch, operands[2]);
2308 operands[2] = scratch;
2312 operands[3] = skip_label = gen_label_rtx ();
2313 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2314 operands[3] = taken_label;
2316 operands[1] = op1l;
2317 operands[2] = op2l;
2318 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2320 if (reload_completed
2321 && ! arith_reg_or_0_operand (op2l, SImode)
2322 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2324 emit_move_insn (scratch, operands[2]);
2325 operands[2] = scratch;
2327 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2329 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2330 emit_label (skip_label);
2331 return true;
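/* Rough illustration of the splitting above: a signed DImode 'op1 > op2'
   becomes three SImode branches,
       if (op1.hi >  op2.hi) goto target;     (msw_taken = GT)
       if (op1.hi <  op2.hi) goto skip;       (msw_skip  = LT)
       if (op1.lo >u op2.lo) goto target;     (lsw_taken = GTU)
     skip:
   EQ and NE need only two branches, and with TARGET_CMPEQDI_T a single
   cmpeqdi_t insn is used instead.  */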
2334 /* Given an operand, return 1 if the evaluated operand plugged into an
2335 if_then_else will result in a branch_true, 0 if branch_false, or
2336 -1 if neither applies. The truth table goes like this:
2338 op | cmpval | code | result
2339 ---------+--------+---------+--------------------
2340 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2341 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2342 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2343 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2344 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2345 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2346 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2347 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2349 sh_eval_treg_value (rtx op)
2351 if (t_reg_operand (op, GET_MODE (op)))
2352 return 1;
2353 if (negt_reg_operand (op, GET_MODE (op)))
2354 return 0;
2356 rtx_code code = GET_CODE (op);
2357 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2358 return -1;
2360 int cmpop = code == EQ ? 1 : 0;
2361 int cmpval = INTVAL (XEXP (op, 1));
2362 if (cmpval != 0 && cmpval != 1)
2363 return -1;
2365 int t;
2366 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2367 t = 0;
2368 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2369 t = 1;
2370 else
2371 return -1;
2373 return t ^ (cmpval == cmpop);
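/* Example (illustrative): for op = (eq (reg T) (const_int 0)) the inner
   operand is the T reg itself, so t = 0, cmpop = 1 and cmpval = 0, giving
   0 ^ (0 == 1) = 0, i.e. the construct behaves like branch_false.  This is
   the first row of the truth table above.  */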
2376 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2377 of floating-point comparisons. */
2378 static void
2379 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2381 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2382 && GET_CODE (insn) != PARALLEL)
2384 insn = gen_rtx_PARALLEL (VOIDmode,
2385 gen_rtvec (3, insn,
2386 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2387 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2389 emit_insn (insn);
2392 /* Prepare the operands for an scc instruction; make sure that the
2393 compare has been done and the result is in T_REG. */
2394 void
2395 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2397 rtx t_reg = get_t_reg_rtx ();
2398 enum rtx_code oldcode = code;
2399 machine_mode mode;
2401 /* First need a compare insn. */
2402 switch (code)
2404 case NE:
2405 /* It isn't possible to handle this case. */
2406 gcc_unreachable ();
2407 case LT:
2408 code = GT;
2409 break;
2410 case LE:
2411 code = GE;
2412 break;
2413 case LTU:
2414 code = GTU;
2415 break;
2416 case LEU:
2417 code = GEU;
2418 break;
2419 default:
2420 break;
2422 if (code != oldcode)
2423 std::swap (op0, op1);
2425 mode = GET_MODE (op0);
2426 if (mode == VOIDmode)
2427 mode = GET_MODE (op1);
2429 op0 = force_reg (mode, op0);
2430 if ((code != EQ && code != NE
2431 && (op1 != const0_rtx
2432 || code == GTU || code == GEU || code == LTU || code == LEU))
2433 || (mode == DImode && op1 != const0_rtx)
2434 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2435 op1 = force_reg (mode, op1);
2437 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2438 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2439 mode);
2443 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2444 rtx op0, rtx op1)
2446 rtx target = gen_reg_rtx (SImode);
2447 rtx tmp;
2449 gcc_assert (TARGET_SHMEDIA);
2450 switch (code)
2452 case EQ:
2453 case GT:
2454 case LT:
2455 case UNORDERED:
2456 case GTU:
2457 case LTU:
2458 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2459 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2460 code = NE;
2461 break;
2463 case NE:
2464 case GE:
2465 case LE:
2466 case ORDERED:
2467 case GEU:
2468 case LEU:
2469 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2470 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2471 code = EQ;
2472 break;
2474 case UNEQ:
2475 case UNGE:
2476 case UNGT:
2477 case UNLE:
2478 case UNLT:
2479 case LTGT:
2480 return NULL_RTX;
2482 default:
2483 gcc_unreachable ();
2486 if (mode == DImode)
2488 rtx t2 = gen_reg_rtx (DImode);
2489 emit_insn (gen_extendsidi2 (t2, target));
2490 target = t2;
2493 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2496 /* Called from the md file, set up the operands of a compare instruction. */
2497 void
2498 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2500 enum rtx_code code = GET_CODE (operands[0]);
2501 enum rtx_code branch_code;
2502 rtx op0 = operands[1];
2503 rtx op1 = operands[2];
2504 rtx insn;
2505 bool need_ccmpeq = false;
2507 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2509 op0 = force_reg (mode, op0);
2510 op1 = force_reg (mode, op1);
2512 else
2514 if (code != EQ || mode == DImode)
2516 /* Force args into regs, since we can't use constants here. */
2517 op0 = force_reg (mode, op0);
2518 if (op1 != const0_rtx || code == GTU || code == GEU)
2519 op1 = force_reg (mode, op1);
2523 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2525 if (code == LT
2526 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2527 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2529 std::swap (op0, op1);
2530 code = swap_condition (code);
2533 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2534 if (code == GE)
2536 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2537 need_ccmpeq = true;
2538 code = GT;
2541 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2542 to EQ/GT respectively. */
2543 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2546 switch (code)
2548 case EQ:
2549 case GT:
2550 case GE:
2551 case GTU:
2552 case GEU:
2553 branch_code = code;
2554 break;
2555 case NE:
2556 case LT:
2557 case LE:
2558 case LTU:
2559 case LEU:
2560 branch_code = reverse_condition (code);
2561 break;
2562 default:
2563 gcc_unreachable ();
2566 insn = gen_rtx_SET (get_t_reg_rtx (),
2567 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2569 sh_emit_set_t_insn (insn, mode);
2570 if (need_ccmpeq)
2571 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2573 if (branch_code == code)
2574 emit_jump_insn (gen_branch_true (operands[3]));
2575 else
2576 emit_jump_insn (gen_branch_false (operands[3]));
2579 void
2580 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2582 enum rtx_code code = GET_CODE (operands[1]);
2583 rtx op0 = operands[2];
2584 rtx op1 = operands[3];
2585 rtx_code_label *lab = NULL;
2586 bool invert = false;
2588 op0 = force_reg (mode, op0);
2589 if ((code != EQ && code != NE
2590 && (op1 != const0_rtx
2591 || code == GTU || code == GEU || code == LTU || code == LEU))
2592 || (mode == DImode && op1 != const0_rtx)
2593 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2594 op1 = force_reg (mode, op1);
2596 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2598 if (code == LT || code == LE)
2600 std::swap (op0, op1);
2601 code = swap_condition (code);
2603 if (code == GE)
2605 if (TARGET_IEEE)
2607 lab = gen_label_rtx ();
2608 sh_emit_scc_to_t (EQ, op0, op1);
2609 emit_jump_insn (gen_branch_true (lab));
2610 code = GT;
2612 else
2614 code = LT;
2615 invert = true;
2620 if (code == NE)
2622 code = EQ;
2623 invert = true;
2626 sh_emit_scc_to_t (code, op0, op1);
2627 if (lab)
2628 emit_label (lab);
2629 if (invert)
2630 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2631 else
2632 emit_move_insn (operands[0], get_t_reg_rtx ());
2635 /* Functions to output assembly code. */
2637 /* Return a sequence of instructions to perform a DI or DF move.
2639 Since the SH cannot move a DI or DF in one instruction, we have
2640 to take care when we see overlapping source and dest registers. */
2641 const char *
2642 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2643 machine_mode mode)
2645 rtx dst = operands[0];
2646 rtx src = operands[1];
2648 if (MEM_P (dst)
2649 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2650 return "mov.l %T1,%0" "\n"
2651 " mov.l %1,%0";
2653 if (register_operand (dst, mode)
2654 && register_operand (src, mode))
2656 if (REGNO (src) == MACH_REG)
2657 return "sts mach,%S0" "\n"
2658 " sts macl,%R0";
2660 /* When doing mov.d r1,r2 (dst low word overlaps src high word) move r2->r3
2661 first, then r1->r2; when doing mov.d r1,r0 move r1->r0 first, then r2->r1. */
2662 if (REGNO (src) + 1 == REGNO (dst))
2663 return "mov %T1,%T0" "\n"
2664 " mov %1,%0";
2665 else
2666 return "mov %1,%0" "\n"
2667 " mov %T1,%T0";
2669 else if (CONST_INT_P (src))
2671 if (INTVAL (src) < 0)
2672 output_asm_insn ("mov #-1,%S0", operands);
2673 else
2674 output_asm_insn ("mov #0,%S0", operands);
2676 return "mov %1,%R0";
2678 else if (MEM_P (src))
2680 int ptrreg = -1;
2681 int dreg = REGNO (dst);
2682 rtx inside = XEXP (src, 0);
2684 switch (GET_CODE (inside))
2686 case REG:
2687 ptrreg = REGNO (inside);
2688 break;
2690 case SUBREG:
2691 ptrreg = subreg_regno (inside);
2692 break;
2694 case PLUS:
2695 ptrreg = REGNO (XEXP (inside, 0));
2696 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2697 an offsettable address. Unfortunately, offsettable addresses use
2698 QImode to check the offset, and a QImode offsettable address
2699 requires r0 for the other operand, which is not currently
2700 supported, so we can't use the 'o' constraint.
2701 Thus we must check for and handle r0+REG addresses here.
2702 We punt for now, since this is likely very rare. */
2703 gcc_assert (!REG_P (XEXP (inside, 1)));
2704 break;
2706 case LABEL_REF:
2707 return "mov.l %1,%0" "\n"
2708 " mov.l %1+4,%T0";
2709 case POST_INC:
2710 return "mov.l %1,%0" "\n"
2711 " mov.l %1,%T0";
2712 default:
2713 gcc_unreachable ();
2716 /* Work out the safe way to copy. Copy into the second half first. */
2717 if (dreg == ptrreg)
2718 return "mov.l %T1,%T0" "\n"
2719 " mov.l %1,%0";
2722 return "mov.l %1,%0" "\n"
2723 " mov.l %T1,%T0";
2726 /* Print an instruction which would have gone into a delay slot after
2727 another instruction, but couldn't because the other instruction expanded
2728 into a sequence where putting the slot insn at the end wouldn't work. */
2729 static void
2730 print_slot (rtx_sequence *seq)
2732 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2734 seq->insn (1)->set_deleted ();
2737 const char *
2738 output_far_jump (rtx_insn *insn, rtx op)
2740 struct { rtx lab, reg, op; } this_jmp;
2741 rtx_code_label *braf_base_lab = NULL;
2742 const char *jump;
2743 int far;
2744 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2745 rtx_insn *prev;
2747 this_jmp.lab = gen_label_rtx ();
2749 if (TARGET_SH2
2750 && offset >= -32764
2751 && offset - get_attr_length (insn) <= 32766
2752 && ! CROSSING_JUMP_P (insn))
2754 far = 0;
2755 jump = "mov.w %O0,%1" "\n"
2756 " braf %1";
2758 else
2760 far = 1;
2761 if (flag_pic)
2763 if (TARGET_SH2)
2764 jump = "mov.l %O0,%1" "\n"
2765 " braf %1";
2766 else
2767 jump = "mov.l r0,@-r15" "\n"
2768 " mova %O0,r0" "\n"
2769 " mov.l @r0,%1" "\n"
2770 " add r0,%1" "\n"
2771 " mov.l @r15+,r0" "\n"
2772 " jmp @%1";
2774 else
2775 jump = "mov.l %O0,%1" "\n"
2776 " jmp @%1";
2778 /* If we have a scratch register available, use it. */
2779 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2780 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2782 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2783 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2784 jump = "mov.l r1,@-r15" "\n"
2785 " mova %O0,r0" "\n"
2786 " mov.l @r0,r1" "\n"
2787 " add r1,r0" "\n"
2788 " mov.l @r15+,r1" "\n"
2789 " jmp @%1";
2790 output_asm_insn (jump, &this_jmp.lab);
2791 if (dbr_sequence_length ())
2792 print_slot (final_sequence);
2793 else
2794 output_asm_insn ("nop", 0);
2796 else
2798 /* Output the delay slot insn first if any. */
2799 if (dbr_sequence_length ())
2800 print_slot (final_sequence);
2802 this_jmp.reg = gen_rtx_REG (SImode, 13);
2803 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2804 Fortunately, MACL is fixed and call-clobbered, and we never
2805 need its value across jumps, so save r13 in it instead of on
2806 the stack. */
2807 if (TARGET_SH5)
2808 output_asm_insn ("lds r13,macl", 0);
2809 else
2810 output_asm_insn ("mov.l r13,@-r15", 0);
2811 output_asm_insn (jump, &this_jmp.lab);
2812 if (TARGET_SH5)
2813 output_asm_insn ("sts macl,r13", 0);
2814 else
2815 output_asm_insn ("mov.l @r15+,r13", 0);
2817 if (far && flag_pic && TARGET_SH2)
2819 braf_base_lab = gen_label_rtx ();
2820 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2821 CODE_LABEL_NUMBER (braf_base_lab));
2823 if (far)
2824 output_asm_insn (".align 2", 0);
2825 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2826 this_jmp.op = op;
2827 if (far && flag_pic)
2829 if (TARGET_SH2)
2830 this_jmp.lab = braf_base_lab;
2831 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2833 else
2834 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2835 return "";
2838 /* Local label counter, used for constants in the pool and inside
2839 pattern branches. */
2840 static int lf = 100;
2842 /* Output code for ordinary branches. */
2843 const char *
2844 output_branch (int logic, rtx_insn *insn, rtx *operands)
2846 switch (get_attr_length (insn))
2848 case 6:
2849 /* This can happen if filling the delay slot has caused a forward
2850 branch to exceed its range (we could reverse it, but only
2851 when we know we won't overextend other branches; this should
2852 best be handled by relaxation).
2853 It can also happen when other condbranches hoist delay slot insns
2854 from their destination, thus leading to code size increase.
2855 But the branch will still be in the range -4092..+4098 bytes. */
2856 if (! TARGET_RELAX)
2858 int label = lf++;
2859 /* The call to print_slot will clobber the operands. */
2860 rtx op0 = operands[0];
2862 /* If the instruction in the delay slot is annulled (true), then
2863 there is no delay slot where we can put it now. The only safe
2864 place for it is after the label. final will do that by default. */
2866 if (final_sequence
2867 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2868 && get_attr_length (final_sequence->insn (1)))
2870 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2871 ASSEMBLER_DIALECT ? "/" : ".", label);
2872 print_slot (final_sequence);
2874 else
2875 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2877 output_asm_insn ("bra\t%l0", &op0);
2878 fprintf (asm_out_file, "\tnop\n");
2879 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2881 return "";
2883 /* When relaxing, handle this like a short branch. The linker
2884 will fix it up if it still doesn't fit after relaxation. */
2885 case 2:
2886 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2888 /* These are for SH2e, in which we have to account for the
2889 extra nop because of the hardware bug in annulled branches. */
2890 case 8:
2891 if (! TARGET_RELAX)
2893 int label = lf++;
2895 gcc_assert (!final_sequence
2896 || !(INSN_ANNULLED_BRANCH_P
2897 (XVECEXP (final_sequence, 0, 0))));
2898 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2899 logic ? "f" : "t",
2900 ASSEMBLER_DIALECT ? "/" : ".", label);
2901 fprintf (asm_out_file, "\tnop\n");
2902 output_asm_insn ("bra\t%l0", operands);
2903 fprintf (asm_out_file, "\tnop\n");
2904 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2906 return "";
2908 /* When relaxing, fall through. */
2909 case 4:
2911 char buffer[10];
2913 sprintf (buffer, "b%s%ss\t%%l0",
2914 logic ? "t" : "f",
2915 ASSEMBLER_DIALECT ? "/" : ".");
2916 output_asm_insn (buffer, &operands[0]);
2917 return "nop";
2920 default:
2921 /* There should be no longer branches now - that would
2922 indicate that something has destroyed the branches set
2923 up in machine_dependent_reorg. */
2924 gcc_unreachable ();
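/* For illustration, the 6 byte case above turns an out-of-range conditional
   branch into a short branch around an unconditional one, roughly
       bf      .LF100          ! inverted condition skips the bra
       bra     .Ltarget
       nop
   .LF100:
   (bt is used instead when LOGIC is zero).  The 8 byte SH2e variant emits an
   extra nop after the conditional branch to account for the annulled-branch
   hardware bug mentioned above.  */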
2928 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2929 fill in operand 9 as a label to the successor insn.
2930 We try to use jump threading where possible.
2931 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2932 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2933 follow jmp and bt, if the address is in range. */
2934 const char *
2935 output_branchy_insn (enum rtx_code code, const char *templ,
2936 rtx_insn *insn, rtx *operands)
2938 rtx_insn *next_insn = NEXT_INSN (insn);
2940 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2942 rtx src = SET_SRC (PATTERN (next_insn));
2943 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2945 /* Following branch not taken */
2946 rtx_code_label *lab = gen_label_rtx ();
2947 emit_label_after (lab, next_insn);
2948 INSN_ADDRESSES_NEW (lab,
2949 INSN_ADDRESSES (INSN_UID (next_insn))
2950 + get_attr_length (next_insn));
2951 operands[9] = lab;
2952 return templ;
2954 else
2956 int offset = (branch_dest (next_insn)
2957 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2958 if (offset >= -252 && offset <= 258)
2960 if (GET_CODE (src) == IF_THEN_ELSE)
2961 /* branch_true */
2962 src = XEXP (src, 1);
2963 operands[9] = src;
2964 return templ;
2968 rtx_code_label *lab = gen_label_rtx ();
2969 emit_label_after (lab, insn);
2970 INSN_ADDRESSES_NEW (lab,
2971 INSN_ADDRESSES (INSN_UID (insn))
2972 + get_attr_length (insn));
2973 operands[9] = lab;
2974 return templ;
2977 const char *
2978 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2980 return output_branchy_insn (NE, "bt %l9" "\n"
2981 " fcmp/eq %1,%0",
2982 insn, operands);
2985 /* Output the start of the assembler file. */
2986 static void
2987 sh_file_start (void)
2989 default_file_start ();
2991 if (TARGET_ELF)
2992 /* We need to show the text section with the proper
2993 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2994 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2995 will complain. We can teach GAS specifically about the
2996 default attributes for our choice of text section, but
2997 then we would have to change GAS again if/when we change
2998 the text section name. */
2999 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
3000 else
3001 /* Switch to the data section so that the coffsem symbol
3002 isn't in the text section. */
3003 switch_to_section (data_section);
3005 if (TARGET_LITTLE_ENDIAN)
3006 fputs ("\t.little\n", asm_out_file);
3008 if (!TARGET_ELF)
3010 if (TARGET_SHCOMPACT)
3011 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3012 else if (TARGET_SHMEDIA)
3013 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3014 TARGET_SHMEDIA64 ? 64 : 32);
3018 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
3019 static bool
3020 unspec_caller_rtx_p (rtx pat)
3022 rtx base, offset;
3023 int i;
3025 split_const (pat, &base, &offset);
3026 if (GET_CODE (base) == UNSPEC)
3028 if (XINT (base, 1) == UNSPEC_CALLER)
3029 return true;
3030 for (i = 0; i < XVECLEN (base, 0); i++)
3031 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3032 return true;
3034 return false;
3037 /* Indicate that INSN cannot be duplicated. This is true for an insn
3038 that generates a unique label. */
3039 static bool
3040 sh_cannot_copy_insn_p (rtx_insn *insn)
3042 rtx pat;
3044 if (!reload_completed || !flag_pic)
3045 return false;
3047 if (!NONJUMP_INSN_P (insn))
3048 return false;
3049 if (asm_noperands (insn) >= 0)
3050 return false;
3052 pat = PATTERN (insn);
3053 if (GET_CODE (pat) != SET)
3054 return false;
3055 pat = SET_SRC (pat);
3057 if (unspec_caller_rtx_p (pat))
3058 return true;
3060 return false;
3063 /* Number of instructions used to make an arithmetic right shift by N. */
3064 static const char ashiftrt_insns[] =
3065 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3067 /* Description of a logical left or right shift, when expanded to a sequence
3068 of 1/2/8/16 shifts.
3069 Notice that one bit right shifts clobber the T bit. One bit left shifts
3070 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
3071 enum
3073 ASHL_CLOBBERS_T = 1 << 0,
3074 LSHR_CLOBBERS_T = 1 << 1
3077 struct ashl_lshr_sequence
3079 char insn_count;
3080 signed char amount[6];
3081 char clobbers_t;
3084 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3086 { 0, { 0 }, 0 }, // 0
3087 { 1, { 1 }, LSHR_CLOBBERS_T },
3088 { 1, { 2 }, 0 },
3089 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3090 { 2, { 2, 2 }, 0 }, // 4
3091 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3092 { 3, { 2, 2, 2 }, 0 },
3093 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3094 { 1, { 8 }, 0 }, // 8
3095 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3096 { 2, { 8, 2 }, 0 },
3097 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3098 { 3, { 8, 2, 2 }, 0 }, // 12
3099 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3100 { 3, { 8, -2, 8 }, 0 },
3101 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3102 { 1, { 16 }, 0 }, // 16
3103 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3104 { 2, { 16, 2 }, 0 },
3105 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3106 { 3, { 16, 2, 2 }, 0 }, // 20
3107 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3108 { 3, { 16, -2, 8 }, 0 },
3109 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3110 { 2, { 16, 8 }, 0 }, // 24
3111 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3112 { 3, { 16, 8, 2 }, 0 },
3113 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3114 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3115 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3116 { 3, { 16, -2, 16 }, 0 },
3118 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3119 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3120 However, the shift-and combiner code needs this entry here to be in
3121 terms of real shift insns. */
3122 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
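/* How to read the table (an illustrative note): entry N lists the 1/2/8/16
   step shifts whose amounts sum to N, with negative amounts meaning a shift
   in the opposite direction.  E.g. a left shift by 20 uses { 16, 2, 2 },
   i.e. shll16 / shll2 / shll2, and a left shift by 14 uses { 8, -2, 8 },
   i.e. shll8 / shlr2 / shll8.  Logical right shifts use the mirrored shlr*
   insns.  */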
3125 /* Individual shift amounts for shift amounts < 16, where up to the three
3126 highmost bits might be clobbered. This is typically used when combined
3127 with some kind of sign or zero extension. */
3128 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3130 { 0, { 0 }, 0 }, // 0
3131 { 1, { 1 }, LSHR_CLOBBERS_T },
3132 { 1, { 2 }, 0 },
3133 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3134 { 2, { 2, 2 }, 0 }, // 4
3135 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3136 { 2, { 8, -2 }, 0 },
3137 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3138 { 1, { 8 }, 0 }, // 8
3139 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3140 { 2, { 8, 2 }, 0 },
3141 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3142 { 3, { 8, 2, 2 }, 0 }, // 12
3143 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3144 { 2, { 16, -2 }, 0 },
3145 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3146 { 1, { 16 }, 0 }, // 16
3147 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3148 { 2, { 16, 2 }, 0 },
3149 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3150 { 3, { 16, 2, 2 }, 0 }, // 20
3151 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3152 { 3, { 16, -2, 8 }, 0 },
3153 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3154 { 2, { 16, 8 }, 0 }, // 24
3155 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3156 { 3, { 16, 8, 2 }, 0 },
3157 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3158 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3159 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3160 { 3, { 16, -2, 16 }, 0 },
3161 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3164 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3165 will clobber the T bit. */
3166 bool
3167 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3169 gcc_assert (CONST_INT_P (shift_amount));
3171 const int shift_amount_i = INTVAL (shift_amount) & 31;
3173 /* Special case for shift count of 31: use and-rotl sequence. */
3174 if (shift_amount_i == 31)
3175 return true;
3177 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3178 & ASHL_CLOBBERS_T) != 0;
3181 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3182 instructions will clobber the T bit. */
3183 bool
3184 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3186 gcc_assert (CONST_INT_P (shift_amount));
3188 const int shift_amount_i = INTVAL (shift_amount) & 31;
3190 /* Special case for shift count of 31: use shll-movt sequence. */
3191 if (shift_amount_i == 31)
3192 return true;
3194 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3195 & LSHR_CLOBBERS_T) != 0;
3198 /* Return true if it is potentially beneficial to use a dynamic shift
3199 instruction (shad / shar) instead of a combination of 1/2/8/16
3200 shift instructions for the specified shift count.
3201 If dynamic shifts are not available, always return false. */
3202 bool
3203 sh_dynamicalize_shift_p (rtx count)
3205 gcc_assert (CONST_INT_P (count));
3207 const int shift_amount_i = INTVAL (count) & 31;
3208 int insn_count;
3210 /* For left and right shifts, there are shorter 2 insn sequences for
3211 shift amounts of 31. */
3212 if (shift_amount_i == 31)
3213 insn_count = 2;
3214 else
3215 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3217 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
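/* Example (assuming SH_DYNAMIC_SHIFT_COST is 1 on the target at hand): a
   shift by 21 needs four single-step insns ({ 16, 2, 1, 2 }), so on CPUs
   with dynamic shifts the two insn sequence
       mov     #21,Rm
       shld    Rm,Rn           ! shad for arithmetic shifts
   is preferred, whereas a shift by 2 (a single shll2/shlr2) is not
   dynamicalized.  */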
3220 /* Assuming we have a value that has been sign-extended by at least one bit,
3221 can we use the ext_shift_amounts with the last shift turned to an
3222 arithmetic shift to shift it by N without data loss, and quicker than by
3223 other means? */
3224 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3226 /* Return the cost of a shift. */
3227 static inline int
3228 shiftcosts (rtx x)
3230 int value;
3232 if (TARGET_SHMEDIA)
3233 return 1;
3235 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3237 if (GET_MODE (x) == DImode
3238 && CONST_INT_P (XEXP (x, 1))
3239 && INTVAL (XEXP (x, 1)) == 1)
3240 return 2;
3242 /* Everything else is invalid, because there is no pattern for it. */
3243 return -1;
3245 /* If shifting by a non-constant, then this will be expensive. */
3246 if (!CONST_INT_P (XEXP (x, 1)))
3247 return SH_DYNAMIC_SHIFT_COST;
3249 /* Otherwise, return the true cost in instructions. Cope with out of range
3250 shift counts more or less arbitrarily. */
3251 value = INTVAL (XEXP (x, 1)) & 31;
3253 if (GET_CODE (x) == ASHIFTRT)
3255 int cost = ashiftrt_insns[value];
3256 /* If dynamic shifts are available and profitable in this case, then we
3257 put the constant in a reg and use shad. */
3258 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3259 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3260 return cost;
3262 else
3263 return ashl_lshr_seq[value].insn_count;
3266 /* Return the cost of an AND/XOR/IOR operation. */
3267 static inline int
3268 and_xor_ior_costs (rtx x, int code)
3270 /* On SH1-4 we have at most SImode operations.
3271 Double the cost for modes > SImode. */
3272 const int cost_scale = !TARGET_SHMEDIA
3273 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3274 ? 2 : 1;
3276 /* A logical operation with two registers is a single cycle
3277 instruction. */
3278 if (!CONST_INT_P (XEXP (x, 1)))
3279 return 1 * cost_scale;
3281 int i = INTVAL (XEXP (x, 1));
3283 if (TARGET_SHMEDIA)
3285 if (satisfies_constraint_I10 (XEXP (x, 1))
3286 || satisfies_constraint_J16 (XEXP (x, 1)))
3287 return 1;
3288 else
3289 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3292 /* These constants are single cycle extu.[bw] instructions. */
3293 if ((i == 0xff || i == 0xffff) && code == AND)
3294 return 1 * cost_scale;
3295 /* Constants that can be used in an instruction as an immediate take
3296 a single cycle, but they require r0, so make them a little more
3297 expensive. */
3298 if (CONST_OK_FOR_K08 (i))
3299 return 2 * cost_scale;
3300 /* Constants that can be loaded with a mov immediate need one more cycle.
3301 This case is probably unnecessary. */
3302 if (CONST_OK_FOR_I08 (i))
3303 return 2 * cost_scale;
3304 /* Any other constant requires an additional 2 cycle pc-relative load.
3305 This case is probably unnecessary. */
3306 return 3 * cost_scale;
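/* A few illustrative data points for the costs above: (and Rn 0xff) costs 1
   (a single extu.b), (and Rn 0x0f) costs 2 (the K08 immediate form needs r0,
   as in "and #15,r0"), and (and Rn 0x1234) costs 3 because the constant must
   first be loaded from the constant pool.  The costs are doubled for modes
   wider than SImode on non-SHmedia targets.  */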
3309 /* Return the cost of an addition or a subtraction. */
3310 static inline int
3311 addsubcosts (rtx x)
3313 if (GET_MODE (x) == SImode)
3315 /* The addc or subc patterns will eventually become one or two
3316 instructions. Below are some costs for some of the patterns
3317 which combine would reject because the costs of the individual
3318 insns in the patterns are lower.
3320 FIXME: It would be much easier if we had something like insn cost
3321 attributes and the cost calculation machinery used those attributes
3322 in the first place. This would eliminate redundant recog-like C
3323 code to calculate costs of complex patterns. */
3324 rtx op0 = XEXP (x, 0);
3325 rtx op1 = XEXP (x, 1);
3327 if (GET_CODE (x) == PLUS)
3329 if (GET_CODE (op0) == AND
3330 && XEXP (op0, 1) == const1_rtx
3331 && (GET_CODE (op1) == PLUS
3332 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3333 return 1;
3335 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3336 && GET_CODE (op1) == LSHIFTRT
3337 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3338 return 1;
3340 /* Let's assume that adding the result of an insn that stores into
3341 the T bit is cheap. */
3342 if (treg_set_expr (op1, SImode))
3343 return 1;
3344 if (treg_set_expr (op0, SImode))
3345 return 1;
3348 /* On SH1-4 we have at most SImode operations.
3349 Double the cost for modes > SImode. */
3350 const int cost_scale = !TARGET_SHMEDIA
3351 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3352 ? 2 : 1;
3354 /* Adding a register is a single cycle insn. */
3355 if (REG_P (XEXP (x, 1))
3356 || GET_CODE (XEXP (x, 1)) == SUBREG)
3357 return 1 * cost_scale;
3359 /* Likewise for small constants. */
3360 if (CONST_INT_P (XEXP (x, 1))
3361 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3362 return 1 * cost_scale;
3364 if (TARGET_SHMEDIA)
3365 switch (GET_CODE (XEXP (x, 1)))
3367 case CONST:
3368 case LABEL_REF:
3369 case SYMBOL_REF:
3370 return TARGET_SHMEDIA64 ? 5 : 3;
3372 case CONST_INT:
3373 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3374 return 2;
3375 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3376 return 3;
3377 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3378 return 4;
3380 /* Fall through. */
3381 default:
3382 return 5;
3385 /* Any other constant requires a 2 cycle pc-relative load plus an
3386 addition. */
3387 return 3 * cost_scale;
3390 /* Return the cost of a multiply. */
3391 static inline int
3392 multcosts (rtx x ATTRIBUTE_UNUSED)
3394 if (sh_multcost >= 0)
3395 return sh_multcost;
3396 if (TARGET_SHMEDIA)
3397 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3398 accept constants. Ideally, we would use a cost of one or two and
3399 add the cost of the operand, but disregard the latter when inside loops
3400 and loop invariant code motion is still to follow.
3401 Using a multiply first and splitting it later if it's a loss
3402 doesn't work because of different sign / zero extension semantics
3403 of multiplies vs. shifts. */
3404 return optimize_size ? 2 : 3;
3406 if (TARGET_SH2)
3408 /* We have a mul insn, so we can never take more than the mul and the
3409 read of the mac reg, but count more because of the latency and extra
3410 reg usage. */
3411 if (optimize_size)
3412 return 2;
3413 return 3;
3416 /* If we're aiming at small code, then just count the number of
3417 insns in a multiply call sequence. */
3418 if (optimize_size)
3419 return 5;
3421 /* Otherwise count all the insns in the routine we'd be calling too. */
3422 return 20;
3425 /* Compute a (partial) cost for rtx X. Return true if the complete
3426 cost has been computed, and false if subexpressions should be
3427 scanned. In either case, *TOTAL contains the cost result. */
3428 static bool
3429 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3430 int *total, bool speed ATTRIBUTE_UNUSED)
3432 switch (code)
3434 /* The lower-subreg pass decides whether to split multi-word regs
3435 into individual regs by looking at the cost for a SET of certain
3436 modes with the following patterns:
3437 (set (reg) (reg))
3438 (set (reg) (const_int 0))
3439 On machines that support vector-move operations a multi-word move
3440 is the same cost as individual reg move. On SH there is no
3441 vector-move, so we have to provide the correct cost in the number
3442 of move insns to load/store the reg of the mode in question. */
3443 case SET:
3444 if (register_operand (SET_DEST (x), VOIDmode)
3445 && (register_operand (SET_SRC (x), VOIDmode)
3446 || satisfies_constraint_Z (SET_SRC (x))))
3448 const machine_mode mode = GET_MODE (SET_DEST (x));
3449 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3450 / mov_insn_size (mode, TARGET_SH2A));
3451 return true;
3453 return false;
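/* Example (illustrative, assuming SImode-sized moves): a DImode reg-reg SET
   yields COSTS_N_INSNS (8 / 4), i.e. two insns, matching the pair of mov
   instructions actually needed and letting lower-subreg decide correctly
   whether splitting the multi-word pseudo pays off.  */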
3455 /* The cost of a mem access is mainly the cost of the address mode. */
3456 case MEM:
3457 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3458 true);
3459 return true;
3461 case IF_THEN_ELSE:
3462 /* This case is required for the if_then_else negc pattern. */
3463 if (treg_set_expr (XEXP (x, 0), SImode))
3465 *total = COSTS_N_INSNS (1);
3466 return true;
3468 else
3469 return false;
3471 /* Zero extracts of single bits are usually combine patterns for the
3472 tst insns. */
3473 case ZERO_EXTRACT:
3474 if (GET_CODE (XEXP (x, 0)) == XOR
3475 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3476 && XEXP (x, 1) == const1_rtx
3477 && CONST_INT_P (XEXP (x, 2))
3478 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3479 /* Check that the xor constant overlaps with the extracted bit. */
3480 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3482 *total = 1; //COSTS_N_INSNS (1);
3483 return true;
3485 return false;
3487 /* The cost of a sign or zero extend depends on whether the source is a
3488 reg or a mem. In case of a mem take the address into account. */
3489 case SIGN_EXTEND:
3490 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3492 *total = COSTS_N_INSNS (1);
3493 return true;
3495 if (MEM_P (XEXP (x, 0)))
3497 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3498 GET_MODE (XEXP (x, 0)),
3499 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3500 return true;
3502 return false;
3504 case ZERO_EXTEND:
3505 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3507 *total = COSTS_N_INSNS (1);
3508 return true;
3510 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3511 && (GET_MODE (XEXP (x, 0)) == QImode
3512 || GET_MODE (XEXP (x, 0)) == HImode))
3514 /* Handle SH2A's movu.b and movu.w insns. */
3515 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3516 GET_MODE (XEXP (x, 0)),
3517 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3518 return true;
3520 return false;
3522 /* mems for SFmode and DFmode can be inside a parallel due to
3523 the way the fpscr is handled. */
3524 case PARALLEL:
3525 for (int i = 0; i < XVECLEN (x, 0); i++)
3527 rtx xx = XVECEXP (x, 0, i);
3528 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3530 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3531 GET_MODE (XEXP (xx, 0)),
3532 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3533 return true;
3535 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3537 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3538 GET_MODE (XEXP (xx, 1)),
3539 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3540 return true;
3544 if (sh_1el_vec (x, VOIDmode))
3545 *total = outer_code != SET;
3546 else if (sh_rep_vec (x, VOIDmode))
3547 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3548 + (outer_code != SET));
3549 else
3550 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3551 return true;
3553 case CONST_INT:
3554 if (TARGET_SHMEDIA)
3556 if (INTVAL (x) == 0)
3557 *total = 0;
3558 else if (outer_code == AND && and_operand ((x), DImode))
3559 *total = 0;
3560 else if ((outer_code == IOR || outer_code == XOR
3561 || outer_code == PLUS)
3562 && CONST_OK_FOR_I10 (INTVAL (x)))
3563 *total = 0;
3564 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3565 *total = COSTS_N_INSNS (outer_code != SET);
3566 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3567 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3568 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3569 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3570 else
3571 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3572 return true;
3574 if (CONST_OK_FOR_I08 (INTVAL (x)))
3575 *total = 0;
3576 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3577 && CONST_OK_FOR_K08 (INTVAL (x)))
3578 *total = 1;
3579 /* prepare_cmp_insn will force costly constants into registers before
3580 the cbranch[sd]i4 patterns can see them, so preserve potentially
3581 interesting ones not covered by I08 above. */
3582 else if (outer_code == COMPARE
3583 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3584 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3585 || INTVAL (x) == 0x7fffffff
3586 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3587 *total = 1;
3588 else
3589 *total = 8;
3590 return true;
3592 case EQ:
3593 /* An and with a constant compared against zero is
3594 most likely going to be a TST #imm, R0 instruction.
3595 Notice that this does not catch the zero_extract variants from
3596 the md file. */
3597 if (XEXP (x, 1) == const0_rtx
3598 && (GET_CODE (XEXP (x, 0)) == AND
3599 || (SUBREG_P (XEXP (x, 0))
3600 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3602 *total = 1;
3603 return true;
3606 else if (XEXP (x, 1) == const0_rtx
3607 && GET_CODE (XEXP (x, 0)) == AND
3608 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3610 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3611 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3613 *total = 1;
3614 return true;
3616 else
3617 return false;
3619 case SMIN:
3620 case SMAX:
3621 /* This is most likely a clips.b or clips.w insn that is being made up
3622 by combine. */
3623 if (TARGET_SH2A
3624 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3625 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3626 && REG_P (XEXP (XEXP (x, 0), 0))
3627 && CONST_INT_P (XEXP (x, 1)))
3629 *total = COSTS_N_INSNS (1);
3630 return true;
3632 else
3633 return false;
3635 case CONST:
3636 case LABEL_REF:
3637 case SYMBOL_REF:
3638 if (TARGET_SHMEDIA64)
3639 *total = COSTS_N_INSNS (4);
3640 else if (TARGET_SHMEDIA32)
3641 *total = COSTS_N_INSNS (2);
3642 else
3643 *total = 5;
3644 return true;
3646 case CONST_DOUBLE:
3647 if (TARGET_SHMEDIA)
3648 *total = COSTS_N_INSNS (4);
3649 /* prepare_cmp_insn will force costly constants into registers before
3650 the cbranchdi4 pattern can see them, so preserve potentially
3651 interesting ones. */
3652 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3653 *total = 1;
3654 else
3655 *total = 10;
3656 return true;
3658 case CONST_VECTOR:
3659 /* FIXME: This looks broken. Only the last statement has any effect.
3660 Probably this could be folded with the PARALLEL case? */
3661 if (x == CONST0_RTX (GET_MODE (x)))
3662 *total = 0;
3663 else if (sh_1el_vec (x, VOIDmode))
3664 *total = outer_code != SET;
3665 if (sh_rep_vec (x, VOIDmode))
3666 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3667 + (outer_code != SET));
3668 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3669 return true;
3671 case PLUS:
3672 case MINUS:
3673 *total = COSTS_N_INSNS (addsubcosts (x));
3674 return true;
3676 case AND:
3677 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3678 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3680 *total = COSTS_N_INSNS (1);
3681 return true;
3683 /* Fall through. */
3685 case XOR:
3686 case IOR:
3687 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3688 return true;
3690 case MULT:
3691 *total = COSTS_N_INSNS (multcosts (x));
3692 return true;
3694 case LT:
3695 case GE:
3696 /* div0s sign comparison. */
3697 if (GET_CODE (XEXP (x, 0)) == XOR
3698 && REG_P ((XEXP (XEXP (x, 0), 0)))
3699 && REG_P ((XEXP (XEXP (x, 0), 1)))
3700 && satisfies_constraint_Z (XEXP (x, 1)))
3702 *total = COSTS_N_INSNS (1);
3703 return true;
3705 else
3706 return false;
3708 case LSHIFTRT:
3709 /* div0s sign comparison. */
3710 if (GET_CODE (XEXP (x, 0)) == XOR
3711 && REG_P ((XEXP (XEXP (x, 0), 0)))
3712 && REG_P ((XEXP (XEXP (x, 0), 1)))
3713 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3715 *total = COSTS_N_INSNS (1);
3716 return true;
3718 /* Fall through to shiftcosts. */
3719 case ASHIFT:
3720 case ASHIFTRT:
3722 int cost = shiftcosts (x);
3723 if (cost < 0)
3724 return false;
3725 *total = COSTS_N_INSNS (cost);
3726 return true;
3729 case DIV:
3730 case UDIV:
3731 case MOD:
3732 case UMOD:
3733 *total = COSTS_N_INSNS (20);
3734 return true;
3736 case FLOAT:
3737 case FIX:
3738 *total = 100;
3739 return true;
3741 default:
3742 return false;
3746 /* Determine the size of the fundamental move insn that will be used
3747 for the specified mode. */
3748 static inline int
3749 mov_insn_size (machine_mode mode, bool consider_sh2a)
3751 const int mode_sz = GET_MODE_SIZE (mode);
3753 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3754 || (TARGET_FMOVD && mode == DFmode))
3755 return mode_sz;
3756 else
3758 /* The max. available mode for actual move insns is SImode.
3759 Larger accesses will be split into multiple loads/stores. */
3760 const int max_mov_sz = GET_MODE_SIZE (SImode);
3761 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
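/* Illustrative: DFmode is moved as one 8 byte access when double precision
   moves are available (SH2A double or FMOVD), otherwise it is split into
   two 4 byte accesses; DImode likewise maxes out at the 4 byte SImode
   move size.  */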
3765 /* Determine the maximum possible displacement for a move insn for the
3766 specified mode. */
3768 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3770 /* The 4 byte displacement move insns are the same as the 2 byte
3771 versions but take a 12 bit displacement. All we need to do is to
3772 scale the max. displacement value accordingly. */
3773 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3775 /* SH2A supports FPU move insns with 12 bit displacements.
3776 Other variants do not support any kind of displacement for
3777 FPU move insns. */
3778 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3779 return 0;
3780 else
3782 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3783 const int mode_sz = GET_MODE_SIZE (mode);
3784 int r = 15 * mov_insn_sz * disp_scale;
3786 /* If the mov insn will be split into multiple loads/stores, the
3787 maximum possible displacement is a bit smaller. */
3788 if (mode_sz > mov_insn_sz)
3789 r -= mode_sz - mov_insn_sz;
3790 return r;
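/* Worked examples (illustrative): for SImode without SH2A this yields
   15 * 4 = 60; for DImode the result is reduced to 56 because the access
   is split into two SImode moves; for QImode with the SH2A 12 bit
   displacements it becomes 15 * 1 * (4095 / 15) = 4095.  */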
3794 /* Determine the alignment mask for a move insn of the
3795 specified mode. */
3796 static inline int
3797 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3799 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3800 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
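/* E.g. an SImode move insn is 4 bytes wide, giving an alignment mask of 3,
   while a QImode move yields a mask of 0.  */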
3803 /* Return the displacement value of a displacement address. */
3804 HOST_WIDE_INT
3805 sh_disp_addr_displacement (rtx x)
3807 gcc_assert (satisfies_constraint_Sdd (x));
3808 return INTVAL (XEXP (XEXP (x, 0), 1));
3811 /* Compute the cost of an address. */
3812 static int
3813 sh_address_cost (rtx x, machine_mode mode,
3814 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3816 /* 'GBR + 0'. Account one more because of R0 restriction. */
3817 if (REG_P (x) && REGNO (x) == GBR_REG)
3818 return 2;
3820 /* Simple reg, post-inc, pre-dec addressing. */
3821 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3822 return 1;
3824 /* 'reg + disp' addressing. */
3825 if (GET_CODE (x) == PLUS
3826 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3828 /* 'GBR + disp'. Account one more because of R0 restriction. */
3829 if (REGNO (XEXP (x, 0)) == GBR_REG
3830 && gbr_displacement (XEXP (x, 1), mode))
3831 return 2;
3833 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3835 if (offset == 0)
3836 return 1;
3838 /* The displacement would fit into a 2 byte move insn.
3839 HImode and QImode loads/stores with displacement put pressure on
3840 R0 which will most likely require another reg copy. Thus account
3841 a higher cost for that. */
3842 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3843 return (mode == HImode || mode == QImode) ? 2 : 1;
3845 /* The displacement would fit into a 4 byte move insn (SH2A). */
3846 if (TARGET_SH2A
3847 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3848 return 2;
3850 /* The displacement is probably out of range and will require extra
3851 calculations. */
3852 return 3;
3855 /* 'reg + reg' addressing. Account a slightly higher cost because of
3856 increased pressure on R0. */
3857 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3858 && ! TARGET_SHMEDIA)
3859 return 3;
3861 /* Not sure what it is - probably expensive. */
3862 return 10;
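/* Illustrative cost examples for sh_address_cost: a plain '@(r4)' access
   costs 1, an SImode '@(8,r4)' also costs 1, a QImode '@(8,r4)' costs 2
   because of the R0 pressure, and an in-range GBR displacement access
   costs 2 as well.  */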
3865 /* Code to expand a shift. */
3866 static void
3867 gen_ashift (int type, int n, rtx reg)
3869 rtx n_rtx;
3871 /* Negative values here come from the shift_amounts array. */
3872 if (n < 0)
3874 if (type == ASHIFT)
3875 type = LSHIFTRT;
3876 else
3877 type = ASHIFT;
3878 n = -n;
3881 n_rtx = GEN_INT (n);
3882 gcc_assert (satisfies_constraint_P27 (n_rtx));
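/* The P27 constraint accepts the shift counts the SH shift insns handle
   directly: 1, 2, 8 and 16.  */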
3884 switch (type)
3886 case ASHIFTRT:
3887 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3888 break;
3889 case LSHIFTRT:
3890 if (n == 1)
3891 emit_insn (gen_shlr (reg, reg));
3892 else
3893 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3894 break;
3895 case ASHIFT:
3896 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3897 break;
3898 default:
3899 gcc_unreachable ();
3903 /* Code to expand a HImode shift. */
3904 static void
3905 gen_ashift_hi (int type, int n, rtx reg)
3907 /* Negative values here come from the shift_amounts array. */
3908 if (n < 0)
3910 if (type == ASHIFT)
3911 type = LSHIFTRT;
3912 else
3913 type = ASHIFT;
3914 n = -n;
3917 switch (type)
3919 case ASHIFTRT:
3920 case LSHIFTRT:
3921 /* We don't have HImode right shift operations because using the
3922 ordinary 32 bit shift instructions for that doesn't generate proper
3923 zero/sign extension.
3924 gen_ashift_hi is only called in contexts where we know that the
3925 sign extension works out correctly. */
3927 int offset = 0;
3928 if (GET_CODE (reg) == SUBREG)
3930 offset = SUBREG_BYTE (reg);
3931 reg = SUBREG_REG (reg);
3933 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3934 break;
3936 case ASHIFT:
3937 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3938 break;
3942 /* Output RTL to split a constant shift into its component SH constant
3943 shift instructions. */
3944 void
3945 gen_shifty_op (int code, rtx *operands)
3947 int value = INTVAL (operands[2]);
3948 int max, i;
3950 /* Truncate the shift count in case it is out of bounds. */
3951 value = value & 31;
3953 if (value == 31)
3955 if (code == LSHIFTRT)
3957 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3958 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3959 return;
3961 else if (code == ASHIFT)
3963 /* There is a two instruction sequence for 31 bit left shifts,
3964 but it requires r0. */
3965 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3967 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3968 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3969 return;
3973 else if (value == 0)
3975 /* This can happen even when optimizing, if there were subregs before
3976 reload. Don't output a nop here, as this is never optimized away;
3977 use a no-op move instead. */
3978 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3979 return;
3982 max = ashl_lshr_seq[value].insn_count;
3983 for (i = 0; i < max; i++)
3984 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
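/* For illustration: since a single insn only shifts by 1, 2, 8 or 16, a
   constant shift such as 10 is emitted as a sequence of shifts (e.g. 8
   then 2) taken from the ashl_lshr_seq table; negative amounts in the
   table mean a shift in the opposite direction.  */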
3987 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3988 don't matter. */
3989 void
3990 gen_shifty_hi_op (int code, rtx *operands)
3992 int value = INTVAL (operands[2]);
3993 int max, i;
3994 void (*gen_fun) (int, int, rtx);
3996 /* This operation is used by and_shl for SImode values with a few
3997 high bits known to be cleared. */
3998 value &= 31;
3999 if (value == 0)
4001 emit_insn (gen_nop ());
4002 return;
4005 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4006 if (code == ASHIFT)
4008 max = ext_ashl_lshr_seq[value].insn_count;
4009 for (i = 0; i < max; i++)
4010 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4012 else
4013 /* When shifting right, emit the shifts in reverse order, so that
4014 solitary negative values come first. */
4015 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4016 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4019 /* Output RTL for an arithmetic right shift.
4020 ??? Rewrite to use super-optimizer sequences. */
4021 bool
4022 expand_ashiftrt (rtx *operands)
4024 rtx wrk;
4025 char func[18];
4026 int value;
4028 if (TARGET_DYNSHIFT)
4030 if (!CONST_INT_P (operands[2]))
4032 rtx count = copy_to_mode_reg (SImode, operands[2]);
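/* The dynamic shift insns shift left for positive counts and right for
   negative ones, hence the count is negated for this arithmetic right
   shift.  */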
4033 emit_insn (gen_negsi2 (count, count));
4034 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4035 return true;
4037 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4038 > 1 + SH_DYNAMIC_SHIFT_COST)
4040 rtx count
4041 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4042 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4043 return true;
4046 if (!CONST_INT_P (operands[2]))
4047 return false;
4049 value = INTVAL (operands[2]) & 31;
4051 if (value == 31)
4053 /* If we are called from abs expansion, arrange things so that we
4054 can use a single MT instruction that doesn't clobber the source,
4055 if LICM can hoist out the load of the constant zero. */
4056 if (currently_expanding_to_rtl)
4058 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4059 operands[1]));
4060 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4061 return true;
4063 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4064 return true;
4066 else if (value >= 16 && value <= 19)
4068 wrk = gen_reg_rtx (SImode);
4069 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4070 value -= 16;
4071 while (value--)
4072 gen_ashift (ASHIFTRT, 1, wrk);
4073 emit_move_insn (operands[0], wrk);
4074 return true;
4076 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4077 else if (value <= 5)
4079 wrk = gen_reg_rtx (SImode);
4080 emit_move_insn (wrk, operands[1]);
4081 while (value--)
4082 gen_ashift (ASHIFTRT, 1, wrk);
4083 emit_move_insn (operands[0], wrk);
4084 return true;
4087 wrk = gen_reg_rtx (Pmode);
4089 /* Load the value into an arg reg and call a helper. */
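/* The helper follows the sfunc convention visible below: the operand is
   passed in r4 and the shifted result comes back in r4.  */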
4090 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4091 sprintf (func, "__ashiftrt_r4_%d", value);
4092 function_symbol (wrk, func, SFUNC_STATIC);
4093 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4094 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4095 return true;
4098 /* Try to find a good way to implement the combiner pattern
4099 [(set (match_operand:SI 0 "register_operand" "r")
4100 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4101 (match_operand:SI 2 "const_int_operand" "n"))
4102 (match_operand:SI 3 "const_int_operand" "n"))) .
4103 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4104 return 0 for simple right / left or left/right shift combination.
4105 return 1 for a combination of shifts with zero_extend.
4106 return 2 for a combination of shifts with an AND that needs r0.
4107 return 3 for a combination of shifts with an AND that needs an extra
4108 scratch register, when the three highmost bits of the AND mask are clear.
4109 return 4 for a combination of shifts with an AND that needs an extra
4110 scratch register, when any of the three highmost bits of the AND mask
4111 is set.
4112 If ATTRP is set, store an initial right shift width in ATTRP[0],
4113 and the instruction length in ATTRP[1]. These values are not valid
4114 when returning 0.
4115 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4116 shift_amounts for the last shift value that is to be used before the
4117 sign extend. */
4119 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4121 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4122 int left = INTVAL (left_rtx), right;
4123 int best = 0;
4124 int cost, best_cost = 10000;
4125 int best_right = 0, best_len = 0;
4126 int i;
4127 int can_ext;
4129 if (left < 0 || left > 31)
4130 return 0;
4131 if (CONST_INT_P (mask_rtx))
4132 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4133 else
4134 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4135 /* Can this be expressed as a right shift / left shift pair? */
4136 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4137 right = exact_log2 (lsb);
4138 mask2 = ~(mask + lsb - 1);
4139 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
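/* Illustrative example: for mask = 0x0ff0 this computes lsb = 0x10,
   right = 4, mask2 = ~0x0fff and lsb2 = 0x1000; lsb and lsb2 isolate the
   lowest set bit of mask and mask2 respectively.  */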
4140 /* mask has no zero bits other than its trailing zeroes <==> ! mask2 */
4141 if (! mask2)
4142 best_cost = ashl_lshr_seq[right].insn_count
4143 + ashl_lshr_seq[right + left].insn_count;
4144 /* mask has no trailing zeroes <==> ! right */
4145 else if (! right && mask2 == ~(lsb2 - 1))
4147 int late_right = exact_log2 (lsb2);
4148 best_cost = ashl_lshr_seq[left + late_right].insn_count
4149 + ashl_lshr_seq[late_right].insn_count;
4151 /* Try to use zero extend. */
4152 if (mask2 == ~(lsb2 - 1))
4154 int width, first;
4156 for (width = 8; width <= 16; width += 8)
4158 /* Can we zero-extend right away? */
4159 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4161 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4162 + ext_ashl_lshr_seq[left + right].insn_count;
4163 if (cost < best_cost)
4165 best = 1;
4166 best_cost = cost;
4167 best_right = right;
4168 best_len = cost;
4169 if (attrp)
4170 attrp[2] = -1;
4172 continue;
4174 /* ??? Could try to put zero extend into initial right shift,
4175 or even shift a bit left before the right shift. */
4176 /* Determine value of first part of left shift, to get to the
4177 zero extend cut-off point. */
4178 first = width - exact_log2 (lsb2) + right;
4179 if (first >= 0 && right + left - first >= 0)
4181 cost = ext_ashl_lshr_seq[right].insn_count
4182 + ext_ashl_lshr_seq[first].insn_count + 1
4183 + ext_ashl_lshr_seq[right + left - first].insn_count;
4185 if (cost < best_cost)
4187 best = 1;
4188 best_cost = cost;
4189 best_right = right;
4190 best_len = cost;
4191 if (attrp)
4192 attrp[2] = first;
4197 /* Try to use the r0 AND pattern. */
4198 for (i = 0; i <= 2; i++)
4200 if (i > right)
4201 break;
4202 if (! CONST_OK_FOR_K08 (mask >> i))
4203 continue;
4204 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4205 if (cost < best_cost)
4207 best = 2;
4208 best_cost = cost;
4209 best_right = i;
4210 best_len = cost - 1;
4213 /* Try to use a scratch register to hold the AND operand. */
4214 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4215 for (i = 0; i <= 2; i++)
4217 if (i > right)
4218 break;
4219 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4220 + (can_ext
4221 ? ext_ashl_lshr_seq
4222 : ashl_lshr_seq)[left + i].insn_count;
4223 if (cost < best_cost)
4225 best = 4 - can_ext;
4226 best_cost = cost;
4227 best_right = i;
4228 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4232 if (attrp)
4234 attrp[0] = best_right;
4235 attrp[1] = best_len;
4237 return best;
4240 /* This is used in length attributes of the unnamed instructions
4241 corresponding to shl_and_kind return values of 1 and 2. */
4243 shl_and_length (rtx insn)
4245 rtx set_src, left_rtx, mask_rtx;
4246 int attributes[3];
4248 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4249 left_rtx = XEXP (XEXP (set_src, 0), 1);
4250 mask_rtx = XEXP (set_src, 1);
4251 shl_and_kind (left_rtx, mask_rtx, attributes);
4252 return attributes[1];
4255 /* This is used in the length attribute of the and_shl_scratch instruction. */
4257 shl_and_scr_length (rtx insn)
4259 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4260 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4261 rtx op = XEXP (set_src, 0);
4262 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4263 op = XEXP (XEXP (op, 0), 0);
4264 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4267 /* Generate rtl for instructions for which shl_and_kind advised a particular
4268 method of generating them, i.e. returned zero. */
4269 bool
4270 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4272 int attributes[3];
4273 unsigned HOST_WIDE_INT mask;
4274 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4275 int right, total_shift;
4276 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4278 right = attributes[0];
4279 total_shift = INTVAL (left_rtx) + right;
4280 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4281 switch (kind)
4283 default:
4284 return true;
4285 case 1:
4287 int first = attributes[2];
4288 rtx operands[3];
4290 if (first < 0)
4292 emit_insn ((mask << right) <= 0xff
4293 ? gen_zero_extendqisi2 (dest,
4294 gen_lowpart (QImode, source))
4295 : gen_zero_extendhisi2 (dest,
4296 gen_lowpart (HImode, source)));
4297 source = dest;
4299 if (source != dest)
4300 emit_insn (gen_movsi (dest, source));
4301 operands[0] = dest;
4302 if (right)
4304 operands[2] = GEN_INT (right);
4305 gen_shifty_hi_op (LSHIFTRT, operands);
4307 if (first > 0)
4309 operands[2] = GEN_INT (first);
4310 gen_shifty_hi_op (ASHIFT, operands);
4311 total_shift -= first;
4312 mask <<= first;
4314 if (first >= 0)
4315 emit_insn (mask <= 0xff
4316 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4317 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4318 if (total_shift > 0)
4320 operands[2] = GEN_INT (total_shift);
4321 gen_shifty_hi_op (ASHIFT, operands);
4323 break;
4325 case 4:
4326 shift_gen_fun = gen_shifty_op;
4327 case 3:
4328 /* If the topmost bit that matters is set, set the topmost bits
4329 that don't matter. This way, we might be able to get a shorter
4330 signed constant. */
4331 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4332 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4333 case 2:
4334 /* Don't expand fine-grained when combining, because that will
4335 make the pattern fail. */
4336 if (currently_expanding_to_rtl
4337 || reload_in_progress || reload_completed)
4339 rtx operands[3];
4341 /* Cases 3 and 4 should be handled by this split
4342 only while combining */
4343 gcc_assert (kind <= 2);
4344 if (right)
4346 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4347 source = dest;
4349 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4350 if (total_shift)
4352 operands[0] = dest;
4353 operands[1] = dest;
4354 operands[2] = GEN_INT (total_shift);
4355 shift_gen_fun (ASHIFT, operands);
4357 break;
4359 else
4361 int neg = 0;
4362 if (kind != 4 && total_shift < 16)
4364 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4365 if (neg > 0)
4366 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4367 else
4368 neg = 0;
4370 emit_insn (gen_and_shl_scratch (dest, source,
4371 GEN_INT (right),
4372 GEN_INT (mask),
4373 GEN_INT (total_shift + neg),
4374 GEN_INT (neg)));
4375 emit_insn (gen_movsi (dest, dest));
4376 break;
4379 return false;
4382 /* Try to find a good way to implement the combiner pattern
4383 [(set (match_operand:SI 0 "register_operand" "=r")
4384 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4385 (match_operand:SI 2 "const_int_operand" "n")
4386 (match_operand:SI 3 "const_int_operand" "n")
4387 (const_int 0)))
4388 (clobber (reg:SI T_REG))]
4389 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4390 return 0 for simple left / right shift combination.
4391 return 1 for left shift / 8 bit sign extend / left shift.
4392 return 2 for left shift / 16 bit sign extend / left shift.
4393 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4394 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4395 return 5 for left shift / 16 bit sign extend / right shift
4396 return 6 for < 8 bit sign extend / left shift.
4397 return 7 for < 8 bit sign extend / left shift / single right shift.
4398 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4400 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4402 int left, size, insize, ext;
4403 int cost = 0, best_cost;
4404 int kind;
4406 left = INTVAL (left_rtx);
4407 size = INTVAL (size_rtx);
4408 insize = size - left;
4409 gcc_assert (insize > 0);
4410 /* Default to left / right shift. */
4411 kind = 0;
4412 best_cost = ashl_lshr_seq[32 - insize].insn_count
4413 + ashl_lshr_seq[32 - size].insn_count;
4414 if (size <= 16)
4416 /* 16 bit shift / sign extend / 16 bit shift */
4417 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4418 + ashl_lshr_seq[16 - size].insn_count;
4419 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4420 below, by alternative 3 or something even better. */
4421 if (cost < best_cost)
4423 kind = 5;
4424 best_cost = cost;
4427 /* Try a plain sign extend between two shifts. */
4428 for (ext = 16; ext >= insize; ext -= 8)
4430 if (ext <= size)
4432 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4433 + ashl_lshr_seq[size - ext].insn_count;
4434 if (cost < best_cost)
4436 kind = ext / (unsigned) 8;
4437 best_cost = cost;
4440 /* Check if we can do a sloppy shift with a final signed shift
4441 restoring the sign. */
4442 if (EXT_SHIFT_SIGNED (size - ext))
4443 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4444 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4445 /* If not, maybe it's still cheaper to do the second shift sloppy,
4446 and do a final sign extend? */
4447 else if (size <= 16)
4448 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4449 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4450 + 1;
4451 else
4452 continue;
4453 if (cost < best_cost)
4455 kind = ext / (unsigned) 8 + 2;
4456 best_cost = cost;
4459 /* Check if we can sign extend in r0 */
4460 if (insize < 8)
4462 cost = 3 + ashl_lshr_seq[left].insn_count;
4463 if (cost < best_cost)
4465 kind = 6;
4466 best_cost = cost;
4468 /* Try the same with a final signed shift. */
4469 if (left < 31)
4471 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4472 if (cost < best_cost)
4474 kind = 7;
4475 best_cost = cost;
4479 if (TARGET_DYNSHIFT)
4481 /* Try to use a dynamic shift. */
4482 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4483 if (cost < best_cost)
4485 kind = 0;
4486 best_cost = cost;
4489 if (costp)
4490 *costp = cost;
4491 return kind;
4494 /* Function to be used in the length attribute of the instructions
4495 implementing this pattern. */
4497 shl_sext_length (rtx insn)
4499 rtx set_src, left_rtx, size_rtx;
4500 int cost;
4502 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4503 left_rtx = XEXP (XEXP (set_src, 0), 1);
4504 size_rtx = XEXP (set_src, 1);
4505 shl_sext_kind (left_rtx, size_rtx, &cost);
4506 return cost;
4509 /* Generate rtl for this pattern */
4510 bool
4511 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4513 int kind;
4514 int left, size, insize, cost;
4515 rtx operands[3];
4517 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4518 left = INTVAL (left_rtx);
4519 size = INTVAL (size_rtx);
4520 insize = size - left;
4521 switch (kind)
4523 case 1:
4524 case 2:
4525 case 3:
4526 case 4:
4528 int ext = kind & 1 ? 8 : 16;
4529 int shift2 = size - ext;
4531 /* Don't expand fine-grained when combining, because that will
4532 make the pattern fail. */
4533 if (! currently_expanding_to_rtl
4534 && ! reload_in_progress && ! reload_completed)
4536 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4537 emit_insn (gen_movsi (dest, source));
4538 break;
4540 if (dest != source)
4541 emit_insn (gen_movsi (dest, source));
4542 operands[0] = dest;
4543 if (ext - insize)
4545 operands[2] = GEN_INT (ext - insize);
4546 gen_shifty_hi_op (ASHIFT, operands);
4548 emit_insn (kind & 1
4549 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4550 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4551 if (kind <= 2)
4553 if (shift2)
4555 operands[2] = GEN_INT (shift2);
4556 gen_shifty_op (ASHIFT, operands);
4559 else
4561 if (shift2 > 0)
4563 if (EXT_SHIFT_SIGNED (shift2))
4565 operands[2] = GEN_INT (shift2 + 1);
4566 gen_shifty_op (ASHIFT, operands);
4567 operands[2] = const1_rtx;
4568 gen_shifty_op (ASHIFTRT, operands);
4569 break;
4571 operands[2] = GEN_INT (shift2);
4572 gen_shifty_hi_op (ASHIFT, operands);
4574 else if (shift2)
4576 operands[2] = GEN_INT (-shift2);
4577 gen_shifty_hi_op (LSHIFTRT, operands);
4579 emit_insn (size <= 8
4580 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4581 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4583 break;
4585 case 5:
4587 int i = 16 - size;
4588 if (! currently_expanding_to_rtl
4589 && ! reload_in_progress && ! reload_completed)
4590 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4591 else
4593 operands[0] = dest;
4594 operands[2] = GEN_INT (16 - insize);
4595 gen_shifty_hi_op (ASHIFT, operands);
4596 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4598 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4599 while (--i >= 0)
4600 gen_ashift (ASHIFTRT, 1, dest);
4601 break;
4603 case 6:
4604 case 7:
4605 /* Don't expand fine-grained when combining, because that will
4606 make the pattern fail. */
4607 if (! currently_expanding_to_rtl
4608 && ! reload_in_progress && ! reload_completed)
4610 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4611 emit_insn (gen_movsi (dest, source));
4612 break;
4614 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4615 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4616 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4617 operands[0] = dest;
4618 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4619 gen_shifty_op (ASHIFT, operands);
4620 if (kind == 7)
4621 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4622 break;
4623 default:
4624 return true;
4626 return false;
4629 /* Prefix a symbol_ref name with "datalabel". */
4631 gen_datalabel_ref (rtx sym)
4633 const char *str;
4635 if (GET_CODE (sym) == LABEL_REF)
4636 return gen_rtx_CONST (GET_MODE (sym),
4637 gen_rtx_UNSPEC (GET_MODE (sym),
4638 gen_rtvec (1, sym),
4639 UNSPEC_DATALABEL));
4641 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4643 str = XSTR (sym, 0);
4644 /* Share all SYMBOL_REF strings with the same value - that is important
4645 for cse. */
4646 str = IDENTIFIER_POINTER (get_identifier (str));
4647 XSTR (sym, 0) = str;
4649 return sym;
4653 typedef struct label_ref_list_d
4655 rtx_code_label *label;
4656 struct label_ref_list_d *next;
4658 /* Pool allocation new operator. */
4659 inline void *operator new (size_t)
4661 return pool.allocate ();
4664 /* Delete operator utilizing pool allocation. */
4665 inline void operator delete (void *ptr)
4667 pool.remove ((label_ref_list_d *) ptr);
4670 /* Memory allocation pool. */
4671 static pool_allocator<label_ref_list_d> pool;
4673 } *label_ref_list_t;
4675 pool_allocator<label_ref_list_d> label_ref_list_d::pool
4676 ("label references list", 30);
4678 /* The SH cannot load a large constant into a register; constants have to
4679 come from a pc relative load. The reference of a pc relative load
4680 instruction must be less than 1k in front of the instruction. This
4681 means that we often have to dump a constant inside a function, and
4682 generate code to branch around it.
4684 It is important to minimize this, since the branches will slow things
4685 down and make things bigger.
4687 Worst case code looks like:
4689 mov.l L1,rn
4690 bra L2
4692 align
4693 L1: .long value
4697 mov.l L3,rn
4698 bra L4
4700 align
4701 L3: .long value
4705 We fix this by performing a scan before scheduling, which notices which
4706 instructions need to have their operands fetched from the constant table
4707 and builds the table.
4709 The algorithm is:
4711 scan, find an instruction which needs a pcrel move. Look forward, find the
4712 last barrier which is within MAX_COUNT bytes of the requirement.
4713 If there isn't one, make one. Process all the instructions between
4714 the find and the barrier.
4716 In the above example, we can tell that L3 is within 1k of L1, so
4717 the first move can be shrunk from the 3 insn+constant sequence into
4718 just 1 insn, and the constant moved to L3 to make:
4720 mov.l L1,rn
4722 mov.l L3,rn
4723 bra L4
4725 align
4726 L3:.long value
4727 L4:.long value
4729 Then the second move becomes the target for the shortening process. */
4731 typedef struct
4733 rtx value; /* Value in table. */
4734 rtx_code_label *label; /* Label of value. */
4735 label_ref_list_t wend; /* End of window. */
4736 machine_mode mode; /* Mode of value. */
4738 /* True if this constant is accessed as part of a post-increment
4739 sequence. Note that HImode constants are never accessed in this way. */
4740 bool part_of_sequence_p;
4741 } pool_node;
4743 /* The maximum number of constants that can fit into one pool, since
4744 constants in the range 0..510 are at least 2 bytes long, and in the
4745 range from there to 1018 at least 4 bytes. */
4747 #define MAX_POOL_SIZE 372
4748 static pool_node pool_vector[MAX_POOL_SIZE];
4749 static int pool_size;
4750 static rtx_code_label *pool_window_label;
4751 static int pool_window_last;
4753 static int max_labelno_before_reorg;
4755 /* ??? If we need a constant in HImode which is the truncated value of a
4756 constant we need in SImode, we could combine the two entries thus saving
4757 two bytes. Is this common enough to be worth the effort of implementing
4758 it? */
4760 /* ??? This stuff should be done at the same time that we shorten branches.
4761 As it is now, we must assume that all branches are the maximum size, and
4762 this causes us to almost always output constant pools sooner than
4763 necessary. */
4765 /* Add a constant to the pool and return its label. */
4766 static rtx_code_label *
4767 add_constant (rtx x, machine_mode mode, rtx last_value)
4769 int i;
4770 rtx_code_label *lab, *new_rtx;
4771 label_ref_list_t ref, newref;
4773 /* First see if we've already got it. */
4774 for (i = 0; i < pool_size; i++)
4776 if (x->code == pool_vector[i].value->code
4777 && mode == pool_vector[i].mode)
4779 if (x->code == CODE_LABEL)
4781 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4782 continue;
4784 if (rtx_equal_p (x, pool_vector[i].value))
4786 lab = new_rtx = 0;
4787 if (! last_value
4788 || ! i
4789 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4791 new_rtx = gen_label_rtx ();
4792 LABEL_REFS (new_rtx) = pool_vector[i].label;
4793 pool_vector[i].label = lab = new_rtx;
4795 if (lab && pool_window_label)
4797 newref = new label_ref_list_d;
4798 newref->label = pool_window_label;
4799 ref = pool_vector[pool_window_last].wend;
4800 newref->next = ref;
4801 pool_vector[pool_window_last].wend = newref;
4803 if (new_rtx)
4804 pool_window_label = new_rtx;
4805 pool_window_last = i;
4806 return lab;
4811 /* Need a new one. */
4812 pool_vector[pool_size].value = x;
4813 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4815 lab = 0;
4816 pool_vector[pool_size - 1].part_of_sequence_p = true;
4818 else
4819 lab = gen_label_rtx ();
4820 pool_vector[pool_size].mode = mode;
4821 pool_vector[pool_size].label = lab;
4822 pool_vector[pool_size].wend = NULL;
4823 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4824 if (lab && pool_window_label)
4826 newref = new label_ref_list_d;
4827 newref->label = pool_window_label;
4828 ref = pool_vector[pool_window_last].wend;
4829 newref->next = ref;
4830 pool_vector[pool_window_last].wend = newref;
4832 if (lab)
4833 pool_window_label = lab;
4834 pool_window_last = pool_size;
4835 pool_size++;
4836 return lab;
4839 /* Output the literal table. START, if nonzero, is the first instruction
4840 this table is needed for, and also indicates that there is at least one
4841 casesi_worker_2 instruction; we have to emit the operand3 labels from
4842 these insns at a 4-byte aligned position. BARRIER is the barrier
4843 after which we are to place the table. */
4844 static void
4845 dump_table (rtx_insn *start, rtx_insn *barrier)
4847 rtx_insn *scan = barrier;
4848 int i;
4849 bool need_align = true;
4850 rtx lab;
4851 label_ref_list_t ref;
4852 bool have_df = false;
4854 /* Do two passes; the first time, dump out the HI sized constants. */
4856 for (i = 0; i < pool_size; i++)
4858 pool_node *p = &pool_vector[i];
4860 if (p->mode == HImode)
4862 if (need_align)
4864 scan = emit_insn_after (gen_align_2 (), scan);
4865 need_align = false;
4867 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4868 scan = emit_label_after (lab, scan);
4869 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4870 scan);
4871 for (ref = p->wend; ref; ref = ref->next)
4873 lab = ref->label;
4874 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4877 else if (p->mode == DFmode)
4878 have_df = true;
4881 need_align = true;
4883 if (start)
4885 scan = emit_insn_after (gen_align_4 (), scan);
4886 need_align = false;
4887 for (; start != barrier; start = NEXT_INSN (start))
4888 if (NONJUMP_INSN_P (start)
4889 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4891 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4892 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4894 scan = emit_label_after (lab, scan);
4897 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4899 rtx_insn *align_insn = NULL;
4901 scan = emit_label_after (gen_label_rtx (), scan);
4902 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4903 need_align = false;
4905 for (i = 0; i < pool_size; i++)
4907 pool_node *p = &pool_vector[i];
4909 switch (p->mode)
4911 case HImode:
4912 break;
4913 case SImode:
4914 case SFmode:
4915 if (align_insn && !p->part_of_sequence_p)
4917 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4918 emit_label_before (lab, align_insn);
4919 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4920 align_insn);
4921 for (ref = p->wend; ref; ref = ref->next)
4923 lab = ref->label;
4924 emit_insn_before (gen_consttable_window_end (lab),
4925 align_insn);
4927 delete_insn (align_insn);
4928 align_insn = NULL;
4929 continue;
4931 else
4933 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4934 scan = emit_label_after (lab, scan);
4935 scan = emit_insn_after (gen_consttable_4 (p->value,
4936 const0_rtx), scan);
4937 need_align = ! need_align;
4939 break;
4940 case DFmode:
4941 if (need_align)
4943 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4944 align_insn = scan;
4945 need_align = false;
4947 case DImode:
4948 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4949 scan = emit_label_after (lab, scan);
4950 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4951 scan);
4952 break;
4953 default:
4954 gcc_unreachable ();
4957 if (p->mode != HImode)
4959 for (ref = p->wend; ref; ref = ref->next)
4961 lab = ref->label;
4962 scan = emit_insn_after (gen_consttable_window_end (lab),
4963 scan);
4968 pool_size = 0;
4971 for (i = 0; i < pool_size; i++)
4973 pool_node *p = &pool_vector[i];
4975 switch (p->mode)
4977 case HImode:
4978 break;
4979 case SImode:
4980 case SFmode:
4981 if (need_align)
4983 need_align = false;
4984 scan = emit_label_after (gen_label_rtx (), scan);
4985 scan = emit_insn_after (gen_align_4 (), scan);
4987 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4988 scan = emit_label_after (lab, scan);
4989 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4990 scan);
4991 break;
4992 case DFmode:
4993 case DImode:
4994 if (need_align)
4996 need_align = false;
4997 scan = emit_label_after (gen_label_rtx (), scan);
4998 scan = emit_insn_after (gen_align_4 (), scan);
5000 for (lab = p->label; lab; lab = LABEL_REFS (lab))
5001 scan = emit_label_after (lab, scan);
5002 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
5003 scan);
5004 break;
5005 default:
5006 gcc_unreachable ();
5009 if (p->mode != HImode)
5011 for (ref = p->wend; ref; ref = ref->next)
5013 lab = ref->label;
5014 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5019 scan = emit_insn_after (gen_consttable_end (), scan);
5020 scan = emit_barrier_after (scan);
5021 pool_size = 0;
5022 pool_window_label = NULL;
5023 pool_window_last = 0;
5026 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
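/* MOVA_LABELREF extracts operand 0 of the UNSPEC in a mova's source,
   i.e. the label (or symbol) the mova refers to.  */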
5028 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5030 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5031 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
5032 need to fix it if the input value is CONST_OK_FOR_I08. */
5033 static bool
5034 broken_move (rtx_insn *insn)
5036 if (NONJUMP_INSN_P (insn))
5038 rtx pat = PATTERN (insn);
5039 if (GET_CODE (pat) == PARALLEL)
5040 pat = XVECEXP (pat, 0, 0);
5041 if (GET_CODE (pat) == SET
5042 /* We can load any 8-bit value if we don't care what the high
5043 order bits end up as. */
5044 && GET_MODE (SET_DEST (pat)) != QImode
5045 && (CONSTANT_P (SET_SRC (pat))
5046 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5047 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5048 /* Match mova_const. */
5049 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5050 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5051 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5052 && ! (TARGET_SH2E
5053 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5054 && (fp_zero_operand (SET_SRC (pat))
5055 || fp_one_operand (SET_SRC (pat)))
5056 /* In general we don't know the current setting of fpscr, so
5057 disable fldi.
5058 There is an exception if this was a register-register move
5059 before reload - and hence it was ascertained that we have
5060 single precision setting - and in a post-reload optimization
5061 we changed this to do a constant load. In that case
5062 we don't have an r0 clobber, hence we must use fldi. */
5063 && (TARGET_FMOVD
5064 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5065 == SCRATCH))
5066 && REG_P (SET_DEST (pat))
5067 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5068 && ! (TARGET_SH2A
5069 && GET_MODE (SET_DEST (pat)) == SImode
5070 && (satisfies_constraint_I20 (SET_SRC (pat))
5071 || satisfies_constraint_I28 (SET_SRC (pat))))
5072 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5073 return true;
5076 return false;
5079 /* Return true if the specified insn is a mova insn. */
5080 static bool
5081 mova_p (rtx_insn *insn)
5083 return (NONJUMP_INSN_P (insn)
5084 && GET_CODE (PATTERN (insn)) == SET
5085 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5086 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5087 /* Don't match mova_const. */
5088 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5091 /* Fix up a mova from a switch that went out of range. */
5092 static void
5093 fixup_mova (rtx_insn *mova)
5095 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5096 if (! flag_pic)
5098 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5099 INSN_CODE (mova) = -1;
5101 else
5103 rtx_insn *worker = mova;
5104 rtx_code_label *lab = gen_label_rtx ();
5105 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5109 worker = NEXT_INSN (worker);
5110 gcc_assert (worker
5111 && !LABEL_P (worker)
5112 && !JUMP_P (worker));
5113 } while (NOTE_P (worker)
5114 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5115 wpat = PATTERN (worker);
5116 wpat0 = XVECEXP (wpat, 0, 0);
5117 wpat1 = XVECEXP (wpat, 0, 1);
5118 wsrc = SET_SRC (wpat0);
5119 PATTERN (worker) = (gen_casesi_worker_2
5120 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5121 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5122 XEXP (wpat1, 0)));
5123 INSN_CODE (worker) = -1;
5124 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5125 base = gen_rtx_LABEL_REF (Pmode, lab);
5126 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5127 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5128 INSN_CODE (mova) = -1;
5132 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5133 *num_mova, and check if the new mova is not nested within the first one.
5134 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5135 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5136 static int
5137 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5139 int n_addr = 0; /* Initialization to shut up spurious warning. */
5140 int f_target, n_target = 0; /* Likewise. */
5142 if (optimize)
5144 /* If NEW_MOVA has no address yet, it will be handled later. */
5145 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5146 return -1;
5148 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5149 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5150 if (n_addr > n_target || n_addr + 1022 < n_target)
5152 /* Change the mova into a load.
5153 broken_move will then return true for it. */
5154 fixup_mova (new_mova);
5155 return 1;
5158 if (!(*num_mova)++)
5160 *first_mova = new_mova;
5161 return 2;
5163 if (!optimize
5164 || ((f_target
5165 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5166 >= n_target))
5167 return -1;
5169 (*num_mova)--;
5170 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5171 > n_target - n_addr)
5173 fixup_mova (*first_mova);
5174 return 0;
5176 else
5178 fixup_mova (new_mova);
5179 return 1;
5183 /* Find the last barrier from insn FROM which is close enough to hold the
5184 constant pool. If we can't find one, then create one near the end of
5185 the range. */
5186 static rtx_insn *
5187 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5189 int count_si = 0;
5190 int count_hi = 0;
5191 int found_hi = 0;
5192 int found_si = 0;
5193 int found_di = 0;
5194 int hi_align = 2;
5195 int si_align = 2;
5196 int leading_mova = num_mova;
5197 rtx_insn *barrier_before_mova = NULL;
5198 rtx_insn *found_barrier = NULL;
5199 rtx_insn *good_barrier = NULL;
5200 int si_limit;
5201 int hi_limit;
5202 rtx_insn *orig = from;
5203 rtx_insn *last_got = NULL;
5204 rtx_insn *last_symoff = NULL;
5206 /* For HImode: range is 510, add 4 because pc counts from address of
5207 second instruction after this one, subtract 2 for the jump instruction
5208 that we may need to emit before the table, subtract 2 for the instruction
5209 that fills the jump delay slot (in very rare cases, reorg will take an
5210 instruction from after the constant pool or will leave the delay slot
5211 empty). This gives 510.
5212 For SImode: range is 1020, add 4 because pc counts from address of
5213 second instruction after this one, subtract 2 in case pc is 2 byte
5214 aligned, subtract 2 for the jump instruction that we may need to emit
5215 before the table, subtract 2 for the instruction that fills the jump
5216 delay slot. This gives 1018. */
5218 /* The branch will always be shortened now that the reference address for
5219 forward branches is the successor address, thus we need no longer make
5220 adjustments to the [sh]i_limit for -O0. */
5222 si_limit = 1018;
5223 hi_limit = 510;
5225 while (from && count_si < si_limit && count_hi < hi_limit)
5227 int inc = get_attr_length (from);
5228 int new_align = 1;
5230 /* If this is a label that existed at the time of the compute_alignments
5231 call, determine the alignment. N.B. When find_barrier recurses for
5232 an out-of-reach mova, we might see labels at the start of previously
5233 inserted constant tables. */
5234 if (LABEL_P (from)
5235 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5237 if (optimize)
5238 new_align = 1 << label_to_alignment (from);
5239 else if (BARRIER_P (prev_nonnote_insn (from)))
5240 new_align = 1 << barrier_align (from);
5241 else
5242 new_align = 1;
5243 inc = 0;
5245 /* In case we are scanning a constant table because of recursion, check
5246 for explicit alignments. If the table is long, we might be forced
5247 to emit the new table in front of it; the length of the alignment
5248 might be the last straw. */
5249 else if (NONJUMP_INSN_P (from)
5250 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5251 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5252 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5253 /* When we find the end of a constant table, paste the new constant
5254 at the end. That is better than putting it in front because
5255 this way, we don't need extra alignment for adding a 4-byte-aligned
5256 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5257 else if (NONJUMP_INSN_P (from)
5258 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5259 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5260 return from;
5262 if (BARRIER_P (from))
5264 rtx_insn *next;
5266 found_barrier = from;
5268 /* If we are at the end of the function, or in front of an alignment
5269 instruction, we need not insert an extra alignment. We prefer
5270 this kind of barrier. */
5271 if (barrier_align (from) > 2)
5272 good_barrier = from;
5274 /* If we are at the end of a hot/cold block, dump the constants
5275 here. */
5276 next = NEXT_INSN (from);
5277 if (next
5278 && NOTE_P (next)
5279 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5280 break;
5283 if (broken_move (from))
5285 rtx pat, src, dst;
5286 machine_mode mode;
5288 pat = PATTERN (from);
5289 if (GET_CODE (pat) == PARALLEL)
5290 pat = XVECEXP (pat, 0, 0);
5291 src = SET_SRC (pat);
5292 dst = SET_DEST (pat);
5293 mode = GET_MODE (dst);
5295 /* GOT pc-relative setting comes in a pair of
5296 mova .L8,r0
5297 mov.l .L8,r12
5298 instructions (plus add r0,r12).
5299 Remember if we see one without the other. */
5300 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5301 last_got = last_got ? NULL : from;
5302 else if (PIC_ADDR_P (src))
5303 last_got = last_got ? NULL : from;
5305 /* We must explicitly check the mode, because sometimes the
5306 front end will generate code to load unsigned constants into
5307 HImode targets without properly sign extending them. */
5308 if (mode == HImode
5309 || (mode == SImode && satisfies_constraint_I16 (src)
5310 && REGNO (dst) != FPUL_REG))
5312 found_hi += 2;
5313 /* We put the short constants before the long constants, so
5314 we must count the length of short constants in the range
5315 for the long constants. */
5316 /* ??? This isn't optimal, but is easy to do. */
5317 si_limit -= 2;
5319 else
5321 /* We dump DF/DI constants before SF/SI ones, because
5322 the limit is the same, but the alignment requirements
5323 are higher. We may waste up to 4 additional bytes
5324 for alignment, and the DF/DI constant may have
5325 another SF/SI constant placed before it. */
5326 if (TARGET_SHCOMPACT
5327 && ! found_di
5328 && (mode == DFmode || mode == DImode))
5330 found_di = 1;
5331 si_limit -= 8;
5333 while (si_align > 2 && found_si + si_align - 2 > count_si)
5334 si_align >>= 1;
5335 if (found_si > count_si)
5336 count_si = found_si;
5337 found_si += GET_MODE_SIZE (mode);
5338 if (num_mova)
5339 si_limit -= GET_MODE_SIZE (mode);
5343 if (mova_p (from))
5345 switch (untangle_mova (&num_mova, &mova, from))
5347 case 1:
5348 if (flag_pic)
5350 rtx src = SET_SRC (PATTERN (from));
5351 if (GET_CODE (src) == CONST
5352 && GET_CODE (XEXP (src, 0)) == UNSPEC
5353 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5354 last_symoff = from;
5356 break;
5357 case 0: return find_barrier (0, 0, mova);
5358 case 2:
5360 leading_mova = 0;
5361 barrier_before_mova
5362 = good_barrier ? good_barrier : found_barrier;
5364 default: break;
5366 if (found_si > count_si)
5367 count_si = found_si;
5369 else if (JUMP_TABLE_DATA_P (from)
5370 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5372 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5373 || (num_mova
5374 && (prev_nonnote_insn (from)
5375 == XEXP (MOVA_LABELREF (mova), 0))))
5376 num_mova--;
5377 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5379 /* We have just passed the barrier in front of the
5380 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5381 the ADDR_DIFF_VEC is accessed as data, just like our pool
5382 constants, this is a good opportunity to accommodate what
5383 we have gathered so far.
5384 If we waited any longer, we could end up at a barrier in
5385 front of code, which gives worse cache usage for separated
5386 instruction / data caches. */
5387 good_barrier = found_barrier;
5388 break;
5390 else
5392 rtx body = PATTERN (from);
5393 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5396 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5397 else if (JUMP_P (from)
5398 && ! TARGET_SH2
5399 && ! optimize_size)
5400 new_align = 4;
5402 /* There is a possibility that a bf is transformed into a bf/s by the
5403 delay slot scheduler. */
5404 if (JUMP_P (from)
5405 && get_attr_type (from) == TYPE_CBRANCH
5406 && ! sequence_insn_p (from))
5407 inc += 2;
5409 if (found_si)
5411 count_si += inc;
5412 if (new_align > si_align)
5414 si_limit -= (count_si - 1) & (new_align - si_align);
5415 si_align = new_align;
5417 count_si = (count_si + new_align - 1) & -new_align;
5419 if (found_hi)
5421 count_hi += inc;
5422 if (new_align > hi_align)
5424 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5425 hi_align = new_align;
5427 count_hi = (count_hi + new_align - 1) & -new_align;
5429 from = NEXT_INSN (from);
5432 if (num_mova)
5434 if (leading_mova)
5436 /* Try as we might, the leading mova is out of range. Change
5437 it into a load (which will become a pcload) and retry. */
5438 fixup_mova (mova);
5439 return find_barrier (0, 0, mova);
5441 else
5443 /* Insert the constant pool table before the mova instruction,
5444 to prevent the mova label reference from going out of range. */
5445 from = mova;
5446 good_barrier = found_barrier = barrier_before_mova;
5450 if (found_barrier)
5452 if (good_barrier && next_real_insn (found_barrier))
5453 found_barrier = good_barrier;
5455 else
5457 /* We didn't find a barrier in time to dump our stuff,
5458 so we'll make one. */
5459 rtx_code_label *label = gen_label_rtx ();
5461 /* Don't emit a constant table in the middle of insns for
5462 casesi_worker_2. This is a bit overkill but is enough
5463 because casesi_worker_2 doesn't appear very frequently. */
5464 if (last_symoff)
5465 from = last_symoff;
5467 /* If we exceeded the range, then we must back up over the last
5468 instruction we looked at. Otherwise, we just need to undo the
5469 NEXT_INSN at the end of the loop. */
5470 if (PREV_INSN (from) != orig
5471 && (count_hi > hi_limit || count_si > si_limit))
5472 from = PREV_INSN (PREV_INSN (from));
5473 else
5474 from = PREV_INSN (from);
5476 /* Don't emit a constant table in the middle of global pointer setting,
5477 since that would move the addressing base GOT into another table.
5478 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5479 in the pool anyway, so just move up the whole constant pool.
5481 However, avoid doing so when the last single GOT mov is the starting
5482 insn itself. Going past the start insn would create a negative
5483 offset, causing errors. */
5484 if (last_got && last_got != orig)
5485 from = PREV_INSN (last_got);
5487 /* Don't insert the constant pool table at the position which
5488 may be the landing pad. */
5489 if (flag_exceptions
5490 && CALL_P (from)
5491 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5492 from = PREV_INSN (from);
5494 /* Walk back to be just before any jump or label.
5495 Putting it before a label reduces the number of times the branch
5496 around the constant pool table will be hit. Putting it before
5497 a jump makes it more likely that the bra delay slot will be
5498 filled. */
5499 while (NOTE_P (from) || JUMP_P (from)
5500 || LABEL_P (from))
5501 from = PREV_INSN (from);
5503 /* Make sure we do not split between a call and its corresponding
5504 CALL_ARG_LOCATION note. */
5505 if (CALL_P (from))
5507 rtx_insn *next = NEXT_INSN (from);
5508 if (next && NOTE_P (next)
5509 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5510 from = next;
5513 from = emit_jump_insn_after (gen_jump (label), from);
5514 JUMP_LABEL (from) = label;
5515 LABEL_NUSES (label) = 1;
5516 found_barrier = emit_barrier_after (from);
5517 emit_label_after (label, found_barrier);
5520 return found_barrier;
5523 /* If the instruction INSN is implemented by a special function, and we can
5524 positively find the register that is used to call the sfunc, and this
5525 register is not used anywhere else in this instruction - except as the
5526 destination of a set, return this register; else, return 0. */
5528 sfunc_uses_reg (rtx_insn *insn)
5530 int i;
5531 rtx pattern, part, reg_part, reg;
5533 if (!NONJUMP_INSN_P (insn))
5534 return NULL_RTX;
5535 pattern = PATTERN (insn);
5536 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5537 return NULL_RTX;
5539 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5541 part = XVECEXP (pattern, 0, i);
5542 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5543 reg_part = part;
5545 if (! reg_part)
5546 return NULL_RTX;
5547 reg = XEXP (reg_part, 0);
5548 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5550 part = XVECEXP (pattern, 0, i);
5551 if (part == reg_part || GET_CODE (part) == CLOBBER)
5552 continue;
5553 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5554 && REG_P (SET_DEST (part)))
5555 ? SET_SRC (part) : part)))
5556 return NULL_RTX;
5558 return reg;
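/* As an illustrative sketch (the register numbers are made up), a
   matching sfunc insn has a pattern along the lines of

	(parallel [(set (reg:SF fr0) (unspec ...))
		   (use (reg:SI r2))
		   (clobber (reg:SI pr))])

   and sfunc_uses_reg would return (reg:SI r2) for it, provided r2 is
   not mentioned anywhere else in the pattern except as a SET_DEST.  */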
5561 /* See if the only way in which INSN uses REG is by calling it, or by
5562 setting it while calling it. Set *SET to a SET rtx if the register
5563 is set by INSN. */
5564 static bool
5565 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5567 rtx pattern, reg2;
5569 *set = NULL_RTX;
5571 reg2 = sfunc_uses_reg (insn);
5572 if (reg2 && REGNO (reg2) == REGNO (reg))
5574 pattern = single_set (insn);
5575 if (pattern
5576 && REG_P (SET_DEST (pattern))
5577 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5578 *set = pattern;
5579 return false;
5581 if (!CALL_P (insn))
5583 /* We don't use rtx_equal_p because we don't care if the mode is
5584 different. */
5585 pattern = single_set (insn);
5586 if (pattern
5587 && REG_P (SET_DEST (pattern))
5588 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5590 rtx par, part;
5591 int i;
5593 *set = pattern;
5594 par = PATTERN (insn);
5595 if (GET_CODE (par) == PARALLEL)
5596 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5598 part = XVECEXP (par, 0, i);
5599 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5600 return true;
5602 return reg_mentioned_p (reg, SET_SRC (pattern));
5605 return true;
5608 pattern = PATTERN (insn);
5610 if (GET_CODE (pattern) == PARALLEL)
5612 int i;
5614 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5615 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5616 return true;
5617 pattern = XVECEXP (pattern, 0, 0);
5620 if (GET_CODE (pattern) == SET)
5622 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5624 /* We don't use rtx_equal_p, because we don't care if the
5625 mode is different. */
5626 if (!REG_P (SET_DEST (pattern))
5627 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5628 return true;
5630 *set = pattern;
5633 pattern = SET_SRC (pattern);
5636 if (GET_CODE (pattern) != CALL
5637 || !MEM_P (XEXP (pattern, 0))
5638 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5639 return true;
5641 return false;
5644 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5645 general registers. Bits 0..15 mean that the respective registers
5646 are used as inputs in the instruction. Bits 16..31 mean that the
5647 registers 0..15, respectively, are used as outputs, or are clobbered.
5648 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5650 regs_used (rtx x, int is_dest)
5652 enum rtx_code code;
5653 const char *fmt;
5654 int i, used = 0;
5656 if (! x)
5657 return used;
5658 code = GET_CODE (x);
5659 switch (code)
5661 case REG:
5662 if (REGNO (x) < 16)
5663 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5664 << (REGNO (x) + is_dest));
5665 return 0;
5666 case SUBREG:
5668 rtx y = SUBREG_REG (x);
5670 if (!REG_P (y))
5671 break;
5672 if (REGNO (y) < 16)
5673 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5674 << (REGNO (y) +
5675 subreg_regno_offset (REGNO (y),
5676 GET_MODE (y),
5677 SUBREG_BYTE (x),
5678 GET_MODE (x)) + is_dest));
5679 return 0;
5681 case SET:
5682 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5683 case RETURN:
5684 /* If there was a return value, it must have been indicated with USE. */
5685 return 0x00ffff00;
5686 case CLOBBER:
5687 is_dest = 1;
5688 break;
5689 case MEM:
5690 is_dest = 0;
5691 break;
5692 case CALL:
5693 used |= 0x00ff00f0;
5694 break;
5695 default:
5696 break;
5699 fmt = GET_RTX_FORMAT (code);
5701 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5703 if (fmt[i] == 'E')
5705 int j;
5706 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5707 used |= regs_used (XVECEXP (x, i, j), is_dest);
5709 else if (fmt[i] == 'e')
5710 used |= regs_used (XEXP (x, i), is_dest);
5712 return used;
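/* As a worked example (for illustration only): for the pattern
   (set (reg:SI r4) (plus:SI (reg:SI r5) (reg:SI r6))), regs_used
   returns (1 << 5) | (1 << 6) for the inputs r5 and r6, plus
   1 << (4 + 16) for the output r4, i.e. 0x00100060.  */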
5715 /* Create an instruction that prevents redirection of a conditional branch
5716 to the destination of the JUMP with address ADDR.
5717 If the branch needs to be implemented as an indirect jump, try to find
5718 a scratch register for it.
5719 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5720 If any preceding insn that doesn't fit into a delay slot is good enough,
5721 pass 1. Pass 2 if a definite blocking insn is needed.
5722 -1 is used internally to avoid deep recursion.
5723 If a blocking instruction is made or recognized, return it. */
5724 static rtx_insn *
5725 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5727 int dead = 0;
5728 rtx_insn *prev = prev_nonnote_insn (jump);
5729 rtx dest;
5731 /* First, check if we already have an instruction that satisfies our need. */
5732 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5734 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5735 return prev;
5736 if (GET_CODE (PATTERN (prev)) == USE
5737 || GET_CODE (PATTERN (prev)) == CLOBBER
5738 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5739 prev = jump;
5740 else if ((need_block &= ~1) < 0)
5741 return prev;
5742 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5743 need_block = 0;
5745 if (GET_CODE (PATTERN (jump)) == RETURN)
5747 if (! need_block)
5748 return prev;
5749 /* Reorg even does nasty things with return insns that cause branches
5750 to go out of range - see find_end_label and callers. */
5751 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5753 /* We can't use JUMP_LABEL here because it might be undefined
5754 when not optimizing. */
5755 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5756 /* If the branch is out of range, try to find a scratch register for it. */
5757 if (optimize
5758 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5759 > 4092 + 4098))
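/* The unsigned comparison above is a single-test range check: it is
   true exactly when the displacement to DEST lies outside the
   interval [-4092, +4098], i.e. when the branch is considered out
   of range here.  */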
5761 rtx_insn *scan;
5762 /* Don't look for the stack pointer as a scratch register;
5763 it would cause trouble if an interrupt occurred. */
5764 unsigned attempt = 0x7fff, used;
5765 int jump_left = flag_expensive_optimizations + 1;
5767 /* It is likely that the most recent eligible instruction is wanted for
5768 the delay slot. Therefore, find out which registers it uses, and
5769 try to avoid using them. */
5771 for (scan = jump; (scan = PREV_INSN (scan)); )
5773 enum rtx_code code;
5775 if (scan->deleted ())
5776 continue;
5777 code = GET_CODE (scan);
5778 if (code == CODE_LABEL || code == JUMP_INSN)
5779 break;
5780 if (code == INSN
5781 && GET_CODE (PATTERN (scan)) != USE
5782 && GET_CODE (PATTERN (scan)) != CLOBBER
5783 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5785 attempt &= ~regs_used (PATTERN (scan), 0);
5786 break;
5789 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5790 (scan = NEXT_INSN (scan)); )
5792 enum rtx_code code;
5794 if (scan->deleted ())
5795 continue;
5796 code = GET_CODE (scan);
5797 if (INSN_P (scan))
5799 used |= regs_used (PATTERN (scan), 0);
5800 if (code == CALL_INSN)
5801 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5802 dead |= (used >> 16) & ~used;
5803 if (dead & attempt)
5805 dead &= attempt;
5806 break;
5808 if (code == JUMP_INSN)
5810 if (jump_left-- && simplejump_p (scan))
5811 scan = JUMP_LABEL_AS_INSN (scan);
5812 else
5813 break;
5817 /* Mask out the stack pointer again, in case it was
5818 the only 'free' register we have found. */
5819 dead &= 0x7fff;
5821 /* If the immediate destination is still in range, check for possible
5822 threading with a jump beyond the delay slot insn.
5823 Don't check if we are called recursively; the jump has been or will be
5824 checked in a different invocation in that case. */
5826 else if (optimize && need_block >= 0)
5828 rtx_insn *next = next_active_insn (next_active_insn (dest));
5829 if (next && JUMP_P (next)
5830 && GET_CODE (PATTERN (next)) == SET
5831 && recog_memoized (next) == CODE_FOR_jump_compact)
5833 dest = JUMP_LABEL (next);
5834 if (dest
5835 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5836 > 4092 + 4098))
5837 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5841 if (dead)
5843 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5845 /* It would be nice if we could convert the jump into an indirect
5846 jump / far branch right now, thus exposing all constituent
5847 instructions to further optimization. However, reorg uses
5848 simplejump_p to determine if there is an unconditional jump where
5849 it should try to schedule instructions from the target of the
5850 branch; simplejump_p fails for indirect jumps even if they have
5851 a JUMP_LABEL. */
5852 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5853 (reg, GEN_INT (unspec_bbr_uid++)),
5854 jump);
5855 /* ??? We would like this to have the scope of the jump, but that
5856 scope will change when a delay slot insn of an inner scope is added.
5857 Hence, after delay slot scheduling, we'll have to expect
5858 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5859 the jump. */
5861 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5862 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5863 return insn;
5865 else if (need_block)
5866 /* We can't use JUMP_LABEL here because it might be undefined
5867 when not optimizing. */
5868 return emit_insn_before (gen_block_branch_redirect
5869 (GEN_INT (unspec_bbr_uid++)),
5870 jump);
5871 return prev;
5874 #define CONDJUMP_MIN -252
5875 #define CONDJUMP_MAX 262
5876 struct far_branch
5878 /* A label (to be placed) in front of the jump
5879 that jumps to our ultimate destination. */
5880 rtx_insn *near_label;
5881 /* Where we are going to insert it if we cannot move the jump any farther,
5882 or the jump itself if we have picked up an existing jump. */
5883 rtx_insn *insert_place;
5884 /* The ultimate destination. */
5885 rtx_insn *far_label;
5886 struct far_branch *prev;
5887 /* If the branch has already been created, its address;
5888 else the address of its first prospective user. */
5889 int address;
5892 static void gen_far_branch (struct far_branch *);
5893 enum mdep_reorg_phase_e mdep_reorg_phase;
5894 static void
5895 gen_far_branch (struct far_branch *bp)
5897 rtx_insn *insn = bp->insert_place;
5898 rtx_jump_insn *jump;
5899 rtx_code_label *label = gen_label_rtx ();
5900 int ok;
5902 emit_label_after (label, insn);
5903 if (bp->far_label)
5905 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5906 LABEL_NUSES (bp->far_label)++;
5908 else
5909 jump = emit_jump_insn_after (gen_return (), insn);
5911 /* Emit a barrier so that reorg knows that any following instructions
5912 are not reachable via a fall-through path.
5913 But don't do this when not optimizing, since we wouldn't suppress the
5914 alignment for the barrier then, and could end up with out-of-range
5915 pc-relative loads. */
5916 if (optimize)
5917 emit_barrier_after (jump);
5918 emit_label_after (bp->near_label, insn);
5920 if (bp->far_label)
5921 JUMP_LABEL (jump) = bp->far_label;
5922 else
5924 rtx pat = PATTERN (jump);
5925 gcc_assert (ANY_RETURN_P (pat));
5926 JUMP_LABEL (jump) = pat;
5929 ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5930 gcc_assert (ok);
5932 /* If we are branching around a jump (rather than a return), prevent
5933 reorg from using an insn from the jump target as the delay slot insn -
5934 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5935 and it could cause branches to go out of range. */
5936 if (bp->far_label)
5937 emit_insn_after
5938 (gen_stuff_delay_slot
5939 (GEN_INT (unspec_bbr_uid++),
5940 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5941 insn);
5942 /* Prevent reorg from undoing our splits. */
5943 gen_block_redirect (jump, bp->address += 2, 2);
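/* As an illustrative sketch (label names are made up), an out-of-range
   conditional branch such as "bt .Lfar" ends up roughly as

	bf	.Lskip		! inverted condition, short branch
   .Lnear:
	bra	.Lfar		! the actual far jump (a return pattern is
				!   used instead when there is no far label)
	(barrier)
   .Lskip:

   with other nearby branches to the same destination redirected to
   .Lnear by split_branches.  */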
5946 /* Fix up ADDR_DIFF_VECs. */
5947 void
5948 fixup_addr_diff_vecs (rtx_insn *first)
5950 rtx_insn *insn;
5952 for (insn = first; insn; insn = NEXT_INSN (insn))
5954 rtx vec_lab, pat, prevpat, x, braf_label;
5955 rtx_insn *prev;
5957 if (! JUMP_TABLE_DATA_P (insn)
5958 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5959 continue;
5960 pat = PATTERN (insn);
5961 vec_lab = XEXP (XEXP (pat, 0), 0);
5963 /* Search the matching casesi_jump_2. */
5964 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5966 if (!JUMP_P (prev))
5967 continue;
5968 prevpat = PATTERN (prev);
5969 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5970 continue;
5971 x = XVECEXP (prevpat, 0, 1);
5972 if (GET_CODE (x) != USE)
5973 continue;
5974 x = XEXP (x, 0);
5975 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5976 break;
5978 /* FIXME: This is a bug in the optimizer, but it seems harmless
5979 to just avoid panicking. */
5980 if (!prev)
5981 continue;
5983 /* Emit the reference label of the braf where it belongs, right after
5984 the casesi_jump_2 (i.e. braf). */
5985 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5986 emit_label_after (braf_label, prev);
5988 /* Fix up the ADDR_DIFF_VEC to be relative
5989 to the reference address of the braf. */
5990 XEXP (XEXP (pat, 0), 0) = braf_label;
5994 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5995 a barrier. Return the base 2 logarithm of the desired alignment. */
5997 barrier_align (rtx_insn *barrier_or_label)
5999 rtx next, pat;
6001 if (! barrier_or_label)
6002 return 0;
6004 if (LABEL_P (barrier_or_label)
6005 && NEXT_INSN (barrier_or_label)
6006 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
6007 return 2;
6009 if (BARRIER_P (barrier_or_label)
6010 && PREV_INSN (barrier_or_label)
6011 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
6013 pat = PATTERN (PREV_INSN (barrier_or_label));
6014 /* If this is a very small table, we want to keep the alignment after
6015 the table to the minimum for proper code alignment. */
6016 return ((optimize_size
6017 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6018 <= (unsigned) 1 << (CACHE_LOG - 2)))
6019 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6022 next = next_active_insn (barrier_or_label);
6024 if (! next)
6025 return 0;
6027 pat = PATTERN (next);
6029 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6030 /* This is a barrier in front of a constant table. */
6031 return 0;
6033 if (optimize_size)
6034 return 0;
6036 if (! TARGET_SH2 || ! optimize)
6037 return align_jumps_log;
6039 /* When fixing up pcloads, a constant table might be inserted just before
6040 the basic block that ends with the barrier. Thus, we can't trust the
6041 instruction lengths before that. */
6042 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6044 /* Check if there is an immediately preceding branch to the insn beyond
6045 the barrier. We must weigh the cost of discarding useful information
6046 from the current cache line when executing this branch and there is
6047 an alignment, against that of fetching unneeded insns in front of the
6048 branch target when there is no alignment. */
6050 /* There are two delay_slot cases to consider. One is the simple case
6051 where the preceding branch is to the insn beyond the barrier (simple
6052 delay slot filling), and the other is where the preceding branch has
6053 a delay slot that is a duplicate of the insn after the barrier
6054 (fill_eager_delay_slots) and the branch is to the insn after the insn
6055 after the barrier. */
6057 int slot, credit;
6058 bool jump_to_next = false;
6060 /* Skip to the insn before the JUMP_INSN before the barrier under
6061 investigation. */
6062 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6064 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6065 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6066 prev = prev_real_insn (prev))
6068 jump_to_next = false;
6069 if (GET_CODE (PATTERN (prev)) == USE
6070 || GET_CODE (PATTERN (prev)) == CLOBBER)
6071 continue;
6072 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6074 prev = prev_seq->insn (1);
6075 if (INSN_UID (prev) == INSN_UID (next))
6077 /* Delay slot was filled with insn at jump target. */
6078 jump_to_next = true;
6079 continue;
6083 if (slot
6084 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6085 slot = 0;
6086 credit -= get_attr_length (prev);
6088 if (prev && jump_to_label_p (prev))
6090 rtx_insn *x;
6091 if (jump_to_next
6092 || next_real_insn (JUMP_LABEL (prev)) == next
6093 /* If relax_delay_slots() decides NEXT was redundant
6094 with some previous instruction, it will have
6095 redirected PREV's jump to the following insn. */
6096 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6097 /* There is no upper bound on redundant instructions
6098 that might have been skipped, but we must not put an
6099 alignment where none had been before. */
6100 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6101 (INSN_P (x)
6102 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6103 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6104 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6106 rtx pat = PATTERN (prev);
6107 if (GET_CODE (pat) == PARALLEL)
6108 pat = XVECEXP (pat, 0, 0);
6109 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6110 return 0;
6115 return align_jumps_log;
6118 /* If we are inside a phony loop, almost any kind of label can turn up as the
6119 first one in the loop. Aligning a braf label causes incorrect switch
6120 destination addresses; we can detect braf labels because they are
6121 followed by a BARRIER.
6122 Applying loop alignment to small constant or switch tables is a waste
6123 of space, so we suppress this too. */
6125 sh_loop_align (rtx_insn *label)
6127 rtx_insn *next = label;
6129 if (! optimize || optimize_size)
6130 return 0;
6133 next = next_nonnote_insn (next);
6134 while (next && LABEL_P (next));
6136 if (! next
6137 || ! INSN_P (next)
6138 || recog_memoized (next) == CODE_FOR_consttable_2)
6139 return 0;
6141 return align_loops_log;
6144 /* Do a final pass over the function, just before delayed branch
6145 scheduling. */
6146 static void
6147 sh_reorg (void)
6149 rtx_insn *first, *insn, *mova = NULL;
6150 int num_mova;
6151 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6152 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6154 first = get_insns ();
6155 max_labelno_before_reorg = max_label_num ();
6157 /* We must split call insns before introducing `mova's. If we're
6158 optimizing, they'll have already been split. Otherwise, make
6159 sure we don't split them too late. */
6160 if (! optimize)
6161 split_all_insns_noflow ();
6163 if (TARGET_SHMEDIA)
6164 return;
6166 /* If relaxing, generate pseudo-ops to associate function calls with
6167 the symbols they call. It does no harm to not generate these
6168 pseudo-ops. However, when we can generate them, it enables the
6169 linker to potentially relax the jsr to a bsr, and eliminate the
6170 register load and, possibly, the constant pool entry. */
6172 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6173 if (TARGET_RELAX)
6175 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6176 own purposes. This works because none of the remaining passes
6177 need to look at them.
6179 ??? But it may break in the future. We should use a machine
6180 dependent REG_NOTE, or some other approach entirely. */
6181 for (insn = first; insn; insn = NEXT_INSN (insn))
6183 if (INSN_P (insn))
6185 rtx note;
6187 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6188 NULL_RTX)) != 0)
6189 remove_note (insn, note);
6193 for (insn = first; insn; insn = NEXT_INSN (insn))
6195 rtx pattern, reg, set, dies;
6196 rtx_code_label *label;
6197 rtx_insn *link, *scan;
6198 int rescan = 0, foundinsn = 0;
6200 if (CALL_P (insn))
6202 pattern = PATTERN (insn);
6204 if (GET_CODE (pattern) == PARALLEL)
6205 pattern = XVECEXP (pattern, 0, 0);
6206 if (GET_CODE (pattern) == SET)
6207 pattern = SET_SRC (pattern);
6209 if (GET_CODE (pattern) != CALL
6210 || !MEM_P (XEXP (pattern, 0)))
6211 continue;
6213 reg = XEXP (XEXP (pattern, 0), 0);
6215 else
6217 reg = sfunc_uses_reg (insn);
6218 if (! reg)
6219 continue;
6222 if (!REG_P (reg))
6223 continue;
6225 /* Try scanning backward to find where the register is set. */
6226 link = NULL;
6227 for (scan = PREV_INSN (insn);
6228 scan && !LABEL_P (scan);
6229 scan = PREV_INSN (scan))
6231 if (! INSN_P (scan))
6232 continue;
6234 if (! reg_mentioned_p (reg, scan))
6235 continue;
6237 if (noncall_uses_reg (reg, scan, &set))
6238 break;
6240 if (set)
6242 link = scan;
6243 break;
6247 if (! link)
6248 continue;
6250 /* The register is set at LINK. */
6252 /* We can only optimize the function call if the register is
6253 being set to a symbol. In theory, we could sometimes
6254 optimize calls to a constant location, but the assembler
6255 and linker do not support that at present. */
6256 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6257 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6258 continue;
6260 /* Scan forward from LINK to the place where REG dies, and
6261 make sure that the only insns which use REG are
6262 themselves function calls. */
6264 /* ??? This doesn't work for call targets that were allocated
6265 by reload, since there may not be a REG_DEAD note for the
6266 register. */
6268 dies = NULL_RTX;
6269 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6271 rtx scanset;
6273 /* Don't try to trace forward past a CODE_LABEL if we haven't
6274 seen INSN yet. Ordinarily, we will only find the setting insn
6275 if it is in the same basic block. However,
6276 cross-jumping can insert code labels in between the load and
6277 the call, and can result in situations where a single call
6278 insn may have two targets depending on where we came from. */
6280 if (LABEL_P (scan) && ! foundinsn)
6281 break;
6283 if (! INSN_P (scan))
6284 continue;
6286 /* Don't try to trace forward past a JUMP. To optimize
6287 safely, we would have to check that all the
6288 instructions at the jump destination did not use REG. */
6290 if (JUMP_P (scan))
6291 break;
6293 if (! reg_mentioned_p (reg, scan))
6294 continue;
6296 if (noncall_uses_reg (reg, scan, &scanset))
6297 break;
6299 if (scan == insn)
6300 foundinsn = 1;
6302 if (scan != insn
6303 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6305 /* There is a function call to this register other
6306 than the one we are checking. If we optimize
6307 this call, we need to rescan again below. */
6308 rescan = 1;
6311 /* ??? We shouldn't have to worry about SCANSET here.
6312 We should just be able to check for a REG_DEAD note
6313 on a function call. However, the REG_DEAD notes are
6314 apparently not dependable around libcalls; c-torture
6315 execute/920501-2 is a test case. If SCANSET is set,
6316 then this insn sets the register, so it must have
6317 died earlier. Unfortunately, this will only handle
6318 the cases in which the register is, in fact, set in a
6319 later insn. */
6321 /* ??? We shouldn't have to use FOUNDINSN here.
6322 This dates back to when we used LOG_LINKS to find
6323 the most recent insn which sets the register. */
6325 if (foundinsn
6326 && (scanset
6327 || find_reg_note (scan, REG_DEAD, reg)))
6329 dies = scan;
6330 break;
6334 if (! dies)
6336 /* Either there was a branch, or some insn used REG
6337 other than as a function call address. */
6338 continue;
6341 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6342 on the insn which sets the register, and on each call insn
6343 which uses the register. In final_prescan_insn we look for
6344 the REG_LABEL_OPERAND notes, and output the appropriate label
6345 or pseudo-op. */
6347 label = gen_label_rtx ();
6348 add_reg_note (link, REG_LABEL_OPERAND, label);
6349 add_reg_note (insn, REG_LABEL_OPERAND, label);
6350 if (rescan)
6352 scan = link;
6355 rtx reg2;
6357 scan = NEXT_INSN (scan);
6358 if (scan != insn
6359 && ((CALL_P (scan)
6360 && reg_mentioned_p (reg, scan))
6361 || ((reg2 = sfunc_uses_reg (scan))
6362 && REGNO (reg2) == REGNO (reg))))
6363 add_reg_note (scan, REG_LABEL_OPERAND, label);
6365 while (scan != dies);
6370 if (TARGET_SH2)
6371 fixup_addr_diff_vecs (first);
6373 if (optimize)
6375 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6376 shorten_branches (first);
6379 /* Scan the function looking for move instructions which have to be
6380 changed to pc-relative loads and insert the literal tables. */
6381 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6382 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6384 if (mova_p (insn))
6386 /* ??? basic block reordering can move a switch table dispatch
6387 below the switch table. Check if that has happened.
6388 We only have the addresses available when optimizing; but then,
6389 this check shouldn't be needed when not optimizing. */
6390 if (!untangle_mova (&num_mova, &mova, insn))
6392 insn = mova;
6393 num_mova = 0;
6396 else if (JUMP_TABLE_DATA_P (insn)
6397 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6398 && num_mova
6399 /* ??? loop invariant motion can also move a mova out of a
6400 loop. Since loop does this code motion anyway, maybe we
6401 should wrap UNSPEC_MOVA into a CONST, so that reload can
6402 move it back. */
6403 && ((num_mova > 1
6404 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6405 || (prev_nonnote_insn (insn)
6406 == XEXP (MOVA_LABELREF (mova), 0))))
6408 rtx_insn *scan;
6409 int total;
6411 num_mova--;
6413 /* Some code might have been inserted between the mova and
6414 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6415 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6416 total += get_attr_length (scan);
6418 /* The range of mova is 1020; add 4 because the pc counts from the address
6419 of the second instruction after this one, and subtract 2 in case the pc is
6420 2-byte aligned; hence the 1020 + 4 - 2 = 1022 limit below. Any alignment
6421 needed for the ADDR_DIFF_VEC cancels out with that of the mova itself. */
6422 if (total > 1022)
6424 /* Change the mova into a load, and restart scanning
6425 there. broken_move will then return true for mova. */
6426 fixup_mova (mova);
6427 insn = mova;
6430 if (broken_move (insn)
6431 || (NONJUMP_INSN_P (insn)
6432 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6434 rtx_insn *scan;
6435 /* Scan ahead looking for a barrier to stick the constant table
6436 behind. */
6437 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6438 rtx_insn *last_float_move = NULL;
6439 rtx last_float = 0, *last_float_addr = NULL;
6440 int need_aligned_label = 0;
6442 if (num_mova && ! mova_p (mova))
6444 /* find_barrier had to change the first mova into a
6445 pcload; thus, we have to start with this new pcload. */
6446 insn = mova;
6447 num_mova = 0;
6449 /* Now find all the moves between the points and modify them. */
6450 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6452 if (LABEL_P (scan))
6453 last_float = 0;
6454 if (NONJUMP_INSN_P (scan)
6455 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6456 need_aligned_label = 1;
6457 if (broken_move (scan))
6459 rtx *patp = &PATTERN (scan), pat = *patp;
6460 rtx src, dst;
6461 rtx lab;
6462 rtx newsrc;
6463 machine_mode mode;
6465 if (GET_CODE (pat) == PARALLEL)
6466 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6467 src = SET_SRC (pat);
6468 dst = SET_DEST (pat);
6469 mode = GET_MODE (dst);
6471 if (mode == SImode && satisfies_constraint_I16 (src)
6472 && REGNO (dst) != FPUL_REG)
6474 int offset = 0;
6476 mode = HImode;
6477 while (GET_CODE (dst) == SUBREG)
6479 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6480 GET_MODE (SUBREG_REG (dst)),
6481 SUBREG_BYTE (dst),
6482 GET_MODE (dst));
6483 dst = SUBREG_REG (dst);
6485 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6487 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6489 /* This must be an insn that clobbers r0. */
6490 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6491 XVECLEN (PATTERN (scan), 0)
6492 - 1);
6493 rtx clobber = *clobberp;
6495 gcc_assert (GET_CODE (clobber) == CLOBBER
6496 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6498 if (last_float
6499 && reg_set_between_p (r0_rtx, last_float_move, scan))
6500 last_float = 0;
6501 if (last_float
6502 && TARGET_SHCOMPACT
6503 && GET_MODE_SIZE (mode) != 4
6504 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6505 last_float = 0;
6506 lab = add_constant (src, mode, last_float);
6507 if (lab)
6508 emit_insn_before (gen_mova (lab), scan);
6509 else
6511 /* There will be a REG_UNUSED note for r0 on
6512 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6513 otherwise reorg:mark_target_live_regs will not
6514 consider r0 to be used, and we would end up with a delay
6515 slot insn in front of SCAN that clobbers r0. */
6516 rtx note
6517 = find_regno_note (last_float_move, REG_UNUSED, 0);
6519 /* If we are not optimizing, then there may not be
6520 a note. */
6521 if (note)
6522 PUT_REG_NOTE_KIND (note, REG_INC);
6524 *last_float_addr = r0_inc_rtx;
6526 last_float_move = scan;
6527 last_float = src;
6528 newsrc = gen_const_mem (mode,
6529 (((TARGET_SH4 && ! TARGET_FMOVD)
6530 || REGNO (dst) == FPUL_REG)
6531 ? r0_inc_rtx
6532 : r0_rtx));
6533 last_float_addr = &XEXP (newsrc, 0);
6535 /* Remove the clobber of r0. */
6536 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6537 gen_rtx_SCRATCH (Pmode));
6539 /* This is a mova needing a label. Create it. */
6540 else if (GET_CODE (src) == UNSPEC
6541 && XINT (src, 1) == UNSPEC_MOVA
6542 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6544 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6545 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6546 newsrc = gen_rtx_UNSPEC (SImode,
6547 gen_rtvec (1, newsrc),
6548 UNSPEC_MOVA);
6550 else if (GET_CODE (src) == UNSPEC_VOLATILE
6551 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6553 newsrc = XVECEXP (src, 0, 0);
6554 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6555 INSN_CODE (scan) = -1;
6556 continue;
6558 else
6560 lab = add_constant (src, mode, 0);
6561 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6562 newsrc = gen_const_mem (mode, newsrc);
6564 *patp = gen_rtx_SET (dst, newsrc);
6565 INSN_CODE (scan) = -1;
6568 dump_table (need_aligned_label ? insn : 0, barrier);
6569 insn = barrier;
6572 label_ref_list_d::pool.release ();
6573 for (insn = first; insn; insn = NEXT_INSN (insn))
6574 PUT_MODE (insn, VOIDmode);
6576 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6577 INSN_ADDRESSES_FREE ();
6578 split_branches (first);
6580 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6581 also has an effect on the register that holds the address of the sfunc.
6582 Insert an extra dummy insn in front of each sfunc that pretends to
6583 use this register. */
6584 if (flag_delayed_branch)
6586 for (insn = first; insn; insn = NEXT_INSN (insn))
6588 rtx reg = sfunc_uses_reg (insn);
6590 if (! reg)
6591 continue;
6592 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6595 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6598 /* Return the UID of the insn that follows the specified label. */
6600 get_dest_uid (rtx label, int max_uid)
6602 rtx_insn *dest = next_real_insn (label);
6603 int dest_uid;
6604 if (! dest)
6605 /* This can happen for an undefined label. */
6606 return 0;
6607 dest_uid = INSN_UID (dest);
6608 /* If this is a newly created branch redirection blocking instruction,
6609 we cannot index the branch_uid or insn_addresses arrays with its
6610 uid. But then, we won't need to, because the actual destination is
6611 the following branch. */
6612 while (dest_uid >= max_uid)
6614 dest = NEXT_INSN (dest);
6615 dest_uid = INSN_UID (dest);
6617 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6618 return 0;
6619 return dest_uid;
6622 /* Split condbranches that are out of range. Also add clobbers for
6623 scratch registers that are needed in far jumps.
6624 We do this before delay slot scheduling, so that it can take our
6625 newly created instructions into account. It also allows us to
6626 find branches with common targets more easily. */
6627 static void
6628 split_branches (rtx_insn *first)
6630 rtx_insn *insn;
6631 struct far_branch **uid_branch, *far_branch_list = 0;
6632 int max_uid = get_max_uid ();
6633 int ok;
6635 /* Find out which branches are out of range. */
6636 shorten_branches (first);
6638 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6639 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6641 for (insn = first; insn; insn = NEXT_INSN (insn))
6642 if (! INSN_P (insn))
6643 continue;
6644 else if (insn->deleted ())
6646 /* Shorten_branches would split this instruction again,
6647 so transform it into a note. */
6648 SET_INSN_DELETED (insn);
6650 else if (JUMP_P (insn))
6652 enum attr_type type = get_attr_type (insn);
6653 if (type == TYPE_CBRANCH)
6655 rtx_insn *next, *beyond;
6657 if (get_attr_length (insn) > 4)
6659 rtx src = SET_SRC (PATTERN (insn));
6660 rtx olabel = XEXP (XEXP (src, 1), 0);
6661 int addr = INSN_ADDRESSES (INSN_UID (insn));
6662 rtx_insn *label = 0;
6663 int dest_uid = get_dest_uid (olabel, max_uid);
6664 struct far_branch *bp = uid_branch[dest_uid];
6666 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6667 the label if the LABEL_NUSES count drops to zero. There is
6668 always a jump_optimize pass that sets these values, but it
6669 proceeds to delete unreferenced code, and then if not
6670 optimizing, to un-delete the deleted instructions, thus
6671 leaving labels with too low uses counts. */
6672 if (! optimize)
6674 JUMP_LABEL (insn) = olabel;
6675 LABEL_NUSES (olabel)++;
6677 if (! bp)
6679 bp = (struct far_branch *) alloca (sizeof *bp);
6680 uid_branch[dest_uid] = bp;
6681 bp->prev = far_branch_list;
6682 far_branch_list = bp;
6683 bp->far_label = as_a <rtx_insn *> (
6684 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6685 0));
6686 LABEL_NUSES (bp->far_label)++;
6688 else
6690 label = bp->near_label;
6691 if (! label && bp->address - addr >= CONDJUMP_MIN)
6693 rtx_insn *block = bp->insert_place;
6695 if (GET_CODE (PATTERN (block)) == RETURN)
6696 block = PREV_INSN (block);
6697 else
6698 block = gen_block_redirect (block,
6699 bp->address, 2);
6700 label = emit_label_after (gen_label_rtx (),
6701 PREV_INSN (block));
6702 bp->near_label = label;
6704 else if (label && ! NEXT_INSN (label))
6706 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6707 bp->insert_place = insn;
6708 else
6709 gen_far_branch (bp);
6712 if (! label
6713 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6715 bp->near_label = label = gen_label_rtx ();
6716 bp->insert_place = insn;
6717 bp->address = addr;
6719 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6720 gcc_assert (ok);
6722 else
6724 /* get_attr_length (insn) == 2 */
6725 /* Check if we have a pattern where reorg wants to redirect
6726 the branch to a label from an unconditional branch that
6727 is too far away. */
6728 /* We can't use JUMP_LABEL here because it might be undefined
6729 when not optimizing. */
6730 /* A syntax error might cause beyond to be NULL_RTX. */
6731 beyond
6732 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6733 0));
6735 if (beyond
6736 && (JUMP_P (beyond)
6737 || ((beyond = next_active_insn (beyond))
6738 && JUMP_P (beyond)))
6739 && GET_CODE (PATTERN (beyond)) == SET
6740 && recog_memoized (beyond) == CODE_FOR_jump_compact
6741 && ((INSN_ADDRESSES
6742 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6743 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6744 > 252 + 258 + 2))
6745 gen_block_redirect (beyond,
6746 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6749 next = next_active_insn (insn);
6751 if (next
6752 && (JUMP_P (next)
6753 || ((next = next_active_insn (next))
6754 && JUMP_P (next)))
6755 && GET_CODE (PATTERN (next)) == SET
6756 && recog_memoized (next) == CODE_FOR_jump_compact
6757 && ((INSN_ADDRESSES
6758 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6759 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6760 > 252 + 258 + 2))
6761 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6763 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6765 int addr = INSN_ADDRESSES (INSN_UID (insn));
6766 rtx_insn *far_label = 0;
6767 int dest_uid = 0;
6768 struct far_branch *bp;
6770 if (type == TYPE_JUMP)
6772 if (CROSSING_JUMP_P (insn))
6774 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6775 insn);
6776 continue;
6779 far_label = as_a <rtx_insn *> (
6780 XEXP (SET_SRC (PATTERN (insn)), 0));
6781 dest_uid = get_dest_uid (far_label, max_uid);
6782 if (! dest_uid)
6784 /* Parse errors can lead to labels outside
6785 the insn stream. */
6786 if (! NEXT_INSN (far_label))
6787 continue;
6789 if (! optimize)
6791 JUMP_LABEL (insn) = far_label;
6792 LABEL_NUSES (far_label)++;
6794 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6795 far_label = 0;
6798 bp = uid_branch[dest_uid];
6799 if (! bp)
6801 bp = (struct far_branch *) alloca (sizeof *bp);
6802 uid_branch[dest_uid] = bp;
6803 bp->prev = far_branch_list;
6804 far_branch_list = bp;
6805 bp->near_label = 0;
6806 bp->far_label = far_label;
6807 if (far_label)
6808 LABEL_NUSES (far_label)++;
6810 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6811 if (addr - bp->address <= CONDJUMP_MAX)
6812 emit_label_after (bp->near_label, PREV_INSN (insn));
6813 else
6815 gen_far_branch (bp);
6816 bp->near_label = 0;
6818 else
6819 bp->near_label = 0;
6820 bp->address = addr;
6821 bp->insert_place = insn;
6822 if (! far_label)
6823 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6824 else
6825 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6828 /* Generate all pending far branches,
6829 and free our references to the far labels. */
6830 while (far_branch_list)
6832 if (far_branch_list->near_label
6833 && ! NEXT_INSN (far_branch_list->near_label))
6834 gen_far_branch (far_branch_list);
6835 if (optimize
6836 && far_branch_list->far_label
6837 && ! --LABEL_NUSES (far_branch_list->far_label))
6838 delete_insn (far_branch_list->far_label);
6839 far_branch_list = far_branch_list->prev;
6842 /* Instruction length information is no longer valid due to the new
6843 instructions that have been generated. */
6844 init_insn_lengths ();
6847 /* Dump out instruction addresses, which is useful for debugging the
6848 constant pool table stuff.
6850 If relaxing, output the label and pseudo-ops used to link together
6851 calls and the instruction which set the registers.
6853 ??? The addresses printed by this routine for insns are nonsense for
6854 insns which are inside a sequence where none of the inner insns have
6855 variable length. This is because the second pass of shorten_branches
6856 does not bother to update them. */
6857 void
6858 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6859 int noperands ATTRIBUTE_UNUSED)
6861 if (TARGET_DUMPISIZE)
6862 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6864 if (TARGET_RELAX)
6866 rtx note;
6868 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6869 if (note)
6871 rtx pattern;
6873 pattern = PATTERN (insn);
6874 if (GET_CODE (pattern) == PARALLEL)
6875 pattern = XVECEXP (pattern, 0, 0);
6876 switch (GET_CODE (pattern))
6878 case SET:
6879 if (GET_CODE (SET_SRC (pattern)) != CALL
6880 && get_attr_type (insn) != TYPE_SFUNC)
6882 targetm.asm_out.internal_label
6883 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6884 break;
6886 /* else FALLTHROUGH */
6887 case CALL:
6888 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6889 CODE_LABEL_NUMBER (XEXP (note, 0)));
6890 break;
6892 default:
6893 gcc_unreachable ();
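/* Illustratively (label numbers are made up), with -mrelax the two
   REG_LABEL_OPERAND notes added in sh_reorg result in output along
   the lines of

   .L5:
	mov.l	.L7,r1		! the insn that sets the call address
	...
	.uses	.L5
	jsr	@r1

   which tells the linker which load feeds the call, so it can
   potentially relax the pair to a bsr and drop the pool entry.  */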
6899 /* Dump out any constants accumulated in the final pass. These will
6900 only be labels. */
6901 const char *
6902 output_jump_label_table (void)
6904 int i;
6906 if (pool_size)
6908 fprintf (asm_out_file, "\t.align 2\n");
6909 for (i = 0; i < pool_size; i++)
6911 pool_node *p = &pool_vector[i];
6913 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6914 CODE_LABEL_NUMBER (p->label));
6915 output_asm_insn (".long %O0", &p->value);
6917 pool_size = 0;
6920 return "";
6923 /* A full frame looks like:
6925 arg-5
6926 arg-4
6927 [ if current_function_anonymous_args
6928 arg-3
6929 arg-2
6930 arg-1
6931 arg-0 ]
6932 saved-fp
6933 saved-r10
6934 saved-r11
6935 saved-r12
6936 saved-pr
6937 local-n
6939 local-1
6940 local-0 <- fp points here.
6942 Number of bytes pushed for anonymous args, used to pass information
6943 between expand_prologue and expand_epilogue.
6945 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6946 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6947 for an epilogue and a negative value means that it's for a sibcall
6948 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6949 all the registers that are about to be restored, and hence dead. */
6950 static void
6951 output_stack_adjust (int size, rtx reg, int epilogue_p,
6952 HARD_REG_SET *live_regs_mask, bool frame_p)
6954 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6955 if (size)
6957 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6959 /* This test is bogus, as output_stack_adjust is used to re-align the
6960 stack. */
6961 #if 0
6962 gcc_assert (!(size % align));
6963 #endif
6965 if (CONST_OK_FOR_ADD (size))
6966 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6967 /* Try to do it with two partial adjustments; however, we must make
6968 sure that the stack is properly aligned at all times, in case
6969 an interrupt occurs between the two partial adjustments. */
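/* For example (numbers for illustration only): with size == 192 and
   align == 4, size / 2 & -align is 96, so we emit two adjustments of
   96 and 96; each constant is within the add-immediate range and the
   stack stays properly aligned after each step.  */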
6970 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6971 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6973 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6974 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6976 else
6978 rtx const_reg;
6979 rtx insn;
6980 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6981 int i;
6983 /* If TEMP is invalid, we could temporarily save a general
6984 register to MACL. However, there is currently no need
6985 to handle this case, so just die when we see it. */
6986 if (epilogue_p < 0
6987 || current_function_interrupt
6988 || ! call_really_used_regs[temp] || fixed_regs[temp])
6989 temp = -1;
6990 if (temp < 0 && ! current_function_interrupt
6991 && (TARGET_SHMEDIA || epilogue_p >= 0))
6993 HARD_REG_SET temps;
6994 COPY_HARD_REG_SET (temps, call_used_reg_set);
6995 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6996 if (epilogue_p > 0)
6998 int nreg = 0;
6999 if (crtl->return_rtx)
7001 machine_mode mode;
7002 mode = GET_MODE (crtl->return_rtx);
7003 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
7004 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
7006 for (i = 0; i < nreg; i++)
7007 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
7008 if (crtl->calls_eh_return)
7010 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
7011 for (i = 0; i <= 3; i++)
7012 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
7015 if (TARGET_SHMEDIA && epilogue_p < 0)
7016 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7017 CLEAR_HARD_REG_BIT (temps, i);
7018 if (epilogue_p <= 0)
7020 for (i = FIRST_PARM_REG;
7021 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7022 CLEAR_HARD_REG_BIT (temps, i);
7023 if (cfun->static_chain_decl != NULL)
7024 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7026 temp = scavenge_reg (&temps);
7028 if (temp < 0 && live_regs_mask)
7030 HARD_REG_SET temps;
7032 COPY_HARD_REG_SET (temps, *live_regs_mask);
7033 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7034 temp = scavenge_reg (&temps);
7036 if (temp < 0)
7038 rtx adj_reg, tmp_reg, mem;
7040 /* If we reached here, the most likely case is the (sibcall)
7041 epilogue for non-SHmedia. Put a special push/pop sequence
7042 for such a case as a last resort. This looks lengthy, but it
7043 would not be a problem because it seems to be very
7044 rare. */
7046 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7049 /* ??? There is still the slight possibility that r4 or
7050 r5 have been reserved as fixed registers or assigned
7051 as global registers, and they change during an
7052 interrupt. There are possible ways to handle this:
7054 - If we are adjusting the frame pointer (r14), we can do
7055 with a single temp register and an ordinary push / pop
7056 on the stack.
7057 - Grab any call-used or call-saved registers (i.e. not
7058 fixed or globals) for the temps we need. We might
7059 also grab r14 if we are adjusting the stack pointer.
7060 If we can't find enough available registers, issue
7061 a diagnostic and die - the user must have reserved
7062 way too many registers.
7063 But since all this is rather unlikely to happen and
7064 would require extra testing, we just die if r4 / r5
7065 are not available. */
7066 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7067 && !global_regs[4] && !global_regs[5]);
7069 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7070 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7071 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7072 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7073 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7074 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7075 emit_move_insn (mem, tmp_reg);
7076 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7077 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7078 emit_move_insn (mem, tmp_reg);
7079 emit_move_insn (reg, adj_reg);
7080 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7081 emit_move_insn (adj_reg, mem);
7082 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7083 emit_move_insn (tmp_reg, mem);
7084 /* Tell flow the insns that pop r4/r5 aren't dead. */
7085 emit_use (tmp_reg);
7086 emit_use (adj_reg);
7087 return;
7089 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7091 /* If SIZE is negative, subtract the positive value.
7092 This sometimes allows a constant pool entry to be shared
7093 between prologue and epilogue code. */
7094 if (size < 0)
7096 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7097 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7099 else
7101 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7102 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7104 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7105 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7106 GEN_INT (size))));
7111 /* Emit the specified insn and mark it as frame related.
7112 FIXME: Rename this to emit_frame_insn. */
7113 static rtx_insn *
7114 frame_insn (rtx x)
7116 rtx_insn *insn = emit_insn (x);
7117 RTX_FRAME_RELATED_P (insn) = 1;
7118 return insn;
7121 /* Output RTL to push register RN onto the stack. */
7122 static rtx
7123 push (int rn)
7125 rtx x;
7126 if (rn == FPUL_REG)
7127 x = gen_push_fpul ();
7128 else if (rn == FPSCR_REG)
7129 x = gen_push_fpscr ();
7130 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7131 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7133 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7134 return NULL_RTX;
7135 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7137 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7138 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7139 else
7140 x = gen_push (gen_rtx_REG (SImode, rn));
7142 x = frame_insn (x);
7143 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7144 return x;
7147 /* Output RTL to pop register RN from the stack. */
7148 static void
7149 pop (int rn)
7151 rtx x, sp_reg, reg;
7152 if (rn == FPUL_REG)
7153 x = gen_pop_fpul ();
7154 else if (rn == FPSCR_REG)
7155 x = gen_pop_fpscr ();
7156 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7157 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7159 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7160 return;
7161 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7163 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7164 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7165 else
7166 x = gen_pop (gen_rtx_REG (SImode, rn));
7168 x = emit_insn (x);
7170 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7171 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7172 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7173 : SET_DEST (PATTERN (x)));
7174 add_reg_note (x, REG_CFA_RESTORE, reg);
7175 add_reg_note (x, REG_CFA_ADJUST_CFA,
7176 gen_rtx_SET (sp_reg,
7177 plus_constant (SImode, sp_reg,
7178 GET_MODE_SIZE (GET_MODE (reg)))));
7179 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7180 RTX_FRAME_RELATED_P (x) = 1;
7183 /* Generate code to push the regs specified in the mask. */
7184 static void
7185 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7187 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7188 int skip_fpscr = 0;
7190 /* Push PR last; this gives better latencies after the prologue, and
7191 candidates for the return delay slot when there are no general
7192 registers pushed. */
7193 for (; i < FIRST_PSEUDO_REGISTER; i++)
7195 /* If this is an interrupt handler, and the SZ bit varies,
7196 and we have to push any floating point register, we need
7197 to switch to the correct precision first. */
7198 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7199 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7201 HARD_REG_SET unsaved;
7203 push (FPSCR_REG);
7204 COMPL_HARD_REG_SET (unsaved, *mask);
7205 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7206 skip_fpscr = 1;
7208 if (i != PR_REG
7209 && (i != FPSCR_REG || ! skip_fpscr)
7210 && TEST_HARD_REG_BIT (*mask, i))
7212 /* If the ISR has RESBANK attribute assigned, don't push any of
7213 the following registers - R0-R14, MACH, MACL and GBR. */
7214 if (! (sh_cfun_resbank_handler_p ()
7215 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7216 || i == MACH_REG
7217 || i == MACL_REG
7218 || i == GBR_REG)))
7219 push (i);
7223 /* Push banked registers last to improve delay slot opportunities. */
7224 if (interrupt_handler)
7226 bool use_movml = false;
7228 if (TARGET_SH2A)
7230 unsigned int count = 0;
7232 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7233 if (TEST_HARD_REG_BIT (*mask, i))
7234 count++;
7235 else
7236 break;
7238 /* Use movml when all banked registers are pushed. */
7239 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7240 use_movml = true;
7243 if (sh_cfun_resbank_handler_p ())
7244 ; /* Do nothing. */
7245 else if (use_movml)
7247 rtx x, mem, reg, set;
7248 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7250 /* We must avoid scheduling the multiple store insn with other
7251 insns. */
7252 emit_insn (gen_blockage ());
7253 x = gen_movml_push_banked (sp_reg);
7254 x = frame_insn (x);
7255 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7257 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7258 reg = gen_rtx_REG (SImode, i);
7259 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7262 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7263 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7264 emit_insn (gen_blockage ());
7266 else
7267 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7268 if (TEST_HARD_REG_BIT (*mask, i))
7269 push (i);
7272 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7273 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7274 push (PR_REG);
7277 /* Calculate how much extra space is needed to save all callee-saved
7278 target registers.
7279 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7280 static int
7281 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7283 int reg;
7284 int stack_space = 0;
7285 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7287 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7288 if ((! call_really_used_regs[reg] || interrupt_handler)
7289 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7290 /* Leave space to save this target register on the stack,
7291 in case target register allocation wants to use it. */
7292 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7293 return stack_space;
7296 /* Decide whether we should reserve space for callee-save target registers,
7297 in case target register allocation wants to use them. REGS_SAVED is
7298 the space, in bytes, that is already required for register saves.
7299 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7300 static int
7301 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7302 HARD_REG_SET *live_regs_mask)
7304 if (optimize_size)
7305 return 0;
7306 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7309 /* Decide how much space to reserve for callee-save target registers
7310 in case target register allocation wants to use them.
7311 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7312 static int
7313 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7315 if (shmedia_space_reserved_for_target_registers)
7316 return shmedia_target_regs_stack_space (live_regs_mask);
7317 else
7318 return 0;
7321 /* Work out the registers which need to be saved, both as a mask and a
7322 count of saved words. Return the count.
7324 If doing a pragma interrupt function, then push all regs used by the
7325 function, and if we call another function (we can tell by looking at PR),
7326 make sure that all the regs it clobbers are safe too. */
7327 static int
7328 calc_live_regs (HARD_REG_SET *live_regs_mask)
7330 unsigned int reg;
7331 int count;
7332 tree attrs;
7333 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7334 bool nosave_low_regs;
7335 int pr_live, has_call;
7337 attrs = DECL_ATTRIBUTES (current_function_decl);
7338 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7339 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7340 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7341 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7343 CLEAR_HARD_REG_SET (*live_regs_mask);
7344 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7345 && df_regs_ever_live_p (FPSCR_REG))
7346 target_flags &= ~MASK_FPU_SINGLE;
7347 /* If switching to double mode saves a lot of register saves, do that. */
7348 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7349 && TARGET_FPU_SINGLE)
7350 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7351 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7352 && (! call_really_used_regs[reg]
7353 || interrupt_handler)
7354 && ++count > 2)
7356 target_flags &= ~MASK_FPU_SINGLE;
7357 break;
7359 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7360 knows how to use it. That means the pseudo originally allocated for
7361 the initial value can become the PR_MEDIA_REG hard register, as seen for
7362 execute/20010122-1.c:test9. */
7363 if (TARGET_SHMEDIA)
7364 /* ??? this function is called from initial_elimination_offset, hence we
7365 can't use the result of sh_media_register_for_return here. */
7366 pr_live = sh_pr_n_sets ();
7367 else
7369 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7370 pr_live = (pr_initial
7371 ? (!REG_P (pr_initial)
7372 || REGNO (pr_initial) != (PR_REG))
7373 : df_regs_ever_live_p (PR_REG));
7374 /* For SHcompact, if not optimizing, we end up with a memory reference
7375 using the return address pointer for __builtin_return_address even
7376 though there is no actual need to put the PR register on the stack. */
7377 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7379 /* Force PR to be live if the prologue has to call the SHmedia
7380 argument decoder or register saver. */
7381 if (TARGET_SHCOMPACT
7382 && ((crtl->args.info.call_cookie
7383 & ~ CALL_COOKIE_RET_TRAMP (1))
7384 || crtl->saves_all_registers))
7385 pr_live = 1;
7386 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7387 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7389 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7390 ? pr_live
7391 : interrupt_handler
7392 ? (/* Need to save all the regs ever live. */
7393 (df_regs_ever_live_p (reg)
7394 || (call_really_used_regs[reg]
7395 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7396 || reg == PIC_OFFSET_TABLE_REGNUM)
7397 && has_call)
7398 || (TARGET_SHMEDIA && has_call
7399 && REGISTER_NATURAL_MODE (reg) == SImode
7400 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7401 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7402 && reg != RETURN_ADDRESS_POINTER_REGNUM
7403 && reg != T_REG && reg != GBR_REG
7404 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7405 /* Push fpscr only on targets which have an FPU. */
7406 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7407 : (/* Only push those regs which are used and need to be saved. */
7408 (TARGET_SHCOMPACT
7409 && flag_pic
7410 && crtl->args.info.call_cookie
7411 && reg == PIC_OFFSET_TABLE_REGNUM)
7412 || (df_regs_ever_live_p (reg)
7413 && ((!call_really_used_regs[reg]
7414 && !(reg != PIC_OFFSET_TABLE_REGNUM
7415 && fixed_regs[reg] && call_used_regs[reg]))
7416 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7417 || (crtl->calls_eh_return
7418 && (reg == EH_RETURN_DATA_REGNO (0)
7419 || reg == EH_RETURN_DATA_REGNO (1)
7420 || reg == EH_RETURN_DATA_REGNO (2)
7421 || reg == EH_RETURN_DATA_REGNO (3)))
7422 || ((reg == MACL_REG || reg == MACH_REG)
7423 && df_regs_ever_live_p (reg)
7424 && sh_cfun_attr_renesas_p ())
7427 SET_HARD_REG_BIT (*live_regs_mask, reg);
7428 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7430 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7431 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7433 if (FP_REGISTER_P (reg))
7435 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7437 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7438 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7441 else if (XD_REGISTER_P (reg))
7443 /* Must switch to double mode to access these registers. */
7444 target_flags &= ~MASK_FPU_SINGLE;
7448 if (nosave_low_regs && reg == R8_REG)
7449 break;
7451 /* If we have a target register optimization pass after prologue / epilogue
7452 threading, we need to assume all target registers will be live even if
7453 they aren't now. */
7454 if (flag_branch_target_load_optimize2
7455 && TARGET_SAVE_ALL_TARGET_REGS
7456 && shmedia_space_reserved_for_target_registers)
7457 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7458 if ((! call_really_used_regs[reg] || interrupt_handler)
7459 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7461 SET_HARD_REG_BIT (*live_regs_mask, reg);
7462 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7464 /* If this is an interrupt handler, we don't have any call-clobbered
7465 registers we can conveniently use for target register save/restore.
7466 Make sure we save at least one general purpose register when we need
7467 to save target registers. */
7468 if (interrupt_handler
7469 && hard_reg_set_intersect_p (*live_regs_mask,
7470 reg_class_contents[TARGET_REGS])
7471 && ! hard_reg_set_intersect_p (*live_regs_mask,
7472 reg_class_contents[GENERAL_REGS]))
7474 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7475 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7478 return count;
7481 /* Code to generate prologue and epilogue sequences */
7483 /* PUSHED is the number of bytes that are being pushed on the
7484 stack for register saves. Return the frame size, padded
7485 appropriately so that the stack stays properly aligned. */
7486 static HOST_WIDE_INT
7487 rounded_frame_size (int pushed)
7489 HOST_WIDE_INT size = get_frame_size ();
7490 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7492 if (ACCUMULATE_OUTGOING_ARGS)
7493 size += crtl->outgoing_args_size;
7495 return ((size + pushed + align - 1) & -align) - pushed;
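/* Worked example of the rounding above (illustrative values): with
   get_frame_size () == 20, PUSHED == 12 and an 8 byte alignment,
   ((20 + 12 + 7) & -8) - 12 == 32 - 12 == 20, i.e. the frame size is
   padded so that frame bytes plus pushed register bytes (20 + 12 == 32)
   is a multiple of the stack alignment.  */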
7498 /* Choose a call-clobbered target-branch register that remains
7499 unchanged along the whole function. We set it up as the return
7500 value in the prologue. */
7502 sh_media_register_for_return (void)
7504 int regno;
7505 int tr0_used;
7507 if (! crtl->is_leaf)
7508 return -1;
7509 if (lookup_attribute ("interrupt_handler",
7510 DECL_ATTRIBUTES (current_function_decl)))
7511 return -1;
7512 if (sh_cfun_interrupt_handler_p ())
7513 return -1;
7515 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7517 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7518 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7519 return regno;
7521 return -1;
7524 /* The maximum number of registers we need to save is:
7525 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7526 - 32 floating point registers (for each pair, we save none,
7527 one single precision value, or a double precision value).
7528 - 8 target registers
7529 - add 1 entry for a delimiter. */
7530 #define MAX_SAVED_REGS (62+32+8)
7532 typedef struct save_entry_s
7534 unsigned char reg;
7535 unsigned char mode;
7536 short offset;
7537 } save_entry;
7539 #define MAX_TEMPS 4
7541 /* There will be a delimiter entry with VOIDmode both at the start and the
7542 end of a filled-in schedule. The end delimiter has the offset of the
7543 save with the smallest (i.e. most negative) offset. */
7544 typedef struct save_schedule_s
7546 save_entry entries[MAX_SAVED_REGS + 2];
7547 int temps[MAX_TEMPS+1];
7548 } save_schedule;
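/* A reading of the two structures above: ENTRIES holds one save_entry per
   saved register plus one delimiter slot at each end (hence the "+ 2"),
   and TEMPS is a -1 terminated list of up to MAX_TEMPS scratch registers
   that sh5_schedule_saves picks for addressing the save slots.  */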
7550 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7551 use reverse order. Returns the last entry written to (not counting
7552 the delimiter). OFFSET_BASE is a number to be added to all offset
7553 entries. */
7554 static save_entry *
7555 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7556 int offset_base)
7558 int align, i;
7559 save_entry *entry = schedule->entries;
7560 int tmpx = 0;
7561 int offset;
7563 if (! current_function_interrupt)
7564 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7565 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7566 && ! FUNCTION_ARG_REGNO_P (i)
7567 && i != FIRST_RET_REG
7568 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7569 && ! (crtl->calls_eh_return
7570 && (i == EH_RETURN_STACKADJ_REGNO
7571 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7572 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7573 schedule->temps[tmpx++] = i;
7574 entry->reg = -1;
7575 entry->mode = VOIDmode;
7576 entry->offset = offset_base;
7577 entry++;
7578 /* We loop twice: first, we save 8-byte aligned registers in the
7579 higher addresses, which are known to be aligned. Then, we
7580 proceed to saving 32-bit registers that don't need 8-byte
7581 alignment.
7582 If this is an interrupt function, all registers that need saving
7583 need to be saved in full. Moreover, we need to postpone saving
7584 target registers until we have saved some general purpose registers
7585 that we can then use as scratch registers. */
7586 offset = offset_base;
7587 for (align = 1; align >= 0; align--)
7589 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7590 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7592 machine_mode mode = REGISTER_NATURAL_MODE (i);
7593 int reg = i;
7595 if (current_function_interrupt)
7597 if (TARGET_REGISTER_P (i))
7598 continue;
7599 if (GENERAL_REGISTER_P (i))
7600 mode = DImode;
7602 if (mode == SFmode && (i % 2) == 1
7603 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7604 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7606 mode = DFmode;
7607 i--;
7608 reg--;
7611 /* If we're doing the aligned pass and this is not aligned,
7612 or we're doing the unaligned pass and this is aligned,
7613 skip it. */
7614 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7615 != align)
7616 continue;
7618 if (current_function_interrupt
7619 && GENERAL_REGISTER_P (i)
7620 && tmpx < MAX_TEMPS)
7621 schedule->temps[tmpx++] = i;
7623 offset -= GET_MODE_SIZE (mode);
7624 entry->reg = i;
7625 entry->mode = mode;
7626 entry->offset = offset;
7627 entry++;
7629 if (align && current_function_interrupt)
7630 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7631 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7633 offset -= GET_MODE_SIZE (DImode);
7634 entry->reg = i;
7635 entry->mode = DImode;
7636 entry->offset = offset;
7637 entry++;
7640 entry->reg = -1;
7641 entry->mode = VOIDmode;
7642 entry->offset = offset;
7643 schedule->temps[tmpx] = -1;
7644 return entry - 1;
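/* Illustrative example (hypothetical values, assuming an 8 byte
   STACK_BOUNDARY): with OFFSET_BASE == 0 and a live set containing one
   DImode register and one SFmode register, the aligned pass records the
   DImode save at offset -8, the second pass records the SFmode save at
   offset -12, and the end delimiter keeps offset -12, the most negative
   offset of the schedule.  */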
7647 /* Expand code for the function prologue. */
7648 void
7649 sh_expand_prologue (void)
7651 HARD_REG_SET live_regs_mask;
7652 int d, i;
7653 int d_rounding = 0;
7654 int save_flags = target_flags;
7655 int pretend_args;
7656 int stack_usage;
7657 tree sp_switch_attr
7658 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7660 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7662 /* We have pretend args if we had an object sent partially in registers
7663 and partially on the stack, e.g. a large structure. */
7664 pretend_args = crtl->args.pretend_args_size;
7665 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7666 && (NPARM_REGS(SImode)
7667 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7668 pretend_args = 0;
7670 output_stack_adjust (-pretend_args
7671 - crtl->args.info.stack_regs * 8,
7672 stack_pointer_rtx, 0, NULL, true);
7673 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7675 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7676 /* We're going to use the PIC register to load the address of the
7677 incoming-argument decoder and/or of the return trampoline from
7678 the GOT, so make sure the PIC register is preserved and
7679 initialized. */
7680 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7682 if (TARGET_SHCOMPACT
7683 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7685 int reg;
7687 /* First, make all registers with incoming arguments that will
7688 be pushed onto the stack live, so that register renaming
7689 doesn't overwrite them. */
7690 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7691 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7692 >= NPARM_REGS (SImode) - reg)
7693 for (; reg < NPARM_REGS (SImode); reg++)
7694 emit_insn (gen_shcompact_preserve_incoming_args
7695 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7696 else if (CALL_COOKIE_INT_REG_GET
7697 (crtl->args.info.call_cookie, reg) == 1)
7698 emit_insn (gen_shcompact_preserve_incoming_args
7699 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7701 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7702 stack_pointer_rtx);
7703 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7704 GEN_INT (crtl->args.info.call_cookie));
7705 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7706 gen_rtx_REG (SImode, R0_REG));
7708 else if (TARGET_SHMEDIA)
7710 int tr = sh_media_register_for_return ();
7712 if (tr >= 0)
7713 emit_move_insn (gen_rtx_REG (DImode, tr),
7714 gen_rtx_REG (DImode, PR_MEDIA_REG));
7717 /* Emit the code for SETUP_VARARGS. */
7718 if (cfun->stdarg)
7720 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7722 /* Push arg regs as if they'd been provided by caller in stack. */
7723 for (i = 0; i < NPARM_REGS(SImode); i++)
7725 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7727 if (i >= (NPARM_REGS(SImode)
7728 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7730 break;
7731 push (rn);
7732 stack_usage += GET_MODE_SIZE (SImode);
7737 /* If we're supposed to switch stacks at function entry, do so now. */
7738 if (sp_switch_attr)
7740 rtx lab, newsrc;
7741 /* The argument specifies a variable holding the address of the
7742 stack the interrupt function should switch to/from at entry/exit. */
7743 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7744 const char *s
7745 = ggc_strdup (TREE_STRING_POINTER (arg));
7746 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7748 lab = add_constant (sp_switch, SImode, 0);
7749 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7751 emit_insn (gen_sp_switch_1 (newsrc));
7754 d = calc_live_regs (&live_regs_mask);
7755 /* ??? Maybe we could save some switching if we can move a mode switch
7756 that already happens to be at the function start into the prologue. */
7757 if (target_flags != save_flags && ! current_function_interrupt)
7758 emit_insn (gen_toggle_sz ());
7760 if (TARGET_SH5)
7762 int offset_base, offset;
7763 rtx r0 = NULL_RTX;
7764 int offset_in_r0 = -1;
7765 int sp_in_r0 = 0;
7766 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7767 int total_size, save_size;
7768 save_schedule schedule;
7769 save_entry *entry;
7770 int *tmp_pnt;
7772 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7773 && ! current_function_interrupt)
7774 r0 = gen_rtx_REG (Pmode, R0_REG);
7776 /* D is the actual number of bytes that we need for saving registers;
7777 however, in initial_elimination_offset we have committed to using
7778 an additional TREGS_SPACE amount of bytes - in order to keep both
7779 addresses to arguments supplied by the caller and local variables
7780 valid, we must keep this gap. Place it between the incoming
7781 arguments and the actually saved registers in a bid to optimize
7782 locality of reference. */
7783 total_size = d + tregs_space;
7784 total_size += rounded_frame_size (total_size);
7785 save_size = total_size - rounded_frame_size (d);
7786 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7787 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7788 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
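/* E.g. (illustrative values): with an 8 byte STACK_BOUNDARY and
   save_size == 20, d_rounding becomes 8 - 20 % 8 == 4, padding the
   register save area up to 24 bytes.  */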
7790 /* If adjusting the stack in a single step costs nothing extra, do so.
7791 I.e. either if a single addi is enough, or we need a movi anyway,
7792 and we don't exceed the maximum offset range (the test for the
7793 latter is conservative for simplicity). */
7794 if (TARGET_SHMEDIA
7795 && (CONST_OK_FOR_I10 (-total_size)
7796 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7797 && total_size <= 2044)))
7798 d_rounding = total_size - save_size;
7800 offset_base = d + d_rounding;
7802 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7803 0, NULL, true);
7804 stack_usage += save_size + d_rounding;
7806 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7807 tmp_pnt = schedule.temps;
7808 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7810 machine_mode mode = (machine_mode) entry->mode;
7811 unsigned int reg = entry->reg;
7812 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7813 rtx orig_reg_rtx;
7815 offset = entry->offset;
7817 reg_rtx = gen_rtx_REG (mode, reg);
7819 mem_rtx = gen_frame_mem (mode,
7820 gen_rtx_PLUS (Pmode,
7821 stack_pointer_rtx,
7822 GEN_INT (offset)));
7824 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7826 gcc_assert (r0);
7827 mem_rtx = NULL_RTX;
7830 if (HAVE_PRE_DECREMENT
7831 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7832 || mem_rtx == NULL_RTX
7833 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7835 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7837 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7838 pre_dec = NULL_RTX;
7839 else
7841 mem_rtx = NULL_RTX;
7842 offset += GET_MODE_SIZE (mode);
7846 if (mem_rtx != NULL_RTX)
7847 goto addr_ok;
7849 if (offset_in_r0 == -1)
7851 emit_move_insn (r0, GEN_INT (offset));
7852 offset_in_r0 = offset;
7854 else if (offset != offset_in_r0)
7856 emit_move_insn (r0,
7857 gen_rtx_PLUS
7858 (Pmode, r0,
7859 GEN_INT (offset - offset_in_r0)));
7860 offset_in_r0 += offset - offset_in_r0;
7863 if (pre_dec != NULL_RTX)
7865 if (! sp_in_r0)
7867 emit_move_insn (r0,
7868 gen_rtx_PLUS
7869 (Pmode, r0, stack_pointer_rtx));
7870 sp_in_r0 = 1;
7873 offset -= GET_MODE_SIZE (mode);
7874 offset_in_r0 -= GET_MODE_SIZE (mode);
7876 mem_rtx = pre_dec;
7878 else if (sp_in_r0)
7879 mem_rtx = gen_frame_mem (mode, r0);
7880 else
7881 mem_rtx = gen_frame_mem (mode,
7882 gen_rtx_PLUS (Pmode,
7883 stack_pointer_rtx,
7884 r0));
7886 /* We must not use an r0-based address for target-branch
7887 registers or for special registers without pre-dec
7888 memory addresses, since we store their values in r0
7889 first. */
7890 gcc_assert (!TARGET_REGISTER_P (reg)
7891 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7892 || mem_rtx == pre_dec));
7894 addr_ok:
7895 orig_reg_rtx = reg_rtx;
7896 if (TARGET_REGISTER_P (reg)
7897 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7898 && mem_rtx != pre_dec))
7900 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7902 emit_move_insn (tmp_reg, reg_rtx);
7904 if (REGNO (tmp_reg) == R0_REG)
7906 offset_in_r0 = -1;
7907 sp_in_r0 = 0;
7908 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7911 if (*++tmp_pnt <= 0)
7912 tmp_pnt = schedule.temps;
7914 reg_rtx = tmp_reg;
7917 rtx insn;
7919 /* Mark as interesting for the DWARF CFI generator. */
7920 insn = emit_move_insn (mem_rtx, reg_rtx);
7921 RTX_FRAME_RELATED_P (insn) = 1;
7922 /* If we use an intermediate register for the save, we can't
7923 describe this exactly in cfi as a copy of the to-be-saved
7924 register into the temporary register and then the temporary
7925 register on the stack, because the temporary register can
7926 have a different natural size than the to-be-saved register.
7927 Thus, we gloss over the intermediate copy and pretend we do
7928 a direct save from the to-be-saved register. */
7929 if (REGNO (reg_rtx) != reg)
7931 rtx set;
7933 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7934 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7937 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7939 rtx reg_rtx = gen_rtx_REG (mode, reg);
7940 rtx set;
7941 rtx mem_rtx = gen_frame_mem (mode,
7942 gen_rtx_PLUS (Pmode,
7943 stack_pointer_rtx,
7944 GEN_INT (offset)));
7946 set = gen_rtx_SET (mem_rtx, reg_rtx);
7947 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7952 gcc_assert (entry->offset == d_rounding);
7954 else
7956 push_regs (&live_regs_mask, current_function_interrupt);
7957 stack_usage += d;
7960 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7961 emit_insn (gen_GOTaddr2picreg ());
7963 if (SHMEDIA_REGS_STACK_ADJUST ())
7965 /* This must NOT go through the PLT, otherwise mach and macl
7966 may be clobbered. */
7967 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7968 (TARGET_FPU_ANY
7969 ? "__GCC_push_shmedia_regs"
7970 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7971 emit_insn (gen_shmedia_save_restore_regs_compact
7972 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7975 if (target_flags != save_flags && ! current_function_interrupt)
7976 emit_insn (gen_toggle_sz ());
7978 target_flags = save_flags;
7980 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7981 stack_pointer_rtx, 0, NULL, true);
7982 stack_usage += rounded_frame_size (d) - d_rounding;
7984 if (frame_pointer_needed)
7985 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7987 if (TARGET_SHCOMPACT
7988 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7990 /* This must NOT go through the PLT, otherwise mach and macl
7991 may be clobbered. */
7992 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7993 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7994 emit_insn (gen_shcompact_incoming_args ());
7997 /* If we are profiling, make sure no instructions are scheduled before
7998 the call to mcount. Similarly if some call instructions are swapped
7999 before frame related insns, it'll confuse the unwinder because
8000 currently SH has no unwind info for function epilogues. */
8001 if (crtl->profile || flag_exceptions || flag_unwind_tables)
8002 emit_insn (gen_blockage ());
8004 if (flag_stack_usage_info)
8005 current_function_static_stack_size = stack_usage;
8008 /* Expand code for the function epilogue. */
8009 void
8010 sh_expand_epilogue (bool sibcall_p)
8012 HARD_REG_SET live_regs_mask;
8013 int d, i;
8014 int d_rounding = 0;
8016 int save_flags = target_flags;
8017 int frame_size, save_size;
8018 int fpscr_deferred = 0;
8019 int e = sibcall_p ? -1 : 1;
8021 d = calc_live_regs (&live_regs_mask);
8023 save_size = d;
8024 frame_size = rounded_frame_size (d);
8026 if (TARGET_SH5)
8028 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8029 int total_size;
8030 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8031 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8032 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8034 total_size = d + tregs_space;
8035 total_size += rounded_frame_size (total_size);
8036 save_size = total_size - frame_size;
8038 /* If adjusting the stack in a single step costs nothing extra, do so.
8039 I.e. either if a single addi is enough, or we need a movi anyway,
8040 and we don't exceed the maximum offset range (the test for the
8041 latter is conservative for simplicity). */
8042 if (TARGET_SHMEDIA
8043 && ! frame_pointer_needed
8044 && (CONST_OK_FOR_I10 (total_size)
8045 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8046 && total_size <= 2044)))
8047 d_rounding = frame_size;
8049 frame_size -= d_rounding;
8052 if (frame_pointer_needed)
8054 /* We must avoid scheduling the epilogue with previous basic blocks.
8055 See PR/18032 and PR/40313. */
8056 emit_insn (gen_blockage ());
8057 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8058 &live_regs_mask, true);
8060 /* We must avoid moving the stack pointer adjustment past code
8061 which reads from the local frame, else an interrupt could
8062 occur after the SP adjustment and clobber data in the local
8063 frame. */
8064 emit_insn (gen_blockage ());
8065 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8067 else if (frame_size)
8069 /* We must avoid moving the stack pointer adjustment past code
8070 which reads from the local frame, else an interrupt could
8071 occur after the SP adjustment and clobber data in the local
8072 frame. */
8073 emit_insn (gen_blockage ());
8074 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8075 &live_regs_mask, true);
8078 if (SHMEDIA_REGS_STACK_ADJUST ())
8080 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8081 (TARGET_FPU_ANY
8082 ? "__GCC_pop_shmedia_regs"
8083 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8084 /* This must NOT go through the PLT, otherwise mach and macl
8085 may be clobbered. */
8086 emit_insn (gen_shmedia_save_restore_regs_compact
8087 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8090 /* Pop all the registers. */
8092 if (target_flags != save_flags && ! current_function_interrupt)
8093 emit_insn (gen_toggle_sz ());
8094 if (TARGET_SH5)
8096 int offset_base, offset;
8097 int offset_in_r0 = -1;
8098 int sp_in_r0 = 0;
8099 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8100 save_schedule schedule;
8101 save_entry *entry;
8102 int *tmp_pnt;
8104 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
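/* ENTRY points at the last real entry of the schedule; ENTRY[1] is the
   end delimiter, whose offset is the most negative offset in the
   schedule (see sh5_schedule_saves above).  OFFSET_BASE below therefore
   ends up being the total number of bytes occupied by the register save
   area, consistent with the assertion at the end of this loop.  */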
8105 offset_base = -entry[1].offset + d_rounding;
8106 tmp_pnt = schedule.temps;
8107 for (; entry->mode != VOIDmode; entry--)
8109 machine_mode mode = (machine_mode) entry->mode;
8110 int reg = entry->reg;
8111 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8113 offset = offset_base + entry->offset;
8114 reg_rtx = gen_rtx_REG (mode, reg);
8116 mem_rtx = gen_frame_mem (mode,
8117 gen_rtx_PLUS (Pmode,
8118 stack_pointer_rtx,
8119 GEN_INT (offset)));
8121 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8122 mem_rtx = NULL_RTX;
8124 if (HAVE_POST_INCREMENT
8125 && (offset == offset_in_r0
8126 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8127 && mem_rtx == NULL_RTX)
8128 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8130 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8132 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8133 post_inc = NULL_RTX;
8134 else
8135 mem_rtx = NULL_RTX;
8138 if (mem_rtx != NULL_RTX)
8139 goto addr_ok;
8141 if (offset_in_r0 == -1)
8143 emit_move_insn (r0, GEN_INT (offset));
8144 offset_in_r0 = offset;
8146 else if (offset != offset_in_r0)
8148 emit_move_insn (r0,
8149 gen_rtx_PLUS
8150 (Pmode, r0,
8151 GEN_INT (offset - offset_in_r0)));
8152 offset_in_r0 += offset - offset_in_r0;
8155 if (post_inc != NULL_RTX)
8157 if (! sp_in_r0)
8159 emit_move_insn (r0,
8160 gen_rtx_PLUS
8161 (Pmode, r0, stack_pointer_rtx));
8162 sp_in_r0 = 1;
8165 mem_rtx = post_inc;
8167 offset_in_r0 += GET_MODE_SIZE (mode);
8169 else if (sp_in_r0)
8170 mem_rtx = gen_frame_mem (mode, r0);
8171 else
8172 mem_rtx = gen_frame_mem (mode,
8173 gen_rtx_PLUS (Pmode,
8174 stack_pointer_rtx,
8175 r0));
8177 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8178 || mem_rtx == post_inc);
8180 addr_ok:
8181 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8182 && mem_rtx != post_inc)
8184 emit_move_insn (r0, mem_rtx);
8185 mem_rtx = r0;
8187 else if (TARGET_REGISTER_P (reg))
8189 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8191 /* Give the scheduler a bit of freedom by using up to
8192 MAX_TEMPS registers in a round-robin fashion. */
8193 emit_move_insn (tmp_reg, mem_rtx);
8194 mem_rtx = tmp_reg;
8195 if (*++tmp_pnt < 0)
8196 tmp_pnt = schedule.temps;
8199 emit_move_insn (reg_rtx, mem_rtx);
8202 gcc_assert (entry->offset + offset_base == d + d_rounding);
8204 else /* ! TARGET_SH5 */
8206 int last_reg;
8208 save_size = 0;
8209 /* For an ISR with RESBANK attribute assigned, don't pop the PR
8210 register. */
8211 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8212 && !sh_cfun_resbank_handler_p ())
8214 if (!frame_pointer_needed)
8215 emit_insn (gen_blockage ());
8216 pop (PR_REG);
8219 /* Banked registers are popped first to avoid being scheduled in the
8220 delay slot. RTE switches banks before the ds instruction. */
8221 if (current_function_interrupt)
8223 bool use_movml = false;
8225 if (TARGET_SH2A)
8227 unsigned int count = 0;
8229 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8230 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8231 count++;
8232 else
8233 break;
8235 /* Use movml when all banked registers are popped. */
8236 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8237 use_movml = true;
8240 if (sh_cfun_resbank_handler_p ())
8241 ; /* Do nothing. */
8242 else if (use_movml)
8244 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8246 /* We must avoid scheduling multiple load insns with other
8247 insns. */
8248 emit_insn (gen_blockage ());
8249 emit_insn (gen_movml_pop_banked (sp_reg));
8250 emit_insn (gen_blockage ());
8252 else
8253 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8254 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8255 pop (i);
8257 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8259 else
8260 last_reg = FIRST_PSEUDO_REGISTER;
8262 for (i = 0; i < last_reg; i++)
8264 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8266 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8267 && hard_reg_set_intersect_p (live_regs_mask,
8268 reg_class_contents[DF_REGS]))
8269 fpscr_deferred = 1;
8270 /* For an ISR with RESBANK attribute assigned, don't pop the
8271 following registers: R0-R14, MACH, MACL and GBR. */
8272 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8273 && ! (sh_cfun_resbank_handler_p ()
8274 && ((j >= FIRST_GENERAL_REG
8275 && j < LAST_GENERAL_REG)
8276 || j == MACH_REG
8277 || j == MACL_REG
8278 || j == GBR_REG)))
8279 pop (j);
8281 if (j == FIRST_FP_REG && fpscr_deferred)
8282 pop (FPSCR_REG);
8285 if (target_flags != save_flags && ! current_function_interrupt)
8286 emit_insn (gen_toggle_sz ());
8287 target_flags = save_flags;
8289 output_stack_adjust (crtl->args.pretend_args_size
8290 + save_size + d_rounding
8291 + crtl->args.info.stack_regs * 8,
8292 stack_pointer_rtx, e, NULL, true);
8294 if (crtl->calls_eh_return)
8295 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8296 EH_RETURN_STACKADJ_RTX));
8298 /* Switch back to the normal stack if necessary. */
8299 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8300 emit_insn (gen_sp_switch_2 ());
8302 /* Tell flow the insn that pops PR isn't dead. */
8303 /* PR_REG will never be live in SHmedia mode, and we don't need to
8304 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8305 by the return pattern. */
8306 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8307 emit_use (gen_rtx_REG (SImode, PR_REG));
8310 /* Emit code to change the current function's return address to RA.
8311 TEMP is available as a scratch register, if needed. */
8312 void
8313 sh_set_return_address (rtx ra, rtx tmp)
8315 HARD_REG_SET live_regs_mask;
8316 int d;
8317 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8318 int pr_offset;
8320 d = calc_live_regs (&live_regs_mask);
8322 /* If pr_reg isn't live, we can set it (or the register given in
8323 sh_media_register_for_return) directly. */
8324 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8326 rtx rr;
8328 if (TARGET_SHMEDIA)
8330 int rr_regno = sh_media_register_for_return ();
8332 if (rr_regno < 0)
8333 rr_regno = pr_reg;
8335 rr = gen_rtx_REG (DImode, rr_regno);
8337 else
8338 rr = gen_rtx_REG (SImode, pr_reg);
8340 emit_insn (GEN_MOV (rr, ra));
8341 /* Tell flow the register for return isn't dead. */
8342 emit_use (rr);
8343 return;
8346 if (TARGET_SH5)
8348 int offset;
8349 save_schedule schedule;
8350 save_entry *entry;
8352 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8353 offset = entry[1].offset;
8354 for (; entry->mode != VOIDmode; entry--)
8355 if (entry->reg == pr_reg)
8356 goto found;
8358 /* We can't find the PR register. */
8359 gcc_unreachable ();
8361 found:
8362 offset = entry->offset - offset;
8363 pr_offset = (rounded_frame_size (d) + offset
8364 + SHMEDIA_REGS_STACK_ADJUST ());
8366 else
8367 pr_offset = rounded_frame_size (d);
8369 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8371 if (frame_pointer_needed)
8372 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8373 else
8374 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8376 tmp = gen_frame_mem (Pmode, tmp);
8377 emit_insn (GEN_MOV (tmp, ra));
8378 /* Tell flow that this store isn't dead. */
8379 emit_use (tmp);
8382 /* Clear variables at function end. */
8383 static void
8384 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8385 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8389 static rtx
8390 sh_builtin_saveregs (void)
8392 /* First unnamed integer register. */
8393 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8394 /* Number of integer registers we need to save. */
8395 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8396 /* First unnamed SFmode float reg */
8397 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8398 /* Number of SFmode float regs to save. */
8399 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8400 rtx regbuf, fpregs;
8401 int bufsize, regno;
8402 alias_set_type alias_set;
8404 if (TARGET_SH5)
8406 if (n_intregs)
8408 int pushregs = n_intregs;
8410 while (pushregs < NPARM_REGS (SImode) - 1
8411 && (CALL_COOKIE_INT_REG_GET
8412 (crtl->args.info.call_cookie,
8413 NPARM_REGS (SImode) - pushregs)
8414 == 1))
8416 crtl->args.info.call_cookie
8417 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8418 - pushregs, 1);
8419 pushregs++;
8422 if (pushregs == NPARM_REGS (SImode))
8423 crtl->args.info.call_cookie
8424 |= (CALL_COOKIE_INT_REG (0, 1)
8425 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8426 else
8427 crtl->args.info.call_cookie
8428 |= CALL_COOKIE_STACKSEQ (pushregs);
8430 crtl->args.pretend_args_size += 8 * n_intregs;
8432 if (TARGET_SHCOMPACT)
8433 return const0_rtx;
8436 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8438 error ("__builtin_saveregs not supported by this subtarget");
8439 return const0_rtx;
8442 if (TARGET_SHMEDIA)
8443 n_floatregs = 0;
8445 /* Allocate block of memory for the regs. */
8446 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8447 Or can assign_stack_local accept a 0 SIZE argument? */
8448 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
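/* E.g. (illustrative, assuming UNITS_PER_WORD == 4): with two unnamed
   integer registers and three unnamed float registers left to save,
   bufsize is 2 * 4 + 3 * 4 == 20 bytes.  */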
8450 if (TARGET_SHMEDIA)
8451 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8452 else if (n_floatregs & 1)
8454 rtx addr;
8456 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8457 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8458 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8459 regbuf = change_address (regbuf, BLKmode, addr);
8461 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8463 rtx addr, mask;
8465 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8466 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8467 XEXP (regbuf, 0), 4));
8468 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8469 emit_insn (gen_andsi3 (addr, addr, mask));
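/* At this point ADDR holds the buffer address rounded up to the next
   8 byte boundary: for a 4 byte aligned address, adding 4 and masking
   with -8 either leaves an already aligned address unchanged or bumps
   it up by 4.  The extra UNITS_PER_WORD allocated above provides the
   needed slack.  */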
8470 regbuf = change_address (regbuf, BLKmode, addr);
8472 else
8473 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8474 alias_set = get_varargs_alias_set ();
8475 set_mem_alias_set (regbuf, alias_set);
8477 /* Save int args.
8478 This is optimized to only save the regs that are necessary. Explicitly
8479 named args need not be saved. */
8480 if (n_intregs > 0)
8481 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8482 adjust_address (regbuf, BLKmode,
8483 n_floatregs * UNITS_PER_WORD),
8484 n_intregs);
8486 if (TARGET_SHMEDIA)
8487 /* Return the address of the regbuf. */
8488 return XEXP (regbuf, 0);
8490 /* Save float args.
8491 This is optimized to only save the regs that are necessary. Explicitly
8492 named args need not be saved.
8493 We explicitly build a pointer to the buffer because it halves the insn
8494 count when not optimizing (otherwise the pointer is built for each reg
8495 saved).
8496 We emit the moves in reverse order so that we can use predecrement. */
8498 fpregs = copy_to_mode_reg (Pmode,
8499 plus_constant (Pmode, XEXP (regbuf, 0),
8500 n_floatregs * UNITS_PER_WORD));
8501 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8503 rtx mem;
8504 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8506 emit_insn (gen_addsi3 (fpregs, fpregs,
8507 GEN_INT (-2 * UNITS_PER_WORD)));
8508 mem = change_address (regbuf, DFmode, fpregs);
8509 emit_move_insn (mem,
8510 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8512 regno = first_floatreg;
8513 if (regno & 1)
8515 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8516 mem = change_address (regbuf, SFmode, fpregs);
8517 emit_move_insn (mem,
8518 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8519 + regno - SH_REG_MSW_OFFSET));
8522 else
8523 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8525 rtx mem;
8527 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8528 mem = change_address (regbuf, SFmode, fpregs);
8529 emit_move_insn (mem,
8530 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8533 /* Return the address of the regbuf. */
8534 return XEXP (regbuf, 0);
8537 /* Define the `__builtin_va_list' type for the ABI. */
8538 static tree
8539 sh_build_builtin_va_list (void)
8541 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8542 tree record, type_decl;
8544 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8545 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8546 return ptr_type_node;
8548 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8549 type_decl = build_decl (BUILTINS_LOCATION,
8550 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8552 f_next_o = build_decl (BUILTINS_LOCATION,
8553 FIELD_DECL, get_identifier ("__va_next_o"),
8554 ptr_type_node);
8555 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8556 FIELD_DECL,
8557 get_identifier ("__va_next_o_limit"),
8558 ptr_type_node);
8559 f_next_fp = build_decl (BUILTINS_LOCATION,
8560 FIELD_DECL, get_identifier ("__va_next_fp"),
8561 ptr_type_node);
8562 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8563 FIELD_DECL,
8564 get_identifier ("__va_next_fp_limit"),
8565 ptr_type_node);
8566 f_next_stack = build_decl (BUILTINS_LOCATION,
8567 FIELD_DECL, get_identifier ("__va_next_stack"),
8568 ptr_type_node);
8570 DECL_FIELD_CONTEXT (f_next_o) = record;
8571 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8572 DECL_FIELD_CONTEXT (f_next_fp) = record;
8573 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8574 DECL_FIELD_CONTEXT (f_next_stack) = record;
8576 TYPE_STUB_DECL (record) = type_decl;
8577 TYPE_NAME (record) = type_decl;
8578 TYPE_FIELDS (record) = f_next_o;
8579 DECL_CHAIN (f_next_o) = f_next_o_limit;
8580 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8581 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8582 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8584 layout_type (record);
8586 return record;
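/* An illustrative sketch of the record built above (just the equivalent
   C layout, not how GCC represents it internally):

     struct __va_list_tag
     {
       void *__va_next_o;          // next integer arg in the register save area
       void *__va_next_o_limit;    // end of the integer register save area
       void *__va_next_fp;         // next FP arg in the register save area
       void *__va_next_fp_limit;   // end of the FP register save area
       void *__va_next_stack;      // next argument passed on the stack
     };

   See sh_va_start below for how the five pointers are initialized.  */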
8589 /* Implement `va_start' for varargs and stdarg. */
8590 static void
8591 sh_va_start (tree valist, rtx nextarg)
8593 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8594 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8595 tree t, u;
8596 int nfp, nint;
8598 if (TARGET_SH5)
8600 expand_builtin_saveregs ();
8601 std_expand_builtin_va_start (valist, nextarg);
8602 return;
8605 if ((! TARGET_SH2E && ! TARGET_SH4)
8606 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8608 std_expand_builtin_va_start (valist, nextarg);
8609 return;
8612 f_next_o = TYPE_FIELDS (va_list_type_node);
8613 f_next_o_limit = DECL_CHAIN (f_next_o);
8614 f_next_fp = DECL_CHAIN (f_next_o_limit);
8615 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8616 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8618 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8619 NULL_TREE);
8620 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8621 valist, f_next_o_limit, NULL_TREE);
8622 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8623 NULL_TREE);
8624 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8625 valist, f_next_fp_limit, NULL_TREE);
8626 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8627 valist, f_next_stack, NULL_TREE);
8629 /* Call __builtin_saveregs. */
8630 u = make_tree (sizetype, expand_builtin_saveregs ());
8631 u = fold_convert (ptr_type_node, u);
8632 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8633 TREE_SIDE_EFFECTS (t) = 1;
8634 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8636 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8637 if (nfp < 8)
8638 nfp = 8 - nfp;
8639 else
8640 nfp = 0;
8641 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8642 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8643 TREE_SIDE_EFFECTS (t) = 1;
8644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8646 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8647 TREE_SIDE_EFFECTS (t) = 1;
8648 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8650 nint = crtl->args.info.arg_count[SH_ARG_INT];
8651 if (nint < 4)
8652 nint = 4 - nint;
8653 else
8654 nint = 0;
8655 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8656 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8657 TREE_SIDE_EFFECTS (t) = 1;
8658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8660 u = make_tree (ptr_type_node, nextarg);
8661 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8662 TREE_SIDE_EFFECTS (t) = 1;
8663 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
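/* To summarize the assignments above (illustrative; assuming 4 byte words,
   four integer argument registers and eight SFmode argument registers):
   with REGBUF the block returned by __builtin_saveregs,

     next_fp       = REGBUF
     next_fp_limit = REGBUF + 4 * (8 - named FP args)
     next_o        = next_fp_limit
     next_o_limit  = next_o + 4 * (4 - named integer args)
     next_stack    = NEXTARG

   i.e. the FP save area comes first, the integer save area follows it,
   and everything else is taken from the stack.  */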
8666 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8667 member, return it. */
8668 static tree
8669 find_sole_member (tree type)
8671 tree field, member = NULL_TREE;
8673 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8675 if (TREE_CODE (field) != FIELD_DECL)
8676 continue;
8677 if (!DECL_SIZE (field))
8678 return NULL_TREE;
8679 if (integer_zerop (DECL_SIZE (field)))
8680 continue;
8681 if (member)
8682 return NULL_TREE;
8683 member = field;
8685 return member;
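/* For example: for struct { float f; } the FIELD_DECL for f is returned;
   for struct { int a; int b; } or for a type containing a field without
   a DECL_SIZE the result is NULL_TREE.  */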
8688 /* Implement `va_arg'. */
8689 static tree
8690 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8691 gimple_seq *post_p ATTRIBUTE_UNUSED)
8693 HOST_WIDE_INT size, rsize;
8694 tree tmp, pptr_type_node;
8695 tree addr, lab_over = NULL, result = NULL;
8696 bool pass_by_ref;
8697 tree eff_type;
8699 if (!VOID_TYPE_P (type))
8700 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8701 else
8702 pass_by_ref = false;
8704 if (pass_by_ref)
8705 type = build_pointer_type (type);
8707 size = int_size_in_bytes (type);
8708 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8709 pptr_type_node = build_pointer_type (ptr_type_node);
8711 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8712 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8714 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8715 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8716 int pass_as_float;
8717 tree lab_false;
8718 tree member;
8720 f_next_o = TYPE_FIELDS (va_list_type_node);
8721 f_next_o_limit = DECL_CHAIN (f_next_o);
8722 f_next_fp = DECL_CHAIN (f_next_o_limit);
8723 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8724 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8726 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8727 NULL_TREE);
8728 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8729 valist, f_next_o_limit, NULL_TREE);
8730 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8731 valist, f_next_fp, NULL_TREE);
8732 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8733 valist, f_next_fp_limit, NULL_TREE);
8734 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8735 valist, f_next_stack, NULL_TREE);
8737 /* Structures with a single member with a distinct mode are passed
8738 like their member. This is relevant if the latter has a REAL_TYPE
8739 or COMPLEX_TYPE type. */
8740 eff_type = type;
8741 while (TREE_CODE (eff_type) == RECORD_TYPE
8742 && (member = find_sole_member (eff_type))
8743 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8744 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8745 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8747 tree field_type = TREE_TYPE (member);
8749 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8750 eff_type = field_type;
8751 else
8753 gcc_assert ((TYPE_ALIGN (eff_type)
8754 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8755 || (TYPE_ALIGN (eff_type)
8756 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8757 break;
8761 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8763 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8764 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8765 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8766 && size <= 16));
8768 else
8770 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8773 addr = create_tmp_var (pptr_type_node);
8774 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8775 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8777 valist = build_simple_mem_ref (addr);
8779 if (pass_as_float)
8781 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8782 tree cmp;
8783 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8785 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8786 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8788 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8789 tmp = next_fp_limit;
8790 if (size > 4 && !is_double)
8791 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8792 tmp = build2 (GE_EXPR, boolean_type_node,
8793 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8794 cmp = build3 (COND_EXPR, void_type_node, tmp,
8795 build1 (GOTO_EXPR, void_type_node,
8796 unshare_expr (lab_false)), NULL_TREE);
8797 if (!is_double)
8798 gimplify_and_add (cmp, pre_p);
8800 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8801 || (is_double || size == 16))
8803 tmp = fold_convert (sizetype, next_fp_tmp);
8804 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8805 size_int (UNITS_PER_WORD));
8806 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8807 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8809 if (is_double)
8810 gimplify_and_add (cmp, pre_p);
8812 #ifdef FUNCTION_ARG_SCmode_WART
8813 if (TYPE_MODE (eff_type) == SCmode
8814 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8816 tree subtype = TREE_TYPE (eff_type);
8817 tree real, imag;
8819 imag
8820 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8821 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8823 real
8824 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8825 real = get_initialized_tmp_var (real, pre_p, NULL);
8827 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8828 if (type != eff_type)
8829 result = build1 (VIEW_CONVERT_EXPR, type, result);
8830 result = get_initialized_tmp_var (result, pre_p, NULL);
8832 #endif /* FUNCTION_ARG_SCmode_WART */
8834 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8835 gimplify_and_add (tmp, pre_p);
8837 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8838 gimplify_and_add (tmp, pre_p);
8840 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8841 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8842 gimplify_assign (unshare_expr (next_fp_tmp),
8843 unshare_expr (valist), pre_p);
8845 gimplify_assign (unshare_expr (valist),
8846 unshare_expr (next_fp_tmp), post_p);
8847 valist = next_fp_tmp;
8849 else
8851 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8852 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8853 unshare_expr (next_o_limit));
8854 tmp = build3 (COND_EXPR, void_type_node, tmp,
8855 build1 (GOTO_EXPR, void_type_node,
8856 unshare_expr (lab_false)),
8857 NULL_TREE);
8858 gimplify_and_add (tmp, pre_p);
8860 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8861 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8863 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8864 gimplify_and_add (tmp, pre_p);
8866 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8867 gimplify_and_add (tmp, pre_p);
8869 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8870 gimplify_assign (unshare_expr (next_o),
8871 unshare_expr (next_o_limit), pre_p);
8873 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8874 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8877 if (!result)
8879 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8880 gimplify_and_add (tmp, pre_p);
8884 /* ??? In va-sh.h, there had been code to make values larger than
8885 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8887 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8888 if (result)
8890 gimplify_assign (result, tmp, pre_p);
8891 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8892 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8893 gimplify_and_add (tmp, pre_p);
8895 else
8896 result = tmp;
8898 if (pass_by_ref)
8899 result = build_va_arg_indirect_ref (result);
8901 return result;
8904 /* 64-bit floating point memory transfers are paired single precision loads
8905 or stores. So the DWARF information needs fixing in little endian (unless
8906 PR=SZ=1 in FPSCR). */
8908 sh_dwarf_register_span (rtx reg)
8910 unsigned regno = REGNO (reg);
8912 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8913 return NULL_RTX;
8915 return
8916 gen_rtx_PARALLEL (VOIDmode,
8917 gen_rtvec (2,
8918 gen_rtx_REG (SFmode, regno + 1),
8919 gen_rtx_REG (SFmode, regno)));
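/* Illustrative reading of the PARALLEL above: on a little-endian target a
   DFmode value in the register pair (REGNO, REGNO + 1) is described to
   DWARF as the two SFmode halves in the order REGNO + 1, REGNO, matching
   the in-memory layout produced by the paired single precision moves.  */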
8922 static machine_mode
8923 sh_promote_function_mode (const_tree type, machine_mode mode,
8924 int *punsignedp, const_tree funtype,
8925 int for_return)
8927 if (sh_promote_prototypes (funtype))
8928 return promote_mode (type, mode, punsignedp);
8929 else
8930 return default_promote_function_mode (type, mode, punsignedp, funtype,
8931 for_return);
8934 static bool
8935 sh_promote_prototypes (const_tree type)
8937 if (TARGET_HITACHI)
8938 return false;
8939 if (! type)
8940 return true;
8941 return ! sh_attr_renesas_p (type);
8944 /* Whether an argument must be passed by reference. On SHcompact, we
8945 pretend that arguments wider than 32 bits that would have been passed in
8946 registers are passed by reference, so that an SHmedia trampoline
8947 loads them into the full 64-bit registers. */
8948 static int
8949 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8950 const_tree type, bool named)
8952 unsigned HOST_WIDE_INT size;
8954 if (type)
8955 size = int_size_in_bytes (type);
8956 else
8957 size = GET_MODE_SIZE (mode);
8959 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8960 && (!named
8961 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8962 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8963 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8964 && size > 4
8965 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8966 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8967 return size;
8968 else
8969 return 0;
8972 static bool
8973 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8974 const_tree type, bool named)
8976 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8978 if (targetm.calls.must_pass_in_stack (mode, type))
8979 return true;
8981 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8982 wants to know about pass-by-reference semantics for incoming
8983 arguments. */
8984 if (! cum)
8985 return false;
8987 if (TARGET_SHCOMPACT)
8989 cum->byref = shcompact_byref (cum, mode, type, named);
8990 return cum->byref != 0;
8993 return false;
8996 static bool
8997 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8998 const_tree type, bool named ATTRIBUTE_UNUSED)
9000 /* ??? How can it possibly be correct to return true only on the
9001 caller side of the equation? Is there someplace else in the
9002 sh backend that's magically producing the copies? */
9003 return (get_cumulative_args (cum)->outgoing
9004 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
9005 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
9008 /* Round a register number up to a proper boundary for an arg of mode
9009 MODE.
9010 The SH doesn't care about double alignment, so we only
9011 round doubles to even regs when explicitly asked to. */
9012 static int
9013 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
9015 /* FIXME: This used to be a macro and has been copy pasted into this
9016 function as is. Make this more readable. */
9017 return
9018 (((TARGET_ALIGN_DOUBLE
9019 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9020 && (mode == DFmode || mode == DCmode)
9021 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9022 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9023 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9024 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9025 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
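/* Worked example of the expression above: for a DFmode argument on SH4
   with three SFmode argument slots already used, 3 + (3 & 1) == 4, so the
   double starts on an even register pair; when no such alignment is
   required the count is returned unchanged.  */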
9028 /* Return true if an arg of the specified mode should be passed in a register
9029 or false otherwise. */
9030 static bool
9031 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9032 const_tree type)
9034 /* FIXME: This used to be a macro and has been copy pasted into this
9035 function as is. Make this more readable. */
9036 return
9037 ((type == 0
9038 || (! TREE_ADDRESSABLE (type)
9039 && (! (TARGET_HITACHI || cum.renesas_abi)
9040 || ! (AGGREGATE_TYPE_P (type)
9041 || (!TARGET_FPU_ANY
9042 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9043 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9044 && ! cum.force_mem
9045 && (TARGET_SH2E
9046 ? ((mode) == BLKmode
9047 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9048 + int_size_in_bytes (type))
9049 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9050 : ((sh_round_reg (cum, mode)
9051 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9052 <= NPARM_REGS (mode)))
9053 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9056 static int
9057 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9058 tree type, bool named ATTRIBUTE_UNUSED)
9060 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9061 int words = 0;
9063 if (!TARGET_SH5
9064 && sh_pass_in_reg_p (*cum, mode, type)
9065 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9066 && (sh_round_reg (*cum, mode)
9067 + (mode != BLKmode
9068 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9069 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9070 > NPARM_REGS (mode)))
9071 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9073 else if (!TARGET_SHCOMPACT
9074 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9075 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9077 return words * UNITS_PER_WORD;
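/* E.g. (illustrative; a non-SH4/SH5 target with the four SImode argument
   registers r4-r7 and UNITS_PER_WORD == 4): a 12 byte BLKmode argument
   whose first word would land in r6 needs three words but only two
   registers remain, so 2 * 4 == 8 bytes are passed in registers and the
   remainder goes on the stack.  */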
9081 /* Define where to put the arguments to a function.
9082 Value is zero to push the argument on the stack,
9083 or a hard register in which to store the argument.
9085 MODE is the argument's machine mode.
9086 TYPE is the data type of the argument (as a tree).
9087 This is null for libcalls where that information may
9088 not be available.
9089 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9090 the preceding args and about the function being called.
9091 NAMED is nonzero if this argument is a named parameter
9092 (otherwise it is an extra parameter matching an ellipsis).
9094 On SH the first args are normally in registers
9095 and the rest are pushed. Any arg that starts within the first
9096 NPARM_REGS words is at least partially passed in a register unless
9097 its data type forbids. */
9098 static rtx
9099 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9100 const_tree type, bool named)
9102 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9104 if (! TARGET_SH5 && mode == VOIDmode)
9105 return GEN_INT (ca->renesas_abi ? 1 : 0);
9107 if (! TARGET_SH5
9108 && sh_pass_in_reg_p (*ca, mode, type)
9109 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9111 int regno;
9113 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9114 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9116 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9117 gen_rtx_REG (SFmode,
9118 BASE_ARG_REG (mode)
9119 + (sh_round_reg (*ca, mode) ^ 1)),
9120 const0_rtx);
9121 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9122 gen_rtx_REG (SFmode,
9123 BASE_ARG_REG (mode)
9124 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9125 GEN_INT (4));
9126 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9129 /* If the alignment of a DF value causes an SF register to be
9130 skipped, we will use that skipped register for the next SF
9131 value. */
9132 if ((TARGET_HITACHI || ca->renesas_abi)
9133 && ca->free_single_fp_reg
9134 && mode == SFmode)
9135 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9137 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9138 ^ (mode == SFmode && TARGET_SH4
9139 && TARGET_LITTLE_ENDIAN
9140 && ! TARGET_HITACHI && ! ca->renesas_abi);
9141 return gen_rtx_REG (mode, regno);
9145 if (TARGET_SH5)
9147 if (mode == VOIDmode && TARGET_SHCOMPACT)
9148 return GEN_INT (ca->call_cookie);
9150 /* The following test assumes unnamed arguments are promoted to
9151 DFmode. */
9152 if (mode == SFmode && ca->free_single_fp_reg)
9153 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9155 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9156 && (named || ! ca->prototype_p)
9157 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9159 if (! ca->prototype_p && TARGET_SHMEDIA)
9160 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9162 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9163 FIRST_FP_PARM_REG
9164 + ca->arg_count[(int) SH_ARG_FLOAT]);
9167 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9168 && (! TARGET_SHCOMPACT
9169 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9170 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9171 type, named))))
9173 return gen_rtx_REG (mode, (FIRST_PARM_REG
9174 + ca->arg_count[(int) SH_ARG_INT]));
9177 return NULL_RTX;
9180 return NULL_RTX;
9183 /* Update the data in CUM to advance over an argument
9184 of mode MODE and data type TYPE.
9185 (TYPE is null for libcalls where that information may not be
9186 available.) */
9187 static void
9188 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9189 const_tree type, bool named)
9191 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9193 if (ca->force_mem)
9194 ca->force_mem = 0;
9195 else if (TARGET_SH5)
9197 const_tree type2 = (ca->byref && type
9198 ? TREE_TYPE (type)
9199 : type);
9200 machine_mode mode2 = (ca->byref && type
9201 ? TYPE_MODE (type2)
9202 : mode);
9203 int dwords = ((ca->byref
9204 ? ca->byref
9205 : mode2 == BLKmode
9206 ? int_size_in_bytes (type2)
9207 : GET_MODE_SIZE (mode2)) + 7) / 8;
9208 int numregs = MIN (dwords, NPARM_REGS (SImode)
9209 - ca->arg_count[(int) SH_ARG_INT]);
9211 if (numregs)
9213 ca->arg_count[(int) SH_ARG_INT] += numregs;
9214 if (TARGET_SHCOMPACT
9215 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9217 ca->call_cookie
9218 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9219 - numregs, 1);
9220 /* N.B. We want this also for outgoing. */
9221 ca->stack_regs += numregs;
9223 else if (ca->byref)
9225 if (! ca->outgoing)
9226 ca->stack_regs += numregs;
9227 ca->byref_regs += numregs;
9228 ca->byref = 0;
9230 ca->call_cookie
9231 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9232 - numregs, 2);
9233 while (--numregs);
9234 ca->call_cookie
9235 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9236 - 1, 1);
9238 else if (dwords > numregs)
9240 int pushregs = numregs;
9242 if (TARGET_SHCOMPACT)
9243 ca->stack_regs += numregs;
9244 while (pushregs < NPARM_REGS (SImode) - 1
9245 && (CALL_COOKIE_INT_REG_GET
9246 (ca->call_cookie,
9247 NPARM_REGS (SImode) - pushregs)
9248 == 1))
9250 ca->call_cookie
9251 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9252 - pushregs, 1);
9253 pushregs++;
9255 if (numregs == NPARM_REGS (SImode))
9256 ca->call_cookie
9257 |= CALL_COOKIE_INT_REG (0, 1)
9258 | CALL_COOKIE_STACKSEQ (numregs - 1);
9259 else
9260 ca->call_cookie
9261 |= CALL_COOKIE_STACKSEQ (numregs);
9264 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9265 && (named || ! ca->prototype_p))
9267 if (mode2 == SFmode && ca->free_single_fp_reg)
9268 ca->free_single_fp_reg = 0;
9269 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9270 < NPARM_REGS (SFmode))
9272 int numfpregs
9273 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9274 NPARM_REGS (SFmode)
9275 - ca->arg_count[(int) SH_ARG_FLOAT]);
9277 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9279 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9281 if (ca->outgoing && numregs > 0)
9284 ca->call_cookie
9285 |= (CALL_COOKIE_INT_REG
9286 (ca->arg_count[(int) SH_ARG_INT]
9287 - numregs + ((numfpregs - 2) / 2),
9288 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9289 - numfpregs) / 2));
9291 while (numfpregs -= 2);
9293 else if (mode2 == SFmode && (named)
9294 && (ca->arg_count[(int) SH_ARG_FLOAT]
9295 < NPARM_REGS (SFmode)))
9296 ca->free_single_fp_reg
9297 = FIRST_FP_PARM_REG - numfpregs
9298 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9301 return;
9304 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9306 /* Note that we've used the skipped register. */
9307 if (mode == SFmode && ca->free_single_fp_reg)
9309 ca->free_single_fp_reg = 0;
9310 return;
9312 /* When we have a DF after an SF, there's an SF register that gets
9313 skipped in order to align the DF value. We note this skipped
9314 register, because the next SF value will use it, and not the
9315 SF that follows the DF. */
9316 if (mode == DFmode
9317 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9319 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9320 + BASE_ARG_REG (mode));
9324 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9325 || sh_pass_in_reg_p (*ca, mode, type))
9326 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9327 = (sh_round_reg (*ca, mode)
9328 + (mode == BLKmode
9329 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9330 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9333 /* The Renesas calling convention doesn't quite fit into this scheme since
9334 the address is passed like an invisible argument, but one that is always
9335 passed in memory. */
9336 static rtx
9337 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9339 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9340 return NULL_RTX;
9341 return gen_rtx_REG (Pmode, 2);
9344 /* Worker function for TARGET_FUNCTION_VALUE.
9346 For the SH, this is like LIBCALL_VALUE, except that we must change the
9347 mode like PROMOTE_MODE does.
9348 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9349 tested here has to be kept in sync with the one in
9350 explow.c:promote_mode. */
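/* For illustration (hypothetical user code, not taken from this file): with
   the default ABI a declaration such as

     short f (void);

   has an integer return type narrower than 4 bytes, so its value is returned
   promoted to SImode (DImode on SHMEDIA64) in the return value register
   rather than in HImode.  */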
9351 static rtx
9352 sh_function_value (const_tree valtype,
9353 const_tree fn_decl_or_type,
9354 bool outgoing ATTRIBUTE_UNUSED)
9356 if (fn_decl_or_type
9357 && !DECL_P (fn_decl_or_type))
9358 fn_decl_or_type = NULL;
9360 return gen_rtx_REG (
9361 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9362 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9363 && (TREE_CODE (valtype) == INTEGER_TYPE
9364 || TREE_CODE (valtype) == ENUMERAL_TYPE
9365 || TREE_CODE (valtype) == BOOLEAN_TYPE
9366 || TREE_CODE (valtype) == REAL_TYPE
9367 || TREE_CODE (valtype) == OFFSET_TYPE))
9368 && sh_promote_prototypes (fn_decl_or_type)
9369 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9370 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9373 /* Worker function for TARGET_LIBCALL_VALUE. */
9374 static rtx
9375 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9377 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9380 /* Return true if N is a possible register number of function value. */
9381 static bool
9382 sh_function_value_regno_p (const unsigned int regno)
9384 return ((regno) == FIRST_RET_REG
9385 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9386 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9389 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9390 static bool
9391 sh_return_in_memory (const_tree type, const_tree fndecl)
9393 if (TARGET_SH5)
9395 if (TYPE_MODE (type) == BLKmode)
9396 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9397 else
9398 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9400 else
9402 return (TYPE_MODE (type) == BLKmode
9403 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9404 && TREE_CODE (type) == RECORD_TYPE));
9408 /* We actually emit the code in sh_expand_prologue. We used to use
9409 a static variable to flag that we need to emit this code, but that
9410 doesn't work when inlining, when functions are deferred and then emitted
9411 later. Fortunately, we already have two flags that are part of struct
9412 function that tell if a function uses varargs or stdarg. */
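/* Rough illustration, assuming the usual four integer argument registers
   r4..r7 (the declaration below is hypothetical):

     int f (int a, ...);

   Only one register is consumed by the named argument, so the three remaining
   anonymous argument registers are flushed to the stack and *pretend_arg_size
   is set to 12 bytes.  */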
9413 static void
9414 sh_setup_incoming_varargs (cumulative_args_t ca,
9415 machine_mode mode,
9416 tree type,
9417 int *pretend_arg_size,
9418 int second_time ATTRIBUTE_UNUSED)
9420 gcc_assert (cfun->stdarg);
9421 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9423 int named_parm_regs, anon_parm_regs;
9425 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9426 + (mode == BLKmode
9427 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9428 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9429 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9430 if (anon_parm_regs > 0)
9431 *pretend_arg_size = anon_parm_regs * 4;
9435 static bool
9436 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9438 return TARGET_SH5;
9441 static bool
9442 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9444 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9446 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9450 /* Define the offset between two registers, one to be eliminated, and
9451 the other its replacement, at the start of a routine. */
9453 initial_elimination_offset (int from, int to)
9455 int regs_saved;
9456 int regs_saved_rounding = 0;
9457 int total_saved_regs_space;
9458 int total_auto_space;
9459 int save_flags = target_flags;
9460 int copy_flags;
9461 HARD_REG_SET live_regs_mask;
9463 shmedia_space_reserved_for_target_registers = false;
9464 regs_saved = calc_live_regs (&live_regs_mask);
9465 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9467 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9469 shmedia_space_reserved_for_target_registers = true;
9470 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9473 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9474 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9475 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9477 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9478 copy_flags = target_flags;
9479 target_flags = save_flags;
9481 total_saved_regs_space = regs_saved + regs_saved_rounding;
9483 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9484 return total_saved_regs_space + total_auto_space
9485 + crtl->args.info.byref_regs * 8;
9487 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9488 return total_saved_regs_space + total_auto_space
9489 + crtl->args.info.byref_regs * 8;
9491 /* Initial gap between fp and sp is 0. */
9492 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9493 return 0;
9495 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9496 return rounded_frame_size (0);
9498 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9499 return rounded_frame_size (0);
9501 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9502 && (to == HARD_FRAME_POINTER_REGNUM
9503 || to == STACK_POINTER_REGNUM));
9504 if (TARGET_SH5)
9506 int n = total_saved_regs_space;
9507 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9508 save_schedule schedule;
9509 save_entry *entry;
9511 n += total_auto_space;
9513 /* If it wasn't saved, there's not much we can do. */
9514 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9515 return n;
9517 target_flags = copy_flags;
9519 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9520 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9521 if (entry->reg == pr_reg)
9523 target_flags = save_flags;
9524 return entry->offset;
9526 gcc_unreachable ();
9528 else
9529 return total_auto_space;
9532 /* Parse the -mfixed-range= option string. */
9533 void
9534 sh_fix_range (const char *const_str)
9536 int i, first, last;
9537 char *str, *dash, *comma;
9539 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9540 REG2 are either register names or register numbers. The effect
9541 of this option is to mark the registers in the range from REG1 to
9542 REG2 as ``fixed'' so they won't be used by the compiler. */
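/* For example (illustrative only), -mfixed-range=r10-r13 marks r10..r13 as
   fixed; several ranges may be given separated by commas, e.g.
   -mfixed-range=r10-r11,r13-r13, and the register names must be acceptable
   to decode_reg_name.  */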
9544 i = strlen (const_str);
9545 str = (char *) alloca (i + 1);
9546 memcpy (str, const_str, i + 1);
9548 while (1)
9550 dash = strchr (str, '-');
9551 if (!dash)
9553 warning (0, "value of -mfixed-range must have form REG1-REG2");
9554 return;
9556 *dash = '\0';
9557 comma = strchr (dash + 1, ',');
9558 if (comma)
9559 *comma = '\0';
9561 first = decode_reg_name (str);
9562 if (first < 0)
9564 warning (0, "unknown register name: %s", str);
9565 return;
9568 last = decode_reg_name (dash + 1);
9569 if (last < 0)
9571 warning (0, "unknown register name: %s", dash + 1);
9572 return;
9575 *dash = '-';
9577 if (first > last)
9579 warning (0, "%s-%s is an empty range", str, dash + 1);
9580 return;
9583 for (i = first; i <= last; ++i)
9584 fixed_regs[i] = call_used_regs[i] = 1;
9586 if (!comma)
9587 break;
9589 *comma = ',';
9590 str = comma + 1;
9594 /* Insert any deferred function attributes from earlier pragmas. */
9595 static void
9596 sh_insert_attributes (tree node, tree *attributes)
9598 tree attrs;
9600 if (TREE_CODE (node) != FUNCTION_DECL)
9601 return;
9603 /* We are only interested in declarations. */
9604 if (!DECL_P (node))
9605 return;
9607 /* Append the attributes to the deferred attributes. */
9608 *sh_deferred_function_attributes_tail = *attributes;
9609 attrs = sh_deferred_function_attributes;
9610 if (!attrs)
9611 return;
9613 /* Some attributes imply or require the interrupt attribute. */
9614 if (!lookup_attribute ("interrupt_handler", attrs)
9615 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9617 /* If we have a trapa_handler, but no interrupt_handler attribute,
9618 insert an interrupt_handler attribute. */
9619 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9620 /* We can't use sh_pr_interrupt here because that's not in the
9621 java frontend. */
9622 attrs
9623 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9624 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9625 if the interrupt attribute is missing, we ignore the attribute
9626 and warn. */
9627 else if (lookup_attribute ("sp_switch", attrs)
9628 || lookup_attribute ("trap_exit", attrs)
9629 || lookup_attribute ("nosave_low_regs", attrs)
9630 || lookup_attribute ("resbank", attrs))
9632 tree *tail;
9634 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9636 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9637 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9638 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9639 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9640 warning (OPT_Wattributes,
9641 "%qE attribute only applies to interrupt functions",
9642 TREE_PURPOSE (attrs));
9643 else
9645 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9646 NULL_TREE);
9647 tail = &TREE_CHAIN (*tail);
9650 attrs = *attributes;
9654 /* Install the processed list. */
9655 *attributes = attrs;
9657 /* Clear deferred attributes. */
9658 sh_deferred_function_attributes = NULL_TREE;
9659 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9661 return;
9664 /*------------------------------------------------------------------------------
9665 Target specific attributes
9666 Supported attributes are:
9668 * interrupt_handler
9669 Specifies this function is an interrupt handler.
9671 * trapa_handler
9672 Like interrupt_handler, but don't save all registers.
9674 * sp_switch
9675 Specifies an alternate stack for an interrupt handler to run on.
9677 * trap_exit
9678 Use a trapa to exit an interrupt function instead of rte.
9680 * nosave_low_regs
9681 Don't save r0..r7 in an interrupt handler function.
9682 This is useful on SH3* and SH4*, which have a separate set of low
9683 regs for user and privileged modes.
9684 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9685 those that run with interrupts disabled and thus can't be
9686 interrupted themselves).
9688 * renesas
9689 Use Renesas calling/layout conventions (functions and structures).
9691 * resbank
9692 In case of an interrupt handler function, use a register bank to
9693 save registers R0-R14, MACH, MACL, GBR and PR.
9694 This is available only on SH2A targets.
9696 * function_vector
9697 Declares a function to be called using the TBR relative addressing
9698 mode. Takes an argument that specifies the slot number in the table
9699 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
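/* Illustrative (hypothetical) user code for the attributes above:

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (4),
                                      nosave_low_regs));
     void isr3 (void) __attribute__ ((interrupt_handler, resbank));  (SH2A)
     void tbr_func (void) __attribute__ ((function_vector (18)));    (SH2A)
     int rfunc (int x) __attribute__ ((renesas));

   The argument forms match the checks in the handlers below: sp_switch takes
   a string constant, trap_exit and function_vector take integer constants,
   and function_vector's argument must be in the range 0..255.  */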
9702 /* Handle a 'resbank' attribute. */
9703 static tree
9704 sh_handle_resbank_handler_attribute (tree * node, tree name,
9705 tree args ATTRIBUTE_UNUSED,
9706 int flags ATTRIBUTE_UNUSED,
9707 bool * no_add_attrs)
9709 if (!TARGET_SH2A)
9711 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9712 name);
9713 *no_add_attrs = true;
9715 if (TREE_CODE (*node) != FUNCTION_DECL)
9717 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9718 name);
9719 *no_add_attrs = true;
9722 return NULL_TREE;
9725 /* Handle an "interrupt_handler" attribute; arguments as in
9726 struct attribute_spec.handler. */
9727 static tree
9728 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9729 tree args ATTRIBUTE_UNUSED,
9730 int flags ATTRIBUTE_UNUSED,
9731 bool *no_add_attrs)
9733 if (TREE_CODE (*node) != FUNCTION_DECL)
9735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9736 name);
9737 *no_add_attrs = true;
9739 else if (TARGET_SHCOMPACT)
9741 error ("attribute interrupt_handler is not compatible with -m5-compact");
9742 *no_add_attrs = true;
9745 return NULL_TREE;
9748 /* Handle a 'function_vector' attribute; arguments as in
9749 struct attribute_spec.handler. */
9750 static tree
9751 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9752 tree args ATTRIBUTE_UNUSED,
9753 int flags ATTRIBUTE_UNUSED,
9754 bool * no_add_attrs)
9756 if (!TARGET_SH2A)
9758 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9759 name);
9760 *no_add_attrs = true;
9762 else if (TREE_CODE (*node) != FUNCTION_DECL)
9764 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9765 name);
9766 *no_add_attrs = true;
9768 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9770 /* The argument must be a constant integer. */
9771 warning (OPT_Wattributes,
9772 "%qE attribute argument not an integer constant",
9773 name);
9774 *no_add_attrs = true;
9776 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9778 /* The argument value must be between 0 and 255. */
9779 warning (OPT_Wattributes,
9780 "%qE attribute argument should be between 0 to 255",
9781 name);
9782 *no_add_attrs = true;
9784 return NULL_TREE;
9787 /* Returns true if the rtx X is a SYMBOL_REF for a function that has been
9788 assigned the attribute 'function_vector'. */
9789 bool
9790 sh2a_is_function_vector_call (rtx x)
9792 if (GET_CODE (x) == SYMBOL_REF
9793 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9795 tree tr = SYMBOL_REF_DECL (x);
9797 if (sh2a_function_vector_p (tr))
9798 return true;
9801 return false;
9804 /* Returns the function vector number, if the attribute
9805 'function_vector' is assigned, otherwise returns zero. */
9807 sh2a_get_function_vector_number (rtx x)
9809 int num;
9810 tree list, t;
9812 if ((GET_CODE (x) == SYMBOL_REF)
9813 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9815 t = SYMBOL_REF_DECL (x);
9817 if (TREE_CODE (t) != FUNCTION_DECL)
9818 return 0;
9820 list = SH_ATTRIBUTES (t);
9821 while (list)
9823 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9825 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9826 return num;
9829 list = TREE_CHAIN (list);
9832 return 0;
9834 else
9835 return 0;
9838 /* Handle an "sp_switch" attribute; arguments as in
9839 struct attribute_spec.handler. */
9840 static tree
9841 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9842 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9844 if (TREE_CODE (*node) != FUNCTION_DECL)
9846 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9847 name);
9848 *no_add_attrs = true;
9850 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9852 /* The argument must be a constant string. */
9853 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9854 name);
9855 *no_add_attrs = true;
9858 return NULL_TREE;
9861 /* Handle a "trap_exit" attribute; arguments as in
9862 struct attribute_spec.handler. */
9863 static tree
9864 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9865 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9867 if (TREE_CODE (*node) != FUNCTION_DECL)
9869 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9870 name);
9871 *no_add_attrs = true;
9873 /* The argument specifies a trap number to be used in a trapa instruction
9874 at function exit (instead of an rte instruction). */
9875 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9877 /* The argument must be a constant integer. */
9878 warning (OPT_Wattributes, "%qE attribute argument not an "
9879 "integer constant", name);
9880 *no_add_attrs = true;
9883 return NULL_TREE;
9886 static tree
9887 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9888 tree name ATTRIBUTE_UNUSED,
9889 tree args ATTRIBUTE_UNUSED,
9890 int flags ATTRIBUTE_UNUSED,
9891 bool *no_add_attrs ATTRIBUTE_UNUSED)
9893 return NULL_TREE;
9896 /* True if __attribute__((renesas)) or -mrenesas. */
9897 bool
9898 sh_attr_renesas_p (const_tree td)
9900 if (TARGET_HITACHI)
9901 return true;
9902 if (td == NULL_TREE)
9903 return false;
9904 if (DECL_P (td))
9905 td = TREE_TYPE (td);
9906 if (td == error_mark_node)
9907 return false;
9908 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9909 != NULL_TREE);
9912 /* True if __attribute__((renesas)) or -mrenesas, for the current
9913 function. */
9914 bool
9915 sh_cfun_attr_renesas_p (void)
9917 return sh_attr_renesas_p (current_function_decl);
9920 /* Returns true if the current function has the "interrupt_handler"
9921 attribute set. */
9922 bool
9923 sh_cfun_interrupt_handler_p (void)
9925 return (lookup_attribute ("interrupt_handler",
9926 DECL_ATTRIBUTES (current_function_decl))
9927 != NULL_TREE);
9930 /* Returns true if FUNC has been assigned the attribute
9931 "function_vector". */
9932 bool
9933 sh2a_function_vector_p (tree func)
9935 tree list;
9936 if (TREE_CODE (func) != FUNCTION_DECL)
9937 return false;
9939 list = SH_ATTRIBUTES (func);
9940 while (list)
9942 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9943 return true;
9945 list = TREE_CHAIN (list);
9947 return false;
9950 /* Returns true if the current function has the "resbank" attribute set. */
9951 bool
9952 sh_cfun_resbank_handler_p (void)
9954 return ((lookup_attribute ("resbank",
9955 DECL_ATTRIBUTES (current_function_decl))
9956 != NULL_TREE)
9957 && (lookup_attribute ("interrupt_handler",
9958 DECL_ATTRIBUTES (current_function_decl))
9959 != NULL_TREE) && TARGET_SH2A);
9962 /* Returns true if the current function has a "trap_exit" attribute set. */
9963 bool
9964 sh_cfun_trap_exit_p (void)
9966 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9967 != NULL_TREE;
9970 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9971 static const char *
9972 sh_check_pch_target_flags (int old_flags)
9974 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9975 | MASK_SH_E | MASK_HARD_SH4
9976 | MASK_FPU_SINGLE | MASK_SH4))
9977 return _("created and used with different architectures / ABIs");
9978 if ((old_flags ^ target_flags) & MASK_HITACHI)
9979 return _("created and used with different ABIs");
9980 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9981 return _("created and used with different endianness");
9982 return NULL;
9985 /* Predicates used by the templates. */
9987 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9988 Used only in general_movsrc_operand. */
9989 bool
9990 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9992 switch (REGNO (op))
9994 case PR_REG:
9995 case MACL_REG:
9996 case MACH_REG:
9997 return true;
9999 return false;
10002 /* Returns true if OP is a floating point value with value 0.0. */
10003 bool
10004 fp_zero_operand (rtx op)
10006 REAL_VALUE_TYPE r;
10008 if (GET_MODE (op) != SFmode)
10009 return false;
10011 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10012 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
10015 /* Returns true if OP is a floating point value with value 1.0. */
10016 bool
10017 fp_one_operand (rtx op)
10019 REAL_VALUE_TYPE r;
10021 if (GET_MODE (op) != SFmode)
10022 return false;
10024 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10025 return REAL_VALUES_EQUAL (r, dconst1);
10028 /* Return the TLS type for TLS symbols. */
10029 enum tls_model
10030 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10032 if (GET_CODE (op) != SYMBOL_REF)
10033 return TLS_MODEL_NONE;
10034 return SYMBOL_REF_TLS_MODEL (op);
10037 /* Return the destination address of a branch. */
10038 static int
10039 branch_dest (rtx branch)
10041 rtx dest = SET_SRC (PATTERN (branch));
10042 int dest_uid;
10044 if (GET_CODE (dest) == IF_THEN_ELSE)
10045 dest = XEXP (dest, 1);
10046 dest = XEXP (dest, 0);
10047 dest_uid = INSN_UID (dest);
10048 return INSN_ADDRESSES (dest_uid);
10051 /* Return nonzero if REG is not used after INSN.
10052 We assume REG is a reload reg, and therefore does
10053 not live past labels. It may live past calls or jumps though. */
10054 bool
10055 reg_unused_after (rtx reg, rtx_insn *insn)
10057 enum rtx_code code;
10058 rtx set;
10060 /* If the reg is set by this instruction, then it is safe for our
10061 case. Disregard the case where this is a store to memory, since
10062 we are checking a register used in the store address. */
10063 set = single_set (insn);
10064 if (set && !MEM_P (SET_DEST (set))
10065 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10066 return true;
10068 while ((insn = NEXT_INSN (insn)))
10070 rtx set;
10071 if (!INSN_P (insn))
10072 continue;
10074 code = GET_CODE (insn);
10076 #if 0
10077 /* If this is a label that existed before reload, then the register
10078 is dead here. However, if this is a label added by reorg, then
10079 the register may still be live here. We can't tell the difference,
10080 so we just ignore labels completely. */
10081 if (code == CODE_LABEL)
10082 return 1;
10083 /* else */
10084 #endif
10086 if (code == JUMP_INSN)
10087 return false;
10089 /* If this is a sequence, we must handle them all at once.
10090 We could have for instance a call that sets the target register,
10091 and an insn in a delay slot that uses the register. In this case,
10092 we must return 0. */
10093 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10095 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10096 int i;
10097 int retval = 0;
10099 for (i = 0; i < seq->len (); i++)
10101 rtx_insn *this_insn = seq->insn (i);
10102 rtx set = single_set (this_insn);
10104 if (CALL_P (this_insn))
10105 code = CALL_INSN;
10106 else if (JUMP_P (this_insn))
10108 if (INSN_ANNULLED_BRANCH_P (this_insn))
10109 return false;
10110 code = JUMP_INSN;
10113 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10114 return false;
10115 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10117 if (!MEM_P (SET_DEST (set)))
10118 retval = true;
10119 else
10120 return false;
10122 if (set == NULL_RTX
10123 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10124 return false;
10126 if (retval == 1)
10127 return true;
10128 else if (code == JUMP_INSN)
10129 return false;
10132 set = single_set (insn);
10133 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10134 return false;
10135 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10136 return !MEM_P (SET_DEST (set));
10137 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10138 return false;
10140 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10141 return true;
10143 return true;
10147 static GTY(()) rtx t_reg_rtx;
10149 get_t_reg_rtx (void)
10151 if (! t_reg_rtx)
10152 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10153 return t_reg_rtx;
10156 static GTY(()) tree fpscr_values;
10158 static void
10159 emit_fpu_switch (rtx scratch, int index)
10161 rtx src;
10163 if (fpscr_values == NULL)
10165 tree t;
10167 t = build_index_type (integer_one_node);
10168 t = build_array_type (integer_type_node, t);
10169 t = build_decl (BUILTINS_LOCATION,
10170 VAR_DECL, get_identifier ("__fpscr_values"), t);
10171 DECL_ARTIFICIAL (t) = 1;
10172 DECL_IGNORED_P (t) = 1;
10173 DECL_EXTERNAL (t) = 1;
10174 TREE_STATIC (t) = 1;
10175 TREE_PUBLIC (t) = 1;
10176 TREE_USED (t) = 1;
10178 fpscr_values = t;
10181 src = DECL_RTL (fpscr_values);
10182 if (!can_create_pseudo_p ())
10184 emit_move_insn (scratch, XEXP (src, 0));
10185 if (index != 0)
10186 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10187 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10189 else
10190 src = adjust_address (src, SImode, index * 4);
10192 emit_insn (gen_lds_fpscr (src));
10195 static rtx get_free_reg (HARD_REG_SET);
10197 /* This function returns a register to use for loading the address from which
10198 the fpscr will be loaded. Currently it always returns r1 or r7, but when we are
10199 able to use pseudo registers after combine, or have a better mechanism
10200 for choosing a register, it should be done here. */
10201 /* REGS_LIVE is the liveness information for the point for which we
10202 need this allocation. In some bare-bones exit blocks, r1 is live at the
10203 start. We can even have all of r0..r3 being live:
10204 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10205 The INSN before which the new insns are placed will clobber the register
10206 we return. If a basic block consists only of setting the return value
10207 register to a pseudo and using that register, the return value is not
10208 live before or after this block, yet we'll insert our insns right in
10209 the middle. */
10210 static rtx
10211 get_free_reg (HARD_REG_SET regs_live)
10213 if (! TEST_HARD_REG_BIT (regs_live, 1))
10214 return gen_rtx_REG (Pmode, 1);
10216 /* Hard reg 1 is live; since this is a small register classes target,
10217 there shouldn't be anything but a jump before the function end. */
10218 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10219 return gen_rtx_REG (Pmode, 7);
10222 /* This function will set the fpscr from memory.
10223 MODE is the mode we are setting it to. */
10224 void
10225 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10227 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10228 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10229 rtx addr_reg;
10231 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10232 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
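/* Sketch of the net effect of the above (not the literal emitted code): the
   FPSCR is reloaded from a two-entry table that the runtime (normally libgcc)
   is expected to provide, roughly

     extern int __fpscr_values[2];
     fpscr = __fpscr_values[index];

   where index selects between the two floating-point precision modes.  */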
10235 /* Is the given character a logical line separator for the assembler? */
10236 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10237 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10238 #endif
10240 static bool
10241 sequence_insn_p (rtx_insn *insn)
10243 rtx_insn *prev, *next;
10245 prev = PREV_INSN (insn);
10246 if (prev == NULL)
10247 return false;
10249 next = NEXT_INSN (prev);
10250 if (next == NULL)
10251 return false;
10253 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10257 sh_insn_length_adjustment (rtx_insn *insn)
10259 /* Instructions with unfilled delay slots take up an extra two bytes for
10260 the nop in the delay slot. */
10261 if (((NONJUMP_INSN_P (insn)
10262 && GET_CODE (PATTERN (insn)) != USE
10263 && GET_CODE (PATTERN (insn)) != CLOBBER)
10264 || CALL_P (insn) || JUMP_P (insn))
10265 && ! sequence_insn_p (insn)
10266 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10267 return 2;
10269 /* Increase the insn length of a cbranch without a delay slot insn to
10270 force a delay slot which will be stuffed with a nop. */
10271 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10272 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10273 && ! sequence_insn_p (insn))
10274 return 2;
10276 /* sh-dsp parallel processing insns take four bytes instead of two. */
10278 if (NONJUMP_INSN_P (insn))
10280 int sum = 0;
10281 rtx body = PATTERN (insn);
10282 const char *templ;
10283 char c;
10284 bool maybe_label = true;
10286 if (GET_CODE (body) == ASM_INPUT)
10287 templ = XSTR (body, 0);
10288 else if (asm_noperands (body) >= 0)
10289 templ
10290 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10291 else
10292 return 0;
10295 int ppi_adjust = 0;
10298 c = *templ++;
10299 while (c == ' ' || c == '\t');
10300 /* all sh-dsp parallel-processing insns start with p.
10301 The only non-ppi sh insn starting with p is pref.
10302 The only ppi starting with pr is prnd. */
10303 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10304 ppi_adjust = 2;
10305 /* The repeat pseudo-insn expands to three insns, a total of
10306 six bytes in size. */
10307 else if ((c == 'r' || c == 'R')
10308 && ! strncasecmp ("epeat", templ, 5))
10309 ppi_adjust = 4;
10310 while (c && c != '\n'
10311 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10313 /* If this is a label, it is obviously not a ppi insn. */
10314 if (c == ':' && maybe_label)
10316 ppi_adjust = 0;
10317 break;
10319 else if (c == '\'' || c == '"')
10320 maybe_label = false;
10321 c = *templ++;
10323 sum += ppi_adjust;
10324 maybe_label = c != ':';
10326 while (c);
10327 return sum;
10329 return 0;
10332 /* Return TRUE for a valid displacement for the REG+disp addressing
10333 with MODE. */
10334 bool
10335 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10336 bool allow_zero)
10338 if (! CONST_INT_P (op))
10339 return false;
10341 if (TARGET_SHMEDIA)
10343 int size;
10345 /* Check if this is the address of an unaligned load / store. */
10346 if (mode == VOIDmode)
10347 return satisfies_constraint_I06 (op);
10349 size = GET_MODE_SIZE (mode);
10350 return (!(INTVAL (op) & (size - 1))
10351 && INTVAL (op) >= -512 * size
10352 && INTVAL (op) < 512 * size);
10354 else
10356 const HOST_WIDE_INT offset = INTVAL (op);
10357 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10358 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10360 /* If the mode does not support any displacement always return false.
10361 Even though an index of '0' is actually always valid, it will cause
10362 troubles when e.g. a DFmode move is split into two SFmode moves,
10363 where one SFmode move will have index '0' and the other move will
10364 have index '4'. */
10365 if (!allow_zero && max_disp < 1)
10366 return false;
10368 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10372 /* Recognize an RTL expression that is a valid memory address for
10373 an instruction.
10374 The MODE argument is the machine mode for the MEM expression
10375 that wants to use this address.
10376 Allow REG
10377 REG+disp
10378 REG+r0
10379 REG++
10380 --REG
10382 GBR+disp */
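/* Illustrative SH assembly forms of the accepted addresses (32-bit loads and
   stores; the register numbers are arbitrary):
     REG       mov.l @r1,r2
     REG+disp  mov.l @(4,r1),r2
     REG+r0    mov.l @(r0,r1),r2
     REG++     mov.l @r1+,r2
     --REG     mov.l r2,@-r1
     GBR+disp  mov.l @(8,gbr),r0  */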
10383 static bool
10384 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10386 if (! ALLOW_INDEXED_ADDRESS
10387 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10388 return false;
10390 if (REG_P (x) && REGNO (x) == GBR_REG)
10391 return true;
10393 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10394 return true;
10395 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10396 && ! TARGET_SHMEDIA
10397 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10398 return true;
10399 else if (GET_CODE (x) == PLUS)
10401 rtx xop0 = XEXP (x, 0);
10402 rtx xop1 = XEXP (x, 1);
10404 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10405 return gbr_displacement (xop1, mode);
10407 if (GET_MODE_SIZE (mode) <= 8
10408 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10409 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10410 return true;
10412 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10413 || ((xop0 == stack_pointer_rtx
10414 || xop0 == hard_frame_pointer_rtx)
10415 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10416 || ((xop1 == stack_pointer_rtx
10417 || xop1 == hard_frame_pointer_rtx)
10418 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10419 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10420 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10421 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10422 && TARGET_FMOVD && mode == DFmode)))
10424 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10425 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10426 return true;
10427 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10428 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10429 return true;
10433 return false;
10436 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10437 isn't protected by a PIC unspec. */
10438 bool
10439 nonpic_symbol_mentioned_p (rtx x)
10441 const char *fmt;
10442 int i;
10444 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10445 || GET_CODE (x) == PC)
10446 return true;
10448 /* We don't want to look into the possible MEM location of a
10449 CONST_DOUBLE, since we're not going to use it, in general. */
10450 if (GET_CODE (x) == CONST_DOUBLE)
10451 return false;
10453 if (GET_CODE (x) == UNSPEC
10454 && (XINT (x, 1) == UNSPEC_PIC
10455 || XINT (x, 1) == UNSPEC_GOT
10456 || XINT (x, 1) == UNSPEC_GOTOFF
10457 || XINT (x, 1) == UNSPEC_GOTPLT
10458 || XINT (x, 1) == UNSPEC_GOTTPOFF
10459 || XINT (x, 1) == UNSPEC_DTPOFF
10460 || XINT (x, 1) == UNSPEC_TPOFF
10461 || XINT (x, 1) == UNSPEC_PLT
10462 || XINT (x, 1) == UNSPEC_SYMOFF
10463 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10464 return false;
10466 fmt = GET_RTX_FORMAT (GET_CODE (x));
10467 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10469 if (fmt[i] == 'E')
10471 int j;
10472 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10473 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10474 return true;
10476 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10477 return true;
10480 return false;
10483 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10484 @GOTOFF in `reg'. */
10486 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10487 rtx reg)
10489 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10490 return orig;
10492 if (GET_CODE (orig) == LABEL_REF
10493 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10495 if (reg == NULL_RTX)
10496 reg = gen_reg_rtx (Pmode);
10498 emit_insn (gen_symGOTOFF2reg (reg, orig));
10499 return reg;
10501 else if (GET_CODE (orig) == SYMBOL_REF)
10503 if (reg == NULL_RTX)
10504 reg = gen_reg_rtx (Pmode);
10506 emit_insn (gen_symGOT2reg (reg, orig));
10507 return reg;
10509 return orig;
10512 /* Given a (logical) mode size and an offset in bytes, try to find the
10513 appropriate displacement value for a mov insn. On SH the displacements
10514 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10515 15 bytes in QImode. To compensate for this, we create a new base address by
10516 adding an adjustment value to it.
10518 If the originally requested offset is greater than 127 we prefer using
10519 values 124..127 over 128..131 to increase opportunities to use the
10520 add #imm, Rn insn.
10522 In some cases it is possible that a requested offset might seem unaligned
10523 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10524 This is compensated by adjusting the base address so that the effective
10525 address of the displacement move insn will be aligned.
10527 This is not the best possible way of rebasing the base address, as it
10528 does not look at other present displacement addressings around it.
10529 In some cases this can create more base address adjustments than would
10530 actually be necessary. */
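/* Worked example (a rough trace of sh_find_mov_disp_adjust below): for an
   SImode access at offset 130 from the base register, the maximum mov
   displacement is 60, so the offset is rebased with offset_adjust = 126
   (inside the preferred 124..127 window) and mov_disp = 4; the access then
   becomes an  add #126,Rn  followed by a mov.l with displacement 4.  */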
10531 struct disp_adjust
10533 rtx offset_adjust;
10534 rtx mov_disp;
10537 static struct disp_adjust
10538 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10540 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10542 /* Do not try to use SH2A's large displacements here, because this would
10543 effectively disable the small displacement insns. */
10544 const int mode_sz = GET_MODE_SIZE (mode);
10545 const int mov_insn_sz = mov_insn_size (mode, false);
10546 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10547 const int max_disp_next = max_disp + mov_insn_sz;
10548 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10549 HOST_WIDE_INT offset_adjust;
10551 /* In some cases this actually does happen and we must check for it. */
10552 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10553 return res;
10555 /* Keeps the previous behavior for QImode displacement addressing.
10556 This just decides how the offset is re-based. Removing this special
10557 case will result in slightly bigger code on average, but it's not that
10558 bad actually. */
10559 if (mov_insn_sz == 1)
10560 align_modifier = 0;
10562 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10564 if (mode_sz + offset - offset_adjust <= max_disp_next)
10566 res.offset_adjust = GEN_INT (offset_adjust);
10567 res.mov_disp = GEN_INT (offset - offset_adjust);
10570 return res;
10573 /* Try to modify an illegitimate address and make it legitimate.
10574 If we find one, return the new, valid address.
10575 Otherwise, return the original address. */
10576 static rtx
10577 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10579 if (flag_pic)
10580 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10582 if (TARGET_SHMEDIA)
10583 return x;
10585 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10586 || (TARGET_SH2E && mode == SFmode))
10587 return x;
10589 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10590 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10592 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10593 INTVAL (XEXP (x, 1)));
10595 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10597 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10598 adj.offset_adjust, NULL_RTX, 0,
10599 OPTAB_LIB_WIDEN);
10600 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10603 return x;
10606 /* Attempt to replace *p, which is an address that needs reloading, with
10607 a valid memory address for an operand of mode MODE.
10608 Like for sh_legitimize_address, for the SH we try to get a normal form
10609 of the address. That will allow inheritance of the address reloads. */
10610 bool
10611 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10612 int itype)
10614 enum reload_type type = (enum reload_type) itype;
10615 const int mode_sz = GET_MODE_SIZE (mode);
10617 if (sh_lra_p ())
10618 return false;
10620 if (! ALLOW_INDEXED_ADDRESS
10621 && GET_CODE (*p) == PLUS
10622 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10624 *p = copy_rtx (*p);
10625 push_reload (*p, NULL_RTX, p, NULL,
10626 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10627 return true;
10630 if (! ALLOW_INDEXED_ADDRESS
10631 && GET_CODE (*p) == PLUS
10632 && GET_CODE (XEXP (*p, 0)) == PLUS)
10634 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10635 XEXP (XEXP (*p, 0), 1));
10636 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10637 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10638 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10639 return true;
10642 if (TARGET_SHMEDIA)
10643 return false;
10645 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10646 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10647 && (ALLOW_INDEXED_ADDRESS
10648 || XEXP (*p, 0) == stack_pointer_rtx
10649 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10651 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10652 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10654 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10656 push_reload (*p, NULL_RTX, p, NULL,
10657 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10658 return true;
10661 if (TARGET_SH2E && mode == SFmode)
10663 *p = copy_rtx (*p);
10664 push_reload (*p, NULL_RTX, p, NULL,
10665 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10666 return true;
10669 /* FIXME: Do not allow QImode and HImode displacement moves to be
10670 legitimized here, because then reload has trouble figuring out the
10671 constraint that the move insn's target/source reg must be R0.
10672 Or maybe some handling is wrong in sh_secondary_reload for this
10673 to work properly? */
10674 if ((mode_sz == 4 || mode_sz == 8)
10675 && ! (TARGET_SH4 && mode == DFmode)
10676 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10678 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10679 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10680 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10681 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10682 return true;
10686 /* We must re-recognize what we created before. */
10687 if (GET_CODE (*p) == PLUS
10688 && (mode_sz == 4 || mode_sz == 8)
10689 && GET_CODE (XEXP (*p, 0)) == PLUS
10690 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10691 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10692 && CONST_INT_P (XEXP (*p, 1))
10693 && ! (TARGET_SH2E && mode == SFmode))
10695 /* Because this address is so complex, we know it must have
10696 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10697 it is already unshared, and needs no further unsharing. */
10698 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10699 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10700 return true;
10703 return false;
10706 /* In the name of slightly smaller debug output, and to cater to
10707 general assembler lossage, recognize various UNSPEC sequences
10708 and turn them back into a direct symbol reference. */
10709 static rtx
10710 sh_delegitimize_address (rtx orig_x)
10712 rtx x, y;
10714 orig_x = delegitimize_mem_from_attrs (orig_x);
10716 x = orig_x;
10717 if (MEM_P (x))
10718 x = XEXP (x, 0);
10719 if (GET_CODE (x) == CONST)
10721 y = XEXP (x, 0);
10722 if (GET_CODE (y) == UNSPEC)
10724 if (XINT (y, 1) == UNSPEC_GOT
10725 || XINT (y, 1) == UNSPEC_GOTOFF
10726 || XINT (y, 1) == UNSPEC_SYMOFF)
10727 return XVECEXP (y, 0, 0);
10728 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10730 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10732 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10734 if (GET_CODE (symplt) == UNSPEC
10735 && XINT (symplt, 1) == UNSPEC_PLT)
10736 return XVECEXP (symplt, 0, 0);
10739 else if (TARGET_SHMEDIA
10740 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10741 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10743 rtx offset = XVECEXP (y, 0, 1);
10745 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10746 if (MEM_P (orig_x))
10747 x = replace_equiv_address_nv (orig_x, x);
10748 return x;
10753 return orig_x;
10756 /* Mark the use of a constant in the literal table. If the constant
10757 has multiple labels, make it unique. */
10758 static rtx
10759 mark_constant_pool_use (rtx x)
10761 rtx_insn *insn, *lab;
10762 rtx pattern;
10764 if (x == NULL_RTX)
10765 return x;
10767 switch (GET_CODE (x))
10769 case LABEL_REF:
10770 x = XEXP (x, 0);
10771 case CODE_LABEL:
10772 break;
10773 default:
10774 return x;
10777 /* Get the first label in the list of labels for the same constant
10778 and delete the other labels in the list. */
10779 lab = as_a <rtx_insn *> (x);
10780 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10782 if (!LABEL_P (insn)
10783 || LABEL_REFS (insn) != NEXT_INSN (insn))
10784 break;
10785 lab = insn;
10788 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10789 as_a<rtx_insn *> (insn)->set_deleted ();
10791 /* Mark constants in a window. */
10792 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10794 if (!NONJUMP_INSN_P (insn))
10795 continue;
10797 pattern = PATTERN (insn);
10798 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10799 continue;
10801 switch (XINT (pattern, 1))
10803 case UNSPECV_CONST2:
10804 case UNSPECV_CONST4:
10805 case UNSPECV_CONST8:
10806 XVECEXP (pattern, 0, 1) = const1_rtx;
10807 break;
10808 case UNSPECV_WINDOW_END:
10809 if (XVECEXP (pattern, 0, 0) == x)
10810 return lab;
10811 break;
10812 case UNSPECV_CONST_END:
10813 return lab;
10814 default:
10815 break;
10819 return lab;
10822 /* Return true if it's possible to redirect BRANCH1 to the destination
10823 of an unconditional jump BRANCH2. We only want to do this if the
10824 resulting branch will have a short displacement. */
10825 static bool
10826 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10828 /* Don't follow if BRANCH2 might be a jump crossing between
10829 hot and cold partitions. */
10830 if (TARGET_SH1
10831 && flag_reorder_blocks_and_partition
10832 && simplejump_p (branch2)
10833 && CROSSING_JUMP_P (branch2))
10834 return false;
10836 if (flag_expensive_optimizations && simplejump_p (branch2))
10838 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10839 rtx_insn *insn;
10840 int distance;
10842 for (distance = 0, insn = NEXT_INSN (branch1);
10843 insn && distance < 256;
10844 insn = PREV_INSN (insn))
10846 if (insn == dest)
10847 return true;
10848 else
10849 distance += get_attr_length (insn);
10851 for (distance = 0, insn = NEXT_INSN (branch1);
10852 insn && distance < 256;
10853 insn = NEXT_INSN (insn))
10855 if (insn == dest)
10856 return true;
10857 else
10858 distance += get_attr_length (insn);
10861 return false;
10864 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10865 bool
10866 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10867 unsigned int new_reg)
10869 /* Interrupt functions can only use registers that have already been
10870 saved by the prologue, even if they would normally be
10871 call-clobbered. */
10872 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10873 return false;
10875 return true;
10878 /* Function to update the integer COST
10879 based on the relationship between INSN that is dependent on
10880 DEP_INSN through the dependence LINK. The default is to make no
10881 adjustment to COST. This can be used for example to specify to
10882 the scheduler that an output- or anti-dependence does not incur
10883 the same cost as a data-dependence. The return value should be
10884 the new value for COST. */
10885 static int
10886 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10887 rtx_insn *dep_insn, int cost)
10889 rtx reg, use_pat;
10891 if (TARGET_SHMEDIA)
10893 /* On SHmedia, if the dependence is an anti-dependence or
10894 output-dependence, there is no cost. */
10895 if (REG_NOTE_KIND (link) != 0)
10897 /* However, dependencies between target register loads and
10898 uses of the register in a subsequent block that are separated
10899 by a conditional branch are not modelled - we have to make do with
10900 the anti-dependency between the target register load and the
10901 conditional branch that ends the current block. */
10902 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10903 && GET_CODE (PATTERN (dep_insn)) == SET
10904 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10905 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10906 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10908 int orig_cost = cost;
10909 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10910 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10911 ? insn : JUMP_LABEL (insn));
10912 /* On the likely path, the branch costs 1, on the unlikely path,
10913 it costs 3. */
10914 cost--;
10916 target = next_active_insn (target);
10917 while (target && ! flow_dependent_p (target, dep_insn)
10918 && --cost > 0);
10919 /* If two branches are executed in immediate succession, with the
10920 first branch properly predicted, this causes a stall at the
10921 second branch, hence we won't need the target for the
10922 second branch for two cycles after the launch of the first
10923 branch. */
10924 if (cost > orig_cost - 2)
10925 cost = orig_cost - 2;
10927 else
10928 cost = 0;
10931 else if (get_attr_is_mac_media (insn)
10932 && get_attr_is_mac_media (dep_insn))
10933 cost = 1;
10935 else if (! reload_completed
10936 && GET_CODE (PATTERN (insn)) == SET
10937 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10938 && GET_CODE (PATTERN (dep_insn)) == SET
10939 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10940 && cost < 4)
10941 cost = 4;
10942 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10943 that is needed at the target. */
10944 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10945 && ! flow_dependent_p (insn, dep_insn))
10946 cost--;
10948 else if (REG_NOTE_KIND (link) == 0)
10950 enum attr_type type;
10951 rtx dep_set;
10953 if (recog_memoized (insn) < 0
10954 || recog_memoized (dep_insn) < 0)
10955 return cost;
10957 dep_set = single_set (dep_insn);
10959 /* The latency that we specify in the scheduling description refers
10960 to the actual output, not to an auto-increment register; for that,
10961 the latency is one. */
10962 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10964 rtx set = single_set (insn);
10966 if (set
10967 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10968 && (!MEM_P (SET_DEST (set))
10969 || !reg_mentioned_p (SET_DEST (dep_set),
10970 XEXP (SET_DEST (set), 0))))
10971 cost = 1;
10973 /* The only input for a call that is timing-critical is the
10974 function's address. */
10975 if (CALL_P (insn))
10977 rtx call = get_call_rtx_from (insn);
10978 if (call
10979 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10980 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10981 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10982 cost -= TARGET_SH4_300 ? 3 : 6;
10984 /* Likewise, the most timing critical input for an sfunc call
10985 is the function address. However, sfuncs typically start
10986 using their arguments pretty quickly.
10987 Assume a four cycle delay for SH4 before they are needed.
10988 Cached ST40-300 calls are quicker, so assume only a one
10989 cycle delay there.
10990 ??? Maybe we should encode the delays till input registers
10991 are needed by sfuncs into the sfunc call insn. */
10992 /* All sfunc calls are parallels with at least four components.
10993 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10994 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10995 && XVECLEN (PATTERN (insn), 0) >= 4
10996 && (reg = sfunc_uses_reg (insn)))
10998 if (! reg_set_p (reg, dep_insn))
10999 cost -= TARGET_SH4_300 ? 1 : 4;
11001 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
11003 enum attr_type dep_type = get_attr_type (dep_insn);
11005 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
11006 cost--;
11007 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
11008 && (type = get_attr_type (insn)) != TYPE_CALL
11009 && type != TYPE_SFUNC)
11010 cost--;
11011 /* When the preceding instruction loads the shift amount of
11012 the following SHAD/SHLD, the latency of the load is increased
11013 by 1 cycle. */
11014 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11015 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11016 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11017 XEXP (SET_SRC (single_set (insn)),
11018 1)))
11019 cost++;
11020 /* When an LS group instruction with a latency of less than
11021 3 cycles is followed by a double-precision floating-point
11022 instruction, FIPR, or FTRV, the latency of the first
11023 instruction is increased to 3 cycles. */
11024 else if (cost < 3
11025 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11026 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11027 cost = 3;
11028 /* The lsw register of a double-precision computation is ready one
11029 cycle earlier. */
11030 else if (reload_completed
11031 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11032 && (use_pat = single_set (insn))
11033 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11034 SET_SRC (use_pat)))
11035 cost -= 1;
11037 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11038 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11039 cost -= 1;
11041 else if (TARGET_SH4_300)
11043 /* Stores need their input register two cycles later. */
11044 if (dep_set && cost >= 1
11045 && ((type = get_attr_type (insn)) == TYPE_STORE
11046 || type == TYPE_PSTORE
11047 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11049 rtx set = single_set (insn);
11051 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11052 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11054 cost -= 2;
11055 /* But don't reduce the cost below 1 if the address depends
11056 on a side effect of dep_insn. */
11057 if (cost < 1
11058 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11059 cost = 1;
11064 /* An anti-dependence penalty of two applies if the first insn is a double
11065 precision fadd / fsub / fmul. */
11066 else if (!TARGET_SH4_300
11067 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11068 && recog_memoized (dep_insn) >= 0
11069 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11070 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11071 /* A lot of alleged anti-flow dependences are fake,
11072 so check this one is real. */
11073 && flow_dependent_p (dep_insn, insn))
11074 cost = 2;
11076 return cost;
11079 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11080 if DEP_INSN is anti-flow dependent on INSN. */
11081 static bool
11082 flow_dependent_p (rtx insn, rtx dep_insn)
11084 rtx tmp = PATTERN (insn);
11086 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11087 return tmp == NULL_RTX;
11090 /* A helper function for flow_dependent_p called through note_stores. */
11091 static void
11092 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11094 rtx * pinsn = (rtx *) data;
11096 if (*pinsn && reg_referenced_p (x, *pinsn))
11097 *pinsn = NULL_RTX;
11100 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11101 'special function' patterns (type sfunc) that clobber pr, but that
11102 do not look like function calls to leaf_function_p. Hence we must
11103 do this extra check. */
11104 static int
11105 sh_pr_n_sets (void)
11107 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11110 /* Return where to allocate pseudo for a given hard register initial
11111 value. */
11112 static rtx
11113 sh_allocate_initial_value (rtx hard_reg)
11115 rtx x;
11117 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11119 if (crtl->is_leaf
11120 && ! sh_pr_n_sets ()
11121 && ! (TARGET_SHCOMPACT
11122 && ((crtl->args.info.call_cookie
11123 & ~ CALL_COOKIE_RET_TRAMP (1))
11124 || crtl->saves_all_registers)))
11125 x = hard_reg;
11126 else
11127 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11129 else
11130 x = NULL_RTX;
11132 return x;
11135 /* This function returns "2" to indicate dual issue for the SH4
11136 processor. To be used by the DFA pipeline description. */
11137 static int
11138 sh_issue_rate (void)
11140 if (TARGET_SUPERSCALAR)
11141 return 2;
11142 else
11143 return 1;
11146 /* Functions for ready queue reordering for sched1. */
11148 /* Get weight for mode for a set x. */
11149 static short
11150 find_set_regmode_weight (rtx x, machine_mode mode)
11152 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11153 return 1;
11154 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11156 if (REG_P (SET_DEST (x)))
11158 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11159 return 1;
11160 else
11161 return 0;
11163 return 1;
11165 return 0;
11168 /* Get regmode weight for insn. */
11169 static short
11170 find_insn_regmode_weight (rtx insn, machine_mode mode)
11172 short reg_weight = 0;
11173 rtx x;
11175 /* Increment weight for each register born here. */
11176 x = PATTERN (insn);
11177 reg_weight += find_set_regmode_weight (x, mode);
11178 if (GET_CODE (x) == PARALLEL)
11180 int j;
11181 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11183 x = XVECEXP (PATTERN (insn), 0, j);
11184 reg_weight += find_set_regmode_weight (x, mode);
11187 /* Decrement weight for each register that dies here. */
11188 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11190 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11192 rtx note = XEXP (x, 0);
11193 if (REG_P (note) && GET_MODE (note) == mode)
11194 reg_weight--;
11197 return reg_weight;
11200 /* Calculate regmode weights for all insns of a basic block. */
11201 static void
11202 find_regmode_weight (basic_block b, machine_mode mode)
11204 rtx_insn *insn, *next_tail, *head, *tail;
11206 get_ebb_head_tail (b, b, &head, &tail);
11207 next_tail = NEXT_INSN (tail);
11209 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11211 /* Handle register life information. */
11212 if (!INSN_P (insn))
11213 continue;
11215 if (mode == SFmode)
11216 INSN_REGMODE_WEIGHT (insn, mode) =
11217 find_insn_regmode_weight (insn, mode)
11218 + 2 * find_insn_regmode_weight (insn, DFmode);
11219 else if (mode == SImode)
11220 INSN_REGMODE_WEIGHT (insn, mode) =
11221 find_insn_regmode_weight (insn, mode)
11222 + 2 * find_insn_regmode_weight (insn, DImode);
11226 /* Comparison function for ready queue sorting. */
11227 static int
11228 rank_for_reorder (const void *x, const void *y)
11230 rtx_insn *tmp = *(rtx_insn * const *) y;
11231 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11233 /* The insn in a schedule group should be issued first. */
11234 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11235 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11237 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11238 minimizes instruction movement, thus minimizing sched's effect on
11239 register pressure. */
11240 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11243 /* Resort the array A in which only the element at index N may be out of order. */
11244 static void
11245 swap_reorder (rtx_insn **a, int n)
11247 rtx_insn *insn = a[n - 1];
11248 int i = n - 2;
11250 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11252 a[i + 1] = a[i];
11253 i -= 1;
11255 a[i + 1] = insn;
11258 /* Sort the ready list by ascending priority. */
11259 static void
11260 ready_reorder (rtx_insn **ready, int nready)
11262 if (nready == 2)
11263 swap_reorder (ready, nready);
11264 else if (nready > 2)
11265 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11268 /* Count life regions of r0 for a block. */
11269 static int
11270 find_r0_life_regions (basic_block b)
11272 rtx_insn *end, *insn;
11273 rtx pset;
11274 rtx r0_reg;
11275 int live;
11276 int set;
11277 int death = 0;
11279 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11281 set = 1;
11282 live = 1;
11284 else
11286 set = 0;
11287 live = 0;
11290 insn = BB_HEAD (b);
11291 end = BB_END (b);
11292 r0_reg = gen_rtx_REG (SImode, R0_REG);
11293 while (1)
11295 if (INSN_P (insn))
11297 if (find_regno_note (insn, REG_DEAD, R0_REG))
11299 death++;
11300 live = 0;
11302 if (!live
11303 && (pset = single_set (insn))
11304 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11305 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11307 set++;
11308 live = 1;
11311 if (insn == end)
11312 break;
11313 insn = NEXT_INSN (insn);
11315 return set - death;
11318 /* Calculate regmode weights for all insns of all basic blocks. */
11319 static void
11320 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11321 int verbose ATTRIBUTE_UNUSED,
11322 int old_max_uid)
11324 basic_block b;
11326 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11327 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11328 r0_life_regions = 0;
11330 FOR_EACH_BB_REVERSE_FN (b, cfun)
11332 find_regmode_weight (b, SImode);
11333 find_regmode_weight (b, SFmode);
11334 if (!reload_completed)
11335 r0_life_regions += find_r0_life_regions (b);
11338 CURR_REGMODE_PRESSURE (SImode) = 0;
11339 CURR_REGMODE_PRESSURE (SFmode) = 0;
11342 /* Cleanup. */
11343 static void
11344 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11345 int verbose ATTRIBUTE_UNUSED)
11347 if (regmode_weight[0])
11349 free (regmode_weight[0]);
11350 regmode_weight[0] = NULL;
11352 if (regmode_weight[1])
11354 free (regmode_weight[1]);
11355 regmode_weight[1] = NULL;
11359 /* The scalar modes supported differ from the default version in TImode
11360 for 32-bit SHMEDIA. */
11361 static bool
11362 sh_scalar_mode_supported_p (machine_mode mode)
11364 if (TARGET_SHMEDIA32 && mode == TImode)
11365 return false;
11367 return default_scalar_mode_supported_p (mode);
11370 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11371 keep count of register pressures on SImode and SFmode. */
11372 static int
11373 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11374 int sched_verbose ATTRIBUTE_UNUSED,
11375 rtx_insn *insn,
11376 int can_issue_more)
11378 if (GET_CODE (PATTERN (insn)) != USE
11379 && GET_CODE (PATTERN (insn)) != CLOBBER)
11380 cached_can_issue_more = can_issue_more - 1;
11381 else
11382 cached_can_issue_more = can_issue_more;
11384 if (reload_completed)
11385 return cached_can_issue_more;
11387 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11388 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11390 return cached_can_issue_more;
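/* Implement TARGET_SCHED_INIT.  Reset the SImode and SFmode register
   pressure counters at the start of each scheduling region.  */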
11393 static void
11394 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11395 int verbose ATTRIBUTE_UNUSED,
11396 int veclen ATTRIBUTE_UNUSED)
11398 CURR_REGMODE_PRESSURE (SImode) = 0;
11399 CURR_REGMODE_PRESSURE (SFmode) = 0;
11402 /* Some magic numbers. */
11403 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11404 functions that already have high pressure on r0. */
11405 #define R0_MAX_LIFE_REGIONS 2
11406 /* Register pressure thresholds for SImode and SFmode registers. */
11407 #define SIMODE_MAX_WEIGHT 5
11408 #define SFMODE_MAX_WEIGHT 10
11410 /* Return true if the pressure is high for MODE. */
11411 static bool
11412 high_pressure (machine_mode mode)
11414 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11415 functions that already have high pressure on r0. */
11416 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11417 return true;
11419 if (mode == SFmode)
11420 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11421 else
11422 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11425 /* Reorder ready queue if register pressure is high. */
11426 static int
11427 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11428 int sched_verbose ATTRIBUTE_UNUSED,
11429 rtx_insn **ready,
11430 int *n_readyp,
11431 int clock_var ATTRIBUTE_UNUSED)
11433 if (reload_completed)
11434 return sh_issue_rate ();
11436 if (high_pressure (SFmode) || high_pressure (SImode))
11438 ready_reorder (ready, *n_readyp);
11441 return sh_issue_rate ();
11444 /* Skip cycles if the current register pressure is high. */
11445 static int
11446 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11447 int sched_verbose ATTRIBUTE_UNUSED,
11448 rtx_insn **ready ATTRIBUTE_UNUSED,
11449 int *n_readyp ATTRIBUTE_UNUSED,
11450 int clock_var ATTRIBUTE_UNUSED)
11452 if (reload_completed)
11453 return cached_can_issue_more;
11455 if (high_pressure(SFmode) || high_pressure (SImode))
11456 skip_cycles = 1;
11458 return cached_can_issue_more;
11461 /* Skip cycles without sorting the ready queue. This will move insns from
11462 Q -> R. If this is the last cycle we are skipping, allow sorting of the
11463 ready queue by sh_reorder. */
11465 /* Generally, skipping this many cycles is sufficient for all insns to move
11466 from Q -> R. */
11467 #define MAX_SKIPS 8
11469 static int
11470 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11471 int sched_verbose ATTRIBUTE_UNUSED,
11472 rtx_insn *insn ATTRIBUTE_UNUSED,
11473 int last_clock_var,
11474 int clock_var,
11475 int *sort_p)
11477 if (reload_completed)
11478 return 0;
11480 if (skip_cycles)
11482 if ((clock_var - last_clock_var) < MAX_SKIPS)
11484 *sort_p = 0;
11485 return 1;
11487 /* If this is the last cycle we are skipping, allow reordering of R. */
11488 if ((clock_var - last_clock_var) == MAX_SKIPS)
11490 *sort_p = 1;
11491 return 1;
11495 skip_cycles = 0;
11497 return 0;
11500 /* SHmedia requires registers for branches, so we can't generate new
11501 branches past reload. */
11502 static bool
11503 sh_cannot_modify_jumps_p (void)
11505 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
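/* Return the register class used for branch target registers; only SHmedia
   has such registers, so this is NO_REGS for the other subtargets.  */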
11508 static reg_class_t
11509 sh_target_reg_class (void)
11511 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
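/* Return true if it is worthwhile to use callee-saved target registers:
   space must have been reserved for them, all target registers must be
   saved when this is called after prologue/epilogue generation, and the
   register save area must be at least 6 * 8 bytes for it to pay off.  */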
11514 static bool
11515 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11517 if (! shmedia_space_reserved_for_target_registers)
11518 return 0;
11519 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11520 return 0;
11522 HARD_REG_SET dummy;
11523 if (calc_live_regs (&dummy) >= 6 * 8)
11524 return 1;
11525 return 0;
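/* Implement TARGET_MS_BITFIELD_LAYOUT_P.  The MS-compatible bit-field
   layout is used for SH5 and for the Hitachi / Renesas ABI.  */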
11528 static bool
11529 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11531 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11535 On the SH1..SH4, the trampoline looks like
11536 2 0002 D202 mov.l l2,r2
11537 1 0000 D301 mov.l l1,r3
11538 3 0004 422B jmp @r2
11539 4 0006 0009 nop
11540 5 0008 00000000 l1: .long area
11541 6 000c 00000000 l2: .long function
11543 SH5 (compact) uses r1 instead of r3 for the static chain. */
11546 /* Emit RTL insns to initialize the variable parts of a trampoline.
11547 FNADDR is an RTX for the address of the function's pure code.
11548 CXT is an RTX for the static chain value for the function. */
11549 static void
11550 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11552 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11553 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11555 if (TARGET_SHMEDIA64)
11557 rtx tramp_templ;
11558 int fixed_len;
11560 rtx movi1 = GEN_INT (0xcc000010);
11561 rtx shori1 = GEN_INT (0xc8000010);
11562 rtx src, dst;
11564 /* The following trampoline works within a +- 128 KB range for cxt:
11565 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11566 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11567 gettr tr1,r1; blink tr0,r63 */
11568 /* Address rounding makes it hard to compute the exact bounds of the
11569 offset for this trampoline, but we have a rather generous offset
11570 range, so frame_offset should do fine as an upper bound. */
11571 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11573 /* ??? could optimize this trampoline initialization
11574 by writing DImode words with two insns each. */
11575 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11576 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11577 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11578 insn = gen_rtx_AND (DImode, insn, mask);
11579 /* Or in the ptb/u .,tr1 pattern. */
11580 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11581 insn = force_operand (insn, NULL_RTX);
11582 insn = gen_lowpart (SImode, insn);
11583 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11584 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11585 insn = gen_rtx_AND (DImode, insn, mask);
11586 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11587 insn = gen_lowpart (SImode, insn);
11588 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11589 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11590 insn = gen_rtx_AND (DImode, insn, mask);
11591 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11592 insn = gen_lowpart (SImode, insn);
11593 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11594 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11595 insn = gen_rtx_AND (DImode, insn, mask);
11596 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11597 insn = gen_lowpart (SImode, insn);
11598 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11599 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11600 insn = gen_rtx_AND (DImode, insn, mask);
11601 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11602 insn = gen_lowpart (SImode, insn);
11603 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11604 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11605 GEN_INT (0x6bf10600));
11606 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11607 GEN_INT (0x4415fc10));
11608 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11609 GEN_INT (0x4401fff0));
11610 emit_insn (gen_ic_invalidate_line (tramp));
11611 return;
11613 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11614 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11616 tramp_templ = gen_datalabel_ref (tramp_templ);
11617 dst = tramp_mem;
11618 src = gen_const_mem (BLKmode, tramp_templ);
11619 set_mem_align (dst, 256);
11620 set_mem_align (src, 64);
11621 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11623 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11624 emit_move_insn (adjust_address (tramp_mem, Pmode,
11625 fixed_len + GET_MODE_SIZE (Pmode)),
11626 cxt);
11627 emit_insn (gen_ic_invalidate_line (tramp));
11628 return;
11630 else if (TARGET_SHMEDIA)
11632 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11633 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11634 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11635 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11636 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11637 rotated 10 right, and the higher 16 bits of every 32 selected. */
11638 rtx movishori
11639 = force_reg (V2HImode, (simplify_gen_subreg
11640 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11641 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11642 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11644 fnaddr = force_reg (SImode, fnaddr);
11645 cxt = force_reg (SImode, cxt);
11646 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11647 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11648 movishori));
11649 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11650 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11651 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11652 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11653 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11654 gen_rtx_SUBREG (V2HImode, cxt, 0),
11655 movishori));
11656 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11657 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11658 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11659 if (TARGET_LITTLE_ENDIAN)
11661 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11662 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11664 else
11666 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11667 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11669 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11670 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11671 emit_insn (gen_ic_invalidate_line (tramp));
11672 return;
11674 else if (TARGET_SHCOMPACT)
11676 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11677 return;
11679 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11680 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11681 SImode));
11682 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11683 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11684 SImode));
11685 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11686 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11687 if (TARGET_HARD_SH4 || TARGET_SH5)
11689 if (!TARGET_INLINE_IC_INVALIDATE
11690 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11691 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11692 FUNCTION_ORDINARY),
11693 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11694 else
11695 emit_insn (gen_ic_invalidate_line (tramp));
11699 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11700 static rtx
11701 sh_trampoline_adjust_address (rtx tramp)
11703 if (TARGET_SHMEDIA)
11704 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11705 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11706 return tramp;
11709 /* FIXME: This is overly conservative. A SHcompact function that
11710 receives arguments ``by reference'' will have them stored in its
11711 own stack frame, so it must not pass pointers or references to
11712 these arguments to other functions by means of sibling calls. */
11713 /* If PIC, we cannot make sibling calls to global functions
11714 because the PLT requires r12 to be live. */
11715 static bool
11716 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11718 return (1
11719 && (! TARGET_SHCOMPACT
11720 || crtl->args.info.stack_regs == 0)
11721 && ! sh_cfun_interrupt_handler_p ()
11722 && (! flag_pic
11723 || (decl && ! TREE_PUBLIC (decl))
11724 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11727 /* Machine specific built-in functions. */
11729 struct builtin_description
11731 bool (* const is_enabled) (void);
11732 const enum insn_code icode;
11733 const char *const name;
11734 int signature;
11735 tree fndecl;
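/* Return true if the SHmedia built-in functions are enabled.  Used as the
   is_enabled predicate in the bdesc table below.  */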
11738 static bool
11739 shmedia_builtin_p (void)
11741 return TARGET_SHMEDIA;
11744 /* This function can be used for built-ins that are not SHmedia specific;
11745 it is the predicate for the SH1 fpscr built-ins below. */
11746 static bool
11747 sh1_builtin_p (void)
11749 return TARGET_SH1;
11752 /* Describe number and signedness of arguments; arg[0] == result
11753 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11754 /* 9: 64-bit pointer, 10: 32-bit pointer */
11755 static const char signature_args[][4] =
11757 #define SH_BLTIN_V2SI2 0
11758 { 4, 4 },
11759 #define SH_BLTIN_V4HI2 1
11760 { 4, 4 },
11761 #define SH_BLTIN_V2SI3 2
11762 { 4, 4, 4 },
11763 #define SH_BLTIN_V4HI3 3
11764 { 4, 4, 4 },
11765 #define SH_BLTIN_V8QI3 4
11766 { 4, 4, 4 },
11767 #define SH_BLTIN_MAC_HISI 5
11768 { 1, 4, 4, 1 },
11769 #define SH_BLTIN_SH_HI 6
11770 { 4, 4, 1 },
11771 #define SH_BLTIN_SH_SI 7
11772 { 4, 4, 1 },
11773 #define SH_BLTIN_V4HI2V2SI 8
11774 { 4, 4, 4 },
11775 #define SH_BLTIN_V4HI2V8QI 9
11776 { 4, 4, 4 },
11777 #define SH_BLTIN_SISF 10
11778 { 4, 2 },
11779 #define SH_BLTIN_LDUA_L 11
11780 { 2, 10 },
11781 #define SH_BLTIN_LDUA_Q 12
11782 { 1, 10 },
11783 #define SH_BLTIN_STUA_L 13
11784 { 0, 10, 2 },
11785 #define SH_BLTIN_STUA_Q 14
11786 { 0, 10, 1 },
11787 #define SH_BLTIN_LDUA_L64 15
11788 { 2, 9 },
11789 #define SH_BLTIN_LDUA_Q64 16
11790 { 1, 9 },
11791 #define SH_BLTIN_STUA_L64 17
11792 { 0, 9, 2 },
11793 #define SH_BLTIN_STUA_Q64 18
11794 { 0, 9, 1 },
11795 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11796 #define SH_BLTIN_2 19
11797 #define SH_BLTIN_SU 19
11798 { 1, 2 },
11799 #define SH_BLTIN_3 20
11800 #define SH_BLTIN_SUS 20
11801 { 2, 2, 1 },
11802 #define SH_BLTIN_PSSV 21
11803 { 0, 8, 2, 2 },
11804 #define SH_BLTIN_XXUU 22
11805 #define SH_BLTIN_UUUU 22
11806 { 1, 1, 1, 1 },
11807 #define SH_BLTIN_PV 23
11808 { 0, 8 },
11809 #define SH_BLTIN_VP 24
11810 { 8, 0 },
11811 #define SH_BLTIN_UV 25
11812 { 1, 0 },
11813 #define SH_BLTIN_VU 26
11814 { 0, 1 },
11816 /* mcmv: operands considered unsigned. */
11817 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11818 /* mperm: control value considered unsigned int. */
11819 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11820 /* mshards_q: returns signed short. */
11821 /* nsb: takes long long arg, returns unsigned char. */
11822 static struct builtin_description bdesc[] =
11824 { shmedia_builtin_p,
11825 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11880 { shmedia_builtin_p,
11881 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11882 { shmedia_builtin_p,
11883 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11884 { shmedia_builtin_p,
11885 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11886 { shmedia_builtin_p,
11887 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11888 { shmedia_builtin_p,
11889 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11890 { shmedia_builtin_p,
11891 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11892 { shmedia_builtin_p,
11893 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11894 { shmedia_builtin_p,
11895 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11896 { shmedia_builtin_p,
11897 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11898 { shmedia_builtin_p,
11899 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11900 { shmedia_builtin_p,
11901 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11902 { shmedia_builtin_p,
11903 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11904 { shmedia_builtin_p,
11905 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11906 { shmedia_builtin_p,
11907 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11908 { shmedia_builtin_p,
11909 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11910 { shmedia_builtin_p,
11911 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11912 { shmedia_builtin_p,
11913 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11914 { shmedia_builtin_p,
11915 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11916 { shmedia_builtin_p,
11917 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11918 { shmedia_builtin_p,
11919 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11920 { shmedia_builtin_p,
11921 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11922 { shmedia_builtin_p,
11923 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11924 { shmedia_builtin_p,
11925 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11926 { shmedia_builtin_p,
11927 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11928 { shmedia_builtin_p,
11929 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11930 { shmedia_builtin_p,
11931 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11932 { shmedia_builtin_p,
11933 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11934 { shmedia_builtin_p,
11935 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11936 { shmedia_builtin_p,
11937 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11938 { shmedia_builtin_p,
11939 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11940 { shmedia_builtin_p,
11941 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11942 { shmedia_builtin_p,
11943 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11944 { shmedia_builtin_p,
11945 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11946 { shmedia_builtin_p,
11947 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11948 { shmedia_builtin_p,
11949 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11950 { shmedia_builtin_p,
11951 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11952 { shmedia_builtin_p,
11953 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11954 { shmedia_builtin_p,
11955 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11956 { shmedia_builtin_p,
11957 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11958 { shmedia_builtin_p,
11959 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11960 { shmedia_builtin_p,
11961 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11962 { shmedia_builtin_p,
11963 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11964 { shmedia_builtin_p,
11965 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11966 { shmedia_builtin_p,
11967 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11968 { shmedia_builtin_p,
11969 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11970 { shmedia_builtin_p,
11971 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11972 { shmedia_builtin_p,
11973 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11974 { shmedia_builtin_p,
11975 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11976 { shmedia_builtin_p,
11977 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11978 { shmedia_builtin_p,
11979 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11980 { shmedia_builtin_p,
11981 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11982 { shmedia_builtin_p,
11983 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11984 { shmedia_builtin_p,
11985 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11986 { shmedia_builtin_p,
11987 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11988 { shmedia_builtin_p,
11989 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11991 { sh1_builtin_p,
11992 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11993 { sh1_builtin_p,
11994 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
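/* Decls of the fpscr built-ins, recorded by sh_init_builtins for use in
   sh_atomic_assign_expand_fenv.  */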
11997 static tree sh_builtin_get_fpscr;
11998 static tree sh_builtin_set_fpscr;
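/* Implement TARGET_INIT_BUILTINS.  Create decls for the built-ins described
   in bdesc, sharing the function type between built-ins that use the same
   signature.  */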
12000 static void
12001 sh_init_builtins (void)
12003 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
12004 memset (shared, 0, sizeof shared);
12006 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12008 builtin_description* d = &bdesc[di];
12010 if (!d->is_enabled ())
12011 continue;
12013 tree type, arg_type = NULL_TREE;
12014 int signature = d->signature;
12016 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12017 type = shared[signature];
12018 else
12020 int has_result = signature_args[signature][0] != 0;
12021 tree args[3];
12023 if ((signature_args[signature][1] & 8)
12024 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12025 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12026 continue;
12027 if (! TARGET_FPU_ANY
12028 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12029 continue;
12030 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12031 args[i] = NULL_TREE;
12032 for (int i = 3; ; i--)
12034 int arg = signature_args[signature][i];
12035 int opno = i - 1 + has_result;
12037 if (arg & 8)
12038 arg_type = ptr_type_node;
12039 else if (arg)
12040 arg_type = (*lang_hooks.types.type_for_mode)
12041 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12042 else if (i)
12043 continue;
12044 else
12045 arg_type = void_type_node;
12046 if (i == 0)
12047 break;
12048 args[i-1] = arg_type;
12050 type = build_function_type_list (arg_type, args[0], args[1],
12051 args[2], NULL_TREE);
12052 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12053 shared[signature] = type;
12055 d->fndecl =
12056 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12057 NULL, NULL_TREE);
12058 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12059 if (d->icode == CODE_FOR_sts_fpscr)
12060 sh_builtin_get_fpscr = d->fndecl;
12061 else if (d->icode == CODE_FOR_set_fpscr)
12062 sh_builtin_set_fpscr = d->fndecl;
12066 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12068 static void
12069 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12071 const unsigned SH_FE_INVALID = 64;
12072 const unsigned SH_FE_DIVBYZERO = 32;
12073 const unsigned SH_FE_OVERFLOW = 16;
12074 const unsigned SH_FE_UNDERFLOW = 8;
12075 const unsigned SH_FE_INEXACT = 4;
12076 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12077 | SH_FE_DIVBYZERO
12078 | SH_FE_OVERFLOW
12079 | SH_FE_UNDERFLOW
12080 | SH_FE_INEXACT);
12081 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
12082 tree fenv_var, mask, ld_fenv, masked_fenv;
12083 tree new_fenv_var, reload_fenv, restore_fnenv;
12084 tree update_call, atomic_feraiseexcept, hold_fnclex;
12086 if (! TARGET_FPU_ANY)
12087 return;
12089 /* Generate the equivalent of :
12090 unsigned int fenv_var;
12091 fenv_var = __builtin_sh_get_fpscr ();
12093 unsigned int masked_fenv;
12094 masked_fenv = fenv_var & mask;
12096 __builtin_sh_set_fpscr (masked_fenv); */
12098 fenv_var = create_tmp_var (unsigned_type_node);
12099 mask = build_int_cst (unsigned_type_node,
12100 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12101 | SH_FE_ALL_EXCEPT));
12102 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12103 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12104 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12105 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12106 *hold = build2 (COMPOUND_EXPR, void_type_node,
12107 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12108 hold_fnclex);
12110 /* Store the value of masked_fenv to clear the exceptions:
12111 __builtin_sh_set_fpscr (masked_fenv); */
12113 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12115 /* Generate the equivalent of :
12116 unsigned int new_fenv_var;
12117 new_fenv_var = __builtin_sh_get_fpscr ();
12119 __builtin_sh_set_fpscr (fenv_var);
12121 __atomic_feraiseexcept (new_fenv_var); */
12123 new_fenv_var = create_tmp_var (unsigned_type_node);
12124 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12125 build_call_expr (sh_builtin_get_fpscr, 0));
12126 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12127 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12128 update_call = build_call_expr (atomic_feraiseexcept, 1,
12129 fold_convert (integer_type_node,
12130 new_fenv_var));
12131 *update = build2 (COMPOUND_EXPR, void_type_node,
12132 build2 (COMPOUND_EXPR, void_type_node,
12133 reload_fenv, restore_fnenv), update_call);
12136 /* Implements target hook vector_mode_supported_p. */
12137 bool
12138 sh_vector_mode_supported_p (machine_mode mode)
12140 if (TARGET_FPU_ANY
12141 && ((mode == V2SFmode)
12142 || (mode == V4SFmode)
12143 || (mode == V16SFmode)))
12144 return true;
12146 else if (TARGET_SHMEDIA
12147 && ((mode == V8QImode)
12148 || (mode == V2HImode)
12149 || (mode == V4HImode)
12150 || (mode == V2SImode)))
12151 return true;
12153 return false;
12156 bool
12157 sh_frame_pointer_required (void)
12159 /* If needed override this in other tm.h files to cope with various OS
12160 lossage requiring a frame pointer. */
12161 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12162 return true;
12164 if (crtl->profile)
12165 return true;
12167 return false;
12170 /* Implements target hook dwarf_calling_convention. Return an enum
12171 dwarf_calling_convention value. */
12173 sh_dwarf_calling_convention (const_tree func)
12175 if (sh_attr_renesas_p (func))
12176 return DW_CC_GNU_renesas_sh;
12178 return DW_CC_normal;
12181 /* Returns the sh builtin decl for CODE. */
12182 static tree
12183 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12185 if (code >= ARRAY_SIZE (bdesc))
12186 return error_mark_node;
12188 if (!bdesc[code].is_enabled ())
12189 return error_mark_node;
12191 return bdesc[code].fndecl;
12194 /* Expand an expression EXP that calls a built-in function,
12195 with result going to TARGET if that's convenient
12196 (and in mode MODE if that's convenient).
12197 SUBTARGET may be used as the target for computing one of EXP's operands.
12198 IGNORE is nonzero if the value is to be ignored. */
12199 static rtx
12200 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12201 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12203 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12204 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12205 const struct builtin_description *d = &bdesc[fcode];
12206 enum insn_code icode = d->icode;
12207 int signature = d->signature;
12208 int nop = 0;
12209 rtx op[4];
12211 if (signature_args[signature][0])
12213 if (ignore)
12214 return NULL_RTX;
12216 machine_mode tmode = insn_data[icode].operand[0].mode;
12217 if (! target || GET_MODE (target) != tmode
12218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12219 target = gen_reg_rtx (tmode);
12220 op[nop++] = target;
12222 else
12223 target = NULL_RTX;
12225 for (int i = 1; i <= 3; i++, nop++)
12227 tree arg;
12228 machine_mode opmode, argmode;
12229 tree optype;
12231 if (! signature_args[signature][i])
12232 break;
12233 arg = CALL_EXPR_ARG (exp, i - 1);
12234 if (arg == error_mark_node)
12235 return const0_rtx;
12236 if (signature_args[signature][i] & 8)
12238 opmode = ptr_mode;
12239 optype = ptr_type_node;
12241 else
12243 opmode = insn_data[icode].operand[nop].mode;
12244 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12246 argmode = TYPE_MODE (TREE_TYPE (arg));
12247 if (argmode != opmode)
12248 arg = build1 (NOP_EXPR, optype, arg);
12249 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12250 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12251 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12254 rtx pat = NULL_RTX;
12256 switch (nop)
12258 case 1:
12259 pat = (*insn_data[d->icode].genfun) (op[0]);
12260 break;
12261 case 2:
12262 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12263 break;
12264 case 3:
12265 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12266 break;
12267 case 4:
12268 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12269 break;
12270 default:
12271 gcc_unreachable ();
12273 if (! pat)
12274 return NULL_RTX;
12275 emit_insn (pat);
12276 return target;
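/* Expand a V2SF unary operation CODE with destination OP0 and source OP1
   by applying the scalar SFmode operation to each of the two elements.  */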
12279 void
12280 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12282 rtx sel0 = const0_rtx;
12283 rtx sel1 = const1_rtx;
12284 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12285 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12287 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12288 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
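/* Expand a V2SF binary operation CODE with destination OP0 and sources OP1
   and OP2, again one element at a time.  */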
12291 void
12292 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12294 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12296 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12297 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12300 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12301 We can allow any mode in any general register. The special registers
12302 only allow SImode. Don't allow any mode in the PR.
12304 We cannot hold DCmode values in the XD registers because alter_reg
12305 handles subregs of them incorrectly. We could work around this by
12306 spacing the XD registers like the DR registers, but this would require
12307 additional memory in every compilation to hold larger register vectors.
12308 We could hold SFmode / SCmode values in XD registers, but that
12309 would require a tertiary reload when reloading from / to memory,
12310 and a secondary reload to reload from / to general regs; that
12311 seems to be a losing proposition.
12313 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12314 it won't be ferried through GP registers first. */
12315 bool
12316 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12318 if (SPECIAL_REGISTER_P (regno))
12319 return mode == SImode;
12321 if (regno == FPUL_REG)
12322 return (mode == SImode || mode == SFmode);
12324 if (FP_REGISTER_P (regno) && mode == SFmode)
12325 return true;
12327 if (mode == V2SFmode)
12329 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12330 || GENERAL_REGISTER_P (regno)))
12331 return true;
12332 else
12333 return false;
12336 if (mode == V4SFmode)
12338 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12339 || GENERAL_REGISTER_P (regno))
12340 return true;
12341 else
12342 return false;
12345 if (mode == V16SFmode)
12347 if (TARGET_SHMEDIA)
12349 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12350 return true;
12351 else
12352 return false;
12354 else
12355 return regno == FIRST_XD_REG;
12358 if (FP_REGISTER_P (regno))
12360 if (mode == SFmode
12361 || mode == SImode
12362 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12363 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12364 || mode == DCmode
12365 || (TARGET_SHMEDIA
12366 && (mode == DFmode || mode == DImode
12367 || mode == V2SFmode || mode == TImode)))
12368 && ((regno - FIRST_FP_REG) & 1) == 0)
12369 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12370 && ((regno - FIRST_FP_REG) & 3) == 0))
12371 return true;
12372 else
12373 return false;
12376 if (XD_REGISTER_P (regno))
12377 return mode == DFmode;
12379 if (TARGET_REGISTER_P (regno))
12380 return (mode == DImode || mode == SImode || mode == PDImode);
12382 if (regno == PR_REG)
12383 return mode == SImode;
12385 if (regno == FPSCR_REG)
12386 return mode == SImode;
12388 /* FIXME. This works around PR target/37633 for -O0. */
12389 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12391 unsigned int n = GET_MODE_SIZE (mode) / 8;
12393 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12394 && regno <= FIRST_GENERAL_REG + 14)
12395 return false;
12398 return true;
12401 /* Specify the modes required to caller save a given hard regno.
12402 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12403 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12404 permits integer modes on them. That makes LRA's split process
12405 unhappy. See PR55212.
12407 machine_mode
12408 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12409 machine_mode mode)
12411 if (FP_REGISTER_P (regno)
12412 && (mode == SFmode
12413 || mode == SCmode
12414 || ((mode == DFmode || mode == DCmode)
12415 && ((regno - FIRST_FP_REG) & 1) == 0)))
12416 return mode;
12418 return choose_hard_reg_mode (regno, nregs, false);
12421 /* Return true if a mode change from FROM to TO is invalid for register
12422 class RCLASS. */
12423 bool
12424 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12425 enum reg_class rclass)
12427 /* We want to enable the use of SUBREGs as a means to
12428 VEC_SELECT a single element of a vector. */
12430 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12431 This can be problematic when SFmode vector subregs need to be accessed
12432 on the stack with displacement addressing, as it happens with -O0.
12433 Thus we disallow the mode change for -O0. */
12434 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12435 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12437 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12439 if (TARGET_LITTLE_ENDIAN)
12441 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12442 return reg_classes_intersect_p (DF_REGS, rclass);
12444 else
12446 if (GET_MODE_SIZE (from) < 8)
12447 return reg_classes_intersect_p (DF_REGS, rclass);
12450 return false;
12453 /* Return true if registers in machine mode MODE will likely be
12454 allocated to registers in small register classes. */
12455 bool
12456 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12458 return (! TARGET_SHMEDIA);
12461 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12462 that label is used. */
12463 void
12464 sh_mark_label (rtx address, int nuses)
12466 if (GOTOFF_P (address))
12468 /* Extract the label or symbol. */
12469 address = XEXP (address, 0);
12470 if (GET_CODE (address) == PLUS)
12471 address = XEXP (address, 0);
12472 address = XVECEXP (address, 0, 0);
12474 if (GET_CODE (address) == LABEL_REF
12475 && LABEL_P (XEXP (address, 0)))
12476 LABEL_NUSES (XEXP (address, 0)) += nuses;
12479 /* Compute extra cost of moving data between one register class
12480 and another.
12482 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12483 uses this information. Hence, the general register <-> floating point
12484 register information here is not used for SFmode. */
12485 static int
12486 sh_register_move_cost (machine_mode mode,
12487 reg_class_t srcclass, reg_class_t dstclass)
12489 if (dstclass == T_REGS || dstclass == PR_REGS)
12490 return 10;
12492 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12493 return 4;
12495 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12496 && REGCLASS_HAS_FP_REG (srcclass)
12497 && REGCLASS_HAS_FP_REG (dstclass))
12498 return 4;
12500 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12501 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12503 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12504 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12505 return 9;
12507 if ((REGCLASS_HAS_FP_REG (dstclass)
12508 && REGCLASS_HAS_GENERAL_REG (srcclass))
12509 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12510 && REGCLASS_HAS_FP_REG (srcclass)))
12512 /* Discourage trying to use fp regs for a pointer. This also
12513 discourages fp regs with SImode because Pmode is an alias
12514 of SImode on this target. See PR target/48596. */
12515 int addend = (mode == Pmode) ? 40 : 0;
12517 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12518 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12521 if ((dstclass == FPUL_REGS
12522 && REGCLASS_HAS_GENERAL_REG (srcclass))
12523 || (srcclass == FPUL_REGS
12524 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12525 return 5;
12527 if ((dstclass == FPUL_REGS
12528 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12529 || (srcclass == FPUL_REGS
12530 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12531 return 7;
12533 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12534 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12535 return 20;
12537 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12538 if (TARGET_SHMEDIA
12539 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12541 if (sh_gettrcost >= 0)
12542 return sh_gettrcost;
12543 else if (!TARGET_PT_FIXED)
12544 return 100;
12547 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12548 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12549 return 4;
12551 if (TARGET_SHMEDIA
12552 || (TARGET_FMOVD
12553 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12554 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12555 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12557 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
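/* Emit an insn that loads the pointer value at address ADDR into REG,
   sign-extending it from ptr_mode to Pmode if the two modes differ.  */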
12560 static rtx
12561 emit_load_ptr (rtx reg, rtx addr)
12563 rtx mem = gen_const_mem (ptr_mode, addr);
12565 if (Pmode != ptr_mode)
12566 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12567 return emit_move_insn (reg, mem);
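/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Output the assembly for a thunk
   that adds DELTA (and, if VCALL_OFFSET is nonzero, the value loaded from
   the vtable at that offset) to the incoming this pointer and then
   tail-calls FUNCTION.  */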
12570 static void
12571 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12572 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12573 tree function)
12575 CUMULATIVE_ARGS cum;
12576 int structure_value_byref = 0;
12577 rtx this_rtx, this_value, sibcall, funexp;
12578 rtx_insn *insns;
12579 tree funtype = TREE_TYPE (function);
12580 int simple_add = CONST_OK_FOR_ADD (delta);
12581 int did_load = 0;
12582 rtx scratch0, scratch1, scratch2;
12583 unsigned i;
12585 reload_completed = 1;
12586 epilogue_completed = 1;
12587 crtl->uses_only_leaf_regs = 1;
12589 emit_note (NOTE_INSN_PROLOGUE_END);
12591 /* Find the "this" pointer. We have such a wide range of ABIs for the
12592 SH that it's best to do this completely machine independently.
12593 "this" is passed as first argument, unless a structure return pointer
12594 comes first, in which case "this" comes second. */
12595 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12596 #ifndef PCC_STATIC_STRUCT_RETURN
12597 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12598 structure_value_byref = 1;
12599 #endif /* not PCC_STATIC_STRUCT_RETURN */
12600 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12602 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12604 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12606 this_rtx
12607 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12609 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12610 static chain pointer (even if you can't have nested virtual functions
12611 right now, someone might implement them sometime), and the rest of the
12612 registers are used for argument passing, are callee-saved, or reserved. */
12613 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12614 -ffixed-reg has been used. */
12615 if (! call_used_regs[0] || fixed_regs[0])
12616 error ("r0 needs to be available as a call-clobbered register");
12617 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12618 if (! TARGET_SH5)
12620 if (call_used_regs[1] && ! fixed_regs[1])
12621 scratch1 = gen_rtx_REG (ptr_mode, 1);
12622 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12623 to where struct values are to be returned. */
12624 if (call_used_regs[3] && ! fixed_regs[3])
12625 scratch2 = gen_rtx_REG (Pmode, 3);
12627 else if (TARGET_SHMEDIA)
12629 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12630 if (i != REGNO (scratch0) &&
12631 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12633 scratch1 = gen_rtx_REG (ptr_mode, i);
12634 break;
12636 if (scratch1 == scratch0)
12637 error ("need a second call-clobbered general purpose register");
12638 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12639 if (call_used_regs[i] && ! fixed_regs[i])
12641 scratch2 = gen_rtx_REG (Pmode, i);
12642 break;
12644 if (scratch2 == scratch0)
12645 error ("need a call-clobbered target register");
12648 this_value = plus_constant (Pmode, this_rtx, delta);
12649 if (vcall_offset
12650 && (simple_add || scratch0 != scratch1)
12651 && strict_memory_address_p (ptr_mode, this_value))
12653 emit_load_ptr (scratch0, this_value);
12654 did_load = 1;
12657 if (!delta)
12658 ; /* Do nothing. */
12659 else if (simple_add)
12660 emit_move_insn (this_rtx, this_value);
12661 else
12663 emit_move_insn (scratch1, GEN_INT (delta));
12664 emit_insn (gen_add2_insn (this_rtx, scratch1));
12667 if (vcall_offset)
12669 rtx offset_addr;
12671 if (!did_load)
12672 emit_load_ptr (scratch0, this_rtx);
12674 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12675 if (strict_memory_address_p (ptr_mode, offset_addr))
12676 ; /* Do nothing. */
12677 else if (! TARGET_SH5 && scratch0 != scratch1)
12679 /* scratch0 != scratch1, and we have indexed loads. Get a better
12680 schedule by loading the offset into r1 and using an indexed
12681 load - then the load of r1 can issue before the load from
12682 (this_rtx + delta) finishes. */
12683 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12684 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12686 else if (CONST_OK_FOR_ADD (vcall_offset))
12688 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12689 offset_addr = scratch0;
12691 else if (scratch0 != scratch1)
12693 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12694 emit_insn (gen_add2_insn (scratch0, scratch1));
12695 offset_addr = scratch0;
12697 else
12698 gcc_unreachable (); /* FIXME */
12699 emit_load_ptr (scratch0, offset_addr);
12701 if (Pmode != ptr_mode)
12702 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12703 emit_insn (gen_add2_insn (this_rtx, scratch0));
12706 /* Generate a tail call to the target function. */
12707 if (! TREE_USED (function))
12709 assemble_external (function);
12710 TREE_USED (function) = 1;
12712 funexp = XEXP (DECL_RTL (function), 0);
12713 /* If the function is overridden, so is the thunk, hence we don't
12714 need GOT addressing even if this is a public symbol. */
12715 #if 0
12716 if (TARGET_SH1 && ! flag_weak)
12717 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12718 else
12719 #endif
12720 if (TARGET_SH2 && flag_pic)
12722 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12723 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12725 else
12727 if (TARGET_SHMEDIA && flag_pic)
12729 funexp = gen_sym2PIC (funexp);
12730 PUT_MODE (funexp, Pmode);
12732 emit_move_insn (scratch2, funexp);
12733 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12734 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12736 sibcall = emit_call_insn (sibcall);
12737 SIBLING_CALL_P (sibcall) = 1;
12738 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12739 emit_barrier ();
12741 /* Run just enough of rest_of_compilation to do scheduling and get
12742 the insns emitted. Note that use_thunk calls
12743 assemble_start_function and assemble_end_function. */
12745 insns = get_insns ();
12747 if (optimize > 0)
12749 if (! cfun->cfg)
12750 init_flow (cfun);
12751 split_all_insns_noflow ();
12754 sh_reorg ();
12755 shorten_branches (insns);
12756 final_start_function (insns, file, 1);
12757 final (insns, file, 1);
12758 final_end_function ();
12760 reload_completed = 0;
12761 epilogue_completed = 0;
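/* Return an rtx for the symbol of the function named NAME of kind KIND.
   When generating PIC, SFUNC_GOT symbols are loaded through the GOT and
   SFUNC_STATIC symbols via a GOTOFF relocation, using TARGET as the
   destination register if it is given.  */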
12765 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12767 rtx sym;
12769 /* If this is not an ordinary function, the name usually comes from a
12770 string literal or an sprintf buffer. Make sure we use the same
12771 string consistently, so that cse will be able to unify address loads. */
12772 if (kind != FUNCTION_ORDINARY)
12773 name = IDENTIFIER_POINTER (get_identifier (name));
12774 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12775 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12776 if (flag_pic)
12777 switch (kind)
12779 case FUNCTION_ORDINARY:
12780 break;
12781 case SFUNC_GOT:
12783 rtx reg = target ? target : gen_reg_rtx (Pmode);
12785 emit_insn (gen_symGOT2reg (reg, sym));
12786 sym = reg;
12787 break;
12789 case SFUNC_STATIC:
12791 /* ??? To allow cse to work, we use GOTOFF relocations.
12792 We could add combiner patterns to transform this into
12793 straight pc-relative calls with sym2PIC / bsrf when
12794 label load and function call are still 1:1 and in the
12795 same basic block during combine. */
12796 rtx reg = target ? target : gen_reg_rtx (Pmode);
12798 emit_insn (gen_symGOTOFF2reg (reg, sym));
12799 sym = reg;
12800 break;
12803 if (target && sym != target)
12805 emit_move_insn (target, sym);
12806 return target;
12808 return sym;
12811 /* Find the number of a general purpose register in S. */
12812 static int
12813 scavenge_reg (HARD_REG_SET *s)
12815 int r;
12816 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12817 if (TEST_HARD_REG_BIT (*s, r))
12818 return r;
12819 return -1;
12823 sh_get_pr_initial_val (void)
12825 rtx val;
12827 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12828 PR register on SHcompact, because it might be clobbered by the prologue.
12829 We check first if that is known to be the case. */
12830 if (TARGET_SHCOMPACT
12831 && ((crtl->args.info.call_cookie
12832 & ~ CALL_COOKIE_RET_TRAMP (1))
12833 || crtl->saves_all_registers))
12834 return gen_frame_mem (SImode, return_address_pointer_rtx);
12836 /* If we haven't finished rtl generation, there might be a nonlocal label
12837 that we haven't seen yet.
12838 ??? get_hard_reg_initial_val fails if it is called after register
12839 allocation has started, unless it has been called before for the
12840 same register. And even then, we end up in trouble if we didn't use
12841 the register in the same basic block before. So call
12842 get_hard_reg_initial_val now and wrap it in an unspec if we might
12843 need to replace it. */
12844 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12845 combine can put the pseudo returned by get_hard_reg_initial_val into
12846 instructions that need a general purpose register, which will fail to
12847 be recognized when the pseudo becomes allocated to PR. */
12849 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12850 if (TARGET_SH1)
12851 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12852 return val;
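/* Expand an scc insn whose comparison operand is the T register itself
   compared against an integer constant; return false if the combination
   cannot be handled here.  */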
12855 bool
12856 sh_expand_t_scc (rtx operands[])
12858 enum rtx_code code = GET_CODE (operands[1]);
12859 rtx target = operands[0];
12860 rtx op0 = operands[2];
12861 rtx op1 = operands[3];
12862 rtx result = target;
12863 HOST_WIDE_INT val;
12865 if (!REG_P (op0) || REGNO (op0) != T_REG
12866 || !CONST_INT_P (op1))
12867 return false;
12868 if (!REG_P (result))
12869 result = gen_reg_rtx (SImode);
12870 val = INTVAL (op1);
12871 if ((code == EQ && val == 1) || (code == NE && val == 0))
12872 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12873 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12874 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12875 else if (code == EQ || code == NE)
12876 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12877 else
12878 return false;
12879 if (result != target)
12880 emit_move_insn (target, result);
12881 return true;
12884 /* INSN is an sfunc; return the rtx that describes the address used. */
12885 static rtx
12886 extract_sfunc_addr (rtx insn)
12888 rtx pattern, part = NULL_RTX;
12889 int len, i;
12891 pattern = PATTERN (insn);
12892 len = XVECLEN (pattern, 0);
12893 for (i = 0; i < len; i++)
12895 part = XVECEXP (pattern, 0, i);
12896 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12897 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12898 return XEXP (part, 0);
12900 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12901 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12904 /* Verify that the register in use_sfunc_addr still agrees with the address
12905 used in the sfunc. This prevents fill_slots_from_thread from changing
12906 use_sfunc_addr.
12907 INSN is the use_sfunc_addr instruction, and REG is the register it
12908 guards. */
12909 bool
12910 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12912 /* Search for the sfunc. It should really come right after INSN. */
12913 while ((insn = NEXT_INSN (insn)))
12915 if (LABEL_P (insn) || JUMP_P (insn))
12916 break;
12917 if (! INSN_P (insn))
12918 continue;
12920 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12921 insn = seq->insn (0);
12922 if (GET_CODE (PATTERN (insn)) != PARALLEL
12923 || get_attr_type (insn) != TYPE_SFUNC)
12924 continue;
12925 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12927 gcc_unreachable ();
12930 /* This function returns a constant rtx that represents 2**15 / pi in
12931 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12932 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12933 static GTY(()) rtx sh_fsca_sf2int_rtx;
12936 sh_fsca_sf2int (void)
12938 if (! sh_fsca_sf2int_rtx)
12940 REAL_VALUE_TYPE rv;
12942 real_from_string (&rv, "10430.378350470453");
12943 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12946 return sh_fsca_sf2int_rtx;
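/* As a quick sanity check of the constant above: pi/2 radians times
   10430.378... gives 16384 = 0x4000, i.e. a quarter of the 0x10000
   full-circle range.  */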
12949 /* This function returns a constant rtx that represents pi / 2**15 in
12950 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12951 of a full circle back to an SFmode value, i.e. 0x10000 maps to
12952 2*pi. */
12953 static GTY(()) rtx sh_fsca_int2sf_rtx;
12956 sh_fsca_int2sf (void)
12958 if (! sh_fsca_int2sf_rtx)
12960 REAL_VALUE_TYPE rv;
12962 real_from_string (&rv, "9.587379924285257e-5");
12963 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12966 return sh_fsca_int2sf_rtx;
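/* Sanity check of the constant above: 0x8000 * 9.5873799...e-5 is pi,
   i.e. half of the 0x10000 full-circle range maps back to pi radians.  */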
12969 /* Initialize the CUMULATIVE_ARGS structure. */
12970 void
12971 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12972 tree fntype,
12973 rtx libname ATTRIBUTE_UNUSED,
12974 tree fndecl,
12975 signed int n_named_args,
12976 machine_mode mode)
12978 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12979 pcum->free_single_fp_reg = 0;
12980 pcum->stack_regs = 0;
12981 pcum->byref_regs = 0;
12982 pcum->byref = 0;
12983 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12985 /* XXX - Should we check TARGET_HITACHI here ??? */
12986 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12988 if (fntype)
12990 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12991 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12992 pcum->prototype_p = prototype_p (fntype);
12993 pcum->arg_count [(int) SH_ARG_INT]
12994 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12996 pcum->call_cookie
12997 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12998 && pcum->arg_count [(int) SH_ARG_INT] == 0
12999 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
13000 ? int_size_in_bytes (TREE_TYPE (fntype))
13001 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
13002 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
13003 == FIRST_RET_REG));
13005 else
13007 pcum->arg_count [(int) SH_ARG_INT] = 0;
13008 pcum->prototype_p = FALSE;
13009 if (mode != VOIDmode)
13011 pcum->call_cookie =
13012 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13013 && GET_MODE_SIZE (mode) > 4
13014 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13016 /* If the default ABI is the Renesas ABI then all library
13017 calls must assume that the library will be using the
13018 Renesas ABI. So if the function would return its result
13019 in memory then we must force the address of this memory
13020 block onto the stack. Ideally we would like to call
13021 targetm.calls.return_in_memory() here but we do not have
13022 the TYPE or the FNDECL available so we synthesize the
13023 contents of that function as best we can. */
13024 pcum->force_mem =
13025 (TARGET_DEFAULT & MASK_HITACHI)
13026 && (mode == BLKmode
13027 || (GET_MODE_SIZE (mode) > 4
13028 && !(mode == DFmode
13029 && TARGET_FPU_DOUBLE)));
13031 else
13033 pcum->call_cookie = 0;
13034 pcum->force_mem = FALSE;
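/* Generate a TRUNCATE of X to MODE.  If X itself is a zero or sign
   extension, try to operate on the inner value instead: if the inner value
   already has MODE it is returned as-is, and a narrower inner value is
   re-extended instead of truncated, unless NEED_SIGN_EXT is set and X is
   a ZERO_EXTEND.  */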
13040 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13042 enum rtx_code code = TRUNCATE;
13044 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13046 rtx inner = XEXP (x, 0);
13047 machine_mode inner_mode = GET_MODE (inner);
13049 if (inner_mode == mode)
13050 return inner;
13051 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13052 x = inner;
13053 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13054 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13056 code = GET_CODE (x);
13057 x = inner;
13060 return gen_rtx_fmt_e (code, mode, x);
13063 /* Look through X cleaning up truncates of registers that span multiple
13064 actual hard registers. Return the number of changes made. */
13066 shmedia_cleanup_truncate (rtx x)
13068 int n_changes = 0;
13069 subrtx_var_iterator::array_type array;
13070 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13072 rtx x = *iter;
13073 if (GET_CODE (x) == TRUNCATE)
13075 rtx reg = XEXP (x, 0);
13076 machine_mode reg_mode = GET_MODE (reg);
13077 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13079 int offset = subreg_lowpart_offset (DImode, reg_mode);
13080 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13081 n_changes += 1;
13082 iter.skip_subrtxes ();
13086 return n_changes;
13089 /* Load and store depend on the highpart of the address. However,
13090 set_attr_alternative does not give well-defined results before reload,
13091 so we must look at the rtl ourselves to see if any of the feeding
13092 registers is used in a memref.
13094 Return true iff INSN contains a MEM. */
13095 bool
13096 sh_contains_memref_p (rtx insn)
13098 subrtx_iterator::array_type array;
13099 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13100 if (MEM_P (*iter))
13101 return true;
13102 return false;
13105 /* Return true iff INSN loads a banked register. */
13106 bool
13107 sh_loads_bankedreg_p (rtx insn)
13109 if (GET_CODE (PATTERN (insn)) == SET)
13111 rtx op = SET_DEST (PATTERN(insn));
13112 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13113 return true;
13116 return false;
13119 /* FNADDR is the MEM expression from a call expander. Return an address
13120 to use in an SHmedia insn pattern. */
13122 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13124 int is_sym;
13126 fnaddr = XEXP (fnaddr, 0);
13127 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13128 if (flag_pic && is_sym)
13130 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13132 rtx reg = gen_reg_rtx (Pmode);
13134 /* We must not use GOTPLT for sibcalls, because PIC_REG
13135 must be restored before the PLT code gets to run. */
13136 if (is_sibcall)
13137 emit_insn (gen_symGOT2reg (reg, fnaddr));
13138 else
13139 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13140 fnaddr = reg;
13142 else
13144 fnaddr = gen_sym2PIC (fnaddr);
13145 PUT_MODE (fnaddr, Pmode);
13148 /* If ptabs might trap, make this visible to the rest of the compiler.
13149 We generally assume that symbols pertain to valid locations, but
13150 it is possible to generate invalid symbols with asm or linker tricks.
13151 In a list of functions where each returns its successor, an invalid
13152 symbol might denote an empty list. */
13153 if (!TARGET_PT_FIXED
13154 && (!is_sym || TARGET_INVALID_SYMBOLS)
13155 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13157 rtx tr = gen_reg_rtx (PDImode);
13159 emit_insn (gen_ptabs (tr, fnaddr));
13160 fnaddr = tr;
13162 else if (! target_reg_operand (fnaddr, Pmode))
13163 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13164 return fnaddr;
13167 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13168 static reg_class_t
13169 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13171 if (rclass == NO_REGS
13172 && TARGET_SHMEDIA
13173 && (CONST_DOUBLE_P (x)
13174 || GET_CODE (x) == SYMBOL_REF
13175 || PIC_ADDR_P (x)))
13176 return GENERAL_REGS;
13178 return rclass;
13181 /* Implement TARGET_SECONDARY_RELOAD. */
13182 static reg_class_t
13183 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13184 machine_mode mode, secondary_reload_info *sri)
13186 enum reg_class rclass = (enum reg_class) rclass_i;
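  /* GBR displacement addressing can only load into / store from R0
     (e.g. mov.l @(disp,GBR),R0), so anything other than R0 has to go
     through R0 as an intermediate.  */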
13188 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13189 && REG_P (XEXP (XEXP (x, 0), 0))
13190 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13191 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13193 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13194 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13196 if (REG_P (x) && REGNO (x) == GBR_REG)
13197 return NO_REGS;
13199 if (in_p)
13201 if (REGCLASS_HAS_FP_REG (rclass)
13202 && ! TARGET_SHMEDIA
13203 && immediate_operand ((x), mode)
13204 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13205 switch (mode)
13207 case SFmode:
13208 sri->icode = CODE_FOR_reload_insf__frn;
13209 return NO_REGS;
13210 case DFmode:
13211 sri->icode = CODE_FOR_reload_indf__frn;
13212 return NO_REGS;
13213 case SImode:
13214 /* ??? If we knew that we are in the appropriate mode -
13215 single precision - we could use a reload pattern directly. */
13216 return FPUL_REGS;
13217 default:
13218 abort ();
13220 if (rclass == FPUL_REGS
13221 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13222 || REGNO (x) == T_REG))
13223 || GET_CODE (x) == PLUS))
13224 return GENERAL_REGS;
13225 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13227 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13228 return GENERAL_REGS;
13229 else if (mode == SFmode)
13230 return FP_REGS;
13231 sri->icode = CODE_FOR_reload_insi__i_fpul;
13232 return NO_REGS;
13234 if (rclass == FPSCR_REGS
13235 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13236 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13237 return GENERAL_REGS;
13238 if (REGCLASS_HAS_FP_REG (rclass)
13239 && TARGET_SHMEDIA
13240 && immediate_operand (x, mode)
13241 && x != CONST0_RTX (GET_MODE (x))
13242 && GET_MODE (x) != V4SFmode)
13243 return GENERAL_REGS;
13244 if ((mode == QImode || mode == HImode)
13245 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13247 sri->icode = ((mode == QImode)
13248 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13249 return NO_REGS;
13251 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13252 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13253 return TARGET_REGS;
13254 } /* end of input-only processing. */
13256 if (((REGCLASS_HAS_FP_REG (rclass)
13257 && (REG_P (x)
13258 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13259 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13260 && TARGET_FMOVD))))
13261 || (REGCLASS_HAS_GENERAL_REG (rclass)
13262 && REG_P (x)
13263 && FP_REGISTER_P (REGNO (x))))
13264 && ! TARGET_SHMEDIA
13265 && (mode == SFmode || mode == SImode))
13266 return FPUL_REGS;
13267 if ((rclass == FPUL_REGS
13268 || (REGCLASS_HAS_FP_REG (rclass)
13269 && ! TARGET_SHMEDIA && mode == SImode))
13270 && (MEM_P (x)
13271 || (REG_P (x)
13272 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13273 || REGNO (x) == T_REG
13274 || system_reg_operand (x, VOIDmode)))))
13276 if (rclass == FPUL_REGS)
13277 return GENERAL_REGS;
13278 return NO_REGS; // LRA wants NO_REGS here; it used to be FPUL_REGS.
13280 if ((rclass == TARGET_REGS
13281 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13282 && !satisfies_constraint_Csy (x)
13283 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13284 return GENERAL_REGS;
13285 if ((rclass == MAC_REGS || rclass == PR_REGS)
13286 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13287 && rclass != REGNO_REG_CLASS (REGNO (x)))
13288 return GENERAL_REGS;
13289 if (rclass != GENERAL_REGS && REG_P (x)
13290 && TARGET_REGISTER_P (REGNO (x)))
13291 return GENERAL_REGS;
13293 /* If we get here, fall back to loading the FPUL register through general registers.
13294 This case can happen when movsi_ie insn is picked initially to
13295 load/store the FPUL register from/to another register, and then the
13296 other register is allocated on the stack. */
13297 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13298 return GENERAL_REGS;
13300 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13301 the other operand.
13302 On SH2A we could also just leave it alone here, which would result in a
13303 4 byte move insn being generated instead. However, for this to work
13304 the insns must have the appropriate alternatives. */
13305 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13306 && satisfies_constraint_Sdd (x)
13307 && sh_disp_addr_displacement (x)
13308 <= sh_max_mov_insn_displacement (mode, false))
13309 return R0_REGS;
13311 /* When reload is trying to address a QImode or HImode subreg on the stack,
13312 force any subreg byte into R0_REGS, as this is going to become a
13313 displacement address.
13314 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13315 is on the stack, the memref to it might already require a displacement
13316 and that has to be added to the final address. At this point we don't
13317 know the cumulative displacement so we assume the worst case. */
13318 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13319 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13320 return R0_REGS;
13322 return NO_REGS;
13325 /* Return true if SUBST can't safely replace its equivalent during RA. */
13326 static bool
13327 sh_cannot_substitute_mem_equiv_p (rtx)
13329 if (TARGET_SHMEDIA)
13330 return false;
13332 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13333 uses R0 and may cause spill failure when R0 is already used.
13334 We have to return true for that case at least.
13335 Moreover, many SH insns require R0 specifically, and there are not
13336 enough hard registers to make the equiv substitution win in size and
13337 speed on average working sets.  The pseudos produced to hold the
13338 equiv values can't get good hard registers in bad cases and end up in
13339 memory save/restore insns, which makes the code worse. */
13340 return true;
13343 /* Return true if DISP can be legitimized. */
13344 static bool
13345 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13346 machine_mode mode)
13348 if (TARGET_SHMEDIA)
13349 return false;
13351 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13352 || (TARGET_SH2E && mode == SFmode))
13353 return false;
13355 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13356 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13358 *disp = adj.mov_disp;
13359 *offs = adj.offset_adjust;
13360 return true;
13363 return false;
13366 /* Return true if the movsf insn should be split using an additional
13367 register. */
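/* The short comments below name the movsf_ie operand alternatives by their
   constraint letters; here 'f' is an FP register, 'r' a general register
   and 'y' presumably FPUL (see the FPUL_REG checks below).  */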
13368 bool
13369 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13371 /* op0 == op1 */
13372 if (rtx_equal_p (op0, op1))
13373 return true;
13374 /* fy, FQ, reg */
13375 if (GET_CODE (op1) == CONST_DOUBLE
13376 && ! satisfies_constraint_G (op1)
13377 && ! satisfies_constraint_H (op1)
13378 && REG_P (op0)
13379 && REG_P (op2))
13380 return true;
13381 /* f, r, y */
13382 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13383 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13384 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13385 return true;
13386 /* r, f, y */
13387 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13388 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13389 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13390 return true;
13392 return false;
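/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Adjust the fixed,
   call-used and register class information according to the selected
   target and ABI flags.  */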
13395 static void
13396 sh_conditional_register_usage (void)
13398 int regno;
13399 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13400 if (! VALID_REGISTER_P (regno))
13401 fixed_regs[regno] = call_used_regs[regno] = 1;
13402 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13403 if (TARGET_SH5)
13405 call_used_regs[FIRST_GENERAL_REG + 8]
13406 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13407 call_really_used_regs[FIRST_GENERAL_REG + 8]
13408 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13410 if (TARGET_SHMEDIA)
13412 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13413 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13414 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13416 if (flag_pic)
13418 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13419 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13421 /* Renesas saves and restores mac registers on call. */
13422 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13424 call_really_used_regs[MACH_REG] = 0;
13425 call_really_used_regs[MACL_REG] = 0;
13428 if (TARGET_SHMEDIA)
13430 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13431 if (! fixed_regs[regno] && call_really_used_regs[regno])
13432 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13434 else
13435 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13436 if (! fixed_regs[regno] && call_really_used_regs[regno])
13437 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13439 call_really_used_regs[FPSCR_MODES_REG] = 0;
13440 call_really_used_regs[FPSCR_STAT_REG] = 0;
13443 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13445 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13446 static bool
13447 sh_legitimate_constant_p (machine_mode mode, rtx x)
13449 return (TARGET_SHMEDIA
13450 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13451 || x == CONST0_RTX (mode)
13452 || !TARGET_SHMEDIA_FPU
13453 || TARGET_SHMEDIA64)
13454 : (GET_CODE (x) != CONST_DOUBLE
13455 || mode == DFmode || mode == SFmode
13456 || mode == DImode || GET_MODE (x) == VOIDmode));
13459 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13461 static void
13462 sh_init_sync_libfuncs (void)
13464 init_sync_libfuncs (UNITS_PER_WORD);
13467 /* Return true if it is appropriate to emit `ret' instructions in the
13468 body of a function. */
13469 bool
13470 sh_can_use_simple_return_p (void)
13472 HARD_REG_SET live_regs_mask;
13473 int d;
13475 /* Some targets require special return insns. */
13476 if (TARGET_SHMEDIA
13477 || (TARGET_SHCOMPACT
13478 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13479 return false;
13481 if (! reload_completed || frame_pointer_needed)
13482 return false;
13484 /* Moving the prologue around doesn't reduce the size. */
13485 if (optimize_function_for_size_p (cfun))
13486 return false;
13488 /* Finally, allow for pr save. */
13489 d = calc_live_regs (&live_regs_mask);
13491 if (rounded_frame_size (d) > 4)
13492 return false;
13494 return true;
13497 /*------------------------------------------------------------------------------
13498 Address mode optimization support code
13501 typedef HOST_WIDE_INT disp_t;
13502 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13503 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13504 static const disp_t INVALID_DISP = MAX_DISP;
13506 /* A memory reference which is described by a base register and a
13507 displacement. */
13508 class base_reg_disp
13510 public:
13511 base_reg_disp (rtx br, disp_t d);
13513 bool is_reg (void) const;
13514 bool is_disp (void) const;
13515 rtx reg (void) const;
13516 disp_t disp (void) const;
13518 private:
13519 rtx reg_;
13520 disp_t disp_;
13523 inline
13524 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13525 : reg_ (br), disp_ (d)
13529 inline bool
13530 base_reg_disp::is_reg (void) const
13532 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13535 inline bool
13536 base_reg_disp::is_disp (void) const
13538 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13541 inline rtx
13542 base_reg_disp::reg (void) const
13544 return reg_;
13547 inline disp_t
13548 base_reg_disp::disp (void) const
13550 return disp_;
13553 /* Find the base register and calculate the displacement for a given
13554 address rtx 'x'. */
13555 static base_reg_disp
13556 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13557 rtx base_reg = NULL)
13559 if (REG_P (x))
13561 if (REGNO (x) == GBR_REG)
13562 return base_reg_disp (x, disp);
13564 /* We've reached a hard-reg. This is probably the point where
13565 function args are copied to pseudos. Do not go any further and
13566 stick to the pseudo. If the original mem addr was in a hard reg
13567 from the beginning, it will become the base reg. */
13568 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13569 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13571 /* Find the def of the reg and trace it. If there is more than one
13572 def and they are not the same, assume it's not safe to proceed. */
13573 rtx_insn* last_i = NULL;
13574 rtx last_set = NULL;
13575 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13576 d = DF_REF_NEXT_REG (d))
13578 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13580 /* Accept multiple defs, as long as they are equal. */
13581 if (last_set == NULL || rtx_equal_p (last_set, set))
13583 last_i = DF_REF_INSN (d);
13584 last_set = set;
13586 else
13588 last_i = NULL;
13589 last_set = NULL;
13590 break;
13594 if (last_set != NULL && last_i != NULL)
13595 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13596 XEXP (last_set, 0));
13598 /* When we get here, no previous insn was found that sets the reg.
13599 The input reg is already the base reg. */
13600 return base_reg_disp (x, disp);
13603 else if (GET_CODE (x) == PLUS)
13605 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13606 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13608 /* Either left or right val must be a reg.
13609 We don't handle the case of 'reg + reg' here. */
13610 if (left_val.is_reg () && right_val.is_disp ())
13611 return base_reg_disp (left_val.reg (), left_val.disp ()
13612 + right_val.disp () + disp);
13613 else if (right_val.is_reg () && left_val.is_disp ())
13614 return base_reg_disp (right_val.reg (), right_val.disp ()
13615 + left_val.disp () + disp);
13616 else
13617 return base_reg_disp (base_reg, disp);
13620 else if (CONST_INT_P (x))
13621 return base_reg_disp (NULL, disp + INTVAL (x));
13623 /* Didn't find anything useful. */
13624 return base_reg_disp (base_reg, disp);
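/* For example, if X is (plus (reg P) (const_int 4)) for some pseudo P
   whose only def is (set (reg P) (plus (reg GBR) (const_int 32))), this
   returns GBR as the base register with a displacement of 36.  */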
13627 /* Given an insn and a memory operand, try to find an equivalent GBR
13628 based memory address and return the corresponding new memory address.
13629 Return NULL_RTX if not found. */
13631 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13633 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13634 return NULL_RTX;
13636 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13637 if (side_effects_p (XEXP (mem, 0)))
13638 return NULL_RTX;
13640 /* When not optimizing there might be no dataflow available. */
13641 if (df == NULL)
13642 return NULL_RTX;
13644 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13646 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13648 /* If GBR is marked as call clobbered we bail out if we see a call.
13649 FIXME: Actually should check if this mem refers to the gbr value
13650 before or after the call. If there is a store_gbr preceding this
13651 mem, it's safe to use GBR for this mem.
13653 If GBR is not marked as call clobbered, but there is some other
13654 def than a call, it's probably a load_gbr upon which we also
13655 bail out to be on the safe side.
13656 FIXME: Should check if we have a use-after-def case, such as
13657 the call case above. */
13658 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13659 d = DF_REF_NEXT_REG (d))
13661 if (CALL_P (DF_REF_INSN (d)))
13663 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13664 return NULL_RTX;
13665 else
13666 continue;
13668 else
13669 return NULL_RTX;
13672 rtx disp = GEN_INT (gbr_disp.disp ());
13673 if (gbr_displacement (disp, GET_MODE (mem)))
13674 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13677 return NULL_RTX;
13680 /*------------------------------------------------------------------------------
13681 Manual insn combine support code.
13684 /* Return true if the specified insn contains any UNSPECs or
13685 UNSPEC_VOLATILEs. */
13686 static bool
13687 sh_unspec_insn_p (rtx x)
13689 subrtx_iterator::array_type array;
13690 FOR_EACH_SUBRTX (i, array, x, ALL)
13691 if (*i != NULL
13692 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13693 return true;
13695 return false;
13698 /* Return true if the register operands of the specified insn are modified
13699 between the specified from and to insns (exclusive of those two). */
13700 bool
13701 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13702 const rtx_insn* from,
13703 const rtx_insn* to)
13705 /* FIXME: Return true for multiple sets for now. */
13706 rtx s = single_set (operands_insn);
13707 if (s == NULL_RTX)
13708 return true;
13710 subrtx_iterator::array_type array;
13711 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13712 if (*i != NULL &&
13713 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13714 return true;
13716 return false;
13719 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13720 negates the T bit and stores the result in the T bit. */
13721 bool
13722 sh_is_nott_insn (const rtx_insn* i)
13724 return i != NULL && GET_CODE (PATTERN (i)) == SET
13725 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13726 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
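/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn
   that copies the T bit into a general register, and return the
   destination register rtx, or null.  */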
13730 sh_movt_set_dest (const rtx_insn* i)
13732 if (i == NULL)
13733 return NULL;
13735 const_rtx p = PATTERN (i);
13736 return GET_CODE (p) == SET
13737 && arith_reg_dest (XEXP (p, 0), SImode)
13738 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13741 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13742 that stores the negated T bit in a register, and return the destination
13743 register rtx, or null. */
13745 sh_movrt_set_dest (const rtx_insn* i)
13747 if (i == NULL)
13748 return NULL;
13750 const_rtx p = PATTERN (i);
13752 /* The negc movrt replacement is inside a parallel. */
13753 if (GET_CODE (p) == PARALLEL)
13754 p = XVECEXP (p, 0, 0);
13756 return GET_CODE (p) == SET
13757 && arith_reg_dest (XEXP (p, 0), SImode)
13758 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13761 /* Given an insn and a reg number, tell whether the reg dies or is unused
13762 after the insn. */
13763 bool
13764 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13766 return find_regno_note (i, REG_DEAD, regno) != NULL
13767 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13770 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13771 mark it as being used after the insn. */
13772 void
13773 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13775 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13776 remove_note (i, n);
13777 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13778 remove_note (i, n);
13781 /* Given an insn check if it contains any post/pre inc/dec mem operands and
13782 add the REG_INC notes accordingly.
13783 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13784 FIXME: This function is currently used by peephole2 patterns because
13785 the peephole2 pass does not preserve REG_INC notes. If the notes
13786 are dropped the following passes will do wrong things. */
13787 rtx_insn*
13788 sh_check_add_incdec_notes (rtx_insn* i)
13790 struct for_each_inc_dec_clb
13792 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13793 rtx dest, rtx src ATTRIBUTE_UNUSED,
13794 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13796 gcc_assert (REG_P (dest));
13798 rtx_insn* i = (rtx_insn*)arg;
13799 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13800 add_reg_note (i, REG_INC, dest);
13802 return 0;
13806 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13807 return i;
13810 /* Given an op rtx and an insn, try to find out whether the result of the
13811 specified op consists only of logical operations on T bit stores. */
13812 bool
13813 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13815 if (!logical_operator (op, SImode))
13816 return false;
13818 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13819 int op_is_t_count = 0;
13821 for (int i = 0; i < 2; ++i)
13823 if (t_reg_operand (ops[i], VOIDmode)
13824 || negt_reg_operand (ops[i], VOIDmode))
13825 op_is_t_count++;
13827 else
13829 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13830 prev_nonnote_insn_bb);
13831 if (op_set.set_src == NULL_RTX)
13832 continue;
13834 if (t_reg_operand (op_set.set_src, VOIDmode)
13835 || negt_reg_operand (op_set.set_src, VOIDmode)
13836 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13837 op_is_t_count++;
13841 return op_is_t_count == 2;
13844 /* Given the operand that is extended in a sign/zero extend insn, and the
13845 insn, try to figure out whether the sign/zero extension can be replaced
13846 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13847 NULL_RTX otherwise. */
13849 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13851 if (REG_P (extended_op))
13852 extended_op = extended_op;
13853 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13854 extended_op = SUBREG_REG (extended_op);
13855 else
13856 return NULL_RTX;
13858 /* Reg moves must be of the same mode. */
13859 if (GET_MODE (extended_op) != SImode)
13860 return NULL_RTX;
13862 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13863 if (s.set_src == NULL_RTX)
13864 return NULL_RTX;
13866 if (t_reg_operand (s.set_src, VOIDmode)
13867 || negt_reg_operand (s.set_src, VOIDmode))
13868 return extended_op;
13870 /* If the zero extended reg was formed by a logical operation, check the
13871 operands of the logical operation. If both originated from T bit
13872 stores the zero extension can be eliminated. */
13873 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13874 return extended_op;
13876 return NULL_RTX;
13879 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13880 figure out whether it should be converted into a movt-xor sequence in
13881 the movrt_negc splitter.
13882 Returns true if insns have been modified and the splitter has succeeded. */
13883 bool
13884 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13886 /* In cases such as
13887 tst r4,r4
13888 mov #-1,r1
13889 negc r1,r1
13890 tst r4,r4
13891 we can replace the T bit clobbering negc with a movt-xor sequence and
13892 eliminate the redundant comparison.
13893 Because the xor insn depends on register allocation results, allow this
13894 only before reload. */
13895 if (!can_create_pseudo_p ())
13896 return false;
13898 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13899 prev_nonnote_insn_bb);
13900 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13901 next_nonnote_insn_bb);
13903 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13904 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13905 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13906 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13907 t_before_negc.insn,
13908 t_after_negc.insn)
13909 && !sh_unspec_insn_p (t_after_negc.insn)
13910 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13911 && !side_effects_p (PATTERN (t_after_negc.insn))
13912 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13914 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13915 set_insn_deleted (t_after_negc.insn);
13916 return true;
13918 else
13919 return false;
13922 /* Given a reg and the current insn, see if the value of the reg originated
13923 from a sign or zero extension and return the discovered information. */
13924 sh_extending_set_of_reg
13925 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13927 if (reg == NULL)
13928 return sh_extending_set_of_reg (curr_insn);
13930 if (SUBREG_P (reg))
13931 reg = SUBREG_REG (reg);
13933 if (!REG_P (reg))
13934 return sh_extending_set_of_reg (curr_insn);
13936 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13937 only the adjacent predecessor blocks would cover most of the cases.
13938 Also try to look through the first extension that we hit. There are some
13939 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13940 fails to see the sign_extend. */
13941 sh_extending_set_of_reg result =
13942 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13944 if (result.set_src != NULL)
13946 if (GET_CODE (result.set_src) == SIGN_EXTEND
13947 || GET_CODE (result.set_src) == ZERO_EXTEND)
13949 if (dump_file)
13950 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13951 "explicitly sign/zero extended in insn %d\n",
13952 REGNO (reg), INSN_UID (result.insn));
13953 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13954 result.ext_code = GET_CODE (result.set_src);
13956 else if (MEM_P (result.set_src)
13957 && (GET_MODE (result.set_src) == QImode
13958 || GET_MODE (result.set_src) == HImode)
13959 && !sh_unspec_insn_p (result.insn))
13961 /* On SH QIHImode memory loads always sign extend. However, in
13962 some cases where it seems that the higher bits are not
13963 interesting, the loads will not be expanded as sign extending
13964 insns, but as QIHImode loads into QIHImode regs. We report that
13965 the reg has been sign extended by the mem load. When it is used
13966 as such, we must convert the mem load into a sign extending insn,
13967 see also sh_extending_set_of_reg::use_as_extended_reg. */
13968 if (dump_file)
13969 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13970 "implicitly sign extended in insn %d\n",
13971 REGNO (reg), INSN_UID (result.insn));
13972 result.from_mode = GET_MODE (result.set_src);
13973 result.ext_code = SIGN_EXTEND;
13977 return result;
13980 /* Given a reg that is known to be sign or zero extended at some insn,
13981 take the appropriate measures so that the extended value can be used as
13982 a reg at the specified insn and return the resulting reg rtx. */
13984 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
13986 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
13987 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
13988 gcc_assert (from_mode == QImode || from_mode == HImode);
13990 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
13992 if (dump_file)
13993 fprintf (dump_file,
13994 "use_as_extended_reg: converting non-extending mem load in "
13995 "insn %d into sign-extending load\n", INSN_UID (insn));
13997 rtx r = gen_reg_rtx (SImode);
13998 rtx_insn* i0;
13999 if (from_mode == QImode)
14000 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14001 else if (from_mode == HImode)
14002 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14003 else
14004 gcc_unreachable ();
14006 emit_insn_after (
14007 gen_move_insn (XEXP (set_rtx, 0),
14008 gen_lowpart (GET_MODE (set_src), r)), i0);
14009 set_insn_deleted (insn);
14010 return r;
14012 else
14014 rtx extension_dst = XEXP (set_rtx, 0);
14015 if (modified_between_p (extension_dst, insn, use_at_insn))
14017 if (dump_file)
14018 fprintf (dump_file,
14019 "use_as_extended_reg: dest reg %d of extending insn %d is "
14020 "modified, inserting a reg-reg copy\n",
14021 REGNO (extension_dst), INSN_UID (insn));
14023 rtx r = gen_reg_rtx (SImode);
14024 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14025 return r;
14027 else
14029 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14030 return extension_dst;
14035 bool
14036 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14038 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14039 && (from_mode == QImode || from_mode == HImode)
14040 && set_src != NULL)
14041 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14042 else
14043 return false;
14047 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14049 gcc_assert (can_use_as_unextended_reg ());
14051 rtx r = XEXP (set_src, 0);
14052 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14054 if (modified_between_p (r, insn, use_at_insn))
14056 rtx r1 = gen_reg_rtx (SImode);
14057 emit_insn_after (gen_move_insn (r1, r0), insn);
14058 return r1;
14060 else
14062 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14063 ? REGNO (SUBREG_REG (r))
14064 : REGNO (r));
14065 return r0;
14069 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14070 perform the necessary checks on the operands and split it accordingly. */
14071 void
14072 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14073 int subreg_offset, rtx operands[])
14075 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14077 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14078 curr_insn);
14079 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14080 curr_insn);
14082 /* If one of the operands is known to be zero extended, that's already
14083 sufficient to mask out the unwanted high bits. */
14084 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14086 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14087 operands[1]));
14088 return;
14090 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14092 emit_insn (gen_tstsi_t (operands[0],
14093 eop1.use_as_extended_reg (curr_insn)));
14094 return;
14097 /* None of the operands seem to be zero extended.
14098 If both are sign extended it's OK, too. */
14099 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14100 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14102 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14103 eop1.use_as_extended_reg (curr_insn)));
14104 return;
14107 /* Otherwise we have to insert a zero extension on one of the operands to
14108 mask out the unwanted high bits.
14109 Prefer the operand that has no known extension. */
14110 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14111 std::swap (operands[0], operands[1]);
14113 rtx tmp0 = gen_reg_rtx (SImode);
14114 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14115 GET_MODE (operands[0]), subreg_offset);
14116 emit_insn (subreg_mode == QImode
14117 ? gen_zero_extendqisi2 (tmp0, tmp1)
14118 : gen_zero_extendhisi2 (tmp0, tmp1));
14119 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14122 /* A helper class to increment/decrement a counter variable each time a
14123 function is entered/left. */
14124 class scope_counter
14126 public:
14127 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14129 ~scope_counter (void)
14131 --m_counter;
14132 gcc_assert (m_counter >= 0);
14135 int count (void) const { return m_counter; }
14137 private:
14138 int& m_counter;
14141 /* Given an rtx x, determine whether the expression can be used to create
14142 an insn that calculates x and stores the result in the T bit.
14143 This is used by the 'treg_set_expr' predicate to construct insns sequences
14144 where T bit results are fed into other insns, such as addc, subc, negc
14145 insns.
14147 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14148 distinguish between 'positive' and 'negative' forms. For now this has to
14149 be done in the preparation code. We could also introduce
14150 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14151 two different patterns for the 'positive' and 'negative' forms. However,
14152 the total number of lines of code would be about the same and the
14153 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14154 recog function would need to look inside the expression by temporarily
14155 splitting it. */
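/* For example, an operand such as (eq (reg R) (const_int 0)) can be
   accepted here because it is recognizable as a single T bit setting
   comparison insn (a tst / cmp-eq style pattern).  */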
14156 static int sh_recog_treg_set_expr_reent_count = 0;
14158 bool
14159 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14161 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14163 /* Limit the recursion count to avoid nested expressions which we can't
14164 resolve to a single treg set insn. */
14165 if (recursion.count () > 1)
14166 return false;
14168 /* Early accept known possible operands before doing recog. */
14169 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14170 return true;
14172 /* Early reject impossible operands before doing recog.
14173 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14174 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14175 such as lower-subreg will bail out. Some insns such as SH4A movua are
14176 done with UNSPEC, so must reject those, too, or else it would result
14177 in an invalid reg -> treg move. */
14178 if (register_operand (op, mode) || memory_operand (op, mode)
14179 || sh_unspec_insn_p (op))
14180 return false;
14182 if (!can_create_pseudo_p ())
14183 return false;
14185 /* We are going to invoke recog in a re-entrant way and thus
14186 have to capture its current state and restore it afterwards. */
14187 recog_data_d prev_recog_data = recog_data;
14189 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14190 SET_PREV_INSN (i) = NULL;
14191 SET_NEXT_INSN (i) = NULL;
14193 int result = recog (PATTERN (i), i, 0);
14195 /* It seems there is no insn like that. Create a simple negated
14196 version and try again. If we hit a negated form, we'll allow that
14197 and append a nott sequence when splitting out the insns. Insns that
14198 do the split can then remove the trailing nott if they know how to
14199 deal with it. */
14200 if (result < 0 && GET_CODE (op) == EQ)
14202 PUT_CODE (op, NE);
14203 result = recog (PATTERN (i), i, 0);
14204 PUT_CODE (op, EQ);
14206 if (result < 0 && GET_CODE (op) == NE)
14208 PUT_CODE (op, EQ);
14209 result = recog (PATTERN (i), i, 0);
14210 PUT_CODE (op, NE);
14213 recog_data = prev_recog_data;
14214 return result >= 0;
14217 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14218 This can be used as a condition for insn/split patterns to allow certain
14219 T bit setting patterns only to be matched as sub expressions of other
14220 patterns. */
14221 bool
14222 sh_in_recog_treg_set_expr (void)
14224 return sh_recog_treg_set_expr_reent_count > 0;
14227 /* Given an rtx x, which is assumed to be some expression that has been
14228 matched by the 'treg_set_expr' predicate before, split and emit the
14229 insns that are necessary to calculate the expression and store the result
14230 in the T bit.
14231 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14232 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14233 'delete_insn' which then causes the DF parts to bail out, because we
14234 currently are inside another gen_split* function and would invoke
14235 'try_split' in a reentrant way. */
14236 static std::pair<rtx_insn*, rtx_insn*>
14237 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14239 if (dump_file)
14241 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14242 print_rtl_single (dump_file, i);
14243 fprintf (dump_file, "\n");
14246 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
14248 if (seq == NULL)
14249 return std::make_pair (i, i);
14251 /* Avoid infinite splitter loops if any insn of the result matches
14252 the original pattern. */
14253 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14254 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14255 return std::make_pair (i, i);
14257 unshare_all_rtl_in_chain (seq);
14259 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14260 a linked list, replace the single insn with the new insns. */
14261 rtx_insn* seqlast = seq;
14262 while (NEXT_INSN (seqlast) != NULL)
14263 seqlast = NEXT_INSN (seqlast);
14265 if (rtx_insn* iprev = PREV_INSN (i))
14266 SET_NEXT_INSN (iprev) = seq;
14267 if (rtx_insn* inext = NEXT_INSN (i))
14268 SET_PREV_INSN (inext) = seqlast;
14270 SET_PREV_INSN (seq) = PREV_INSN (i);
14271 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14273 SET_PREV_INSN (i) = NULL;
14274 SET_NEXT_INSN (i) = NULL;
14276 /* Recursively split all insns. */
14277 for (i = seq; ; i = NEXT_INSN (i))
14279 std::pair<rtx_insn*, rtx_insn*> ii =
14280 sh_try_split_insn_simple (i, curr_insn, n + 1);
14281 if (i == seq)
14282 seq = ii.first;
14283 if (i == seqlast)
14285 seqlast = ii.second;
14286 break;
14288 i = ii.first;
14291 return std::make_pair (seq, seqlast);
14294 sh_treg_insns
14295 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14297 if (t_reg_operand (x, VOIDmode))
14298 return sh_treg_insns ();
14300 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14302 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14303 SET_PREV_INSN (i) = NULL;
14304 SET_NEXT_INSN (i) = NULL;
14306 if (dump_file)
14308 fprintf (dump_file, "split_treg_set_expr insn:\n");
14309 print_rtl (dump_file, i);
14310 fprintf (dump_file, "\n");
14313 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14314 have to capture its current state and restore it afterwards. */
14315 recog_data_d prev_recog_data = recog_data;
14317 int insn_code = recog (PATTERN (i), i, 0);
14319 /* If the insn was not found, see if we matched the negated form before
14320 and append a nott. */
14321 bool append_nott = false;
14323 if (insn_code < 0 && GET_CODE (x) == EQ)
14325 PUT_CODE (x, NE);
14326 insn_code = recog (PATTERN (i), i, 0);
14327 if (insn_code >= 0)
14328 append_nott = true;
14329 else
14330 PUT_CODE (x, EQ);
14332 if (insn_code < 0 && GET_CODE (x) == NE)
14334 PUT_CODE (x, EQ);
14335 insn_code = recog (PATTERN (i), i, 0);
14336 if (insn_code >= 0)
14337 append_nott = true;
14338 else
14339 PUT_CODE (x, NE);
14342 gcc_assert (insn_code >= 0);
14344 /* Try to recursively split the insn. Some insns might refuse to split
14345 any further while we are in the treg_set_expr splitting phase. They
14346 will be emitted as part of the outer insn and then split again. */
14347 std::pair<rtx_insn*, rtx_insn*> insnlist =
14348 sh_try_split_insn_simple (i, curr_insn);
14350 /* Restore recog state. */
14351 recog_data = prev_recog_data;
14353 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14354 ? insnlist.second
14355 : NULL;
14356 if (dump_file)
14358 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14359 print_rtl (dump_file, insnlist.first);
14360 fprintf (dump_file, "\n");
14362 if (nott_insn != NULL)
14363 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14366 emit_insn (insnlist.first);
14368 if (nott_insn != NULL && append_nott)
14370 if (dump_file)
14371 fprintf (dump_file, "removing trailing nott\n");
14372 remove_insn (nott_insn);
14373 nott_insn = NULL;
14374 append_nott = false;
14377 if (append_nott)
14378 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14380 rtx_insn* first_insn = get_insns ();
14382 if (dump_file)
14384 fprintf (dump_file, "resulting insns:\n");
14385 print_rtl (dump_file, first_insn);
14386 fprintf (dump_file, "\n");
14389 return sh_treg_insns (first_insn, nott_insn);
14392 /*------------------------------------------------------------------------------
14393 Mode switching support code.
14396 static void
14397 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14398 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14400 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14401 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14403 emit_insn (gen_toggle_pr ());
14404 if (TARGET_FMOVD)
14405 emit_insn (gen_toggle_sz ());
14407 else if (mode != FP_MODE_NONE)
14409 rtx tmp = gen_reg_rtx (SImode);
14410 emit_insn (gen_sts_fpscr (tmp));
14411 rtx i = NULL;
14413 const unsigned HOST_WIDE_INT fpbits =
14414 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
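	  /* FPSCR.PR selects double precision insns and FPSCR.SZ selects
	     64-bit fmov transfers; with FMOVD both bits are switched
	     together.  */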
14416 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14417 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14418 else if (mode == FP_MODE_SINGLE)
14419 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14420 else if (mode == FP_MODE_DOUBLE)
14421 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14422 else
14423 gcc_unreachable ();
14425 emit_insn (i);
14426 emit_insn (gen_lds_fpscr (tmp));
14430 static int
14431 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14433 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14436 static int
14437 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14439 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
14440 get_attr_fp_set (insn) != FP_SET_NONE)
14441 return (int) get_attr_fp_set (insn);
14442 else
14443 return mode;
14446 static int
14447 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14449 return NORMAL_MODE (entity);
14452 static int
14453 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14455 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14458 static int
14459 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14461 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14464 /*------------------------------------------------------------------------------
14465 Misc
14468 /* Return true if we use LRA instead of the reload pass. */
14469 static bool
14470 sh_lra_p (void)
14472 return sh_lra_flag;
14475 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14477 static bool
14478 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14479 unsigned int align,
14480 enum by_pieces_operation op,
14481 bool speed_p)
14483 switch (op)
14485 case MOVE_BY_PIECES:
14486 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14487 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14488 case STORE_BY_PIECES:
14489 case SET_BY_PIECES:
14490 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14491 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14492 default:
14493 return default_use_by_pieces_infrastructure_p (size, align,
14494 op, speed_p);
14498 #include "gt-sh.h"