PR target/66563
gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stringpool.h"
36 #include "stor-layout.h"
37 #include "calls.h"
38 #include "varasm.h"
39 #include "flags.h"
40 #include "hard-reg-set.h"
41 #include "function.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "emit-rtl.h"
46 #include "stmt.h"
47 #include "expr.h"
48 #include "insn-codes.h"
49 #include "optabs.h"
50 #include "reload.h"
51 #include "regs.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "dwarf2.h"
57 #include "tm_p.h"
58 #include "target.h"
59 #include "target-def.h"
60 #include "langhooks.h"
61 #include "predict.h"
62 #include "dominance.h"
63 #include "cfg.h"
64 #include "cfgrtl.h"
65 #include "cfganal.h"
66 #include "lcm.h"
67 #include "cfgbuild.h"
68 #include "cfgcleanup.h"
69 #include "basic-block.h"
70 #include "df.h"
71 #include "intl.h"
72 #include "sched-int.h"
73 #include "params.h"
74 #include "tree-ssa-alias.h"
75 #include "internal-fn.h"
76 #include "gimple-fold.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "gimple.h"
80 #include "gimplify.h"
81 #include "cfgloop.h"
82 #include "alloc-pool.h"
83 #include "tm-constrs.h"
84 #include "opts.h"
85 #include "tree-pass.h"
86 #include "pass_manager.h"
87 #include "context.h"
88 #include "builtins.h"
89 #include "rtl-iter.h"
91 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
93 /* These are some macros to abstract register modes. */
94 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
95 && ((HOST_WIDE_INT)(VALUE)) <= 511)
97 #define CONST_OK_FOR_ADD(size) \
98 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
99 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
100 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
101 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
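/* For illustration, a minimal (hypothetical) use of the wrappers above,
   in the spirit of the stack adjustment code later in this file:

     if (CONST_OK_FOR_ADD (size))
       emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (size)));

   i.e. emit a single add when the constant fits the immediate range
   (-512..511 via CONST_OK_FOR_I10 on SHmedia, the CONST_OK_FOR_I08 range
   otherwise), and load the constant into a scratch register first when it
   does not.  */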
103 /* Used to simplify the logic below. Find the attributes wherever
104 they may be. */
105 #define SH_ATTRIBUTES(decl) \
106 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
107 : DECL_ATTRIBUTES (decl) \
108 ? (DECL_ATTRIBUTES (decl)) \
109 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
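/* For illustration, a hypothetical lookup through SH_ATTRIBUTES:

     tree attrs = SH_ATTRIBUTES (current_function_decl);
     bool is_isr = lookup_attribute ("interrupt_handler", attrs) != NULL_TREE;

   For a TYPE_P node this reads TYPE_ATTRIBUTES directly; for a decl it
   prefers DECL_ATTRIBUTES and falls back to the attributes of the decl's
   type when the decl carries none.  */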
111 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
112 int current_function_interrupt;
114 tree sh_deferred_function_attributes;
115 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
117 /* Global variables for machine-dependent things. */
119 /* Which cpu are we scheduling for. */
120 enum processor_type sh_cpu;
122 /* Definitions used in ready queue reordering for first scheduling pass. */
124 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
125 static short *regmode_weight[2];
127 /* Total SFmode and SImode weights of scheduled insns. */
128 static int curr_regmode_pressure[2];
130 /* Number of r0 life regions. */
131 static int r0_life_regions;
133 /* If true, skip cycles for Q -> R movement. */
134 static int skip_cycles = 0;
136 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
137 and returned from sh_reorder2. */
138 static short cached_can_issue_more;
140 /* Unique number for UNSPEC_BBR pattern. */
141 static unsigned int unspec_bbr_uid = 1;
143 /* Provides the class number of the smallest class containing
 144 a given register number.  */
145 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
147 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
174 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
175 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
176 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
177 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
180 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
181 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
182 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
183 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
184 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
185 GENERAL_REGS, GENERAL_REGS,
188 char sh_register_names[FIRST_PSEUDO_REGISTER] \
189 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
191 char sh_additional_register_names[ADDREGNAMES_SIZE] \
192 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
193 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
195 int assembler_dialect;
197 static bool shmedia_space_reserved_for_target_registers;
199 static void split_branches (rtx_insn *);
200 static int branch_dest (rtx);
201 static void print_slot (rtx_sequence *);
202 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
203 static void dump_table (rtx_insn *, rtx_insn *);
204 static bool broken_move (rtx_insn *);
205 static bool mova_p (rtx_insn *);
206 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
207 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
208 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
209 static void sh_reorg (void);
210 static void sh_option_override (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
212 static rtx_insn *frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static HOST_WIDE_INT rounded_frame_size (int);
218 static bool sh_frame_pointer_required (void);
219 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
220 static int sh_mode_needed (int, rtx_insn *);
221 static int sh_mode_after (int, int, rtx_insn *);
222 static int sh_mode_entry (int);
223 static int sh_mode_exit (int);
224 static int sh_mode_priority (int entity, int n);
225 static bool sh_lra_p (void);
227 static rtx mark_constant_pool_use (rtx);
228 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
229 int, bool *);
230 static tree sh_handle_resbank_handler_attribute (tree *, tree,
231 tree, int, bool *);
232 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
233 tree, int, bool *);
234 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
235 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
236 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
237 static void sh_print_operand (FILE *, rtx, int);
238 static void sh_print_operand_address (FILE *, rtx);
239 static bool sh_print_operand_punct_valid_p (unsigned char code);
240 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
241 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
242 static void sh_insert_attributes (tree, tree *);
243 static const char *sh_check_pch_target_flags (int);
244 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
245 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
246 static int sh_issue_rate (void);
247 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
248 static short find_set_regmode_weight (rtx, machine_mode);
249 static short find_insn_regmode_weight (rtx, machine_mode);
250 static void find_regmode_weight (basic_block, machine_mode);
251 static int find_r0_life_regions (basic_block);
252 static void sh_md_init_global (FILE *, int, int);
253 static void sh_md_finish_global (FILE *, int);
254 static int rank_for_reorder (const void *, const void *);
255 static void swap_reorder (rtx_insn **, int);
256 static void ready_reorder (rtx_insn **, int);
257 static bool high_pressure (machine_mode);
258 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
259 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
260 static void sh_md_init (FILE *, int, int);
261 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
263 static bool sh_function_ok_for_sibcall (tree, tree);
265 static bool sh_cannot_modify_jumps_p (void);
266 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
267 static reg_class_t sh_target_reg_class (void);
268 static bool sh_optimize_target_register_callee_saved (bool);
269 static bool sh_ms_bitfield_layout_p (const_tree);
271 static void sh_init_builtins (void);
272 static tree sh_builtin_decl (unsigned, bool);
273 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
274 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
275 HOST_WIDE_INT, tree);
276 static void sh_file_start (void);
277 static bool flow_dependent_p (rtx, rtx);
278 static void flow_dependent_p_1 (rtx, const_rtx, void *);
279 static int shiftcosts (rtx);
280 static int and_xor_ior_costs (rtx, int);
281 static int addsubcosts (rtx);
282 static int multcosts (rtx);
283 static bool unspec_caller_rtx_p (rtx);
284 static bool sh_cannot_copy_insn_p (rtx_insn *);
285 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
286 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
287 static int sh_pr_n_sets (void);
288 static rtx sh_allocate_initial_value (rtx);
289 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
290 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
291 machine_mode,
292 struct secondary_reload_info *);
293 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
294 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
295 static rtx sh_delegitimize_address (rtx);
296 static bool sh_cannot_substitute_mem_equiv_p (rtx);
297 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
298 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
299 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
300 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
301 static int scavenge_reg (HARD_REG_SET *s);
302 struct save_schedule_s;
303 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
304 struct save_schedule_s *, int);
306 static rtx sh_struct_value_rtx (tree, int);
307 static rtx sh_function_value (const_tree, const_tree, bool);
308 static bool sh_function_value_regno_p (const unsigned int);
309 static rtx sh_libcall_value (machine_mode, const_rtx);
310 static bool sh_return_in_memory (const_tree, const_tree);
311 static rtx sh_builtin_saveregs (void);
312 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
313 tree, int *, int);
314 static bool sh_strict_argument_naming (cumulative_args_t);
315 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
316 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
317 static tree sh_build_builtin_va_list (void);
318 static void sh_va_start (tree, rtx);
319 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
320 static bool sh_promote_prototypes (const_tree);
321 static machine_mode sh_promote_function_mode (const_tree type,
322 machine_mode,
323 int *punsignedp,
324 const_tree funtype,
325 int for_return);
326 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
327 const_tree, bool);
328 static bool sh_callee_copies (cumulative_args_t, machine_mode,
329 const_tree, bool);
330 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
331 tree, bool);
332 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
333 const_tree, bool);
334 static rtx sh_function_arg (cumulative_args_t, machine_mode,
335 const_tree, bool);
336 static bool sh_scalar_mode_supported_p (machine_mode);
337 static int sh_dwarf_calling_convention (const_tree);
338 static void sh_encode_section_info (tree, rtx, int);
339 static bool sh2a_function_vector_p (tree);
340 static void sh_trampoline_init (rtx, tree, rtx);
341 static rtx sh_trampoline_adjust_address (rtx);
342 static void sh_conditional_register_usage (void);
343 static bool sh_legitimate_constant_p (machine_mode, rtx);
344 static int mov_insn_size (machine_mode, bool);
345 static int mov_insn_alignment_mask (machine_mode, bool);
346 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
347 unsigned int,
348 enum by_pieces_operation,
349 bool);
350 static bool sequence_insn_p (rtx_insn *);
351 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
352 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
353 machine_mode, bool);
354 static bool sh_legitimate_combined_insn (rtx_insn* insn);
356 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
358 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
360 static const struct attribute_spec sh_attribute_table[] =
362 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
363 affects_type_identity } */
364 { "interrupt_handler", 0, 0, true, false, false,
365 sh_handle_interrupt_handler_attribute, false },
366 { "sp_switch", 1, 1, true, false, false,
367 sh_handle_sp_switch_attribute, false },
368 { "trap_exit", 1, 1, true, false, false,
369 sh_handle_trap_exit_attribute, false },
370 { "renesas", 0, 0, false, true, false,
371 sh_handle_renesas_attribute, false },
372 { "trapa_handler", 0, 0, true, false, false,
373 sh_handle_interrupt_handler_attribute, false },
374 { "nosave_low_regs", 0, 0, true, false, false,
375 sh_handle_interrupt_handler_attribute, false },
376 { "resbank", 0, 0, true, false, false,
377 sh_handle_resbank_handler_attribute, false },
378 { "function_vector", 1, 1, true, false, false,
379 sh2a_handle_function_vector_handler_attribute, false },
380 { NULL, 0, 0, false, false, false, NULL, false }
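/* For illustration, hypothetical user declarations that exercise the
   attributes registered above:

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack")));
     void isr3 (void) __attribute__ ((interrupt_handler, trap_exit (4)));

   The handler functions validate the arguments (sp_switch and trap_exit
   each take exactly one argument, as the min_len/max_len fields above
   require) before the attributes are used for prologue/epilogue
   generation.  */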
383 /* Initialize the GCC target structure. */
384 #undef TARGET_ATTRIBUTE_TABLE
385 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
387 /* The next two are used for debug info when compiling with -gdwarf. */
388 #undef TARGET_ASM_UNALIGNED_HI_OP
389 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
390 #undef TARGET_ASM_UNALIGNED_SI_OP
391 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
393 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
394 #undef TARGET_ASM_UNALIGNED_DI_OP
395 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
396 #undef TARGET_ASM_ALIGNED_DI_OP
397 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE sh_option_override
402 #undef TARGET_PRINT_OPERAND
403 #define TARGET_PRINT_OPERAND sh_print_operand
404 #undef TARGET_PRINT_OPERAND_ADDRESS
405 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
406 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
407 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
411 #undef TARGET_ASM_FUNCTION_EPILOGUE
412 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
414 #undef TARGET_ASM_OUTPUT_MI_THUNK
415 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
417 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
418 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
419 hook_bool_const_tree_hwi_hwi_const_tree_true
421 #undef TARGET_ASM_FILE_START
422 #define TARGET_ASM_FILE_START sh_file_start
423 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
424 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
426 #undef TARGET_REGISTER_MOVE_COST
427 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
429 #undef TARGET_INSERT_ATTRIBUTES
430 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
432 #undef TARGET_SCHED_ADJUST_COST
433 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
435 #undef TARGET_SCHED_ISSUE_RATE
436 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
438 /* The next 5 hooks have been implemented for reenabling sched1. With the
439 help of these macros we are limiting the movement of insns in sched1 to
440 reduce the register pressure. The overall idea is to keep count of SImode
441 and SFmode regs required by already scheduled insns. When these counts
 442 cross some threshold values, give priority to insns that free registers.
 443 The insn that frees registers is most likely to be the insn with the lowest
 444 LUID (original insn order), but such an insn might be in the stalled
445 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
446 up to a max of 8 cycles so that such insns may move from Q -> R.
 448 The descriptions of the hooks are as follows:
450 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
 451 scheduler; it is called inside the sched_init function just after the
 452 find_insn_reg_weights function call.  It is used to calculate the SImode
 453 and SFmode weights of insns in basic blocks, much like what
454 find_insn_reg_weights does.
455 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
457 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
458 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
459 (Q)->(R).
461 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
462 high; reorder the ready queue so that the insn with lowest LUID will be
463 issued next.
465 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
466 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
468 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
469 can be returned from TARGET_SCHED_REORDER2.
471 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
473 #undef TARGET_SCHED_DFA_NEW_CYCLE
474 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
476 #undef TARGET_SCHED_INIT_GLOBAL
477 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
479 #undef TARGET_SCHED_FINISH_GLOBAL
480 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
482 #undef TARGET_SCHED_VARIABLE_ISSUE
483 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
485 #undef TARGET_SCHED_REORDER
486 #define TARGET_SCHED_REORDER sh_reorder
488 #undef TARGET_SCHED_REORDER2
489 #define TARGET_SCHED_REORDER2 sh_reorder2
491 #undef TARGET_SCHED_INIT
492 #define TARGET_SCHED_INIT sh_md_init
494 #undef TARGET_DELEGITIMIZE_ADDRESS
495 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
497 #undef TARGET_LEGITIMIZE_ADDRESS
498 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
500 #undef TARGET_CANNOT_MODIFY_JUMPS_P
501 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
502 #undef TARGET_CAN_FOLLOW_JUMP
503 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
504 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
505 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
506 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
507 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
508 sh_optimize_target_register_callee_saved
510 #undef TARGET_MS_BITFIELD_LAYOUT_P
511 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
513 #undef TARGET_INIT_BUILTINS
514 #define TARGET_INIT_BUILTINS sh_init_builtins
515 #undef TARGET_BUILTIN_DECL
516 #define TARGET_BUILTIN_DECL sh_builtin_decl
517 #undef TARGET_EXPAND_BUILTIN
518 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
520 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
521 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
523 #undef TARGET_CANNOT_COPY_INSN_P
524 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
525 #undef TARGET_RTX_COSTS
526 #define TARGET_RTX_COSTS sh_rtx_costs
527 #undef TARGET_ADDRESS_COST
528 #define TARGET_ADDRESS_COST sh_address_cost
529 #undef TARGET_ALLOCATE_INITIAL_VALUE
530 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
532 #undef TARGET_MACHINE_DEPENDENT_REORG
533 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
535 #undef TARGET_DWARF_REGISTER_SPAN
536 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
538 #ifdef HAVE_AS_TLS
539 #undef TARGET_HAVE_TLS
540 #define TARGET_HAVE_TLS true
541 #endif
543 #undef TARGET_PROMOTE_PROTOTYPES
544 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
545 #undef TARGET_PROMOTE_FUNCTION_MODE
546 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
548 #undef TARGET_FUNCTION_VALUE
549 #define TARGET_FUNCTION_VALUE sh_function_value
550 #undef TARGET_FUNCTION_VALUE_REGNO_P
551 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
552 #undef TARGET_LIBCALL_VALUE
553 #define TARGET_LIBCALL_VALUE sh_libcall_value
554 #undef TARGET_STRUCT_VALUE_RTX
555 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
556 #undef TARGET_RETURN_IN_MEMORY
557 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
559 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
560 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
563 #undef TARGET_STRICT_ARGUMENT_NAMING
564 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
565 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
566 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
567 #undef TARGET_MUST_PASS_IN_STACK
568 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
569 #undef TARGET_PASS_BY_REFERENCE
570 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
571 #undef TARGET_CALLEE_COPIES
572 #define TARGET_CALLEE_COPIES sh_callee_copies
573 #undef TARGET_ARG_PARTIAL_BYTES
574 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
575 #undef TARGET_FUNCTION_ARG
576 #define TARGET_FUNCTION_ARG sh_function_arg
577 #undef TARGET_FUNCTION_ARG_ADVANCE
578 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
580 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
581 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
583 #undef TARGET_BUILD_BUILTIN_VA_LIST
584 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
585 #undef TARGET_EXPAND_BUILTIN_VA_START
586 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
587 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
588 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
590 #undef TARGET_SCALAR_MODE_SUPPORTED_P
591 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
592 #undef TARGET_VECTOR_MODE_SUPPORTED_P
593 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
595 #undef TARGET_CHECK_PCH_TARGET_FLAGS
596 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
598 #undef TARGET_DWARF_CALLING_CONVENTION
599 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
601 #undef TARGET_FRAME_POINTER_REQUIRED
602 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
604 #undef TARGET_MODE_EMIT
605 #define TARGET_MODE_EMIT sh_emit_mode_set
607 #undef TARGET_MODE_NEEDED
608 #define TARGET_MODE_NEEDED sh_mode_needed
610 #undef TARGET_MODE_AFTER
611 #define TARGET_MODE_AFTER sh_mode_after
613 #undef TARGET_MODE_ENTRY
614 #define TARGET_MODE_ENTRY sh_mode_entry
616 #undef TARGET_MODE_EXIT
617 #define TARGET_MODE_EXIT sh_mode_exit
619 #undef TARGET_MODE_PRIORITY
620 #define TARGET_MODE_PRIORITY sh_mode_priority
622 /* Return regmode weight for insn. */
623 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
624 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
626 /* Return current register pressure for regmode. */
627 #define CURR_REGMODE_PRESSURE(MODE)\
628 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
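/* For illustration, a minimal (hypothetical) pressure check built on the
   macros above, in the spirit of the high_pressure/sh_reorder hooks
   declared earlier (the threshold here is made up; the real hooks use
   their own tuned limits):

     static bool
     simode_pressure_high_example (void)
     {
       return CURR_REGMODE_PRESSURE (SImode) > 5;
     }

   When the count of SImode registers required by already scheduled insns
   crosses such a threshold, the ready queue is reordered to prefer insns
   that free registers.  */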
630 #undef TARGET_ENCODE_SECTION_INFO
631 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
633 #undef TARGET_LRA_P
634 #define TARGET_LRA_P sh_lra_p
636 #undef TARGET_SECONDARY_RELOAD
637 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
639 #undef TARGET_PREFERRED_RELOAD_CLASS
640 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
642 #undef TARGET_CONDITIONAL_REGISTER_USAGE
643 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
645 #undef TARGET_LEGITIMATE_ADDRESS_P
646 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
648 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
649 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
651 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
652 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
653 sh_legitimize_address_displacement
655 #undef TARGET_TRAMPOLINE_INIT
656 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
657 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
658 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
663 #undef TARGET_CANONICALIZE_COMPARISON
664 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
666 #undef TARGET_LEGITIMATE_COMBINED_INSN
667 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
669 #undef TARGET_FIXED_CONDITION_CODE_REGS
670 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
672 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
673 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
674 sh_use_by_pieces_infrastructure_p
676 /* Machine-specific symbol_ref flags. */
677 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
 679 /* The tas.b instruction sets bit 7 of the byte, i.e. 0x80.  This value
680 is used by optabs.c atomic op expansion code as well as in sync.md. */
681 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
682 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
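/* For illustration (hypothetical user code): with this trueval the byte
   written by __atomic_test_and_set is 0x80 rather than 1, matching what
   the tas.b hardware instruction stores, so

     static unsigned char lock;
     bool was_set = __atomic_test_and_set (&lock, __ATOMIC_ACQUIRE);

   can map directly onto a tas.b based sequence.  */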
684 struct gcc_target targetm = TARGET_INITIALIZER;
687 /* Information on the currently selected atomic model.
688 This is initialized in sh_option_override. */
689 static sh_atomic_model selected_atomic_model_;
691 const sh_atomic_model&
692 selected_atomic_model (void)
694 return selected_atomic_model_;
697 static sh_atomic_model
698 parse_validate_atomic_model_option (const char* str)
700 const char* model_names[sh_atomic_model::num_models];
701 model_names[sh_atomic_model::none] = "none";
702 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
703 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
704 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
705 model_names[sh_atomic_model::soft_imask] = "soft-imask";
707 const char* model_cdef_names[sh_atomic_model::num_models];
708 model_cdef_names[sh_atomic_model::none] = "NONE";
709 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
710 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
711 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
712 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
714 sh_atomic_model ret;
715 ret.type = sh_atomic_model::none;
716 ret.name = model_names[sh_atomic_model::none];
717 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
718 ret.strict = false;
719 ret.tcb_gbr_offset = -1;
721 /* Handle empty string as 'none'. */
722 if (str == NULL || *str == '\0')
723 return ret;
725 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
727 std::vector<std::string> tokens;
728 for (std::stringstream ss (str); ss.good (); )
730 tokens.push_back (std::string ());
731 std::getline (ss, tokens.back (), ',');
734 if (tokens.empty ())
735 err_ret ("invalid atomic model option");
737 /* The first token must be the atomic model name. */
739 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
740 if (tokens.front () == model_names[i])
742 ret.type = (sh_atomic_model::enum_type)i;
743 ret.name = model_names[i];
744 ret.cdef_name = model_cdef_names[i];
745 goto got_mode_name;
748 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
749 got_mode_name:;
752 /* Go through the remaining tokens. */
753 for (size_t i = 1; i < tokens.size (); ++i)
755 if (tokens[i] == "strict")
756 ret.strict = true;
757 else if (tokens[i].find ("gbr-offset=") == 0)
759 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
760 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
761 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
762 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
763 "option", offset_str.c_str ());
765 else
766 err_ret ("unknown parameter \"%s\" in atomic model option",
767 tokens[i].c_str ());
770 /* Check that the selection makes sense. */
771 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
772 err_ret ("atomic operations are not supported on SHmedia");
774 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
775 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
776 ret.name);
778 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
779 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
781 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
782 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
784 if (ret.type == sh_atomic_model::soft_tcb
785 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
786 || (ret.tcb_gbr_offset & 3) != 0))
787 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
788 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
789 ret.name);
791 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
792 err_ret ("cannot use atomic model %s in user mode", ret.name);
794 return ret;
796 #undef err_ret
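/* For illustration, how some option strings parse (the option itself is
   -matomic-model=<model>[,<param>...]):

     -matomic-model=soft-gusa
        -> type soft_gusa, no extra parameters
     -matomic-model=soft-tcb,gbr-offset=16,strict
        -> type soft_tcb, tcb_gbr_offset 16, strict true
     -matomic-model=soft-tcb
        -> rejected: soft-tcb requires the gbr-offset parameter
     -matomic-model=hard-llcs   (on a non-SH4A target)
        -> rejected: hard-llcs is only available on SH4A targets

   The gbr-offset value must be a multiple of 4 in the range 0-1020.  */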
799 /* Register SH specific RTL passes. */
800 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
801 const char* name);
802 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
803 const char* name);
804 static void
805 register_sh_passes (void)
807 if (!TARGET_SH1)
808 return;
810 /* Running the sh_treg_combine pass after ce1 generates better code when
811 comparisons are combined and reg-reg moves are introduced, because
812 reg-reg moves will be eliminated afterwards. However, there are quite
 813 a few cases where combine will be unable to fold comparison-related insns,
 814 so for now don't do it.
815 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
816 PASS_POS_INSERT_AFTER, "ce1", 1);
819 /* Run sh_treg_combine pass after combine but before register allocation. */
820 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
821 PASS_POS_INSERT_AFTER, "split1", 1);
823 /* Run sh_treg_combine pass after register allocation and basic block
824 reordering as this sometimes creates new opportunities. */
825 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
826 PASS_POS_INSERT_AFTER, "split4", 1);
828 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
829 is known after a conditional branch.
830 This must be done after basic blocks and branch conditions have
831 stabilized and won't be changed by further passes. */
832 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
833 PASS_POS_INSERT_BEFORE, "sched2", 1);
836 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
837 various options, and do some machine dependent initialization. */
838 static void
839 sh_option_override (void)
841 int regno;
843 SUBTARGET_OVERRIDE_OPTIONS;
844 if (optimize > 1 && !optimize_size)
845 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
847 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
848 TARGET_CBRANCHDI4 = 1;
849 TARGET_CMPEQDI_T = 0;
851 sh_cpu = PROCESSOR_SH1;
852 assembler_dialect = 0;
853 if (TARGET_SH2)
854 sh_cpu = PROCESSOR_SH2;
855 if (TARGET_SH2E)
856 sh_cpu = PROCESSOR_SH2E;
857 if (TARGET_SH2A)
858 sh_cpu = PROCESSOR_SH2A;
859 if (TARGET_SH3)
860 sh_cpu = PROCESSOR_SH3;
861 if (TARGET_SH3E)
862 sh_cpu = PROCESSOR_SH3E;
863 if (TARGET_SH4)
865 assembler_dialect = 1;
866 sh_cpu = PROCESSOR_SH4;
868 if (TARGET_SH4A)
870 assembler_dialect = 1;
871 sh_cpu = PROCESSOR_SH4A;
873 if (TARGET_SH5)
875 sh_cpu = PROCESSOR_SH5;
876 target_flags |= MASK_ALIGN_DOUBLE;
877 if (TARGET_SHMEDIA_FPU)
878 target_flags |= MASK_FMOVD;
879 if (TARGET_SHMEDIA)
881 /* There are no delay slots on SHmedia. */
882 flag_delayed_branch = 0;
 883 /* Relaxation isn't yet supported for SHmedia.  */
884 target_flags &= ~MASK_RELAX;
885 /* After reload, if conversion does little good but can cause
886 ICEs:
887 - find_if_block doesn't do anything for SH because we don't
888 have conditional execution patterns. (We use conditional
889 move patterns, which are handled differently, and only
890 before reload).
891 - find_cond_trap doesn't do anything for the SH because we
892 don't have conditional traps.
893 - find_if_case_1 uses redirect_edge_and_branch_force in
894 the only path that does an optimization, and this causes
895 an ICE when branch targets are in registers.
896 - find_if_case_2 doesn't do anything for the SHmedia after
897 reload except when it can redirect a tablejump - and
898 that's rather rare. */
899 flag_if_conversion2 = 0;
900 if (! strcmp (sh_div_str, "call"))
901 sh_div_strategy = SH_DIV_CALL;
902 else if (! strcmp (sh_div_str, "call2"))
903 sh_div_strategy = SH_DIV_CALL2;
904 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
905 sh_div_strategy = SH_DIV_FP;
906 else if (! strcmp (sh_div_str, "inv"))
907 sh_div_strategy = SH_DIV_INV;
908 else if (! strcmp (sh_div_str, "inv:minlat"))
909 sh_div_strategy = SH_DIV_INV_MINLAT;
910 else if (! strcmp (sh_div_str, "inv20u"))
911 sh_div_strategy = SH_DIV_INV20U;
912 else if (! strcmp (sh_div_str, "inv20l"))
913 sh_div_strategy = SH_DIV_INV20L;
914 else if (! strcmp (sh_div_str, "inv:call2"))
915 sh_div_strategy = SH_DIV_INV_CALL2;
916 else if (! strcmp (sh_div_str, "inv:call"))
917 sh_div_strategy = SH_DIV_INV_CALL;
918 else if (! strcmp (sh_div_str, "inv:fp"))
920 if (TARGET_FPU_ANY)
921 sh_div_strategy = SH_DIV_INV_FP;
922 else
923 sh_div_strategy = SH_DIV_INV;
925 TARGET_CBRANCHDI4 = 0;
926 /* Assembler CFI isn't yet fully supported for SHmedia. */
927 flag_dwarf2_cfi_asm = 0;
930 else
932 /* Only the sh64-elf assembler fully supports .quad properly. */
933 targetm.asm_out.aligned_op.di = NULL;
934 targetm.asm_out.unaligned_op.di = NULL;
 937 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
938 Disable it for everything else. */
939 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
940 TARGET_USERMODE = false;
942 if (TARGET_SH1)
944 if (! strcmp (sh_div_str, "call-div1"))
945 sh_div_strategy = SH_DIV_CALL_DIV1;
946 else if (! strcmp (sh_div_str, "call-fp")
947 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
948 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
949 sh_div_strategy = SH_DIV_CALL_FP;
950 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
951 sh_div_strategy = SH_DIV_CALL_TABLE;
952 else
953 /* Pick one that makes most sense for the target in general.
 954 It does little good to use different functions depending
955 on -Os, since then we'll end up with two different functions
956 when some of the code is compiled for size, and some for
957 speed. */
959 /* SH4 tends to emphasize speed. */
960 if (TARGET_HARD_SH4)
961 sh_div_strategy = SH_DIV_CALL_TABLE;
962 /* These have their own way of doing things. */
963 else if (TARGET_SH2A)
964 sh_div_strategy = SH_DIV_INTRINSIC;
965 /* ??? Should we use the integer SHmedia function instead? */
966 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
967 sh_div_strategy = SH_DIV_CALL_FP;
968 /* SH1 .. SH3 cores often go into small-footprint systems, so
969 default to the smallest implementation available. */
970 else
971 sh_div_strategy = SH_DIV_CALL_DIV1;
973 if (!TARGET_SH1)
974 TARGET_PRETEND_CMOVE = 0;
975 if (sh_divsi3_libfunc[0])
976 ; /* User supplied - leave it alone. */
977 else if (TARGET_DIVIDE_CALL_FP)
978 sh_divsi3_libfunc = "__sdivsi3_i4";
979 else if (TARGET_DIVIDE_CALL_TABLE)
980 sh_divsi3_libfunc = "__sdivsi3_i4i";
981 else if (TARGET_SH5)
982 sh_divsi3_libfunc = "__sdivsi3_1";
983 else
984 sh_divsi3_libfunc = "__sdivsi3";
986 if (sh_branch_cost == -1)
988 /* The SH1 does not have delay slots, hence we get a pipeline stall
989 at every branch. The SH4 is superscalar, so the single delay slot
990 is not sufficient to keep both pipelines filled.
991 In any case, set the default branch cost to '2', as it results in
992 slightly overall smaller code and also enables some if conversions
993 that are required for matching special T bit related insns. */
994 sh_branch_cost = 2;
997 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
998 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
999 TARGET_ZDCBRANCH = 1;
1001 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1002 if (! VALID_REGISTER_P (regno))
1003 sh_register_names[regno][0] = '\0';
1005 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1006 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1007 sh_additional_register_names[regno][0] = '\0';
1009 if ((flag_pic && ! TARGET_PREFERGOT)
1010 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1011 flag_no_function_cse = 1;
1013 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1015 /* Never run scheduling before reload, since that can
1016 break global alloc, and generates slower code anyway due
1017 to the pressure on R0. */
1018 /* Enable sched1 for SH4 if the user explicitly requests.
1019 When sched1 is enabled, the ready queue will be reordered by
1020 the target hooks if pressure is high.  We cannot do this for
1021 PIC, SH3 and lower as they give spill failures for R0. */
1022 if (!TARGET_HARD_SH4 || flag_pic)
1023 flag_schedule_insns = 0;
1024 /* ??? Current exception handling places basic block boundaries
1025 after call_insns.  It causes high pressure on R0 and gives
1026 spill failures for R0 in reload. See PR 22553 and the thread
1027 on gcc-patches
1028 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1029 else if (flag_exceptions)
1031 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1032 warning (0, "ignoring -fschedule-insns because of exception "
1033 "handling bug");
1034 flag_schedule_insns = 0;
1036 else if (flag_schedule_insns
1037 && !global_options_set.x_flag_schedule_insns)
1038 flag_schedule_insns = 0;
1041 /* Unwind info is not correct around the CFG unless either a frame
1042 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1043 unwind info generation to be aware of the CFG and propagating states
1044 around edges. */
1045 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1046 || flag_exceptions || flag_non_call_exceptions)
1047 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1049 warning (0, "unwind tables currently require either a frame pointer "
1050 "or -maccumulate-outgoing-args for correctness");
1051 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1054 /* Adjust loop, jump and function alignment values (in bytes), if those
1055 were not specified by the user using -falign-loops, -falign-jumps
1056 and -falign-functions options.
1057 32 bit alignment is better for speed, because instructions can be
1058 fetched as a pair from a longword boundary. For size use 16 bit
1059 alignment to get more compact code.
1060 Aligning all jumps increases the code size, even if it might
1061 result in slightly faster code. Thus, it is set to the smallest
1062 alignment possible if not specified by the user. */
1063 if (align_loops == 0)
1065 if (TARGET_SH5)
1066 align_loops = 8;
1067 else
1068 align_loops = optimize_size ? 2 : 4;
1071 if (align_jumps == 0)
1073 if (TARGET_SHMEDIA)
1074 align_jumps = 1 << CACHE_LOG;
1075 else
1076 align_jumps = 2;
1078 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1079 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1081 if (align_functions == 0)
1083 if (TARGET_SHMEDIA)
1084 align_functions = optimize_size
1085 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1086 else
1087 align_functions = optimize_size ? 2 : 4;
1090 /* The linker relaxation code breaks when a function contains
1091 alignments that are larger than that at the start of a
1092 compilation unit. */
1093 if (TARGET_RELAX)
1095 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1097 /* Also take possible .long constants / mova tables into account. */
1098 if (min_align < 4)
1099 min_align = 4;
1100 if (align_functions < min_align)
1101 align_functions = min_align;
1104 if (flag_unsafe_math_optimizations)
1106 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1107 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1108 TARGET_FSCA = 1;
1110 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1111 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1112 TARGET_FSRRA = 1;
1115 /* Allow fsrra insn only if -funsafe-math-optimizations and
1116 -ffinite-math-only is enabled. */
1117 TARGET_FSRRA = TARGET_FSRRA
1118 && flag_unsafe_math_optimizations
1119 && flag_finite_math_only;
1121 /* If the -mieee option was not explicitly set by the user, turn it on
1122 unless -ffinite-math-only was specified. See also PR 33135. */
1123 if (! global_options_set.x_TARGET_IEEE)
1124 TARGET_IEEE = ! flag_finite_math_only;
1126 if (sh_fixed_range_str)
1127 sh_fix_range (sh_fixed_range_str);
1129 /* This target defaults to strict volatile bitfields. */
1130 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1131 flag_strict_volatile_bitfields = 1;
1133 /* Parse atomic model option and make sure it is valid for the current
1134 target CPU. */
1135 selected_atomic_model_
1136 = parse_validate_atomic_model_option (sh_atomic_model_str);
1138 register_sh_passes ();
1141 /* Print the operand address in x to the stream. */
1142 static void
1143 sh_print_operand_address (FILE *stream, rtx x)
1145 switch (GET_CODE (x))
1147 case REG:
1148 case SUBREG:
1149 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1150 break;
1152 case PLUS:
1154 rtx base = XEXP (x, 0);
1155 rtx index = XEXP (x, 1);
1157 switch (GET_CODE (index))
1159 case CONST_INT:
1160 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1161 reg_names[true_regnum (base)]);
1162 break;
1164 case REG:
1165 case SUBREG:
1167 int base_num = true_regnum (base);
1168 int index_num = true_regnum (index);
1170 fprintf (stream, "@(r0,%s)",
1171 reg_names[MAX (base_num, index_num)]);
1172 break;
1175 default:
1176 gcc_unreachable ();
1179 break;
1181 case PRE_DEC:
1182 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1183 break;
1185 case POST_INC:
1186 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1187 break;
1189 default:
1190 x = mark_constant_pool_use (x);
1191 output_addr_const (stream, x);
1192 break;
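/* For illustration, some addresses and the syntax printed for them by
   sh_print_operand_address above (hypothetical RTL, register names used
   for readability):

     (reg r4)                       ->  @r4
     (plus (reg r4) (const_int 8))  ->  @(8,r4)
     (plus (reg r4) (reg r0))       ->  @(r0,r4)
     (pre_dec (reg r15))            ->  @-r15
     (post_inc (reg r15))           ->  @r15+

   Anything else (labels, symbols, constants) goes through
   mark_constant_pool_use and output_addr_const.  */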
1196 /* Print operand x (an rtx) in assembler syntax to file stream
1197 according to modifier code.
1199 '.' print a .s if insn needs delay slot
1200 ',' print LOCAL_LABEL_PREFIX
1201 '@' print trap, rte or rts depending upon pragma interruptness
1202 '#' output a nop if there is nothing to put in the delay slot
1203 ''' print likelihood suffix (/u for unlikely).
1204 '>' print branch target if -fverbose-asm
1205 'O' print a constant without the #
1206 'R' print the LSW of a dp value - changes if in little endian
1207 'S' print the MSW of a dp value - changes if in little endian
1208 'T' print the next word of a dp value - same as 'R' in big endian mode.
1209 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1210 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1211 'N' print 'r63' if the operand is (const_int 0).
1212 'd' print a V2SF reg as dN instead of fpN.
1213 'm' print a pair `base,offset' or `base,index', for LD and ST.
1214 'U' Likewise for {LD,ST}{HI,LO}.
1215 'V' print the position of a single bit set.
1216 'W' print the position of a single bit cleared.
1217 't' print a memory address which is a register.
1218 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1219 'o' output an operator. */
1220 static void
1221 sh_print_operand (FILE *stream, rtx x, int code)
1223 int regno;
1224 machine_mode mode;
1226 switch (code)
1228 tree trapa_attr;
1230 case '.':
1231 if (final_sequence
1232 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1233 && get_attr_length (final_sequence->insn (1)))
1234 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1235 break;
1236 case ',':
1237 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1238 break;
1239 case '@':
1240 trapa_attr = lookup_attribute ("trap_exit",
1241 DECL_ATTRIBUTES (current_function_decl));
1242 if (trapa_attr)
1243 fprintf (stream, "trapa #%ld",
1244 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1245 else if (sh_cfun_interrupt_handler_p ())
1247 if (sh_cfun_resbank_handler_p ())
1248 fprintf (stream, "resbank\n");
1249 fprintf (stream, "rte");
1251 else
1252 fprintf (stream, "rts");
1253 break;
1254 case '#':
1255 /* Output a nop if there's nothing in the delay slot. */
1256 if (dbr_sequence_length () == 0)
1257 fprintf (stream, "\n\tnop");
1258 break;
1259 case '\'':
1261 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1263 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1264 fputs ("/u", stream);
1265 break;
1267 case '>':
1268 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1270 fputs ("\t! target: ", stream);
1271 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1273 break;
1274 case 'O':
1275 x = mark_constant_pool_use (x);
1276 output_addr_const (stream, x);
1277 break;
1278 /* N.B.: %R / %S / %T adjust memory addresses by four.
1279 For SHMEDIA, that means they can be used to access the first and
1280 second 32 bit part of a 64 bit (or larger) value that
1281 might be held in floating point registers or memory.
1282 While they can be used to access 64 bit parts of a larger value
1283 held in general purpose registers, that won't work with memory -
1284 neither for fp registers, since the frxx names are used. */
1285 case 'R':
1286 if (REG_P (x) || GET_CODE (x) == SUBREG)
1288 regno = true_regnum (x);
1289 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1290 fputs (reg_names[regno], (stream));
1292 else if (MEM_P (x))
1294 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1295 sh_print_operand_address (stream, XEXP (x, 0));
1297 else
1299 rtx sub = NULL_RTX;
1301 mode = GET_MODE (x);
1302 if (mode == VOIDmode)
1303 mode = DImode;
1304 if (GET_MODE_SIZE (mode) >= 8)
1305 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1306 if (sub)
1307 sh_print_operand (stream, sub, 0);
1308 else
1309 output_operand_lossage ("invalid operand to %%R");
1311 break;
1312 case 'S':
1313 if (REG_P (x) || GET_CODE (x) == SUBREG)
1315 regno = true_regnum (x);
1316 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1317 fputs (reg_names[regno], (stream));
1319 else if (MEM_P (x))
1321 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1322 sh_print_operand_address (stream, XEXP (x, 0));
1324 else
1326 rtx sub = NULL_RTX;
1328 mode = GET_MODE (x);
1329 if (mode == VOIDmode)
1330 mode = DImode;
1331 if (GET_MODE_SIZE (mode) >= 8)
1332 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1333 if (sub)
1334 sh_print_operand (stream, sub, 0);
1335 else
1336 output_operand_lossage ("invalid operand to %%S");
1338 break;
1339 case 'T':
1340 /* Next word of a double. */
1341 switch (GET_CODE (x))
1343 case REG:
1344 fputs (reg_names[REGNO (x) + 1], (stream));
1345 break;
1346 case MEM:
1347 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1348 && GET_CODE (XEXP (x, 0)) != POST_INC)
1349 x = adjust_address (x, SImode, 4);
1350 sh_print_operand_address (stream, XEXP (x, 0));
1351 break;
1352 default:
1353 break;
1355 break;
1357 case 't':
1358 gcc_assert (MEM_P (x));
1359 x = XEXP (x, 0);
1360 switch (GET_CODE (x))
1362 case REG:
1363 case SUBREG:
1364 sh_print_operand (stream, x, 0);
1365 break;
1366 default:
1367 break;
1369 break;
1371 case 'o':
1372 switch (GET_CODE (x))
1374 case PLUS: fputs ("add", stream); break;
1375 case MINUS: fputs ("sub", stream); break;
1376 case MULT: fputs ("mul", stream); break;
1377 case DIV: fputs ("div", stream); break;
1378 case EQ: fputs ("eq", stream); break;
1379 case NE: fputs ("ne", stream); break;
1380 case GT: case LT: fputs ("gt", stream); break;
1381 case GE: case LE: fputs ("ge", stream); break;
1382 case GTU: case LTU: fputs ("gtu", stream); break;
1383 case GEU: case LEU: fputs ("geu", stream); break;
1384 default:
1385 break;
1387 break;
1388 case 'M':
1389 if (TARGET_SHMEDIA)
1391 if (MEM_P (x)
1392 && GET_CODE (XEXP (x, 0)) == PLUS
1393 && (REG_P (XEXP (XEXP (x, 0), 1))
1394 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1395 fputc ('x', stream);
1397 else
1399 if (MEM_P (x))
1401 switch (GET_MODE (x))
1403 case QImode: fputs (".b", stream); break;
1404 case HImode: fputs (".w", stream); break;
1405 case SImode: fputs (".l", stream); break;
1406 case SFmode: fputs (".s", stream); break;
1407 case DFmode: fputs (".d", stream); break;
1408 default: gcc_unreachable ();
1412 break;
1414 case 'm':
1415 gcc_assert (MEM_P (x));
1416 x = XEXP (x, 0);
1417 /* Fall through. */
1418 case 'U':
1419 switch (GET_CODE (x))
1421 case REG:
1422 case SUBREG:
1423 sh_print_operand (stream, x, 0);
1424 fputs (", 0", stream);
1425 break;
1427 case PLUS:
1428 sh_print_operand (stream, XEXP (x, 0), 0);
1429 fputs (", ", stream);
1430 sh_print_operand (stream, XEXP (x, 1), 0);
1431 break;
1433 default:
1434 gcc_unreachable ();
1436 break;
1438 case 'V':
1440 int num = exact_log2 (INTVAL (x));
1441 gcc_assert (num >= 0);
1442 fprintf (stream, "#%d", num);
1444 break;
1446 case 'W':
1448 int num = exact_log2 (~INTVAL (x));
1449 gcc_assert (num >= 0);
1450 fprintf (stream, "#%d", num);
1452 break;
1454 case 'd':
1455 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1457 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1458 break;
1460 case 'N':
1461 if (x == CONST0_RTX (GET_MODE (x)))
1463 fprintf ((stream), "r63");
1464 break;
1466 goto default_output;
1467 case 'u':
1468 if (CONST_INT_P (x))
1470 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1471 break;
1473 /* Fall through. */
1475 default_output:
1476 default:
1477 regno = 0;
1478 mode = GET_MODE (x);
1480 switch (GET_CODE (x))
1482 case TRUNCATE:
1484 rtx inner = XEXP (x, 0);
1485 int offset = 0;
1486 machine_mode inner_mode;
1488 /* We might see SUBREGs with vector mode registers inside. */
1489 if (GET_CODE (inner) == SUBREG
1490 && (GET_MODE_SIZE (GET_MODE (inner))
1491 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1492 && subreg_lowpart_p (inner))
1493 inner = SUBREG_REG (inner);
1494 if (CONST_INT_P (inner))
1496 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1497 goto default_output;
1499 inner_mode = GET_MODE (inner);
1500 if (GET_CODE (inner) == SUBREG
1501 && (GET_MODE_SIZE (GET_MODE (inner))
1502 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1503 && REG_P (SUBREG_REG (inner)))
1505 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1506 GET_MODE (SUBREG_REG (inner)),
1507 SUBREG_BYTE (inner),
1508 GET_MODE (inner));
1509 inner = SUBREG_REG (inner);
1511 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1512 abort ();
1513 /* Floating point register pairs are always big endian;
1514 general purpose registers are 64 bit wide. */
1515 regno = REGNO (inner);
1516 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1517 - HARD_REGNO_NREGS (regno, mode))
1518 + offset;
1519 x = inner;
1520 goto reg;
1522 case SIGN_EXTEND:
1523 x = XEXP (x, 0);
1524 goto reg;
1525 /* FIXME: We need this on SHmedia32 because reload generates
1526 some sign-extended HI or QI loads into DImode registers
1527 but, because Pmode is SImode, the address ends up with a
1528 subreg:SI of the DImode register. Maybe reload should be
1529 fixed so as to apply alter_subreg to such loads? */
1530 case IF_THEN_ELSE:
1531 gcc_assert (trapping_target_operand (x, VOIDmode));
1532 x = XEXP (XEXP (x, 2), 0);
1533 goto default_output;
1534 case SUBREG:
1535 gcc_assert (SUBREG_BYTE (x) == 0
1536 && REG_P (SUBREG_REG (x)));
1538 x = SUBREG_REG (x);
1539 /* Fall through. */
1541 reg:
1542 case REG:
1543 regno += REGNO (x);
1544 if (FP_REGISTER_P (regno)
1545 && mode == V16SFmode)
1546 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1547 else if (FP_REGISTER_P (REGNO (x))
1548 && mode == V4SFmode)
1549 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1550 else if (REG_P (x)
1551 && mode == V2SFmode)
1552 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1553 else if (FP_REGISTER_P (REGNO (x))
1554 && GET_MODE_SIZE (mode) > 4)
1555 fprintf ((stream), "d%s", reg_names[regno] + 1);
1556 else
1557 fputs (reg_names[regno], (stream));
1558 break;
1560 case MEM:
1561 output_address (XEXP (x, 0));
1562 break;
1564 default:
1565 if (TARGET_SH1)
1566 fputc ('#', stream);
1567 output_addr_const (stream, x);
1568 break;
1570 break;
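/* For illustration, how a few of the punctuation codes documented above
   are meant to appear in output templates (hypothetical templates, not
   copied from sh.md):

     "jmp\t@%0%#"   -- '#' appends "\n\tnop" when the delay slot is empty
     "bt%.\t%l0"    -- '.' appends ".s" (or "/s") when the branch carries a
                       filled, non-annulled delay slot
     "%,"           -- ',' prints LOCAL_LABEL_PREFIX, used when emitting
                       compiler-local labels

   The operand codes ('R', 'S', 'T', 'd', 'm', ...) apply to a specific
   operand, e.g. "%R1" prints the least significant word of a double-word
   operand 1.  */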
1574 static bool
1575 sh_print_operand_punct_valid_p (unsigned char code)
1577 return (code == '.' || code == '#' || code == '@' || code == ','
1578 || code == '$' || code == '\'' || code == '>');
1581 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1582 static bool
1583 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1585 if (GET_CODE (x) == UNSPEC)
1587 switch (XINT (x, 1))
1589 case UNSPEC_DATALABEL:
1590 fputs ("datalabel ", file);
1591 output_addr_const (file, XVECEXP (x, 0, 0));
1592 break;
1593 case UNSPEC_PIC:
1594 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1595 output_addr_const (file, XVECEXP (x, 0, 0));
1596 break;
1597 case UNSPEC_GOT:
1598 output_addr_const (file, XVECEXP (x, 0, 0));
1599 fputs ("@GOT", file);
1600 break;
1601 case UNSPEC_GOTOFF:
1602 output_addr_const (file, XVECEXP (x, 0, 0));
1603 fputs ("@GOTOFF", file);
1604 break;
1605 case UNSPEC_PLT:
1606 output_addr_const (file, XVECEXP (x, 0, 0));
1607 fputs ("@PLT", file);
1608 break;
1609 case UNSPEC_GOTPLT:
1610 output_addr_const (file, XVECEXP (x, 0, 0));
1611 fputs ("@GOTPLT", file);
1612 break;
1613 case UNSPEC_DTPOFF:
1614 output_addr_const (file, XVECEXP (x, 0, 0));
1615 fputs ("@DTPOFF", file);
1616 break;
1617 case UNSPEC_GOTTPOFF:
1618 output_addr_const (file, XVECEXP (x, 0, 0));
1619 fputs ("@GOTTPOFF", file);
1620 break;
1621 case UNSPEC_TPOFF:
1622 output_addr_const (file, XVECEXP (x, 0, 0));
1623 fputs ("@TPOFF", file);
1624 break;
1625 case UNSPEC_CALLER:
1627 char name[32];
1628 /* LPCS stands for Label for PIC Call Site. */
1629 targetm.asm_out.generate_internal_label (name, "LPCS",
1630 INTVAL (XVECEXP (x, 0, 0)));
1631 assemble_name (file, name);
1633 break;
1634 case UNSPEC_EXTRACT_S16:
1635 case UNSPEC_EXTRACT_U16:
1637 rtx val, shift;
1639 val = XVECEXP (x, 0, 0);
1640 shift = XVECEXP (x, 0, 1);
1641 fputc ('(', file);
1642 if (shift != const0_rtx)
1643 fputc ('(', file);
1644 if (GET_CODE (val) == CONST
1645 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1647 fputc ('(', file);
1648 output_addr_const (file, val);
1649 fputc (')', file);
1651 else
1652 output_addr_const (file, val);
1653 if (shift != const0_rtx)
1655 fputs (" >> ", file);
1656 output_addr_const (file, shift);
1657 fputc (')', file);
1659 fputs (" & 65535)", file);
1661 break;
1662 case UNSPEC_SYMOFF:
1663 output_addr_const (file, XVECEXP (x, 0, 0));
1664 fputc ('-', file);
1665 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1667 fputc ('(', file);
1668 output_addr_const (file, XVECEXP (x, 0, 1));
1669 fputc (')', file);
1671 else
1672 output_addr_const (file, XVECEXP (x, 0, 1));
1673 break;
1674 case UNSPEC_PCREL_SYMOFF:
1675 output_addr_const (file, XVECEXP (x, 0, 0));
1676 fputs ("-(", file);
1677 output_addr_const (file, XVECEXP (x, 0, 1));
1678 fputs ("-.)", file);
1679 break;
1680 default:
1681 return false;
1683 return true;
1685 else
1686 return false;
1689 /* Encode symbol attributes of a SYMBOL_REF into its
1690 SYMBOL_REF_FLAGS. */
1691 static void
1692 sh_encode_section_info (tree decl, rtx rtl, int first)
1694 default_encode_section_info (decl, rtl, first);
1696 if (TREE_CODE (decl) == FUNCTION_DECL
1697 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1698 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1701 /* Prepare operands for a move define_expand; specifically, one of the
1702 operands must be in a register. */
1703 void
1704 prepare_move_operands (rtx operands[], machine_mode mode)
1706 if ((mode == SImode || mode == DImode)
1707 && flag_pic
1708 && ! ((mode == Pmode || mode == ptr_mode)
1709 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1711 rtx temp;
1712 if (SYMBOLIC_CONST_P (operands[1]))
1714 if (MEM_P (operands[0]))
1715 operands[1] = force_reg (Pmode, operands[1]);
1716 else if (TARGET_SHMEDIA
1717 && GET_CODE (operands[1]) == LABEL_REF
1718 && target_reg_operand (operands[0], mode))
1719 /* It's ok. */;
1720 else
1722 temp = (!can_create_pseudo_p ()
1723 ? operands[0]
1724 : gen_reg_rtx (Pmode));
1725 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1728 else if (GET_CODE (operands[1]) == CONST
1729 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1730 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1732 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1733 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1734 mode, temp);
1735 operands[1] = expand_binop (mode, add_optab, temp,
1736 XEXP (XEXP (operands[1], 0), 1),
1737 (!can_create_pseudo_p ()
1738 ? temp
1739 : gen_reg_rtx (Pmode)),
1740 0, OPTAB_LIB_WIDEN);
1744 if (! reload_in_progress && ! reload_completed)
1746 /* Copy the source to a register if neither operand is a register. */
1747 if (! register_operand (operands[0], mode)
1748 && ! sh_register_operand (operands[1], mode))
1749 operands[1] = copy_to_mode_reg (mode, operands[1]);
1751 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1753 /* This is like change_address_1 (operands[0], mode, 0, 1),
1754 except that we can't use that function because it is static. */
1755 rtx new_rtx = change_address (operands[0], mode, 0);
1756 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1757 operands[0] = new_rtx;
1760 /* This case can happen while generating code to move the result
1761 of a library call to the target. Reject `st r0,@(rX,rY)' because
1762 reload will fail to find a spill register for rX, since r0 is already
1763 being used for the source. */
1764 else if (TARGET_SH1
1765 && refers_to_regno_p (R0_REG, operands[1])
1766 && MEM_P (operands[0])
1767 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1768 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1769 operands[1] = copy_to_mode_reg (mode, operands[1]);
1771 /* When displacement addressing is used, RA will assign r0 to
1772 the pseudo register operand for the QI/HImode load/store.
1773 This tends to create a long live range for R0 and might cause
1774 anomalous register spills in some cases with LRA. See PR
1775 target/55212.
1776 We split such a load/store into two move insns via r0 so as to
1777 shorten the R0 live range. It will make some code worse but
1778 wins on average for LRA.
1779 Also, when base+index addressing is used and the index term is
1780 a subreg, LRA assumes that more hard registers are available
1781 in some situations. That is not the case for SH in the
1782 problematic case. We can pre-allocate R0 for that index term
1783 to avoid the issue. See PR target/66591. */
1784 else if (sh_lra_p ()
1785 && TARGET_SH1 && ! TARGET_SH2A
1786 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1787 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1789 bool load_p = REG_P (operands[0]);
1790 rtx reg = operands[load_p ? 0 : 1];
1791 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1793 if ((mode == QImode || mode == HImode)
1794 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1795 && GET_CODE (adr) == PLUS
1796 && REG_P (XEXP (adr, 0))
1797 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1798 && CONST_INT_P (XEXP (adr, 1))
1799 && INTVAL (XEXP (adr, 1)) != 0
1800 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1802 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1803 emit_move_insn (r0_rtx, operands[1]);
1804 operands[1] = r0_rtx;
1806 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1807 && GET_CODE (adr) == PLUS
1808 && REG_P (XEXP (adr, 0))
1809 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1810 && SUBREG_P (XEXP (adr, 1))
1811 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1813 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1814 emit_move_insn (r0_rtx, XEXP (adr, 1));
1815 XEXP (adr, 1) = r0_rtx;
1820 if (mode == Pmode || mode == ptr_mode)
1822 rtx op0, op1, opc;
1823 enum tls_model tls_kind;
1825 op0 = operands[0];
1826 op1 = operands[1];
1827 if (GET_CODE (op1) == CONST
1828 && GET_CODE (XEXP (op1, 0)) == PLUS
1829 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1830 != TLS_MODEL_NONE))
1832 opc = XEXP (XEXP (op1, 0), 1);
1833 op1 = XEXP (XEXP (op1, 0), 0);
1835 else
1836 opc = NULL_RTX;
1838 if (! reload_in_progress && ! reload_completed
1839 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1841 rtx tga_op1, tga_ret, tmp, tmp2;
1843 if (! flag_pic
1844 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1845 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1846 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1848 static int got_labelno;
1849 /* Don't schedule insns for getting GOT address when
1850 the first scheduling is enabled, to avoid spill
1851 failures for R0. */
1852 if (flag_schedule_insns)
1853 emit_insn (gen_blockage ());
1854 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1855 emit_use (gen_rtx_REG (SImode, PIC_REG));
1856 if (flag_schedule_insns)
1857 emit_insn (gen_blockage ());
1860 switch (tls_kind)
1862 case TLS_MODEL_GLOBAL_DYNAMIC:
1863 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1864 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1865 tmp = gen_reg_rtx (Pmode);
1866 emit_move_insn (tmp, tga_ret);
1867 op1 = tmp;
1868 break;
1870 case TLS_MODEL_LOCAL_DYNAMIC:
1871 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1872 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1874 tmp = gen_reg_rtx (Pmode);
1875 emit_move_insn (tmp, tga_ret);
1877 if (register_operand (op0, Pmode))
1878 tmp2 = op0;
1879 else
1880 tmp2 = gen_reg_rtx (Pmode);
1882 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1883 op1 = tmp2;
1884 break;
1886 case TLS_MODEL_INITIAL_EXEC:
1887 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1888 tmp = gen_sym2GOTTPOFF (op1);
1889 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1890 op1 = tga_op1;
1891 break;
1893 case TLS_MODEL_LOCAL_EXEC:
1894 tmp2 = gen_reg_rtx (Pmode);
1895 emit_insn (gen_store_gbr (tmp2));
1896 tmp = gen_reg_rtx (Pmode);
1897 emit_insn (gen_symTPOFF2reg (tmp, op1));
1899 if (register_operand (op0, Pmode))
1900 op1 = op0;
1901 else
1902 op1 = gen_reg_rtx (Pmode);
1904 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1905 break;
1907 default:
1908 gcc_unreachable ();
1910 if (opc)
1911 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1912 operands[1] = op1;
1917 /* Implement the canonicalize_comparison target hook for the combine
1918 pass. For the target hook this function is invoked via
1919 sh_canonicalize_comparison. This function is also re-used to
1920 canonicalize comparisons in cbranch pattern expanders. */
1921 static void
1922 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1923 machine_mode mode,
1924 bool op0_preserve_value)
1926 /* When invoked from within the combine pass the mode is not specified,
1927 so try to get it from one of the operands. */
1928 if (mode == VOIDmode)
1929 mode = GET_MODE (op0);
1930 if (mode == VOIDmode)
1931 mode = GET_MODE (op1);
1933 // We need to have a mode to do something useful here.
1934 if (mode == VOIDmode)
1935 return;
1937 // Currently, we don't deal with floats here.
1938 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1939 return;
1941 // Make sure that the constant operand is the second operand.
1942 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1944 if (op0_preserve_value)
1945 return;
1947 std::swap (op0, op1);
1948 cmp = swap_condition (cmp);
1951 if (CONST_INT_P (op1))
1953 /* Try to adjust the constant operand in such a way that available
1954 comparison insns can be utilized better and the constant can be
1955 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1956 constant pool. */
1957 const HOST_WIDE_INT val = INTVAL (op1);
1959 /* x > -1 --> x >= 0
1960 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1961 x <= -1 --> x < 0
1962 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1963 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1965 cmp = cmp == GT ? GE : LT;
1966 op1 = gen_int_mode (val + 1, mode);
1969 /* x >= 1 --> x > 0
1970 x >= 0x80 --> x > 0x7F
1971 x < 1 --> x <= 0
1972 x < 0x80 --> x <= 0x7F */
1973 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1975 cmp = cmp == GE ? GT : LE;
1976 op1 = gen_int_mode (val - 1, mode);
1979 /* unsigned x >= 1 --> x != 0
1980 unsigned x < 1 --> x == 0 */
1981 else if (val == 1 && (cmp == GEU || cmp == LTU))
1983 cmp = cmp == GEU ? NE : EQ;
1984 op1 = CONST0_RTX (mode);
1987 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1988 unsigned x < 0x80 --> unsigned x < 0x7F */
1989 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1991 cmp = cmp == GEU ? GTU : LEU;
1992 op1 = gen_int_mode (val - 1, mode);
1995 /* unsigned x > 0 --> x != 0
1996 unsigned x <= 0 --> x == 0 */
1997 else if (val == 0 && (cmp == GTU || cmp == LEU))
1998 cmp = cmp == GTU ? NE : EQ;
2000 /* unsigned x > 0x7FFFFFFF --> signed x < 0
2001 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
2002 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2003 && val == 0x7FFFFFFF)
2005 cmp = cmp == GTU ? LT : GE;
2006 op1 = const0_rtx;
2009 /* unsigned x >= 0x80000000 --> signed x < 0
2010 unsigned x < 0x80000000 --> signed x >= 0 */
2011 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2012 && (unsigned HOST_WIDE_INT)val
2013 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2015 cmp = cmp == GEU ? LT : GE;
2016 op1 = const0_rtx;
2021 /* This function implements the canonicalize_comparison target hook.
2022 This wrapper around the internally used sh_canonicalize_comparison
2023 function is needed to do the enum rtx_code <-> int conversion.
2024 Target hooks cannot use enum rtx_code in their definitions. */
2025 static void
2026 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2027 bool op0_preserve_value)
2029 enum rtx_code tmp_code = (enum rtx_code)*code;
2030 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2031 VOIDmode, op0_preserve_value);
2032 *code = (int)tmp_code;
2035 /* This function implements the legitimate_combined_insn target hook,
2036 which the combine pass uses to reject combined insns early, before
2037 it tries to recog the insn and determine its cost. */
2038 static bool
2039 sh_legitimate_combined_insn (rtx_insn* insn)
2041 /* Reject combinations of memory loads and zero extensions, as these
2042 interfere with other combine patterns such as zero extracts and bit
2043 tests. The SH2A movu.{b|w} insns are formed later in the
2044 'sh_optimize_extu_exts' pass after combine/split1. */
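/* Concretely, patterns of the form
(set (reg:SI ...) (zero_extend:SI (mem ...)))
are the ones rejected below.  */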
2045 rtx p = PATTERN (insn);
2046 if (GET_CODE (p) == SET
2047 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2048 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2049 && MEM_P (XEXP (XEXP (p, 1), 0)))
2050 return false;
2052 return true;
2055 bool
2056 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2058 *p1 = T_REG;
2059 *p2 = INVALID_REGNUM;
2060 return true;
2063 enum rtx_code
2064 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2065 enum rtx_code comparison)
2067 /* The scratch reg is only available when this is invoked from within
2068 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2069 rtx scratch = NULL_RTX;
2071 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2072 comparison = GET_CODE (operands[0]);
2073 else
2074 scratch = operands[4];
2076 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2077 mode, false);
2079 /* Notice that this function is also invoked after reload by
2080 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2081 rtx op1 = operands[1];
2083 if (can_create_pseudo_p ())
2084 operands[1] = force_reg (mode, op1);
2085 /* When we are handling DImode comparisons, we want to keep constants so
2086 that we can optimize the component comparisons; however, memory loads
2087 are better issued as a whole so that they can be scheduled well.
2088 SImode equality comparisons allow I08 constants, but only when they
2089 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2090 into a register, that register might as well be r0, and we allow the
2091 constant. If it is already in a register, this is likely to be
2092 allocated to a different hard register, thus we load the constant into
2093 a register unless it is zero. */
2094 if (!REG_P (operands[2])
2095 && (!CONST_INT_P (operands[2])
2096 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2097 && ((comparison != EQ && comparison != NE)
2098 || (REG_P (op1) && REGNO (op1) != R0_REG)
2099 || !satisfies_constraint_I08 (operands[2])))))
2101 if (scratch && GET_MODE (scratch) == mode)
2103 emit_move_insn (scratch, operands[2]);
2104 operands[2] = scratch;
2106 else if (can_create_pseudo_p ())
2107 operands[2] = force_reg (mode, operands[2]);
2109 return comparison;
2112 void
2113 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2115 rtx (*branch_expander) (rtx) = gen_branch_true;
2116 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2117 switch (comparison)
2119 case NE: case LT: case LE: case LTU: case LEU:
2120 comparison = reverse_condition (comparison);
2121 branch_expander = gen_branch_false;
2122 default: ;
2124 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2125 gen_rtx_fmt_ee (comparison, SImode,
2126 operands[1], operands[2])));
2127 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2128 if (probability >= 0)
2129 add_int_reg_note (jump, REG_BR_PROB, probability);
2132 /* ??? How should we distribute probabilities when more than one branch
2133 is generated? So far we only have some ad-hoc observations:
2134 - If the operands are random, they are likely to differ in both parts.
2135 - If comparing items in a hash chain, the operands are random or equal;
2136 operation should be EQ or NE.
2137 - If items are searched in an ordered tree from the root, we can expect
2138 the highpart to be unequal about half of the time; operation should be
2139 an inequality comparison, operands non-constant, and overall probability
2140 about 50%. Likewise for quicksort.
2141 - Range checks will often be made against constants. Even if we assume for
2142 simplicity an even distribution of the non-constant operand over a
2143 sub-range here, the same probability could be generated with differently
2144 wide sub-ranges - as long as the ratio of the part of the subrange that
2145 is before the threshold to the part that comes after the threshold stays
2146 the same. Thus, we can't really tell anything here;
2147 assuming random distribution is at least simple.
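/* Illustration of the decomposition done below: for a signed DImode
x < y with a non-constant y, the branch is taken if the high words
compare LT, the low-word test is skipped if they compare GT, and when
the high words are equal the branch is taken if the low words compare
LTU.  */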
2149 bool
2150 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2152 enum rtx_code msw_taken, msw_skip, lsw_taken;
2153 rtx_code_label *skip_label = NULL;
2154 rtx op1h, op1l, op2h, op2l;
2155 int num_branches;
2156 int prob, rev_prob;
2157 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2158 rtx scratch = operands[4];
2160 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2161 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2162 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2163 op1l = gen_lowpart (SImode, operands[1]);
2164 op2l = gen_lowpart (SImode, operands[2]);
2165 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2166 prob = split_branch_probability;
2167 rev_prob = REG_BR_PROB_BASE - prob;
2168 switch (comparison)
2170 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2171 That costs 1 cycle more when the first branch can be predicted taken,
2172 but saves us mispredicts because only one branch needs prediction.
2173 It also enables generating the cmpeqdi_t-1 pattern. */
2174 case EQ:
2175 if (TARGET_CMPEQDI_T)
2177 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2178 emit_jump_insn (gen_branch_true (operands[3]));
2179 return true;
2181 msw_skip = NE;
2182 lsw_taken = EQ;
2183 if (prob >= 0)
2185 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2186 msw_skip_prob = rev_prob;
2187 if (REG_BR_PROB_BASE <= 65535)
2188 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2189 else
2191 lsw_taken_prob
2192 = (prob
2193 ? (REG_BR_PROB_BASE
2194 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2195 / ((gcov_type) prob << 32)))
2196 : 0);
2199 break;
2200 case NE:
2201 if (TARGET_CMPEQDI_T)
2203 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2204 emit_jump_insn (gen_branch_false (operands[3]));
2205 return true;
2207 msw_taken = NE;
2208 msw_taken_prob = prob;
2209 lsw_taken = NE;
2210 lsw_taken_prob = 0;
2211 break;
2212 case GTU: case GT:
2213 msw_taken = comparison;
2214 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2215 break;
2216 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2217 msw_skip = swap_condition (msw_taken);
2218 lsw_taken = GTU;
2219 break;
2220 case GEU: case GE:
2221 if (op2l == CONST0_RTX (SImode))
2222 msw_taken = comparison;
2223 else
2225 msw_taken = comparison == GE ? GT : GTU;
2226 msw_skip = swap_condition (msw_taken);
2227 lsw_taken = GEU;
2229 break;
2230 case LTU: case LT:
2231 msw_taken = comparison;
2232 if (op2l == CONST0_RTX (SImode))
2233 break;
2234 msw_skip = swap_condition (msw_taken);
2235 lsw_taken = LTU;
2236 break;
2237 case LEU: case LE:
2238 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2239 msw_taken = comparison;
2240 else
2242 lsw_taken = LEU;
2243 if (comparison == LE)
2244 msw_taken = LT;
2245 else if (op2h != CONST0_RTX (SImode))
2246 msw_taken = LTU;
2247 else
2249 msw_skip = swap_condition (LTU);
2250 break;
2252 msw_skip = swap_condition (msw_taken);
2254 break;
2255 default: return false;
2257 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2258 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2259 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2260 if (comparison != EQ && comparison != NE && num_branches > 1)
2262 if (!CONSTANT_P (operands[2])
2263 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2264 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2266 msw_taken_prob = prob / 2U;
2267 msw_skip_prob
2268 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2269 lsw_taken_prob = prob;
2271 else
2273 msw_taken_prob = prob;
2274 msw_skip_prob = REG_BR_PROB_BASE;
2275 /* ??? If we have a constant op2h, should we use that when
2276 calculating lsw_taken_prob? */
2277 lsw_taken_prob = prob;
2280 operands[1] = op1h;
2281 operands[2] = op2h;
2282 operands[4] = NULL_RTX;
2283 if (reload_completed
2284 && ! arith_reg_or_0_operand (op2h, SImode)
2285 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2286 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2287 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2289 emit_move_insn (scratch, operands[2]);
2290 operands[2] = scratch;
2292 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2293 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2294 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2296 rtx taken_label = operands[3];
2298 /* Operands were possibly modified, but msw_skip doesn't expect this.
2299 Always use the original ones. */
2300 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2302 operands[1] = op1h;
2303 operands[2] = op2h;
2304 if (reload_completed
2305 && ! arith_reg_or_0_operand (op2h, SImode)
2306 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2308 emit_move_insn (scratch, operands[2]);
2309 operands[2] = scratch;
2313 operands[3] = skip_label = gen_label_rtx ();
2314 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2315 operands[3] = taken_label;
2317 operands[1] = op1l;
2318 operands[2] = op2l;
2319 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2321 if (reload_completed
2322 && ! arith_reg_or_0_operand (op2l, SImode)
2323 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2325 emit_move_insn (scratch, operands[2]);
2326 operands[2] = scratch;
2328 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2330 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2331 emit_label (skip_label);
2332 return true;
2335 /* Given an operand, return 1 if the evaluated operand plugged into an
2336 if_then_else will result in a branch_true, 0 if branch_false, or
2337 -1 if neither applies. The truth table goes like this:
2339 op | cmpval | code | result
2340 ---------+--------+---------+--------------------
2341 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2342 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2343 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2344 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2345 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2346 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2347 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2348 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2350 sh_eval_treg_value (rtx op)
2352 if (t_reg_operand (op, GET_MODE (op)))
2353 return 1;
2354 if (negt_reg_operand (op, GET_MODE (op)))
2355 return 0;
2357 rtx_code code = GET_CODE (op);
2358 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2359 return -1;
2361 int cmpop = code == EQ ? 1 : 0;
2362 int cmpval = INTVAL (XEXP (op, 1));
2363 if (cmpval != 0 && cmpval != 1)
2364 return -1;
2366 int t;
2367 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2368 t = 0;
2369 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2370 t = 1;
2371 else
2372 return -1;
2374 return t ^ (cmpval == cmpop);
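/* E.g. the second row above: plugging (eq (T-bit) (const_int 1)) into
an if_then_else behaves like branch_true, so 1 is returned.  */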
2377 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2378 of floating-point comparisons. */
2379 static void
2380 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2382 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2383 && GET_CODE (insn) != PARALLEL)
2385 insn = gen_rtx_PARALLEL (VOIDmode,
2386 gen_rtvec (3, insn,
2387 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2388 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2390 emit_insn (insn);
2393 /* Prepare the operands for an scc instruction; make sure that the
2394 compare has been done and the result is in T_REG. */
2395 void
2396 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2398 rtx t_reg = get_t_reg_rtx ();
2399 enum rtx_code oldcode = code;
2400 machine_mode mode;
2402 /* First need a compare insn. */
2403 switch (code)
2405 case NE:
2406 /* It isn't possible to handle this case. */
2407 gcc_unreachable ();
2408 case LT:
2409 code = GT;
2410 break;
2411 case LE:
2412 code = GE;
2413 break;
2414 case LTU:
2415 code = GTU;
2416 break;
2417 case LEU:
2418 code = GEU;
2419 break;
2420 default:
2421 break;
2423 if (code != oldcode)
2424 std::swap (op0, op1);
2426 mode = GET_MODE (op0);
2427 if (mode == VOIDmode)
2428 mode = GET_MODE (op1);
2430 op0 = force_reg (mode, op0);
2431 if ((code != EQ && code != NE
2432 && (op1 != const0_rtx
2433 || code == GTU || code == GEU || code == LTU || code == LEU))
2434 || (mode == DImode && op1 != const0_rtx)
2435 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2436 op1 = force_reg (mode, op1);
2438 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2439 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2440 mode);
2444 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2445 rtx op0, rtx op1)
2447 rtx target = gen_reg_rtx (SImode);
2448 rtx tmp;
2450 gcc_assert (TARGET_SHMEDIA);
2451 switch (code)
2453 case EQ:
2454 case GT:
2455 case LT:
2456 case UNORDERED:
2457 case GTU:
2458 case LTU:
2459 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2460 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2461 code = NE;
2462 break;
2464 case NE:
2465 case GE:
2466 case LE:
2467 case ORDERED:
2468 case GEU:
2469 case LEU:
2470 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2471 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2472 code = EQ;
2473 break;
2475 case UNEQ:
2476 case UNGE:
2477 case UNGT:
2478 case UNLE:
2479 case UNLT:
2480 case LTGT:
2481 return NULL_RTX;
2483 default:
2484 gcc_unreachable ();
2487 if (mode == DImode)
2489 rtx t2 = gen_reg_rtx (DImode);
2490 emit_insn (gen_extendsidi2 (t2, target));
2491 target = t2;
2494 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2497 /* Called from the md file, set up the operands of a compare instruction. */
2498 void
2499 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2501 enum rtx_code code = GET_CODE (operands[0]);
2502 enum rtx_code branch_code;
2503 rtx op0 = operands[1];
2504 rtx op1 = operands[2];
2505 rtx insn;
2506 bool need_ccmpeq = false;
2508 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2510 op0 = force_reg (mode, op0);
2511 op1 = force_reg (mode, op1);
2513 else
2515 if (code != EQ || mode == DImode)
2517 /* Force args into regs, since we can't use constants here. */
2518 op0 = force_reg (mode, op0);
2519 if (op1 != const0_rtx || code == GTU || code == GEU)
2520 op1 = force_reg (mode, op1);
2524 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2526 if (code == LT
2527 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2528 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2530 std::swap (op0, op1);
2531 code = swap_condition (code);
2534 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2535 if (code == GE)
2537 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2538 need_ccmpeq = true;
2539 code = GT;
2542 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2543 to EQ/GT respectively. */
2544 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2547 switch (code)
2549 case EQ:
2550 case GT:
2551 case GE:
2552 case GTU:
2553 case GEU:
2554 branch_code = code;
2555 break;
2556 case NE:
2557 case LT:
2558 case LE:
2559 case LTU:
2560 case LEU:
2561 branch_code = reverse_condition (code);
2562 break;
2563 default:
2564 gcc_unreachable ();
2567 insn = gen_rtx_SET (get_t_reg_rtx (),
2568 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2570 sh_emit_set_t_insn (insn, mode);
2571 if (need_ccmpeq)
2572 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2574 if (branch_code == code)
2575 emit_jump_insn (gen_branch_true (operands[3]));
2576 else
2577 emit_jump_insn (gen_branch_false (operands[3]));
2580 void
2581 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2583 enum rtx_code code = GET_CODE (operands[1]);
2584 rtx op0 = operands[2];
2585 rtx op1 = operands[3];
2586 rtx_code_label *lab = NULL;
2587 bool invert = false;
2589 op0 = force_reg (mode, op0);
2590 if ((code != EQ && code != NE
2591 && (op1 != const0_rtx
2592 || code == GTU || code == GEU || code == LTU || code == LEU))
2593 || (mode == DImode && op1 != const0_rtx)
2594 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2595 op1 = force_reg (mode, op1);
2597 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2599 if (code == LT || code == LE)
2601 std::swap (op0, op1);
2602 code = swap_condition (code);
2604 if (code == GE)
2606 if (TARGET_IEEE)
2608 lab = gen_label_rtx ();
2609 sh_emit_scc_to_t (EQ, op0, op1);
2610 emit_jump_insn (gen_branch_true (lab));
2611 code = GT;
2613 else
2615 code = LT;
2616 invert = true;
2621 if (code == NE)
2623 code = EQ;
2624 invert = true;
2627 sh_emit_scc_to_t (code, op0, op1);
2628 if (lab)
2629 emit_label (lab);
2630 if (invert)
2631 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2632 else
2633 emit_move_insn (operands[0], get_t_reg_rtx ());
2636 /* Functions to output assembly code. */
2638 /* Return a sequence of instructions to perform a DI or DF move.
2640 Since the SH cannot move a DI or DF in one instruction, we have
2641 to take care when we see overlapping source and dest registers. */
2642 const char *
2643 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2644 machine_mode mode)
2646 rtx dst = operands[0];
2647 rtx src = operands[1];
2649 if (MEM_P (dst)
2650 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2651 return "mov.l %T1,%0" "\n"
2652 " mov.l %1,%0";
2654 if (register_operand (dst, mode)
2655 && register_operand (src, mode))
2657 if (REGNO (src) == MACH_REG)
2658 return "sts mach,%S0" "\n"
2659 " sts macl,%R0";
2661 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2662 when mov.d r1,r0 do r1->r0 then r2->r1. */
2663 if (REGNO (src) + 1 == REGNO (dst))
2664 return "mov %T1,%T0" "\n"
2665 " mov %1,%0";
2666 else
2667 return "mov %1,%0" "\n"
2668 " mov %T1,%T0";
2670 else if (CONST_INT_P (src))
2672 if (INTVAL (src) < 0)
2673 output_asm_insn ("mov #-1,%S0", operands);
2674 else
2675 output_asm_insn ("mov #0,%S0", operands);
2677 return "mov %1,%R0";
2679 else if (MEM_P (src))
2681 int ptrreg = -1;
2682 int dreg = REGNO (dst);
2683 rtx inside = XEXP (src, 0);
2685 switch (GET_CODE (inside))
2687 case REG:
2688 ptrreg = REGNO (inside);
2689 break;
2691 case SUBREG:
2692 ptrreg = subreg_regno (inside);
2693 break;
2695 case PLUS:
2696 ptrreg = REGNO (XEXP (inside, 0));
2697 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2698 an offsettable address. Unfortunately, offsettable addresses use
2699 QImode to check the offset, and a QImode offsettable address
2700 requires r0 for the other operand, which is not currently
2701 supported, so we can't use the 'o' constraint.
2702 Thus we must check for and handle r0+REG addresses here.
2703 We punt for now, since this is likely very rare. */
2704 gcc_assert (!REG_P (XEXP (inside, 1)));
2705 break;
2707 case LABEL_REF:
2708 return "mov.l %1,%0" "\n"
2709 " mov.l %1+4,%T0";
2710 case POST_INC:
2711 return "mov.l %1,%0" "\n"
2712 " mov.l %1,%T0";
2713 default:
2714 gcc_unreachable ();
2717 /* Work out the safe way to copy. Copy into the second half first. */
2718 if (dreg == ptrreg)
2719 return "mov.l %T1,%T0" "\n"
2720 " mov.l %1,%0";
2723 return "mov.l %1,%0" "\n"
2724 " mov.l %T1,%T0";
2727 /* Print an instruction which would have gone into a delay slot after
2728 another instruction, but couldn't because the other instruction expanded
2729 into a sequence where putting the slot insn at the end wouldn't work. */
2730 static void
2731 print_slot (rtx_sequence *seq)
2733 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2735 seq->insn (1)->set_deleted ();
2738 const char *
2739 output_far_jump (rtx_insn *insn, rtx op)
2741 struct { rtx lab, reg, op; } this_jmp;
2742 rtx_code_label *braf_base_lab = NULL;
2743 const char *jump;
2744 int far;
2745 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2746 rtx_insn *prev;
2748 this_jmp.lab = gen_label_rtx ();
2750 if (TARGET_SH2
2751 && offset >= -32764
2752 && offset - get_attr_length (insn) <= 32766
2753 && ! CROSSING_JUMP_P (insn))
2755 far = 0;
2756 jump = "mov.w %O0,%1" "\n"
2757 " braf %1";
2759 else
2761 far = 1;
2762 if (flag_pic)
2764 if (TARGET_SH2)
2765 jump = "mov.l %O0,%1" "\n"
2766 " braf %1";
2767 else
2768 jump = "mov.l r0,@-r15" "\n"
2769 " mova %O0,r0" "\n"
2770 " mov.l @r0,%1" "\n"
2771 " add r0,%1" "\n"
2772 " mov.l @r15+,r0" "\n"
2773 " jmp @%1";
2775 else
2776 jump = "mov.l %O0,%1" "\n"
2777 " jmp @%1";
2779 /* If we have a scratch register available, use it. */
2780 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2781 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2783 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2784 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2785 jump = "mov.l r1,@-r15" "\n"
2786 " mova %O0,r0" "\n"
2787 " mov.l @r0,r1" "\n"
2788 " add r1,r0" "\n"
2789 " mov.l @r15+,r1" "\n"
2790 " jmp @%1";
2791 output_asm_insn (jump, &this_jmp.lab);
2792 if (dbr_sequence_length ())
2793 print_slot (final_sequence);
2794 else
2795 output_asm_insn ("nop", 0);
2797 else
2799 /* Output the delay slot insn first if any. */
2800 if (dbr_sequence_length ())
2801 print_slot (final_sequence);
2803 this_jmp.reg = gen_rtx_REG (SImode, 13);
2804 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2805 Fortunately, MACL is fixed and call-clobbered, and we never
2806 need its value across jumps, so save r13 in it instead of on
2807 the stack. */
2808 if (TARGET_SH5)
2809 output_asm_insn ("lds r13,macl", 0);
2810 else
2811 output_asm_insn ("mov.l r13,@-r15", 0);
2812 output_asm_insn (jump, &this_jmp.lab);
2813 if (TARGET_SH5)
2814 output_asm_insn ("sts macl,r13", 0);
2815 else
2816 output_asm_insn ("mov.l @r15+,r13", 0);
2818 if (far && flag_pic && TARGET_SH2)
2820 braf_base_lab = gen_label_rtx ();
2821 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2822 CODE_LABEL_NUMBER (braf_base_lab));
2824 if (far)
2825 output_asm_insn (".align 2", 0);
2826 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2827 this_jmp.op = op;
2828 if (far && flag_pic)
2830 if (TARGET_SH2)
2831 this_jmp.lab = braf_base_lab;
2832 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2834 else
2835 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2836 return "";
2839 /* Local label counter, used for constants in the pool and inside
2840 pattern branches. */
2841 static int lf = 100;
2843 /* Output code for ordinary branches. */
2844 const char *
2845 output_branch (int logic, rtx_insn *insn, rtx *operands)
2847 switch (get_attr_length (insn))
2849 case 6:
2850 /* This can happen if filling the delay slot has caused a forward
2851 branch to exceed its range (we could reverse it, but only
2852 when we know we won't overextend other branches; this should
2853 best be handled by relaxation).
2854 It can also happen when other condbranches hoist delay slot insns
2855 from their destinations, thus leading to a code size increase.
2856 But the branch will still be in the range -4092..+4098 bytes. */
2857 if (! TARGET_RELAX)
2859 int label = lf++;
2860 /* The call to print_slot will clobber the operands. */
2861 rtx op0 = operands[0];
2863 /* If the instruction in the delay slot is annulled (true), then
2864 there is no delay slot where we can put it now. The only safe
2865 place for it is after the label. final will do that by default. */
2867 if (final_sequence
2868 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2869 && get_attr_length (final_sequence->insn (1)))
2871 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2872 ASSEMBLER_DIALECT ? "/" : ".", label);
2873 print_slot (final_sequence);
2875 else
2876 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2878 output_asm_insn ("bra\t%l0", &op0);
2879 fprintf (asm_out_file, "\tnop\n");
2880 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2882 return "";
2884 /* When relaxing, handle this like a short branch. The linker
2885 will fix it up if it still doesn't fit after relaxation. */
2886 case 2:
2887 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2889 /* These are for SH2e, in which we have to account for the
2890 extra nop because of the hardware bug in annulled branches. */
2891 case 8:
2892 if (! TARGET_RELAX)
2894 int label = lf++;
2896 gcc_assert (!final_sequence
2897 || !(INSN_ANNULLED_BRANCH_P
2898 (XVECEXP (final_sequence, 0, 0))));
2899 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2900 logic ? "f" : "t",
2901 ASSEMBLER_DIALECT ? "/" : ".", label);
2902 fprintf (asm_out_file, "\tnop\n");
2903 output_asm_insn ("bra\t%l0", operands);
2904 fprintf (asm_out_file, "\tnop\n");
2905 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2907 return "";
2909 /* When relaxing, fall through. */
2910 case 4:
2912 char buffer[10];
2914 sprintf (buffer, "b%s%ss\t%%l0",
2915 logic ? "t" : "f",
2916 ASSEMBLER_DIALECT ? "/" : ".");
2917 output_asm_insn (buffer, &operands[0]);
2918 return "nop";
2921 default:
2922 /* There should be no longer branches now - that would
2923 indicate that something has destroyed the branches set
2924 up in machine_dependent_reorg. */
2925 gcc_unreachable ();
2929 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2930 fill in operand 9 as a label to the successor insn.
2931 We try to use jump threading where possible.
2932 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2933 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2934 follow jmp and bt, if the address is in range. */
2935 const char *
2936 output_branchy_insn (enum rtx_code code, const char *templ,
2937 rtx_insn *insn, rtx *operands)
2939 rtx_insn *next_insn = NEXT_INSN (insn);
2941 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2943 rtx src = SET_SRC (PATTERN (next_insn));
2944 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2946 /* Following branch not taken */
2947 rtx_code_label *lab = gen_label_rtx ();
2948 emit_label_after (lab, next_insn);
2949 INSN_ADDRESSES_NEW (lab,
2950 INSN_ADDRESSES (INSN_UID (next_insn))
2951 + get_attr_length (next_insn));
2952 operands[9] = lab;
2953 return templ;
2955 else
2957 int offset = (branch_dest (next_insn)
2958 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2959 if (offset >= -252 && offset <= 258)
2961 if (GET_CODE (src) == IF_THEN_ELSE)
2962 /* branch_true */
2963 src = XEXP (src, 1);
2964 operands[9] = src;
2965 return templ;
2969 rtx_code_label *lab = gen_label_rtx ();
2970 emit_label_after (lab, insn);
2971 INSN_ADDRESSES_NEW (lab,
2972 INSN_ADDRESSES (INSN_UID (insn))
2973 + get_attr_length (insn));
2974 operands[9] = lab;
2975 return templ;
2978 const char *
2979 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2981 return output_branchy_insn (NE, "bt %l9" "\n"
2982 " fcmp/eq %1,%0",
2983 insn, operands);
2986 /* Output the start of the assembler file. */
2987 static void
2988 sh_file_start (void)
2990 default_file_start ();
2992 if (TARGET_ELF)
2993 /* We need to show the text section with the proper
2994 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2995 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2996 will complain. We can teach GAS specifically about the
2997 default attributes for our choice of text section, but
2998 then we would have to change GAS again if/when we change
2999 the text section name. */
3000 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
3001 else
3002 /* Switch to the data section so that the coffsem symbol
3003 isn't in the text section. */
3004 switch_to_section (data_section);
3006 if (TARGET_LITTLE_ENDIAN)
3007 fputs ("\t.little\n", asm_out_file);
3009 if (!TARGET_ELF)
3011 if (TARGET_SHCOMPACT)
3012 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3013 else if (TARGET_SHMEDIA)
3014 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3015 TARGET_SHMEDIA64 ? 64 : 32);
3019 /* Check if PAT includes the UNSPEC_CALLER unspec pattern. */
3020 static bool
3021 unspec_caller_rtx_p (rtx pat)
3023 rtx base, offset;
3024 int i;
3026 split_const (pat, &base, &offset);
3027 if (GET_CODE (base) == UNSPEC)
3029 if (XINT (base, 1) == UNSPEC_CALLER)
3030 return true;
3031 for (i = 0; i < XVECLEN (base, 0); i++)
3032 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3033 return true;
3035 return false;
3038 /* Indicate that INSN cannot be duplicated. This is true for insns
3039 that generate a unique label. */
3040 static bool
3041 sh_cannot_copy_insn_p (rtx_insn *insn)
3043 rtx pat;
3045 if (!reload_completed || !flag_pic)
3046 return false;
3048 if (!NONJUMP_INSN_P (insn))
3049 return false;
3050 if (asm_noperands (insn) >= 0)
3051 return false;
3053 pat = PATTERN (insn);
3054 if (GET_CODE (pat) != SET)
3055 return false;
3056 pat = SET_SRC (pat);
3058 if (unspec_caller_rtx_p (pat))
3059 return true;
3061 return false;
3064 /* Number of instructions used to make an arithmetic right shift by N. */
3065 static const char ashiftrt_insns[] =
3066 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3068 /* Description of a logical left or right shift, when expanded to a sequence
3069 of 1/2/8/16 shifts.
3070 Notice that one bit right shifts clobber the T bit. One bit left shifts
3071 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
3072 enum
3074 ASHL_CLOBBERS_T = 1 << 0,
3075 LSHR_CLOBBERS_T = 1 << 1
3078 struct ashl_lshr_sequence
3080 char insn_count;
3081 signed char amount[6];
3082 char clobbers_t;
3085 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3087 { 0, { 0 }, 0 }, // 0
3088 { 1, { 1 }, LSHR_CLOBBERS_T },
3089 { 1, { 2 }, 0 },
3090 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3091 { 2, { 2, 2 }, 0 }, // 4
3092 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3093 { 3, { 2, 2, 2 }, 0 },
3094 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3095 { 1, { 8 }, 0 }, // 8
3096 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3097 { 2, { 8, 2 }, 0 },
3098 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3099 { 3, { 8, 2, 2 }, 0 }, // 12
3100 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3101 { 3, { 8, -2, 8 }, 0 },
3102 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3103 { 1, { 16 }, 0 }, // 16
3104 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3105 { 2, { 16, 2 }, 0 },
3106 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3107 { 3, { 16, 2, 2 }, 0 }, // 20
3108 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3109 { 3, { 16, -2, 8 }, 0 },
3110 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3111 { 2, { 16, 8 }, 0 }, // 24
3112 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3113 { 3, { 16, 8, 2 }, 0 },
3114 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3115 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3116 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3117 { 3, { 16, -2, 16 }, 0 },
3119 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3120 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3121 However, the shift-and combiner code needs this entry here to be in
3122 terms of real shift insns. */
3123 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
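/* Reading the table: entry 10 above, { 2, { 8, 2 }, 0 }, means that a
shift by 10 is done as a shift by 8 followed by a shift by 2, using
2 insns and without clobbering the T bit.  */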
3126 /* Individual shift amounts for shift amounts < 16; up to three of the highmost
3127 bits might be clobbered. This is typically used when combined with some
3128 kind of sign or zero extension. */
3129 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3131 { 0, { 0 }, 0 }, // 0
3132 { 1, { 1 }, LSHR_CLOBBERS_T },
3133 { 1, { 2 }, 0 },
3134 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3135 { 2, { 2, 2 }, 0 }, // 4
3136 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3137 { 2, { 8, -2 }, 0 },
3138 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3139 { 1, { 8 }, 0 }, // 8
3140 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3141 { 2, { 8, 2 }, 0 },
3142 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3143 { 3, { 8, 2, 2 }, 0 }, // 12
3144 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3145 { 2, { 16, -2 }, 0 },
3146 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3147 { 1, { 16 }, 0 }, // 16
3148 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3149 { 2, { 16, 2 }, 0 },
3150 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3151 { 3, { 16, 2, 2 }, 0 }, // 20
3152 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3153 { 3, { 16, -2, 8 }, 0 },
3154 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3155 { 2, { 16, 8 }, 0 }, // 24
3156 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3157 { 3, { 16, 8, 2 }, 0 },
3158 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3159 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3160 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3161 { 3, { 16, -2, 16 }, 0 },
3162 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
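/* E.g. entry 7 here, { 2, { 8, -1 }, ASHL_CLOBBERS_T }, does a left
shift by 8 followed by a right shift by 1: a net left shift by 7 in
2 insns instead of the 4 insns of the exact sequence, at the cost of
zeroing the highmost result bit and (for the one-bit right shift)
clobbering the T bit.  */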
3165 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3166 will clobber the T bit. */
3167 bool
3168 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3170 gcc_assert (CONST_INT_P (shift_amount));
3172 const int shift_amount_i = INTVAL (shift_amount) & 31;
3174 /* Special case for shift count of 31: use and-rotl sequence. */
3175 if (shift_amount_i == 31)
3176 return true;
3178 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3179 & ASHL_CLOBBERS_T) != 0;
3182 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3183 instructions will clobber the T bit. */
3184 bool
3185 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3187 gcc_assert (CONST_INT_P (shift_amount));
3189 const int shift_amount_i = INTVAL (shift_amount) & 31;
3191 /* Special case for shift count of 31: use shll-movt sequence. */
3192 if (shift_amount_i == 31)
3193 return true;
3195 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3196 & LSHR_CLOBBERS_T) != 0;
3199 /* Return true if it is potentially beneficial to use a dynamic shift
3200 instruction (shad / shar) instead of a combination of 1/2/8/16
3201 shift instructions for the specified shift count.
3202 If dynamic shifts are not available, always return false. */
3203 bool
3204 sh_dynamicalize_shift_p (rtx count)
3206 gcc_assert (CONST_INT_P (count));
3208 const int shift_amount_i = INTVAL (count) & 31;
3209 int insn_count;
3211 /* For left and right shifts, there are shorter 2 insn sequences for
3212 shift amounts of 31. */
3213 if (shift_amount_i == 31)
3214 insn_count = 2;
3215 else
3216 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3218 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
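/* I.e. use shad / shar only when the constant 1/2/8/16 sequence would
need more than SH_DYNAMIC_SHIFT_COST + 1 insns; the extra insn
presumably accounts for loading the shift count into a register.  */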
3221 /* Assuming we have a value that has been sign-extended by at least one bit,
3222 can we use the ext_ashl_lshr_seq amounts with the last shift turned to an
3223 arithmetic shift to shift it by N without data loss, and quicker than by
3224 other means? */
3225 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
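/* Note that ((N | 8) == 15) holds exactly for N == 7 and N == 15.  */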
3227 /* Return the cost of a shift. */
3228 static inline int
3229 shiftcosts (rtx x)
3231 int value;
3233 if (TARGET_SHMEDIA)
3234 return 1;
3236 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3238 if (GET_MODE (x) == DImode
3239 && CONST_INT_P (XEXP (x, 1))
3240 && INTVAL (XEXP (x, 1)) == 1)
3241 return 2;
3243 /* Everything else is invalid, because there is no pattern for it. */
3244 return -1;
3246 /* If shifting by a non-constant, this will be expensive. */
3247 if (!CONST_INT_P (XEXP (x, 1)))
3248 return SH_DYNAMIC_SHIFT_COST;
3250 /* Otherwise, return the true cost in instructions. Cope with out of range
3251 shift counts more or less arbitrarily. */
3252 value = INTVAL (XEXP (x, 1)) & 31;
3254 if (GET_CODE (x) == ASHIFTRT)
3256 int cost = ashiftrt_insns[value];
3257 /* If dynamic shifts are available and profitable in this case, then we
3258 put the constant in a reg and use shad. */
3259 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3260 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3261 return cost;
3263 else
3264 return ashl_lshr_seq[value].insn_count;
3267 /* Return the cost of an AND/XOR/IOR operation. */
3268 static inline int
3269 and_xor_ior_costs (rtx x, int code)
3271 /* On SH1-4 we have only max. SImode operations.
3272 Double the cost for modes > SImode. */
3273 const int cost_scale = !TARGET_SHMEDIA
3274 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3275 ? 2 : 1;
3277 /* A logical operation with two registers is a single cycle
3278 instruction. */
3279 if (!CONST_INT_P (XEXP (x, 1)))
3280 return 1 * cost_scale;
3282 int i = INTVAL (XEXP (x, 1));
3284 if (TARGET_SHMEDIA)
3286 if (satisfies_constraint_I10 (XEXP (x, 1))
3287 || satisfies_constraint_J16 (XEXP (x, 1)))
3288 return 1;
3289 else
3290 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3293 /* These constants are single cycle extu.[bw] instructions. */
3294 if ((i == 0xff || i == 0xffff) && code == AND)
3295 return 1 * cost_scale;
3296 /* Constants that can be used in an instruction as an immediate are
3297 a single cycle, but this requires r0, so make it a little more
3298 expensive. */
3299 if (CONST_OK_FOR_K08 (i))
3300 return 2 * cost_scale;
3301 /* Constants that can be loaded with a mov immediate need one more cycle.
3302 This case is probably unnecessary. */
3303 if (CONST_OK_FOR_I08 (i))
3304 return 2 * cost_scale;
3305 /* Any other constant requires an additional 2 cycle pc-relative load.
3306 This case is probably unnecessary. */
3307 return 3 * cost_scale;
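/* For illustration, on SH1-4: (and (reg:SI) (const_int 0xff)) is costed
at 1 (a single extu.b), while an AND with a constant that satisfies
neither K08 nor I08 is costed at 3 (pc-relative constant load plus the
logical op).  */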
3310 /* Return the cost of an addition or a subtraction. */
3311 static inline int
3312 addsubcosts (rtx x)
3314 if (GET_MODE (x) == SImode)
3316 /* The addc or subc patterns will eventually become one or two
3317 instructions. Below are some costs for some of the patterns
3318 which combine would reject because the costs of the individual
3319 insns in the patterns are lower.
3321 FIXME: It would be much easier if we had something like insn cost
3322 attributes and the cost calculation machinery used those attributes
3323 in the first place. This would eliminate redundant recog-like C
3324 code to calculate costs of complex patterns. */
3325 rtx op0 = XEXP (x, 0);
3326 rtx op1 = XEXP (x, 1);
3328 if (GET_CODE (x) == PLUS)
3330 if (GET_CODE (op0) == AND
3331 && XEXP (op0, 1) == const1_rtx
3332 && (GET_CODE (op1) == PLUS
3333 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3334 return 1;
3336 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3337 && GET_CODE (op1) == LSHIFTRT
3338 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3339 return 1;
3341 /* Let's assume that adding the result of an insn that stores into
3342 the T bit is cheap. */
3343 if (treg_set_expr (op1, SImode))
3344 return 1;
3345 if (treg_set_expr (op0, SImode))
3346 return 1;
3349 /* On SH1-4 we have only max. SImode operations.
3350 Double the cost for modes > SImode. */
3351 const int cost_scale = !TARGET_SHMEDIA
3352 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3353 ? 2 : 1;
3355 /* Adding a register is a single cycle insn. */
3356 if (REG_P (XEXP (x, 1))
3357 || GET_CODE (XEXP (x, 1)) == SUBREG)
3358 return 1 * cost_scale;
3360 /* Likewise for small constants. */
3361 if (CONST_INT_P (XEXP (x, 1))
3362 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3363 return 1 * cost_scale;
3365 if (TARGET_SHMEDIA)
3366 switch (GET_CODE (XEXP (x, 1)))
3368 case CONST:
3369 case LABEL_REF:
3370 case SYMBOL_REF:
3371 return TARGET_SHMEDIA64 ? 5 : 3;
3373 case CONST_INT:
3374 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3375 return 2;
3376 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3377 return 3;
3378 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3379 return 4;
3381 /* Fall through. */
3382 default:
3383 return 5;
3386 /* Any other constant requires a 2 cycle pc-relative load plus an
3387 addition. */
3388 return 3 * cost_scale;
3391 /* Return the cost of a multiply. */
3392 static inline int
3393 multcosts (rtx x ATTRIBUTE_UNUSED)
3395 if (sh_multcost >= 0)
3396 return sh_multcost;
3397 if (TARGET_SHMEDIA)
3398 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3399 accept constants. Ideally, we would use a cost of one or two and
3400 add the cost of the operand, but disregard the latter when inside loops
3401 and loop invariant code motion is still to follow.
3402 Using a multiply first and splitting it later if it's a loss
3403 doesn't work because of different sign / zero extension semantics
3404 of multiplies vs. shifts. */
3405 return optimize_size ? 2 : 3;
3407 if (TARGET_SH2)
3409 /* We have a mul insn, so we can never take more than the mul and the
3410 read of the mac reg, but count more because of the latency and extra
3411 reg usage. */
3412 if (optimize_size)
3413 return 2;
3414 return 3;
3417 /* If we're aiming at small code, then just count the number of
3418 insns in a multiply call sequence. */
3419 if (optimize_size)
3420 return 5;
3422 /* Otherwise count all the insns in the routine we'd be calling too. */
3423 return 20;
3426 /* Compute a (partial) cost for rtx X. Return true if the complete
3427 cost has been computed, and false if subexpressions should be
3428 scanned. In either case, *TOTAL contains the cost result. */
3429 static bool
3430 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3431 int *total, bool speed ATTRIBUTE_UNUSED)
3433 switch (code)
3435 /* The lower-subreg pass decides whether to split multi-word regs
3436 into individual regs by looking at the cost for a SET of certain
3437 modes with the following patterns:
3438 (set (reg) (reg))
3439 (set (reg) (const_int 0))
3440 On machines that support vector-move operations a multi-word move
3441 is the same cost as an individual reg move. On SH there is no
3442 vector-move, so we have to provide the correct cost in the number
3443 of move insns to load/store the reg of the mode in question. */
3444 case SET:
3445 if (register_operand (SET_DEST (x), VOIDmode)
3446 && (register_operand (SET_SRC (x), VOIDmode)
3447 || satisfies_constraint_Z (SET_SRC (x))))
3449 const machine_mode mode = GET_MODE (SET_DEST (x));
3450 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3451 / mov_insn_size (mode, TARGET_SH2A));
3452 return true;
3454 return false;
3456 /* The cost of a mem access is mainly the cost of the address mode. */
3457 case MEM:
3458 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3459 true);
3460 return true;
3462 case IF_THEN_ELSE:
3463 /* This case is required for the if_then_else negc pattern. */
3464 if (treg_set_expr (XEXP (x, 0), SImode))
3466 *total = COSTS_N_INSNS (1);
3467 return true;
3469 else
3470 return false;
3472 /* Zero extracts of single bits are usually combine patterns for the
3473 tst insns. */
3474 case ZERO_EXTRACT:
3475 if (GET_CODE (XEXP (x, 0)) == XOR
3476 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3477 && XEXP (x, 1) == const1_rtx
3478 && CONST_INT_P (XEXP (x, 2))
3479 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3480 /* Check that the xor constant overlaps with the extracted bit. */
3481 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3483 *total = 1; //COSTS_N_INSNS (1);
3484 return true;
3486 return false;
3488 /* The cost of a sign or zero extend depends on whether the source is a
3489 reg or a mem. In case of a mem take the address into account. */
3490 case SIGN_EXTEND:
3491 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3493 *total = COSTS_N_INSNS (1);
3494 return true;
3496 if (MEM_P (XEXP (x, 0)))
3498 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3499 GET_MODE (XEXP (x, 0)),
3500 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3501 return true;
3503 return false;
3505 case ZERO_EXTEND:
3506 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3508 *total = COSTS_N_INSNS (1);
3509 return true;
3511 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3512 && (GET_MODE (XEXP (x, 0)) == QImode
3513 || GET_MODE (XEXP (x, 0)) == HImode))
3515 /* Handle SH2A's movu.b and movu.w insn. */
3516 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3517 GET_MODE (XEXP (x, 0)),
3518 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3519 return true;
3521 return false;
3523 /* mems for SFmode and DFmode can be inside a parallel due to
3524 the way the fpscr is handled. */
3525 case PARALLEL:
3526 for (int i = 0; i < XVECLEN (x, 0); i++)
3528 rtx xx = XVECEXP (x, 0, i);
3529 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3531 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3532 GET_MODE (XEXP (xx, 0)),
3533 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3534 return true;
3536 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3538 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3539 GET_MODE (XEXP (xx, 1)),
3540 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3541 return true;
3545 if (sh_1el_vec (x, VOIDmode))
3546 *total = outer_code != SET;
3547 else if (sh_rep_vec (x, VOIDmode))
3548 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3549 + (outer_code != SET));
3550 else
3551 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3552 return true;
3554 case CONST_INT:
3555 if (TARGET_SHMEDIA)
3557 if (INTVAL (x) == 0)
3558 *total = 0;
3559 else if (outer_code == AND && and_operand ((x), DImode))
3560 *total = 0;
3561 else if ((outer_code == IOR || outer_code == XOR
3562 || outer_code == PLUS)
3563 && CONST_OK_FOR_I10 (INTVAL (x)))
3564 *total = 0;
3565 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3566 *total = COSTS_N_INSNS (outer_code != SET);
3567 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3568 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3569 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3570 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3571 else
3572 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3573 return true;
3575 if (CONST_OK_FOR_I08 (INTVAL (x)))
3576 *total = 0;
3577 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3578 && CONST_OK_FOR_K08 (INTVAL (x)))
3579 *total = 1;
3580 /* prepare_cmp_insn will force costly constants into registers before
3581 the cbranch[sd]i4 patterns can see them, so preserve potentially
3582 interesting ones not covered by I08 above. */
3583 else if (outer_code == COMPARE
3584 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3585 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3586 || INTVAL (x) == 0x7fffffff
3587 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3588 *total = 1;
3589 else
3590 *total = 8;
3591 return true;
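/* Illustrative examples (not part of the original code): a small constant
   such as 5 satisfies I08 and costs 0, while something like 0x12345678
   falls through to the default cost of 8 unless it is one of the special
   compare values preserved above for the cbranch patterns.  */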
3593 case EQ:
3594 /* An and with a constant compared against zero is
3595 most likely going to be a TST #imm, R0 instruction.
3596 Notice that this does not catch the zero_extract variants from
3597 the md file. */
3598 if (XEXP (x, 1) == const0_rtx
3599 && (GET_CODE (XEXP (x, 0)) == AND
3600 || (SUBREG_P (XEXP (x, 0))
3601 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3603 *total = 1;
3604 return true;
3607 else if (XEXP (x, 1) == const0_rtx
3608 && GET_CODE (XEXP (x, 0)) == AND
3609 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3610 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3611 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3612 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3614 *total = 1;
3615 return true;
3617 else
3618 return false;
3620 case SMIN:
3621 case SMAX:
3622 /* This is most likely a clips.b or clips.w insn that is being made up
3623 by combine. */
3624 if (TARGET_SH2A
3625 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3626 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3627 && REG_P (XEXP (XEXP (x, 0), 0))
3628 && CONST_INT_P (XEXP (x, 1)))
3630 *total = COSTS_N_INSNS (1);
3631 return true;
3633 else
3634 return false;
3636 case CONST:
3637 case LABEL_REF:
3638 case SYMBOL_REF:
3639 if (TARGET_SHMEDIA64)
3640 *total = COSTS_N_INSNS (4);
3641 else if (TARGET_SHMEDIA32)
3642 *total = COSTS_N_INSNS (2);
3643 else
3644 *total = 5;
3645 return true;
3647 case CONST_DOUBLE:
3648 if (TARGET_SHMEDIA)
3649 *total = COSTS_N_INSNS (4);
3650 /* prepare_cmp_insn will force costly constants into registers before
3651 the cbranchdi4 pattern can see them, so preserve potentially
3652 interesting ones. */
3653 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3654 *total = 1;
3655 else
3656 *total = 10;
3657 return true;
3659 case CONST_VECTOR:
3660 /* FIXME: This looks broken. Only the last statement has any effect.
3661 Probably this could be folded with the PARALLEL case? */
3662 if (x == CONST0_RTX (GET_MODE (x)))
3663 *total = 0;
3664 else if (sh_1el_vec (x, VOIDmode))
3665 *total = outer_code != SET;
3666 if (sh_rep_vec (x, VOIDmode))
3667 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3668 + (outer_code != SET));
3669 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3670 return true;
3672 case PLUS:
3673 case MINUS:
3674 *total = COSTS_N_INSNS (addsubcosts (x));
3675 return true;
3677 case AND:
3678 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3679 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3681 *total = COSTS_N_INSNS (1);
3682 return true;
3684 /* Fall through. */
3686 case XOR:
3687 case IOR:
3688 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3689 return true;
3691 case MULT:
3692 *total = COSTS_N_INSNS (multcosts (x));
3693 return true;
3695 case LT:
3696 case GE:
3697 /* div0s sign comparison. */
3698 if (GET_CODE (XEXP (x, 0)) == XOR
3699 && REG_P ((XEXP (XEXP (x, 0), 0)))
3700 && REG_P ((XEXP (XEXP (x, 0), 1)))
3701 && satisfies_constraint_Z (XEXP (x, 1)))
3703 *total = COSTS_N_INSNS (1);
3704 return true;
3706 else
3707 return false;
3709 case LSHIFTRT:
3710 /* div0s sign comparison. */
3711 if (GET_CODE (XEXP (x, 0)) == XOR
3712 && REG_P ((XEXP (XEXP (x, 0), 0)))
3713 && REG_P ((XEXP (XEXP (x, 0), 1)))
3714 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3716 *total = COSTS_N_INSNS (1);
3717 return true;
3719 /* Fall through to shiftcosts. */
3720 case ASHIFT:
3721 case ASHIFTRT:
3723 int cost = shiftcosts (x);
3724 if (cost < 0)
3725 return false;
3726 *total = COSTS_N_INSNS (cost);
3727 return true;
3730 case DIV:
3731 case UDIV:
3732 case MOD:
3733 case UMOD:
3734 *total = COSTS_N_INSNS (20);
3735 return true;
3737 case FLOAT:
3738 case FIX:
3739 *total = 100;
3740 return true;
3742 default:
3743 return false;
3747 /* Determine the size of the fundamental move insn that will be used
3748 for the specified mode. */
3749 static inline int
3750 mov_insn_size (machine_mode mode, bool consider_sh2a)
3752 const int mode_sz = GET_MODE_SIZE (mode);
3754 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3755 || (TARGET_FMOVD && mode == DFmode))
3756 return mode_sz;
3757 else
3759 /* The max. available mode for actual move insns is SImode.
3760 Larger accesses will be split into multiple loads/stores. */
3761 const int max_mov_sz = GET_MODE_SIZE (SImode);
3762 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3766 /* Determine the maximum possible displacement for a move insn for the
3767 specified mode. */
3769 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3771 /* The 4 byte displacement move insns are the same as the 2 byte
3772 versions but take a 12 bit displacement. All we need to do is to
3773 scale the max. displacement value accordingly. */
3774 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3776 /* SH2A supports FPU move insns with 12 bit displacements.
3777 Other variants do not support any kind of displacements for
3778 FPU move insns. */
3779 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3780 return 0;
3781 else
3783 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3784 const int mode_sz = GET_MODE_SIZE (mode);
3785 int r = 15 * mov_insn_sz * disp_scale;
3787 /* If the mov insn will be split into multiple loads/stores, the
3788 maximum possible displacement is a bit smaller. */
3789 if (mode_sz > mov_insn_sz)
3790 r -= mode_sz - mov_insn_sz;
3791 return r;
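/* A worked example (illustrative, not from the original code): for SImode
   the non-SH2A limit is 15 * 4 = 60 bytes, while the SH2A 4 byte insns
   scale this by 4095 / 15 and reach 4095 * 4 = 16380 bytes.  For DImode
   on non-SH2A the access is split into two SImode moves, so the limit
   shrinks by mode_sz - mov_insn_sz = 4 bytes to 56.  */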
3795 /* Determine the alignment mask for a move insn of the
3796 specified mode. */
3797 static inline int
3798 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3800 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3801 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
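/* For example (illustrative): QImode gives a mask of 0, HImode 1, SImode 3,
   and DFmode 7 when 8 byte FP moves are available.  */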
3804 /* Return the displacement value of a displacement address. */
3805 HOST_WIDE_INT
3806 sh_disp_addr_displacement (rtx x)
3808 gcc_assert (satisfies_constraint_Sdd (x));
3809 return INTVAL (XEXP (XEXP (x, 0), 1));
3812 /* Compute the cost of an address. */
3813 static int
3814 sh_address_cost (rtx x, machine_mode mode,
3815 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3817 /* 'GBR + 0'. Account one more because of R0 restriction. */
3818 if (REG_P (x) && REGNO (x) == GBR_REG)
3819 return 2;
3821 /* Simple reg, post-inc, pre-dec addressing. */
3822 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3823 return 1;
3825 /* 'reg + disp' addressing. */
3826 if (GET_CODE (x) == PLUS
3827 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3829 /* 'GBR + disp'. Account one more because of R0 restriction. */
3830 if (REGNO (XEXP (x, 0)) == GBR_REG
3831 && gbr_displacement (XEXP (x, 1), mode))
3832 return 2;
3834 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3836 if (offset == 0)
3837 return 1;
3839 /* The displacement would fit into a 2 byte move insn.
3840 HImode and QImode loads/stores with displacement put pressure on
3841 R0 which will most likely require another reg copy. Thus account
3842 a higher cost for that. */
3843 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3844 return (mode == HImode || mode == QImode) ? 2 : 1;
3846 /* The displacement would fit into a 4 byte move insn (SH2A). */
3847 if (TARGET_SH2A
3848 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3849 return 2;
3851 /* The displacement is probably out of range and will require extra
3852 calculations. */
3853 return 3;
3856 /* 'reg + reg' addressing. Account a slightly higher cost because of
3857 increased pressure on R0. */
3858 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3859 && ! TARGET_SHMEDIA)
3860 return 3;
3862 /* Not sure what it is - probably expensive. */
3863 return 10;
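/* Some illustrative examples of the ranking above (not part of the
   original code):
     @r1                      -> 1
     @r1+  /  @-r1            -> 1
     @(4,r1)  SImode          -> 1   (fits a 2 byte displacement move)
     @(4,r1)  QImode          -> 2   (extra R0 pressure)
     @(r0,r1)                 -> 3
     GBR based addressing     -> 2  */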
3866 /* Code to expand a shift. */
3867 static void
3868 gen_ashift (int type, int n, rtx reg)
3870 rtx n_rtx;
3872 /* Negative values here come from the shift_amounts array. */
3873 if (n < 0)
3875 if (type == ASHIFT)
3876 type = LSHIFTRT;
3877 else
3878 type = ASHIFT;
3879 n = -n;
3882 n_rtx = GEN_INT (n);
3883 gcc_assert (satisfies_constraint_P27 (n_rtx));
3885 switch (type)
3887 case ASHIFTRT:
3888 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3889 break;
3890 case LSHIFTRT:
3891 if (n == 1)
3892 emit_insn (gen_shlr (reg, reg));
3893 else
3894 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3895 break;
3896 case ASHIFT:
3897 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3898 break;
3899 default:
3900 gcc_unreachable ();
3904 /* Code to expand a HImode shift. */
3905 static void
3906 gen_ashift_hi (int type, int n, rtx reg)
3908 /* Negative values here come from the shift_amounts array. */
3909 if (n < 0)
3911 if (type == ASHIFT)
3912 type = LSHIFTRT;
3913 else
3914 type = ASHIFT;
3915 n = -n;
3918 switch (type)
3920 case ASHIFTRT:
3921 case LSHIFTRT:
3922 /* We don't have HImode right shift operations because using the
3923 ordinary 32 bit shift instructions for that doesn't generate proper
3924 zero/sign extension.
3925 gen_ashift_hi is only called in contexts where we know that the
3926 sign extension works out correctly. */
3928 int offset = 0;
3929 if (GET_CODE (reg) == SUBREG)
3931 offset = SUBREG_BYTE (reg);
3932 reg = SUBREG_REG (reg);
3934 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3935 break;
3937 case ASHIFT:
3938 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3939 break;
3943 /* Output RTL to split a constant shift into its component SH constant
3944 shift instructions. */
3945 void
3946 gen_shifty_op (int code, rtx *operands)
3948 int value = INTVAL (operands[2]);
3949 int max, i;
3951 /* Truncate the shift count in case it is out of bounds. */
3952 value = value & 31;
3954 if (value == 31)
3956 if (code == LSHIFTRT)
3958 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3959 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3960 return;
3962 else if (code == ASHIFT)
3964 /* There is a two instruction sequence for 31 bit left shifts,
3965 but it requires r0. */
3966 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3968 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3969 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3970 return;
3974 else if (value == 0)
3976 /* This can happen even when optimizing, if there were subregs before
3977 reload. Don't output a nop here, as this is never optimized away;
3978 use a no-op move instead. */
3979 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3980 return;
3983 max = ashl_lshr_seq[value].insn_count;
3984 for (i = 0; i < max; i++)
3985 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
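/* For example (illustrative): a left shift by 10 is emitted as shifts by
   8 and 2 in the order given by ashl_lshr_seq, since gen_ashift only
   accepts the P27 amounts 1, 2, 8 and 16.  */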
3988 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3989 don't matter. */
3990 void
3991 gen_shifty_hi_op (int code, rtx *operands)
3993 int value = INTVAL (operands[2]);
3994 int max, i;
3995 void (*gen_fun) (int, int, rtx);
3997 /* This operation is used by and_shl for SImode values with a few
3998 high bits known to be cleared. */
3999 value &= 31;
4000 if (value == 0)
4002 emit_insn (gen_nop ());
4003 return;
4006 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4007 if (code == ASHIFT)
4009 max = ext_ashl_lshr_seq[value].insn_count;
4010 for (i = 0; i < max; i++)
4011 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4013 else
4014 /* When shifting right, emit the shifts in reverse order, so that
4015 solitary negative values come first. */
4016 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4017 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4020 /* Output RTL for an arithmetic right shift.
4021 ??? Rewrite to use super-optimizer sequences. */
4022 bool
4023 expand_ashiftrt (rtx *operands)
4025 rtx wrk;
4026 char func[18];
4027 int value;
4029 if (TARGET_DYNSHIFT)
4031 if (!CONST_INT_P (operands[2]))
4033 rtx count = copy_to_mode_reg (SImode, operands[2]);
4034 emit_insn (gen_negsi2 (count, count));
4035 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4036 return true;
4038 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4039 > 1 + SH_DYNAMIC_SHIFT_COST)
4041 rtx count
4042 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4043 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4044 return true;
4047 if (!CONST_INT_P (operands[2]))
4048 return false;
4050 value = INTVAL (operands[2]) & 31;
4052 if (value == 31)
4054 /* If we are called from abs expansion, arrange things so that we
4055 can use a single MT instruction that doesn't clobber the source,
4056 if LICM can hoist out the load of the constant zero. */
4057 if (currently_expanding_to_rtl)
4059 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4060 operands[1]));
4061 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4062 return true;
4064 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4065 return true;
4067 else if (value >= 16 && value <= 19)
4069 wrk = gen_reg_rtx (SImode);
4070 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4071 value -= 16;
4072 while (value--)
4073 gen_ashift (ASHIFTRT, 1, wrk);
4074 emit_move_insn (operands[0], wrk);
4075 return true;
4077 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4078 else if (value <= 5)
4080 wrk = gen_reg_rtx (SImode);
4081 emit_move_insn (wrk, operands[1]);
4082 while (value--)
4083 gen_ashift (ASHIFTRT, 1, wrk);
4084 emit_move_insn (operands[0], wrk);
4085 return true;
4088 wrk = gen_reg_rtx (Pmode);
4090 /* Load the value into an arg reg and call a helper. */
4091 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4092 sprintf (func, "__ashiftrt_r4_%d", value);
4093 function_symbol (wrk, func, SFUNC_STATIC);
4094 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4095 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4096 return true;
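/* To illustrate the dispatch above (not part of the original code): without
   dynamic shifts, a shift by 3 is expanded inline as three single-bit
   shifts, a shift by 17 as a 16 bit shift plus one single-bit shift, and a
   shift by 20 loads the operand into r4 and calls the library helper
   __ashiftrt_r4_20, which returns the result in r4.  */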
4099 /* Try to find a good way to implement the combiner pattern
4100 [(set (match_operand:SI 0 "register_operand" "r")
4101 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4102 (match_operand:SI 2 "const_int_operand" "n"))
4103 (match_operand:SI 3 "const_int_operand" "n"))) .
4104 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4105 return 0 for simple right / left or left/right shift combination.
4106 return 1 for a combination of shifts with zero_extend.
4107 return 2 for a combination of shifts with an AND that needs r0.
4108 return 3 for a combination of shifts with an AND that needs an extra
4109 scratch register, when the three highmost bits of the AND mask are clear.
4110 return 4 for a combination of shifts with an AND that needs an extra
4111 scratch register, when any of the three highmost bits of the AND mask
4112 is set.
4113 If ATTRP is set, store an initial right shift width in ATTRP[0],
4114 and the instruction length in ATTRP[1] . These values are not valid
4115 when returning 0.
4116 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4117 shift_amounts for the last shift value that is to be used before the
4118 sign extend. */
4120 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4122 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4123 int left = INTVAL (left_rtx), right;
4124 int best = 0;
4125 int cost, best_cost = 10000;
4126 int best_right = 0, best_len = 0;
4127 int i;
4128 int can_ext;
4130 if (left < 0 || left > 31)
4131 return 0;
4132 if (CONST_INT_P (mask_rtx))
4133 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4134 else
4135 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4136 /* Can this be expressed as a right shift / left shift pair? */
4137 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4138 right = exact_log2 (lsb);
4139 mask2 = ~(mask + lsb - 1);
4140 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4141 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4142 if (! mask2)
4143 best_cost = ashl_lshr_seq[right].insn_count
4144 + ashl_lshr_seq[right + left].insn_count;
4145 /* mask has no trailing zeroes <==> ! right */
4146 else if (! right && mask2 == ~(lsb2 - 1))
4148 int late_right = exact_log2 (lsb2);
4149 best_cost = ashl_lshr_seq[left + late_right].insn_count
4150 + ashl_lshr_seq[late_right].insn_count;
4152 /* Try to use zero extend. */
4153 if (mask2 == ~(lsb2 - 1))
4155 int width, first;
4157 for (width = 8; width <= 16; width += 8)
4159 /* Can we zero-extend right away? */
4160 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4162 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4163 + ext_ashl_lshr_seq[left + right].insn_count;
4164 if (cost < best_cost)
4166 best = 1;
4167 best_cost = cost;
4168 best_right = right;
4169 best_len = cost;
4170 if (attrp)
4171 attrp[2] = -1;
4173 continue;
4175 /* ??? Could try to put zero extend into initial right shift,
4176 or even shift a bit left before the right shift. */
4177 /* Determine value of first part of left shift, to get to the
4178 zero extend cut-off point. */
4179 first = width - exact_log2 (lsb2) + right;
4180 if (first >= 0 && right + left - first >= 0)
4182 cost = ext_ashl_lshr_seq[right].insn_count
4183 + ext_ashl_lshr_seq[first].insn_count + 1
4184 + ext_ashl_lshr_seq[right + left - first].insn_count;
4186 if (cost < best_cost)
4188 best = 1;
4189 best_cost = cost;
4190 best_right = right;
4191 best_len = cost;
4192 if (attrp)
4193 attrp[2] = first;
4198 /* Try to use r0 AND pattern */
4199 for (i = 0; i <= 2; i++)
4201 if (i > right)
4202 break;
4203 if (! CONST_OK_FOR_K08 (mask >> i))
4204 continue;
4205 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4206 if (cost < best_cost)
4208 best = 2;
4209 best_cost = cost;
4210 best_right = i;
4211 best_len = cost - 1;
4214 /* Try to use a scratch register to hold the AND operand. */
4215 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4216 for (i = 0; i <= 2; i++)
4218 if (i > right)
4219 break;
4220 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4221 + (can_ext
4222 ? ext_ashl_lshr_seq
4223 : ashl_lshr_seq)[left + i].insn_count;
4224 if (cost < best_cost)
4226 best = 4 - can_ext;
4227 best_cost = cost;
4228 best_right = i;
4229 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4233 if (attrp)
4235 attrp[0] = best_right;
4236 attrp[1] = best_len;
4238 return best;
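/* A small example (illustrative, not from the original code): for left = 2
   and mask = 0x3fc, i.e. an 8 bit field shifted left by 2, the normalized
   mask >> left is 0xff, so both the zero-extend alternative (kind 1) and
   the K08 AND alternative (kind 2) apply and the cheaper sequence wins.  */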
4241 /* This is used in length attributes of the unnamed instructions
4242 corresponding to shl_and_kind return values of 1 and 2. */
4244 shl_and_length (rtx insn)
4246 rtx set_src, left_rtx, mask_rtx;
4247 int attributes[3];
4249 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4250 left_rtx = XEXP (XEXP (set_src, 0), 1);
4251 mask_rtx = XEXP (set_src, 1);
4252 shl_and_kind (left_rtx, mask_rtx, attributes);
4253 return attributes[1];
4256 /* This is used in length attribute of the and_shl_scratch instruction. */
4258 shl_and_scr_length (rtx insn)
4260 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4261 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4262 rtx op = XEXP (set_src, 0);
4263 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4264 op = XEXP (XEXP (op, 0), 0);
4265 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4268 /* Generate rtl for instructions for which shl_and_kind advised a particular
4269 method of generating them, i.e. returned a nonzero kind. */
4270 bool
4271 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4273 int attributes[3];
4274 unsigned HOST_WIDE_INT mask;
4275 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4276 int right, total_shift;
4277 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4279 right = attributes[0];
4280 total_shift = INTVAL (left_rtx) + right;
4281 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4282 switch (kind)
4284 default:
4285 return true;
4286 case 1:
4288 int first = attributes[2];
4289 rtx operands[3];
4291 if (first < 0)
4293 emit_insn ((mask << right) <= 0xff
4294 ? gen_zero_extendqisi2 (dest,
4295 gen_lowpart (QImode, source))
4296 : gen_zero_extendhisi2 (dest,
4297 gen_lowpart (HImode, source)));
4298 source = dest;
4300 if (source != dest)
4301 emit_insn (gen_movsi (dest, source));
4302 operands[0] = dest;
4303 if (right)
4305 operands[2] = GEN_INT (right);
4306 gen_shifty_hi_op (LSHIFTRT, operands);
4308 if (first > 0)
4310 operands[2] = GEN_INT (first);
4311 gen_shifty_hi_op (ASHIFT, operands);
4312 total_shift -= first;
4313 mask <<= first;
4315 if (first >= 0)
4316 emit_insn (mask <= 0xff
4317 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4318 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4319 if (total_shift > 0)
4321 operands[2] = GEN_INT (total_shift);
4322 gen_shifty_hi_op (ASHIFT, operands);
4324 break;
4326 case 4:
4327 shift_gen_fun = gen_shifty_op;
4328 case 3:
4329 /* If the topmost bit that matters is set, set the topmost bits
4330 that don't matter. This way, we might be able to get a shorter
4331 signed constant. */
4332 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4333 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4334 case 2:
4335 /* Don't expand fine-grained when combining, because that will
4336 make the pattern fail. */
4337 if (currently_expanding_to_rtl
4338 || reload_in_progress || reload_completed)
4340 rtx operands[3];
4342 /* Cases 3 and 4 should be handled by this split
4343 only while combining */
4344 gcc_assert (kind <= 2);
4345 if (right)
4347 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4348 source = dest;
4350 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4351 if (total_shift)
4353 operands[0] = dest;
4354 operands[1] = dest;
4355 operands[2] = GEN_INT (total_shift);
4356 shift_gen_fun (ASHIFT, operands);
4358 break;
4360 else
4362 int neg = 0;
4363 if (kind != 4 && total_shift < 16)
4365 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4366 if (neg > 0)
4367 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4368 else
4369 neg = 0;
4371 emit_insn (gen_and_shl_scratch (dest, source,
4372 GEN_INT (right),
4373 GEN_INT (mask),
4374 GEN_INT (total_shift + neg),
4375 GEN_INT (neg)));
4376 emit_insn (gen_movsi (dest, dest));
4377 break;
4380 return false;
4383 /* Try to find a good way to implement the combiner pattern
4384 [(set (match_operand:SI 0 "register_operand" "=r")
4385 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4386 (match_operand:SI 2 "const_int_operand" "n")
4387 (match_operand:SI 3 "const_int_operand" "n")
4388 (const_int 0)))
4389 (clobber (reg:SI T_REG))]
4390 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4391 return 0 for simple left / right shift combination.
4392 return 1 for left shift / 8 bit sign extend / left shift.
4393 return 2 for left shift / 16 bit sign extend / left shift.
4394 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4395 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4396 return 5 for left shift / 16 bit sign extend / right shift
4397 return 6 for < 8 bit sign extend / left shift.
4398 return 7 for < 8 bit sign extend / left shift / single right shift.
4399 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4401 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4403 int left, size, insize, ext;
4404 int cost = 0, best_cost;
4405 int kind;
4407 left = INTVAL (left_rtx);
4408 size = INTVAL (size_rtx);
4409 insize = size - left;
4410 gcc_assert (insize > 0);
4411 /* Default to left / right shift. */
4412 kind = 0;
4413 best_cost = ashl_lshr_seq[32 - insize].insn_count
4414 + ashl_lshr_seq[32 - size].insn_count;
4415 if (size <= 16)
4417 /* 16 bit shift / sign extend / 16 bit shift */
4418 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4419 + ashl_lshr_seq[16 - size].insn_count;
4420 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4421 below, by alternative 3 or something even better. */
4422 if (cost < best_cost)
4424 kind = 5;
4425 best_cost = cost;
4428 /* Try a plain sign extend between two shifts. */
4429 for (ext = 16; ext >= insize; ext -= 8)
4431 if (ext <= size)
4433 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4434 + ashl_lshr_seq[size - ext].insn_count;
4435 if (cost < best_cost)
4437 kind = ext / (unsigned) 8;
4438 best_cost = cost;
4441 /* Check if we can do a sloppy shift with a final signed shift
4442 restoring the sign. */
4443 if (EXT_SHIFT_SIGNED (size - ext))
4444 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4445 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4446 /* If not, maybe it's still cheaper to do the second shift sloppy,
4447 and do a final sign extend? */
4448 else if (size <= 16)
4449 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4450 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4451 + 1;
4452 else
4453 continue;
4454 if (cost < best_cost)
4456 kind = ext / (unsigned) 8 + 2;
4457 best_cost = cost;
4460 /* Check if we can sign extend in r0 */
4461 if (insize < 8)
4463 cost = 3 + ashl_lshr_seq[left].insn_count;
4464 if (cost < best_cost)
4466 kind = 6;
4467 best_cost = cost;
4469 /* Try the same with a final signed shift. */
4470 if (left < 31)
4472 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4473 if (cost < best_cost)
4475 kind = 7;
4476 best_cost = cost;
4480 if (TARGET_DYNSHIFT)
4482 /* Try to use a dynamic shift. */
4483 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4484 if (cost < best_cost)
4486 kind = 0;
4487 best_cost = cost;
4490 if (costp)
4491 *costp = cost;
4492 return kind;
4495 /* Function to be used in the length attribute of the instructions
4496 implementing this pattern. */
4498 shl_sext_length (rtx insn)
4500 rtx set_src, left_rtx, size_rtx;
4501 int cost;
4503 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4504 left_rtx = XEXP (XEXP (set_src, 0), 1);
4505 size_rtx = XEXP (set_src, 1);
4506 shl_sext_kind (left_rtx, size_rtx, &cost);
4507 return cost;
4510 /* Generate rtl for this pattern */
4511 bool
4512 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4514 int kind;
4515 int left, size, insize, cost;
4516 rtx operands[3];
4518 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4519 left = INTVAL (left_rtx);
4520 size = INTVAL (size_rtx);
4521 insize = size - left;
4522 switch (kind)
4524 case 1:
4525 case 2:
4526 case 3:
4527 case 4:
4529 int ext = kind & 1 ? 8 : 16;
4530 int shift2 = size - ext;
4532 /* Don't expand fine-grained when combining, because that will
4533 make the pattern fail. */
4534 if (! currently_expanding_to_rtl
4535 && ! reload_in_progress && ! reload_completed)
4537 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4538 emit_insn (gen_movsi (dest, source));
4539 break;
4541 if (dest != source)
4542 emit_insn (gen_movsi (dest, source));
4543 operands[0] = dest;
4544 if (ext - insize)
4546 operands[2] = GEN_INT (ext - insize);
4547 gen_shifty_hi_op (ASHIFT, operands);
4549 emit_insn (kind & 1
4550 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4551 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4552 if (kind <= 2)
4554 if (shift2)
4556 operands[2] = GEN_INT (shift2);
4557 gen_shifty_op (ASHIFT, operands);
4560 else
4562 if (shift2 > 0)
4564 if (EXT_SHIFT_SIGNED (shift2))
4566 operands[2] = GEN_INT (shift2 + 1);
4567 gen_shifty_op (ASHIFT, operands);
4568 operands[2] = const1_rtx;
4569 gen_shifty_op (ASHIFTRT, operands);
4570 break;
4572 operands[2] = GEN_INT (shift2);
4573 gen_shifty_hi_op (ASHIFT, operands);
4575 else if (shift2)
4577 operands[2] = GEN_INT (-shift2);
4578 gen_shifty_hi_op (LSHIFTRT, operands);
4580 emit_insn (size <= 8
4581 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4582 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4584 break;
4586 case 5:
4588 int i = 16 - size;
4589 if (! currently_expanding_to_rtl
4590 && ! reload_in_progress && ! reload_completed)
4591 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4592 else
4594 operands[0] = dest;
4595 operands[2] = GEN_INT (16 - insize);
4596 gen_shifty_hi_op (ASHIFT, operands);
4597 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4599 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4600 while (--i >= 0)
4601 gen_ashift (ASHIFTRT, 1, dest);
4602 break;
4604 case 6:
4605 case 7:
4606 /* Don't expand fine-grained when combining, because that will
4607 make the pattern fail. */
4608 if (! currently_expanding_to_rtl
4609 && ! reload_in_progress && ! reload_completed)
4611 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4612 emit_insn (gen_movsi (dest, source));
4613 break;
4615 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4616 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4617 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4618 operands[0] = dest;
4619 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4620 gen_shifty_op (ASHIFT, operands);
4621 if (kind == 7)
4622 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4623 break;
4624 default:
4625 return true;
4627 return false;
4630 /* Prefix a symbol_ref name with "datalabel". */
4632 gen_datalabel_ref (rtx sym)
4634 const char *str;
4636 if (GET_CODE (sym) == LABEL_REF)
4637 return gen_rtx_CONST (GET_MODE (sym),
4638 gen_rtx_UNSPEC (GET_MODE (sym),
4639 gen_rtvec (1, sym),
4640 UNSPEC_DATALABEL));
4642 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4644 str = XSTR (sym, 0);
4645 /* Share all SYMBOL_REF strings with the same value - that is important
4646 for cse. */
4647 str = IDENTIFIER_POINTER (get_identifier (str));
4648 XSTR (sym, 0) = str;
4650 return sym;
4654 typedef struct label_ref_list_d
4656 rtx_code_label *label;
4657 struct label_ref_list_d *next;
4659 /* Pool allocation new operator. */
4660 inline void *operator new (size_t)
4662 return pool.allocate ();
4665 /* Delete operator utilizing pool allocation. */
4666 inline void operator delete (void *ptr)
4668 pool.remove ((label_ref_list_d *) ptr);
4671 /* Memory allocation pool. */
4672 static pool_allocator<label_ref_list_d> pool;
4674 } *label_ref_list_t;
4676 pool_allocator<label_ref_list_d> label_ref_list_d::pool
4677 ("label references list", 30);
4679 /* The SH cannot load a large constant into a register, constants have to
4680 come from a pc relative load. The reference of a pc relative load
4681 instruction must be less than 1k in front of the instruction. This
4682 means that we often have to dump a constant inside a function, and
4683 generate code to branch around it.
4685 It is important to minimize this, since the branches will slow things
4686 down and make things bigger.
4688 Worst case code looks like:
4690 mov.l L1,rn
4691 bra L2
4693 align
4694 L1: .long value
4698 mov.l L3,rn
4699 bra L4
4701 align
4702 L3: .long value
4706 We fix this by performing a scan before scheduling, which notices which
4707 instructions need to have their operands fetched from the constant table
4708 and builds the table.
4710 The algorithm is:
4712 scan, find an instruction which needs a pcrel move. Look forward, find the
4713 last barrier which is within MAX_COUNT bytes of the requirement.
4714 If there isn't one, make one. Process all the instructions between
4715 the find and the barrier.
4717 In the above example, we can tell that L3 is within 1k of L1, so
4718 the first move can be shrunk from the 3 insn+constant sequence into
4719 just 1 insn, and the constant moved to L3 to make:
4721 mov.l L1,rn
4723 mov.l L3,rn
4724 bra L4
4726 align
4727 L3:.long value
4728 L4:.long value
4730 Then the second move becomes the target for the shortening process. */
4732 typedef struct
4734 rtx value; /* Value in table. */
4735 rtx_code_label *label; /* Label of value. */
4736 label_ref_list_t wend; /* End of window. */
4737 machine_mode mode; /* Mode of value. */
4739 /* True if this constant is accessed as part of a post-increment
4740 sequence. Note that HImode constants are never accessed in this way. */
4741 bool part_of_sequence_p;
4742 } pool_node;
4744 /* The maximum number of constants that can fit into one pool, since
4745 constants in the range 0..510 are at least 2 bytes long, and in the
4746 range from there to 1018 at least 4 bytes. */
4748 #define MAX_POOL_SIZE 372
4749 static pool_node pool_vector[MAX_POOL_SIZE];
4750 static int pool_size;
4751 static rtx_code_label *pool_window_label;
4752 static int pool_window_last;
4754 static int max_labelno_before_reorg;
4756 /* ??? If we need a constant in HImode which is the truncated value of a
4757 constant we need in SImode, we could combine the two entries thus saving
4758 two bytes. Is this common enough to be worth the effort of implementing
4759 it? */
4761 /* ??? This stuff should be done at the same time that we shorten branches.
4762 As it is now, we must assume that all branches are the maximum size, and
4763 this causes us to almost always output constant pools sooner than
4764 necessary. */
4766 /* Add a constant to the pool and return its label. */
4767 static rtx_code_label *
4768 add_constant (rtx x, machine_mode mode, rtx last_value)
4770 int i;
4771 rtx_code_label *lab, *new_rtx;
4772 label_ref_list_t ref, newref;
4774 /* First see if we've already got it. */
4775 for (i = 0; i < pool_size; i++)
4777 if (x->code == pool_vector[i].value->code
4778 && mode == pool_vector[i].mode)
4780 if (x->code == CODE_LABEL)
4782 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4783 continue;
4785 if (rtx_equal_p (x, pool_vector[i].value))
4787 lab = new_rtx = 0;
4788 if (! last_value
4789 || ! i
4790 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4792 new_rtx = gen_label_rtx ();
4793 LABEL_REFS (new_rtx) = pool_vector[i].label;
4794 pool_vector[i].label = lab = new_rtx;
4796 if (lab && pool_window_label)
4798 newref = new label_ref_list_d;
4799 newref->label = pool_window_label;
4800 ref = pool_vector[pool_window_last].wend;
4801 newref->next = ref;
4802 pool_vector[pool_window_last].wend = newref;
4804 if (new_rtx)
4805 pool_window_label = new_rtx;
4806 pool_window_last = i;
4807 return lab;
4812 /* Need a new one. */
4813 pool_vector[pool_size].value = x;
4814 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4816 lab = 0;
4817 pool_vector[pool_size - 1].part_of_sequence_p = true;
4819 else
4820 lab = gen_label_rtx ();
4821 pool_vector[pool_size].mode = mode;
4822 pool_vector[pool_size].label = lab;
4823 pool_vector[pool_size].wend = NULL;
4824 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4825 if (lab && pool_window_label)
4827 newref = new label_ref_list_d;
4828 newref->label = pool_window_label;
4829 ref = pool_vector[pool_window_last].wend;
4830 newref->next = ref;
4831 pool_vector[pool_window_last].wend = newref;
4833 if (lab)
4834 pool_window_label = lab;
4835 pool_window_last = pool_size;
4836 pool_size++;
4837 return lab;
4840 /* Output the literal table. START, if nonzero, is the first instruction
4841 this table is needed for, and also indicates that there is at least one
4842 casesi_worker_2 instruction; we have to emit the operand3 labels from
4843 these insns at a 4-byte aligned position. BARRIER is the barrier
4844 after which we are to place the table. */
4845 static void
4846 dump_table (rtx_insn *start, rtx_insn *barrier)
4848 rtx_insn *scan = barrier;
4849 int i;
4850 bool need_align = true;
4851 rtx lab;
4852 label_ref_list_t ref;
4853 bool have_df = false;
4855 /* Do two passes, first time dump out the HI sized constants. */
4857 for (i = 0; i < pool_size; i++)
4859 pool_node *p = &pool_vector[i];
4861 if (p->mode == HImode)
4863 if (need_align)
4865 scan = emit_insn_after (gen_align_2 (), scan);
4866 need_align = false;
4868 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4869 scan = emit_label_after (lab, scan);
4870 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4871 scan);
4872 for (ref = p->wend; ref; ref = ref->next)
4874 lab = ref->label;
4875 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4878 else if (p->mode == DFmode)
4879 have_df = true;
4882 need_align = true;
4884 if (start)
4886 scan = emit_insn_after (gen_align_4 (), scan);
4887 need_align = false;
4888 for (; start != barrier; start = NEXT_INSN (start))
4889 if (NONJUMP_INSN_P (start)
4890 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4892 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4893 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4895 scan = emit_label_after (lab, scan);
4898 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4900 rtx_insn *align_insn = NULL;
4902 scan = emit_label_after (gen_label_rtx (), scan);
4903 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4904 need_align = false;
4906 for (i = 0; i < pool_size; i++)
4908 pool_node *p = &pool_vector[i];
4910 switch (p->mode)
4912 case HImode:
4913 break;
4914 case SImode:
4915 case SFmode:
4916 if (align_insn && !p->part_of_sequence_p)
4918 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4919 emit_label_before (lab, align_insn);
4920 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4921 align_insn);
4922 for (ref = p->wend; ref; ref = ref->next)
4924 lab = ref->label;
4925 emit_insn_before (gen_consttable_window_end (lab),
4926 align_insn);
4928 delete_insn (align_insn);
4929 align_insn = NULL;
4930 continue;
4932 else
4934 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4935 scan = emit_label_after (lab, scan);
4936 scan = emit_insn_after (gen_consttable_4 (p->value,
4937 const0_rtx), scan);
4938 need_align = ! need_align;
4940 break;
4941 case DFmode:
4942 if (need_align)
4944 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4945 align_insn = scan;
4946 need_align = false;
4948 case DImode:
4949 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4950 scan = emit_label_after (lab, scan);
4951 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4952 scan);
4953 break;
4954 default:
4955 gcc_unreachable ();
4958 if (p->mode != HImode)
4960 for (ref = p->wend; ref; ref = ref->next)
4962 lab = ref->label;
4963 scan = emit_insn_after (gen_consttable_window_end (lab),
4964 scan);
4969 pool_size = 0;
4972 for (i = 0; i < pool_size; i++)
4974 pool_node *p = &pool_vector[i];
4976 switch (p->mode)
4978 case HImode:
4979 break;
4980 case SImode:
4981 case SFmode:
4982 if (need_align)
4984 need_align = false;
4985 scan = emit_label_after (gen_label_rtx (), scan);
4986 scan = emit_insn_after (gen_align_4 (), scan);
4988 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4989 scan = emit_label_after (lab, scan);
4990 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4991 scan);
4992 break;
4993 case DFmode:
4994 case DImode:
4995 if (need_align)
4997 need_align = false;
4998 scan = emit_label_after (gen_label_rtx (), scan);
4999 scan = emit_insn_after (gen_align_4 (), scan);
5001 for (lab = p->label; lab; lab = LABEL_REFS (lab))
5002 scan = emit_label_after (lab, scan);
5003 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
5004 scan);
5005 break;
5006 default:
5007 gcc_unreachable ();
5010 if (p->mode != HImode)
5012 for (ref = p->wend; ref; ref = ref->next)
5014 lab = ref->label;
5015 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5020 scan = emit_insn_after (gen_consttable_end (), scan);
5021 scan = emit_barrier_after (scan);
5022 pool_size = 0;
5023 pool_window_label = NULL;
5024 pool_window_last = 0;
5027 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
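/* For a typical mova insn of the form
     (set (reg:SI ...) (unspec:SI [(label_ref L)] UNSPEC_MOVA))
   this yields the (label_ref L) operand (illustrative sketch; the
   mova_const variant carries a CONST instead of a LABEL_REF).  */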
5029 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5031 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5032 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
5033 need to fix it if the input value is CONST_OK_FOR_I08. */
5034 static bool
5035 broken_move (rtx_insn *insn)
5037 if (NONJUMP_INSN_P (insn))
5039 rtx pat = PATTERN (insn);
5040 if (GET_CODE (pat) == PARALLEL)
5041 pat = XVECEXP (pat, 0, 0);
5042 if (GET_CODE (pat) == SET
5043 /* We can load any 8-bit value if we don't care what the high
5044 order bits end up as. */
5045 && GET_MODE (SET_DEST (pat)) != QImode
5046 && (CONSTANT_P (SET_SRC (pat))
5047 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5048 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5049 /* Match mova_const. */
5050 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5051 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5052 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5053 && ! (TARGET_SH2E
5054 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5055 && (fp_zero_operand (SET_SRC (pat))
5056 || fp_one_operand (SET_SRC (pat)))
5057 /* In general we don't know the current setting of fpscr, so
5058 disable fldi.
5059 There is an exception if this was a register-register move
5060 before reload - and hence it was ascertained that we have
5061 single precision setting - and in a post-reload optimization
5062 we changed this to do a constant load. In that case
5063 we don't have an r0 clobber, hence we must use fldi. */
5064 && (TARGET_FMOVD
5065 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5066 == SCRATCH))
5067 && REG_P (SET_DEST (pat))
5068 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5069 && ! (TARGET_SH2A
5070 && GET_MODE (SET_DEST (pat)) == SImode
5071 && (satisfies_constraint_I20 (SET_SRC (pat))
5072 || satisfies_constraint_I28 (SET_SRC (pat))))
5073 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5074 return true;
5077 return false;
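/* Illustrative examples (not part of the original code): on a non-SH2A
   target, (set (reg:SI r1) (const_int 0x12345678)) is a broken move, since
   the constant satisfies none of the immediate constraints and must come
   from the constant pool, whereas (set (reg:SI r1) (const_int 4)) can be
   loaded directly with mov #imm and is left alone.  */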
5080 /* Return true if the specified insn is a mova insn. */
5081 static bool
5082 mova_p (rtx_insn *insn)
5084 return (NONJUMP_INSN_P (insn)
5085 && GET_CODE (PATTERN (insn)) == SET
5086 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5087 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5088 /* Don't match mova_const. */
5089 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5092 /* Fix up a mova from a switch that went out of range. */
5093 static void
5094 fixup_mova (rtx_insn *mova)
5096 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5097 if (! flag_pic)
5099 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5100 INSN_CODE (mova) = -1;
5102 else
5104 rtx_insn *worker = mova;
5105 rtx_code_label *lab = gen_label_rtx ();
5106 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5110 worker = NEXT_INSN (worker);
5111 gcc_assert (worker
5112 && !LABEL_P (worker)
5113 && !JUMP_P (worker));
5114 } while (NOTE_P (worker)
5115 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5116 wpat = PATTERN (worker);
5117 wpat0 = XVECEXP (wpat, 0, 0);
5118 wpat1 = XVECEXP (wpat, 0, 1);
5119 wsrc = SET_SRC (wpat0);
5120 PATTERN (worker) = (gen_casesi_worker_2
5121 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5122 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5123 XEXP (wpat1, 0)));
5124 INSN_CODE (worker) = -1;
5125 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5126 base = gen_rtx_LABEL_REF (Pmode, lab);
5127 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5128 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5129 INSN_CODE (mova) = -1;
5133 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5134 *num_mova, and check if the new mova is not nested within the first one.
5135 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5136 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5137 static int
5138 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5140 int n_addr = 0; /* Initialization to shut up spurious warning. */
5141 int f_target, n_target = 0; /* Likewise. */
5143 if (optimize)
5145 /* If NEW_MOVA has no address yet, it will be handled later. */
5146 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5147 return -1;
5149 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5150 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5151 if (n_addr > n_target || n_addr + 1022 < n_target)
5153 /* Change the mova into a load.
5154 broken_move will then return true for it. */
5155 fixup_mova (new_mova);
5156 return 1;
5159 if (!(*num_mova)++)
5161 *first_mova = new_mova;
5162 return 2;
5164 if (!optimize
5165 || ((f_target
5166 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5167 >= n_target))
5168 return -1;
5170 (*num_mova)--;
5171 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5172 > n_target - n_addr)
5174 fixup_mova (*first_mova);
5175 return 0;
5177 else
5179 fixup_mova (new_mova);
5180 return 1;
5184 /* Find the last barrier from insn FROM which is close enough to hold the
5185 constant pool. If we can't find one, then create one near the end of
5186 the range. */
5187 static rtx_insn *
5188 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5190 int count_si = 0;
5191 int count_hi = 0;
5192 int found_hi = 0;
5193 int found_si = 0;
5194 int found_di = 0;
5195 int hi_align = 2;
5196 int si_align = 2;
5197 int leading_mova = num_mova;
5198 rtx_insn *barrier_before_mova = NULL;
5199 rtx_insn *found_barrier = NULL;
5200 rtx_insn *good_barrier = NULL;
5201 int si_limit;
5202 int hi_limit;
5203 rtx_insn *orig = from;
5204 rtx_insn *last_got = NULL;
5205 rtx_insn *last_symoff = NULL;
5207 /* For HImode: range is 510, add 4 because pc counts from address of
5208 second instruction after this one, subtract 2 for the jump instruction
5209 that we may need to emit before the table, subtract 2 for the instruction
5210 that fills the jump delay slot (in very rare cases, reorg will take an
5211 instruction from after the constant pool or will leave the delay slot
5212 empty). This gives 510.
5213 For SImode: range is 1020, add 4 because pc counts from address of
5214 second instruction after this one, subtract 2 in case pc is 2 byte
5215 aligned, subtract 2 for the jump instruction that we may need to emit
5216 before the table, subtract 2 for the instruction that fills the jump
5217 delay slot. This gives 1018. */
5219 /* The branch will always be shortened now that the reference address for
5220 forward branches is the successor address, thus we need no longer make
5221 adjustments to the [sh]i_limit for -O0. */
5223 si_limit = 1018;
5224 hi_limit = 510;
5226 while (from && count_si < si_limit && count_hi < hi_limit)
5228 int inc = get_attr_length (from);
5229 int new_align = 1;
5231 /* If this is a label that existed at the time of the compute_alignments
5232 call, determine the alignment. N.B. When find_barrier recurses for
5233 an out-of-reach mova, we might see labels at the start of previously
5234 inserted constant tables. */
5235 if (LABEL_P (from)
5236 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5238 if (optimize)
5239 new_align = 1 << label_to_alignment (from);
5240 else if (BARRIER_P (prev_nonnote_insn (from)))
5241 new_align = 1 << barrier_align (from);
5242 else
5243 new_align = 1;
5244 inc = 0;
5246 /* In case we are scanning a constant table because of recursion, check
5247 for explicit alignments. If the table is long, we might be forced
5248 to emit the new table in front of it; the length of the alignment
5249 might be the last straw. */
5250 else if (NONJUMP_INSN_P (from)
5251 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5252 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5253 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5254 /* When we find the end of a constant table, paste the new constant
5255 at the end. That is better than putting it in front because
5256 this way, we don't need extra alignment for adding a 4-byte-aligned
5257 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5258 else if (NONJUMP_INSN_P (from)
5259 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5260 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5261 return from;
5263 if (BARRIER_P (from))
5265 rtx_insn *next;
5267 found_barrier = from;
5269 /* If we are at the end of the function, or in front of an alignment
5270 instruction, we need not insert an extra alignment. We prefer
5271 this kind of barrier. */
5272 if (barrier_align (from) > 2)
5273 good_barrier = from;
5275 /* If we are at the end of a hot/cold block, dump the constants
5276 here. */
5277 next = NEXT_INSN (from);
5278 if (next
5279 && NOTE_P (next)
5280 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5281 break;
5284 if (broken_move (from))
5286 rtx pat, src, dst;
5287 machine_mode mode;
5289 pat = PATTERN (from);
5290 if (GET_CODE (pat) == PARALLEL)
5291 pat = XVECEXP (pat, 0, 0);
5292 src = SET_SRC (pat);
5293 dst = SET_DEST (pat);
5294 mode = GET_MODE (dst);
5296 /* GOT pc-relative setting comes in a pair of
5297 mova .L8,r0
5298 mov.l .L8,r12
5299 instructions. (plus add r0,r12).
5300 Remember if we see one without the other. */
5301 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5302 last_got = last_got ? NULL : from;
5303 else if (PIC_ADDR_P (src))
5304 last_got = last_got ? NULL : from;
5306 /* We must explicitly check the mode, because sometimes the
5307 front end will generate code to load unsigned constants into
5308 HImode targets without properly sign extending them. */
5309 if (mode == HImode
5310 || (mode == SImode && satisfies_constraint_I16 (src)
5311 && REGNO (dst) != FPUL_REG))
5313 found_hi += 2;
5314 /* We put the short constants before the long constants, so
5315 we must count the length of short constants in the range
5316 for the long constants. */
5317 /* ??? This isn't optimal, but is easy to do. */
5318 si_limit -= 2;
5320 else
5322 /* We dump DF/DI constants before SF/SI ones, because
5323 the limit is the same, but the alignment requirements
5324 are higher. We may waste up to 4 additional bytes
5325 for alignment, and the DF/DI constant may have
5326 another SF/SI constant placed before it. */
5327 if (TARGET_SHCOMPACT
5328 && ! found_di
5329 && (mode == DFmode || mode == DImode))
5331 found_di = 1;
5332 si_limit -= 8;
5334 while (si_align > 2 && found_si + si_align - 2 > count_si)
5335 si_align >>= 1;
5336 if (found_si > count_si)
5337 count_si = found_si;
5338 found_si += GET_MODE_SIZE (mode);
5339 if (num_mova)
5340 si_limit -= GET_MODE_SIZE (mode);
5344 if (mova_p (from))
5346 switch (untangle_mova (&num_mova, &mova, from))
5348 case 1:
5349 if (flag_pic)
5351 rtx src = SET_SRC (PATTERN (from));
5352 if (GET_CODE (src) == CONST
5353 && GET_CODE (XEXP (src, 0)) == UNSPEC
5354 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5355 last_symoff = from;
5357 break;
5358 case 0: return find_barrier (0, 0, mova);
5359 case 2:
5361 leading_mova = 0;
5362 barrier_before_mova
5363 = good_barrier ? good_barrier : found_barrier;
5365 default: break;
5367 if (found_si > count_si)
5368 count_si = found_si;
5370 else if (JUMP_TABLE_DATA_P (from)
5371 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5373 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5374 || (num_mova
5375 && (prev_nonnote_insn (from)
5376 == XEXP (MOVA_LABELREF (mova), 0))))
5377 num_mova--;
5378 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5380 /* We have just passed the barrier in front of the
5381 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5382 the ADDR_DIFF_VEC is accessed as data, just like our pool
5383 constants, this is a good opportunity to accommodate what
5384 we have gathered so far.
5385 If we waited any longer, we could end up at a barrier in
5386 front of code, which gives worse cache usage for separated
5387 instruction / data caches. */
5388 good_barrier = found_barrier;
5389 break;
5391 else
5393 rtx body = PATTERN (from);
5394 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5397 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5398 else if (JUMP_P (from)
5399 && ! TARGET_SH2
5400 && ! optimize_size)
5401 new_align = 4;
5403 /* There is a possibility that a bf is transformed into a bf/s by the
5404 delay slot scheduler. */
5405 if (JUMP_P (from)
5406 && get_attr_type (from) == TYPE_CBRANCH
5407 && ! sequence_insn_p (from))
5408 inc += 2;
5410 if (found_si)
5412 count_si += inc;
5413 if (new_align > si_align)
5415 si_limit -= (count_si - 1) & (new_align - si_align);
5416 si_align = new_align;
5418 count_si = (count_si + new_align - 1) & -new_align;
5420 if (found_hi)
5422 count_hi += inc;
5423 if (new_align > hi_align)
5425 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5426 hi_align = new_align;
5428 count_hi = (count_hi + new_align - 1) & -new_align;
5430 from = NEXT_INSN (from);
5433 if (num_mova)
5435 if (leading_mova)
5437 /* Try as we might, the leading mova is out of range. Change
5438 it into a load (which will become a pcload) and retry. */
5439 fixup_mova (mova);
5440 return find_barrier (0, 0, mova);
5442 else
5444 /* Insert the constant pool table before the mova instruction,
5445 to prevent the mova label reference from going out of range. */
5446 from = mova;
5447 good_barrier = found_barrier = barrier_before_mova;
5451 if (found_barrier)
5453 if (good_barrier && next_real_insn (found_barrier))
5454 found_barrier = good_barrier;
5456 else
5458 /* We didn't find a barrier in time to dump our stuff,
5459 so we'll make one. */
5460 rtx_code_label *label = gen_label_rtx ();
5462 /* Don't emit a constant table in the middle of insns for
5463 casesi_worker_2. This is a bit overkill but is enough
5464 because casesi_worker_2 wouldn't appear so frequently. */
5465 if (last_symoff)
5466 from = last_symoff;
5468 /* If we exceeded the range, then we must back up over the last
5469 instruction we looked at. Otherwise, we just need to undo the
5470 NEXT_INSN at the end of the loop. */
5471 if (PREV_INSN (from) != orig
5472 && (count_hi > hi_limit || count_si > si_limit))
5473 from = PREV_INSN (PREV_INSN (from));
5474 else
5475 from = PREV_INSN (from);
5477 /* Don't emit a constant table in the middle of global pointer setting,
5478 since that would move the addressing base GOT into another table.
5479 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5480 in the pool anyway, so just move up the whole constant pool.
5482 However, avoid doing so when the last single GOT mov is the starting
5483 insn itself. Going above the start insn would create a negative
5484 offset, causing errors. */
5485 if (last_got && last_got != orig)
5486 from = PREV_INSN (last_got);
5488 /* Don't insert the constant pool table at the position which
5489 may be the landing pad. */
5490 if (flag_exceptions
5491 && CALL_P (from)
5492 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5493 from = PREV_INSN (from);
5495 /* Walk back to be just before any jump or label.
5496 Putting it before a label reduces the number of times the branch
5497 around the constant pool table will be hit. Putting it before
5498 a jump makes it more likely that the bra delay slot will be
5499 filled. */
5500 while (NOTE_P (from) || JUMP_P (from)
5501 || LABEL_P (from))
5502 from = PREV_INSN (from);
5504 /* Make sure we do not split between a call and its corresponding
5505 CALL_ARG_LOCATION note. */
5506 if (CALL_P (from))
5508 rtx_insn *next = NEXT_INSN (from);
5509 if (next && NOTE_P (next)
5510 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5511 from = next;
5514 from = emit_jump_insn_after (gen_jump (label), from);
5515 JUMP_LABEL (from) = label;
5516 LABEL_NUSES (label) = 1;
5517 found_barrier = emit_barrier_after (from);
5518 emit_label_after (label, found_barrier);
5521 return found_barrier;
5524 /* If the instruction INSN is implemented by a special function, and we can
5525 positively find the register that is used to call the sfunc, and this
5526 register is not used anywhere else in this instruction - except as the
5527 destination of a set, return this register; else, return 0. */
5529 sfunc_uses_reg (rtx_insn *insn)
5531 int i;
5532 rtx pattern, part, reg_part, reg;
5534 if (!NONJUMP_INSN_P (insn))
5535 return NULL_RTX;
5536 pattern = PATTERN (insn);
5537 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5538 return NULL_RTX;
5540 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5542 part = XVECEXP (pattern, 0, i);
5543 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5544 reg_part = part;
5546 if (! reg_part)
5547 return NULL_RTX;
5548 reg = XEXP (reg_part, 0);
5549 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5551 part = XVECEXP (pattern, 0, i);
5552 if (part == reg_part || GET_CODE (part) == CLOBBER)
5553 continue;
5554 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5555 && REG_P (SET_DEST (part)))
5556 ? SET_SRC (part) : part)))
5557 return NULL_RTX;
5559 return reg;
5562 /* See if the only way in which INSN uses REG is by calling it, or by
5563 setting it while calling it. Set *SET to a SET rtx if the register
5564 is set by INSN. */
5565 static bool
5566 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5568 rtx pattern, reg2;
5570 *set = NULL_RTX;
5572 reg2 = sfunc_uses_reg (insn);
5573 if (reg2 && REGNO (reg2) == REGNO (reg))
5575 pattern = single_set (insn);
5576 if (pattern
5577 && REG_P (SET_DEST (pattern))
5578 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5579 *set = pattern;
5580 return false;
5582 if (!CALL_P (insn))
5584 /* We don't use rtx_equal_p because we don't care if the mode is
5585 different. */
5586 pattern = single_set (insn);
5587 if (pattern
5588 && REG_P (SET_DEST (pattern))
5589 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5591 rtx par, part;
5592 int i;
5594 *set = pattern;
5595 par = PATTERN (insn);
5596 if (GET_CODE (par) == PARALLEL)
5597 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5599 part = XVECEXP (par, 0, i);
5600 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5601 return true;
5603 return reg_mentioned_p (reg, SET_SRC (pattern));
5606 return true;
5609 pattern = PATTERN (insn);
5611 if (GET_CODE (pattern) == PARALLEL)
5613 int i;
5615 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5616 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5617 return true;
5618 pattern = XVECEXP (pattern, 0, 0);
5621 if (GET_CODE (pattern) == SET)
5623 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5625 /* We don't use rtx_equal_p, because we don't care if the
5626 mode is different. */
5627 if (!REG_P (SET_DEST (pattern))
5628 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5629 return true;
5631 *set = pattern;
5634 pattern = SET_SRC (pattern);
5637 if (GET_CODE (pattern) != CALL
5638 || !MEM_P (XEXP (pattern, 0))
5639 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5640 return true;
5642 return false;
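/* (In other words, a false result means the only use of REG in INSN is as
   a call or sfunc address, possibly with REG also being set by that same
   insn; this is exactly the situation the relaxation code in sh_reorg
   below looks for.)  */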
5645 /* Given X, a pattern of an insn or a part of it, return a mask of used
5646 general registers. Bits 0..15 mean that the respective registers
5647 are used as inputs in the instruction. Bits 16..31 mean that the
5648 registers 0..15, respectively, are used as outputs, or are clobbered.
5649 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
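/* (Illustrative sketch, not from the sources: for (set (reg:SI r1)
   (reg:SI r2)) this returns 0x00020004 - bit 2 for the r2 input plus
   bit 17 (16 + 1) for the r1 output; a DImode use of r2, which occupies
   two 32-bit registers, would set bits 2 and 3.)  */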
5650 static int
5651 regs_used (rtx x, int is_dest)
5653 enum rtx_code code;
5654 const char *fmt;
5655 int i, used = 0;
5657 if (! x)
5658 return used;
5659 code = GET_CODE (x);
5660 switch (code)
5662 case REG:
5663 if (REGNO (x) < 16)
5664 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5665 << (REGNO (x) + is_dest));
5666 return 0;
5667 case SUBREG:
5669 rtx y = SUBREG_REG (x);
5671 if (!REG_P (y))
5672 break;
5673 if (REGNO (y) < 16)
5674 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5675 << (REGNO (y) +
5676 subreg_regno_offset (REGNO (y),
5677 GET_MODE (y),
5678 SUBREG_BYTE (x),
5679 GET_MODE (x)) + is_dest));
5680 return 0;
5682 case SET:
5683 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5684 case RETURN:
5685 /* If there was a return value, it must have been indicated with USE. */
5686 return 0x00ffff00;
5687 case CLOBBER:
5688 is_dest = 1;
5689 break;
5690 case MEM:
5691 is_dest = 0;
5692 break;
5693 case CALL:
5694 used |= 0x00ff00f0;
5695 break;
5696 default:
5697 break;
5700 fmt = GET_RTX_FORMAT (code);
5702 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5704 if (fmt[i] == 'E')
5706 int j;
5707 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5708 used |= regs_used (XVECEXP (x, i, j), is_dest);
5710 else if (fmt[i] == 'e')
5711 used |= regs_used (XEXP (x, i), is_dest);
5713 return used;
5716 /* Create an instruction that prevents redirection of a conditional branch
5717 to the destination of the JUMP with address ADDR.
5718 If the branch needs to be implemented as an indirect jump, try to find
5719 a scratch register for it.
5720 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5721 If any preceding insn that doesn't fit into a delay slot is good enough,
5722 pass 1. Pass 2 if a definite blocking insn is needed.
5723 -1 is used internally to avoid deep recursion.
5724 If a blocking instruction is made or recognized, return it. */
5725 static rtx_insn *
5726 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5728 int dead = 0;
5729 rtx_insn *prev = prev_nonnote_insn (jump);
5730 rtx dest;
5732 /* First, check if we already have an instruction that satisfies our need. */
5733 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5735 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5736 return prev;
5737 if (GET_CODE (PATTERN (prev)) == USE
5738 || GET_CODE (PATTERN (prev)) == CLOBBER
5739 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5740 prev = jump;
5741 else if ((need_block &= ~1) < 0)
5742 return prev;
5743 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5744 need_block = 0;
5746 if (GET_CODE (PATTERN (jump)) == RETURN)
5748 if (! need_block)
5749 return prev;
5750 /* Reorg even does nasty things with return insns that cause branches
5751 to go out of range - see find_end_label and callers. */
5752 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5754 /* We can't use JUMP_LABEL here because it might be undefined
5755 when not optimizing. */
5756 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5757 /* If the branch is out of range, try to find a scratch register for it. */
5758 if (optimize
5759 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5760 > 4092 + 4098))
5762 rtx_insn *scan;
5763 /* Don't look for the stack pointer as a scratch register;
5764 it would cause trouble if an interrupt occurred. */
5765 unsigned attempt = 0x7fff, used;
5766 int jump_left = flag_expensive_optimizations + 1;
5768 /* It is likely that the most recent eligible instruction is wanted for
5769 the delay slot. Therefore, find out which registers it uses, and
5770 try to avoid using them. */
5772 for (scan = jump; (scan = PREV_INSN (scan)); )
5774 enum rtx_code code;
5776 if (scan->deleted ())
5777 continue;
5778 code = GET_CODE (scan);
5779 if (code == CODE_LABEL || code == JUMP_INSN)
5780 break;
5781 if (code == INSN
5782 && GET_CODE (PATTERN (scan)) != USE
5783 && GET_CODE (PATTERN (scan)) != CLOBBER
5784 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5786 attempt &= ~regs_used (PATTERN (scan), 0);
5787 break;
5790 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5791 (scan = NEXT_INSN (scan)); )
5793 enum rtx_code code;
5795 if (scan->deleted ())
5796 continue;
5797 code = GET_CODE (scan);
5798 if (INSN_P (scan))
5800 used |= regs_used (PATTERN (scan), 0);
5801 if (code == CALL_INSN)
5802 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5803 dead |= (used >> 16) & ~used;
5804 if (dead & attempt)
5806 dead &= attempt;
5807 break;
5809 if (code == JUMP_INSN)
5811 if (jump_left-- && simplejump_p (scan))
5812 scan = JUMP_LABEL_AS_INSN (scan);
5813 else
5814 break;
5818 /* Mask out the stack pointer again, in case it was
5819 the only 'free' register we have found. */
5820 dead &= 0x7fff;
5822 /* If the immediate destination is still in range, check for possible
5823 threading with a jump beyond the delay slot insn.
5824 Don't check if we are called recursively; the jump has been or will be
5825 checked in a different invocation then. */
5827 else if (optimize && need_block >= 0)
5829 rtx_insn *next = next_active_insn (next_active_insn (dest));
5830 if (next && JUMP_P (next)
5831 && GET_CODE (PATTERN (next)) == SET
5832 && recog_memoized (next) == CODE_FOR_jump_compact)
5834 dest = JUMP_LABEL (next);
5835 if (dest
5836 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5837 > 4092 + 4098))
5838 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5842 if (dead)
5844 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5846 /* It would be nice if we could convert the jump into an indirect
5847 jump / far branch right now, and thus expose all constituent
5848 instructions to further optimization. However, reorg uses
5849 simplejump_p to determine if there is an unconditional jump where
5850 it should try to schedule instructions from the target of the
5851 branch; simplejump_p fails for indirect jumps even if they have
5852 a JUMP_LABEL. */
5853 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5854 (reg, GEN_INT (unspec_bbr_uid++)),
5855 jump);
5856 /* ??? We would like this to have the scope of the jump, but that
5857 scope will change when a delay slot insn of an inner scope is added.
5858 Hence, after delay slot scheduling, we'll have to expect
5859 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5860 the jump. */
5862 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5863 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5864 return insn;
5866 else if (need_block)
5867 /* We can't use JUMP_LABEL here because it might be undefined
5868 when not optimizing. */
5869 return emit_insn_before (gen_block_branch_redirect
5870 (GEN_INT (unspec_bbr_uid++)),
5871 jump);
5872 return prev;
5875 #define CONDJUMP_MIN -252
5876 #define CONDJUMP_MAX 262
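/* (These bound the byte displacement, relative to the address of a
   conditional branch, over which it can still reach its target directly;
   branches outside this window get the near_label / far-branch treatment
   implemented below.)  */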
5877 struct far_branch
5879 /* A label (to be placed) in front of the jump
5880 that jumps to our ultimate destination. */
5881 rtx_insn *near_label;
5882 /* Where we are going to insert it if we cannot move the jump any farther,
5883 or the jump itself if we have picked up an existing jump. */
5884 rtx_insn *insert_place;
5885 /* The ultimate destination. */
5886 rtx_insn *far_label;
5887 struct far_branch *prev;
5888 /* If the branch has already been created, its address;
5889 else the address of its first prospective user. */
5890 int address;
5893 static void gen_far_branch (struct far_branch *);
5894 enum mdep_reorg_phase_e mdep_reorg_phase;
5895 static void
5896 gen_far_branch (struct far_branch *bp)
5898 rtx_insn *insn = bp->insert_place;
5899 rtx_jump_insn *jump;
5900 rtx_code_label *label = gen_label_rtx ();
5901 int ok;
5903 emit_label_after (label, insn);
5904 if (bp->far_label)
5906 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5907 LABEL_NUSES (bp->far_label)++;
5909 else
5910 jump = emit_jump_insn_after (gen_return (), insn);
5912 /* Emit a barrier so that reorg knows that any following instructions
5913 are not reachable via a fall-through path.
5914 But don't do this when not optimizing, since we wouldn't suppress the
5915 alignment for the barrier then, and could end up with out-of-range
5916 pc-relative loads. */
5917 if (optimize)
5918 emit_barrier_after (jump);
5919 emit_label_after (bp->near_label, insn);
5921 if (bp->far_label)
5922 JUMP_LABEL (jump) = bp->far_label;
5923 else
5925 rtx pat = PATTERN (jump);
5926 gcc_assert (ANY_RETURN_P (pat));
5927 JUMP_LABEL (jump) = pat;
5930 ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5931 gcc_assert (ok);
5933 /* If we are branching around a jump (rather than a return), prevent
5934 reorg from using an insn from the jump target as the delay slot insn -
5935 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5936 and it could cause branches to go out of range. */
5937 if (bp->far_label)
5938 (emit_insn_after
5939 (gen_stuff_delay_slot
5940 (GEN_INT (unspec_bbr_uid++),
5941 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5942 insn));
5943 /* Prevent reorg from undoing our splits. */
5944 gen_block_redirect (jump, bp->address += 2, 2);
5947 /* Fix up ADDR_DIFF_VECs. */
5948 void
5949 fixup_addr_diff_vecs (rtx_insn *first)
5951 rtx_insn *insn;
5953 for (insn = first; insn; insn = NEXT_INSN (insn))
5955 rtx vec_lab, pat, prevpat, x, braf_label;
5956 rtx_insn *prev;
5958 if (! JUMP_TABLE_DATA_P (insn)
5959 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5960 continue;
5961 pat = PATTERN (insn);
5962 vec_lab = XEXP (XEXP (pat, 0), 0);
5964 /* Search for the matching casesi_jump_2. */
5965 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5967 if (!JUMP_P (prev))
5968 continue;
5969 prevpat = PATTERN (prev);
5970 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5971 continue;
5972 x = XVECEXP (prevpat, 0, 1);
5973 if (GET_CODE (x) != USE)
5974 continue;
5975 x = XEXP (x, 0);
5976 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5977 break;
5979 /* FIXME: This is a bug in the optimizer, but it seems harmless
5980 to just avoid panicking. */
5981 if (!prev)
5982 continue;
5984 /* Emit the reference label of the braf where it belongs, right after
5985 the casesi_jump_2 (i.e. braf). */
5986 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5987 emit_label_after (braf_label, prev);
5989 /* Fix up the ADDR_DIFF_VEC to be relative
5990 to the reference address of the braf. */
5991 XEXP (XEXP (pat, 0), 0) = braf_label;
5995 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5996 a barrier. Return the base 2 logarithm of the desired alignment. */
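/* (A return value of n requests 2^n byte alignment; e.g. the value 2
   returned for jump tables below means "align to 4 bytes", and 0 means
   no extra alignment is requested.)  */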
5997 int
5998 barrier_align (rtx_insn *barrier_or_label)
6000 rtx next, pat;
6002 if (! barrier_or_label)
6003 return 0;
6005 if (LABEL_P (barrier_or_label)
6006 && NEXT_INSN (barrier_or_label)
6007 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
6008 return 2;
6010 if (BARRIER_P (barrier_or_label)
6011 && PREV_INSN (barrier_or_label)
6012 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
6014 pat = PATTERN (PREV_INSN (barrier_or_label));
6015 /* If this is a very small table, we want to keep the alignment after
6016 the table to the minimum for proper code alignment. */
6017 return ((optimize_size
6018 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6019 <= (unsigned) 1 << (CACHE_LOG - 2)))
6020 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6023 next = next_active_insn (barrier_or_label);
6025 if (! next)
6026 return 0;
6028 pat = PATTERN (next);
6030 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6031 /* This is a barrier in front of a constant table. */
6032 return 0;
6034 if (optimize_size)
6035 return 0;
6037 if (! TARGET_SH2 || ! optimize)
6038 return align_jumps_log;
6040 /* When fixing up pcloads, a constant table might be inserted just before
6041 the basic block that ends with the barrier. Thus, we can't trust the
6042 instruction lengths before that. */
6043 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6045 /* Check if there is an immediately preceding branch to the insn beyond
6046 the barrier. We must weigh the cost of discarding useful information
6047 from the current cache line when executing this branch and there is
6048 an alignment, against that of fetching unneeded insns in front of the
6049 branch target when there is no alignment. */
6051 /* There are two delay_slot cases to consider. One is the simple case
6052 where the preceding branch is to the insn beyond the barrier (simple
6053 delay slot filling), and the other is where the preceding branch has
6054 a delay slot that is a duplicate of the insn after the barrier
6055 (fill_eager_delay_slots) and the branch is to the insn after the insn
6056 after the barrier. */
6058 int slot, credit;
6059 bool jump_to_next = false;
6061 /* Skip to the insn before the JUMP_INSN before the barrier under
6062 investigation. */
6063 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6065 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6066 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6067 prev = prev_real_insn (prev))
6069 jump_to_next = false;
6070 if (GET_CODE (PATTERN (prev)) == USE
6071 || GET_CODE (PATTERN (prev)) == CLOBBER)
6072 continue;
6073 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6075 prev = prev_seq->insn (1);
6076 if (INSN_UID (prev) == INSN_UID (next))
6078 /* Delay slot was filled with insn at jump target. */
6079 jump_to_next = true;
6080 continue;
6084 if (slot &&
6085 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6086 slot = 0;
6087 credit -= get_attr_length (prev);
6089 if (prev && jump_to_label_p (prev))
6091 rtx_insn *x;
6092 if (jump_to_next
6093 || next_real_insn (JUMP_LABEL (prev)) == next
6094 /* If relax_delay_slots() decides NEXT was redundant
6095 with some previous instruction, it will have
6096 redirected PREV's jump to the following insn. */
6097 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6098 /* There is no upper bound on redundant instructions
6099 that might have been skipped, but we must not put an
6100 alignment where none had been before. */
6101 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6102 (INSN_P (x)
6103 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6104 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6105 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6107 rtx pat = PATTERN (prev);
6108 if (GET_CODE (pat) == PARALLEL)
6109 pat = XVECEXP (pat, 0, 0);
6110 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6111 return 0;
6116 return align_jumps_log;
6119 /* If we are inside a phony loop, almost any kind of label can turn up as the
6120 first one in the loop. Aligning a braf label causes incorrect switch
6121 destination addresses; we can detect braf labels because they are
6122 followed by a BARRIER.
6123 Applying loop alignment to small constant or switch tables is a waste
6124 of space, so we suppress this too. */
6125 int
6126 sh_loop_align (rtx_insn *label)
6128 rtx_insn *next = label;
6130 if (! optimize || optimize_size)
6131 return 0;
6134 next = next_nonnote_insn (next);
6135 while (next && LABEL_P (next));
6137 if (! next
6138 || ! INSN_P (next)
6139 || recog_memoized (next) == CODE_FOR_consttable_2)
6140 return 0;
6142 return align_loops_log;
6145 /* Do a final pass over the function, just before delayed branch
6146 scheduling. */
6147 static void
6148 sh_reorg (void)
6150 rtx_insn *first, *insn, *mova = NULL;
6151 int num_mova;
6152 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6153 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6155 first = get_insns ();
6156 max_labelno_before_reorg = max_label_num ();
6158 /* We must split call insns before introducing `mova's. If we're
6159 optimizing, they'll have already been split. Otherwise, make
6160 sure we don't split them too late. */
6161 if (! optimize)
6162 split_all_insns_noflow ();
6164 if (TARGET_SHMEDIA)
6165 return;
6167 /* If relaxing, generate pseudo-ops to associate function calls with
6168 the symbols they call. It does no harm to not generate these
6169 pseudo-ops. However, when we can generate them, it enables the
6170 linker to potentially relax the jsr to a bsr, and eliminate the
6171 register load and, possibly, the constant pool entry. */
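/* (The labels recorded via REG_LABEL_OPERAND below are what
   final_prescan_insn, further down, turns into the labels and .uses
   pseudo-ops that the linker needs for this relaxation.)  */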
6173 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6174 if (TARGET_RELAX)
6176 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6177 own purposes. This works because none of the remaining passes
6178 need to look at them.
6180 ??? But it may break in the future. We should use a machine
6181 dependent REG_NOTE, or some other approach entirely. */
6182 for (insn = first; insn; insn = NEXT_INSN (insn))
6184 if (INSN_P (insn))
6186 rtx note;
6188 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6189 NULL_RTX)) != 0)
6190 remove_note (insn, note);
6194 for (insn = first; insn; insn = NEXT_INSN (insn))
6196 rtx pattern, reg, set, dies;
6197 rtx_code_label *label;
6198 rtx_insn *link, *scan;
6199 int rescan = 0, foundinsn = 0;
6201 if (CALL_P (insn))
6203 pattern = PATTERN (insn);
6205 if (GET_CODE (pattern) == PARALLEL)
6206 pattern = XVECEXP (pattern, 0, 0);
6207 if (GET_CODE (pattern) == SET)
6208 pattern = SET_SRC (pattern);
6210 if (GET_CODE (pattern) != CALL
6211 || !MEM_P (XEXP (pattern, 0)))
6212 continue;
6214 reg = XEXP (XEXP (pattern, 0), 0);
6216 else
6218 reg = sfunc_uses_reg (insn);
6219 if (! reg)
6220 continue;
6223 if (!REG_P (reg))
6224 continue;
6226 /* Try scanning backward to find where the register is set. */
6227 link = NULL;
6228 for (scan = PREV_INSN (insn);
6229 scan && !LABEL_P (scan);
6230 scan = PREV_INSN (scan))
6232 if (! INSN_P (scan))
6233 continue;
6235 if (! reg_mentioned_p (reg, scan))
6236 continue;
6238 if (noncall_uses_reg (reg, scan, &set))
6239 break;
6241 if (set)
6243 link = scan;
6244 break;
6248 if (! link)
6249 continue;
6251 /* The register is set at LINK. */
6253 /* We can only optimize the function call if the register is
6254 being set to a symbol. In theory, we could sometimes
6255 optimize calls to a constant location, but the assembler
6256 and linker do not support that at present. */
6257 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6258 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6259 continue;
6261 /* Scan forward from LINK to the place where REG dies, and
6262 make sure that the only insns which use REG are
6263 themselves function calls. */
6265 /* ??? This doesn't work for call targets that were allocated
6266 by reload, since there may not be a REG_DEAD note for the
6267 register. */
6269 dies = NULL_RTX;
6270 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6272 rtx scanset;
6274 /* Don't try to trace forward past a CODE_LABEL if we haven't
6275 seen INSN yet. Ordinarily, we will only find the setting insn
6276 if it is in the same basic block. However,
6277 cross-jumping can insert code labels in between the load and
6278 the call, and can result in situations where a single call
6279 insn may have two targets depending on where we came from. */
6281 if (LABEL_P (scan) && ! foundinsn)
6282 break;
6284 if (! INSN_P (scan))
6285 continue;
6287 /* Don't try to trace forward past a JUMP. To optimize
6288 safely, we would have to check that all the
6289 instructions at the jump destination did not use REG. */
6291 if (JUMP_P (scan))
6292 break;
6294 if (! reg_mentioned_p (reg, scan))
6295 continue;
6297 if (noncall_uses_reg (reg, scan, &scanset))
6298 break;
6300 if (scan == insn)
6301 foundinsn = 1;
6303 if (scan != insn
6304 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6306 /* There is a function call to this register other
6307 than the one we are checking. If we optimize
6308 this call, we need to rescan again below. */
6309 rescan = 1;
6312 /* ??? We shouldn't have to worry about SCANSET here.
6313 We should just be able to check for a REG_DEAD note
6314 on a function call. However, the REG_DEAD notes are
6315 apparently not dependable around libcalls; c-torture
6316 execute/920501-2 is a test case. If SCANSET is set,
6317 then this insn sets the register, so it must have
6318 died earlier. Unfortunately, this will only handle
6319 the cases in which the register is, in fact, set in a
6320 later insn. */
6322 /* ??? We shouldn't have to use FOUNDINSN here.
6323 This dates back to when we used LOG_LINKS to find
6324 the most recent insn which sets the register. */
6326 if (foundinsn
6327 && (scanset
6328 || find_reg_note (scan, REG_DEAD, reg)))
6330 dies = scan;
6331 break;
6335 if (! dies)
6337 /* Either there was a branch, or some insn used REG
6338 other than as a function call address. */
6339 continue;
6342 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6343 on the insn which sets the register, and on each call insn
6344 which uses the register. In final_prescan_insn we look for
6345 the REG_LABEL_OPERAND notes, and output the appropriate label
6346 or pseudo-op. */
6348 label = gen_label_rtx ();
6349 add_reg_note (link, REG_LABEL_OPERAND, label);
6350 add_reg_note (insn, REG_LABEL_OPERAND, label);
6351 if (rescan)
6353 scan = link;
6356 rtx reg2;
6358 scan = NEXT_INSN (scan);
6359 if (scan != insn
6360 && ((CALL_P (scan)
6361 && reg_mentioned_p (reg, scan))
6362 || ((reg2 = sfunc_uses_reg (scan))
6363 && REGNO (reg2) == REGNO (reg))))
6364 add_reg_note (scan, REG_LABEL_OPERAND, label);
6366 while (scan != dies);
6371 if (TARGET_SH2)
6372 fixup_addr_diff_vecs (first);
6374 if (optimize)
6376 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6377 shorten_branches (first);
6380 /* Scan the function looking for move instructions which have to be
6381 changed to pc-relative loads and insert the literal tables. */
6382 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6383 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6385 if (mova_p (insn))
6387 /* ??? basic block reordering can move a switch table dispatch
6388 below the switch table. Check if that has happened.
6389 We only have the addresses available when optimizing; but then,
6390 this check shouldn't be needed when not optimizing. */
6391 if (!untangle_mova (&num_mova, &mova, insn))
6393 insn = mova;
6394 num_mova = 0;
6397 else if (JUMP_TABLE_DATA_P (insn)
6398 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6399 && num_mova
6400 /* ??? loop invariant motion can also move a mova out of a
6401 loop. Since loop does this code motion anyway, maybe we
6402 should wrap UNSPEC_MOVA into a CONST, so that reload can
6403 move it back. */
6404 && ((num_mova > 1
6405 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6406 || (prev_nonnote_insn (insn)
6407 == XEXP (MOVA_LABELREF (mova), 0))))
6409 rtx_insn *scan;
6410 int total;
6412 num_mova--;
6414 /* Some code might have been inserted between the mova and
6415 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6416 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6417 total += get_attr_length (scan);
6419 /* The range of mova is 1020; add 4 because the pc counts from the
6420 address of the second insn after this one, and subtract 2 in case the
6421 pc is 2-byte aligned - hence the 1022 checked below. Possible alignment
6422 needed for the ADDR_DIFF_VEC cancels out with that of the mova itself. */
6423 if (total > 1022)
6425 /* Change the mova into a load, and restart scanning
6426 there. broken_move will then return true for mova. */
6427 fixup_mova (mova);
6428 insn = mova;
6431 if (broken_move (insn)
6432 || (NONJUMP_INSN_P (insn)
6433 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6435 rtx_insn *scan;
6436 /* Scan ahead looking for a barrier to stick the constant table
6437 behind. */
6438 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6439 rtx_insn *last_float_move = NULL;
6440 rtx last_float = 0, *last_float_addr = NULL;
6441 int need_aligned_label = 0;
6443 if (num_mova && ! mova_p (mova))
6445 /* find_barrier had to change the first mova into a
6446 pcload; thus, we have to start with this new pcload. */
6447 insn = mova;
6448 num_mova = 0;
6450 /* Now find all the moves between the points and modify them. */
6451 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6453 if (LABEL_P (scan))
6454 last_float = 0;
6455 if (NONJUMP_INSN_P (scan)
6456 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6457 need_aligned_label = 1;
6458 if (broken_move (scan))
6460 rtx *patp = &PATTERN (scan), pat = *patp;
6461 rtx src, dst;
6462 rtx lab;
6463 rtx newsrc;
6464 machine_mode mode;
6466 if (GET_CODE (pat) == PARALLEL)
6467 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6468 src = SET_SRC (pat);
6469 dst = SET_DEST (pat);
6470 mode = GET_MODE (dst);
6472 if (mode == SImode && satisfies_constraint_I16 (src)
6473 && REGNO (dst) != FPUL_REG)
6475 int offset = 0;
6477 mode = HImode;
6478 while (GET_CODE (dst) == SUBREG)
6480 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6481 GET_MODE (SUBREG_REG (dst)),
6482 SUBREG_BYTE (dst),
6483 GET_MODE (dst));
6484 dst = SUBREG_REG (dst);
6486 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6488 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6490 /* This must be an insn that clobbers r0. */
6491 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6492 XVECLEN (PATTERN (scan), 0)
6493 - 1);
6494 rtx clobber = *clobberp;
6496 gcc_assert (GET_CODE (clobber) == CLOBBER
6497 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6499 if (last_float
6500 && reg_set_between_p (r0_rtx, last_float_move, scan))
6501 last_float = 0;
6502 if (last_float
6503 && TARGET_SHCOMPACT
6504 && GET_MODE_SIZE (mode) != 4
6505 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6506 last_float = 0;
6507 lab = add_constant (src, mode, last_float);
6508 if (lab)
6509 emit_insn_before (gen_mova (lab), scan);
6510 else
6512 /* There will be a REG_UNUSED note for r0 on
6513 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6514 otherwise reorg:mark_target_live_regs will not
6515 consider r0 to be used, and we would end up with a
6516 delay slot insn in front of SCAN that clobbers r0. */
6517 rtx note
6518 = find_regno_note (last_float_move, REG_UNUSED, 0);
6520 /* If we are not optimizing, then there may not be
6521 a note. */
6522 if (note)
6523 PUT_REG_NOTE_KIND (note, REG_INC);
6525 *last_float_addr = r0_inc_rtx;
6527 last_float_move = scan;
6528 last_float = src;
6529 newsrc = gen_const_mem (mode,
6530 (((TARGET_SH4 && ! TARGET_FMOVD)
6531 || REGNO (dst) == FPUL_REG)
6532 ? r0_inc_rtx
6533 : r0_rtx));
6534 last_float_addr = &XEXP (newsrc, 0);
6536 /* Remove the clobber of r0. */
6537 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6538 gen_rtx_SCRATCH (Pmode));
6540 /* This is a mova needing a label. Create it. */
6541 else if (GET_CODE (src) == UNSPEC
6542 && XINT (src, 1) == UNSPEC_MOVA
6543 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6545 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6546 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6547 newsrc = gen_rtx_UNSPEC (SImode,
6548 gen_rtvec (1, newsrc),
6549 UNSPEC_MOVA);
6551 else if (GET_CODE (src) == UNSPEC_VOLATILE
6552 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6554 newsrc = XVECEXP (src, 0, 0);
6555 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6556 INSN_CODE (scan) = -1;
6557 continue;
6559 else
6561 lab = add_constant (src, mode, 0);
6562 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6563 newsrc = gen_const_mem (mode, newsrc);
6565 *patp = gen_rtx_SET (dst, newsrc);
6566 INSN_CODE (scan) = -1;
6569 dump_table (need_aligned_label ? insn : 0, barrier);
6570 insn = barrier;
6573 label_ref_list_d::pool.release ();
6574 for (insn = first; insn; insn = NEXT_INSN (insn))
6575 PUT_MODE (insn, VOIDmode);
6577 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6578 INSN_ADDRESSES_FREE ();
6579 split_branches (first);
6581 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6582 also has an effect on the register that holds the address of the sfunc.
6583 Insert an extra dummy insn in front of each sfunc that pretends to
6584 use this register. */
6585 if (flag_delayed_branch)
6587 for (insn = first; insn; insn = NEXT_INSN (insn))
6589 rtx reg = sfunc_uses_reg (insn);
6591 if (! reg)
6592 continue;
6593 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6596 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6599 /* Return the UID of the insn that follows the specified label. */
6600 static int
6601 get_dest_uid (rtx label, int max_uid)
6603 rtx_insn *dest = next_real_insn (label);
6604 int dest_uid;
6605 if (! dest)
6606 /* This can happen for an undefined label. */
6607 return 0;
6608 dest_uid = INSN_UID (dest);
6609 /* If this is a newly created branch redirection blocking instruction,
6610 we cannot index the branch_uid or insn_addresses arrays with its
6611 uid. But then, we won't need to, because the actual destination is
6612 the following branch. */
6613 while (dest_uid >= max_uid)
6615 dest = NEXT_INSN (dest);
6616 dest_uid = INSN_UID (dest);
6618 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6619 return 0;
6620 return dest_uid;
6623 /* Split condbranches that are out of range. Also add clobbers for
6624 scratch registers that are needed in far jumps.
6625 We do this before delay slot scheduling, so that it can take our
6626 newly created instructions into account. It also allows us to
6627 find branches with common targets more easily. */
6628 static void
6629 split_branches (rtx_insn *first)
6631 rtx_insn *insn;
6632 struct far_branch **uid_branch, *far_branch_list = 0;
6633 int max_uid = get_max_uid ();
6634 int ok;
6636 /* Find out which branches are out of range. */
6637 shorten_branches (first);
6639 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6640 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6642 for (insn = first; insn; insn = NEXT_INSN (insn))
6643 if (! INSN_P (insn))
6644 continue;
6645 else if (insn->deleted ())
6647 /* Shorten_branches would split this instruction again,
6648 so transform it into a note. */
6649 SET_INSN_DELETED (insn);
6651 else if (JUMP_P (insn))
6653 enum attr_type type = get_attr_type (insn);
6654 if (type == TYPE_CBRANCH)
6656 rtx_insn *next, *beyond;
6658 if (get_attr_length (insn) > 4)
6660 rtx src = SET_SRC (PATTERN (insn));
6661 rtx olabel = XEXP (XEXP (src, 1), 0);
6662 int addr = INSN_ADDRESSES (INSN_UID (insn));
6663 rtx_insn *label = 0;
6664 int dest_uid = get_dest_uid (olabel, max_uid);
6665 struct far_branch *bp = uid_branch[dest_uid];
6667 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6668 the label if the LABEL_NUSES count drops to zero. There is
6669 always a jump_optimize pass that sets these values, but it
6670 proceeds to delete unreferenced code, and then if not
6671 optimizing, to un-delete the deleted instructions, thus
6672 leaving labels with too low uses counts. */
6673 if (! optimize)
6675 JUMP_LABEL (insn) = olabel;
6676 LABEL_NUSES (olabel)++;
6678 if (! bp)
6680 bp = (struct far_branch *) alloca (sizeof *bp);
6681 uid_branch[dest_uid] = bp;
6682 bp->prev = far_branch_list;
6683 far_branch_list = bp;
6684 bp->far_label = as_a <rtx_insn *> (
6685 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6686 0));
6687 LABEL_NUSES (bp->far_label)++;
6689 else
6691 label = bp->near_label;
6692 if (! label && bp->address - addr >= CONDJUMP_MIN)
6694 rtx_insn *block = bp->insert_place;
6696 if (GET_CODE (PATTERN (block)) == RETURN)
6697 block = PREV_INSN (block);
6698 else
6699 block = gen_block_redirect (block,
6700 bp->address, 2);
6701 label = emit_label_after (gen_label_rtx (),
6702 PREV_INSN (block));
6703 bp->near_label = label;
6705 else if (label && ! NEXT_INSN (label))
6707 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6708 bp->insert_place = insn;
6709 else
6710 gen_far_branch (bp);
6713 if (! label
6714 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6716 bp->near_label = label = gen_label_rtx ();
6717 bp->insert_place = insn;
6718 bp->address = addr;
6720 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6721 gcc_assert (ok);
6723 else
6725 /* get_attr_length (insn) == 2 */
6726 /* Check if we have a pattern where reorg wants to redirect
6727 the branch to a label from an unconditional branch that
6728 is too far away. */
6729 /* We can't use JUMP_LABEL here because it might be undefined
6730 when not optimizing. */
6731 /* A syntax error might cause beyond to be NULL_RTX. */
6732 beyond
6733 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6734 0));
6736 if (beyond
6737 && (JUMP_P (beyond)
6738 || ((beyond = next_active_insn (beyond))
6739 && JUMP_P (beyond)))
6740 && GET_CODE (PATTERN (beyond)) == SET
6741 && recog_memoized (beyond) == CODE_FOR_jump_compact
6742 && ((INSN_ADDRESSES
6743 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6744 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6745 > 252 + 258 + 2))
6746 gen_block_redirect (beyond,
6747 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6750 next = next_active_insn (insn);
6752 if (next
6753 && (JUMP_P (next)
6754 || ((next = next_active_insn (next))
6755 && JUMP_P (next)))
6756 && GET_CODE (PATTERN (next)) == SET
6757 && recog_memoized (next) == CODE_FOR_jump_compact
6758 && ((INSN_ADDRESSES
6759 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6760 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6761 > 252 + 258 + 2))
6762 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6764 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6766 int addr = INSN_ADDRESSES (INSN_UID (insn));
6767 rtx_insn *far_label = 0;
6768 int dest_uid = 0;
6769 struct far_branch *bp;
6771 if (type == TYPE_JUMP)
6773 if (CROSSING_JUMP_P (insn))
6775 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6776 insn);
6777 continue;
6780 far_label = as_a <rtx_insn *> (
6781 XEXP (SET_SRC (PATTERN (insn)), 0));
6782 dest_uid = get_dest_uid (far_label, max_uid);
6783 if (! dest_uid)
6785 /* Parse errors can lead to labels outside
6786 the insn stream. */
6787 if (! NEXT_INSN (far_label))
6788 continue;
6790 if (! optimize)
6792 JUMP_LABEL (insn) = far_label;
6793 LABEL_NUSES (far_label)++;
6795 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6796 far_label = 0;
6799 bp = uid_branch[dest_uid];
6800 if (! bp)
6802 bp = (struct far_branch *) alloca (sizeof *bp);
6803 uid_branch[dest_uid] = bp;
6804 bp->prev = far_branch_list;
6805 far_branch_list = bp;
6806 bp->near_label = 0;
6807 bp->far_label = far_label;
6808 if (far_label)
6809 LABEL_NUSES (far_label)++;
6811 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6812 if (addr - bp->address <= CONDJUMP_MAX)
6813 emit_label_after (bp->near_label, PREV_INSN (insn));
6814 else
6816 gen_far_branch (bp);
6817 bp->near_label = 0;
6819 else
6820 bp->near_label = 0;
6821 bp->address = addr;
6822 bp->insert_place = insn;
6823 if (! far_label)
6824 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6825 else
6826 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6829 /* Generate all pending far branches,
6830 and free our references to the far labels. */
6831 while (far_branch_list)
6833 if (far_branch_list->near_label
6834 && ! NEXT_INSN (far_branch_list->near_label))
6835 gen_far_branch (far_branch_list);
6836 if (optimize
6837 && far_branch_list->far_label
6838 && ! --LABEL_NUSES (far_branch_list->far_label))
6839 delete_insn (far_branch_list->far_label);
6840 far_branch_list = far_branch_list->prev;
6843 /* Instruction length information is no longer valid due to the new
6844 instructions that have been generated. */
6845 init_insn_lengths ();
6848 /* Dump out instruction addresses, which is useful for debugging the
6849 constant pool table stuff.
6851 If relaxing, output the label and pseudo-ops used to link together
6852 calls and the instruction which set the registers.
6854 ??? The addresses printed by this routine for insns are nonsense for
6855 insns which are inside of a sequence where none of the inner insns have
6856 variable length. This is because the second pass of shorten_branches
6857 does not bother to update them. */
6858 void
6859 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6860 int noperands ATTRIBUTE_UNUSED)
6862 if (TARGET_DUMPISIZE)
6863 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6865 if (TARGET_RELAX)
6867 rtx note;
6869 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6870 if (note)
6872 rtx pattern;
6874 pattern = PATTERN (insn);
6875 if (GET_CODE (pattern) == PARALLEL)
6876 pattern = XVECEXP (pattern, 0, 0);
6877 switch (GET_CODE (pattern))
6879 case SET:
6880 if (GET_CODE (SET_SRC (pattern)) != CALL
6881 && get_attr_type (insn) != TYPE_SFUNC)
6883 targetm.asm_out.internal_label
6884 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6885 break;
6887 /* else FALLTHROUGH */
6888 case CALL:
6889 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6890 CODE_LABEL_NUMBER (XEXP (note, 0)));
6891 break;
6893 default:
6894 gcc_unreachable ();
6900 /* Dump out any constants accumulated in the final pass. These will
6901 only be labels. */
6902 const char *
6903 output_jump_label_table (void)
6905 int i;
6907 if (pool_size)
6909 fprintf (asm_out_file, "\t.align 2\n");
6910 for (i = 0; i < pool_size; i++)
6912 pool_node *p = &pool_vector[i];
6914 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6915 CODE_LABEL_NUMBER (p->label));
6916 output_asm_insn (".long %O0", &p->value);
6918 pool_size = 0;
6921 return "";
6924 /* A full frame looks like:
6926 arg-5
6927 arg-4
6928 [ if current_function_anonymous_args
6929 arg-3
6930 arg-2
6931 arg-1
6932 arg-0 ]
6933 saved-fp
6934 saved-r10
6935 saved-r11
6936 saved-r12
6937 saved-pr
6938 local-n
6940 local-1
6941 local-0 <- fp points here.
6943 Number of bytes pushed for anonymous args, used to pass information
6944 between expand_prologue and expand_epilogue.
6946 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6947 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6948 for an epilogue and a negative value means that it's for a sibcall
6949 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6950 all the registers that are about to be restored, and hence dead. */
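/* (For instance, assuming the usual non-SHmedia signed 8-bit add-immediate
   range and 4-byte stack alignment, a 200 byte adjustment that does not fit
   a single add is done as two adds of 100 bytes each, keeping the stack
   aligned in between; adjustments that cannot be split this way fall back
   to loading the constant into a temporary register.)  */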
6951 static void
6952 output_stack_adjust (int size, rtx reg, int epilogue_p,
6953 HARD_REG_SET *live_regs_mask, bool frame_p)
6955 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6956 if (size)
6958 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6960 /* This test is bogus, as output_stack_adjust is used to re-align the
6961 stack. */
6962 #if 0
6963 gcc_assert (!(size % align));
6964 #endif
6966 if (CONST_OK_FOR_ADD (size))
6967 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6968 /* Try to do it with two partial adjustments; however, we must make
6969 sure that the stack is properly aligned at all times, in case
6970 an interrupt occurs between the two partial adjustments. */
6971 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6972 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6974 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6975 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6977 else
6979 rtx const_reg;
6980 rtx insn;
6981 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6982 int i;
6984 /* If TEMP is invalid, we could temporarily save a general
6985 register to MACL. However, there is currently no need
6986 to handle this case, so just die when we see it. */
6987 if (epilogue_p < 0
6988 || current_function_interrupt
6989 || ! call_really_used_regs[temp] || fixed_regs[temp])
6990 temp = -1;
6991 if (temp < 0 && ! current_function_interrupt
6992 && (TARGET_SHMEDIA || epilogue_p >= 0))
6994 HARD_REG_SET temps;
6995 COPY_HARD_REG_SET (temps, call_used_reg_set);
6996 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6997 if (epilogue_p > 0)
6999 int nreg = 0;
7000 if (crtl->return_rtx)
7002 machine_mode mode;
7003 mode = GET_MODE (crtl->return_rtx);
7004 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
7005 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
7007 for (i = 0; i < nreg; i++)
7008 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
7009 if (crtl->calls_eh_return)
7011 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
7012 for (i = 0; i <= 3; i++)
7013 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
7016 if (TARGET_SHMEDIA && epilogue_p < 0)
7017 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7018 CLEAR_HARD_REG_BIT (temps, i);
7019 if (epilogue_p <= 0)
7021 for (i = FIRST_PARM_REG;
7022 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7023 CLEAR_HARD_REG_BIT (temps, i);
7024 if (cfun->static_chain_decl != NULL)
7025 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7027 temp = scavenge_reg (&temps);
7029 if (temp < 0 && live_regs_mask)
7031 HARD_REG_SET temps;
7033 COPY_HARD_REG_SET (temps, *live_regs_mask);
7034 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7035 temp = scavenge_reg (&temps);
7037 if (temp < 0)
7039 rtx adj_reg, tmp_reg, mem;
7041 /* If we reached here, the most likely case is the (sibcall)
7042 epilogue for non-SHmedia. Put a special push/pop sequence
7043 for such a case as the last resort. This looks lengthy, but
7044 it would not be a problem because it seems to be very
7045 rare. */
7047 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7050 /* ??? There is still the slight possibility that r4 or
7051 r5 have been reserved as fixed registers or assigned
7052 as global registers, and they change during an
7053 interrupt. There are possible ways to handle this:
7055 - If we are adjusting the frame pointer (r14), we can do
7056 with a single temp register and an ordinary push / pop
7057 on the stack.
7058 - Grab any call-used or call-saved registers (i.e. not
7059 fixed or globals) for the temps we need. We might
7060 also grab r14 if we are adjusting the stack pointer.
7061 If we can't find enough available registers, issue
7062 a diagnostic and die - the user must have reserved
7063 way too many registers.
7064 But since all this is rather unlikely to happen and
7065 would require extra testing, we just die if r4 / r5
7066 are not available. */
7067 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7068 && !global_regs[4] && !global_regs[5]);
7070 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7071 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7072 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7073 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7074 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7075 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7076 emit_move_insn (mem, tmp_reg);
7077 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7078 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7079 emit_move_insn (mem, tmp_reg);
7080 emit_move_insn (reg, adj_reg);
7081 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7082 emit_move_insn (adj_reg, mem);
7083 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7084 emit_move_insn (tmp_reg, mem);
7085 /* Tell flow the insns that pop r4/r5 aren't dead. */
7086 emit_use (tmp_reg);
7087 emit_use (adj_reg);
7088 return;
7090 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7092 /* If SIZE is negative, subtract the positive value.
7093 This sometimes allows a constant pool entry to be shared
7094 between prologue and epilogue code. */
7095 if (size < 0)
7097 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7098 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7100 else
7102 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7103 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7105 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7106 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7107 GEN_INT (size))));
7112 /* Emit the specified insn and mark it as frame related.
7113 FIXME: Rename this to emit_frame_insn. */
7114 static rtx_insn *
7115 frame_insn (rtx x)
7117 rtx_insn *insn = emit_insn (x);
7118 RTX_FRAME_RELATED_P (insn) = 1;
7119 return insn;
7122 /* Output RTL to push register RN onto the stack. */
7123 static rtx
7124 push (int rn)
7126 rtx x;
7127 if (rn == FPUL_REG)
7128 x = gen_push_fpul ();
7129 else if (rn == FPSCR_REG)
7130 x = gen_push_fpscr ();
7131 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7132 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7134 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7135 return NULL_RTX;
7136 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7138 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7139 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7140 else
7141 x = gen_push (gen_rtx_REG (SImode, rn));
7143 x = frame_insn (x);
7144 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7145 return x;
7148 /* Output RTL to pop register RN from the stack. */
7149 static void
7150 pop (int rn)
7152 rtx x, sp_reg, reg;
7153 if (rn == FPUL_REG)
7154 x = gen_pop_fpul ();
7155 else if (rn == FPSCR_REG)
7156 x = gen_pop_fpscr ();
7157 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7158 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7160 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7161 return;
7162 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7164 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7165 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7166 else
7167 x = gen_pop (gen_rtx_REG (SImode, rn));
7169 x = emit_insn (x);
7171 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7172 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7173 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7174 : SET_DEST (PATTERN (x)));
7175 add_reg_note (x, REG_CFA_RESTORE, reg);
7176 add_reg_note (x, REG_CFA_ADJUST_CFA,
7177 gen_rtx_SET (sp_reg,
7178 plus_constant (SImode, sp_reg,
7179 GET_MODE_SIZE (GET_MODE (reg)))));
7180 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7181 RTX_FRAME_RELATED_P (x) = 1;
7184 /* Generate code to push the regs specified in the mask. */
7185 static void
7186 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7188 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7189 int skip_fpscr = 0;
7191 /* Push PR last; this gives better latencies after the prologue, and
7192 candidates for the return delay slot when there are no general
7193 registers pushed. */
7194 for (; i < FIRST_PSEUDO_REGISTER; i++)
7196 /* If this is an interrupt handler, and the SZ bit varies,
7197 and we have to push any floating point register, we need
7198 to switch to the correct precision first. */
7199 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7200 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7202 HARD_REG_SET unsaved;
7204 push (FPSCR_REG);
7205 COMPL_HARD_REG_SET (unsaved, *mask);
7206 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7207 skip_fpscr = 1;
7209 if (i != PR_REG
7210 && (i != FPSCR_REG || ! skip_fpscr)
7211 && TEST_HARD_REG_BIT (*mask, i))
7213 /* If the ISR has RESBANK attribute assigned, don't push any of
7214 the following registers - R0-R14, MACH, MACL and GBR. */
7215 if (! (sh_cfun_resbank_handler_p ()
7216 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7217 || i == MACH_REG
7218 || i == MACL_REG
7219 || i == GBR_REG)))
7220 push (i);
7224 /* Push banked registers last to improve delay slot opportunities. */
7225 if (interrupt_handler)
7227 bool use_movml = false;
7229 if (TARGET_SH2A)
7231 unsigned int count = 0;
7233 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7234 if (TEST_HARD_REG_BIT (*mask, i))
7235 count++;
7236 else
7237 break;
7239 /* Use movml when all banked registers are pushed. */
7240 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7241 use_movml = true;
7244 if (sh_cfun_resbank_handler_p ())
7245 ; /* Do nothing. */
7246 else if (use_movml)
7248 rtx x, mem, reg, set;
7249 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7251 /* We must avoid scheduling the multiple store insn together
7252 with other insns. */
7253 emit_insn (gen_blockage ());
7254 x = gen_movml_push_banked (sp_reg);
7255 x = frame_insn (x);
7256 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7258 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7259 reg = gen_rtx_REG (SImode, i);
7260 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7263 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7264 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7265 emit_insn (gen_blockage ());
7267 else
7268 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7269 if (TEST_HARD_REG_BIT (*mask, i))
7270 push (i);
7273 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7274 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7275 push (PR_REG);
7278 /* Calculate how much extra space is needed to save all callee-saved
7279 target registers.
7280 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7281 static int
7282 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7284 int reg;
7285 int stack_space = 0;
7286 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7288 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7289 if ((! call_really_used_regs[reg] || interrupt_handler)
7290 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7291 /* Leave space to save this target register on the stack,
7292 in case target register allocation wants to use it. */
7293 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7294 return stack_space;
7297 /* Decide whether we should reserve space for callee-save target registers,
7298 in case target register allocation wants to use them. REGS_SAVED is
7299 the space, in bytes, that is already required for register saves.
7300 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7301 static int
7302 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7303 HARD_REG_SET *live_regs_mask)
7305 if (optimize_size)
7306 return 0;
7307 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7310 /* Decide how much space to reserve for callee-save target registers
7311 in case target register allocation wants to use them.
7312 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7313 static int
7314 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7316 if (shmedia_space_reserved_for_target_registers)
7317 return shmedia_target_regs_stack_space (live_regs_mask);
7318 else
7319 return 0;
7322 /* Work out the registers which need to be saved, both as a mask and a
7323 count of saved words. Return the count.
7325 If doing a pragma interrupt function, then push all regs used by the
7326 function, and if we call another function (we can tell by looking at PR),
7327 make sure that all the regs it clobbers are safe too. */
7328 static int
7329 calc_live_regs (HARD_REG_SET *live_regs_mask)
7331 unsigned int reg;
7332 int count;
7333 tree attrs;
7334 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7335 bool nosave_low_regs;
7336 int pr_live, has_call;
7338 attrs = DECL_ATTRIBUTES (current_function_decl);
7339 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7340 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7341 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7342 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7344 CLEAR_HARD_REG_SET (*live_regs_mask);
7345 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7346 && df_regs_ever_live_p (FPSCR_REG))
7347 target_flags &= ~MASK_FPU_SINGLE;
7348 /* If we can avoid a lot of saves by switching to double mode, do that. */
7349 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7350 && TARGET_FPU_SINGLE)
7351 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7352 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7353 && (! call_really_used_regs[reg]
7354 || interrupt_handler)
7355 && ++count > 2)
7357 target_flags &= ~MASK_FPU_SINGLE;
7358 break;
7360 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7361 knows how to use it. That means the pseudo originally allocated for
7362 the initial value can become the PR_MEDIA_REG hard register, as seen for
7363 execute/20010122-1.c:test9. */
7364 if (TARGET_SHMEDIA)
7365 /* ??? this function is called from initial_elimination_offset, hence we
7366 can't use the result of sh_media_register_for_return here. */
7367 pr_live = sh_pr_n_sets ();
7368 else
7370 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7371 pr_live = (pr_initial
7372 ? (!REG_P (pr_initial)
7373 || REGNO (pr_initial) != (PR_REG))
7374 : df_regs_ever_live_p (PR_REG));
7375 /* For SHcompact, if not optimizing, we end up with a memory reference
7376 using the return address pointer for __builtin_return_address even
7377 though there is no actual need to put the PR register on the stack. */
7378 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7380 /* Force PR to be live if the prologue has to call the SHmedia
7381 argument decoder or register saver. */
7382 if (TARGET_SHCOMPACT
7383 && ((crtl->args.info.call_cookie
7384 & ~ CALL_COOKIE_RET_TRAMP (1))
7385 || crtl->saves_all_registers))
7386 pr_live = 1;
7387 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7388 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7390 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7391 ? pr_live
7392 : interrupt_handler
7393 ? (/* Need to save all the regs ever live. */
7394 (df_regs_ever_live_p (reg)
7395 || (call_really_used_regs[reg]
7396 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7397 || reg == PIC_OFFSET_TABLE_REGNUM)
7398 && has_call)
7399 || (TARGET_SHMEDIA && has_call
7400 && REGISTER_NATURAL_MODE (reg) == SImode
7401 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7402 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7403 && reg != RETURN_ADDRESS_POINTER_REGNUM
7404 && reg != T_REG && reg != GBR_REG
7405 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7406 /* Push fpscr only on targets which have an FPU. */
7407 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7408 : (/* Only push those regs which are used and need to be saved. */
7409 (TARGET_SHCOMPACT
7410 && flag_pic
7411 && crtl->args.info.call_cookie
7412 && reg == PIC_OFFSET_TABLE_REGNUM)
7413 || (df_regs_ever_live_p (reg)
7414 && ((!call_really_used_regs[reg]
7415 && !(reg != PIC_OFFSET_TABLE_REGNUM
7416 && fixed_regs[reg] && call_used_regs[reg]))
7417 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7418 || (crtl->calls_eh_return
7419 && (reg == EH_RETURN_DATA_REGNO (0)
7420 || reg == EH_RETURN_DATA_REGNO (1)
7421 || reg == EH_RETURN_DATA_REGNO (2)
7422 || reg == EH_RETURN_DATA_REGNO (3)))
7423 || ((reg == MACL_REG || reg == MACH_REG)
7424 && df_regs_ever_live_p (reg)
7425 && sh_cfun_attr_renesas_p ())
7428 SET_HARD_REG_BIT (*live_regs_mask, reg);
7429 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7431 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7432 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7434 if (FP_REGISTER_P (reg))
7436 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7438 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7439 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7442 else if (XD_REGISTER_P (reg))
7444 /* Must switch to double mode to access these registers. */
7445 target_flags &= ~MASK_FPU_SINGLE;
7449 if (nosave_low_regs && reg == R8_REG)
7450 break;
7452 /* If we have a target register optimization pass after prologue / epilogue
7453 threading, we need to assume all target registers will be live even if
7454 they aren't now. */
7455 if (flag_branch_target_load_optimize2
7456 && TARGET_SAVE_ALL_TARGET_REGS
7457 && shmedia_space_reserved_for_target_registers)
7458 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7459 if ((! call_really_used_regs[reg] || interrupt_handler)
7460 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7462 SET_HARD_REG_BIT (*live_regs_mask, reg);
7463 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7465 /* If this is an interrupt handler, we don't have any call-clobbered
7466 registers we can conveniently use for target register save/restore.
7467 Make sure we save at least one general purpose register when we need
7468 to save target registers. */
7469 if (interrupt_handler
7470 && hard_reg_set_intersect_p (*live_regs_mask,
7471 reg_class_contents[TARGET_REGS])
7472 && ! hard_reg_set_intersect_p (*live_regs_mask,
7473 reg_class_contents[GENERAL_REGS]))
7475 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7476 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7479 return count;
7482 /* Code to generate prologue and epilogue sequences. */
7484 /* PUSHED is the number of bytes that are being pushed on the
7485 stack for register saves. Return the frame size, padded
7486 appropriately so that the stack stays properly aligned. */
7487 static HOST_WIDE_INT
7488 rounded_frame_size (int pushed)
7490 HOST_WIDE_INT size = get_frame_size ();
7491 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7493 if (ACCUMULATE_OUTGOING_ARGS)
7494 size += crtl->outgoing_args_size;
7496 return ((size + pushed + align - 1) & -align) - pushed;
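/* Illustrative example (not part of the original source): with
   get_frame_size () == 20, pushed == 8 and a 32-bit STACK_BOUNDARY
   (align == 4), the result is ((20 + 8 + 3) & -4) - 8 == 20, i.e. the
   frame is padded so that the frame size plus the pushed bytes stays a
   multiple of the stack alignment.  */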
7499 /* Choose a call-clobbered target-branch register that remains
7500 unchanged along the whole function. We set it up as the return
7501 value in the prologue. */
7503 sh_media_register_for_return (void)
7505 int regno;
7506 int tr0_used;
7508 if (! crtl->is_leaf)
7509 return -1;
7510 if (lookup_attribute ("interrupt_handler",
7511 DECL_ATTRIBUTES (current_function_decl)))
7512 return -1;
7513 if (sh_cfun_interrupt_handler_p ())
7514 return -1;
7516 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7518 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7519 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7520 return regno;
7522 return -1;
7525 /* The maximum registers we need to save are:
7526 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7527 - 32 floating point registers (for each pair, we save none,
7528 one single precision value, or a double precision value).
7529 - 8 target registers
7530 - add 1 entry for a delimiter. */
7531 #define MAX_SAVED_REGS (62+32+8)
7533 typedef struct save_entry_s
7535 unsigned char reg;
7536 unsigned char mode;
7537 short offset;
7538 } save_entry;
7540 #define MAX_TEMPS 4
7542 /* There will be a delimiter entry with VOIDmode both at the start and the
7543 end of a filled in schedule. The end delimiter has the offset of the
7544 save with the smallest (i.e. most negative) offset. */
7545 typedef struct save_schedule_s
7547 save_entry entries[MAX_SAVED_REGS + 2];
7548 int temps[MAX_TEMPS+1];
7549 } save_schedule;
7551 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7552 use reverse order. Returns the last entry written to (not counting
7553 the delimiter). OFFSET_BASE is a number to be added to all offset
7554 entries. */
7555 static save_entry *
7556 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7557 int offset_base)
7559 int align, i;
7560 save_entry *entry = schedule->entries;
7561 int tmpx = 0;
7562 int offset;
7564 if (! current_function_interrupt)
7565 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7566 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7567 && ! FUNCTION_ARG_REGNO_P (i)
7568 && i != FIRST_RET_REG
7569 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7570 && ! (crtl->calls_eh_return
7571 && (i == EH_RETURN_STACKADJ_REGNO
7572 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7573 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7574 schedule->temps[tmpx++] = i;
7575 entry->reg = -1;
7576 entry->mode = VOIDmode;
7577 entry->offset = offset_base;
7578 entry++;
7579 /* We loop twice: first, we save 8-byte aligned registers in the
7580 higher addresses, which are known to be aligned. Then, we
7581 proceed to saving 32-bit registers that don't need 8-byte
7582 alignment.
7583 If this is an interrupt function, all registers that need saving
7584 need to be saved in full. Moreover, we need to postpone saving
7585 target registers until we have saved some general purpose registers
7586 we can then use as scratch registers. */
7587 offset = offset_base;
7588 for (align = 1; align >= 0; align--)
7590 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7591 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7593 machine_mode mode = REGISTER_NATURAL_MODE (i);
7594 int reg = i;
7596 if (current_function_interrupt)
7598 if (TARGET_REGISTER_P (i))
7599 continue;
7600 if (GENERAL_REGISTER_P (i))
7601 mode = DImode;
7603 if (mode == SFmode && (i % 2) == 1
7604 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7605 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7607 mode = DFmode;
7608 i--;
7609 reg--;
7612 /* If we're doing the aligned pass and this is not aligned,
7613 or we're doing the unaligned pass and this is aligned,
7614 skip it. */
7615 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7616 != align)
7617 continue;
7619 if (current_function_interrupt
7620 && GENERAL_REGISTER_P (i)
7621 && tmpx < MAX_TEMPS)
7622 schedule->temps[tmpx++] = i;
7624 offset -= GET_MODE_SIZE (mode);
7625 entry->reg = i;
7626 entry->mode = mode;
7627 entry->offset = offset;
7628 entry++;
7630 if (align && current_function_interrupt)
7631 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7632 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7634 offset -= GET_MODE_SIZE (DImode);
7635 entry->reg = i;
7636 entry->mode = DImode;
7637 entry->offset = offset;
7638 entry++;
7641 entry->reg = -1;
7642 entry->mode = VOIDmode;
7643 entry->offset = offset;
7644 schedule->temps[tmpx] = -1;
7645 return entry - 1;
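/* Illustrative sketch of a filled-in schedule (hypothetical values, not
   taken from a real compilation): saving one DImode register at offset -8
   and one SFmode register at offset -12 relative to an OFFSET_BASE of 0
   would leave roughly
     entries[0] = { -1, VOIDmode,   0 }    start delimiter
     entries[1] = { rA, DImode,    -8 }
     entries[2] = { rB, SFmode,   -12 }
     entries[3] = { -1, VOIDmode, -12 }    end delimiter, smallest offset
   with the function returning &entries[2], the last real save.  */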
7648 /* Expand code for the function prologue. */
7649 void
7650 sh_expand_prologue (void)
7652 HARD_REG_SET live_regs_mask;
7653 int d, i;
7654 int d_rounding = 0;
7655 int save_flags = target_flags;
7656 int pretend_args;
7657 int stack_usage;
7658 tree sp_switch_attr
7659 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7661 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7663 /* We have pretend args if we had an object sent partially in registers
7664 and partially on the stack, e.g. a large structure. */
7665 pretend_args = crtl->args.pretend_args_size;
7666 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7667 && (NPARM_REGS(SImode)
7668 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7669 pretend_args = 0;
7671 output_stack_adjust (-pretend_args
7672 - crtl->args.info.stack_regs * 8,
7673 stack_pointer_rtx, 0, NULL, true);
7674 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7676 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7677 /* We're going to use the PIC register to load the address of the
7678 incoming-argument decoder and/or of the return trampoline from
7679 the GOT, so make sure the PIC register is preserved and
7680 initialized. */
7681 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7683 if (TARGET_SHCOMPACT
7684 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7686 int reg;
7688 /* First, make all registers with incoming arguments that will
7689 be pushed onto the stack live, so that register renaming
7690 doesn't overwrite them. */
7691 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7692 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7693 >= NPARM_REGS (SImode) - reg)
7694 for (; reg < NPARM_REGS (SImode); reg++)
7695 emit_insn (gen_shcompact_preserve_incoming_args
7696 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7697 else if (CALL_COOKIE_INT_REG_GET
7698 (crtl->args.info.call_cookie, reg) == 1)
7699 emit_insn (gen_shcompact_preserve_incoming_args
7700 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7702 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7703 stack_pointer_rtx);
7704 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7705 GEN_INT (crtl->args.info.call_cookie));
7706 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7707 gen_rtx_REG (SImode, R0_REG));
7709 else if (TARGET_SHMEDIA)
7711 int tr = sh_media_register_for_return ();
7713 if (tr >= 0)
7714 emit_move_insn (gen_rtx_REG (DImode, tr),
7715 gen_rtx_REG (DImode, PR_MEDIA_REG));
7718 /* Emit the code for SETUP_VARARGS. */
7719 if (cfun->stdarg)
7721 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7723 /* Push arg regs as if they'd been provided by the caller on the stack. */
7724 for (i = 0; i < NPARM_REGS(SImode); i++)
7726 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7728 if (i >= (NPARM_REGS(SImode)
7729 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7731 break;
7732 push (rn);
7733 stack_usage += GET_MODE_SIZE (SImode);
7738 /* If we're supposed to switch stacks at function entry, do so now. */
7739 if (sp_switch_attr)
7741 rtx lab, newsrc;
7742 /* The argument specifies a variable holding the address of the
7743 stack the interrupt function should switch to/from at entry/exit. */
7744 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7745 const char *s
7746 = ggc_strdup (TREE_STRING_POINTER (arg));
7747 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7749 lab = add_constant (sp_switch, SImode, 0);
7750 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7752 emit_insn (gen_sp_switch_1 (newsrc));
7755 d = calc_live_regs (&live_regs_mask);
7756 /* ??? Maybe we could save some switching if we can move a mode switch
7757 that already happens to be at the function start into the prologue. */
7758 if (target_flags != save_flags && ! current_function_interrupt)
7759 emit_insn (gen_toggle_sz ());
7761 if (TARGET_SH5)
7763 int offset_base, offset;
7764 rtx r0 = NULL_RTX;
7765 int offset_in_r0 = -1;
7766 int sp_in_r0 = 0;
7767 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7768 int total_size, save_size;
7769 save_schedule schedule;
7770 save_entry *entry;
7771 int *tmp_pnt;
7773 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7774 && ! current_function_interrupt)
7775 r0 = gen_rtx_REG (Pmode, R0_REG);
7777 /* D is the actual number of bytes that we need for saving registers,
7778 however, in initial_elimination_offset we have committed to using
7779 an additional TREGS_SPACE amount of bytes - in order to keep both
7780 addresses to arguments supplied by the caller and local variables
7781 valid, we must keep this gap. Place it between the incoming
7782 arguments and the actually saved registers in a bid to optimize
7783 locality of reference. */
7784 total_size = d + tregs_space;
7785 total_size += rounded_frame_size (total_size);
7786 save_size = total_size - rounded_frame_size (d);
7787 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7788 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7789 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7791 /* If adjusting the stack in a single step costs nothing extra, do so.
7792 I.e. either if a single addi is enough, or we need a movi anyway,
7793 and we don't exceed the maximum offset range (the test for the
7794 latter is conservative for simplicity). */
7795 if (TARGET_SHMEDIA
7796 && (CONST_OK_FOR_I10 (-total_size)
7797 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7798 && total_size <= 2044)))
7799 d_rounding = total_size - save_size;
7801 offset_base = d + d_rounding;
7803 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7804 0, NULL, true);
7805 stack_usage += save_size + d_rounding;
7807 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7808 tmp_pnt = schedule.temps;
7809 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7811 machine_mode mode = (machine_mode) entry->mode;
7812 unsigned int reg = entry->reg;
7813 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7814 rtx orig_reg_rtx;
7816 offset = entry->offset;
7818 reg_rtx = gen_rtx_REG (mode, reg);
7820 mem_rtx = gen_frame_mem (mode,
7821 gen_rtx_PLUS (Pmode,
7822 stack_pointer_rtx,
7823 GEN_INT (offset)));
7825 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7827 gcc_assert (r0);
7828 mem_rtx = NULL_RTX;
7831 if (HAVE_PRE_DECREMENT
7832 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7833 || mem_rtx == NULL_RTX
7834 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7836 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7838 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7839 pre_dec = NULL_RTX;
7840 else
7842 mem_rtx = NULL_RTX;
7843 offset += GET_MODE_SIZE (mode);
7847 if (mem_rtx != NULL_RTX)
7848 goto addr_ok;
7850 if (offset_in_r0 == -1)
7852 emit_move_insn (r0, GEN_INT (offset));
7853 offset_in_r0 = offset;
7855 else if (offset != offset_in_r0)
7857 emit_move_insn (r0,
7858 gen_rtx_PLUS
7859 (Pmode, r0,
7860 GEN_INT (offset - offset_in_r0)));
7861 offset_in_r0 += offset - offset_in_r0;
7864 if (pre_dec != NULL_RTX)
7866 if (! sp_in_r0)
7868 emit_move_insn (r0,
7869 gen_rtx_PLUS
7870 (Pmode, r0, stack_pointer_rtx));
7871 sp_in_r0 = 1;
7874 offset -= GET_MODE_SIZE (mode);
7875 offset_in_r0 -= GET_MODE_SIZE (mode);
7877 mem_rtx = pre_dec;
7879 else if (sp_in_r0)
7880 mem_rtx = gen_frame_mem (mode, r0);
7881 else
7882 mem_rtx = gen_frame_mem (mode,
7883 gen_rtx_PLUS (Pmode,
7884 stack_pointer_rtx,
7885 r0));
7887 /* We must not use an r0-based address for target-branch
7888 registers or for special registers without pre-dec
7889 memory addresses, since we store their values in r0
7890 first. */
7891 gcc_assert (!TARGET_REGISTER_P (reg)
7892 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7893 || mem_rtx == pre_dec));
7895 addr_ok:
7896 orig_reg_rtx = reg_rtx;
7897 if (TARGET_REGISTER_P (reg)
7898 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7899 && mem_rtx != pre_dec))
7901 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7903 emit_move_insn (tmp_reg, reg_rtx);
7905 if (REGNO (tmp_reg) == R0_REG)
7907 offset_in_r0 = -1;
7908 sp_in_r0 = 0;
7909 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7912 if (*++tmp_pnt <= 0)
7913 tmp_pnt = schedule.temps;
7915 reg_rtx = tmp_reg;
7918 rtx insn;
7920 /* Mark as interesting for the DWARF CFI generator. */
7921 insn = emit_move_insn (mem_rtx, reg_rtx);
7922 RTX_FRAME_RELATED_P (insn) = 1;
7923 /* If we use an intermediate register for the save, we can't
7924 describe this exactly in cfi as a copy of the to-be-saved
7925 register into the temporary register and then the temporary
7926 register on the stack, because the temporary register can
7927 have a different natural size than the to-be-saved register.
7928 Thus, we gloss over the intermediate copy and pretend we do
7929 a direct save from the to-be-saved register. */
7930 if (REGNO (reg_rtx) != reg)
7932 rtx set;
7934 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7935 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7938 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7940 rtx reg_rtx = gen_rtx_REG (mode, reg);
7941 rtx set;
7942 rtx mem_rtx = gen_frame_mem (mode,
7943 gen_rtx_PLUS (Pmode,
7944 stack_pointer_rtx,
7945 GEN_INT (offset)));
7947 set = gen_rtx_SET (mem_rtx, reg_rtx);
7948 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7953 gcc_assert (entry->offset == d_rounding);
7955 else
7957 push_regs (&live_regs_mask, current_function_interrupt);
7958 stack_usage += d;
7961 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7962 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7964 if (SHMEDIA_REGS_STACK_ADJUST ())
7966 /* This must NOT go through the PLT, otherwise mach and macl
7967 may be clobbered. */
7968 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7969 (TARGET_FPU_ANY
7970 ? "__GCC_push_shmedia_regs"
7971 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7972 emit_insn (gen_shmedia_save_restore_regs_compact
7973 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7976 if (target_flags != save_flags && ! current_function_interrupt)
7977 emit_insn (gen_toggle_sz ());
7979 target_flags = save_flags;
7981 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7982 stack_pointer_rtx, 0, NULL, true);
7983 stack_usage += rounded_frame_size (d) - d_rounding;
7985 if (frame_pointer_needed)
7986 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7988 if (TARGET_SHCOMPACT
7989 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7991 /* This must NOT go through the PLT, otherwise mach and macl
7992 may be clobbered. */
7993 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7994 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7995 emit_insn (gen_shcompact_incoming_args ());
7998 /* If we are profiling, make sure no instructions are scheduled before
7999 the call to mcount. Similarly if some call instructions are swapped
8000 before frame related insns, it'll confuse the unwinder because
8001 currently SH has no unwind info for function epilogues. */
8002 if (crtl->profile || flag_exceptions || flag_unwind_tables)
8003 emit_insn (gen_blockage ());
8005 if (flag_stack_usage_info)
8006 current_function_static_stack_size = stack_usage;
8009 /* Expand code for the function epilogue. */
8010 void
8011 sh_expand_epilogue (bool sibcall_p)
8013 HARD_REG_SET live_regs_mask;
8014 int d, i;
8015 int d_rounding = 0;
8017 int save_flags = target_flags;
8018 int frame_size, save_size;
8019 int fpscr_deferred = 0;
8020 int e = sibcall_p ? -1 : 1;
8022 d = calc_live_regs (&live_regs_mask);
8024 save_size = d;
8025 frame_size = rounded_frame_size (d);
8027 if (TARGET_SH5)
8029 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8030 int total_size;
8031 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8032 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8033 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8035 total_size = d + tregs_space;
8036 total_size += rounded_frame_size (total_size);
8037 save_size = total_size - frame_size;
8039 /* If adjusting the stack in a single step costs nothing extra, do so.
8040 I.e. either if a single addi is enough, or we need a movi anyway,
8041 and we don't exceed the maximum offset range (the test for the
8042 latter is conservative for simplicity). */
8043 if (TARGET_SHMEDIA
8044 && ! frame_pointer_needed
8045 && (CONST_OK_FOR_I10 (total_size)
8046 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8047 && total_size <= 2044)))
8048 d_rounding = frame_size;
8050 frame_size -= d_rounding;
8053 if (frame_pointer_needed)
8055 /* We must avoid scheduling the epilogue with previous basic blocks.
8056 See PR/18032 and PR/40313. */
8057 emit_insn (gen_blockage ());
8058 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8059 &live_regs_mask, true);
8061 /* We must avoid moving the stack pointer adjustment past code
8062 which reads from the local frame, else an interrupt could
8063 occur after the SP adjustment and clobber data in the local
8064 frame. */
8065 emit_insn (gen_blockage ());
8066 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8068 else if (frame_size)
8070 /* We must avoid moving the stack pointer adjustment past code
8071 which reads from the local frame, else an interrupt could
8072 occur after the SP adjustment and clobber data in the local
8073 frame. */
8074 emit_insn (gen_blockage ());
8075 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8076 &live_regs_mask, true);
8079 if (SHMEDIA_REGS_STACK_ADJUST ())
8081 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8082 (TARGET_FPU_ANY
8083 ? "__GCC_pop_shmedia_regs"
8084 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8085 /* This must NOT go through the PLT, otherwise mach and macl
8086 may be clobbered. */
8087 emit_insn (gen_shmedia_save_restore_regs_compact
8088 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8091 /* Pop all the registers. */
8093 if (target_flags != save_flags && ! current_function_interrupt)
8094 emit_insn (gen_toggle_sz ());
8095 if (TARGET_SH5)
8097 int offset_base, offset;
8098 int offset_in_r0 = -1;
8099 int sp_in_r0 = 0;
8100 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8101 save_schedule schedule;
8102 save_entry *entry;
8103 int *tmp_pnt;
8105 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8106 offset_base = -entry[1].offset + d_rounding;
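/* As far as can be read from sh5_schedule_saves above, ENTRY now points at
   the last real save and ENTRY[1] is the trailing delimiter, whose offset
   is the most negative one; -ENTRY[1].offset + d_rounding is therefore the
   total size of the register save area being unwound here.  */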
8107 tmp_pnt = schedule.temps;
8108 for (; entry->mode != VOIDmode; entry--)
8110 machine_mode mode = (machine_mode) entry->mode;
8111 int reg = entry->reg;
8112 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8114 offset = offset_base + entry->offset;
8115 reg_rtx = gen_rtx_REG (mode, reg);
8117 mem_rtx = gen_frame_mem (mode,
8118 gen_rtx_PLUS (Pmode,
8119 stack_pointer_rtx,
8120 GEN_INT (offset)));
8122 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8123 mem_rtx = NULL_RTX;
8125 if (HAVE_POST_INCREMENT
8126 && (offset == offset_in_r0
8127 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8128 && mem_rtx == NULL_RTX)
8129 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8131 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8133 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8134 post_inc = NULL_RTX;
8135 else
8136 mem_rtx = NULL_RTX;
8139 if (mem_rtx != NULL_RTX)
8140 goto addr_ok;
8142 if (offset_in_r0 == -1)
8144 emit_move_insn (r0, GEN_INT (offset));
8145 offset_in_r0 = offset;
8147 else if (offset != offset_in_r0)
8149 emit_move_insn (r0,
8150 gen_rtx_PLUS
8151 (Pmode, r0,
8152 GEN_INT (offset - offset_in_r0)));
8153 offset_in_r0 += offset - offset_in_r0;
8156 if (post_inc != NULL_RTX)
8158 if (! sp_in_r0)
8160 emit_move_insn (r0,
8161 gen_rtx_PLUS
8162 (Pmode, r0, stack_pointer_rtx));
8163 sp_in_r0 = 1;
8166 mem_rtx = post_inc;
8168 offset_in_r0 += GET_MODE_SIZE (mode);
8170 else if (sp_in_r0)
8171 mem_rtx = gen_frame_mem (mode, r0);
8172 else
8173 mem_rtx = gen_frame_mem (mode,
8174 gen_rtx_PLUS (Pmode,
8175 stack_pointer_rtx,
8176 r0));
8178 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8179 || mem_rtx == post_inc);
8181 addr_ok:
8182 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8183 && mem_rtx != post_inc)
8185 emit_move_insn (r0, mem_rtx);
8186 mem_rtx = r0;
8188 else if (TARGET_REGISTER_P (reg))
8190 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8192 /* Give the scheduler a bit of freedom by using up to
8193 MAX_TEMPS registers in a round-robin fashion. */
8194 emit_move_insn (tmp_reg, mem_rtx);
8195 mem_rtx = tmp_reg;
8196 if (*++tmp_pnt < 0)
8197 tmp_pnt = schedule.temps;
8200 emit_move_insn (reg_rtx, mem_rtx);
8203 gcc_assert (entry->offset + offset_base == d + d_rounding);
8205 else /* ! TARGET_SH5 */
8207 int last_reg;
8209 save_size = 0;
8210 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8211 register. */
8212 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8213 && !sh_cfun_resbank_handler_p ())
8215 if (!frame_pointer_needed)
8216 emit_insn (gen_blockage ());
8217 pop (PR_REG);
8220 /* Banked registers are popped first to avoid being scheduled in the
8221 delay slot. RTE switches banks before the ds instruction. */
8222 if (current_function_interrupt)
8224 bool use_movml = false;
8226 if (TARGET_SH2A)
8228 unsigned int count = 0;
8230 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8231 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8232 count++;
8233 else
8234 break;
8236 /* Use movml when all banked registers are popped. */
8237 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8238 use_movml = true;
8241 if (sh_cfun_resbank_handler_p ())
8242 ; /* Do nothing. */
8243 else if (use_movml)
8245 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8247 /* We must avoid scheduling the multiple-load insn together with
8248 other insns. */
8249 emit_insn (gen_blockage ());
8250 emit_insn (gen_movml_pop_banked (sp_reg));
8251 emit_insn (gen_blockage ());
8253 else
8254 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8255 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8256 pop (i);
8258 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8260 else
8261 last_reg = FIRST_PSEUDO_REGISTER;
8263 for (i = 0; i < last_reg; i++)
8265 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8267 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8268 && hard_reg_set_intersect_p (live_regs_mask,
8269 reg_class_contents[DF_REGS]))
8270 fpscr_deferred = 1;
8271 /* For an ISR with the RESBANK attribute assigned, don't pop the
8272 following registers: R0-R14, MACH, MACL and GBR. */
8273 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8274 && ! (sh_cfun_resbank_handler_p ()
8275 && ((j >= FIRST_GENERAL_REG
8276 && j < LAST_GENERAL_REG)
8277 || j == MACH_REG
8278 || j == MACL_REG
8279 || j == GBR_REG)))
8280 pop (j);
8282 if (j == FIRST_FP_REG && fpscr_deferred)
8283 pop (FPSCR_REG);
8286 if (target_flags != save_flags && ! current_function_interrupt)
8287 emit_insn (gen_toggle_sz ());
8288 target_flags = save_flags;
8290 output_stack_adjust (crtl->args.pretend_args_size
8291 + save_size + d_rounding
8292 + crtl->args.info.stack_regs * 8,
8293 stack_pointer_rtx, e, NULL, true);
8295 if (crtl->calls_eh_return)
8296 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8297 EH_RETURN_STACKADJ_RTX));
8299 /* Switch back to the normal stack if necessary. */
8300 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8301 emit_insn (gen_sp_switch_2 ());
8303 /* Tell flow the insn that pops PR isn't dead. */
8304 /* PR_REG will never be live in SHmedia mode, and we don't need to
8305 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8306 by the return pattern. */
8307 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8308 emit_use (gen_rtx_REG (SImode, PR_REG));
8311 /* Emit code to change the current function's return address to RA.
8312 TEMP is available as a scratch register, if needed. */
8313 void
8314 sh_set_return_address (rtx ra, rtx tmp)
8316 HARD_REG_SET live_regs_mask;
8317 int d;
8318 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8319 int pr_offset;
8321 d = calc_live_regs (&live_regs_mask);
8323 /* If pr_reg isn't live, we can set it (or the register given in
8324 sh_media_register_for_return) directly. */
8325 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8327 rtx rr;
8329 if (TARGET_SHMEDIA)
8331 int rr_regno = sh_media_register_for_return ();
8333 if (rr_regno < 0)
8334 rr_regno = pr_reg;
8336 rr = gen_rtx_REG (DImode, rr_regno);
8338 else
8339 rr = gen_rtx_REG (SImode, pr_reg);
8341 emit_insn (GEN_MOV (rr, ra));
8342 /* Tell flow the register for return isn't dead. */
8343 emit_use (rr);
8344 return;
8347 if (TARGET_SH5)
8349 int offset;
8350 save_schedule schedule;
8351 save_entry *entry;
8353 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8354 offset = entry[1].offset;
8355 for (; entry->mode != VOIDmode; entry--)
8356 if (entry->reg == pr_reg)
8357 goto found;
8359 /* We can't find the PR register. */
8360 gcc_unreachable ();
8362 found:
8363 offset = entry->offset - offset;
8364 pr_offset = (rounded_frame_size (d) + offset
8365 + SHMEDIA_REGS_STACK_ADJUST ());
8367 else
8368 pr_offset = rounded_frame_size (d);
8370 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8372 if (frame_pointer_needed)
8373 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8374 else
8375 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8377 tmp = gen_frame_mem (Pmode, tmp);
8378 emit_insn (GEN_MOV (tmp, ra));
8379 /* Tell flow this store isn't dead. */
8380 emit_use (tmp);
8383 /* Clear variables at function end. */
8384 static void
8385 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8386 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8390 static rtx
8391 sh_builtin_saveregs (void)
8393 /* First unnamed integer register. */
8394 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8395 /* Number of integer registers we need to save. */
8396 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8397 /* First unnamed SFmode float reg */
8398 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8399 /* Number of SFmode float regs to save. */
8400 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8401 rtx regbuf, fpregs;
8402 int bufsize, regno;
8403 alias_set_type alias_set;
8405 if (TARGET_SH5)
8407 if (n_intregs)
8409 int pushregs = n_intregs;
8411 while (pushregs < NPARM_REGS (SImode) - 1
8412 && (CALL_COOKIE_INT_REG_GET
8413 (crtl->args.info.call_cookie,
8414 NPARM_REGS (SImode) - pushregs)
8415 == 1))
8417 crtl->args.info.call_cookie
8418 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8419 - pushregs, 1);
8420 pushregs++;
8423 if (pushregs == NPARM_REGS (SImode))
8424 crtl->args.info.call_cookie
8425 |= (CALL_COOKIE_INT_REG (0, 1)
8426 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8427 else
8428 crtl->args.info.call_cookie
8429 |= CALL_COOKIE_STACKSEQ (pushregs);
8431 crtl->args.pretend_args_size += 8 * n_intregs;
8433 if (TARGET_SHCOMPACT)
8434 return const0_rtx;
8437 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8439 error ("__builtin_saveregs not supported by this subtarget");
8440 return const0_rtx;
8443 if (TARGET_SHMEDIA)
8444 n_floatregs = 0;
8446 /* Allocate block of memory for the regs. */
8447 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8448 Or can assign_stack_local accept a 0 SIZE argument? */
8449 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
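/* Rough sizing example (assuming the usual 4 integer and 8 single-precision
   argument registers of the 32-bit SH ABI): with one named integer argument
   and no named FP arguments, n_intregs == 3 and n_floatregs == 8, giving
   bufsize == (3 + 8) * UNITS_PER_WORD == 44 bytes.  */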
8451 if (TARGET_SHMEDIA)
8452 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8453 else if (n_floatregs & 1)
8455 rtx addr;
8457 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8458 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8459 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8460 regbuf = change_address (regbuf, BLKmode, addr);
8462 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8464 rtx addr, mask;
8466 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8467 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8468 XEXP (regbuf, 0), 4));
8469 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8470 emit_insn (gen_andsi3 (addr, addr, mask));
8471 regbuf = change_address (regbuf, BLKmode, addr);
8473 else
8474 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8475 alias_set = get_varargs_alias_set ();
8476 set_mem_alias_set (regbuf, alias_set);
8478 /* Save int args.
8479 This is optimized to only save the regs that are necessary. Explicitly
8480 named args need not be saved. */
8481 if (n_intregs > 0)
8482 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8483 adjust_address (regbuf, BLKmode,
8484 n_floatregs * UNITS_PER_WORD),
8485 n_intregs);
8487 if (TARGET_SHMEDIA)
8488 /* Return the address of the regbuf. */
8489 return XEXP (regbuf, 0);
8491 /* Save float args.
8492 This is optimized to only save the regs that are necessary. Explicitly
8493 named args need not be saved.
8494 We explicitly build a pointer to the buffer because it halves the insn
8495 count when not optimizing (otherwise the pointer is built for each reg
8496 saved).
8497 We emit the moves in reverse order so that we can use predecrement. */
8499 fpregs = copy_to_mode_reg (Pmode,
8500 plus_constant (Pmode, XEXP (regbuf, 0),
8501 n_floatregs * UNITS_PER_WORD));
8502 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8504 rtx mem;
8505 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8507 emit_insn (gen_addsi3 (fpregs, fpregs,
8508 GEN_INT (-2 * UNITS_PER_WORD)));
8509 mem = change_address (regbuf, DFmode, fpregs);
8510 emit_move_insn (mem,
8511 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8513 regno = first_floatreg;
8514 if (regno & 1)
8516 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8517 mem = change_address (regbuf, SFmode, fpregs);
8518 emit_move_insn (mem,
8519 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8520 + regno - SH_REG_MSW_OFFSET));
8523 else
8524 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8526 rtx mem;
8528 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8529 mem = change_address (regbuf, SFmode, fpregs);
8530 emit_move_insn (mem,
8531 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8534 /* Return the address of the regbuf. */
8535 return XEXP (regbuf, 0);
8538 /* Define the `__builtin_va_list' type for the ABI. */
8539 static tree
8540 sh_build_builtin_va_list (void)
8542 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8543 tree record, type_decl;
8545 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8546 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8547 return ptr_type_node;
8549 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8550 type_decl = build_decl (BUILTINS_LOCATION,
8551 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8553 f_next_o = build_decl (BUILTINS_LOCATION,
8554 FIELD_DECL, get_identifier ("__va_next_o"),
8555 ptr_type_node);
8556 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8557 FIELD_DECL,
8558 get_identifier ("__va_next_o_limit"),
8559 ptr_type_node);
8560 f_next_fp = build_decl (BUILTINS_LOCATION,
8561 FIELD_DECL, get_identifier ("__va_next_fp"),
8562 ptr_type_node);
8563 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8564 FIELD_DECL,
8565 get_identifier ("__va_next_fp_limit"),
8566 ptr_type_node);
8567 f_next_stack = build_decl (BUILTINS_LOCATION,
8568 FIELD_DECL, get_identifier ("__va_next_stack"),
8569 ptr_type_node);
8571 DECL_FIELD_CONTEXT (f_next_o) = record;
8572 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8573 DECL_FIELD_CONTEXT (f_next_fp) = record;
8574 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8575 DECL_FIELD_CONTEXT (f_next_stack) = record;
8577 TYPE_STUB_DECL (record) = type_decl;
8578 TYPE_NAME (record) = type_decl;
8579 TYPE_FIELDS (record) = f_next_o;
8580 DECL_CHAIN (f_next_o) = f_next_o_limit;
8581 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8582 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8583 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8585 layout_type (record);
8587 return record;
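/* For orientation only: the record built above corresponds roughly to the
   following C declaration (a sketch; the real type is constructed through
   the tree machinery, not declared like this):

     struct __va_list_tag
     {
       void *__va_next_o;         next unconsumed integer-arg save slot
       void *__va_next_o_limit;   end of the integer-arg save area
       void *__va_next_fp;        next unconsumed FP-arg save slot
       void *__va_next_fp_limit;  end of the FP-arg save area
       void *__va_next_stack;     next argument passed on the stack
     };  */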
8590 /* Implement `va_start' for varargs and stdarg. */
8591 static void
8592 sh_va_start (tree valist, rtx nextarg)
8594 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8595 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8596 tree t, u;
8597 int nfp, nint;
8599 if (TARGET_SH5)
8601 expand_builtin_saveregs ();
8602 std_expand_builtin_va_start (valist, nextarg);
8603 return;
8606 if ((! TARGET_SH2E && ! TARGET_SH4)
8607 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8609 std_expand_builtin_va_start (valist, nextarg);
8610 return;
8613 f_next_o = TYPE_FIELDS (va_list_type_node);
8614 f_next_o_limit = DECL_CHAIN (f_next_o);
8615 f_next_fp = DECL_CHAIN (f_next_o_limit);
8616 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8617 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8619 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8620 NULL_TREE);
8621 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8622 valist, f_next_o_limit, NULL_TREE);
8623 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8624 NULL_TREE);
8625 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8626 valist, f_next_fp_limit, NULL_TREE);
8627 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8628 valist, f_next_stack, NULL_TREE);
8630 /* Call __builtin_saveregs. */
8631 u = make_tree (sizetype, expand_builtin_saveregs ());
8632 u = fold_convert (ptr_type_node, u);
8633 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8634 TREE_SIDE_EFFECTS (t) = 1;
8635 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8637 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8638 if (nfp < 8)
8639 nfp = 8 - nfp;
8640 else
8641 nfp = 0;
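/* NFP is now the number of the (assumed 8) single-precision argument
   registers that were not consumed by named arguments, i.e. the FP
   registers spilled by __builtin_saveregs; __va_next_fp_limit is set just
   past their save slots below.  */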
8642 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8643 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8644 TREE_SIDE_EFFECTS (t) = 1;
8645 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8647 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8648 TREE_SIDE_EFFECTS (t) = 1;
8649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8651 nint = crtl->args.info.arg_count[SH_ARG_INT];
8652 if (nint < 4)
8653 nint = 4 - nint;
8654 else
8655 nint = 0;
8656 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8657 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8658 TREE_SIDE_EFFECTS (t) = 1;
8659 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8661 u = make_tree (ptr_type_node, nextarg);
8662 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8663 TREE_SIDE_EFFECTS (t) = 1;
8664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8667 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8668 member, return it. */
8669 static tree
8670 find_sole_member (tree type)
8672 tree field, member = NULL_TREE;
8674 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8676 if (TREE_CODE (field) != FIELD_DECL)
8677 continue;
8678 if (!DECL_SIZE (field))
8679 return NULL_TREE;
8680 if (integer_zerop (DECL_SIZE (field)))
8681 continue;
8682 if (member)
8683 return NULL_TREE;
8684 member = field;
8686 return member;
8689 /* Implement `va_arg'. */
8690 static tree
8691 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8692 gimple_seq *post_p ATTRIBUTE_UNUSED)
8694 HOST_WIDE_INT size, rsize;
8695 tree tmp, pptr_type_node;
8696 tree addr, lab_over = NULL, result = NULL;
8697 bool pass_by_ref;
8698 tree eff_type;
8700 if (!VOID_TYPE_P (type))
8701 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8702 else
8703 pass_by_ref = false;
8705 if (pass_by_ref)
8706 type = build_pointer_type (type);
8708 size = int_size_in_bytes (type);
8709 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
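/* For example, with a 4-byte UNITS_PER_WORD an argument of size 6 gets
   rsize == 8: the size rounded up to a whole number of words.  */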
8710 pptr_type_node = build_pointer_type (ptr_type_node);
8712 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8713 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8715 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8716 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8717 int pass_as_float;
8718 tree lab_false;
8719 tree member;
8721 f_next_o = TYPE_FIELDS (va_list_type_node);
8722 f_next_o_limit = DECL_CHAIN (f_next_o);
8723 f_next_fp = DECL_CHAIN (f_next_o_limit);
8724 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8725 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8727 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8728 NULL_TREE);
8729 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8730 valist, f_next_o_limit, NULL_TREE);
8731 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8732 valist, f_next_fp, NULL_TREE);
8733 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8734 valist, f_next_fp_limit, NULL_TREE);
8735 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8736 valist, f_next_stack, NULL_TREE);
8738 /* Structures with a single member with a distinct mode are passed
8739 like their member. This is relevant if the latter has a REAL_TYPE
8740 or COMPLEX_TYPE type. */
8741 eff_type = type;
8742 while (TREE_CODE (eff_type) == RECORD_TYPE
8743 && (member = find_sole_member (eff_type))
8744 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8745 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8746 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8748 tree field_type = TREE_TYPE (member);
8750 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8751 eff_type = field_type;
8752 else
8754 gcc_assert ((TYPE_ALIGN (eff_type)
8755 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8756 || (TYPE_ALIGN (eff_type)
8757 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8758 break;
8762 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8764 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8765 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8766 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8767 && size <= 16));
8769 else
8771 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8774 addr = create_tmp_var (pptr_type_node);
8775 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8776 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8778 valist = build_simple_mem_ref (addr);
8780 if (pass_as_float)
8782 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8783 tree cmp;
8784 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8786 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8787 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8789 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8790 tmp = next_fp_limit;
8791 if (size > 4 && !is_double)
8792 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8793 tmp = build2 (GE_EXPR, boolean_type_node,
8794 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8795 cmp = build3 (COND_EXPR, void_type_node, tmp,
8796 build1 (GOTO_EXPR, void_type_node,
8797 unshare_expr (lab_false)), NULL_TREE);
8798 if (!is_double)
8799 gimplify_and_add (cmp, pre_p);
8801 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8802 || (is_double || size == 16))
8804 tmp = fold_convert (sizetype, next_fp_tmp);
8805 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8806 size_int (UNITS_PER_WORD));
8807 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8808 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8810 if (is_double)
8811 gimplify_and_add (cmp, pre_p);
8813 #ifdef FUNCTION_ARG_SCmode_WART
8814 if (TYPE_MODE (eff_type) == SCmode
8815 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8817 tree subtype = TREE_TYPE (eff_type);
8818 tree real, imag;
8820 imag
8821 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8822 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8824 real
8825 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8826 real = get_initialized_tmp_var (real, pre_p, NULL);
8828 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8829 if (type != eff_type)
8830 result = build1 (VIEW_CONVERT_EXPR, type, result);
8831 result = get_initialized_tmp_var (result, pre_p, NULL);
8833 #endif /* FUNCTION_ARG_SCmode_WART */
8835 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8836 gimplify_and_add (tmp, pre_p);
8838 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8839 gimplify_and_add (tmp, pre_p);
8841 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8842 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8843 gimplify_assign (unshare_expr (next_fp_tmp),
8844 unshare_expr (valist), pre_p);
8846 gimplify_assign (unshare_expr (valist),
8847 unshare_expr (next_fp_tmp), post_p);
8848 valist = next_fp_tmp;
8850 else
8852 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8853 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8854 unshare_expr (next_o_limit));
8855 tmp = build3 (COND_EXPR, void_type_node, tmp,
8856 build1 (GOTO_EXPR, void_type_node,
8857 unshare_expr (lab_false)),
8858 NULL_TREE);
8859 gimplify_and_add (tmp, pre_p);
8861 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8862 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8864 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8865 gimplify_and_add (tmp, pre_p);
8867 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8868 gimplify_and_add (tmp, pre_p);
8870 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8871 gimplify_assign (unshare_expr (next_o),
8872 unshare_expr (next_o_limit), pre_p);
8874 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8875 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8878 if (!result)
8880 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8881 gimplify_and_add (tmp, pre_p);
8885 /* ??? In va-sh.h, there had been code to make values larger than
8886 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8888 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8889 if (result)
8891 gimplify_assign (result, tmp, pre_p);
8892 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8893 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8894 gimplify_and_add (tmp, pre_p);
8896 else
8897 result = tmp;
8899 if (pass_by_ref)
8900 result = build_va_arg_indirect_ref (result);
8902 return result;
8905 /* 64-bit floating point memory transfers are paired single precision loads
8906 or stores, so the DWARF information needs fixing in little endian (unless
8907 PR=SZ=1 in FPSCR). */
8909 sh_dwarf_register_span (rtx reg)
8911 unsigned regno = REGNO (reg);
8913 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8914 return NULL_RTX;
8916 return
8917 gen_rtx_PARALLEL (VOIDmode,
8918 gen_rtvec (2,
8919 gen_rtx_REG (SFmode, regno + 1),
8920 gen_rtx_REG (SFmode, regno)));
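/* In other words (an illustrative reading of the code above): a DFmode value
   living in the register pair starting at hard register REGNO is described
   to the unwinder as two separate SFmode pieces, REGNO + 1 and REGNO,
   rather than as a single 64-bit register.  */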
8923 static machine_mode
8924 sh_promote_function_mode (const_tree type, machine_mode mode,
8925 int *punsignedp, const_tree funtype,
8926 int for_return)
8928 if (sh_promote_prototypes (funtype))
8929 return promote_mode (type, mode, punsignedp);
8930 else
8931 return default_promote_function_mode (type, mode, punsignedp, funtype,
8932 for_return);
8935 static bool
8936 sh_promote_prototypes (const_tree type)
8938 if (TARGET_HITACHI)
8939 return false;
8940 if (! type)
8941 return true;
8942 return ! sh_attr_renesas_p (type);
8945 /* Whether an argument must be passed by reference. On SHcompact, we
8946 pretend arguments wider than 32 bits that would have been passed in
8947 registers are passed by reference, so that an SHmedia trampoline
8948 loads them into the full 64-bit registers. */
8949 static int
8950 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8951 const_tree type, bool named)
8953 unsigned HOST_WIDE_INT size;
8955 if (type)
8956 size = int_size_in_bytes (type);
8957 else
8958 size = GET_MODE_SIZE (mode);
8960 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8961 && (!named
8962 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8963 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8964 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8965 && size > 4
8966 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8967 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8968 return size;
8969 else
8970 return 0;
8973 static bool
8974 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8975 const_tree type, bool named)
8977 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8979 if (targetm.calls.must_pass_in_stack (mode, type))
8980 return true;
8982 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8983 wants to know about pass-by-reference semantics for incoming
8984 arguments. */
8985 if (! cum)
8986 return false;
8988 if (TARGET_SHCOMPACT)
8990 cum->byref = shcompact_byref (cum, mode, type, named);
8991 return cum->byref != 0;
8994 return false;
8997 static bool
8998 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8999 const_tree type, bool named ATTRIBUTE_UNUSED)
9001 /* ??? How can it possibly be correct to return true only on the
9002 caller side of the equation? Is there someplace else in the
9003 sh backend that's magically producing the copies? */
9004 return (get_cumulative_args (cum)->outgoing
9005 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
9006 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
9009 /* Round a register number up to a proper boundary for an arg of mode
9010 MODE.
9011 The SH doesn't care about double alignment, so we only
9012 round doubles to even regs when explicitly asked to. */
9013 static int
9014 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
9016 /* FIXME: This used to be a macro and has been copy pasted into this
9017 function as is. Make this more readable. */
9018 return
9019 (((TARGET_ALIGN_DOUBLE
9020 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9021 && (mode == DFmode || mode == DCmode)
9022 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9023 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9024 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9025 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9026 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
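/* Illustrative: when the rounding condition above holds and one SFmode
   argument register has already been used (count == 1), a DFmode argument
   starts at register 2, i.e. 1 + (1 & 1); an even count is returned
   unchanged.  */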
9029 /* Return true if an arg of the specified mode should be passed in a register
9030 or false otherwise. */
9031 static bool
9032 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9033 const_tree type)
9035 /* FIXME: This used to be a macro and has been copy pasted into this
9036 function as is. Make this more readable. */
9037 return
9038 ((type == 0
9039 || (! TREE_ADDRESSABLE (type)
9040 && (! (TARGET_HITACHI || cum.renesas_abi)
9041 || ! (AGGREGATE_TYPE_P (type)
9042 || (!TARGET_FPU_ANY
9043 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9044 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9045 && ! cum.force_mem
9046 && (TARGET_SH2E
9047 ? ((mode) == BLKmode
9048 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9049 + int_size_in_bytes (type))
9050 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9051 : ((sh_round_reg (cum, mode)
9052 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9053 <= NPARM_REGS (mode)))
9054 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9057 static int
9058 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9059 tree type, bool named ATTRIBUTE_UNUSED)
9061 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9062 int words = 0;
9064 if (!TARGET_SH5
9065 && sh_pass_in_reg_p (*cum, mode, type)
9066 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9067 && (sh_round_reg (*cum, mode)
9068 + (mode != BLKmode
9069 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9070 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9071 > NPARM_REGS (mode)))
9072 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9074 else if (!TARGET_SHCOMPACT
9075 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9076 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9078 return words * UNITS_PER_WORD;
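/* Sketch of the non-SH4 case, assuming the usual four integer argument
   registers r4-r7: with three of them already taken, an 8-byte argument
   that still qualifies for register passing gets words == 4 - 3 == 1, so
   4 bytes go in r7 and the remainder is passed on the stack.  */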
9082 /* Define where to put the arguments to a function.
9083 Value is zero to push the argument on the stack,
9084 or a hard register in which to store the argument.
9086 MODE is the argument's machine mode.
9087 TYPE is the data type of the argument (as a tree).
9088 This is null for libcalls where that information may
9089 not be available.
9090 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9091 the preceding args and about the function being called.
9092 NAMED is nonzero if this argument is a named parameter
9093 (otherwise it is an extra parameter matching an ellipsis).
9095 On SH the first args are normally in registers
9096 and the rest are pushed. Any arg that starts within the first
9097 NPARM_REGS words is at least partially passed in a register unless
9098 its data type forbids. */
9099 static rtx
9100 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9101 const_tree type, bool named)
9103 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9105 if (! TARGET_SH5 && mode == VOIDmode)
9106 return GEN_INT (ca->renesas_abi ? 1 : 0);
9108 if (! TARGET_SH5
9109 && sh_pass_in_reg_p (*ca, mode, type)
9110 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9112 int regno;
9114 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9115 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9117 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9118 gen_rtx_REG (SFmode,
9119 BASE_ARG_REG (mode)
9120 + (sh_round_reg (*ca, mode) ^ 1)),
9121 const0_rtx);
9122 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9123 gen_rtx_REG (SFmode,
9124 BASE_ARG_REG (mode)
9125 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9126 GEN_INT (4));
9127 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9130 /* If the alignment of a DF value causes an SF register to be
9131 skipped, we will use that skipped register for the next SF
9132 value. */
9133 if ((TARGET_HITACHI || ca->renesas_abi)
9134 && ca->free_single_fp_reg
9135 && mode == SFmode)
9136 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9138 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9139 ^ (mode == SFmode && TARGET_SH4
9140 && TARGET_LITTLE_ENDIAN
9141 && ! TARGET_HITACHI && ! ca->renesas_abi);
9142 return gen_rtx_REG (mode, regno);
9146 if (TARGET_SH5)
9148 if (mode == VOIDmode && TARGET_SHCOMPACT)
9149 return GEN_INT (ca->call_cookie);
9151 /* The following test assumes unnamed arguments are promoted to
9152 DFmode. */
9153 if (mode == SFmode && ca->free_single_fp_reg)
9154 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9156 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9157 && (named || ! ca->prototype_p)
9158 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9160 if (! ca->prototype_p && TARGET_SHMEDIA)
9161 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9163 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9164 FIRST_FP_PARM_REG
9165 + ca->arg_count[(int) SH_ARG_FLOAT]);
9168 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9169 && (! TARGET_SHCOMPACT
9170 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9171 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9172 type, named))))
9174 return gen_rtx_REG (mode, (FIRST_PARM_REG
9175 + ca->arg_count[(int) SH_ARG_INT]));
9178 return NULL_RTX;
9181 return NULL_RTX;
9184 /* Update the data in CUM to advance over an argument
9185 of mode MODE and data type TYPE.
9186 (TYPE is null for libcalls where that information may not be
9187 available.) */
9188 static void
9189 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9190 const_tree type, bool named)
9192 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9194 if (ca->force_mem)
9195 ca->force_mem = 0;
9196 else if (TARGET_SH5)
9198 const_tree type2 = (ca->byref && type
9199 ? TREE_TYPE (type)
9200 : type);
9201 machine_mode mode2 = (ca->byref && type
9202 ? TYPE_MODE (type2)
9203 : mode);
9204 int dwords = ((ca->byref
9205 ? ca->byref
9206 : mode2 == BLKmode
9207 ? int_size_in_bytes (type2)
9208 : GET_MODE_SIZE (mode2)) + 7) / 8;
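/* E.g. (illustrative) a 12-byte BLKmode argument occupies
   dwords == (12 + 7) / 8 == 2 eight-byte slots.  */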
9209 int numregs = MIN (dwords, NPARM_REGS (SImode)
9210 - ca->arg_count[(int) SH_ARG_INT]);
9212 if (numregs)
9214 ca->arg_count[(int) SH_ARG_INT] += numregs;
9215 if (TARGET_SHCOMPACT
9216 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9218 ca->call_cookie
9219 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9220 - numregs, 1);
9221 /* N.B. We want this also for outgoing. */
9222 ca->stack_regs += numregs;
9224 else if (ca->byref)
9226 if (! ca->outgoing)
9227 ca->stack_regs += numregs;
9228 ca->byref_regs += numregs;
9229 ca->byref = 0;
9231 ca->call_cookie
9232 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9233 - numregs, 2);
9234 while (--numregs);
9235 ca->call_cookie
9236 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9237 - 1, 1);
9239 else if (dwords > numregs)
9241 int pushregs = numregs;
9243 if (TARGET_SHCOMPACT)
9244 ca->stack_regs += numregs;
9245 while (pushregs < NPARM_REGS (SImode) - 1
9246 && (CALL_COOKIE_INT_REG_GET
9247 (ca->call_cookie,
9248 NPARM_REGS (SImode) - pushregs)
9249 == 1))
9251 ca->call_cookie
9252 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9253 - pushregs, 1);
9254 pushregs++;
9256 if (numregs == NPARM_REGS (SImode))
9257 ca->call_cookie
9258 |= CALL_COOKIE_INT_REG (0, 1)
9259 | CALL_COOKIE_STACKSEQ (numregs - 1);
9260 else
9261 ca->call_cookie
9262 |= CALL_COOKIE_STACKSEQ (numregs);
9265 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9266 && (named || ! ca->prototype_p))
9268 if (mode2 == SFmode && ca->free_single_fp_reg)
9269 ca->free_single_fp_reg = 0;
9270 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9271 < NPARM_REGS (SFmode))
9273 int numfpregs
9274 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9275 NPARM_REGS (SFmode)
9276 - ca->arg_count[(int) SH_ARG_FLOAT]);
9278 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9280 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9282 if (ca->outgoing && numregs > 0)
9285 ca->call_cookie
9286 |= (CALL_COOKIE_INT_REG
9287 (ca->arg_count[(int) SH_ARG_INT]
9288 - numregs + ((numfpregs - 2) / 2),
9289 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9290 - numfpregs) / 2));
9292 while (numfpregs -= 2);
9294 else if (mode2 == SFmode && (named)
9295 && (ca->arg_count[(int) SH_ARG_FLOAT]
9296 < NPARM_REGS (SFmode)))
9297 ca->free_single_fp_reg
9298 = FIRST_FP_PARM_REG - numfpregs
9299 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9302 return;
9305 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9307 /* Note that we've used the skipped register. */
9308 if (mode == SFmode && ca->free_single_fp_reg)
9310 ca->free_single_fp_reg = 0;
9311 return;
9313 /* When we have a DF after an SF, there's an SF register that gets
9314 skipped in order to align the DF value. We note this skipped
9315 register, because the next SF value will use it, and not the
9316 SF that follows the DF. */
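      /* Illustrative sketch (hypothetical prototype, not from this file):
	 for  void f (float a, double b, float c)  under these conventions,
	 A takes the first single-precision argument register, B must start
	 on an aligned register pair so one single register is skipped, and
	 C is then passed in that skipped register rather than in the
	 register following B.  */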
9317 if (mode == DFmode
9318 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9320 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9321 + BASE_ARG_REG (mode));
9325 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9326 || sh_pass_in_reg_p (*ca, mode, type))
9327 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9328 = (sh_round_reg (*ca, mode)
9329 + (mode == BLKmode
9330 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9331 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9334 /* The Renesas calling convention doesn't quite fit into this scheme since
9335 the address is passed like an invisible argument, but one that is always
9336 passed in memory. */
9337 static rtx
9338 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9340 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9341 return NULL_RTX;
9342 return gen_rtx_REG (Pmode, 2);
9345 /* Worker function for TARGET_FUNCTION_VALUE.
9347 For the SH, this is like LIBCALL_VALUE, except that we must change the
9348 mode like PROMOTE_MODE does.
9349 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9350 tested here has to be kept in sync with the one in
9351 explow.c:promote_mode. */
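/* Illustrative note (an editorially hedged example, not from the original
   sources): a function declared to return  signed char  has its result
   promoted here and is returned in SImode (DImode on SHmedia64) in the
   integer return register, typically r0.  */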
9352 static rtx
9353 sh_function_value (const_tree valtype,
9354 const_tree fn_decl_or_type,
9355 bool outgoing ATTRIBUTE_UNUSED)
9357 if (fn_decl_or_type
9358 && !DECL_P (fn_decl_or_type))
9359 fn_decl_or_type = NULL;
9361 return gen_rtx_REG (
9362 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9363 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9364 && (TREE_CODE (valtype) == INTEGER_TYPE
9365 || TREE_CODE (valtype) == ENUMERAL_TYPE
9366 || TREE_CODE (valtype) == BOOLEAN_TYPE
9367 || TREE_CODE (valtype) == REAL_TYPE
9368 || TREE_CODE (valtype) == OFFSET_TYPE))
9369 && sh_promote_prototypes (fn_decl_or_type)
9370 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9371 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9374 /* Worker function for TARGET_LIBCALL_VALUE. */
9375 static rtx
9376 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9378 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9381 /* Return true if N is a possible register number of function value. */
9382 static bool
9383 sh_function_value_regno_p (const unsigned int regno)
9385 return ((regno) == FIRST_RET_REG
9386 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9387 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9390 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9391 static bool
9392 sh_return_in_memory (const_tree type, const_tree fndecl)
9394 if (TARGET_SH5)
9396 if (TYPE_MODE (type) == BLKmode)
9397 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9398 else
9399 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9401 else
9403 return (TYPE_MODE (type) == BLKmode
9404 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9405 && TREE_CODE (type) == RECORD_TYPE));
9409 /* We actually emit the code in sh_expand_prologue. We used to use
9410 a static variable to flag that we need to emit this code, but that
9411 doesn't work when inlining, when functions are deferred and then emitted
9412 later. Fortunately, we already have two flags that are part of struct
9413 function that tell if a function uses varargs or stdarg. */
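/* Illustrative sketch, assuming the usual four SImode argument registers
   (NPARM_REGS (SImode) == 4): for a variadic function such as
   int f (int n, ...)  the single named parameter occupies one register,
   so anon_parm_regs is 3 and *pretend_arg_size becomes 3 * 4 = 12.  */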
9414 static void
9415 sh_setup_incoming_varargs (cumulative_args_t ca,
9416 machine_mode mode,
9417 tree type,
9418 int *pretend_arg_size,
9419 int second_time ATTRIBUTE_UNUSED)
9421 gcc_assert (cfun->stdarg);
9422 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9424 int named_parm_regs, anon_parm_regs;
9426 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9427 + (mode == BLKmode
9428 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9429 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9430 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9431 if (anon_parm_regs > 0)
9432 *pretend_arg_size = anon_parm_regs * 4;
9436 static bool
9437 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9439 return TARGET_SH5;
9442 static bool
9443 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9445 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9447 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9451 /* Define the offset between two registers, one to be eliminated, and
9452 the other its replacement, at the start of a routine. */
9453 int
9454 initial_elimination_offset (int from, int to)
9456 int regs_saved;
9457 int regs_saved_rounding = 0;
9458 int total_saved_regs_space;
9459 int total_auto_space;
9460 int save_flags = target_flags;
9461 int copy_flags;
9462 HARD_REG_SET live_regs_mask;
9464 shmedia_space_reserved_for_target_registers = false;
9465 regs_saved = calc_live_regs (&live_regs_mask);
9466 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9468 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9470 shmedia_space_reserved_for_target_registers = true;
9471 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9474 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9475 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9476 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9478 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9479 copy_flags = target_flags;
9480 target_flags = save_flags;
9482 total_saved_regs_space = regs_saved + regs_saved_rounding;
9484 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9485 return total_saved_regs_space + total_auto_space
9486 + crtl->args.info.byref_regs * 8;
9488 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9489 return total_saved_regs_space + total_auto_space
9490 + crtl->args.info.byref_regs * 8;
9492 /* Initial gap between fp and sp is 0. */
9493 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9494 return 0;
9496 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9497 return rounded_frame_size (0);
9499 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9500 return rounded_frame_size (0);
9502 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9503 && (to == HARD_FRAME_POINTER_REGNUM
9504 || to == STACK_POINTER_REGNUM));
9505 if (TARGET_SH5)
9507 int n = total_saved_regs_space;
9508 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9509 save_schedule schedule;
9510 save_entry *entry;
9512 n += total_auto_space;
9514 /* If it wasn't saved, there's not much we can do. */
9515 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9516 return n;
9518 target_flags = copy_flags;
9520 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9521 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9522 if (entry->reg == pr_reg)
9524 target_flags = save_flags;
9525 return entry->offset;
9527 gcc_unreachable ();
9529 else
9530 return total_auto_space;
9533 /* Parse the -mfixed-range= option string. */
9534 void
9535 sh_fix_range (const char *const_str)
9537 int i, first, last;
9538 char *str, *dash, *comma;
9540 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9541 REG2 are either register names or register numbers. The effect
9542 of this option is to mark the registers in the range from REG1 to
9543 REG2 as ``fixed'' so they won't be used by the compiler. */
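   /* For example, a hypothetical  -mfixed-range=r8-r10,r13-r13  would mark
      r8, r9, r10 and r13 as fixed (and call-used) for the rest of the
      compilation.  */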
9545 i = strlen (const_str);
9546 str = (char *) alloca (i + 1);
9547 memcpy (str, const_str, i + 1);
9549 while (1)
9551 dash = strchr (str, '-');
9552 if (!dash)
9554 warning (0, "value of -mfixed-range must have form REG1-REG2");
9555 return;
9557 *dash = '\0';
9558 comma = strchr (dash + 1, ',');
9559 if (comma)
9560 *comma = '\0';
9562 first = decode_reg_name (str);
9563 if (first < 0)
9565 warning (0, "unknown register name: %s", str);
9566 return;
9569 last = decode_reg_name (dash + 1);
9570 if (last < 0)
9572 warning (0, "unknown register name: %s", dash + 1);
9573 return;
9576 *dash = '-';
9578 if (first > last)
9580 warning (0, "%s-%s is an empty range", str, dash + 1);
9581 return;
9584 for (i = first; i <= last; ++i)
9585 fixed_regs[i] = call_used_regs[i] = 1;
9587 if (!comma)
9588 break;
9590 *comma = ',';
9591 str = comma + 1;
9595 /* Insert any deferred function attributes from earlier pragmas. */
9596 static void
9597 sh_insert_attributes (tree node, tree *attributes)
9599 tree attrs;
9601 if (TREE_CODE (node) != FUNCTION_DECL)
9602 return;
9604 /* We are only interested in fields. */
9605 if (!DECL_P (node))
9606 return;
9608 /* Append the attributes to the deferred attributes. */
9609 *sh_deferred_function_attributes_tail = *attributes;
9610 attrs = sh_deferred_function_attributes;
9611 if (!attrs)
9612 return;
9614 /* Some attributes imply or require the interrupt attribute. */
9615 if (!lookup_attribute ("interrupt_handler", attrs)
9616 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9618 /* If we have a trapa_handler, but no interrupt_handler attribute,
9619 insert an interrupt_handler attribute. */
9620 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9621 /* We can't use sh_pr_interrupt here because that's not in the
9622 java frontend. */
9623 attrs
9624 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9625 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9626 if the interrupt attribute is missing, we ignore the attribute
9627 and warn. */
9628 else if (lookup_attribute ("sp_switch", attrs)
9629 || lookup_attribute ("trap_exit", attrs)
9630 || lookup_attribute ("nosave_low_regs", attrs)
9631 || lookup_attribute ("resbank", attrs))
9633 tree *tail;
9635 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9637 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9638 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9639 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9640 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9641 warning (OPT_Wattributes,
9642 "%qE attribute only applies to interrupt functions",
9643 TREE_PURPOSE (attrs));
9644 else
9646 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9647 NULL_TREE);
9648 tail = &TREE_CHAIN (*tail);
9651 attrs = *attributes;
9655 /* Install the processed list. */
9656 *attributes = attrs;
9658 /* Clear deferred attributes. */
9659 sh_deferred_function_attributes = NULL_TREE;
9660 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9662 return;
9665 /*------------------------------------------------------------------------------
9666 Target specific attributes
9667 Supported attributes are:
9669 * interrupt_handler
9670 Specifies this function is an interrupt handler.
9672 * trapa_handler
9673 Like interrupt_handler, but don't save all registers.
9675 * sp_switch
9676 Specifies an alternate stack for an interrupt handler to run on.
9678 * trap_exit
9679 Use a trapa to exit an interrupt function instead of rte.
9681 * nosave_low_regs
9682 Don't save r0..r7 in an interrupt handler function.
9683 This is useful on SH3* and SH4*, which have a separate set of low
9684 regs for user and privileged modes.
9685 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9686 those that run with interrupts disabled and thus can't be
9687 interrupted themselves).
9689 * renesas
9690 Use Renesas calling/layout conventions (functions and structures).
9692 * resbank
9693 In case of an interrupt handler function, use a register bank to
9694 save registers R0-R14, MACH, MACL, GBR and PR.
9695 This is available only on SH2A targets.
9697 * function_vector
9698 Declares a function to be called using the TBR relative addressing
9699 mode. Takes an argument that specifies the slot number in the table
9700 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
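/* Illustrative usage (hypothetical declarations, for documentation only;
   "alt_stack", the slot number 12 and the function names are made up):

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
			  nosave_low_regs))
     my_isr (void);

     void __attribute__ ((function_vector (12)))
     my_tbr_func (void);

   sp_switch expects a string constant, while trap_exit and
   function_vector expect integer constants, as the handlers below
   check.  */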
9703 /* Handle a 'resbank' attribute. */
9704 static tree
9705 sh_handle_resbank_handler_attribute (tree * node, tree name,
9706 tree args ATTRIBUTE_UNUSED,
9707 int flags ATTRIBUTE_UNUSED,
9708 bool * no_add_attrs)
9710 if (!TARGET_SH2A)
9712 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9713 name);
9714 *no_add_attrs = true;
9716 if (TREE_CODE (*node) != FUNCTION_DECL)
9718 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9719 name);
9720 *no_add_attrs = true;
9723 return NULL_TREE;
9726 /* Handle an "interrupt_handler" attribute; arguments as in
9727 struct attribute_spec.handler. */
9728 static tree
9729 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9730 tree args ATTRIBUTE_UNUSED,
9731 int flags ATTRIBUTE_UNUSED,
9732 bool *no_add_attrs)
9734 if (TREE_CODE (*node) != FUNCTION_DECL)
9736 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9737 name);
9738 *no_add_attrs = true;
9740 else if (TARGET_SHCOMPACT)
9742 error ("attribute interrupt_handler is not compatible with -m5-compact");
9743 *no_add_attrs = true;
9746 return NULL_TREE;
9749 /* Handle a 'function_vector' attribute; arguments as in
9750 struct attribute_spec.handler. */
9751 static tree
9752 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9753 tree args ATTRIBUTE_UNUSED,
9754 int flags ATTRIBUTE_UNUSED,
9755 bool * no_add_attrs)
9757 if (!TARGET_SH2A)
9759 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9760 name);
9761 *no_add_attrs = true;
9763 else if (TREE_CODE (*node) != FUNCTION_DECL)
9765 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9766 name);
9767 *no_add_attrs = true;
9769 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9771 /* The argument must be a constant integer. */
9772 warning (OPT_Wattributes,
9773 "%qE attribute argument not an integer constant",
9774 name);
9775 *no_add_attrs = true;
9777 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9779 /* The argument value must be between 0 and 255. */
9780 warning (OPT_Wattributes,
9781 "%qE attribute argument should be between 0 and 255",
9782 name);
9783 *no_add_attrs = true;
9785 return NULL_TREE;
9788 /* Returns true if the function referred to by symbol X has been
9789 assigned the attribute 'function_vector'. */
9790 bool
9791 sh2a_is_function_vector_call (rtx x)
9793 if (GET_CODE (x) == SYMBOL_REF
9794 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9796 tree tr = SYMBOL_REF_DECL (x);
9798 if (sh2a_function_vector_p (tr))
9799 return true;
9802 return false;
9805 /* Returns the function vector number, if the attribute
9806 'function_vector' is assigned, otherwise returns zero. */
9807 int
9808 sh2a_get_function_vector_number (rtx x)
9810 int num;
9811 tree list, t;
9813 if ((GET_CODE (x) == SYMBOL_REF)
9814 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9816 t = SYMBOL_REF_DECL (x);
9818 if (TREE_CODE (t) != FUNCTION_DECL)
9819 return 0;
9821 list = SH_ATTRIBUTES (t);
9822 while (list)
9824 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9826 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9827 return num;
9830 list = TREE_CHAIN (list);
9833 return 0;
9835 else
9836 return 0;
9839 /* Handle an "sp_switch" attribute; arguments as in
9840 struct attribute_spec.handler. */
9841 static tree
9842 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9843 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9845 if (TREE_CODE (*node) != FUNCTION_DECL)
9847 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9848 name);
9849 *no_add_attrs = true;
9851 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9853 /* The argument must be a constant string. */
9854 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9855 name);
9856 *no_add_attrs = true;
9859 return NULL_TREE;
9862 /* Handle a "trap_exit" attribute; arguments as in
9863 struct attribute_spec.handler. */
9864 static tree
9865 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9866 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9868 if (TREE_CODE (*node) != FUNCTION_DECL)
9870 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9871 name);
9872 *no_add_attrs = true;
9874 /* The argument specifies a trap number to be used in a trapa instruction
9875 at function exit (instead of an rte instruction). */
9876 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9878 /* The argument must be a constant integer. */
9879 warning (OPT_Wattributes, "%qE attribute argument not an "
9880 "integer constant", name);
9881 *no_add_attrs = true;
9884 return NULL_TREE;
9887 static tree
9888 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9889 tree name ATTRIBUTE_UNUSED,
9890 tree args ATTRIBUTE_UNUSED,
9891 int flags ATTRIBUTE_UNUSED,
9892 bool *no_add_attrs ATTRIBUTE_UNUSED)
9894 return NULL_TREE;
9897 /* True if __attribute__((renesas)) or -mrenesas. */
9898 bool
9899 sh_attr_renesas_p (const_tree td)
9901 if (TARGET_HITACHI)
9902 return true;
9903 if (td == NULL_TREE)
9904 return false;
9905 if (DECL_P (td))
9906 td = TREE_TYPE (td);
9907 if (td == error_mark_node)
9908 return false;
9909 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9910 != NULL_TREE);
9913 /* True if __attribute__((renesas)) or -mrenesas, for the current
9914 function. */
9915 bool
9916 sh_cfun_attr_renesas_p (void)
9918 return sh_attr_renesas_p (current_function_decl);
9921 /* Returns true if the current function has the "interrupt_handler"
9922 attribute set. */
9923 bool
9924 sh_cfun_interrupt_handler_p (void)
9926 return (lookup_attribute ("interrupt_handler",
9927 DECL_ATTRIBUTES (current_function_decl))
9928 != NULL_TREE);
9931 /* Returns true if FUNC has been assigned the attribute
9932 "function_vector". */
9933 bool
9934 sh2a_function_vector_p (tree func)
9936 tree list;
9937 if (TREE_CODE (func) != FUNCTION_DECL)
9938 return false;
9940 list = SH_ATTRIBUTES (func);
9941 while (list)
9943 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9944 return true;
9946 list = TREE_CHAIN (list);
9948 return false;
9951 /* Returns true if the current function has the "resbank" attribute set. */
9952 bool
9953 sh_cfun_resbank_handler_p (void)
9955 return ((lookup_attribute ("resbank",
9956 DECL_ATTRIBUTES (current_function_decl))
9957 != NULL_TREE)
9958 && (lookup_attribute ("interrupt_handler",
9959 DECL_ATTRIBUTES (current_function_decl))
9960 != NULL_TREE) && TARGET_SH2A);
9963 /* Returns true if the current function has a "trap_exit" attribute set. */
9964 bool
9965 sh_cfun_trap_exit_p (void)
9967 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9968 != NULL_TREE;
9971 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9972 static const char *
9973 sh_check_pch_target_flags (int old_flags)
9975 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9976 | MASK_SH_E | MASK_HARD_SH4
9977 | MASK_FPU_SINGLE | MASK_SH4))
9978 return _("created and used with different architectures / ABIs");
9979 if ((old_flags ^ target_flags) & MASK_HITACHI)
9980 return _("created and used with different ABIs");
9981 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9982 return _("created and used with different endianness");
9983 return NULL;
9986 /* Predicates used by the templates. */
9988 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9989 Used only in general_movsrc_operand. */
9990 bool
9991 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9993 switch (REGNO (op))
9995 case PR_REG:
9996 case MACL_REG:
9997 case MACH_REG:
9998 return true;
10000 return false;
10003 /* Returns true if OP is a floating point value with value 0.0. */
10004 bool
10005 fp_zero_operand (rtx op)
10007 REAL_VALUE_TYPE r;
10009 if (GET_MODE (op) != SFmode)
10010 return false;
10012 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10013 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
10016 /* Returns true if OP is a floating point value with value 1.0. */
10017 bool
10018 fp_one_operand (rtx op)
10020 REAL_VALUE_TYPE r;
10022 if (GET_MODE (op) != SFmode)
10023 return false;
10025 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10026 return REAL_VALUES_EQUAL (r, dconst1);
10029 /* Return the TLS type for TLS symbols. */
10030 enum tls_model
10031 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10033 if (GET_CODE (op) != SYMBOL_REF)
10034 return TLS_MODEL_NONE;
10035 return SYMBOL_REF_TLS_MODEL (op);
10038 /* Return the destination address of a branch. */
10039 static int
10040 branch_dest (rtx branch)
10042 rtx dest = SET_SRC (PATTERN (branch));
10043 int dest_uid;
10045 if (GET_CODE (dest) == IF_THEN_ELSE)
10046 dest = XEXP (dest, 1);
10047 dest = XEXP (dest, 0);
10048 dest_uid = INSN_UID (dest);
10049 return INSN_ADDRESSES (dest_uid);
10052 /* Return nonzero if REG is not used after INSN.
10053 We assume REG is a reload reg, and therefore does
10054 not live past labels. It may live past calls or jumps though. */
10055 bool
10056 reg_unused_after (rtx reg, rtx_insn *insn)
10058 enum rtx_code code;
10059 rtx set;
10061 /* If the reg is set by this instruction, then it is safe for our
10062 case. Disregard the case where this is a store to memory, since
10063 we are checking a register used in the store address. */
10064 set = single_set (insn);
10065 if (set && !MEM_P (SET_DEST (set))
10066 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10067 return true;
10069 while ((insn = NEXT_INSN (insn)))
10071 rtx set;
10072 if (!INSN_P (insn))
10073 continue;
10075 code = GET_CODE (insn);
10077 #if 0
10078 /* If this is a label that existed before reload, then the register
10079 is dead here. However, if this is a label added by reorg, then
10080 the register may still be live here. We can't tell the difference,
10081 so we just ignore labels completely. */
10082 if (code == CODE_LABEL)
10083 return 1;
10084 /* else */
10085 #endif
10087 if (code == JUMP_INSN)
10088 return false;
10090 /* If this is a sequence, we must handle them all at once.
10091 We could have for instance a call that sets the target register,
10092 and an insn in a delay slot that uses the register. In this case,
10093 we must return 0. */
10094 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10096 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10097 int i;
10098 int retval = 0;
10100 for (i = 0; i < seq->len (); i++)
10102 rtx_insn *this_insn = seq->insn (i);
10103 rtx set = single_set (this_insn);
10105 if (CALL_P (this_insn))
10106 code = CALL_INSN;
10107 else if (JUMP_P (this_insn))
10109 if (INSN_ANNULLED_BRANCH_P (this_insn))
10110 return false;
10111 code = JUMP_INSN;
10114 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10115 return false;
10116 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10118 if (!MEM_P (SET_DEST (set)))
10119 retval = true;
10120 else
10121 return false;
10123 if (set == NULL_RTX
10124 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10125 return false;
10127 if (retval == 1)
10128 return true;
10129 else if (code == JUMP_INSN)
10130 return false;
10133 set = single_set (insn);
10134 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10135 return false;
10136 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10137 return !MEM_P (SET_DEST (set));
10138 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10139 return false;
10141 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10142 return true;
10144 return true;
10148 static GTY(()) rtx t_reg_rtx;
10149 rtx
10150 get_t_reg_rtx (void)
10152 if (! t_reg_rtx)
10153 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10154 return t_reg_rtx;
10157 static GTY(()) tree fpscr_values;
10159 static void
10160 emit_fpu_switch (rtx scratch, int index)
10162 rtx src;
10164 if (fpscr_values == NULL)
10166 tree t;
10168 t = build_index_type (integer_one_node);
10169 t = build_array_type (integer_type_node, t);
10170 t = build_decl (BUILTINS_LOCATION,
10171 VAR_DECL, get_identifier ("__fpscr_values"), t);
10172 DECL_ARTIFICIAL (t) = 1;
10173 DECL_IGNORED_P (t) = 1;
10174 DECL_EXTERNAL (t) = 1;
10175 TREE_STATIC (t) = 1;
10176 TREE_PUBLIC (t) = 1;
10177 TREE_USED (t) = 1;
10179 fpscr_values = t;
10182 src = DECL_RTL (fpscr_values);
10183 if (!can_create_pseudo_p ())
10185 emit_move_insn (scratch, XEXP (src, 0));
10186 if (index != 0)
10187 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10188 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10190 else
10191 src = adjust_address (src, SImode, index * 4);
10193 emit_insn (gen_lds_fpscr (src));
10196 static rtx get_free_reg (HARD_REG_SET);
10198 /* This function returns a register to use to load the address to load
10199 the fpscr from. Currently it always returns r1 or r7, but when we are
10200 able to use pseudo registers after combine, or have a better mechanism
10201 for choosing a register, it should be done here. */
10202 /* REGS_LIVE is the liveness information for the point for which we
10203 need this allocation. In some bare-bones exit blocks, r1 is live at the
10204 start. We can even have all of r0..r3 being live:
10205 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10206 The INSN before which new insns are placed will clobber the register
10207 we return. If a basic block consists only of setting the return value
10208 register to a pseudo and using that register, the return value is not
10209 live before or after this block, yet we'll insert our insns right in
10210 the middle. */
10211 static rtx
10212 get_free_reg (HARD_REG_SET regs_live)
10214 if (! TEST_HARD_REG_BIT (regs_live, 1))
10215 return gen_rtx_REG (Pmode, 1);
10217 /* Hard reg 1 is live; since this is a small register classes target,
10218 there shouldn't be anything but a jump before the function end. */
10219 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10220 return gen_rtx_REG (Pmode, 7);
10223 /* This function will set the fpscr from memory.
10224 MODE is the mode we are setting it to. */
10225 void
10226 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10228 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10229 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10230 rtx addr_reg;
10232 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10233 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10236 /* Is the given character a logical line separator for the assembler? */
10237 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10238 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10239 #endif
10241 static bool
10242 sequence_insn_p (rtx_insn *insn)
10244 rtx_insn *prev, *next;
10246 prev = PREV_INSN (insn);
10247 if (prev == NULL)
10248 return false;
10250 next = NEXT_INSN (prev);
10251 if (next == NULL)
10252 return false;
10254 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10258 sh_insn_length_adjustment (rtx_insn *insn)
10260 /* Instructions with unfilled delay slots take up an extra two bytes for
10261 the nop in the delay slot. */
10262 if (((NONJUMP_INSN_P (insn)
10263 && GET_CODE (PATTERN (insn)) != USE
10264 && GET_CODE (PATTERN (insn)) != CLOBBER)
10265 || CALL_P (insn) || JUMP_P (insn))
10266 && ! sequence_insn_p (insn)
10267 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10268 return 2;
10270 /* Increase the insn length of a cbranch without a delay slot insn to
10271 force a delay slot which will be stuffed with a nop. */
10272 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10273 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10274 && ! sequence_insn_p (insn))
10275 return 2;
10277 /* sh-dsp parallel processing insns take four bytes instead of two. */
10279 if (NONJUMP_INSN_P (insn))
10281 int sum = 0;
10282 rtx body = PATTERN (insn);
10283 const char *templ;
10284 char c;
10285 bool maybe_label = true;
10287 if (GET_CODE (body) == ASM_INPUT)
10288 templ = XSTR (body, 0);
10289 else if (asm_noperands (body) >= 0)
10290 templ
10291 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10292 else
10293 return 0;
10296 int ppi_adjust = 0;
10299 c = *templ++;
10300 while (c == ' ' || c == '\t');
10301 /* all sh-dsp parallel-processing insns start with p.
10302 The only non-ppi sh insn starting with p is pref.
10303 The only ppi starting with pr is prnd. */
10304 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10305 ppi_adjust = 2;
10306 /* The repeat pseudo-insn expands to three insns, a total of
10307 six bytes in size. */
10308 else if ((c == 'r' || c == 'R')
10309 && ! strncasecmp ("epeat", templ, 5))
10310 ppi_adjust = 4;
10311 while (c && c != '\n'
10312 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10314 /* If this is a label, it is obviously not a ppi insn. */
10315 if (c == ':' && maybe_label)
10317 ppi_adjust = 0;
10318 break;
10320 else if (c == '\'' || c == '"')
10321 maybe_label = false;
10322 c = *templ++;
10324 sum += ppi_adjust;
10325 maybe_label = c != ':';
10327 while (c);
10328 return sum;
10330 return 0;
10333 /* Return TRUE for a valid displacement for the REG+disp addressing
10334 with MODE. */
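/* Illustrative note (hedged summary, not from the original comments): for a
   plain SImode access this accepts displacements that are multiples of 4
   from 0 up to the SImode limit described for sh_find_mov_disp_adjust
   below (60 bytes; more when SH2A's larger displacements are considered),
   while the SHmedia case accepts size-aligned offsets in
   [-512 * size, 512 * size).  */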
10335 bool
10336 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10337 bool allow_zero)
10339 if (! CONST_INT_P (op))
10340 return false;
10342 if (TARGET_SHMEDIA)
10344 int size;
10346 /* Check if this is the address of an unaligned load / store. */
10347 if (mode == VOIDmode)
10348 return satisfies_constraint_I06 (op);
10350 size = GET_MODE_SIZE (mode);
10351 return (!(INTVAL (op) & (size - 1))
10352 && INTVAL (op) >= -512 * size
10353 && INTVAL (op) < 512 * size);
10355 else
10357 const HOST_WIDE_INT offset = INTVAL (op);
10358 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10359 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10361 /* If the mode does not support any displacement always return false.
10362 Even though an index of '0' is actually always valid, it will cause
10363 troubles when e.g. a DFmode move is split into two SFmode moves,
10364 where one SFmode move will have index '0' and the other move will
10365 have index '4'. */
10366 if (!allow_zero && max_disp < 1)
10367 return false;
10369 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10373 /* Recognize an RTL expression that is a valid memory address for
10374 an instruction.
10375 The MODE argument is the machine mode for the MEM expression
10376 that wants to use this address.
10377 Allow REG
10378 REG+disp
10379 REG+r0
10380 REG++
10381 --REG
10383 GBR+disp */
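/* Illustrative note: in SH assembly operand syntax these correspond to
   forms such as  @rn, @(disp,rn), @(r0,rn), @rn+, @-rn  and
   @(disp,gbr).  */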
10384 static bool
10385 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10387 if (! ALLOW_INDEXED_ADDRESS
10388 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10389 return false;
10391 if (REG_P (x) && REGNO (x) == GBR_REG)
10392 return true;
10394 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10395 return true;
10396 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10397 && ! TARGET_SHMEDIA
10398 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10399 return true;
10400 else if (GET_CODE (x) == PLUS)
10402 rtx xop0 = XEXP (x, 0);
10403 rtx xop1 = XEXP (x, 1);
10405 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10406 return gbr_displacement (xop1, mode);
10408 if (GET_MODE_SIZE (mode) <= 8
10409 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10410 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10411 return true;
10413 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10414 || ((xop0 == stack_pointer_rtx
10415 || xop0 == hard_frame_pointer_rtx)
10416 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10417 || ((xop1 == stack_pointer_rtx
10418 || xop1 == hard_frame_pointer_rtx)
10419 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10420 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10421 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10422 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10423 && TARGET_FMOVD && mode == DFmode)))
10425 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10426 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10427 return true;
10428 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10429 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10430 return true;
10434 return false;
10437 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10438 isn't protected by a PIC unspec. */
10439 bool
10440 nonpic_symbol_mentioned_p (rtx x)
10442 const char *fmt;
10443 int i;
10445 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10446 || GET_CODE (x) == PC)
10447 return true;
10449 /* We don't want to look into the possible MEM location of a
10450 CONST_DOUBLE, since we're not going to use it, in general. */
10451 if (GET_CODE (x) == CONST_DOUBLE)
10452 return false;
10454 if (GET_CODE (x) == UNSPEC
10455 && (XINT (x, 1) == UNSPEC_PIC
10456 || XINT (x, 1) == UNSPEC_GOT
10457 || XINT (x, 1) == UNSPEC_GOTOFF
10458 || XINT (x, 1) == UNSPEC_GOTPLT
10459 || XINT (x, 1) == UNSPEC_GOTTPOFF
10460 || XINT (x, 1) == UNSPEC_DTPOFF
10461 || XINT (x, 1) == UNSPEC_TPOFF
10462 || XINT (x, 1) == UNSPEC_PLT
10463 || XINT (x, 1) == UNSPEC_SYMOFF
10464 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10465 return false;
10467 fmt = GET_RTX_FORMAT (GET_CODE (x));
10468 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10470 if (fmt[i] == 'E')
10472 int j;
10473 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10474 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10475 return true;
10477 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10478 return true;
10481 return false;
10484 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10485 @GOTOFF in `reg'. */
10486 rtx
10487 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10488 rtx reg)
10490 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10491 return orig;
10493 if (GET_CODE (orig) == LABEL_REF
10494 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10496 if (reg == NULL_RTX)
10497 reg = gen_reg_rtx (Pmode);
10499 emit_insn (gen_symGOTOFF2reg (reg, orig));
10500 return reg;
10502 else if (GET_CODE (orig) == SYMBOL_REF)
10504 if (reg == NULL_RTX)
10505 reg = gen_reg_rtx (Pmode);
10507 emit_insn (gen_symGOT2reg (reg, orig));
10508 return reg;
10510 return orig;
10513 /* Given a (logical) mode size and an offset in bytes, try to find the
10514 appropriate displacement value for a mov insn. On SH the displacements
10515 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10516 15 bytes in QImode. To compensate for this we create a new base address by
10517 adding an adjustment value to it.
10519 If the originally requested offset is greater than 127 we prefer using
10520 values 124..127 over 128..131 to increase opportunities to use the
10521 add #imm, Rn insn.
10523 In some cases it is possible that a requested offset might seem unaligned
10524 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10525 This is compensated by adjusting the base address so that the effective
10526 address of the displacement move insn will be aligned.
10528 This is not the best possible way of rebasing the base address, as it
10529 does not look at other displacement addressings already present around it.
10530 In some cases this can create more base address adjustments than would
10531 actually be necessary. */
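/* Worked example (illustrative sketch, using the 60-byte SImode limit
   noted above): for an SImode access at offset 132, offset_adjust becomes
   124 and mov_disp becomes 8, so the effective address is formed as
   (base + 124) + 8; the adjustment of 124 still fits the add #imm,Rn
   immediate range, which is why 124..127 is preferred for offsets
   above 127.  */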
10532 struct disp_adjust
10534 rtx offset_adjust;
10535 rtx mov_disp;
10538 static struct disp_adjust
10539 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10541 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10543 /* Do not try to use SH2A's large displacements here, because this would
10544 effectively disable the small displacement insns. */
10545 const int mode_sz = GET_MODE_SIZE (mode);
10546 const int mov_insn_sz = mov_insn_size (mode, false);
10547 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10548 const int max_disp_next = max_disp + mov_insn_sz;
10549 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10550 HOST_WIDE_INT offset_adjust;
10552 /* In some cases this actually does happen and we must check for it. */
10553 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10554 return res;
10556 /* Keeps the previous behavior for QImode displacement addressing.
10557 This just decides how the offset is re-based. Removing this special
10558 case will result in slightly bigger code on average, but it's not that
10559 bad actually. */
10560 if (mov_insn_sz == 1)
10561 align_modifier = 0;
10563 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10565 if (mode_sz + offset - offset_adjust <= max_disp_next)
10567 res.offset_adjust = GEN_INT (offset_adjust);
10568 res.mov_disp = GEN_INT (offset - offset_adjust);
10571 return res;
10574 /* Try to modify an illegitimate address and make it legitimate.
10575 If we find one, return the new, valid address.
10576 Otherwise, return the original address. */
10577 static rtx
10578 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10580 if (flag_pic)
10581 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10583 if (TARGET_SHMEDIA)
10584 return x;
10586 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10587 || (TARGET_SH2E && mode == SFmode))
10588 return x;
10590 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10591 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10593 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10594 INTVAL (XEXP (x, 1)));
10596 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10598 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10599 adj.offset_adjust, NULL_RTX, 0,
10600 OPTAB_LIB_WIDEN);
10601 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10604 return x;
10607 /* Attempt to replace *p, which is an address that needs reloading, with
10608 a valid memory address for an operand of mode MODE.
10609 Like for sh_legitimize_address, for the SH we try to get a normal form
10610 of the address. That will allow inheritance of the address reloads. */
10611 bool
10612 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10613 int itype)
10615 enum reload_type type = (enum reload_type) itype;
10616 const int mode_sz = GET_MODE_SIZE (mode);
10618 if (sh_lra_p ())
10619 return false;
10621 if (! ALLOW_INDEXED_ADDRESS
10622 && GET_CODE (*p) == PLUS
10623 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10625 *p = copy_rtx (*p);
10626 push_reload (*p, NULL_RTX, p, NULL,
10627 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10628 return true;
10631 if (! ALLOW_INDEXED_ADDRESS
10632 && GET_CODE (*p) == PLUS
10633 && GET_CODE (XEXP (*p, 0)) == PLUS)
10635 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10636 XEXP (XEXP (*p, 0), 1));
10637 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10638 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10639 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10640 return true;
10643 if (TARGET_SHMEDIA)
10644 return false;
10646 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10647 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10648 && (ALLOW_INDEXED_ADDRESS
10649 || XEXP (*p, 0) == stack_pointer_rtx
10650 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10652 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10653 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10655 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10657 push_reload (*p, NULL_RTX, p, NULL,
10658 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10659 return true;
10662 if (TARGET_SH2E && mode == SFmode)
10664 *p = copy_rtx (*p);
10665 push_reload (*p, NULL_RTX, p, NULL,
10666 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10667 return true;
10670 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10671 moves because then reload has a problem figuring the constraint
10672 that the move insn target/source reg must be R0.
10673 Or maybe some handling is wrong in sh_secondary_reload for this
10674 to work properly? */
10675 if ((mode_sz == 4 || mode_sz == 8)
10676 && ! (TARGET_SH4 && mode == DFmode)
10677 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10679 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10680 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10681 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10682 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10683 return true;
10687 /* We must re-recognize what we created before. */
10688 if (GET_CODE (*p) == PLUS
10689 && (mode_sz == 4 || mode_sz == 8)
10690 && GET_CODE (XEXP (*p, 0)) == PLUS
10691 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10692 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10693 && CONST_INT_P (XEXP (*p, 1))
10694 && ! (TARGET_SH2E && mode == SFmode))
10696 /* Because this address is so complex, we know it must have
10697 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10698 it is already unshared, and needs no further unsharing. */
10699 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10700 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10701 return true;
10704 return false;
10707 /* In the name of slightly smaller debug output, and to cater to
10708 general assembler lossage, recognize various UNSPEC sequences
10709 and turn them back into a direct symbol reference. */
10710 static rtx
10711 sh_delegitimize_address (rtx orig_x)
10713 rtx x, y;
10715 orig_x = delegitimize_mem_from_attrs (orig_x);
10717 x = orig_x;
10718 if (MEM_P (x))
10719 x = XEXP (x, 0);
10720 if (GET_CODE (x) == CONST)
10722 y = XEXP (x, 0);
10723 if (GET_CODE (y) == UNSPEC)
10725 if (XINT (y, 1) == UNSPEC_GOT
10726 || XINT (y, 1) == UNSPEC_GOTOFF
10727 || XINT (y, 1) == UNSPEC_SYMOFF)
10728 return XVECEXP (y, 0, 0);
10729 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10731 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10733 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10735 if (GET_CODE (symplt) == UNSPEC
10736 && XINT (symplt, 1) == UNSPEC_PLT)
10737 return XVECEXP (symplt, 0, 0);
10740 else if (TARGET_SHMEDIA
10741 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10742 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10744 rtx offset = XVECEXP (y, 0, 1);
10746 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10747 if (MEM_P (orig_x))
10748 x = replace_equiv_address_nv (orig_x, x);
10749 return x;
10754 return orig_x;
10757 /* Mark the use of a constant in the literal table. If the constant
10758 has multiple labels, make it unique. */
10759 static rtx
10760 mark_constant_pool_use (rtx x)
10762 rtx_insn *insn, *lab;
10763 rtx pattern;
10765 if (x == NULL_RTX)
10766 return x;
10768 switch (GET_CODE (x))
10770 case LABEL_REF:
10771 x = XEXP (x, 0);
10772 case CODE_LABEL:
10773 break;
10774 default:
10775 return x;
10778 /* Get the first label in the list of labels for the same constant
10779 and delete the other labels in the list. */
10780 lab = as_a <rtx_insn *> (x);
10781 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10783 if (!LABEL_P (insn)
10784 || LABEL_REFS (insn) != NEXT_INSN (insn))
10785 break;
10786 lab = insn;
10789 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10790 as_a<rtx_insn *> (insn)->set_deleted ();
10792 /* Mark constants in a window. */
10793 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10795 if (!NONJUMP_INSN_P (insn))
10796 continue;
10798 pattern = PATTERN (insn);
10799 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10800 continue;
10802 switch (XINT (pattern, 1))
10804 case UNSPECV_CONST2:
10805 case UNSPECV_CONST4:
10806 case UNSPECV_CONST8:
10807 XVECEXP (pattern, 0, 1) = const1_rtx;
10808 break;
10809 case UNSPECV_WINDOW_END:
10810 if (XVECEXP (pattern, 0, 0) == x)
10811 return lab;
10812 break;
10813 case UNSPECV_CONST_END:
10814 return lab;
10815 default:
10816 break;
10820 return lab;
10823 /* Return true if it's possible to redirect BRANCH1 to the destination
10824 of an unconditional jump BRANCH2. We only want to do this if the
10825 resulting branch will have a short displacement. */
10826 static bool
10827 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10829 /* Don't follow if BRANCH2 may be a jump crossing between
10830 hot and cold partitions. */
10831 if (TARGET_SH1
10832 && flag_reorder_blocks_and_partition
10833 && simplejump_p (branch2)
10834 && CROSSING_JUMP_P (branch2))
10835 return false;
10837 if (flag_expensive_optimizations && simplejump_p (branch2))
10839 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10840 rtx_insn *insn;
10841 int distance;
10843 for (distance = 0, insn = NEXT_INSN (branch1);
10844 insn && distance < 256;
10845 insn = PREV_INSN (insn))
10847 if (insn == dest)
10848 return true;
10849 else
10850 distance += get_attr_length (insn);
10852 for (distance = 0, insn = NEXT_INSN (branch1);
10853 insn && distance < 256;
10854 insn = NEXT_INSN (insn))
10856 if (insn == dest)
10857 return true;
10858 else
10859 distance += get_attr_length (insn);
10862 return false;
10865 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10866 bool
10867 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10868 unsigned int new_reg)
10870 /* Interrupt functions can only use registers that have already been
10871 saved by the prologue, even if they would normally be
10872 call-clobbered. */
10873 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10874 return false;
10876 return true;
10879 /* Function to update the integer COST
10880 based on the relationship between INSN that is dependent on
10881 DEP_INSN through the dependence LINK. The default is to make no
10882 adjustment to COST. This can be used for example to specify to
10883 the scheduler that an output- or anti-dependence does not incur
10884 the same cost as a data-dependence. The return value should be
10885 the new value for COST. */
10886 static int
10887 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10888 rtx_insn *dep_insn, int cost)
10890 rtx reg, use_pat;
10892 if (TARGET_SHMEDIA)
10894 /* On SHmedia, if the dependence is an anti-dependence or
10895 output-dependence, there is no cost. */
10896 if (REG_NOTE_KIND (link) != 0)
10898 /* However, dependencies between target register loads and
10899 uses of the register in a subsequent block that are separated
10900 by a conditional branch are not modelled - we have to make do with
10901 the anti-dependency between the target register load and the
10902 conditional branch that ends the current block. */
10903 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10904 && GET_CODE (PATTERN (dep_insn)) == SET
10905 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10906 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10907 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10909 int orig_cost = cost;
10910 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10911 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10912 ? insn : JUMP_LABEL (insn));
10913 /* On the likely path, the branch costs 1, on the unlikely path,
10914 it costs 3. */
10915 cost--;
10917 target = next_active_insn (target);
10918 while (target && ! flow_dependent_p (target, dep_insn)
10919 && --cost > 0);
10920 /* If two branches are executed in immediate succession, with the
10921 first branch properly predicted, this causes a stall at the
10922 second branch, hence we won't need the target for the
10923 second branch for two cycles after the launch of the first
10924 branch. */
10925 if (cost > orig_cost - 2)
10926 cost = orig_cost - 2;
10928 else
10929 cost = 0;
10932 else if (get_attr_is_mac_media (insn)
10933 && get_attr_is_mac_media (dep_insn))
10934 cost = 1;
10936 else if (! reload_completed
10937 && GET_CODE (PATTERN (insn)) == SET
10938 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10939 && GET_CODE (PATTERN (dep_insn)) == SET
10940 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10941 && cost < 4)
10942 cost = 4;
10943 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10944 that is needed at the target. */
10945 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10946 && ! flow_dependent_p (insn, dep_insn))
10947 cost--;
10949 else if (REG_NOTE_KIND (link) == 0)
10951 enum attr_type type;
10952 rtx dep_set;
10954 if (recog_memoized (insn) < 0
10955 || recog_memoized (dep_insn) < 0)
10956 return cost;
10958 dep_set = single_set (dep_insn);
10960 /* The latency that we specify in the scheduling description refers
10961 to the actual output, not to an auto-increment register; for that,
10962 the latency is one. */
10963 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10965 rtx set = single_set (insn);
10967 if (set
10968 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10969 && (!MEM_P (SET_DEST (set))
10970 || !reg_mentioned_p (SET_DEST (dep_set),
10971 XEXP (SET_DEST (set), 0))))
10972 cost = 1;
10974 /* The only input for a call that is timing-critical is the
10975 function's address. */
10976 if (CALL_P (insn))
10978 rtx call = get_call_rtx_from (insn);
10979 if (call
10980 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10981 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10982 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10983 cost -= TARGET_SH4_300 ? 3 : 6;
10985 /* Likewise, the most timing critical input for an sfuncs call
10986 is the function address. However, sfuncs typically start
10987 using their arguments pretty quickly.
10988 Assume a four cycle delay for SH4 before they are needed.
10989 Cached ST40-300 calls are quicker, so assume only a one
10990 cycle delay there.
10991 ??? Maybe we should encode the delays till input registers
10992 are needed by sfuncs into the sfunc call insn. */
10993 /* All sfunc calls are parallels with at least four components.
10994 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10995 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10996 && XVECLEN (PATTERN (insn), 0) >= 4
10997 && (reg = sfunc_uses_reg (insn)))
10999 if (! reg_set_p (reg, dep_insn))
11000 cost -= TARGET_SH4_300 ? 1 : 4;
11002 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
11004 enum attr_type dep_type = get_attr_type (dep_insn);
11006 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
11007 cost--;
11008 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
11009 && (type = get_attr_type (insn)) != TYPE_CALL
11010 && type != TYPE_SFUNC)
11011 cost--;
11012 /* When the preceding instruction loads the shift amount of
11013 the following SHAD/SHLD, the latency of the load is increased
11014 by 1 cycle. */
11015 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11016 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11017 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11018 XEXP (SET_SRC (single_set (insn)),
11019 1)))
11020 cost++;
11021 /* When an LS group instruction with a latency of less than
11022 3 cycles is followed by a double-precision floating-point
11023 instruction, FIPR, or FTRV, the latency of the first
11024 instruction is increased to 3 cycles. */
11025 else if (cost < 3
11026 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11027 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11028 cost = 3;
11029 /* The lsw register of a double-precision computation is ready one
11030 cycle earlier. */
11031 else if (reload_completed
11032 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11033 && (use_pat = single_set (insn))
11034 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11035 SET_SRC (use_pat)))
11036 cost -= 1;
11038 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11039 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11040 cost -= 1;
11042 else if (TARGET_SH4_300)
11044 /* Stores need their input register two cycles later. */
11045 if (dep_set && cost >= 1
11046 && ((type = get_attr_type (insn)) == TYPE_STORE
11047 || type == TYPE_PSTORE
11048 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11050 rtx set = single_set (insn);
11052 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11053 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11055 cost -= 2;
11056 /* But don't reduce the cost below 1 if the address depends
11057 on a side effect of dep_insn. */
11058 if (cost < 1
11059 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11060 cost = 1;
11065 /* An anti-dependence penalty of two applies if the first insn is a double
11066 precision fadd / fsub / fmul. */
11067 else if (!TARGET_SH4_300
11068 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11069 && recog_memoized (dep_insn) >= 0
11070 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11071 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11072 /* A lot of alleged anti-flow dependences are fake,
11073 so check this one is real. */
11074 && flow_dependent_p (dep_insn, insn))
11075 cost = 2;
11077 return cost;
11080 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11081 if DEP_INSN is anti-flow dependent on INSN. */
11082 static bool
11083 flow_dependent_p (rtx insn, rtx dep_insn)
11085 rtx tmp = PATTERN (insn);
11087 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11088 return tmp == NULL_RTX;
11091 /* A helper function for flow_dependent_p called through note_stores. */
11092 static void
11093 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11095 rtx * pinsn = (rtx *) data;
11097 if (*pinsn && reg_referenced_p (x, *pinsn))
11098 *pinsn = NULL_RTX;
11101 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11102 'special function' patterns (type sfunc) that clobber pr, but that
11103 do not look like function calls to leaf_function_p. Hence we must
11104 do this extra check. */
11105 static int
11106 sh_pr_n_sets (void)
11108 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11111 /* Return where to allocate pseudo for a given hard register initial
11112 value. */
11113 static rtx
11114 sh_allocate_initial_value (rtx hard_reg)
11116 rtx x;
11118 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11120 if (crtl->is_leaf
11121 && ! sh_pr_n_sets ()
11122 && ! (TARGET_SHCOMPACT
11123 && ((crtl->args.info.call_cookie
11124 & ~ CALL_COOKIE_RET_TRAMP (1))
11125 || crtl->saves_all_registers)))
11126 x = hard_reg;
11127 else
11128 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11130 else
11131 x = NULL_RTX;
11133 return x;
11136 /* This function returns "2" to indicate dual issue for the SH4
11137 processor. To be used by the DFA pipeline description. */
11138 static int
11139 sh_issue_rate (void)
11141 if (TARGET_SUPERSCALAR)
11142 return 2;
11143 else
11144 return 1;
11147 /* Functions for ready queue reordering for sched1. */
11149 /* Get weight for mode for a set x. */
11150 static short
11151 find_set_regmode_weight (rtx x, machine_mode mode)
11153 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11154 return 1;
11155 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11157 if (REG_P (SET_DEST (x)))
11159 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11160 return 1;
11161 else
11162 return 0;
11164 return 1;
11166 return 0;
11169 /* Get regmode weight for insn. */
11170 static short
11171 find_insn_regmode_weight (rtx insn, machine_mode mode)
11173 short reg_weight = 0;
11174 rtx x;
11176 /* Increment weight for each register born here. */
11177 x = PATTERN (insn);
11178 reg_weight += find_set_regmode_weight (x, mode);
11179 if (GET_CODE (x) == PARALLEL)
11181 int j;
11182 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11184 x = XVECEXP (PATTERN (insn), 0, j);
11185 reg_weight += find_set_regmode_weight (x, mode);
11188 /* Decrement weight for each register that dies here. */
11189 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11191 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11193 rtx note = XEXP (x, 0);
11194 if (REG_P (note) && GET_MODE (note) == mode)
11195 reg_weight--;
11198 return reg_weight;
11201 /* Calculate regmode weights for all insns of a basic block. */
11202 static void
11203 find_regmode_weight (basic_block b, machine_mode mode)
11205 rtx_insn *insn, *next_tail, *head, *tail;
11207 get_ebb_head_tail (b, b, &head, &tail);
11208 next_tail = NEXT_INSN (tail);
11210 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11212 /* Handle register life information. */
11213 if (!INSN_P (insn))
11214 continue;
11216 if (mode == SFmode)
11217 INSN_REGMODE_WEIGHT (insn, mode) =
11218 find_insn_regmode_weight (insn, mode)
11219 + 2 * find_insn_regmode_weight (insn, DFmode);
11220 else if (mode == SImode)
11221 INSN_REGMODE_WEIGHT (insn, mode) =
11222 find_insn_regmode_weight (insn, mode)
11223 + 2 * find_insn_regmode_weight (insn, DImode);
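/* For example, an insn that sets a DFmode register is counted with weight 2
   towards SFmode pressure (and likewise DImode towards SImode), since the
   wider value occupies two of the narrower registers; a plain SFmode or
   SImode set counts as 1.  */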
11227 /* Comparison function for ready queue sorting. */
11228 static int
11229 rank_for_reorder (const void *x, const void *y)
11231 rtx_insn *tmp = *(rtx_insn * const *) y;
11232 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11234 /* The insn in a schedule group should be issued first. */
11235 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11236 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11238 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11239 minimizes instruction movement, thus minimizing sched's effect on
11240 register pressure. */
11241 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11244 /* Resort the array A in which only the element at index N may be out of order. */
11245 static void
11246 swap_reorder (rtx_insn **a, int n)
11248 rtx_insn *insn = a[n - 1];
11249 int i = n - 2;
11251 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11253 a[i + 1] = a[i];
11254 i -= 1;
11256 a[i + 1] = insn;
11259 /* Sort the ready list by ascending priority. */
11260 static void
11261 ready_reorder (rtx_insn **ready, int nready)
11263 if (nready == 2)
11264 swap_reorder (ready, nready);
11265 else if (nready > 2)
11266 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11269 /* Count life regions of r0 for a block. */
11270 static int
11271 find_r0_life_regions (basic_block b)
11273 rtx_insn *end, *insn;
11274 rtx pset;
11275 rtx r0_reg;
11276 int live;
11277 int set;
11278 int death = 0;
11280 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11282 set = 1;
11283 live = 1;
11285 else
11287 set = 0;
11288 live = 0;
11291 insn = BB_HEAD (b);
11292 end = BB_END (b);
11293 r0_reg = gen_rtx_REG (SImode, R0_REG);
11294 while (1)
11296 if (INSN_P (insn))
11298 if (find_regno_note (insn, REG_DEAD, R0_REG))
11300 death++;
11301 live = 0;
11303 if (!live
11304 && (pset = single_set (insn))
11305 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11306 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11308 set++;
11309 live = 1;
11312 if (insn == end)
11313 break;
11314 insn = NEXT_INSN (insn);
11316 return set - death;
11319 /* Calculate regmode weights for all insns of all basic blocks. */
11320 static void
11321 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11322 int verbose ATTRIBUTE_UNUSED,
11323 int old_max_uid)
11325 basic_block b;
11327 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11328 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11329 r0_life_regions = 0;
11331 FOR_EACH_BB_REVERSE_FN (b, cfun)
11333 find_regmode_weight (b, SImode);
11334 find_regmode_weight (b, SFmode);
11335 if (!reload_completed)
11336 r0_life_regions += find_r0_life_regions (b);
11339 CURR_REGMODE_PRESSURE (SImode) = 0;
11340 CURR_REGMODE_PRESSURE (SFmode) = 0;
11343 /* Cleanup. */
11344 static void
11345 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11346 int verbose ATTRIBUTE_UNUSED)
11348 if (regmode_weight[0])
11350 free (regmode_weight[0]);
11351 regmode_weight[0] = NULL;
11353 if (regmode_weight[1])
11355 free (regmode_weight[1]);
11356 regmode_weight[1] = NULL;
11360 /* The set of scalar modes supported differs from the default version only
11361 for 32-bit SHMEDIA, where TImode is not supported. */
11362 static bool
11363 sh_scalar_mode_supported_p (machine_mode mode)
11365 if (TARGET_SHMEDIA32 && mode == TImode)
11366 return false;
11368 return default_scalar_mode_supported_p (mode);
11371 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11372 keep count of register pressures on SImode and SFmode. */
11373 static int
11374 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11375 int sched_verbose ATTRIBUTE_UNUSED,
11376 rtx_insn *insn,
11377 int can_issue_more)
11379 if (GET_CODE (PATTERN (insn)) != USE
11380 && GET_CODE (PATTERN (insn)) != CLOBBER)
11381 cached_can_issue_more = can_issue_more - 1;
11382 else
11383 cached_can_issue_more = can_issue_more;
11385 if (reload_completed)
11386 return cached_can_issue_more;
11388 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11389 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11391 return cached_can_issue_more;
11394 static void
11395 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11396 int verbose ATTRIBUTE_UNUSED,
11397 int veclen ATTRIBUTE_UNUSED)
11399 CURR_REGMODE_PRESSURE (SImode) = 0;
11400 CURR_REGMODE_PRESSURE (SFmode) = 0;
11403 /* Some magic numbers. */
11404 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11405 functions that already have high pressure on r0. */
11406 #define R0_MAX_LIFE_REGIONS 2
11407 /* Register Pressure thresholds for SImode and SFmode registers. */
11408 #define SIMODE_MAX_WEIGHT 5
11409 #define SFMODE_MAX_WEIGHT 10
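/* Roughly, with the weights computed above, six insns that each start a new
   DFmode value (with nothing dying) push CURR_REGMODE_PRESSURE (SFmode) to
   12, which exceeds SFMODE_MAX_WEIGHT (10) and makes sh_reorder re-sort the
   ready list by original insn order; likewise for SImode once the pressure
   exceeds 5.  */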
11411 /* Return true if the pressure is high for MODE. */
11412 static bool
11413 high_pressure (machine_mode mode)
11415 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11416 functions that already have high pressure on r0. */
11417 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11418 return true;
11420 if (mode == SFmode)
11421 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11422 else
11423 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11426 /* Reorder ready queue if register pressure is high. */
11427 static int
11428 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11429 int sched_verbose ATTRIBUTE_UNUSED,
11430 rtx_insn **ready,
11431 int *n_readyp,
11432 int clock_var ATTRIBUTE_UNUSED)
11434 if (reload_completed)
11435 return sh_issue_rate ();
11437 if (high_pressure (SFmode) || high_pressure (SImode))
11439 ready_reorder (ready, *n_readyp);
11442 return sh_issue_rate ();
11445 /* Skip cycles if the current register pressure is high. */
11446 static int
11447 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11448 int sched_verbose ATTRIBUTE_UNUSED,
11449 rtx_insn **ready ATTRIBUTE_UNUSED,
11450 int *n_readyp ATTRIBUTE_UNUSED,
11451 int clock_var ATTRIBUTE_UNUSED)
11453 if (reload_completed)
11454 return cached_can_issue_more;
11456 if (high_pressure (SFmode) || high_pressure (SImode))
11457 skip_cycles = 1;
11459 return cached_can_issue_more;
11462 /* Skip cycles without sorting the ready queue. This will move insns from
11463 Q->R. If this is the last cycle we are skipping, allow sorting of the
11464 ready queue by sh_reorder. */
11466 /* Generally, skipping this many cycles is sufficient for all insns to move
11467 from Q -> R. */
11468 #define MAX_SKIPS 8
11470 static int
11471 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11472 int sched_verbose ATTRIBUTE_UNUSED,
11473 rtx_insn *insn ATTRIBUTE_UNUSED,
11474 int last_clock_var,
11475 int clock_var,
11476 int *sort_p)
11478 if (reload_completed)
11479 return 0;
11481 if (skip_cycles)
11483 if ((clock_var - last_clock_var) < MAX_SKIPS)
11485 *sort_p = 0;
11486 return 1;
11488 /* If this is the last cycle we are skipping, allow reordering of R. */
11489 if ((clock_var - last_clock_var) == MAX_SKIPS)
11491 *sort_p = 1;
11492 return 1;
11496 skip_cycles = 0;
11498 return 0;
11501 /* SHmedia requires registers for branches, so we can't generate new
11502 branches past reload. */
11503 static bool
11504 sh_cannot_modify_jumps_p (void)
11506 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11509 static reg_class_t
11510 sh_target_reg_class (void)
11512 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11515 static bool
11516 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11518 if (! shmedia_space_reserved_for_target_registers)
11519 return 0;
11520 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11521 return 0;
11523 HARD_REG_SET dummy;
11524 if (calc_live_regs (&dummy) >= 6 * 8)
11525 return 1;
11526 return 0;
11529 static bool
11530 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11532 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11536 /* On the SH1..SH4, the trampoline looks like
11537 2 0002 D202 mov.l l2,r2
11538 1 0000 D301 mov.l l1,r3
11539 3 0004 422B jmp @r2
11540 4 0006 0009 nop
11541 5 0008 00000000 l1: .long area
11542 6 000c 00000000 l2: .long function
11544 SH5 (compact) uses r1 instead of r3 for the static chain. */
11547 /* Emit RTL insns to initialize the variable parts of a trampoline.
11548 FNADDR is an RTX for the address of the function's pure code.
11549 CXT is an RTX for the static chain value for the function. */
11550 static void
11551 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11553 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11554 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11556 if (TARGET_SHMEDIA64)
11558 rtx tramp_templ;
11559 int fixed_len;
11561 rtx movi1 = GEN_INT (0xcc000010);
11562 rtx shori1 = GEN_INT (0xc8000010);
11563 rtx src, dst;
11565 /* The following trampoline works within a +- 128 KB range for cxt:
11566 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11567 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11568 gettr tr1,r1; blink tr0,r63 */
11569 /* Address rounding makes it hard to compute the exact bounds of the
11570 offset for this trampoline, but we have a rather generous offset
11571 range, so frame_offset should do fine as an upper bound. */
11572 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11574 /* ??? could optimize this trampoline initialization
11575 by writing DImode words with two insns each. */
11576 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11577 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11578 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11579 insn = gen_rtx_AND (DImode, insn, mask);
11580 /* Or in ptb/u .,tr1 pattern */
11581 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11582 insn = force_operand (insn, NULL_RTX);
11583 insn = gen_lowpart (SImode, insn);
11584 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11585 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11586 insn = gen_rtx_AND (DImode, insn, mask);
11587 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11588 insn = gen_lowpart (SImode, insn);
11589 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11590 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11591 insn = gen_rtx_AND (DImode, insn, mask);
11592 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11593 insn = gen_lowpart (SImode, insn);
11594 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11595 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11596 insn = gen_rtx_AND (DImode, insn, mask);
11597 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11598 insn = gen_lowpart (SImode, insn);
11599 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11600 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11601 insn = gen_rtx_AND (DImode, insn, mask);
11602 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11603 insn = gen_lowpart (SImode, insn);
11604 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11605 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11606 GEN_INT (0x6bf10600));
11607 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11608 GEN_INT (0x4415fc10));
11609 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11610 GEN_INT (0x4401fff0));
11611 emit_insn (gen_ic_invalidate_line (tramp));
11612 return;
11614 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11615 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11617 tramp_templ = gen_datalabel_ref (tramp_templ);
11618 dst = tramp_mem;
11619 src = gen_const_mem (BLKmode, tramp_templ);
11620 set_mem_align (dst, 256);
11621 set_mem_align (src, 64);
11622 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11624 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11625 emit_move_insn (adjust_address (tramp_mem, Pmode,
11626 fixed_len + GET_MODE_SIZE (Pmode)),
11627 cxt);
11628 emit_insn (gen_ic_invalidate_line (tramp));
11629 return;
11631 else if (TARGET_SHMEDIA)
11633 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11634 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11635 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11636 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11637 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11638 rotated 10 right, and the higher 16 bits of every 32 selected. */
11639 rtx movishori
11640 = force_reg (V2HImode, (simplify_gen_subreg
11641 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11642 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11643 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11645 fnaddr = force_reg (SImode, fnaddr);
11646 cxt = force_reg (SImode, cxt);
11647 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11648 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11649 movishori));
11650 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11651 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11652 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11653 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11654 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11655 gen_rtx_SUBREG (V2HImode, cxt, 0),
11656 movishori));
11657 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11658 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11659 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11660 if (TARGET_LITTLE_ENDIAN)
11662 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11663 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11665 else
11667 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11668 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11670 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11671 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11672 emit_insn (gen_ic_invalidate_line (tramp));
11673 return;
11675 else if (TARGET_SHCOMPACT)
11677 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11678 return;
11680 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11681 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11682 SImode));
11683 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11684 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11685 SImode));
11686 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11687 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
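/* The two constants above pack the four 16-bit opcodes from the trampoline
   listing (mov.l l2,r2 = 0xd202; mov.l l1,r3 = 0xd301; jmp @r2 = 0x422b;
   nop = 0x0009) into SImode words; the halves are swapped between the
   little- and big-endian variants so that the same instruction stream
   appears in memory either way.  The static chain and the function address
   then fill the two .long slots at offsets 8 and 12.  */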
11688 if (TARGET_HARD_SH4 || TARGET_SH5)
11690 if (!TARGET_INLINE_IC_INVALIDATE
11691 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11692 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11693 FUNCTION_ORDINARY),
11694 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11695 else
11696 emit_insn (gen_ic_invalidate_line (tramp));
11700 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11701 static rtx
11702 sh_trampoline_adjust_address (rtx tramp)
11704 if (TARGET_SHMEDIA)
11705 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11706 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11707 return tramp;
11710 /* FIXME: This is overly conservative. A SHcompact function that
11711 receives arguments ``by reference'' will have them stored in its
11712 own stack frame, so it must not pass pointers or references to
11713 these arguments to other functions by means of sibling calls. */
11714 /* If PIC, we cannot make sibling calls to global functions
11715 because the PLT requires r12 to be live. */
11716 static bool
11717 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11719 return (1
11720 && (! TARGET_SHCOMPACT
11721 || crtl->args.info.stack_regs == 0)
11722 && ! sh_cfun_interrupt_handler_p ()
11723 && (! flag_pic
11724 || (decl && ! TREE_PUBLIC (decl))
11725 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11728 /* Machine specific built-in functions. */
11730 struct builtin_description
11732 bool (* const is_enabled) (void);
11733 const enum insn_code icode;
11734 const char *const name;
11735 int signature;
11736 tree fndecl;
11739 static bool
11740 shmedia_builtin_p (void)
11742 return TARGET_SHMEDIA;
11745 /* This function can be used if there are any built-ins that are not for
11746 SHmedia. It's commented out to avoid the defined-but-unused warning. */
11747 static bool
11748 sh1_builtin_p (void)
11750 return TARGET_SH1;
11753 /* Describe the number and signedness of arguments; arg[0] == result
11754 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11755 /* 9: 64-bit pointer, 10: 32-bit pointer */
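/* For example, SH_BLTIN_MAC_HISI below is { 1, 4, 4, 1 }: the result and the
   last argument are unsigned, the first two arguments take whatever mode and
   signedness the insn pattern asks for, and a 0 entry means no argument in
   that position (or, in slot 0, a void result).  SH_BLTIN_PV is { 0, 8 }:
   void result and a single pointer argument.  */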
11756 static const char signature_args[][4] =
11758 #define SH_BLTIN_V2SI2 0
11759 { 4, 4 },
11760 #define SH_BLTIN_V4HI2 1
11761 { 4, 4 },
11762 #define SH_BLTIN_V2SI3 2
11763 { 4, 4, 4 },
11764 #define SH_BLTIN_V4HI3 3
11765 { 4, 4, 4 },
11766 #define SH_BLTIN_V8QI3 4
11767 { 4, 4, 4 },
11768 #define SH_BLTIN_MAC_HISI 5
11769 { 1, 4, 4, 1 },
11770 #define SH_BLTIN_SH_HI 6
11771 { 4, 4, 1 },
11772 #define SH_BLTIN_SH_SI 7
11773 { 4, 4, 1 },
11774 #define SH_BLTIN_V4HI2V2SI 8
11775 { 4, 4, 4 },
11776 #define SH_BLTIN_V4HI2V8QI 9
11777 { 4, 4, 4 },
11778 #define SH_BLTIN_SISF 10
11779 { 4, 2 },
11780 #define SH_BLTIN_LDUA_L 11
11781 { 2, 10 },
11782 #define SH_BLTIN_LDUA_Q 12
11783 { 1, 10 },
11784 #define SH_BLTIN_STUA_L 13
11785 { 0, 10, 2 },
11786 #define SH_BLTIN_STUA_Q 14
11787 { 0, 10, 1 },
11788 #define SH_BLTIN_LDUA_L64 15
11789 { 2, 9 },
11790 #define SH_BLTIN_LDUA_Q64 16
11791 { 1, 9 },
11792 #define SH_BLTIN_STUA_L64 17
11793 { 0, 9, 2 },
11794 #define SH_BLTIN_STUA_Q64 18
11795 { 0, 9, 1 },
11796 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11797 #define SH_BLTIN_2 19
11798 #define SH_BLTIN_SU 19
11799 { 1, 2 },
11800 #define SH_BLTIN_3 20
11801 #define SH_BLTIN_SUS 20
11802 { 2, 2, 1 },
11803 #define SH_BLTIN_PSSV 21
11804 { 0, 8, 2, 2 },
11805 #define SH_BLTIN_XXUU 22
11806 #define SH_BLTIN_UUUU 22
11807 { 1, 1, 1, 1 },
11808 #define SH_BLTIN_PV 23
11809 { 0, 8 },
11810 #define SH_BLTIN_VP 24
11811 { 8, 0 },
11812 #define SH_BLTIN_UV 25
11813 { 1, 0 },
11814 #define SH_BLTIN_VU 26
11815 { 0, 1 },
11817 /* mcmv: operands considered unsigned. */
11818 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11819 /* mperm: control value considered unsigned int. */
11820 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11821 /* mshards_q: returns signed short. */
11822 /* nsb: takes long long arg, returns unsigned char. */
11823 static struct builtin_description bdesc[] =
11825 { shmedia_builtin_p,
11826 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11887 { shmedia_builtin_p,
11888 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11889 { shmedia_builtin_p,
11890 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11891 { shmedia_builtin_p,
11892 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11893 { shmedia_builtin_p,
11894 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11895 { shmedia_builtin_p,
11896 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11897 { shmedia_builtin_p,
11898 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11899 { shmedia_builtin_p,
11900 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11901 { shmedia_builtin_p,
11902 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11903 { shmedia_builtin_p,
11904 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11905 { shmedia_builtin_p,
11906 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11907 { shmedia_builtin_p,
11908 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11909 { shmedia_builtin_p,
11910 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11911 { shmedia_builtin_p,
11912 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11913 { shmedia_builtin_p,
11914 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11915 { shmedia_builtin_p,
11916 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11917 { shmedia_builtin_p,
11918 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11919 { shmedia_builtin_p,
11920 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11921 { shmedia_builtin_p,
11922 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11923 { shmedia_builtin_p,
11924 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11925 { shmedia_builtin_p,
11926 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11927 { shmedia_builtin_p,
11928 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11929 { shmedia_builtin_p,
11930 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11931 { shmedia_builtin_p,
11932 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11933 { shmedia_builtin_p,
11934 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11935 { shmedia_builtin_p,
11936 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11937 { shmedia_builtin_p,
11938 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11939 { shmedia_builtin_p,
11940 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11941 { shmedia_builtin_p,
11942 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11943 { shmedia_builtin_p,
11944 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11945 { shmedia_builtin_p,
11946 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11947 { shmedia_builtin_p,
11948 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11949 { shmedia_builtin_p,
11950 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11951 { shmedia_builtin_p,
11952 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11953 { shmedia_builtin_p,
11954 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11955 { shmedia_builtin_p,
11956 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11957 { shmedia_builtin_p,
11958 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11959 { shmedia_builtin_p,
11960 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11961 { shmedia_builtin_p,
11962 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11963 { shmedia_builtin_p,
11964 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11965 { shmedia_builtin_p,
11966 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11967 { shmedia_builtin_p,
11968 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11969 { shmedia_builtin_p,
11970 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11971 { shmedia_builtin_p,
11972 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11973 { shmedia_builtin_p,
11974 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11975 { shmedia_builtin_p,
11976 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11977 { shmedia_builtin_p,
11978 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11979 { shmedia_builtin_p,
11980 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11981 { shmedia_builtin_p,
11982 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11983 { shmedia_builtin_p,
11984 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11985 { shmedia_builtin_p,
11986 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11987 { shmedia_builtin_p,
11988 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11989 { shmedia_builtin_p,
11990 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11992 { sh1_builtin_p,
11993 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11994 { sh1_builtin_p,
11995 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11998 static tree sh_builtin_get_fpscr;
11999 static tree sh_builtin_set_fpscr;
12001 static void
12002 sh_init_builtins (void)
12004 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
12005 memset (shared, 0, sizeof shared);
12007 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12009 builtin_description* d = &bdesc[di];
12011 if (!d->is_enabled ())
12012 continue;
12014 tree type, arg_type = NULL_TREE;
12015 int signature = d->signature;
12017 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12018 type = shared[signature];
12019 else
12021 int has_result = signature_args[signature][0] != 0;
12022 tree args[3];
12024 if ((signature_args[signature][1] & 8)
12025 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12026 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12027 continue;
12028 if (! TARGET_FPU_ANY
12029 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12030 continue;
12031 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12032 args[i] = NULL_TREE;
12033 for (int i = 3; ; i--)
12035 int arg = signature_args[signature][i];
12036 int opno = i - 1 + has_result;
12038 if (arg & 8)
12039 arg_type = ptr_type_node;
12040 else if (arg)
12041 arg_type = (*lang_hooks.types.type_for_mode)
12042 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12043 else if (i)
12044 continue;
12045 else
12046 arg_type = void_type_node;
12047 if (i == 0)
12048 break;
12049 args[i-1] = arg_type;
12051 type = build_function_type_list (arg_type, args[0], args[1],
12052 args[2], NULL_TREE);
12053 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12054 shared[signature] = type;
12056 d->fndecl =
12057 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12058 NULL, NULL_TREE);
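/* The value d - bdesc passed as the function code above is how
   sh_builtin_decl and sh_expand_builtin find this entry again later, so it
   must remain a valid index into bdesc.  */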
12059 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12060 if (d->icode == CODE_FOR_sts_fpscr)
12061 sh_builtin_get_fpscr = d->fndecl;
12062 else if (d->icode == CODE_FOR_set_fpscr)
12063 sh_builtin_set_fpscr = d->fndecl;
12067 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12069 static void
12070 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12072 const unsigned SH_FE_INVALID = 64;
12073 const unsigned SH_FE_DIVBYZERO = 32;
12074 const unsigned SH_FE_OVERFLOW = 16;
12075 const unsigned SH_FE_UNDERFLOW = 8;
12076 const unsigned SH_FE_INEXACT = 4;
12077 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12078 | SH_FE_DIVBYZERO
12079 | SH_FE_OVERFLOW
12080 | SH_FE_UNDERFLOW
12081 | SH_FE_INEXACT);
12082 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
12083 tree fenv_var, mask, ld_fenv, masked_fenv;
12084 tree new_fenv_var, reload_fenv, restore_fnenv;
12085 tree update_call, atomic_feraiseexcept, hold_fnclex;
12087 if (! TARGET_FPU_ANY)
12088 return;
12090 /* Generate the equivalent of:
12091 unsigned int fenv_var;
12092 fenv_var = __builtin_sh_get_fpscr ();
12094 unsigned int masked_fenv;
12095 masked_fenv = fenv_var & mask;
12097 __builtin_sh_set_fpscr (masked_fenv); */
12099 fenv_var = create_tmp_var (unsigned_type_node);
12100 mask = build_int_cst (unsigned_type_node,
12101 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12102 | SH_FE_ALL_EXCEPT));
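/* With the constants above, SH_FE_ALL_EXCEPT is 0x7c and its shifted copy is
   0xf80, so the mask is ~0xffc: it clears the exception flag bits and their
   enable bits (five positions higher) while leaving the rest of FPSCR, e.g.
   the rounding mode, untouched.  */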
12103 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12104 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12105 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12106 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12107 *hold = build2 (COMPOUND_EXPR, void_type_node,
12108 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12109 hold_fnclex);
12111 /* Store the value of masked_fenv to clear the exceptions:
12112 __builtin_sh_set_fpscr (masked_fenv); */
12114 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12116 /* Generate the equivalent of:
12117 unsigned int new_fenv_var;
12118 new_fenv_var = __builtin_sh_get_fpscr ();
12120 __builtin_sh_set_fpscr (fenv_var);
12122 __atomic_feraiseexcept (new_fenv_var); */
12124 new_fenv_var = create_tmp_var (unsigned_type_node);
12125 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12126 build_call_expr (sh_builtin_get_fpscr, 0));
12127 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12128 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12129 update_call = build_call_expr (atomic_feraiseexcept, 1,
12130 fold_convert (integer_type_node,
12131 new_fenv_var));
12132 *update = build2 (COMPOUND_EXPR, void_type_node,
12133 build2 (COMPOUND_EXPR, void_type_node,
12134 reload_fenv, restore_fnenv), update_call);
12137 /* Implements target hook vector_mode_supported_p. */
12138 bool
12139 sh_vector_mode_supported_p (machine_mode mode)
12141 if (TARGET_FPU_ANY
12142 && ((mode == V2SFmode)
12143 || (mode == V4SFmode)
12144 || (mode == V16SFmode)))
12145 return true;
12147 else if (TARGET_SHMEDIA
12148 && ((mode == V8QImode)
12149 || (mode == V2HImode)
12150 || (mode == V4HImode)
12151 || (mode == V2SImode)))
12152 return true;
12154 return false;
12157 bool
12158 sh_frame_pointer_required (void)
12160 /* If needed override this in other tm.h files to cope with various OS
12161 lossage requiring a frame pointer. */
12162 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12163 return true;
12165 if (crtl->profile)
12166 return true;
12168 return false;
12171 /* Implements target hook dwarf_calling_convention. Return an enum
12172 of dwarf_calling_convention. */
12173 int
12174 sh_dwarf_calling_convention (const_tree func)
12176 if (sh_attr_renesas_p (func))
12177 return DW_CC_GNU_renesas_sh;
12179 return DW_CC_normal;
12182 /* Returns the sh builtin decl for CODE. */
12183 static tree
12184 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12186 if (code >= ARRAY_SIZE (bdesc))
12187 return error_mark_node;
12189 if (!bdesc[code].is_enabled ())
12190 return error_mark_node;
12192 return bdesc[code].fndecl;
12195 /* Expand an expression EXP that calls a built-in function,
12196 with result going to TARGET if that's convenient
12197 (and in mode MODE if that's convenient).
12198 SUBTARGET may be used as the target for computing one of EXP's operands.
12199 IGNORE is nonzero if the value is to be ignored. */
12200 static rtx
12201 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12202 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12204 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12205 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12206 const struct builtin_description *d = &bdesc[fcode];
12207 enum insn_code icode = d->icode;
12208 int signature = d->signature;
12209 int nop = 0;
12210 rtx op[4];
12212 if (signature_args[signature][0])
12214 if (ignore)
12215 return NULL_RTX;
12217 machine_mode tmode = insn_data[icode].operand[0].mode;
12218 if (! target || GET_MODE (target) != tmode
12219 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12220 target = gen_reg_rtx (tmode);
12221 op[nop++] = target;
12223 else
12224 target = NULL_RTX;
12226 for (int i = 1; i <= 3; i++, nop++)
12228 tree arg;
12229 machine_mode opmode, argmode;
12230 tree optype;
12232 if (! signature_args[signature][i])
12233 break;
12234 arg = CALL_EXPR_ARG (exp, i - 1);
12235 if (arg == error_mark_node)
12236 return const0_rtx;
12237 if (signature_args[signature][i] & 8)
12239 opmode = ptr_mode;
12240 optype = ptr_type_node;
12242 else
12244 opmode = insn_data[icode].operand[nop].mode;
12245 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12247 argmode = TYPE_MODE (TREE_TYPE (arg));
12248 if (argmode != opmode)
12249 arg = build1 (NOP_EXPR, optype, arg);
12250 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12251 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12252 op[nop] = copy_to_mode_reg (opmode, op[nop]);
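/* For instance, a builtin with signature SH_BLTIN_SH_HI ({ 4, 4, 1 }) ends
   this loop with nop == 3: op[0] is the result target and op[1]/op[2] are
   the expanded arguments, so the switch below calls the three-operand
   generator.  A void builtin such as SH_BLTIN_PV skips the target and
   starts filling operands at index 0.  */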
12255 rtx pat = NULL_RTX;
12257 switch (nop)
12259 case 1:
12260 pat = (*insn_data[d->icode].genfun) (op[0]);
12261 break;
12262 case 2:
12263 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12264 break;
12265 case 3:
12266 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12267 break;
12268 case 4:
12269 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12270 break;
12271 default:
12272 gcc_unreachable ();
12274 if (! pat)
12275 return NULL_RTX;
12276 emit_insn (pat);
12277 return target;
12280 void
12281 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12283 rtx sel0 = const0_rtx;
12284 rtx sel1 = const1_rtx;
12285 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12286 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12288 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12289 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12292 void
12293 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12295 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12297 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12298 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
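/* Both expanders above lower a V2SFmode operation into two scalar SFmode
   operations, one per vector lane: the unary case passes sel0/sel0 and then
   sel1/sel1 to select the lane, and the binary case uses the _op0 and _op1
   variants of the pattern.  */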
12301 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12302 We can allow any mode in any general register. The special registers
12303 only allow SImode. Don't allow any mode in the PR.
12305 We cannot hold DCmode values in the XD registers because alter_reg
12306 handles subregs of them incorrectly. We could work around this by
12307 spacing the XD registers like the DR registers, but this would require
12308 additional memory in every compilation to hold larger register vectors.
12309 We could hold SFmode / SCmode values in XD registers, but that
12310 would require a tertiary reload when reloading from / to memory,
12311 and a secondary reload to reload from / to general regs; that
12312 seems to be a losing proposition.
12314 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12315 it won't be ferried through GP registers first. */
12316 bool
12317 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12319 if (SPECIAL_REGISTER_P (regno))
12320 return mode == SImode;
12322 if (regno == FPUL_REG)
12323 return (mode == SImode || mode == SFmode);
12325 if (FP_REGISTER_P (regno) && mode == SFmode)
12326 return true;
12328 if (mode == V2SFmode)
12330 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12331 || GENERAL_REGISTER_P (regno)))
12332 return true;
12333 else
12334 return false;
12337 if (mode == V4SFmode)
12339 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12340 || GENERAL_REGISTER_P (regno))
12341 return true;
12342 else
12343 return false;
12346 if (mode == V16SFmode)
12348 if (TARGET_SHMEDIA)
12350 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12351 return true;
12352 else
12353 return false;
12355 else
12356 return regno == FIRST_XD_REG;
12359 if (FP_REGISTER_P (regno))
12361 if (mode == SFmode
12362 || mode == SImode
12363 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12364 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12365 || mode == DCmode
12366 || (TARGET_SHMEDIA
12367 && (mode == DFmode || mode == DImode
12368 || mode == V2SFmode || mode == TImode)))
12369 && ((regno - FIRST_FP_REG) & 1) == 0)
12370 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12371 && ((regno - FIRST_FP_REG) & 3) == 0))
12372 return true;
12373 else
12374 return false;
12377 if (XD_REGISTER_P (regno))
12378 return mode == DFmode;
12380 if (TARGET_REGISTER_P (regno))
12381 return (mode == DImode || mode == SImode || mode == PDImode);
12383 if (regno == PR_REG)
12384 return mode == SImode;
12386 if (regno == FPSCR_REG)
12387 return mode == SImode;
12389 /* FIXME. This works around PR target/37633 for -O0. */
12390 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12392 unsigned int n = GET_MODE_SIZE (mode) / 8;
12394 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12395 && regno <= FIRST_GENERAL_REG + 14)
12396 return false;
12399 return true;
12402 /* Specify the modes required to caller save a given hard regno.
12403 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12404 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12405 permits integer modes on them. That makes LRA's split process
12406 unhappy. See PR55212. */
12408 machine_mode
12409 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12410 machine_mode mode)
12412 if (FP_REGISTER_P (regno)
12413 && (mode == SFmode
12414 || mode == SCmode
12415 || ((mode == DFmode || mode == DCmode)
12416 && ((regno - FIRST_FP_REG) & 1) == 0)))
12417 return mode;
12419 return choose_hard_reg_mode (regno, nregs, false);
12422 /* Return true if a mode change from FROM to TO is invalid for registers
12423 in class RCLASS. */
12424 bool
12425 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12426 enum reg_class rclass)
12428 /* We want to enable the use of SUBREGs as a means to
12429 VEC_SELECT a single element of a vector. */
12431 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12432 This can be problematic when SFmode vector subregs need to be accessed
12433 on the stack with displacement addressing, as it happens with -O0.
12434 Thus we disallow the mode change for -O0. */
12435 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12436 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12438 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12440 if (TARGET_LITTLE_ENDIAN)
12442 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12443 return reg_classes_intersect_p (DF_REGS, rclass);
12445 else
12447 if (GET_MODE_SIZE (from) < 8)
12448 return reg_classes_intersect_p (DF_REGS, rclass);
12451 return false;
12454 /* Return true if registers in machine mode MODE will likely be
12455 allocated to registers in small register classes. */
12456 bool
12457 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12459 return (! TARGET_SHMEDIA);
12462 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12463 that label is used. */
12464 void
12465 sh_mark_label (rtx address, int nuses)
12467 if (GOTOFF_P (address))
12469 /* Extract the label or symbol. */
12470 address = XEXP (address, 0);
12471 if (GET_CODE (address) == PLUS)
12472 address = XEXP (address, 0);
12473 address = XVECEXP (address, 0, 0);
12475 if (GET_CODE (address) == LABEL_REF
12476 && LABEL_P (XEXP (address, 0)))
12477 LABEL_NUSES (XEXP (address, 0)) += nuses;
12480 /* Compute extra cost of moving data between one register class
12481 and another.
12483 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12484 uses this information. Hence, the general register <-> floating point
12485 register information here is not used for SFmode. */
12486 static int
12487 sh_register_move_cost (machine_mode mode,
12488 reg_class_t srcclass, reg_class_t dstclass)
12490 if (dstclass == T_REGS || dstclass == PR_REGS)
12491 return 10;
12493 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12494 return 4;
12496 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12497 && REGCLASS_HAS_FP_REG (srcclass)
12498 && REGCLASS_HAS_FP_REG (dstclass))
12499 return 4;
12501 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12502 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12504 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12505 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12506 return 9;
12508 if ((REGCLASS_HAS_FP_REG (dstclass)
12509 && REGCLASS_HAS_GENERAL_REG (srcclass))
12510 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12511 && REGCLASS_HAS_FP_REG (srcclass)))
12513 /* Discourage trying to use fp regs for a pointer. This also
12514 discourages fp regs with SImode because Pmode is an alias
12515 of SImode on this target. See PR target/48596. */
12516 int addend = (mode == Pmode) ? 40 : 0;
12518 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12519 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12522 if ((dstclass == FPUL_REGS
12523 && REGCLASS_HAS_GENERAL_REG (srcclass))
12524 || (srcclass == FPUL_REGS
12525 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12526 return 5;
12528 if ((dstclass == FPUL_REGS
12529 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12530 || (srcclass == FPUL_REGS
12531 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12532 return 7;
12534 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12535 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12536 return 20;
12538 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12539 if (TARGET_SHMEDIA
12540 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12542 if (sh_gettrcost >= 0)
12543 return sh_gettrcost;
12544 else if (!TARGET_PT_FIXED)
12545 return 100;
12548 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12549 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12550 return 4;
12552 if (TARGET_SHMEDIA
12553 || (TARGET_FMOVD
12554 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12555 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12556 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12558 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
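/* As a worked example of the fallback costs: moving an SImode value between
   general registers costs 2 * ((4 + 3) / 4) = 2, while a DFmode value costs
   2 * ((8 + 3) / 4) = 4.  A DFmode move between general and FP registers
   above costs 12 on non-SHmedia (8 with TARGET_FMOVD), and using FP
   registers for a Pmode pointer is penalized by the extra 40.  */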
12561 static rtx
12562 emit_load_ptr (rtx reg, rtx addr)
12564 rtx mem = gen_const_mem (ptr_mode, addr);
12566 if (Pmode != ptr_mode)
12567 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12568 return emit_move_insn (reg, mem);
12571 static void
12572 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12573 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12574 tree function)
12576 CUMULATIVE_ARGS cum;
12577 int structure_value_byref = 0;
12578 rtx this_rtx, this_value, sibcall, funexp;
12579 rtx_insn *insns;
12580 tree funtype = TREE_TYPE (function);
12581 int simple_add = CONST_OK_FOR_ADD (delta);
12582 int did_load = 0;
12583 rtx scratch0, scratch1, scratch2;
12584 unsigned i;
12586 reload_completed = 1;
12587 epilogue_completed = 1;
12588 crtl->uses_only_leaf_regs = 1;
12590 emit_note (NOTE_INSN_PROLOGUE_END);
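/* The insns emitted below implement the usual thunk adjustment, roughly
     this += DELTA;
     if (VCALL_OFFSET)
       this += *((*this) + VCALL_OFFSET);
   (both loads are pointer-sized), and then tail-call FUNCTION with the
   adjusted this pointer.  */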
12592 /* Find the "this" pointer. We have such a wide range of ABIs for the
12593 SH that it's best to do this completely machine independently.
12594 "this" is passed as first argument, unless a structure return pointer
12595 comes first, in which case "this" comes second. */
12596 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12597 #ifndef PCC_STATIC_STRUCT_RETURN
12598 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12599 structure_value_byref = 1;
12600 #endif /* not PCC_STATIC_STRUCT_RETURN */
12601 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12603 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12605 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12607 this_rtx
12608 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12610 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12611 static chain pointer (even if you can't have nested virtual functions
12612 right now, someone might implement them sometime), and the rest of the
12613 registers are used for argument passing, are callee-saved, or reserved. */
12614 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12615 -ffixed-reg has been used. */
12616 if (! call_used_regs[0] || fixed_regs[0])
12617 error ("r0 needs to be available as a call-clobbered register");
12618 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12619 if (! TARGET_SH5)
12621 if (call_used_regs[1] && ! fixed_regs[1])
12622 scratch1 = gen_rtx_REG (ptr_mode, 1);
12623 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12624 to the location where struct values are to be returned. */
12625 if (call_used_regs[3] && ! fixed_regs[3])
12626 scratch2 = gen_rtx_REG (Pmode, 3);
12628 else if (TARGET_SHMEDIA)
12630 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12631 if (i != REGNO (scratch0) &&
12632 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12634 scratch1 = gen_rtx_REG (ptr_mode, i);
12635 break;
12637 if (scratch1 == scratch0)
12638 error ("need a second call-clobbered general purpose register");
12639 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12640 if (call_used_regs[i] && ! fixed_regs[i])
12642 scratch2 = gen_rtx_REG (Pmode, i);
12643 break;
12645 if (scratch2 == scratch0)
12646 error ("need a call-clobbered target register");
12649 this_value = plus_constant (Pmode, this_rtx, delta);
12650 if (vcall_offset
12651 && (simple_add || scratch0 != scratch1)
12652 && strict_memory_address_p (ptr_mode, this_value))
12654 emit_load_ptr (scratch0, this_value);
12655 did_load = 1;
12658 if (!delta)
12659 ; /* Do nothing. */
12660 else if (simple_add)
12661 emit_move_insn (this_rtx, this_value);
12662 else
12664 emit_move_insn (scratch1, GEN_INT (delta));
12665 emit_insn (gen_add2_insn (this_rtx, scratch1));
12668 if (vcall_offset)
12670 rtx offset_addr;
12672 if (!did_load)
12673 emit_load_ptr (scratch0, this_rtx);
12675 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12676 if (strict_memory_address_p (ptr_mode, offset_addr))
12677 ; /* Do nothing. */
12678 else if (! TARGET_SH5 && scratch0 != scratch1)
12680 /* scratch0 != scratch1, and we have indexed loads. Get better
12681 schedule by loading the offset into r1 and using an indexed
12682 load - then the load of r1 can issue before the load from
12683 (this_rtx + delta) finishes. */
12684 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12685 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12687 else if (CONST_OK_FOR_ADD (vcall_offset))
12689 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12690 offset_addr = scratch0;
12692 else if (scratch0 != scratch1)
12694 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12695 emit_insn (gen_add2_insn (scratch0, scratch1));
12696 offset_addr = scratch0;
12698 else
12699 gcc_unreachable (); /* FIXME */
12700 emit_load_ptr (scratch0, offset_addr);
12702 if (Pmode != ptr_mode)
12703 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12704 emit_insn (gen_add2_insn (this_rtx, scratch0));
12707 /* Generate a tail call to the target function. */
12708 if (! TREE_USED (function))
12710 assemble_external (function);
12711 TREE_USED (function) = 1;
12713 funexp = XEXP (DECL_RTL (function), 0);
12714 /* If the function is overridden, so is the thunk, hence we don't
12715 need GOT addressing even if this is a public symbol. */
12716 #if 0
12717 if (TARGET_SH1 && ! flag_weak)
12718 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12719 else
12720 #endif
12721 if (TARGET_SH2 && flag_pic)
12723 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12724 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12726 else
12728 if (TARGET_SHMEDIA && flag_pic)
12730 funexp = gen_sym2PIC (funexp);
12731 PUT_MODE (funexp, Pmode);
12733 emit_move_insn (scratch2, funexp);
12734 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12735 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12737 sibcall = emit_call_insn (sibcall);
12738 SIBLING_CALL_P (sibcall) = 1;
12739 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12740 emit_barrier ();
12742 /* Run just enough of rest_of_compilation to do scheduling and get
12743 the insns emitted. Note that use_thunk calls
12744 assemble_start_function and assemble_end_function. */
12746 insns = get_insns ();
12748 if (optimize > 0)
12750 if (! cfun->cfg)
12751 init_flow (cfun);
12752 split_all_insns_noflow ();
12755 sh_reorg ();
12756 shorten_branches (insns);
12757 final_start_function (insns, file, 1);
12758 final (insns, file, 1);
12759 final_end_function ();
12761 reload_completed = 0;
12762 epilogue_completed = 0;
12765 rtx
12766 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12768 rtx sym;
12770 /* If this is not an ordinary function, the name usually comes from a
12771 string literal or an sprintf buffer. Make sure we use the same
12772 string consistently, so that cse will be able to unify address loads. */
12773 if (kind != FUNCTION_ORDINARY)
12774 name = IDENTIFIER_POINTER (get_identifier (name));
12775 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12776 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12777 if (flag_pic)
12778 switch (kind)
12780 case FUNCTION_ORDINARY:
12781 break;
12782 case SFUNC_GOT:
12784 rtx reg = target ? target : gen_reg_rtx (Pmode);
12786 emit_insn (gen_symGOT2reg (reg, sym));
12787 sym = reg;
12788 break;
12790 case SFUNC_STATIC:
12792 /* ??? To allow cse to work, we use GOTOFF relocations.
12793 We could add combiner patterns to transform this into
12794 straight pc-relative calls with sym2PIC / bsrf when
12795 label load and function call are still 1:1 and in the
12796 same basic block during combine. */
12797 rtx reg = target ? target : gen_reg_rtx (Pmode);
12799 emit_insn (gen_symGOTOFF2reg (reg, sym));
12800 sym = reg;
12801 break;
12804 if (target && sym != target)
12806 emit_move_insn (target, sym);
12807 return target;
12809 return sym;
12812 /* Find the number of a general purpose register in S. */
12813 static int
12814 scavenge_reg (HARD_REG_SET *s)
12816 int r;
12817 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12818 if (TEST_HARD_REG_BIT (*s, r))
12819 return r;
12820 return -1;
12823 rtx
12824 sh_get_pr_initial_val (void)
12826 rtx val;
12828 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12829 PR register on SHcompact, because it might be clobbered by the prologue.
12830 We check first if that is known to be the case. */
12831 if (TARGET_SHCOMPACT
12832 && ((crtl->args.info.call_cookie
12833 & ~ CALL_COOKIE_RET_TRAMP (1))
12834 || crtl->saves_all_registers))
12835 return gen_frame_mem (SImode, return_address_pointer_rtx);
12837 /* If we haven't finished rtl generation, there might be a nonlocal label
12838 that we haven't seen yet.
12839 ??? get_hard_reg_initial_val fails if it is called after register
12840 allocation has started, unless it has been called before for the
12841 same register. And even then, we end up in trouble if we didn't use
12842 the register in the same basic block before. So call
12843 get_hard_reg_initial_val now and wrap it in an unspec if we might
12844 need to replace it. */
12845 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12846 combine can put the pseudo returned by get_hard_reg_initial_val into
12847 instructions that need a general purpose register, which will fail to
12848 be recognized when the pseudo becomes allocated to PR. */
12849 val
12850 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12851 if (TARGET_SH1)
12852 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12853 return val;
12856 bool
12857 sh_expand_t_scc (rtx operands[])
12859 enum rtx_code code = GET_CODE (operands[1]);
12860 rtx target = operands[0];
12861 rtx op0 = operands[2];
12862 rtx op1 = operands[3];
12863 rtx result = target;
12864 HOST_WIDE_INT val;
12866 if (!REG_P (op0) || REGNO (op0) != T_REG
12867 || !CONST_INT_P (op1))
12868 return false;
12869 if (!REG_P (result))
12870 result = gen_reg_rtx (SImode);
12871 val = INTVAL (op1);
12872 if ((code == EQ && val == 1) || (code == NE && val == 0))
12873 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12874 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12875 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12876 else if (code == EQ || code == NE)
12877 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12878 else
12879 return false;
12880 if (result != target)
12881 emit_move_insn (target, result);
12882 return true;
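/* In other words: comparing T against 1 with EQ (or against 0 with NE) is
   just a movt; the inverted cases use movnegt; and comparing T against any
   other constant with EQ/NE has a result that is known at compile time, so
   a constant 0 or 1 is loaded directly.  */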
12885 /* INSN is an sfunc; return the rtx that describes the address used. */
12886 static rtx
12887 extract_sfunc_addr (rtx insn)
12889 rtx pattern, part = NULL_RTX;
12890 int len, i;
12892 pattern = PATTERN (insn);
12893 len = XVECLEN (pattern, 0);
12894 for (i = 0; i < len; i++)
12896 part = XVECEXP (pattern, 0, i);
12897 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12898 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12899 return XEXP (part, 0);
12901 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12902 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12905 /* Verify that the register in use_sfunc_addr still agrees with the address
12906 used in the sfunc. This prevents fill_slots_from_thread from changing
12907 use_sfunc_addr.
12908 INSN is the use_sfunc_addr instruction, and REG is the register it
12909 guards. */
12910 bool
12911 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12913 /* Search for the sfunc. It should really come right after INSN. */
12914 while ((insn = NEXT_INSN (insn)))
12916 if (LABEL_P (insn) || JUMP_P (insn))
12917 break;
12918 if (! INSN_P (insn))
12919 continue;
12921 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12922 insn = seq->insn (0);
12923 if (GET_CODE (PATTERN (insn)) != PARALLEL
12924 || get_attr_type (insn) != TYPE_SFUNC)
12925 continue;
12926 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12928 gcc_unreachable ();
12931 /* This function returns a constant rtx that represents 2**15 / pi in
12932 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12933 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12934 static GTY(()) rtx sh_fsca_sf2int_rtx;
12936 rtx
12937 sh_fsca_sf2int (void)
12939 if (! sh_fsca_sf2int_rtx)
12941 REAL_VALUE_TYPE rv;
12943 real_from_string (&rv, "10430.378350470453");
12944 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12947 return sh_fsca_sf2int_rtx;
12950 /* This function returns a constant rtx that represents pi / 2**15 in
12951 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12952 of a full circle back to an SFmode value, i.e. 0x10000 maps
12953 to 2*pi. */
12954 static GTY(()) rtx sh_fsca_int2sf_rtx;
12956 rtx
12957 sh_fsca_int2sf (void)
12959 if (! sh_fsca_int2sf_rtx)
12961 REAL_VALUE_TYPE rv;
12963 real_from_string (&rv, "9.587379924285257e-5");
12964 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12967 return sh_fsca_int2sf_rtx;
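/* As a rough sanity check of the two scale factors above: an angle of
   pi/2 radians multiplied by 2**15/pi (about 10430.378) gives 16384 =
   0x4000, i.e. a quarter of the 0x10000 full circle, and 0x4000
   multiplied by pi/2**15 (about 9.5874e-5) gives back about 1.5708,
   i.e. pi/2.  */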
12970 /* Initialize the CUMULATIVE_ARGS structure. */
12971 void
12972 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12973 tree fntype,
12974 rtx libname ATTRIBUTE_UNUSED,
12975 tree fndecl,
12976 signed int n_named_args,
12977 machine_mode mode)
12979 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12980 pcum->free_single_fp_reg = 0;
12981 pcum->stack_regs = 0;
12982 pcum->byref_regs = 0;
12983 pcum->byref = 0;
12984 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12986 /* XXX - Should we check TARGET_HITACHI here ??? */
12987 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12989 if (fntype)
12991 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12992 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12993 pcum->prototype_p = prototype_p (fntype);
12994 pcum->arg_count [(int) SH_ARG_INT]
12995 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12997 pcum->call_cookie
12998 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12999 && pcum->arg_count [(int) SH_ARG_INT] == 0
13000 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
13001 ? int_size_in_bytes (TREE_TYPE (fntype))
13002 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
13003 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
13004 == FIRST_RET_REG));
13006 else
13008 pcum->arg_count [(int) SH_ARG_INT] = 0;
13009 pcum->prototype_p = FALSE;
13010 if (mode != VOIDmode)
13012 pcum->call_cookie =
13013 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13014 && GET_MODE_SIZE (mode) > 4
13015 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13017 /* If the default ABI is the Renesas ABI then all library
13018 calls must assume that the library will be using the
13019 Renesas ABI. So if the function would return its result
13020 in memory then we must force the address of this memory
13021 block onto the stack. Ideally we would like to call
13022 targetm.calls.return_in_memory() here but we do not have
13023 the TYPE or the FNDECL available so we synthesize the
13024 contents of that function as best we can. */
13025 pcum->force_mem =
13026 (TARGET_DEFAULT & MASK_HITACHI)
13027 && (mode == BLKmode
13028 || (GET_MODE_SIZE (mode) > 4
13029 && !(mode == DFmode
13030 && TARGET_FPU_DOUBLE)));
13032 else
13034 pcum->call_cookie = 0;
13035 pcum->force_mem = FALSE;
13040 rtx
13041 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13043 enum rtx_code code = TRUNCATE;
13045 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13047 rtx inner = XEXP (x, 0);
13048 machine_mode inner_mode = GET_MODE (inner);
13050 if (inner_mode == mode)
13051 return inner;
13052 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13053 x = inner;
13054 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13055 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13057 code = GET_CODE (x);
13058 x = inner;
13061 return gen_rtx_fmt_e (code, mode, x);
13064 /* Look through X cleaning up truncates of registers that span multiple
13065 actual hard registers. Return the number of changes made. */
13066 int
13067 shmedia_cleanup_truncate (rtx x)
13069 int n_changes = 0;
13070 subrtx_var_iterator::array_type array;
13071 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13073 rtx x = *iter;
13074 if (GET_CODE (x) == TRUNCATE)
13076 rtx reg = XEXP (x, 0);
13077 machine_mode reg_mode = GET_MODE (reg);
13078 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13080 int offset = subreg_lowpart_offset (DImode, reg_mode);
13081 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13082 n_changes += 1;
13083 iter.skip_subrtxes ();
13087 return n_changes;
13090 /* Load and store depend on the highpart of the address. However,
13091 set_attr_alternative does not give well-defined results before reload,
13092 so we must look at the rtl ourselves to see if any of the feeding
13093 registers is used in a memref.
13095 Return true iff INSN contains a MEM. */
13096 bool
13097 sh_contains_memref_p (rtx insn)
13099 subrtx_iterator::array_type array;
13100 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13101 if (MEM_P (*iter))
13102 return true;
13103 return false;
13106 /* Return true iff INSN loads a banked register. */
13107 bool
13108 sh_loads_bankedreg_p (rtx insn)
13110 if (GET_CODE (PATTERN (insn)) == SET)
13112 rtx op = SET_DEST (PATTERN(insn));
13113 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13114 return true;
13117 return false;
13120 /* FNADDR is the MEM expression from a call expander. Return an address
13121 to use in an SHmedia insn pattern. */
13122 rtx
13123 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13125 int is_sym;
13127 fnaddr = XEXP (fnaddr, 0);
13128 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13129 if (flag_pic && is_sym)
13131 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13133 rtx reg = gen_reg_rtx (Pmode);
13135 /* We must not use GOTPLT for sibcalls, because PIC_REG
13136 must be restored before the PLT code gets to run. */
13137 if (is_sibcall)
13138 emit_insn (gen_symGOT2reg (reg, fnaddr));
13139 else
13140 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13141 fnaddr = reg;
13143 else
13145 fnaddr = gen_sym2PIC (fnaddr);
13146 PUT_MODE (fnaddr, Pmode);
13149 /* If ptabs might trap, make this visible to the rest of the compiler.
13150 We generally assume that symbols pertain to valid locations, but
13151 it is possible to generate invalid symbols with asm or linker tricks.
13152 In a list of functions where each returns its successor, an invalid
13153 symbol might denote an empty list. */
13154 if (!TARGET_PT_FIXED
13155 && (!is_sym || TARGET_INVALID_SYMBOLS)
13156 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13158 rtx tr = gen_reg_rtx (PDImode);
13160 emit_insn (gen_ptabs (tr, fnaddr));
13161 fnaddr = tr;
13163 else if (! target_reg_operand (fnaddr, Pmode))
13164 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13165 return fnaddr;
13168 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13169 static reg_class_t
13170 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13172 if (rclass == NO_REGS
13173 && TARGET_SHMEDIA
13174 && (CONST_DOUBLE_P (x)
13175 || GET_CODE (x) == SYMBOL_REF
13176 || PIC_ADDR_P (x)))
13177 return GENERAL_REGS;
13179 return rclass;
13182 /* Implement TARGET_SECONDARY_RELOAD. */
13183 static reg_class_t
13184 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13185 machine_mode mode, secondary_reload_info *sri)
13187 enum reg_class rclass = (enum reg_class) rclass_i;
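/* Note on the GBR checks below: the GBR based addressing modes can only
   transfer data through R0, so a reload of a GBR relative memory
   operand into anything but R0_REGS needs R0 as the intermediate.  */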
13189 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13190 && REG_P (XEXP (XEXP (x, 0), 0))
13191 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13192 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13194 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13195 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13197 if (REG_P (x) && REGNO (x) == GBR_REG)
13198 return NO_REGS;
13200 if (in_p)
13202 if (REGCLASS_HAS_FP_REG (rclass)
13203 && ! TARGET_SHMEDIA
13204 && immediate_operand ((x), mode)
13205 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13206 switch (mode)
13208 case SFmode:
13209 sri->icode = CODE_FOR_reload_insf__frn;
13210 return NO_REGS;
13211 case DFmode:
13212 sri->icode = CODE_FOR_reload_indf__frn;
13213 return NO_REGS;
13214 case SImode:
13215 /* ??? If we knew that we are in the appropriate mode -
13216 single precision - we could use a reload pattern directly. */
13217 return FPUL_REGS;
13218 default:
13219 abort ();
13221 if (rclass == FPUL_REGS
13222 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13223 || REGNO (x) == T_REG))
13224 || GET_CODE (x) == PLUS))
13225 return GENERAL_REGS;
13226 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13228 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13229 return GENERAL_REGS;
13230 else if (mode == SFmode)
13231 return FP_REGS;
13232 sri->icode = CODE_FOR_reload_insi__i_fpul;
13233 return NO_REGS;
13235 if (rclass == FPSCR_REGS
13236 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13237 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13238 return GENERAL_REGS;
13239 if (REGCLASS_HAS_FP_REG (rclass)
13240 && TARGET_SHMEDIA
13241 && immediate_operand (x, mode)
13242 && x != CONST0_RTX (GET_MODE (x))
13243 && GET_MODE (x) != V4SFmode)
13244 return GENERAL_REGS;
13245 if ((mode == QImode || mode == HImode)
13246 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13248 sri->icode = ((mode == QImode)
13249 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13250 return NO_REGS;
13252 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13253 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13254 return TARGET_REGS;
13255 } /* end of input-only processing. */
13257 if (((REGCLASS_HAS_FP_REG (rclass)
13258 && (REG_P (x)
13259 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13260 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13261 && TARGET_FMOVD))))
13262 || (REGCLASS_HAS_GENERAL_REG (rclass)
13263 && REG_P (x)
13264 && FP_REGISTER_P (REGNO (x))))
13265 && ! TARGET_SHMEDIA
13266 && (mode == SFmode || mode == SImode))
13267 return FPUL_REGS;
13268 if ((rclass == FPUL_REGS
13269 || (REGCLASS_HAS_FP_REG (rclass)
13270 && ! TARGET_SHMEDIA && mode == SImode))
13271 && (MEM_P (x)
13272 || (REG_P (x)
13273 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13274 || REGNO (x) == T_REG
13275 || system_reg_operand (x, VOIDmode)))))
13277 if (rclass == FPUL_REGS)
13278 return GENERAL_REGS;
13279 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
13281 if ((rclass == TARGET_REGS
13282 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13283 && !satisfies_constraint_Csy (x)
13284 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13285 return GENERAL_REGS;
13286 if ((rclass == MAC_REGS || rclass == PR_REGS)
13287 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13288 && rclass != REGNO_REG_CLASS (REGNO (x)))
13289 return GENERAL_REGS;
13290 if (rclass != GENERAL_REGS && REG_P (x)
13291 && TARGET_REGISTER_P (REGNO (x)))
13292 return GENERAL_REGS;
13294 /* If we get here, fall back to loading the FPUL register through general registers.
13295 This case can happen when movsi_ie insn is picked initially to
13296 load/store the FPUL register from/to another register, and then the
13297 other register is allocated on the stack. */
13298 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13299 return GENERAL_REGS;
13301 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13302 the other operand.
13303 On SH2A we could also just leave it alone here, which would result in a
13304 4 byte move insn being generated instead. However, for this to work
13305 the insns must have the appropriate alternatives. */
13306 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13307 && satisfies_constraint_Sdd (x)
13308 && sh_disp_addr_displacement (x)
13309 <= sh_max_mov_insn_displacement (mode, false))
13310 return R0_REGS;
13312 /* When reload is trying to address a QImode or HImode subreg on the stack,
13313 force any subreg byte into R0_REGS, as this is going to become a
13314 displacement address.
13315 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13316 is on the stack, the memref to it might already require a displacement
13317 and that has to be added to the final address. At this point we don't
13318 know the cumulative displacement so we assume the worst case. */
13319 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13320 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13321 return R0_REGS;
13323 return NO_REGS;
13326 /* Return true if SUBST can't safely replace its equivalent during RA. */
13327 static bool
13328 sh_cannot_substitute_mem_equiv_p (rtx)
13330 if (TARGET_SHMEDIA)
13331 return false;
13333 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13334 uses R0 and may cause spill failure when R0 is already used.
13335 We have to return true for that case at least.
13336 Moreover, SH leans heavily on R0 and does not have enough hard
13337 registers to make the equiv substitution a win for size or speed
13338 on average working sets. The pseudos produced to hold the equiv
13339 values can't get good hard registers in bad cases and end up as
13340 memory save/restore insns, which makes the code worse. */
13341 return true;
13344 /* Return true if DISP can be legitimized. */
13345 static bool
13346 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13347 machine_mode mode)
13349 if (TARGET_SHMEDIA)
13350 return false;
13352 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13353 || (TARGET_SH2E && mode == SFmode))
13354 return false;
13356 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13357 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13359 *disp = adj.mov_disp;
13360 *offs = adj.offset_adjust;
13361 return true;
13364 return false;
13367 /* Return true if the movsf insn should be split with an additional
13368 register. */
13369 bool
13370 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13372 /* op0 == op1 */
13373 if (rtx_equal_p (op0, op1))
13374 return true;
13375 /* fy, FQ, reg */
13376 if (GET_CODE (op1) == CONST_DOUBLE
13377 && ! satisfies_constraint_G (op1)
13378 && ! satisfies_constraint_H (op1)
13379 && REG_P (op0)
13380 && REG_P (op2))
13381 return true;
13382 /* f, r, y */
13383 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13384 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13385 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13386 return true;
13387 /* r, f, y */
13388 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13389 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13390 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13391 return true;
13393 return false;
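/* A note on the short constraint comments above: in the movsf_ie
   pattern 'f' stands for a floating point register, 'r' for a general
   register and 'y' for FPUL; the "fy, FQ, reg" case is the one where
   op1 is a CONST_DOUBLE other than the special 0.0 / 1.0 constants
   covered by the G and H constraints.  */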
13396 static void
13397 sh_conditional_register_usage (void)
13399 int regno;
13400 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13401 if (! VALID_REGISTER_P (regno))
13402 fixed_regs[regno] = call_used_regs[regno] = 1;
13403 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13404 if (TARGET_SH5)
13406 call_used_regs[FIRST_GENERAL_REG + 8]
13407 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13408 call_really_used_regs[FIRST_GENERAL_REG + 8]
13409 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13411 if (TARGET_SHMEDIA)
13413 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13414 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13415 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13417 if (flag_pic)
13419 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13420 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13422 /* Renesas saves and restores mac registers on call. */
13423 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13425 call_really_used_regs[MACH_REG] = 0;
13426 call_really_used_regs[MACL_REG] = 0;
13429 if (TARGET_SHMEDIA)
13431 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13432 if (! fixed_regs[regno] && call_really_used_regs[regno])
13433 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13435 else
13436 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13437 if (! fixed_regs[regno] && call_really_used_regs[regno])
13438 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13440 call_really_used_regs[FPSCR_MODES_REG] = 0;
13441 call_really_used_regs[FPSCR_STAT_REG] = 0;
13444 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13446 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13447 static bool
13448 sh_legitimate_constant_p (machine_mode mode, rtx x)
13450 return (TARGET_SHMEDIA
13451 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13452 || x == CONST0_RTX (mode)
13453 || !TARGET_SHMEDIA_FPU
13454 || TARGET_SHMEDIA64)
13455 : (GET_CODE (x) != CONST_DOUBLE
13456 || mode == DFmode || mode == SFmode
13457 || mode == DImode || GET_MODE (x) == VOIDmode));
13460 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13462 static void
13463 sh_init_sync_libfuncs (void)
13465 init_sync_libfuncs (UNITS_PER_WORD);
13468 /* Return true if it is appropriate to emit `ret' instructions in the
13469 body of a function. */
13470 bool
13471 sh_can_use_simple_return_p (void)
13473 HARD_REG_SET live_regs_mask;
13474 int d;
13476 /* Some targets require special return insns. */
13477 if (TARGET_SHMEDIA
13478 || (TARGET_SHCOMPACT
13479 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13480 return false;
13482 if (! reload_completed || frame_pointer_needed)
13483 return false;
13485 /* Moving the prologue around doesn't reduce the size. */
13486 if (optimize_function_for_size_p (cfun))
13487 return false;
13489 /* Finally, allow for pr save. */
13490 d = calc_live_regs (&live_regs_mask);
13492 if (rounded_frame_size (d) > 4)
13493 return false;
13495 return true;
13498 /*------------------------------------------------------------------------------
13499 Address mode optimization support code
13500 */
13502 typedef HOST_WIDE_INT disp_t;
13503 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13504 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13505 static const disp_t INVALID_DISP = MAX_DISP;
13507 /* A memory reference which is described by a base register and a
13508 displacement. */
13509 class base_reg_disp
13511 public:
13512 base_reg_disp (rtx br, disp_t d);
13514 bool is_reg (void) const;
13515 bool is_disp (void) const;
13516 rtx reg (void) const;
13517 disp_t disp (void) const;
13519 private:
13520 rtx reg_;
13521 disp_t disp_;
13524 inline
13525 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13526 : reg_ (br), disp_ (d)
13530 inline bool
13531 base_reg_disp::is_reg (void) const
13533 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13536 inline bool
13537 base_reg_disp::is_disp (void) const
13539 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13542 inline rtx
13543 base_reg_disp::reg (void) const
13545 return reg_;
13548 inline disp_t
13549 base_reg_disp::disp (void) const
13551 return disp_;
13554 /* Find the base register and calculate the displacement for a given
13555 address rtx 'x'. */
13556 static base_reg_disp
13557 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13558 rtx base_reg = NULL)
13560 if (REG_P (x))
13562 if (REGNO (x) == GBR_REG)
13563 return base_reg_disp (x, disp);
13565 /* We've reached a hard-reg. This is probably the point where
13566 function args are copied to pseudos. Do not go any further and
13567 stick to the pseudo. If the original mem addr was in a hard reg
13568 from the beginning, it will become the base reg. */
13569 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13570 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13572 /* Find the def of the reg and trace it. If there is more than one
13573 def and they are not all the same, assume it's not safe to proceed. */
13574 rtx_insn* last_i = NULL;
13575 rtx last_set = NULL;
13576 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13577 d = DF_REF_NEXT_REG (d))
13579 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13581 /* Accept multiple defs, as long as they are equal. */
13582 if (last_set == NULL || rtx_equal_p (last_set, set))
13584 last_i = DF_REF_INSN (d);
13585 last_set = set;
13587 else
13589 last_i = NULL;
13590 last_set = NULL;
13591 break;
13595 if (last_set != NULL && last_i != NULL)
13596 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13597 XEXP (last_set, 0));
13599 /* If we get here, no previous insn was found that sets the reg.
13600 The input reg is already the base reg. */
13601 return base_reg_disp (x, disp);
13604 else if (GET_CODE (x) == PLUS)
13606 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13607 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13609 /* Either left or right val must be a reg.
13610 We don't handle the case of 'reg + reg' here. */
13611 if (left_val.is_reg () && right_val.is_disp ())
13612 return base_reg_disp (left_val.reg (), left_val.disp ()
13613 + right_val.disp () + disp);
13614 else if (right_val.is_reg () && left_val.is_disp ())
13615 return base_reg_disp (right_val.reg (), right_val.disp ()
13616 + left_val.disp () + disp);
13617 else
13618 return base_reg_disp (base_reg, disp);
13621 else if (CONST_INT_P (x))
13622 return base_reg_disp (NULL, disp + INTVAL (x));
13624 /* Didn't find anything useful. */
13625 return base_reg_disp (base_reg, disp);
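/* For example, given a mem address (plus (reg 200) (const_int 4)) where
   pseudo 200 was set from (plus (reg GBR) (const_int 8)), the recursion
   above returns the GBR register as the base and an accumulated
   displacement of 12.  */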
13628 /* Given an insn and a memory operand, try to find an equivalent GBR
13629 based memory address and return the corresponding new memory address.
13630 Return NULL_RTX if not found. */
13631 rtx
13632 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13634 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13635 return NULL_RTX;
13637 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13638 if (side_effects_p (XEXP (mem, 0)))
13639 return NULL_RTX;
13641 /* When not optimizing there might be no dataflow available. */
13642 if (df == NULL)
13643 return NULL_RTX;
13645 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13647 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13649 /* If GBR is marked as call clobbered we bail out if we see a call.
13650 FIXME: Actually should check if this mem refers to the gbr value
13651 before or after the call. If there is a store_gbr preceding this
13652 mem, it's safe to use GBR for this mem.
13654 If GBR is not marked as call clobbered, but there is some other
13655 def than a call, it's probably a load_gbr upon which we also
13656 bail out to be on the safe side.
13657 FIXME: Should check if we have a use-after-def case, such as
13658 the call case above. */
13659 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13660 d = DF_REF_NEXT_REG (d))
13662 if (CALL_P (DF_REF_INSN (d)))
13664 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13665 return NULL_RTX;
13666 else
13667 continue;
13669 else
13670 return NULL_RTX;
13673 rtx disp = GEN_INT (gbr_disp.disp ());
13674 if (gbr_displacement (disp, GET_MODE (mem)))
13675 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13678 return NULL_RTX;
13681 /*------------------------------------------------------------------------------
13682 Manual insn combine support code.
13683 */
13685 /* Return true if the specified insn contains any UNSPECs or
13686 UNSPEC_VOLATILEs. */
13687 static bool
13688 sh_unspec_insn_p (rtx x)
13690 subrtx_iterator::array_type array;
13691 FOR_EACH_SUBRTX (i, array, x, ALL)
13692 if (*i != NULL
13693 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13694 return true;
13696 return false;
13699 /* Return true if the register operands of the specified insn are modified
13700 between the specified from and to insns (exclusive of those two). */
13701 bool
13702 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13703 const rtx_insn* from,
13704 const rtx_insn* to)
13706 /* FIXME: Return true for multiple sets for now. */
13707 rtx s = single_set (operands_insn);
13708 if (s == NULL_RTX)
13709 return true;
13711 subrtx_iterator::array_type array;
13712 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13713 if (*i != NULL &&
13714 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13715 return true;
13717 return false;
13720 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13721 negates the T bit and stores the result in the T bit. */
13722 bool
13723 sh_is_nott_insn (const rtx_insn* i)
13725 return i != NULL && GET_CODE (PATTERN (i)) == SET
13726 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13727 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
13730 rtx
13731 sh_movt_set_dest (const rtx_insn* i)
13733 if (i == NULL)
13734 return NULL;
13736 const_rtx p = PATTERN (i);
13737 return GET_CODE (p) == SET
13738 && arith_reg_dest (XEXP (p, 0), SImode)
13739 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13742 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13743 that stores the negated T bit in a register, and return the destination
13744 register rtx, or null. */
13745 rtx
13746 sh_movrt_set_dest (const rtx_insn* i)
13748 if (i == NULL)
13749 return NULL;
13751 const_rtx p = PATTERN (i);
13753 /* The negc movrt replacement is inside a parallel. */
13754 if (GET_CODE (p) == PARALLEL)
13755 p = XVECEXP (p, 0, 0);
13757 return GET_CODE (p) == SET
13758 && arith_reg_dest (XEXP (p, 0), SImode)
13759 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
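/* In other words, sh_movt_set_dest recognizes a plain copy of the T bit
   into a general register (a movt insn) and sh_movrt_set_dest the forms
   that store the inverted T bit, including the negc based replacement
   that is wrapped in a PARALLEL; both return the destination register
   rtx or null.  */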
13762 /* Given an insn and a reg number, tell whether the reg dies or is unused
13763 after the insn. */
13764 bool
13765 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13767 return find_regno_note (i, REG_DEAD, regno) != NULL
13768 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13771 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13772 mark it as being used after the insn. */
13773 void
13774 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13776 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13777 remove_note (i, n);
13778 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13779 remove_note (i, n);
13782 /* Given an insn check if it contains any post/pre inc/dec mem operands and
13783 add the REG_INC notes accordingly.
13784 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13785 FIXME: This function is currently used by peephole2 patterns because
13786 the peephole2 pass does not preserve REG_INC notes. If the notes
13787 are dropped the following passes will do wrong things. */
13788 rtx_insn*
13789 sh_check_add_incdec_notes (rtx_insn* i)
13791 struct for_each_inc_dec_clb
13793 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13794 rtx dest, rtx src ATTRIBUTE_UNUSED,
13795 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13797 gcc_assert (REG_P (dest));
13799 rtx_insn* i = (rtx_insn*)arg;
13800 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13801 add_reg_note (i, REG_INC, dest);
13803 return 0;
13807 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13808 return i;
13811 /* Given an op rtx and an insn, try to find out whether the result of the
13812 specified op consists only of logical operations on T bit stores. */
13813 bool
13814 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13816 if (!logical_operator (op, SImode))
13817 return false;
13819 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13820 int op_is_t_count = 0;
13822 for (int i = 0; i < 2; ++i)
13824 if (t_reg_operand (ops[i], VOIDmode)
13825 || negt_reg_operand (ops[i], VOIDmode))
13826 op_is_t_count++;
13828 else
13830 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13831 prev_nonnote_insn_bb);
13832 if (op_set.set_src == NULL_RTX)
13833 continue;
13835 if (t_reg_operand (op_set.set_src, VOIDmode)
13836 || negt_reg_operand (op_set.set_src, VOIDmode)
13837 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13838 op_is_t_count++;
13842 return op_is_t_count == 2;
13845 /* Given the operand that is extended in a sign/zero extend insn, and the
13846 insn, try to figure out whether the sign/zero extension can be replaced
13847 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13848 NULL_RTX otherwise. */
13849 rtx
13850 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13852 if (REG_P (extended_op))
13853 extended_op = extended_op;
13854 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13855 extended_op = SUBREG_REG (extended_op);
13856 else
13857 return NULL_RTX;
13859 /* Reg moves must be of the same mode. */
13860 if (GET_MODE (extended_op) != SImode)
13861 return NULL_RTX;
13863 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13864 if (s.set_src == NULL_RTX)
13865 return NULL_RTX;
13867 if (t_reg_operand (s.set_src, VOIDmode)
13868 || negt_reg_operand (s.set_src, VOIDmode))
13869 return extended_op;
13871 /* If the zero extended reg was formed by a logical operation, check the
13872 operands of the logical operation. If both originated from T bit
13873 stores the zero extension can be eliminated. */
13874 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13875 return extended_op;
13877 return NULL_RTX;
13880 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13881 figure out whether it should be converted into a movt-xor sequence in
13882 the movrt_negc splitter.
13883 Returns true if insns have been modified and the splitter has succeeded. */
13884 bool
13885 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13887 /* In cases such as
13888 tst r4,r4
13889 mov #-1,r1
13890 negc r1,r1
13891 tst r4,r4
13892 we can replace the T bit clobbering negc with a movt-xor sequence and
13893 eliminate the redundant comparison.
13894 Because the xor insn depends on register allocation results, allow this
13895 only before reload. */
13896 if (!can_create_pseudo_p ())
13897 return false;
13899 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13900 prev_nonnote_insn_bb);
13901 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13902 next_nonnote_insn_bb);
13904 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13905 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13906 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13907 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13908 t_before_negc.insn,
13909 t_after_negc.insn)
13910 && !sh_unspec_insn_p (t_after_negc.insn)
13911 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13912 && !side_effects_p (PATTERN (t_after_negc.insn))
13913 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13915 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13916 set_insn_deleted (t_after_negc.insn);
13917 return true;
13919 else
13920 return false;
13923 /* Given a reg and the current insn, see if the value of the reg originated
13924 from a sign or zero extension and return the discovered information. */
13925 sh_extending_set_of_reg
13926 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13928 if (reg == NULL)
13929 return sh_extending_set_of_reg (curr_insn);
13931 if (SUBREG_P (reg))
13932 reg = SUBREG_REG (reg);
13934 if (!REG_P (reg))
13935 return sh_extending_set_of_reg (curr_insn);
13937 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13938 only the adjacent predecessor blocks would cover most of the cases.
13939 Also try to look through the first extension that we hit. There are some
13940 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13941 fails to see the sign_extend. */
13942 sh_extending_set_of_reg result =
13943 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13945 if (result.set_src != NULL)
13947 if (GET_CODE (result.set_src) == SIGN_EXTEND
13948 || GET_CODE (result.set_src) == ZERO_EXTEND)
13950 if (dump_file)
13951 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13952 "explicitly sign/zero extended in insn %d\n",
13953 REGNO (reg), INSN_UID (result.insn));
13954 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13955 result.ext_code = GET_CODE (result.set_src);
13957 else if (MEM_P (result.set_src)
13958 && (GET_MODE (result.set_src) == QImode
13959 || GET_MODE (result.set_src) == HImode)
13960 && !sh_unspec_insn_p (result.insn))
13962 /* On SH QIHImode memory loads always sign extend. However, in
13963 some cases where it seems that the higher bits are not
13964 interesting, the loads will not be expanded as sign extending
13965 insns, but as QIHImode loads into QIHImode regs. We report that
13966 the reg has been sign extended by the mem load. When it is used
13967 as such, we must convert the mem load into a sign extending insn,
13968 see also sh_extending_set_of_reg::use_as_extended_reg. */
13969 if (dump_file)
13970 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13971 "implicitly sign extended in insn %d\n",
13972 REGNO (reg), INSN_UID (result.insn));
13973 result.from_mode = GET_MODE (result.set_src);
13974 result.ext_code = SIGN_EXTEND;
13978 return result;
13981 /* Given a reg that is known to be sign or zero extended at some insn,
13982 take the appropriate measures so that the extended value can be used as
13983 a reg at the specified insn and return the resulting reg rtx. */
13984 rtx
13985 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
13987 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
13988 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
13989 gcc_assert (from_mode == QImode || from_mode == HImode);
13991 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
13993 if (dump_file)
13994 fprintf (dump_file,
13995 "use_as_extended_reg: converting non-extending mem load in "
13996 "insn %d into sign-extending load\n", INSN_UID (insn));
13998 rtx r = gen_reg_rtx (SImode);
13999 rtx_insn* i0;
14000 if (from_mode == QImode)
14001 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14002 else if (from_mode == HImode)
14003 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14004 else
14005 gcc_unreachable ();
14007 emit_insn_after (
14008 gen_move_insn (XEXP (set_rtx, 0),
14009 gen_lowpart (GET_MODE (set_src), r)), i0);
14010 set_insn_deleted (insn);
14011 return r;
14013 else
14015 rtx extension_dst = XEXP (set_rtx, 0);
14016 if (modified_between_p (extension_dst, insn, use_at_insn))
14018 if (dump_file)
14019 fprintf (dump_file,
14020 "use_as_extended_reg: dest reg %d of extending insn %d is "
14021 "modified, inserting a reg-reg copy\n",
14022 REGNO (extension_dst), INSN_UID (insn));
14024 rtx r = gen_reg_rtx (SImode);
14025 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14026 return r;
14028 else
14030 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14031 return extension_dst;
14036 bool
14037 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14039 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14040 && (from_mode == QImode || from_mode == HImode)
14041 && set_src != NULL)
14042 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14043 else
14044 return false;
14047 rtx
14048 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14050 gcc_assert (can_use_as_unextended_reg ());
14052 rtx r = XEXP (set_src, 0);
14053 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14055 if (modified_between_p (r, insn, use_at_insn))
14057 rtx r1 = gen_reg_rtx (SImode);
14058 emit_insn_after (gen_move_insn (r1, r0), insn);
14059 return r1;
14061 else
14063 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14064 ? REGNO (SUBREG_REG (r))
14065 : REGNO (r));
14066 return r0;
14070 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14071 perform the necessary checks on the operands and split it accordingly. */
14072 void
14073 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14074 int subreg_offset, rtx operands[])
14076 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14078 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14079 curr_insn);
14080 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14081 curr_insn);
14083 /* If one of the operands is known to be zero extended, that's already
14084 sufficient to mask out the unwanted high bits. */
14085 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14087 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14088 operands[1]));
14089 return;
14091 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14093 emit_insn (gen_tstsi_t (operands[0],
14094 eop1.use_as_extended_reg (curr_insn)));
14095 return;
14098 /* None of the operands seem to be zero extended.
14099 If both are sign extended it's OK, too. */
14100 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14101 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14103 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14104 eop1.use_as_extended_reg (curr_insn)));
14105 return;
14108 /* Otherwise we have to insert a zero extension on one of the operands to
14109 mask out the unwanted high bits.
14110 Prefer the operand that has no known extension. */
14111 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14112 std::swap (operands[0], operands[1]);
14114 rtx tmp0 = gen_reg_rtx (SImode);
14115 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14116 GET_MODE (operands[0]), subreg_offset);
14117 emit_insn (subreg_mode == QImode
14118 ? gen_zero_extendqisi2 (tmp0, tmp1)
14119 : gen_zero_extendhisi2 (tmp0, tmp1));
14120 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14123 /* A helper class to increment/decrement a counter variable each time a
14124 function is entered/left. */
14125 class scope_counter
14127 public:
14128 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14130 ~scope_counter (void)
14132 --m_counter;
14133 gcc_assert (m_counter >= 0);
14136 int count (void) const { return m_counter; }
14138 private:
14139 int& m_counter;
14142 /* Given an rtx x, determine whether the expression can be used to create
14143 an insn that calculates x and stores the result in the T bit.
14144 This is used by the 'treg_set_expr' predicate to construct insns sequences
14145 where T bit results are fed into other insns, such as addc, subc, negc
14146 insns.
14148 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14149 distinguish between 'positive' and 'negative' forms. For now this has to
14150 be done in the preparation code. We could also introduce
14151 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14152 two different patterns for the 'positive' and 'negative' forms. However,
14153 the total amount of lines of code seems to be about the same and the
14154 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14155 recog function would need to look inside the expression by temporarily
14156 splitting it. */
14157 static int sh_recog_treg_set_expr_reent_count = 0;
14159 bool
14160 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14162 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14164 /* Limit the recursion count to avoid nested expressions which we can't
14165 resolve to a single treg set insn. */
14166 if (recursion.count () > 1)
14167 return false;
14169 /* Early accept known possible operands before doing recog. */
14170 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14171 return true;
14173 /* Early reject impossible operands before doing recog.
14174 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14175 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14176 such as lower-subreg will bail out. Some insns such as SH4A movua are
14177 done with UNSPEC, so we must reject those too, or else it would result
14178 in an invalid reg -> treg move. */
14179 if (register_operand (op, mode) || memory_operand (op, mode)
14180 || sh_unspec_insn_p (op))
14181 return false;
14183 if (!can_create_pseudo_p ())
14184 return false;
14186 /* We are going to invoke recog in a re-entrant way and thus
14187 have to capture its current state and restore it afterwards. */
14188 recog_data_d prev_recog_data = recog_data;
14190 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14191 SET_PREV_INSN (i) = NULL;
14192 SET_NEXT_INSN (i) = NULL;
14194 int result = recog (PATTERN (i), i, 0);
14196 /* It seems there is no insn like that. Create a simple negated
14197 version and try again. If we hit a negated form, we'll allow that
14198 and append a nott sequence when splitting out the insns. Insns that
14199 do the split can then remove the trailing nott if they know how to
14200 deal with it. */
14201 if (result < 0 && GET_CODE (op) == EQ)
14203 PUT_CODE (op, NE);
14204 result = recog (PATTERN (i), i, 0);
14205 PUT_CODE (op, EQ);
14207 if (result < 0 && GET_CODE (op) == NE)
14209 PUT_CODE (op, EQ);
14210 result = recog (PATTERN (i), i, 0);
14211 PUT_CODE (op, NE);
14214 recog_data = prev_recog_data;
14215 return result >= 0;
14218 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14219 This can be used as a condition for insn/split patterns to allow certain
14220 T bit setting patterns only to be matched as sub expressions of other
14221 patterns. */
14222 bool
14223 sh_in_recog_treg_set_expr (void)
14225 return sh_recog_treg_set_expr_reent_count > 0;
14228 /* Given an rtx x, which is assumed to be some expression that has been
14229 matched by the 'treg_set_expr' predicate before, split and emit the
14230 insns that are necessary to calculate the expression and store the result
14231 in the T bit.
14232 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14233 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14234 'delete_insn' which then causes the DF parts to bail out, because we
14235 currently are inside another gen_split* function and would invoke
14236 'try_split' in a reentrant way. */
14237 static std::pair<rtx_insn*, rtx_insn*>
14238 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14240 if (dump_file)
14242 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14243 print_rtl_single (dump_file, i);
14244 fprintf (dump_file, "\n");
14247 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
14249 if (seq == NULL)
14250 return std::make_pair (i, i);
14252 /* Avoid infinite splitter loops if any insn of the result matches
14253 the original pattern. */
14254 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14255 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14256 return std::make_pair (i, i);
14258 unshare_all_rtl_in_chain (seq);
14260 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14261 a linked list, replace the single insn with the new insns. */
14262 rtx_insn* seqlast = seq;
14263 while (NEXT_INSN (seqlast) != NULL)
14264 seqlast = NEXT_INSN (seqlast);
14266 if (rtx_insn* iprev = PREV_INSN (i))
14267 SET_NEXT_INSN (iprev) = seq;
14268 if (rtx_insn* inext = NEXT_INSN (i))
14269 SET_PREV_INSN (inext) = seqlast;
14271 SET_PREV_INSN (seq) = PREV_INSN (i);
14272 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14274 SET_PREV_INSN (i) = NULL;
14275 SET_NEXT_INSN (i) = NULL;
14277 /* Recursively split all insns. */
14278 for (i = seq; ; i = NEXT_INSN (i))
14280 std::pair<rtx_insn*, rtx_insn*> ii =
14281 sh_try_split_insn_simple (i, curr_insn, n + 1);
14282 if (i == seq)
14283 seq = ii.first;
14284 if (i == seqlast)
14286 seqlast = ii.second;
14287 break;
14289 i = ii.first;
14292 return std::make_pair (seq, seqlast);
14295 sh_treg_insns
14296 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14298 if (t_reg_operand (x, VOIDmode))
14299 return sh_treg_insns ();
14301 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14303 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14304 SET_PREV_INSN (i) = NULL;
14305 SET_NEXT_INSN (i) = NULL;
14307 if (dump_file)
14309 fprintf (dump_file, "split_treg_set_expr insn:\n");
14310 print_rtl (dump_file, i);
14311 fprintf (dump_file, "\n");
14314 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14315 have to capture its current state and restore it afterwards. */
14316 recog_data_d prev_recog_data = recog_data;
14318 int insn_code = recog (PATTERN (i), i, 0);
14320 /* If the insn was not found, see if we matched the negated form before
14321 and append a nott. */
14322 bool append_nott = false;
14324 if (insn_code < 0 && GET_CODE (x) == EQ)
14326 PUT_CODE (x, NE);
14327 insn_code = recog (PATTERN (i), i, 0);
14328 if (insn_code >= 0)
14329 append_nott = true;
14330 else
14331 PUT_CODE (x, EQ);
14333 if (insn_code < 0 && GET_CODE (x) == NE)
14335 PUT_CODE (x, EQ);
14336 insn_code = recog (PATTERN (i), i, 0);
14337 if (insn_code >= 0)
14338 append_nott = true;
14339 else
14340 PUT_CODE (x, NE);
14343 gcc_assert (insn_code >= 0);
14345 /* Try to recursively split the insn. Some insns might refuse to split
14346 any further while we are in the treg_set_expr splitting phase. They
14347 will be emitted as part of the outer insn and then split again. */
14348 std::pair<rtx_insn*, rtx_insn*> insnlist =
14349 sh_try_split_insn_simple (i, curr_insn);
14351 /* Restore recog state. */
14352 recog_data = prev_recog_data;
14354 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14355 ? insnlist.second
14356 : NULL;
14357 if (dump_file)
14359 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14360 print_rtl (dump_file, insnlist.first);
14361 fprintf (dump_file, "\n");
14363 if (nott_insn != NULL)
14364 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14367 emit_insn (insnlist.first);
14369 if (nott_insn != NULL && append_nott)
14371 if (dump_file)
14372 fprintf (dump_file, "removing trailing nott\n");
14373 remove_insn (nott_insn);
14374 nott_insn = NULL;
14375 append_nott = false;
14378 if (append_nott)
14379 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14381 rtx_insn* first_insn = get_insns ();
14383 if (dump_file)
14385 fprintf (dump_file, "resulting insns:\n");
14386 print_rtl (dump_file, first_insn);
14387 fprintf (dump_file, "\n");
14390 return sh_treg_insns (first_insn, nott_insn);
14393 /*------------------------------------------------------------------------------
14394 Mode switching support code.
14395 */
14397 static void
14398 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14399 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14401 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14402 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14404 emit_insn (gen_toggle_pr ());
14405 if (TARGET_FMOVD)
14406 emit_insn (gen_toggle_sz ());
14408 else if (mode != FP_MODE_NONE)
14410 rtx tmp = gen_reg_rtx (SImode);
14411 emit_insn (gen_sts_fpscr (tmp));
14412 rtx i = NULL;
14414 const unsigned HOST_WIDE_INT fpbits =
14415 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14417 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14418 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14419 else if (mode == FP_MODE_SINGLE)
14420 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14421 else if (mode == FP_MODE_DOUBLE)
14422 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14423 else
14424 gcc_unreachable ();
14426 emit_insn (i);
14427 emit_insn (gen_lds_fpscr (tmp));
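/* For example, on targets without the fpscr toggle insns the else
   branch above reads FPSCR into a temporary, then either flips the PR
   bit (and SZ when -mfmovd is in effect) with an xor if the previous
   mode is known to differ, or forces the bits with an and / ior if the
   previous mode is unknown, and finally writes the value back with
   lds.  */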
14431 static int
14432 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14434 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14437 static int
14438 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14440 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
14441 get_attr_fp_set (insn) != FP_SET_NONE)
14442 return (int) get_attr_fp_set (insn);
14443 else
14444 return mode;
14447 static int
14448 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14450 return NORMAL_MODE (entity);
14453 static int
14454 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14456 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14459 static int
14460 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14462 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14465 /*------------------------------------------------------------------------------
14466 Misc
14467 */
14469 /* Return true if we use LRA instead of reload pass. */
14470 static bool
14471 sh_lra_p (void)
14473 return sh_lra_flag;
14476 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14478 static bool
14479 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14480 unsigned int align,
14481 enum by_pieces_operation op,
14482 bool speed_p)
14484 switch (op)
14486 case MOVE_BY_PIECES:
14487 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14488 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14489 case STORE_BY_PIECES:
14490 case SET_BY_PIECES:
14491 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14492 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14493 default:
14494 return default_use_by_pieces_infrastructure_p (size, align,
14495 op, speed_p);
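/* As a rough reading of the thresholds above: when optimizing for speed
   and the data is at least 32-bit aligned, a block move or store is
   expanded inline if it needs fewer than 16 piecewise insns; with
   smaller alignment, or when optimizing for size, the limit drops to
   fewer than 2 such insns.  */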
14499 #include "gt-sh.h"