gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stringpool.h"
36 #include "stor-layout.h"
37 #include "calls.h"
38 #include "varasm.h"
39 #include "flags.h"
40 #include "hard-reg-set.h"
41 #include "function.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "emit-rtl.h"
46 #include "stmt.h"
47 #include "expr.h"
48 #include "insn-codes.h"
49 #include "optabs.h"
50 #include "reload.h"
51 #include "regs.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "dwarf2.h"
57 #include "tm_p.h"
58 #include "target.h"
59 #include "langhooks.h"
60 #include "predict.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "basic-block.h"
69 #include "df.h"
70 #include "intl.h"
71 #include "sched-int.h"
72 #include "params.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "alloc-pool.h"
82 #include "tm-constrs.h"
83 #include "opts.h"
84 #include "tree-pass.h"
85 #include "pass_manager.h"
86 #include "context.h"
87 #include "builtins.h"
88 #include "rtl-iter.h"
90 #include "target-def.h"
92 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
94 /* These are some macros to abstract register modes. */
95 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
96 && ((HOST_WIDE_INT)(VALUE)) <= 511)
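 /* Illustration: CONST_OK_FOR_I10 accepts the signed 10-bit range -512..511,
    so CONST_OK_FOR_I10 (511) holds while CONST_OK_FOR_I10 (512) does not;
    CONST_OK_FOR_ADD below uses it for SHmedia add immediates.  */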
98 #define CONST_OK_FOR_ADD(size) \
99 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
100 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
101 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
102 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
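 /* Illustration: these wrappers select the pointer-sized generator, so a call
    such as
      emit_insn (GEN_ADD3 (reg, reg, GEN_INT (8)));
    emits an addsi3 on 32-bit targets and an adddi3 on SHmedia64 (a sketch of
    the typical use, e.g. in the stack adjustment code later in this file).  */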
104 /* Used to simplify the logic below. Find the attributes wherever
105 they may be. */
106 #define SH_ATTRIBUTES(decl) \
107 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
108 : DECL_ATTRIBUTES (decl) \
109 ? (DECL_ATTRIBUTES (decl)) \
110 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
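 /* Illustration: SH_ATTRIBUTES yields TYPE_ATTRIBUTES for a type node,
    DECL_ATTRIBUTES for a decl that has any, and otherwise the attributes of
    the decl's type, so the attribute handlers below can be handed either
    kind of tree node.  */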
112 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
113 int current_function_interrupt;
115 tree sh_deferred_function_attributes;
116 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
118 /* Global variables for machine-dependent things. */
 120 /* The CPU we are scheduling for.  */
121 enum processor_type sh_cpu;
123 /* Definitions used in ready queue reordering for first scheduling pass. */
125 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
126 static short *regmode_weight[2];
128 /* Total SFmode and SImode weights of scheduled insns. */
129 static int curr_regmode_pressure[2];
131 /* Number of r0 life regions. */
132 static int r0_life_regions;
134 /* If true, skip cycles for Q -> R movement. */
135 static int skip_cycles = 0;
137 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
138 and returned from sh_reorder2. */
139 static short cached_can_issue_more;
141 /* Unique number for UNSPEC_BBR pattern. */
142 static unsigned int unspec_bbr_uid = 1;
 144 /* Provides the class number of the smallest class containing
 145 each register number.  */
146 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
148 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
174 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
175 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
176 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
177 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
180 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
181 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
182 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
183 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
184 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
185 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
186 GENERAL_REGS, GENERAL_REGS,
189 char sh_register_names[FIRST_PSEUDO_REGISTER] \
190 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
192 char sh_additional_register_names[ADDREGNAMES_SIZE] \
193 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
194 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx_insn *);
201 static int branch_dest (rtx);
202 static void print_slot (rtx_sequence *);
203 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
204 static void dump_table (rtx_insn *, rtx_insn *);
205 static bool broken_move (rtx_insn *);
206 static bool mova_p (rtx_insn *);
207 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
208 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
209 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
210 static void sh_reorg (void);
211 static void sh_option_override (void);
212 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
213 static rtx_insn *frame_insn (rtx);
214 static rtx push (int);
215 static void pop (int);
216 static void push_regs (HARD_REG_SET *, int);
217 static int calc_live_regs (HARD_REG_SET *);
218 static HOST_WIDE_INT rounded_frame_size (int);
219 static bool sh_frame_pointer_required (void);
220 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
221 static int sh_mode_needed (int, rtx_insn *);
222 static int sh_mode_after (int, int, rtx_insn *);
223 static int sh_mode_entry (int);
224 static int sh_mode_exit (int);
225 static int sh_mode_priority (int entity, int n);
226 static bool sh_lra_p (void);
228 static rtx mark_constant_pool_use (rtx);
229 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
230 int, bool *);
231 static tree sh_handle_resbank_handler_attribute (tree *, tree,
232 tree, int, bool *);
233 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
234 tree, int, bool *);
235 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
236 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
237 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
238 static void sh_print_operand (FILE *, rtx, int);
239 static void sh_print_operand_address (FILE *, rtx);
240 static bool sh_print_operand_punct_valid_p (unsigned char code);
241 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
242 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
243 static void sh_insert_attributes (tree, tree *);
244 static const char *sh_check_pch_target_flags (int);
245 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
246 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
247 static int sh_issue_rate (void);
248 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
249 static short find_set_regmode_weight (rtx, machine_mode);
250 static short find_insn_regmode_weight (rtx, machine_mode);
251 static void find_regmode_weight (basic_block, machine_mode);
252 static int find_r0_life_regions (basic_block);
253 static void sh_md_init_global (FILE *, int, int);
254 static void sh_md_finish_global (FILE *, int);
255 static int rank_for_reorder (const void *, const void *);
256 static void swap_reorder (rtx_insn **, int);
257 static void ready_reorder (rtx_insn **, int);
258 static bool high_pressure (machine_mode);
259 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
260 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
261 static void sh_md_init (FILE *, int, int);
262 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
264 static bool sh_function_ok_for_sibcall (tree, tree);
266 static bool sh_cannot_modify_jumps_p (void);
267 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
268 static reg_class_t sh_target_reg_class (void);
269 static bool sh_optimize_target_register_callee_saved (bool);
270 static bool sh_ms_bitfield_layout_p (const_tree);
272 static void sh_init_builtins (void);
273 static tree sh_builtin_decl (unsigned, bool);
274 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
275 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
276 HOST_WIDE_INT, tree);
277 static void sh_file_start (void);
278 static bool flow_dependent_p (rtx, rtx);
279 static void flow_dependent_p_1 (rtx, const_rtx, void *);
280 static int shiftcosts (rtx);
281 static int and_xor_ior_costs (rtx, int);
282 static int addsubcosts (rtx);
283 static int multcosts (rtx);
284 static bool unspec_caller_rtx_p (rtx);
285 static bool sh_cannot_copy_insn_p (rtx_insn *);
286 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
287 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
288 static int sh_pr_n_sets (void);
289 static rtx sh_allocate_initial_value (rtx);
290 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
291 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
292 machine_mode,
293 struct secondary_reload_info *);
294 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
295 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
296 static rtx sh_delegitimize_address (rtx);
297 static bool sh_cannot_substitute_mem_equiv_p (rtx);
298 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
299 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
300 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
301 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
302 static int scavenge_reg (HARD_REG_SET *s);
303 struct save_schedule_s;
304 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
305 struct save_schedule_s *, int);
307 static rtx sh_struct_value_rtx (tree, int);
308 static rtx sh_function_value (const_tree, const_tree, bool);
309 static bool sh_function_value_regno_p (const unsigned int);
310 static rtx sh_libcall_value (machine_mode, const_rtx);
311 static bool sh_return_in_memory (const_tree, const_tree);
312 static rtx sh_builtin_saveregs (void);
313 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
314 tree, int *, int);
315 static bool sh_strict_argument_naming (cumulative_args_t);
316 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
317 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
318 static tree sh_build_builtin_va_list (void);
319 static void sh_va_start (tree, rtx);
320 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
321 static bool sh_promote_prototypes (const_tree);
322 static machine_mode sh_promote_function_mode (const_tree type,
323 machine_mode,
324 int *punsignedp,
325 const_tree funtype,
326 int for_return);
327 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
328 const_tree, bool);
329 static bool sh_callee_copies (cumulative_args_t, machine_mode,
330 const_tree, bool);
331 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
332 tree, bool);
333 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
334 const_tree, bool);
335 static rtx sh_function_arg (cumulative_args_t, machine_mode,
336 const_tree, bool);
337 static bool sh_scalar_mode_supported_p (machine_mode);
338 static int sh_dwarf_calling_convention (const_tree);
339 static void sh_encode_section_info (tree, rtx, int);
340 static bool sh2a_function_vector_p (tree);
341 static void sh_trampoline_init (rtx, tree, rtx);
342 static rtx sh_trampoline_adjust_address (rtx);
343 static void sh_conditional_register_usage (void);
344 static bool sh_legitimate_constant_p (machine_mode, rtx);
345 static int mov_insn_size (machine_mode, bool);
346 static int mov_insn_alignment_mask (machine_mode, bool);
347 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
348 unsigned int,
349 enum by_pieces_operation,
350 bool);
351 static bool sequence_insn_p (rtx_insn *);
352 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
353 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
354 machine_mode, bool);
355 static bool sh_legitimate_combined_insn (rtx_insn* insn);
357 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
359 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
361 static const struct attribute_spec sh_attribute_table[] =
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
364 affects_type_identity } */
365 { "interrupt_handler", 0, 0, true, false, false,
366 sh_handle_interrupt_handler_attribute, false },
367 { "sp_switch", 1, 1, true, false, false,
368 sh_handle_sp_switch_attribute, false },
369 { "trap_exit", 1, 1, true, false, false,
370 sh_handle_trap_exit_attribute, false },
371 { "renesas", 0, 0, false, true, false,
372 sh_handle_renesas_attribute, false },
373 { "trapa_handler", 0, 0, true, false, false,
374 sh_handle_interrupt_handler_attribute, false },
375 { "nosave_low_regs", 0, 0, true, false, false,
376 sh_handle_interrupt_handler_attribute, false },
377 { "resbank", 0, 0, true, false, false,
378 sh_handle_resbank_handler_attribute, false },
379 { "function_vector", 1, 1, true, false, false,
380 sh2a_handle_function_vector_handler_attribute, false },
381 { NULL, 0, 0, false, false, false, NULL, false }
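 /* Illustration of the user-level syntax these handlers validate (see the GCC
    attribute documentation for SH; hypothetical declarations):
      void isr (void) __attribute__ ((interrupt_handler));
      void isr2 (void) __attribute__ ((interrupt_handler,
                                       sp_switch ("alt_stack"),
                                       trap_exit (11)));  */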
384 /* Initialize the GCC target structure. */
385 #undef TARGET_ATTRIBUTE_TABLE
386 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
388 /* The next two are used for debug info when compiling with -gdwarf. */
389 #undef TARGET_ASM_UNALIGNED_HI_OP
390 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
391 #undef TARGET_ASM_UNALIGNED_SI_OP
392 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
394 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
395 #undef TARGET_ASM_UNALIGNED_DI_OP
396 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
397 #undef TARGET_ASM_ALIGNED_DI_OP
398 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
400 #undef TARGET_OPTION_OVERRIDE
401 #define TARGET_OPTION_OVERRIDE sh_option_override
403 #undef TARGET_PRINT_OPERAND
404 #define TARGET_PRINT_OPERAND sh_print_operand
405 #undef TARGET_PRINT_OPERAND_ADDRESS
406 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
407 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
408 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
409 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
410 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
415 #undef TARGET_ASM_OUTPUT_MI_THUNK
416 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
418 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
419 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
420 hook_bool_const_tree_hwi_hwi_const_tree_true
422 #undef TARGET_ASM_FILE_START
423 #define TARGET_ASM_FILE_START sh_file_start
424 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
425 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
427 #undef TARGET_REGISTER_MOVE_COST
428 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
430 #undef TARGET_INSERT_ATTRIBUTES
431 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
433 #undef TARGET_SCHED_ADJUST_COST
434 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
436 #undef TARGET_SCHED_ISSUE_RATE
437 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
439 /* The next 5 hooks have been implemented for reenabling sched1. With the
440 help of these macros we are limiting the movement of insns in sched1 to
441 reduce the register pressure. The overall idea is to keep count of SImode
442 and SFmode regs required by already scheduled insns. When these counts
 443 cross some threshold values, priority is given to insns that free registers.
444 The insn that frees registers is most likely to be the insn with lowest
445 LUID (original insn order); but such an insn might be there in the stalled
446 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
447 up to a max of 8 cycles so that such insns may move from Q -> R.
 449 The hooks are described below:
 451 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
 452 scheduler; it is called inside sched_init just after the call to
 453 find_insn_reg_weights. It is used to calculate the SImode and SFmode
 454 weights of the insns of basic blocks, much like what
 455 find_insn_reg_weights does.
456 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
458 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
459 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
460 (Q)->(R).
462 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
463 high; reorder the ready queue so that the insn with lowest LUID will be
464 issued next.
466 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
467 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
469 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
470 can be returned from TARGET_SCHED_REORDER2.
472 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
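 /* A rough sketch (not the actual implementation, which lives further down in
    this file) of the reorder step described above:
      if (high_pressure (SImode) || high_pressure (SFmode))
        ready_reorder (ready, n_ready);   // sorts via rank_for_reorder
    i.e. once either pressure counter passes its threshold, the ready list is
    resorted so that the insn with the lowest LUID is issued first.  */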
474 #undef TARGET_SCHED_DFA_NEW_CYCLE
475 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
477 #undef TARGET_SCHED_INIT_GLOBAL
478 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
480 #undef TARGET_SCHED_FINISH_GLOBAL
481 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
483 #undef TARGET_SCHED_VARIABLE_ISSUE
484 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
486 #undef TARGET_SCHED_REORDER
487 #define TARGET_SCHED_REORDER sh_reorder
489 #undef TARGET_SCHED_REORDER2
490 #define TARGET_SCHED_REORDER2 sh_reorder2
492 #undef TARGET_SCHED_INIT
493 #define TARGET_SCHED_INIT sh_md_init
495 #undef TARGET_DELEGITIMIZE_ADDRESS
496 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
498 #undef TARGET_LEGITIMIZE_ADDRESS
499 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
501 #undef TARGET_CANNOT_MODIFY_JUMPS_P
502 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
503 #undef TARGET_CAN_FOLLOW_JUMP
504 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
505 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
506 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
507 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
508 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
509 sh_optimize_target_register_callee_saved
511 #undef TARGET_MS_BITFIELD_LAYOUT_P
512 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
514 #undef TARGET_INIT_BUILTINS
515 #define TARGET_INIT_BUILTINS sh_init_builtins
516 #undef TARGET_BUILTIN_DECL
517 #define TARGET_BUILTIN_DECL sh_builtin_decl
518 #undef TARGET_EXPAND_BUILTIN
519 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
521 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
522 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
524 #undef TARGET_CANNOT_COPY_INSN_P
525 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
526 #undef TARGET_RTX_COSTS
527 #define TARGET_RTX_COSTS sh_rtx_costs
528 #undef TARGET_ADDRESS_COST
529 #define TARGET_ADDRESS_COST sh_address_cost
530 #undef TARGET_ALLOCATE_INITIAL_VALUE
531 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
536 #undef TARGET_DWARF_REGISTER_SPAN
537 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
539 #ifdef HAVE_AS_TLS
540 #undef TARGET_HAVE_TLS
541 #define TARGET_HAVE_TLS true
542 #endif
544 #undef TARGET_PROMOTE_PROTOTYPES
545 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
549 #undef TARGET_FUNCTION_VALUE
550 #define TARGET_FUNCTION_VALUE sh_function_value
551 #undef TARGET_FUNCTION_VALUE_REGNO_P
552 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
553 #undef TARGET_LIBCALL_VALUE
554 #define TARGET_LIBCALL_VALUE sh_libcall_value
555 #undef TARGET_STRUCT_VALUE_RTX
556 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
557 #undef TARGET_RETURN_IN_MEMORY
558 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
560 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
561 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
562 #undef TARGET_SETUP_INCOMING_VARARGS
563 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
564 #undef TARGET_STRICT_ARGUMENT_NAMING
565 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
566 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
567 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
568 #undef TARGET_MUST_PASS_IN_STACK
569 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
570 #undef TARGET_PASS_BY_REFERENCE
571 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
572 #undef TARGET_CALLEE_COPIES
573 #define TARGET_CALLEE_COPIES sh_callee_copies
574 #undef TARGET_ARG_PARTIAL_BYTES
575 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
576 #undef TARGET_FUNCTION_ARG
577 #define TARGET_FUNCTION_ARG sh_function_arg
578 #undef TARGET_FUNCTION_ARG_ADVANCE
579 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
581 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
582 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
584 #undef TARGET_BUILD_BUILTIN_VA_LIST
585 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
586 #undef TARGET_EXPAND_BUILTIN_VA_START
587 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
588 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
589 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
591 #undef TARGET_SCALAR_MODE_SUPPORTED_P
592 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
593 #undef TARGET_VECTOR_MODE_SUPPORTED_P
594 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
596 #undef TARGET_CHECK_PCH_TARGET_FLAGS
597 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
599 #undef TARGET_DWARF_CALLING_CONVENTION
600 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
602 #undef TARGET_FRAME_POINTER_REQUIRED
603 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
605 #undef TARGET_MODE_EMIT
606 #define TARGET_MODE_EMIT sh_emit_mode_set
608 #undef TARGET_MODE_NEEDED
609 #define TARGET_MODE_NEEDED sh_mode_needed
611 #undef TARGET_MODE_AFTER
612 #define TARGET_MODE_AFTER sh_mode_after
614 #undef TARGET_MODE_ENTRY
615 #define TARGET_MODE_ENTRY sh_mode_entry
617 #undef TARGET_MODE_EXIT
618 #define TARGET_MODE_EXIT sh_mode_exit
620 #undef TARGET_MODE_PRIORITY
621 #define TARGET_MODE_PRIORITY sh_mode_priority
623 /* Return regmode weight for insn. */
624 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
625 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
627 /* Return current register pressure for regmode. */
628 #define CURR_REGMODE_PRESSURE(MODE)\
629 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
631 #undef TARGET_ENCODE_SECTION_INFO
632 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
634 #undef TARGET_LRA_P
635 #define TARGET_LRA_P sh_lra_p
637 #undef TARGET_SECONDARY_RELOAD
638 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
640 #undef TARGET_PREFERRED_RELOAD_CLASS
641 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
643 #undef TARGET_CONDITIONAL_REGISTER_USAGE
644 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
646 #undef TARGET_LEGITIMATE_ADDRESS_P
647 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
649 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
650 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
652 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
653 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
654 sh_legitimize_address_displacement
656 #undef TARGET_TRAMPOLINE_INIT
657 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
658 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
659 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
661 #undef TARGET_LEGITIMATE_CONSTANT_P
662 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
664 #undef TARGET_CANONICALIZE_COMPARISON
665 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
667 #undef TARGET_LEGITIMATE_COMBINED_INSN
668 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
670 #undef TARGET_FIXED_CONDITION_CODE_REGS
671 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
673 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
674 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
675 sh_use_by_pieces_infrastructure_p
677 /* Machine-specific symbol_ref flags. */
678 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
680 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
681 is used by optabs.c atomic op expansion code as well as in sync.md. */
682 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
683 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
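 /* Illustration of why 0x80 is the truth value: the SH sequence
      tas.b @Rn    ! T = (byte at Rn == 0); then the byte is OR'ed with 0x80
    leaves 0x80 in memory once the lock byte has been claimed, so a later
    atomic test-and-set reads that value back as "already set".  */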
685 struct gcc_target targetm = TARGET_INITIALIZER;
688 /* Information on the currently selected atomic model.
689 This is initialized in sh_option_override. */
690 static sh_atomic_model selected_atomic_model_;
692 const sh_atomic_model&
693 selected_atomic_model (void)
695 return selected_atomic_model_;
698 static sh_atomic_model
699 parse_validate_atomic_model_option (const char* str)
701 const char* model_names[sh_atomic_model::num_models];
702 model_names[sh_atomic_model::none] = "none";
703 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
704 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
705 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
706 model_names[sh_atomic_model::soft_imask] = "soft-imask";
708 const char* model_cdef_names[sh_atomic_model::num_models];
709 model_cdef_names[sh_atomic_model::none] = "NONE";
710 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
711 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
712 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
713 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
715 sh_atomic_model ret;
716 ret.type = sh_atomic_model::none;
717 ret.name = model_names[sh_atomic_model::none];
718 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
719 ret.strict = false;
720 ret.tcb_gbr_offset = -1;
722 /* Handle empty string as 'none'. */
723 if (str == NULL || *str == '\0')
724 return ret;
726 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
728 std::vector<std::string> tokens;
729 for (std::stringstream ss (str); ss.good (); )
731 tokens.push_back (std::string ());
732 std::getline (ss, tokens.back (), ',');
735 if (tokens.empty ())
736 err_ret ("invalid atomic model option");
738 /* The first token must be the atomic model name. */
740 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
741 if (tokens.front () == model_names[i])
743 ret.type = (sh_atomic_model::enum_type)i;
744 ret.name = model_names[i];
745 ret.cdef_name = model_cdef_names[i];
746 goto got_mode_name;
749 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
750 got_mode_name:;
753 /* Go through the remaining tokens. */
754 for (size_t i = 1; i < tokens.size (); ++i)
756 if (tokens[i] == "strict")
757 ret.strict = true;
758 else if (tokens[i].find ("gbr-offset=") == 0)
760 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
761 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
762 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
763 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
764 "option", offset_str.c_str ());
766 else
767 err_ret ("unknown parameter \"%s\" in atomic model option",
768 tokens[i].c_str ());
771 /* Check that the selection makes sense. */
772 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
773 err_ret ("atomic operations are not supported on SHmedia");
775 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
776 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
777 ret.name);
779 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
780 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
782 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
783 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
785 if (ret.type == sh_atomic_model::soft_tcb
786 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
787 || (ret.tcb_gbr_offset & 3) != 0))
788 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
789 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
790 ret.name);
792 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
793 err_ret ("cannot use atomic model %s in user mode", ret.name);
795 return ret;
797 #undef err_ret
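 /* Illustration: the strings parsed above are comma-separated, as in
      -matomic-model=soft-gusa
      -matomic-model=soft-tcb,gbr-offset=16
      -matomic-model=hard-llcs,strict
    where gbr-offset must be a multiple of 4 in 0..1020 and is mandatory for
    soft-tcb (examples derived from the checks above).  */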
800 /* Register SH specific RTL passes. */
801 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
802 const char* name);
803 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
804 const char* name);
805 static void
806 register_sh_passes (void)
808 if (!TARGET_SH1)
809 return;
811 /* Running the sh_treg_combine pass after ce1 generates better code when
812 comparisons are combined and reg-reg moves are introduced, because
 813 reg-reg moves will be eliminated afterwards. However, there are quite
 814 a few cases where combine will be unable to fold comparison-related insns,
815 thus for now don't do it.
816 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
 817 PASS_POS_INSERT_AFTER, "ce1", 1);
 818 */
820 /* Run sh_treg_combine pass after combine but before register allocation. */
821 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
822 PASS_POS_INSERT_AFTER, "split1", 1);
824 /* Run sh_treg_combine pass after register allocation and basic block
825 reordering as this sometimes creates new opportunities. */
826 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
827 PASS_POS_INSERT_AFTER, "split4", 1);
829 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
830 is known after a conditional branch.
831 This must be done after basic blocks and branch conditions have
832 stabilized and won't be changed by further passes. */
833 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
834 PASS_POS_INSERT_BEFORE, "sched2", 1);
837 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
838 various options, and do some machine dependent initialization. */
839 static void
840 sh_option_override (void)
842 int regno;
844 SUBTARGET_OVERRIDE_OPTIONS;
845 if (optimize > 1 && !optimize_size)
846 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
848 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
849 TARGET_CBRANCHDI4 = 1;
850 TARGET_CMPEQDI_T = 0;
852 sh_cpu = PROCESSOR_SH1;
853 assembler_dialect = 0;
854 if (TARGET_SH2)
855 sh_cpu = PROCESSOR_SH2;
856 if (TARGET_SH2E)
857 sh_cpu = PROCESSOR_SH2E;
858 if (TARGET_SH2A)
859 sh_cpu = PROCESSOR_SH2A;
860 if (TARGET_SH3)
861 sh_cpu = PROCESSOR_SH3;
862 if (TARGET_SH3E)
863 sh_cpu = PROCESSOR_SH3E;
864 if (TARGET_SH4)
866 assembler_dialect = 1;
867 sh_cpu = PROCESSOR_SH4;
869 if (TARGET_SH4A)
871 assembler_dialect = 1;
872 sh_cpu = PROCESSOR_SH4A;
874 if (TARGET_SH5)
876 sh_cpu = PROCESSOR_SH5;
877 target_flags |= MASK_ALIGN_DOUBLE;
878 if (TARGET_SHMEDIA_FPU)
879 target_flags |= MASK_FMOVD;
880 if (TARGET_SHMEDIA)
882 /* There are no delay slots on SHmedia. */
883 flag_delayed_branch = 0;
 884 /* Relaxation isn't yet supported for SHmedia.  */
885 target_flags &= ~MASK_RELAX;
 886 /* After reload, if-conversion does little good but can cause
887 ICEs:
888 - find_if_block doesn't do anything for SH because we don't
889 have conditional execution patterns. (We use conditional
890 move patterns, which are handled differently, and only
891 before reload).
892 - find_cond_trap doesn't do anything for the SH because we
893 don't have conditional traps.
894 - find_if_case_1 uses redirect_edge_and_branch_force in
895 the only path that does an optimization, and this causes
896 an ICE when branch targets are in registers.
897 - find_if_case_2 doesn't do anything for the SHmedia after
898 reload except when it can redirect a tablejump - and
899 that's rather rare. */
900 flag_if_conversion2 = 0;
901 if (! strcmp (sh_div_str, "call"))
902 sh_div_strategy = SH_DIV_CALL;
903 else if (! strcmp (sh_div_str, "call2"))
904 sh_div_strategy = SH_DIV_CALL2;
905 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
906 sh_div_strategy = SH_DIV_FP;
907 else if (! strcmp (sh_div_str, "inv"))
908 sh_div_strategy = SH_DIV_INV;
909 else if (! strcmp (sh_div_str, "inv:minlat"))
910 sh_div_strategy = SH_DIV_INV_MINLAT;
911 else if (! strcmp (sh_div_str, "inv20u"))
912 sh_div_strategy = SH_DIV_INV20U;
913 else if (! strcmp (sh_div_str, "inv20l"))
914 sh_div_strategy = SH_DIV_INV20L;
915 else if (! strcmp (sh_div_str, "inv:call2"))
916 sh_div_strategy = SH_DIV_INV_CALL2;
917 else if (! strcmp (sh_div_str, "inv:call"))
918 sh_div_strategy = SH_DIV_INV_CALL;
919 else if (! strcmp (sh_div_str, "inv:fp"))
921 if (TARGET_FPU_ANY)
922 sh_div_strategy = SH_DIV_INV_FP;
923 else
924 sh_div_strategy = SH_DIV_INV;
926 TARGET_CBRANCHDI4 = 0;
927 /* Assembler CFI isn't yet fully supported for SHmedia. */
928 flag_dwarf2_cfi_asm = 0;
931 else
933 /* Only the sh64-elf assembler fully supports .quad properly. */
934 targetm.asm_out.aligned_op.di = NULL;
935 targetm.asm_out.unaligned_op.di = NULL;
 938 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
939 Disable it for everything else. */
940 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
941 TARGET_USERMODE = false;
943 if (TARGET_SH1)
945 if (! strcmp (sh_div_str, "call-div1"))
946 sh_div_strategy = SH_DIV_CALL_DIV1;
947 else if (! strcmp (sh_div_str, "call-fp")
948 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
949 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
950 sh_div_strategy = SH_DIV_CALL_FP;
951 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
952 sh_div_strategy = SH_DIV_CALL_TABLE;
953 else
954 /* Pick one that makes most sense for the target in general.
955 It is not much good to use different functions depending
956 on -Os, since then we'll end up with two different functions
957 when some of the code is compiled for size, and some for
958 speed. */
960 /* SH4 tends to emphasize speed. */
961 if (TARGET_HARD_SH4)
962 sh_div_strategy = SH_DIV_CALL_TABLE;
963 /* These have their own way of doing things. */
964 else if (TARGET_SH2A)
965 sh_div_strategy = SH_DIV_INTRINSIC;
966 /* ??? Should we use the integer SHmedia function instead? */
967 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
968 sh_div_strategy = SH_DIV_CALL_FP;
969 /* SH1 .. SH3 cores often go into small-footprint systems, so
970 default to the smallest implementation available. */
971 else
972 sh_div_strategy = SH_DIV_CALL_DIV1;
974 if (!TARGET_SH1)
975 TARGET_PRETEND_CMOVE = 0;
976 if (sh_divsi3_libfunc[0])
977 ; /* User supplied - leave it alone. */
978 else if (TARGET_DIVIDE_CALL_FP)
979 sh_divsi3_libfunc = "__sdivsi3_i4";
980 else if (TARGET_DIVIDE_CALL_TABLE)
981 sh_divsi3_libfunc = "__sdivsi3_i4i";
982 else if (TARGET_SH5)
983 sh_divsi3_libfunc = "__sdivsi3_1";
984 else
985 sh_divsi3_libfunc = "__sdivsi3";
987 if (sh_branch_cost == -1)
989 /* The SH1 does not have delay slots, hence we get a pipeline stall
990 at every branch. The SH4 is superscalar, so the single delay slot
991 is not sufficient to keep both pipelines filled.
992 In any case, set the default branch cost to '2', as it results in
993 slightly overall smaller code and also enables some if conversions
994 that are required for matching special T bit related insns. */
995 sh_branch_cost = 2;
998 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
999 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
1000 TARGET_ZDCBRANCH = 1;
1002 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1003 if (! VALID_REGISTER_P (regno))
1004 sh_register_names[regno][0] = '\0';
1006 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1007 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1008 sh_additional_register_names[regno][0] = '\0';
1010 if ((flag_pic && ! TARGET_PREFERGOT)
1011 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1012 flag_no_function_cse = 1;
1014 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1016 /* Never run scheduling before reload, since that can
1017 break global alloc, and generates slower code anyway due
1018 to the pressure on R0. */
 1019 /* Enable sched1 for SH4 if the user explicitly requests it.
 1020 When sched1 is enabled, the ready queue will be reordered by
 1021 the target hooks if pressure is high. We cannot do this for
 1022 PIC, SH3 and lower as they give spill failures for R0. */
1023 if (!TARGET_HARD_SH4 || flag_pic)
1024 flag_schedule_insns = 0;
1025 /* ??? Current exception handling places basic block boundaries
1026 after call_insns. It causes the high pressure on R0 and gives
1027 spill failures for R0 in reload. See PR 22553 and the thread
1028 on gcc-patches
1029 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1030 else if (flag_exceptions)
1032 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1033 warning (0, "ignoring -fschedule-insns because of exception "
1034 "handling bug");
1035 flag_schedule_insns = 0;
1037 else if (flag_schedule_insns
1038 && !global_options_set.x_flag_schedule_insns)
1039 flag_schedule_insns = 0;
1042 /* Unwind info is not correct around the CFG unless either a frame
1043 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1044 unwind info generation to be aware of the CFG and propagating states
1045 around edges. */
1046 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1047 || flag_exceptions || flag_non_call_exceptions)
1048 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1050 warning (0, "unwind tables currently require either a frame pointer "
1051 "or -maccumulate-outgoing-args for correctness");
1052 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1055 /* Adjust loop, jump and function alignment values (in bytes), if those
1056 were not specified by the user using -falign-loops, -falign-jumps
1057 and -falign-functions options.
1058 32 bit alignment is better for speed, because instructions can be
1059 fetched as a pair from a longword boundary. For size use 16 bit
1060 alignment to get more compact code.
1061 Aligning all jumps increases the code size, even if it might
1062 result in slightly faster code. Thus, it is set to the smallest
1063 alignment possible if not specified by the user. */
1064 if (align_loops == 0)
1066 if (TARGET_SH5)
1067 align_loops = 8;
1068 else
1069 align_loops = optimize_size ? 2 : 4;
1072 if (align_jumps == 0)
1074 if (TARGET_SHMEDIA)
1075 align_jumps = 1 << CACHE_LOG;
1076 else
1077 align_jumps = 2;
1079 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1080 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1082 if (align_functions == 0)
1084 if (TARGET_SHMEDIA)
1085 align_functions = optimize_size
1086 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1087 else
1088 align_functions = optimize_size ? 2 : 4;
1091 /* The linker relaxation code breaks when a function contains
1092 alignments that are larger than that at the start of a
1093 compilation unit. */
1094 if (TARGET_RELAX)
1096 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1098 /* Also take possible .long constants / mova tables into account. */
1099 if (min_align < 4)
1100 min_align = 4;
1101 if (align_functions < min_align)
1102 align_functions = min_align;
1105 if (flag_unsafe_math_optimizations)
1107 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1108 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1109 TARGET_FSCA = 1;
1111 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1112 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1113 TARGET_FSRRA = 1;
1116 /* Allow fsrra insn only if -funsafe-math-optimizations and
 1117 -ffinite-math-only are enabled. */
1118 TARGET_FSRRA = TARGET_FSRRA
1119 && flag_unsafe_math_optimizations
1120 && flag_finite_math_only;
1122 /* If the -mieee option was not explicitly set by the user, turn it on
1123 unless -ffinite-math-only was specified. See also PR 33135. */
1124 if (! global_options_set.x_TARGET_IEEE)
1125 TARGET_IEEE = ! flag_finite_math_only;
1127 if (sh_fixed_range_str)
1128 sh_fix_range (sh_fixed_range_str);
1130 /* This target defaults to strict volatile bitfields. */
1131 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1132 flag_strict_volatile_bitfields = 1;
1134 /* Parse atomic model option and make sure it is valid for the current
1135 target CPU. */
1136 selected_atomic_model_
1137 = parse_validate_atomic_model_option (sh_atomic_model_str);
1139 register_sh_passes ();
1142 /* Print the operand address in x to the stream. */
1143 static void
1144 sh_print_operand_address (FILE *stream, rtx x)
1146 switch (GET_CODE (x))
1148 case REG:
1149 case SUBREG:
1150 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1151 break;
1153 case PLUS:
1155 rtx base = XEXP (x, 0);
1156 rtx index = XEXP (x, 1);
1158 switch (GET_CODE (index))
1160 case CONST_INT:
1161 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1162 reg_names[true_regnum (base)]);
1163 break;
1165 case REG:
1166 case SUBREG:
1168 int base_num = true_regnum (base);
1169 int index_num = true_regnum (index);
1171 fprintf (stream, "@(r0,%s)",
1172 reg_names[MAX (base_num, index_num)]);
1173 break;
1176 default:
1177 gcc_unreachable ();
1180 break;
1182 case PRE_DEC:
1183 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1184 break;
1186 case POST_INC:
1187 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1188 break;
1190 default:
1191 x = mark_constant_pool_use (x);
1192 output_addr_const (stream, x);
1193 break;
1197 /* Print operand x (an rtx) in assembler syntax to file stream
1198 according to modifier code.
1200 '.' print a .s if insn needs delay slot
1201 ',' print LOCAL_LABEL_PREFIX
1202 '@' print trap, rte or rts depending upon pragma interruptness
1203 '#' output a nop if there is nothing to put in the delay slot
1204 ''' print likelihood suffix (/u for unlikely).
1205 '>' print branch target if -fverbose-asm
1206 'O' print a constant without the #
1207 'R' print the LSW of a dp value - changes if in little endian
1208 'S' print the MSW of a dp value - changes if in little endian
1209 'T' print the next word of a dp value - same as 'R' in big endian mode.
1210 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1211 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1212 'N' print 'r63' if the operand is (const_int 0).
1213 'd' print a V2SF reg as dN instead of fpN.
1214 'm' print a pair `base,offset' or `base,index', for LD and ST.
1215 'U' Likewise for {LD,ST}{HI,LO}.
1216 'V' print the position of a single bit set.
1217 'W' print the position of a single bit cleared.
1218 't' print a memory address which is a register.
1219 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1220 'o' output an operator. */
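 /* Illustration: insn templates in sh.md use these codes, e.g. something along
    the lines of
      "jmp @%0%#"
    prints the jump and, via '#', a nop when nothing was scheduled into the
    delay slot, while "%," expands to LOCAL_LABEL_PREFIX (a sketch; see sh.md
    for the actual templates).  */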
1221 static void
1222 sh_print_operand (FILE *stream, rtx x, int code)
1224 int regno;
1225 machine_mode mode;
1227 switch (code)
1229 tree trapa_attr;
1231 case '.':
1232 if (final_sequence
1233 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1234 && get_attr_length (final_sequence->insn (1)))
1235 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1236 break;
1237 case ',':
1238 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1239 break;
1240 case '@':
1241 trapa_attr = lookup_attribute ("trap_exit",
1242 DECL_ATTRIBUTES (current_function_decl));
1243 if (trapa_attr)
1244 fprintf (stream, "trapa #%ld",
1245 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1246 else if (sh_cfun_interrupt_handler_p ())
1248 if (sh_cfun_resbank_handler_p ())
1249 fprintf (stream, "resbank\n");
1250 fprintf (stream, "rte");
1252 else
1253 fprintf (stream, "rts");
1254 break;
1255 case '#':
1256 /* Output a nop if there's nothing in the delay slot. */
1257 if (dbr_sequence_length () == 0)
1258 fprintf (stream, "\n\tnop");
1259 break;
1260 case '\'':
1262 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1264 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1265 fputs ("/u", stream);
1266 break;
1268 case '>':
1269 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1271 fputs ("\t! target: ", stream);
1272 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1274 break;
1275 case 'O':
1276 x = mark_constant_pool_use (x);
1277 output_addr_const (stream, x);
1278 break;
1279 /* N.B.: %R / %S / %T adjust memory addresses by four.
1280 For SHMEDIA, that means they can be used to access the first and
1281 second 32 bit part of a 64 bit (or larger) value that
1282 might be held in floating point registers or memory.
1283 While they can be used to access 64 bit parts of a larger value
1284 held in general purpose registers, that won't work with memory -
1285 neither for fp registers, since the frxx names are used. */
1286 case 'R':
1287 if (REG_P (x) || GET_CODE (x) == SUBREG)
1289 regno = true_regnum (x);
1290 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1291 fputs (reg_names[regno], (stream));
1293 else if (MEM_P (x))
1295 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1296 sh_print_operand_address (stream, XEXP (x, 0));
1298 else
1300 rtx sub = NULL_RTX;
1302 mode = GET_MODE (x);
1303 if (mode == VOIDmode)
1304 mode = DImode;
1305 if (GET_MODE_SIZE (mode) >= 8)
1306 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1307 if (sub)
1308 sh_print_operand (stream, sub, 0);
1309 else
1310 output_operand_lossage ("invalid operand to %%R");
1312 break;
1313 case 'S':
1314 if (REG_P (x) || GET_CODE (x) == SUBREG)
1316 regno = true_regnum (x);
1317 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1318 fputs (reg_names[regno], (stream));
1320 else if (MEM_P (x))
1322 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1323 sh_print_operand_address (stream, XEXP (x, 0));
1325 else
1327 rtx sub = NULL_RTX;
1329 mode = GET_MODE (x);
1330 if (mode == VOIDmode)
1331 mode = DImode;
1332 if (GET_MODE_SIZE (mode) >= 8)
1333 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1334 if (sub)
1335 sh_print_operand (stream, sub, 0);
1336 else
1337 output_operand_lossage ("invalid operand to %%S");
1339 break;
1340 case 'T':
1341 /* Next word of a double. */
1342 switch (GET_CODE (x))
1344 case REG:
1345 fputs (reg_names[REGNO (x) + 1], (stream));
1346 break;
1347 case MEM:
1348 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1349 && GET_CODE (XEXP (x, 0)) != POST_INC)
1350 x = adjust_address (x, SImode, 4);
1351 sh_print_operand_address (stream, XEXP (x, 0));
1352 break;
1353 default:
1354 break;
1356 break;
1358 case 't':
1359 gcc_assert (MEM_P (x));
1360 x = XEXP (x, 0);
1361 switch (GET_CODE (x))
1363 case REG:
1364 case SUBREG:
1365 sh_print_operand (stream, x, 0);
1366 break;
1367 default:
1368 break;
1370 break;
1372 case 'o':
1373 switch (GET_CODE (x))
1375 case PLUS: fputs ("add", stream); break;
1376 case MINUS: fputs ("sub", stream); break;
1377 case MULT: fputs ("mul", stream); break;
1378 case DIV: fputs ("div", stream); break;
1379 case EQ: fputs ("eq", stream); break;
1380 case NE: fputs ("ne", stream); break;
1381 case GT: case LT: fputs ("gt", stream); break;
1382 case GE: case LE: fputs ("ge", stream); break;
1383 case GTU: case LTU: fputs ("gtu", stream); break;
1384 case GEU: case LEU: fputs ("geu", stream); break;
1385 default:
1386 break;
1388 break;
1389 case 'M':
1390 if (TARGET_SHMEDIA)
1392 if (MEM_P (x)
1393 && GET_CODE (XEXP (x, 0)) == PLUS
1394 && (REG_P (XEXP (XEXP (x, 0), 1))
1395 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1396 fputc ('x', stream);
1398 else
1400 if (MEM_P (x))
1402 switch (GET_MODE (x))
1404 case QImode: fputs (".b", stream); break;
1405 case HImode: fputs (".w", stream); break;
1406 case SImode: fputs (".l", stream); break;
1407 case SFmode: fputs (".s", stream); break;
1408 case DFmode: fputs (".d", stream); break;
1409 default: gcc_unreachable ();
1413 break;
1415 case 'm':
1416 gcc_assert (MEM_P (x));
1417 x = XEXP (x, 0);
1418 /* Fall through. */
1419 case 'U':
1420 switch (GET_CODE (x))
1422 case REG:
1423 case SUBREG:
1424 sh_print_operand (stream, x, 0);
1425 fputs (", 0", stream);
1426 break;
1428 case PLUS:
1429 sh_print_operand (stream, XEXP (x, 0), 0);
1430 fputs (", ", stream);
1431 sh_print_operand (stream, XEXP (x, 1), 0);
1432 break;
1434 default:
1435 gcc_unreachable ();
1437 break;
1439 case 'V':
1441 int num = exact_log2 (INTVAL (x));
1442 gcc_assert (num >= 0);
1443 fprintf (stream, "#%d", num);
1445 break;
1447 case 'W':
1449 int num = exact_log2 (~INTVAL (x));
1450 gcc_assert (num >= 0);
1451 fprintf (stream, "#%d", num);
1453 break;
1455 case 'd':
1456 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1458 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1459 break;
1461 case 'N':
1462 if (x == CONST0_RTX (GET_MODE (x)))
1464 fprintf ((stream), "r63");
1465 break;
1467 goto default_output;
1468 case 'u':
1469 if (CONST_INT_P (x))
1471 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1472 break;
1474 /* Fall through. */
1476 default_output:
1477 default:
1478 regno = 0;
1479 mode = GET_MODE (x);
1481 switch (GET_CODE (x))
1483 case TRUNCATE:
1485 rtx inner = XEXP (x, 0);
1486 int offset = 0;
1487 machine_mode inner_mode;
1489 /* We might see SUBREGs with vector mode registers inside. */
1490 if (GET_CODE (inner) == SUBREG
1491 && (GET_MODE_SIZE (GET_MODE (inner))
1492 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1493 && subreg_lowpart_p (inner))
1494 inner = SUBREG_REG (inner);
1495 if (CONST_INT_P (inner))
1497 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1498 goto default_output;
1500 inner_mode = GET_MODE (inner);
1501 if (GET_CODE (inner) == SUBREG
1502 && (GET_MODE_SIZE (GET_MODE (inner))
1503 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1504 && REG_P (SUBREG_REG (inner)))
1506 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1507 GET_MODE (SUBREG_REG (inner)),
1508 SUBREG_BYTE (inner),
1509 GET_MODE (inner));
1510 inner = SUBREG_REG (inner);
1512 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1513 abort ();
1514 /* Floating point register pairs are always big endian;
1515 general purpose registers are 64 bit wide. */
1516 regno = REGNO (inner);
1517 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1518 - HARD_REGNO_NREGS (regno, mode))
1519 + offset;
1520 x = inner;
1521 goto reg;
1523 case SIGN_EXTEND:
1524 x = XEXP (x, 0);
1525 goto reg;
1526 /* FIXME: We need this on SHmedia32 because reload generates
1527 some sign-extended HI or QI loads into DImode registers
1528 but, because Pmode is SImode, the address ends up with a
1529 subreg:SI of the DImode register. Maybe reload should be
1530 fixed so as to apply alter_subreg to such loads? */
1531 case IF_THEN_ELSE:
1532 gcc_assert (trapping_target_operand (x, VOIDmode));
1533 x = XEXP (XEXP (x, 2), 0);
1534 goto default_output;
1535 case SUBREG:
1536 gcc_assert (SUBREG_BYTE (x) == 0
1537 && REG_P (SUBREG_REG (x)));
1539 x = SUBREG_REG (x);
1540 /* Fall through. */
1542 reg:
1543 case REG:
1544 regno += REGNO (x);
1545 if (FP_REGISTER_P (regno)
1546 && mode == V16SFmode)
1547 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1548 else if (FP_REGISTER_P (REGNO (x))
1549 && mode == V4SFmode)
1550 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1551 else if (REG_P (x)
1552 && mode == V2SFmode)
1553 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1554 else if (FP_REGISTER_P (REGNO (x))
1555 && GET_MODE_SIZE (mode) > 4)
1556 fprintf ((stream), "d%s", reg_names[regno] + 1);
1557 else
1558 fputs (reg_names[regno], (stream));
1559 break;
1561 case MEM:
1562 output_address (XEXP (x, 0));
1563 break;
1565 default:
1566 if (TARGET_SH1)
1567 fputc ('#', stream);
1568 output_addr_const (stream, x);
1569 break;
1571 break;
1575 static bool
1576 sh_print_operand_punct_valid_p (unsigned char code)
1578 return (code == '.' || code == '#' || code == '@' || code == ','
1579 || code == '$' || code == '\'' || code == '>');
1582 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1583 static bool
1584 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1586 if (GET_CODE (x) == UNSPEC)
1588 switch (XINT (x, 1))
1590 case UNSPEC_DATALABEL:
1591 fputs ("datalabel ", file);
1592 output_addr_const (file, XVECEXP (x, 0, 0));
1593 break;
1594 case UNSPEC_PIC:
1595 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1596 output_addr_const (file, XVECEXP (x, 0, 0));
1597 break;
1598 case UNSPEC_GOT:
1599 output_addr_const (file, XVECEXP (x, 0, 0));
1600 fputs ("@GOT", file);
1601 break;
1602 case UNSPEC_GOTOFF:
1603 output_addr_const (file, XVECEXP (x, 0, 0));
1604 fputs ("@GOTOFF", file);
1605 break;
1606 case UNSPEC_PLT:
1607 output_addr_const (file, XVECEXP (x, 0, 0));
1608 fputs ("@PLT", file);
1609 break;
1610 case UNSPEC_GOTPLT:
1611 output_addr_const (file, XVECEXP (x, 0, 0));
1612 fputs ("@GOTPLT", file);
1613 break;
1614 case UNSPEC_DTPOFF:
1615 output_addr_const (file, XVECEXP (x, 0, 0));
1616 fputs ("@DTPOFF", file);
1617 break;
1618 case UNSPEC_GOTTPOFF:
1619 output_addr_const (file, XVECEXP (x, 0, 0));
1620 fputs ("@GOTTPOFF", file);
1621 break;
1622 case UNSPEC_TPOFF:
1623 output_addr_const (file, XVECEXP (x, 0, 0));
1624 fputs ("@TPOFF", file);
1625 break;
1626 case UNSPEC_CALLER:
1628 char name[32];
1629 /* LPCS stands for Label for PIC Call Site. */
1630 targetm.asm_out.generate_internal_label (name, "LPCS",
1631 INTVAL (XVECEXP (x, 0, 0)));
1632 assemble_name (file, name);
1634 break;
1635 case UNSPEC_EXTRACT_S16:
1636 case UNSPEC_EXTRACT_U16:
1638 rtx val, shift;
1640 val = XVECEXP (x, 0, 0);
1641 shift = XVECEXP (x, 0, 1);
1642 fputc ('(', file);
1643 if (shift != const0_rtx)
1644 fputc ('(', file);
1645 if (GET_CODE (val) == CONST
1646 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1648 fputc ('(', file);
1649 output_addr_const (file, val);
1650 fputc (')', file);
1652 else
1653 output_addr_const (file, val);
1654 if (shift != const0_rtx)
1656 fputs (" >> ", file);
1657 output_addr_const (file, shift);
1658 fputc (')', file);
1660 fputs (" & 65535)", file);
1662 break;
1663 case UNSPEC_SYMOFF:
1664 output_addr_const (file, XVECEXP (x, 0, 0));
1665 fputc ('-', file);
1666 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1668 fputc ('(', file);
1669 output_addr_const (file, XVECEXP (x, 0, 1));
1670 fputc (')', file);
1672 else
1673 output_addr_const (file, XVECEXP (x, 0, 1));
1674 break;
1675 case UNSPEC_PCREL_SYMOFF:
1676 output_addr_const (file, XVECEXP (x, 0, 0));
1677 fputs ("-(", file);
1678 output_addr_const (file, XVECEXP (x, 0, 1));
1679 fputs ("-.)", file);
1680 break;
1681 default:
1682 return false;
1684 return true;
1686 else
1687 return false;
1690 /* Encode symbol attributes of a SYMBOL_REF into its
1691 SYMBOL_REF_FLAGS. */
1692 static void
1693 sh_encode_section_info (tree decl, rtx rtl, int first)
1695 default_encode_section_info (decl, rtl, first);
1697 if (TREE_CODE (decl) == FUNCTION_DECL
1698 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1699 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1702 /* Prepare operands for a move define_expand; specifically, one of the
1703 operands must be in a register. */
1704 void
1705 prepare_move_operands (rtx operands[], machine_mode mode)
1707 if ((mode == SImode || mode == DImode)
1708 && flag_pic
1709 && ! ((mode == Pmode || mode == ptr_mode)
1710 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1712 rtx temp;
1713 if (SYMBOLIC_CONST_P (operands[1]))
1715 if (MEM_P (operands[0]))
1716 operands[1] = force_reg (Pmode, operands[1]);
1717 else if (TARGET_SHMEDIA
1718 && GET_CODE (operands[1]) == LABEL_REF
1719 && target_reg_operand (operands[0], mode))
1720 /* It's ok. */;
1721 else
1723 temp = (!can_create_pseudo_p ()
1724 ? operands[0]
1725 : gen_reg_rtx (Pmode));
1726 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1729 else if (GET_CODE (operands[1]) == CONST
1730 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1731 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1733 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1734 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1735 mode, temp);
1736 operands[1] = expand_binop (mode, add_optab, temp,
1737 XEXP (XEXP (operands[1], 0), 1),
1738 (!can_create_pseudo_p ()
1739 ? temp
1740 : gen_reg_rtx (Pmode)),
1741 0, OPTAB_LIB_WIDEN);
1745 if (! reload_in_progress && ! reload_completed)
1747 /* Copy the source into a register if neither operand is a register. */
1748 if (! register_operand (operands[0], mode)
1749 && ! sh_register_operand (operands[1], mode))
1750 operands[1] = copy_to_mode_reg (mode, operands[1]);
1752 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1754 /* This is like change_address_1 (operands[0], mode, 0, 1),
1755 except that we can't use that function because it is static. */
1756 rtx new_rtx = change_address (operands[0], mode, 0);
1757 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1758 operands[0] = new_rtx;
1761 /* This case can happen while generating code to move the result
1762 of a library call to the target. Reject `st r0,@(rX,rY)' because
1763 reload will fail to find a spill register for rX, since r0 is already
1764 being used for the source. */
1765 else if (TARGET_SH1
1766 && refers_to_regno_p (R0_REG, operands[1])
1767 && MEM_P (operands[0])
1768 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1769 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1770 operands[1] = copy_to_mode_reg (mode, operands[1]);
1772 /* When displacement addressing is used, RA will assign r0 to
1773 the pseudo register operand for the QI/HImode load/store.
1774 This tends to make a long live range for R0 and might cause
1775 anomalous register spills in some cases with LRA. See PR
1776 target/55212.
1777 We split such a load/store into two move insns via r0 so as to
1778 shorten R0's live range. It will make some code worse but
1779 wins on average for LRA.
1780 Also, when base+index addressing is used and the index term is
1781 a subreg, LRA assumes that more hard registers can be available
1782 in some situations; that isn't the case for SH in the
1783 problematic case. We can pre-allocate R0 for that index term
1784 to avoid the issue. See PR target/66591. */
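/* As a rough illustration (register names here are placeholders, not
what RA will actually pick): a QImode load of @(4,rX) into some pseudo,
which the displacement form would otherwise pin to r0 for the pseudo's
whole live range, is expanded here as
mov.b @(4,rX),r0
mov r0,rY
so that r0 is only live between the two moves. */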
1785 else if (sh_lra_p ()
1786 && TARGET_SH1 && ! TARGET_SH2A
1787 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1788 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1790 bool load_p = REG_P (operands[0]);
1791 rtx reg = operands[load_p ? 0 : 1];
1792 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1794 if ((mode == QImode || mode == HImode)
1795 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1796 && GET_CODE (adr) == PLUS
1797 && REG_P (XEXP (adr, 0))
1798 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1799 && CONST_INT_P (XEXP (adr, 1))
1800 && INTVAL (XEXP (adr, 1)) != 0
1801 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1803 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1804 emit_move_insn (r0_rtx, operands[1]);
1805 operands[1] = r0_rtx;
1807 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1808 && GET_CODE (adr) == PLUS
1809 && REG_P (XEXP (adr, 0))
1810 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1811 && SUBREG_P (XEXP (adr, 1))
1812 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1814 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1815 emit_move_insn (r0_rtx, XEXP (adr, 1));
1816 XEXP (adr, 1) = r0_rtx;
1821 if (mode == Pmode || mode == ptr_mode)
1823 rtx op0, op1, opc;
1824 enum tls_model tls_kind;
1826 op0 = operands[0];
1827 op1 = operands[1];
1828 if (GET_CODE (op1) == CONST
1829 && GET_CODE (XEXP (op1, 0)) == PLUS
1830 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1831 != TLS_MODEL_NONE))
1833 opc = XEXP (XEXP (op1, 0), 1);
1834 op1 = XEXP (XEXP (op1, 0), 0);
1836 else
1837 opc = NULL_RTX;
1839 if (! reload_in_progress && ! reload_completed
1840 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1842 rtx tga_op1, tga_ret, tmp, tmp2;
1844 if (! flag_pic
1845 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1846 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1847 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1849 static int got_labelno;
1850 /* Don't schedule insns for getting GOT address when
1851 the first scheduling is enabled, to avoid spill
1852 failures for R0. */
1853 if (flag_schedule_insns)
1854 emit_insn (gen_blockage ());
1855 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1856 emit_use (gen_rtx_REG (SImode, PIC_REG));
1857 if (flag_schedule_insns)
1858 emit_insn (gen_blockage ());
1861 switch (tls_kind)
1863 case TLS_MODEL_GLOBAL_DYNAMIC:
1864 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1865 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1866 tmp = gen_reg_rtx (Pmode);
1867 emit_move_insn (tmp, tga_ret);
1868 op1 = tmp;
1869 break;
1871 case TLS_MODEL_LOCAL_DYNAMIC:
1872 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1873 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1875 tmp = gen_reg_rtx (Pmode);
1876 emit_move_insn (tmp, tga_ret);
1878 if (register_operand (op0, Pmode))
1879 tmp2 = op0;
1880 else
1881 tmp2 = gen_reg_rtx (Pmode);
1883 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1884 op1 = tmp2;
1885 break;
1887 case TLS_MODEL_INITIAL_EXEC:
1888 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1889 tmp = gen_sym2GOTTPOFF (op1);
1890 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1891 op1 = tga_op1;
1892 break;
1894 case TLS_MODEL_LOCAL_EXEC:
1895 tmp2 = gen_reg_rtx (Pmode);
1896 emit_insn (gen_store_gbr (tmp2));
1897 tmp = gen_reg_rtx (Pmode);
1898 emit_insn (gen_symTPOFF2reg (tmp, op1));
1900 if (register_operand (op0, Pmode))
1901 op1 = op0;
1902 else
1903 op1 = gen_reg_rtx (Pmode);
1905 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1906 break;
1908 default:
1909 gcc_unreachable ();
1911 if (opc)
1912 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1913 operands[1] = op1;
1918 /* Implement the canonicalize_comparison target hook for the combine
1919 pass. For the target hook this function is invoked via
1920 sh_canonicalize_comparison. This function is also re-used to
1921 canonicalize comparisons in cbranch pattern expanders. */
1922 static void
1923 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1924 machine_mode mode,
1925 bool op0_preserve_value)
1927 /* When invoked from within the combine pass the mode is not specified,
1928 so try to get it from one of the operands. */
1929 if (mode == VOIDmode)
1930 mode = GET_MODE (op0);
1931 if (mode == VOIDmode)
1932 mode = GET_MODE (op1);
1934 // We need to have a mode to do something useful here.
1935 if (mode == VOIDmode)
1936 return;
1938 // Currently, we don't deal with floats here.
1939 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1940 return;
1942 // Make sure that the constant operand is the second operand.
1943 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1945 if (op0_preserve_value)
1946 return;
1948 std::swap (op0, op1);
1949 cmp = swap_condition (cmp);
1952 if (CONST_INT_P (op1))
1954 /* Try to adjust the constant operand in such a way that available
1955 comparison insns can be utilized better and the constant can be
1956 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1957 constant pool. */
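/* For example (a sketch in terms of plain SH1..SH4 insns; the exact
register choice is up to RA): a signed 'x >= 0x80' would need 0x80
loaded from the constant pool, since mov #imm only covers -128..127.
Rewritten as 'x > 0x7F' it can be done as
mov #127,r1
cmp/gt r1,r0
with no constant pool entry. */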
1958 const HOST_WIDE_INT val = INTVAL (op1);
1960 /* x > -1 --> x >= 0
1961 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1962 x <= -1 --> x < 0
1963 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1964 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1966 cmp = cmp == GT ? GE : LT;
1967 op1 = gen_int_mode (val + 1, mode);
1970 /* x >= 1 --> x > 0
1971 x >= 0x80 --> x > 0x7F
1972 x < 1 --> x <= 0
1973 x < 0x80 --> x <= 0x7F */
1974 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1976 cmp = cmp == GE ? GT : LE;
1977 op1 = gen_int_mode (val - 1, mode);
1980 /* unsigned x >= 1 --> x != 0
1981 unsigned x < 1 --> x == 0 */
1982 else if (val == 1 && (cmp == GEU || cmp == LTU))
1984 cmp = cmp == GEU ? NE : EQ;
1985 op1 = CONST0_RTX (mode);
1988 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1989 unsigned x < 0x80 --> unsigned x <= 0x7F */
1990 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1992 cmp = cmp == GEU ? GTU : LEU;
1993 op1 = gen_int_mode (val - 1, mode);
1996 /* unsigned x > 0 --> x != 0
1997 unsigned x <= 0 --> x == 0 */
1998 else if (val == 0 && (cmp == GTU || cmp == LEU))
1999 cmp = cmp == GTU ? NE : EQ;
2001 /* unsigned x > 0x7FFFFFFF --> signed x < 0
2002 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
2003 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2004 && val == 0x7FFFFFFF)
2006 cmp = cmp == GTU ? LT : GE;
2007 op1 = const0_rtx;
2010 /* unsigned x >= 0x80000000 --> signed x < 0
2011 unsigned x < 0x80000000 --> signed x >= 0 */
2012 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2013 && (unsigned HOST_WIDE_INT)val
2014 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2016 cmp = cmp == GEU ? LT : GE;
2017 op1 = const0_rtx;
2022 /* This function implements the canonicalize_comparison target hook.
2023 This wrapper around the internally used sh_canonicalize_comparison
2024 function is needed to do the enum rtx_code <-> int conversion.
2025 Target hooks cannot use enum rtx_code in their definitions. */
2026 static void
2027 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2028 bool op0_preserve_value)
2030 enum rtx_code tmp_code = (enum rtx_code)*code;
2031 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2032 VOIDmode, op0_preserve_value);
2033 *code = (int)tmp_code;
2036 /* This function implements the legitimate_combined_insn target hook,
2037 which the combine pass uses to reject combined insns early, before
2038 it tries to recog the insn and determine its cost. */
2039 static bool
2040 sh_legitimate_combined_insn (rtx_insn* insn)
2042 /* Reject combinations of memory loads and zero extensions, as these
2043 interfere with other combine patterns such as zero extracts and bit
2044 tests. The SH2A movu.{b|w} insns are formed later in the
2045 'sh_optimize_extu_exts' pass after combine/split1. */
2046 rtx p = PATTERN (insn);
2047 if (GET_CODE (p) == SET
2048 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2049 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2050 && MEM_P (XEXP (XEXP (p, 1), 0)))
2051 return false;
2053 return true;
2056 bool
2057 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2059 *p1 = T_REG;
2060 *p2 = INVALID_REGNUM;
2061 return true;
2064 enum rtx_code
2065 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2066 enum rtx_code comparison)
2068 /* The scratch reg is only available when this is invoked from within
2069 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2070 rtx scratch = NULL_RTX;
2072 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2073 comparison = GET_CODE (operands[0]);
2074 else
2075 scratch = operands[4];
2077 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2078 mode, false);
2080 /* Notice that this function is also invoked after reload by
2081 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2082 rtx op1 = operands[1];
2084 if (can_create_pseudo_p ())
2085 operands[1] = force_reg (mode, op1);
2086 /* When we are handling DImode comparisons, we want to keep constants so
2087 that we can optimize the component comparisons; however, memory loads
2088 are better issued as a whole so that they can be scheduled well.
2089 SImode equality comparisons allow I08 constants, but only when they
2090 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2091 into a register, that register might as well be r0, and we allow the
2092 constant. If it is already in a register, this is likely to be
2093 allocated to a different hard register, thus we load the constant into
2094 a register unless it is zero. */
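/* For instance, 'cmp/eq #42,r0' is a valid SH insn, while there is no
'cmp/eq #42,r4'; this is why the constant is only kept when the other
operand can end up in r0. (A sketch of the reasoning above, not an
exhaustive rule.) */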
2095 if (!REG_P (operands[2])
2096 && (!CONST_INT_P (operands[2])
2097 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2098 && ((comparison != EQ && comparison != NE)
2099 || (REG_P (op1) && REGNO (op1) != R0_REG)
2100 || !satisfies_constraint_I08 (operands[2])))))
2102 if (scratch && GET_MODE (scratch) == mode)
2104 emit_move_insn (scratch, operands[2]);
2105 operands[2] = scratch;
2107 else if (can_create_pseudo_p ())
2108 operands[2] = force_reg (mode, operands[2]);
2110 return comparison;
2113 void
2114 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2116 rtx (*branch_expander) (rtx) = gen_branch_true;
2117 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2118 switch (comparison)
2120 case NE: case LT: case LE: case LTU: case LEU:
2121 comparison = reverse_condition (comparison);
2122 branch_expander = gen_branch_false;
2123 default: ;
2125 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2126 gen_rtx_fmt_ee (comparison, SImode,
2127 operands[1], operands[2])));
2128 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2129 if (probability >= 0)
2130 add_int_reg_note (jump, REG_BR_PROB, probability);
2133 /* ??? How should we distribute probabilities when more than one branch
2134 is generated? So far we only have some ad-hoc observations:
2135 - If the operands are random, they are likely to differ in both parts.
2136 - If comparing items in a hash chain, the operands are random or equal;
2137 operation should be EQ or NE.
2138 - If items are searched in an ordered tree from the root, we can expect
2139 the highpart to be unequal about half of the time; operation should be
2140 an inequality comparison, operands non-constant, and overall probability
2141 about 50%. Likewise for quicksort.
2142 - Range checks will often be made against constants. Even if we assume for
2143 simplicity an even distribution of the non-constant operand over a
2144 sub-range here, the same probability could be generated with differently
2145 wide sub-ranges - as long as the ratio of the part of the subrange that
2146 is before the threshold to the part that comes after the threshold stays
2147 the same. Thus, we can't really tell anything here;
2148 assuming random distribution is at least simple.
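/* In outline, a DImode 'a > b' (signed) is split into SImode compares
roughly like this (a sketch; the actual branches emitted depend on the
msw_taken / msw_skip / lsw_taken codes chosen below):
if (high(a) > high(b)) goto taken; // msw_taken
if (high(a) < high(b)) goto not_taken; // msw_skip
if (low(a) >u low(b)) goto taken; // lsw_taken, unsigned
not_taken:; */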
2150 bool
2151 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2153 enum rtx_code msw_taken, msw_skip, lsw_taken;
2154 rtx_code_label *skip_label = NULL;
2155 rtx op1h, op1l, op2h, op2l;
2156 int num_branches;
2157 int prob, rev_prob;
2158 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2159 rtx scratch = operands[4];
2161 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2162 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2163 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2164 op1l = gen_lowpart (SImode, operands[1]);
2165 op2l = gen_lowpart (SImode, operands[2]);
2166 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2167 prob = split_branch_probability;
2168 rev_prob = REG_BR_PROB_BASE - prob;
2169 switch (comparison)
2171 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2172 That costs 1 cycle more when the first branch can be predicted taken,
2173 but saves us mispredicts because only one branch needs prediction.
2174 It also enables generating the cmpeqdi_t-1 pattern. */
2175 case EQ:
2176 if (TARGET_CMPEQDI_T)
2178 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2179 emit_jump_insn (gen_branch_true (operands[3]));
2180 return true;
2182 msw_skip = NE;
2183 lsw_taken = EQ;
2184 if (prob >= 0)
2186 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2187 msw_skip_prob = rev_prob;
2188 if (REG_BR_PROB_BASE <= 65535)
2189 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2190 else
2192 lsw_taken_prob
2193 = (prob
2194 ? (REG_BR_PROB_BASE
2195 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2196 / ((gcov_type) prob << 32)))
2197 : 0);
2200 break;
2201 case NE:
2202 if (TARGET_CMPEQDI_T)
2204 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2205 emit_jump_insn (gen_branch_false (operands[3]));
2206 return true;
2208 msw_taken = NE;
2209 msw_taken_prob = prob;
2210 lsw_taken = NE;
2211 lsw_taken_prob = 0;
2212 break;
2213 case GTU: case GT:
2214 msw_taken = comparison;
2215 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2216 break;
2217 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2218 msw_skip = swap_condition (msw_taken);
2219 lsw_taken = GTU;
2220 break;
2221 case GEU: case GE:
2222 if (op2l == CONST0_RTX (SImode))
2223 msw_taken = comparison;
2224 else
2226 msw_taken = comparison == GE ? GT : GTU;
2227 msw_skip = swap_condition (msw_taken);
2228 lsw_taken = GEU;
2230 break;
2231 case LTU: case LT:
2232 msw_taken = comparison;
2233 if (op2l == CONST0_RTX (SImode))
2234 break;
2235 msw_skip = swap_condition (msw_taken);
2236 lsw_taken = LTU;
2237 break;
2238 case LEU: case LE:
2239 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2240 msw_taken = comparison;
2241 else
2243 lsw_taken = LEU;
2244 if (comparison == LE)
2245 msw_taken = LT;
2246 else if (op2h != CONST0_RTX (SImode))
2247 msw_taken = LTU;
2248 else
2250 msw_skip = swap_condition (LTU);
2251 break;
2253 msw_skip = swap_condition (msw_taken);
2255 break;
2256 default: return false;
2258 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2259 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2260 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2261 if (comparison != EQ && comparison != NE && num_branches > 1)
2263 if (!CONSTANT_P (operands[2])
2264 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2265 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2267 msw_taken_prob = prob / 2U;
2268 msw_skip_prob
2269 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2270 lsw_taken_prob = prob;
2272 else
2274 msw_taken_prob = prob;
2275 msw_skip_prob = REG_BR_PROB_BASE;
2276 /* ??? If we have a constant op2h, should we use that when
2277 calculating lsw_taken_prob? */
2278 lsw_taken_prob = prob;
2281 operands[1] = op1h;
2282 operands[2] = op2h;
2283 operands[4] = NULL_RTX;
2284 if (reload_completed
2285 && ! arith_reg_or_0_operand (op2h, SImode)
2286 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2287 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2288 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2290 emit_move_insn (scratch, operands[2]);
2291 operands[2] = scratch;
2293 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2294 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2295 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2297 rtx taken_label = operands[3];
2299 /* Operands were possibly modified, but msw_skip doesn't expect this.
2300 Always use the original ones. */
2301 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2303 operands[1] = op1h;
2304 operands[2] = op2h;
2305 if (reload_completed
2306 && ! arith_reg_or_0_operand (op2h, SImode)
2307 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2309 emit_move_insn (scratch, operands[2]);
2310 operands[2] = scratch;
2314 operands[3] = skip_label = gen_label_rtx ();
2315 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2316 operands[3] = taken_label;
2318 operands[1] = op1l;
2319 operands[2] = op2l;
2320 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2322 if (reload_completed
2323 && ! arith_reg_or_0_operand (op2l, SImode)
2324 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2326 emit_move_insn (scratch, operands[2]);
2327 operands[2] = scratch;
2329 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2331 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2332 emit_label (skip_label);
2333 return true;
2336 /* Given an operand, return 1 if the evaluated operand plugged into an
2337 if_then_else will result in a branch_true, 0 if branch_false, or
2338 -1 if neither applies. The truth table goes like this:
2340 op | cmpval | code | result
2341 ---------+--------+---------+--------------------
2342 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2343 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2344 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2345 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2346 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2347 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2348 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2349 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2351 sh_eval_treg_value (rtx op)
2353 if (t_reg_operand (op, GET_MODE (op)))
2354 return 1;
2355 if (negt_reg_operand (op, GET_MODE (op)))
2356 return 0;
2358 rtx_code code = GET_CODE (op);
2359 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2360 return -1;
2362 int cmpop = code == EQ ? 1 : 0;
2363 int cmpval = INTVAL (XEXP (op, 1));
2364 if (cmpval != 0 && cmpval != 1)
2365 return -1;
2367 int t;
2368 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2369 t = 0;
2370 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2371 t = 1;
2372 else
2373 return -1;
2375 return t ^ (cmpval == cmpop);
2378 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2379 of floating-point comparisons. */
2380 static void
2381 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2383 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2384 && GET_CODE (insn) != PARALLEL)
2386 insn = gen_rtx_PARALLEL (VOIDmode,
2387 gen_rtvec (3, insn,
2388 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2389 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2391 emit_insn (insn);
2394 /* Prepare the operands for an scc instruction; make sure that the
2395 compare has been done and the result is in T_REG. */
2396 void
2397 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2399 rtx t_reg = get_t_reg_rtx ();
2400 enum rtx_code oldcode = code;
2401 machine_mode mode;
2403 /* First need a compare insn. */
2404 switch (code)
2406 case NE:
2407 /* It isn't possible to handle this case. */
2408 gcc_unreachable ();
2409 case LT:
2410 code = GT;
2411 break;
2412 case LE:
2413 code = GE;
2414 break;
2415 case LTU:
2416 code = GTU;
2417 break;
2418 case LEU:
2419 code = GEU;
2420 break;
2421 default:
2422 break;
2424 if (code != oldcode)
2425 std::swap (op0, op1);
2427 mode = GET_MODE (op0);
2428 if (mode == VOIDmode)
2429 mode = GET_MODE (op1);
2431 op0 = force_reg (mode, op0);
2432 if ((code != EQ && code != NE
2433 && (op1 != const0_rtx
2434 || code == GTU || code == GEU || code == LTU || code == LEU))
2435 || (mode == DImode && op1 != const0_rtx)
2436 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2437 op1 = force_reg (mode, op1);
2439 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2440 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2441 mode);
2445 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2446 rtx op0, rtx op1)
2448 rtx target = gen_reg_rtx (SImode);
2449 rtx tmp;
2451 gcc_assert (TARGET_SHMEDIA);
2452 switch (code)
2454 case EQ:
2455 case GT:
2456 case LT:
2457 case UNORDERED:
2458 case GTU:
2459 case LTU:
2460 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2461 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2462 code = NE;
2463 break;
2465 case NE:
2466 case GE:
2467 case LE:
2468 case ORDERED:
2469 case GEU:
2470 case LEU:
2471 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2472 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2473 code = EQ;
2474 break;
2476 case UNEQ:
2477 case UNGE:
2478 case UNGT:
2479 case UNLE:
2480 case UNLT:
2481 case LTGT:
2482 return NULL_RTX;
2484 default:
2485 gcc_unreachable ();
2488 if (mode == DImode)
2490 rtx t2 = gen_reg_rtx (DImode);
2491 emit_insn (gen_extendsidi2 (t2, target));
2492 target = t2;
2495 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2498 /* Called from the md file, set up the operands of a compare instruction. */
2499 void
2500 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2502 enum rtx_code code = GET_CODE (operands[0]);
2503 enum rtx_code branch_code;
2504 rtx op0 = operands[1];
2505 rtx op1 = operands[2];
2506 rtx insn;
2507 bool need_ccmpeq = false;
2509 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2511 op0 = force_reg (mode, op0);
2512 op1 = force_reg (mode, op1);
2514 else
2516 if (code != EQ || mode == DImode)
2518 /* Force args into regs, since we can't use constants here. */
2519 op0 = force_reg (mode, op0);
2520 if (op1 != const0_rtx || code == GTU || code == GEU)
2521 op1 = force_reg (mode, op1);
2525 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2527 if (code == LT
2528 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2529 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2531 std::swap (op0, op1);
2532 code = swap_condition (code);
2535 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2536 if (code == GE)
2538 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2539 need_ccmpeq = true;
2540 code = GT;
2543 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2544 to EQ/GT respectively. */
2545 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
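/* In outline, with TARGET_IEEE on SH2E an SFmode 'a >= b' therefore
ends up as 'T = (a > b); if (!T) T = (a == b);' followed by a single
bt/bf, the equality part coming from the ieee_ccmpeq insn emitted
below. This is only a sketch of the resulting T-bit logic, not the
literal assembly. */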
2548 switch (code)
2550 case EQ:
2551 case GT:
2552 case GE:
2553 case GTU:
2554 case GEU:
2555 branch_code = code;
2556 break;
2557 case NE:
2558 case LT:
2559 case LE:
2560 case LTU:
2561 case LEU:
2562 branch_code = reverse_condition (code);
2563 break;
2564 default:
2565 gcc_unreachable ();
2568 insn = gen_rtx_SET (get_t_reg_rtx (),
2569 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2571 sh_emit_set_t_insn (insn, mode);
2572 if (need_ccmpeq)
2573 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2575 if (branch_code == code)
2576 emit_jump_insn (gen_branch_true (operands[3]));
2577 else
2578 emit_jump_insn (gen_branch_false (operands[3]));
2581 void
2582 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2584 enum rtx_code code = GET_CODE (operands[1]);
2585 rtx op0 = operands[2];
2586 rtx op1 = operands[3];
2587 rtx_code_label *lab = NULL;
2588 bool invert = false;
2590 op0 = force_reg (mode, op0);
2591 if ((code != EQ && code != NE
2592 && (op1 != const0_rtx
2593 || code == GTU || code == GEU || code == LTU || code == LEU))
2594 || (mode == DImode && op1 != const0_rtx)
2595 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2596 op1 = force_reg (mode, op1);
2598 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2600 if (code == LT || code == LE)
2602 std::swap (op0, op1);
2603 code = swap_condition (code);
2605 if (code == GE)
2607 if (TARGET_IEEE)
2609 lab = gen_label_rtx ();
2610 sh_emit_scc_to_t (EQ, op0, op1);
2611 emit_jump_insn (gen_branch_true (lab));
2612 code = GT;
2614 else
2616 code = LT;
2617 invert = true;
2622 if (code == NE)
2624 code = EQ;
2625 invert = true;
2628 sh_emit_scc_to_t (code, op0, op1);
2629 if (lab)
2630 emit_label (lab);
2631 if (invert)
2632 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2633 else
2634 emit_move_insn (operands[0], get_t_reg_rtx ());
2637 /* Functions to output assembly code. */
2639 /* Return a sequence of instructions to perform a DI or DF move.
2641 Since the SH cannot move a DI or DF in one instruction, we have
2642 to take care when we see overlapping source and dest registers. */
2643 const char *
2644 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2645 machine_mode mode)
2647 rtx dst = operands[0];
2648 rtx src = operands[1];
2650 if (MEM_P (dst)
2651 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2652 return "mov.l %T1,%0" "\n"
2653 " mov.l %1,%0";
2655 if (register_operand (dst, mode)
2656 && register_operand (src, mode))
2658 if (REGNO (src) == MACH_REG)
2659 return "sts mach,%S0" "\n"
2660 " sts macl,%R0";
2662 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2663 when mov.d r1,r0 do r1->r0 then r2->r1. */
2664 if (REGNO (src) + 1 == REGNO (dst))
2665 return "mov %T1,%T0" "\n"
2666 " mov %1,%0";
2667 else
2668 return "mov %1,%0" "\n"
2669 " mov %T1,%T0";
2671 else if (CONST_INT_P (src))
2673 if (INTVAL (src) < 0)
2674 output_asm_insn ("mov #-1,%S0", operands);
2675 else
2676 output_asm_insn ("mov #0,%S0", operands);
2678 return "mov %1,%R0";
2680 else if (MEM_P (src))
2682 int ptrreg = -1;
2683 int dreg = REGNO (dst);
2684 rtx inside = XEXP (src, 0);
2686 switch (GET_CODE (inside))
2688 case REG:
2689 ptrreg = REGNO (inside);
2690 break;
2692 case SUBREG:
2693 ptrreg = subreg_regno (inside);
2694 break;
2696 case PLUS:
2697 ptrreg = REGNO (XEXP (inside, 0));
2698 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2699 an offsettable address. Unfortunately, offsettable addresses use
2700 QImode to check the offset, and a QImode offsettable address
2701 requires r0 for the other operand, which is not currently
2702 supported, so we can't use the 'o' constraint.
2703 Thus we must check for and handle r0+REG addresses here.
2704 We punt for now, since this is likely very rare. */
2705 gcc_assert (!REG_P (XEXP (inside, 1)));
2706 break;
2708 case LABEL_REF:
2709 return "mov.l %1,%0" "\n"
2710 " mov.l %1+4,%T0";
2711 case POST_INC:
2712 return "mov.l %1,%0" "\n"
2713 " mov.l %1,%T0";
2714 default:
2715 gcc_unreachable ();
2718 /* Work out the safe way to copy. Copy into the second half first. */
2719 if (dreg == ptrreg)
2720 return "mov.l %T1,%T0" "\n"
2721 " mov.l %1,%0";
2724 return "mov.l %1,%0" "\n"
2725 " mov.l %T1,%T0";
2728 /* Print an instruction which would have gone into a delay slot after
2729 another instruction, but couldn't because the other instruction expanded
2730 into a sequence where putting the slot insn at the end wouldn't work. */
2731 static void
2732 print_slot (rtx_sequence *seq)
2734 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2736 seq->insn (1)->set_deleted ();
2739 const char *
2740 output_far_jump (rtx_insn *insn, rtx op)
2742 struct { rtx lab, reg, op; } this_jmp;
2743 rtx_code_label *braf_base_lab = NULL;
2744 const char *jump;
2745 int far;
2746 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2747 rtx_insn *prev;
2749 this_jmp.lab = gen_label_rtx ();
2751 if (TARGET_SH2
2752 && offset >= -32764
2753 && offset - get_attr_length (insn) <= 32766
2754 && ! CROSSING_JUMP_P (insn))
2756 far = 0;
2757 jump = "mov.w %O0,%1" "\n"
2758 " braf %1";
2760 else
2762 far = 1;
2763 if (flag_pic)
2765 if (TARGET_SH2)
2766 jump = "mov.l %O0,%1" "\n"
2767 " braf %1";
2768 else
2769 jump = "mov.l r0,@-r15" "\n"
2770 " mova %O0,r0" "\n"
2771 " mov.l @r0,%1" "\n"
2772 " add r0,%1" "\n"
2773 " mov.l @r15+,r0" "\n"
2774 " jmp @%1";
2776 else
2777 jump = "mov.l %O0,%1" "\n"
2778 " jmp @%1";
2780 /* If we have a scratch register available, use it. */
2781 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2782 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2784 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2785 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2786 jump = "mov.l r1,@-r15" "\n"
2787 " mova %O0,r0" "\n"
2788 " mov.l @r0,r1" "\n"
2789 " add r1,r0" "\n"
2790 " mov.l @r15+,r1" "\n"
2791 " jmp @%1";
2792 output_asm_insn (jump, &this_jmp.lab);
2793 if (dbr_sequence_length ())
2794 print_slot (final_sequence);
2795 else
2796 output_asm_insn ("nop", 0);
2798 else
2800 /* Output the delay slot insn first if any. */
2801 if (dbr_sequence_length ())
2802 print_slot (final_sequence);
2804 this_jmp.reg = gen_rtx_REG (SImode, 13);
2805 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2806 Fortunately, MACL is fixed and call-clobbered, and we never
2807 need its value across jumps, so save r13 in it instead of in
2808 the stack. */
2809 if (TARGET_SH5)
2810 output_asm_insn ("lds r13,macl", 0);
2811 else
2812 output_asm_insn ("mov.l r13,@-r15", 0);
2813 output_asm_insn (jump, &this_jmp.lab);
2814 if (TARGET_SH5)
2815 output_asm_insn ("sts macl,r13", 0);
2816 else
2817 output_asm_insn ("mov.l @r15+,r13", 0);
2819 if (far && flag_pic && TARGET_SH2)
2821 braf_base_lab = gen_label_rtx ();
2822 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2823 CODE_LABEL_NUMBER (braf_base_lab));
2825 if (far)
2826 output_asm_insn (".align 2", 0);
2827 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2828 this_jmp.op = op;
2829 if (far && flag_pic)
2831 if (TARGET_SH2)
2832 this_jmp.lab = braf_base_lab;
2833 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2835 else
2836 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2837 return "";
2840 /* Local label counter, used for constants in the pool and inside
2841 pattern branches. */
2842 static int lf = 100;
2844 /* Output code for ordinary branches. */
2845 const char *
2846 output_branch (int logic, rtx_insn *insn, rtx *operands)
2848 switch (get_attr_length (insn))
2850 case 6:
2851 /* This can happen if filling the delay slot has caused a forward
2852 branch to exceed its range (we could reverse it, but only
2853 when we know we won't overextend other branches; this should
2854 best be handled by relaxation).
2855 It can also happen when other condbranches hoist delay slot insns
2856 from their destination, thus leading to code size increase.
2857 But the branch will still be in the range -4092..+4098 bytes. */
2858 if (! TARGET_RELAX)
2860 int label = lf++;
2861 /* The call to print_slot will clobber the operands. */
2862 rtx op0 = operands[0];
2864 /* If the instruction in the delay slot is annulled (true), then
2865 there is no delay slot where we can put it now. The only safe
2866 place for it is after the label. final will do that by default. */
2868 if (final_sequence
2869 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2870 && get_attr_length (final_sequence->insn (1)))
2872 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2873 ASSEMBLER_DIALECT ? "/" : ".", label);
2874 print_slot (final_sequence);
2876 else
2877 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2879 output_asm_insn ("bra\t%l0", &op0);
2880 fprintf (asm_out_file, "\tnop\n");
2881 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2883 return "";
2885 /* When relaxing, handle this like a short branch. The linker
2886 will fix it up if it still doesn't fit after relaxation. */
2887 case 2:
2888 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2890 /* These are for SH2e, in which we have to account for the
2891 extra nop because of the hardware bug in annulled branches. */
2892 case 8:
2893 if (! TARGET_RELAX)
2895 int label = lf++;
2897 gcc_assert (!final_sequence
2898 || !(INSN_ANNULLED_BRANCH_P
2899 (XVECEXP (final_sequence, 0, 0))));
2900 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2901 logic ? "f" : "t",
2902 ASSEMBLER_DIALECT ? "/" : ".", label);
2903 fprintf (asm_out_file, "\tnop\n");
2904 output_asm_insn ("bra\t%l0", operands);
2905 fprintf (asm_out_file, "\tnop\n");
2906 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2908 return "";
2910 /* When relaxing, fall through. */
2911 case 4:
2913 char buffer[10];
2915 sprintf (buffer, "b%s%ss\t%%l0",
2916 logic ? "t" : "f",
2917 ASSEMBLER_DIALECT ? "/" : ".");
2918 output_asm_insn (buffer, &operands[0]);
2919 return "nop";
2922 default:
2923 /* There should be no longer branches now - that would
2924 indicate that something has destroyed the branches set
2925 up in machine_dependent_reorg. */
2926 gcc_unreachable ();
2930 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2931 fill in operands[9] with a label for the successor insn.
2932 We try to use jump threading where possible.
2933 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2934 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2935 follow jmp and bt, if the address is in range. */
2936 const char *
2937 output_branchy_insn (enum rtx_code code, const char *templ,
2938 rtx_insn *insn, rtx *operands)
2940 rtx_insn *next_insn = NEXT_INSN (insn);
2942 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2944 rtx src = SET_SRC (PATTERN (next_insn));
2945 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2947 /* Following branch not taken */
2948 rtx_code_label *lab = gen_label_rtx ();
2949 emit_label_after (lab, next_insn);
2950 INSN_ADDRESSES_NEW (lab,
2951 INSN_ADDRESSES (INSN_UID (next_insn))
2952 + get_attr_length (next_insn));
2953 operands[9] = lab;
2954 return templ;
2956 else
2958 int offset = (branch_dest (next_insn)
2959 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2960 if (offset >= -252 && offset <= 258)
2962 if (GET_CODE (src) == IF_THEN_ELSE)
2963 /* branch_true */
2964 src = XEXP (src, 1);
2965 operands[9] = src;
2966 return templ;
2970 rtx_code_label *lab = gen_label_rtx ();
2971 emit_label_after (lab, insn);
2972 INSN_ADDRESSES_NEW (lab,
2973 INSN_ADDRESSES (INSN_UID (insn))
2974 + get_attr_length (insn));
2975 operands[9] = lab;
2976 return templ;
2979 const char *
2980 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2982 return output_branchy_insn (NE, "bt %l9" "\n"
2983 " fcmp/eq %1,%0",
2984 insn, operands);
2987 /* Output the start of the assembler file. */
2988 static void
2989 sh_file_start (void)
2991 default_file_start ();
2993 if (TARGET_ELF)
2994 /* We need to show the text section with the proper
2995 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2996 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2997 will complain. We can teach GAS specifically about the
2998 default attributes for our choice of text section, but
2999 then we would have to change GAS again if/when we change
3000 the text section name. */
3001 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
3002 else
3003 /* Switch to the data section so that the coffsem symbol
3004 isn't in the text section. */
3005 switch_to_section (data_section);
3007 if (TARGET_LITTLE_ENDIAN)
3008 fputs ("\t.little\n", asm_out_file);
3010 if (!TARGET_ELF)
3012 if (TARGET_SHCOMPACT)
3013 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3014 else if (TARGET_SHMEDIA)
3015 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3016 TARGET_SHMEDIA64 ? 64 : 32);
3020 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
3021 static bool
3022 unspec_caller_rtx_p (rtx pat)
3024 rtx base, offset;
3025 int i;
3027 split_const (pat, &base, &offset);
3028 if (GET_CODE (base) == UNSPEC)
3030 if (XINT (base, 1) == UNSPEC_CALLER)
3031 return true;
3032 for (i = 0; i < XVECLEN (base, 0); i++)
3033 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3034 return true;
3036 return false;
3039 /* Indicate that INSN cannot be duplicated. This is true for an insn
3040 that generates a unique label. */
3041 static bool
3042 sh_cannot_copy_insn_p (rtx_insn *insn)
3044 rtx pat;
3046 if (!reload_completed || !flag_pic)
3047 return false;
3049 if (!NONJUMP_INSN_P (insn))
3050 return false;
3051 if (asm_noperands (insn) >= 0)
3052 return false;
3054 pat = PATTERN (insn);
3055 if (GET_CODE (pat) != SET)
3056 return false;
3057 pat = SET_SRC (pat);
3059 if (unspec_caller_rtx_p (pat))
3060 return true;
3062 return false;
3065 /* Number of instructions used to make an arithmetic right shift by N. */
3066 static const char ashiftrt_insns[] =
3067 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3069 /* Description of a logical left or right shift, when expanded to a sequence
3070 of 1/2/8/16 shifts.
3071 Notice that one bit right shifts clobber the T bit. One bit left shifts
3072 are done with an 'add Rn,Rn' insn and thus do not clobber the T bit. */
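/* For example (reading the tables below; the mnemonics are the usual SH
shll / shll2 / shll8 / shll16 and shlr forms):
left shift by 10 -> { 2, { 8, 2 } } shll8; shll2
left shift by 14 -> { 3, { 8, -2, 8 } } shll8; shlr2; shll8
A negative amount denotes a right shift inserted into the sequence;
the single-bit right shifts are the ones that clobber the T bit. */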
3073 enum
3075 ASHL_CLOBBERS_T = 1 << 0,
3076 LSHR_CLOBBERS_T = 1 << 1
3079 struct ashl_lshr_sequence
3081 char insn_count;
3082 signed char amount[6];
3083 char clobbers_t;
3086 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3088 { 0, { 0 }, 0 }, // 0
3089 { 1, { 1 }, LSHR_CLOBBERS_T },
3090 { 1, { 2 }, 0 },
3091 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3092 { 2, { 2, 2 }, 0 }, // 4
3093 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3094 { 3, { 2, 2, 2 }, 0 },
3095 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3096 { 1, { 8 }, 0 }, // 8
3097 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3098 { 2, { 8, 2 }, 0 },
3099 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3100 { 3, { 8, 2, 2 }, 0 }, // 12
3101 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3102 { 3, { 8, -2, 8 }, 0 },
3103 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3104 { 1, { 16 }, 0 }, // 16
3105 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3106 { 2, { 16, 2 }, 0 },
3107 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3108 { 3, { 16, 2, 2 }, 0 }, // 20
3109 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3110 { 3, { 16, -2, 8 }, 0 },
3111 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3112 { 2, { 16, 8 }, 0 }, // 24
3113 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3114 { 3, { 16, 8, 2 }, 0 },
3115 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3116 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3117 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3118 { 3, { 16, -2, 16 }, 0 },
3120 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3121 For a left shift by 31 a 2 insn and-rotl sequences can be used.
3122 However, the shift-and combiner code needs this entry here to be in
3123 terms of real shift insns. */
3124 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3127 /* Individual shift amounts for shift amounts < 16, where up to three of
3128 the highest bits might be clobbered. This is typically used when combined
3129 with some kind of sign or zero extension. */
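/* For example, entry 6 below is { 2, { 8, -2 } }, i.e. shll8; shlr2:
one insn shorter than the exact 2+2+2 sequence, at the price of not
preserving the topmost result bits, which is harmless when the source
is known to be a sign- or zero-extended value. (A sketch, not an
exhaustive description of the table.) */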
3130 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3132 { 0, { 0 }, 0 }, // 0
3133 { 1, { 1 }, LSHR_CLOBBERS_T },
3134 { 1, { 2 }, 0 },
3135 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3136 { 2, { 2, 2 }, 0 }, // 4
3137 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3138 { 2, { 8, -2 }, 0 },
3139 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3140 { 1, { 8 }, 0 }, // 8
3141 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3142 { 2, { 8, 2 }, 0 },
3143 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3144 { 3, { 8, 2, 2 }, 0 }, // 12
3145 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3146 { 2, { 16, -2 }, 0 },
3147 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3148 { 1, { 16 }, 0 }, // 16
3149 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3150 { 2, { 16, 2 }, 0 },
3151 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3152 { 3, { 16, 2, 2 }, 0 }, // 20
3153 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3154 { 3, { 16, -2, 8 }, 0 },
3155 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3156 { 2, { 16, 8 }, 0 }, // 24
3157 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3158 { 3, { 16, 8, 2 }, 0 },
3159 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3160 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3161 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3162 { 3, { 16, -2, 16 }, 0 },
3163 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3166 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3167 will clobber the T bit. */
3168 bool
3169 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3171 gcc_assert (CONST_INT_P (shift_amount));
3173 const int shift_amount_i = INTVAL (shift_amount) & 31;
3175 /* Special case for shift count of 31: use and-rotl sequence. */
3176 if (shift_amount_i == 31)
3177 return true;
3179 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3180 & ASHL_CLOBBERS_T) != 0;
3183 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3184 instructions will clobber the T bit. */
3185 bool
3186 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3188 gcc_assert (CONST_INT_P (shift_amount));
3190 const int shift_amount_i = INTVAL (shift_amount) & 31;
3192 /* Special case for shift count of 31: use shll-movt sequence. */
3193 if (shift_amount_i == 31)
3194 return true;
3196 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3197 & LSHR_CLOBBERS_T) != 0;
3200 /* Return true if it is potentially beneficial to use a dynamic shift
3201 instruction (shad / shar) instead of a combination of 1/2/8/16
3202 shift instructions for the specified shift count.
3203 If dynamic shifts are not available, always return false. */
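/* For example, a left shift by 27 takes four 1/2/8/16 shift insns (see
ashl_lshr_seq), while the dynamic form is roughly 'mov #27,rX; shad
rX,rY', i.e. 1 + SH_DYNAMIC_SHIFT_COST insns, so the dynamic shift is
preferred there. Register names are only placeholders. */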
3204 bool
3205 sh_dynamicalize_shift_p (rtx count)
3207 gcc_assert (CONST_INT_P (count));
3209 const int shift_amount_i = INTVAL (count) & 31;
3210 int insn_count;
3212 /* For left and right shifts, there are shorter 2 insn sequences for
3213 shift amounts of 31. */
3214 if (shift_amount_i == 31)
3215 insn_count = 2;
3216 else
3217 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3219 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3222 /* Assuming we have a value that has been sign-extended by at least one bit,
3223 can we use the ext_ashl_lshr_seq amounts with the last shift turned into an
3224 arithmetic shift to shift it by N without data loss, and quicker than by
3225 other means? */
3226 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3228 /* Return the cost of a shift. */
3229 static inline int
3230 shiftcosts (rtx x)
3232 int value;
3234 if (TARGET_SHMEDIA)
3235 return 1;
3237 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3239 if (GET_MODE (x) == DImode
3240 && CONST_INT_P (XEXP (x, 1))
3241 && INTVAL (XEXP (x, 1)) == 1)
3242 return 2;
3244 /* Everything else is invalid, because there is no pattern for it. */
3245 return -1;
3247 /* If shifting by a non-constant amount, this will be expensive. */
3248 if (!CONST_INT_P (XEXP (x, 1)))
3249 return SH_DYNAMIC_SHIFT_COST;
3251 /* Otherwise, return the true cost in instructions. Cope with out of range
3252 shift counts more or less arbitrarily. */
3253 value = INTVAL (XEXP (x, 1)) & 31;
3255 if (GET_CODE (x) == ASHIFTRT)
3257 int cost = ashiftrt_insns[value];
3258 /* If dynamic shifts are available and profitable in this case, then we
3259 put the constant in a reg and use shad. */
3260 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3261 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3262 return cost;
3264 else
3265 return ashl_lshr_seq[value].insn_count;
3268 /* Return the cost of an AND/XOR/IOR operation. */
3269 static inline int
3270 and_xor_ior_costs (rtx x, int code)
3272 /* On SH1-4 we have only max. SImode operations.
3273 Double the cost for modes > SImode. */
3274 const int cost_scale = !TARGET_SHMEDIA
3275 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3276 ? 2 : 1;
3278 /* A logical operation with two registers is a single cycle
3279 instruction. */
3280 if (!CONST_INT_P (XEXP (x, 1)))
3281 return 1 * cost_scale;
3283 int i = INTVAL (XEXP (x, 1));
3285 if (TARGET_SHMEDIA)
3287 if (satisfies_constraint_I10 (XEXP (x, 1))
3288 || satisfies_constraint_J16 (XEXP (x, 1)))
3289 return 1;
3290 else
3291 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3294 /* These constants are single cycle extu.[bw] instructions. */
3295 if ((i == 0xff || i == 0xffff) && code == AND)
3296 return 1 * cost_scale;
3297 /* Constants that can be used in an instruction as an immediate are
3298 a single cycle, but this requires r0, so make it a little more
3299 expensive. */
3300 if (CONST_OK_FOR_K08 (i))
3301 return 2 * cost_scale;
3302 /* Constants that can be loaded with a mov immediate need one more cycle.
3303 This case is probably unnecessary. */
3304 if (CONST_OK_FOR_I08 (i))
3305 return 2 * cost_scale;
3306 /* Any other constant requires an additional 2 cycle pc-relative load.
3307 This case is probably unnecessary. */
3308 return 3 * cost_scale;
3311 /* Return the cost of an addition or a subtraction. */
3312 static inline int
3313 addsubcosts (rtx x)
3315 if (GET_MODE (x) == SImode)
3317 /* The addc or subc patterns will eventually become one or two
3318 instructions. Below are some costs for some of the patterns
3319 which combine would reject because the costs of the individual
3320 insns in the patterns are lower.
3322 FIXME: It would be much easier if we had something like insn cost
3323 attributes and the cost calculation machinery used those attributes
3324 in the first place. This would eliminate redundant recog-like C
3325 code to calculate costs of complex patterns. */
3326 rtx op0 = XEXP (x, 0);
3327 rtx op1 = XEXP (x, 1);
3329 if (GET_CODE (x) == PLUS)
3331 if (GET_CODE (op0) == AND
3332 && XEXP (op0, 1) == const1_rtx
3333 && (GET_CODE (op1) == PLUS
3334 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3335 return 1;
3337 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3338 && GET_CODE (op1) == LSHIFTRT
3339 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3340 return 1;
3342 /* Let's assume that adding the result of an insn that stores into
3343 the T bit is cheap. */
3344 if (treg_set_expr (op1, SImode))
3345 return 1;
3346 if (treg_set_expr (op0, SImode))
3347 return 1;
3350 /* On SH1-4 we have only max. SImode operations.
3351 Double the cost for modes > SImode. */
3352 const int cost_scale = !TARGET_SHMEDIA
3353 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3354 ? 2 : 1;
3356 /* Adding a register is a single cycle insn. */
3357 if (REG_P (XEXP (x, 1))
3358 || GET_CODE (XEXP (x, 1)) == SUBREG)
3359 return 1 * cost_scale;
3361 /* Likewise for small constants. */
3362 if (CONST_INT_P (XEXP (x, 1))
3363 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3364 return 1 * cost_scale;
3366 if (TARGET_SHMEDIA)
3367 switch (GET_CODE (XEXP (x, 1)))
3369 case CONST:
3370 case LABEL_REF:
3371 case SYMBOL_REF:
3372 return TARGET_SHMEDIA64 ? 5 : 3;
3374 case CONST_INT:
3375 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3376 return 2;
3377 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3378 return 3;
3379 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3380 return 4;
3382 /* Fall through. */
3383 default:
3384 return 5;
3387 /* Any other constant requires a 2 cycle pc-relative load plus an
3388 addition. */
3389 return 3 * cost_scale;
3392 /* Return the cost of a multiply. */
3393 static inline int
3394 multcosts (rtx x ATTRIBUTE_UNUSED)
3396 if (sh_multcost >= 0)
3397 return sh_multcost;
3398 if (TARGET_SHMEDIA)
3399 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3400 accept constants. Ideally, we would use a cost of one or two and
3401 add the cost of the operand, but disregard the latter when inside loops
3402 and loop invariant code motion is still to follow.
3403 Using a multiply first and splitting it later if it's a loss
3404 doesn't work because of different sign / zero extension semantics
3405 of multiplies vs. shifts. */
3406 return optimize_size ? 2 : 3;
3408 if (TARGET_SH2)
3410 /* We have a mul insn, so we can never take more than the mul and the
3411 read of the mac reg, but count more because of the latency and extra
3412 reg usage. */
3413 if (optimize_size)
3414 return 2;
3415 return 3;
3418 /* If we're aiming at small code, then just count the number of
3419 insns in a multiply call sequence. */
3420 if (optimize_size)
3421 return 5;
3423 /* Otherwise count all the insns in the routine we'd be calling too. */
3424 return 20;
3427 /* Compute a (partial) cost for rtx X. Return true if the complete
3428 cost has been computed, and false if subexpressions should be
3429 scanned. In either case, *TOTAL contains the cost result. */
3430 static bool
3431 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3432 int *total, bool speed ATTRIBUTE_UNUSED)
3434 switch (code)
3436 /* The lower-subreg pass decides whether to split multi-word regs
3437 into individual regs by looking at the cost for a SET of certain
3438 modes with the following patterns:
3439 (set (reg) (reg))
3440 (set (reg) (const_int 0))
3441 On machines that support vector-move operations a multi-word move
3442 is the same cost as individual reg move. On SH there is no
3443 vector-move, so we have to provide the correct cost in the number
3444 of move insns to load/store the reg of the mode in question. */
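/* E.g. (assuming mov_insn_size returns 4 for DImode without SH2A) a
DImode reg-reg copy is costed as COSTS_N_INSNS (8 / 4), i.e. two move
insns, which is what lower-subreg needs to see in order to split such
regs. */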
3445 case SET:
3446 if (register_operand (SET_DEST (x), VOIDmode)
3447 && (register_operand (SET_SRC (x), VOIDmode)
3448 || satisfies_constraint_Z (SET_SRC (x))))
3450 const machine_mode mode = GET_MODE (SET_DEST (x));
3451 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3452 / mov_insn_size (mode, TARGET_SH2A));
3453 return true;
3455 return false;
3457 /* The cost of a mem access is mainly the cost of the address mode. */
3458 case MEM:
3459 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3460 true);
3461 return true;
3463 case IF_THEN_ELSE:
3464 /* This case is required for the if_then_else negc pattern. */
3465 if (treg_set_expr (XEXP (x, 0), SImode))
3467 *total = COSTS_N_INSNS (1);
3468 return true;
3470 else
3471 return false;
3473 /* Zero extracts of single bits are usually combine patterns for the
3474 tst insns. */
3475 case ZERO_EXTRACT:
3476 if (GET_CODE (XEXP (x, 0)) == XOR
3477 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3478 && XEXP (x, 1) == const1_rtx
3479 && CONST_INT_P (XEXP (x, 2))
3480 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3481 /* Check that the xor constant overlaps with the extracted bit. */
3482 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3484 *total = 1; //COSTS_N_INSNS (1);
3485 return true;
3487 return false;
3489 /* The cost of a sign or zero extend depends on whether the source is a
3490 reg or a mem. In case of a mem take the address into account. */
3491 case SIGN_EXTEND:
3492 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3494 *total = COSTS_N_INSNS (1);
3495 return true;
3497 if (MEM_P (XEXP (x, 0)))
3499 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3500 GET_MODE (XEXP (x, 0)),
3501 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3502 return true;
3504 return false;
3506 case ZERO_EXTEND:
3507 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3509 *total = COSTS_N_INSNS (1);
3510 return true;
3512 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3513 && (GET_MODE (XEXP (x, 0)) == QImode
3514 || GET_MODE (XEXP (x, 0)) == HImode))
3516 /* Handle SH2A's movu.b and movu.w insn. */
3517 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3518 GET_MODE (XEXP (x, 0)),
3519 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3520 return true;
3522 return false;
3524 /* mems for SFmode and DFmode can be inside a parallel due to
3525 the way the fpscr is handled. */
3526 case PARALLEL:
3527 for (int i = 0; i < XVECLEN (x, 0); i++)
3529 rtx xx = XVECEXP (x, 0, i);
3530 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3532 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3533 GET_MODE (XEXP (xx, 0)),
3534 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3535 return true;
3537 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3539 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3540 GET_MODE (XEXP (xx, 1)),
3541 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3542 return true;
3546 if (sh_1el_vec (x, VOIDmode))
3547 *total = outer_code != SET;
3548 else if (sh_rep_vec (x, VOIDmode))
3549 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3550 + (outer_code != SET));
3551 else
3552 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3553 return true;
3555 case CONST_INT:
3556 if (TARGET_SHMEDIA)
3558 if (INTVAL (x) == 0)
3559 *total = 0;
3560 else if (outer_code == AND && and_operand ((x), DImode))
3561 *total = 0;
3562 else if ((outer_code == IOR || outer_code == XOR
3563 || outer_code == PLUS)
3564 && CONST_OK_FOR_I10 (INTVAL (x)))
3565 *total = 0;
3566 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3567 *total = COSTS_N_INSNS (outer_code != SET);
3568 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3569 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3570 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3571 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3572 else
3573 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3574 return true;
3576 if (CONST_OK_FOR_I08 (INTVAL (x)))
3577 *total = 0;
3578 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3579 && CONST_OK_FOR_K08 (INTVAL (x)))
3580 *total = 1;
3581 /* prepare_cmp_insn will force costly constants into registers before
3582 the cbranch[sd]i4 patterns can see them, so preserve potentially
3583 interesting ones not covered by I08 above. */
3584 else if (outer_code == COMPARE
3585 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3586 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3587 || INTVAL (x) == 0x7fffffff
3588 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3589 *total = 1;
3590 else
3591 *total = 8;
3592 return true;
3594 case EQ:
3595 /* An and with a constant compared against zero is
3596 most likely going to be a TST #imm, R0 instruction.
3597 Notice that this does not catch the zero_extract variants from
3598 the md file. */
3599 if (XEXP (x, 1) == const0_rtx
3600 && (GET_CODE (XEXP (x, 0)) == AND
3601 || (SUBREG_P (XEXP (x, 0))
3602 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3604 *total = 1;
3605 return true;
3608 else if (XEXP (x, 1) == const0_rtx
3609 && GET_CODE (XEXP (x, 0)) == AND
3610 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3611 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3612 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3613 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3615 *total = 1;
3616 return true;
3618 else
3619 return false;
3621 case SMIN:
3622 case SMAX:
3623 /* This is most likely a clips.b or clips.w insn that is being made up
3624 by combine. */
3625 if (TARGET_SH2A
3626 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3627 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3628 && REG_P (XEXP (XEXP (x, 0), 0))
3629 && CONST_INT_P (XEXP (x, 1)))
3631 *total = COSTS_N_INSNS (1);
3632 return true;
3634 else
3635 return false;
3637 case CONST:
3638 case LABEL_REF:
3639 case SYMBOL_REF:
3640 if (TARGET_SHMEDIA64)
3641 *total = COSTS_N_INSNS (4);
3642 else if (TARGET_SHMEDIA32)
3643 *total = COSTS_N_INSNS (2);
3644 else
3645 *total = 5;
3646 return true;
3648 case CONST_DOUBLE:
3649 if (TARGET_SHMEDIA)
3650 *total = COSTS_N_INSNS (4);
3651 /* prepare_cmp_insn will force costly constants into registers before
3652 the cbranchdi4 pattern can see them, so preserve potentially
3653 interesting ones. */
3654 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3655 *total = 1;
3656 else
3657 *total = 10;
3658 return true;
3660 case CONST_VECTOR:
3661 /* FIXME: This looks broken. Only the last statement has any effect.
3662 Probably this could be folded with the PARALLEL case? */
3663 if (x == CONST0_RTX (GET_MODE (x)))
3664 *total = 0;
3665 else if (sh_1el_vec (x, VOIDmode))
3666 *total = outer_code != SET;
3667 if (sh_rep_vec (x, VOIDmode))
3668 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3669 + (outer_code != SET));
3670 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3671 return true;
3673 case PLUS:
3674 case MINUS:
3675 *total = COSTS_N_INSNS (addsubcosts (x));
3676 return true;
3678 case AND:
3679 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3680 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3682 *total = COSTS_N_INSNS (1);
3683 return true;
3685 /* Fall through. */
3687 case XOR:
3688 case IOR:
3689 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3690 return true;
3692 case MULT:
3693 *total = COSTS_N_INSNS (multcosts (x));
3694 return true;
3696 case LT:
3697 case GE:
3698 /* div0s sign comparison. */
3699 if (GET_CODE (XEXP (x, 0)) == XOR
3700 && REG_P ((XEXP (XEXP (x, 0), 0)))
3701 && REG_P ((XEXP (XEXP (x, 0), 1)))
3702 && satisfies_constraint_Z (XEXP (x, 1)))
3704 *total = COSTS_N_INSNS (1);
3705 return true;
3707 else
3708 return false;
3710 case LSHIFTRT:
3711 /* div0s sign comparison. */
3712 if (GET_CODE (XEXP (x, 0)) == XOR
3713 && REG_P ((XEXP (XEXP (x, 0), 0)))
3714 && REG_P ((XEXP (XEXP (x, 0), 1)))
3715 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3717 *total = COSTS_N_INSNS (1);
3718 return true;
3720 /* Fall through to shiftcosts. */
3721 case ASHIFT:
3722 case ASHIFTRT:
3724 int cost = shiftcosts (x);
3725 if (cost < 0)
3726 return false;
3727 *total = COSTS_N_INSNS (cost);
3728 return true;
3731 case DIV:
3732 case UDIV:
3733 case MOD:
3734 case UMOD:
3735 *total = COSTS_N_INSNS (20);
3736 return true;
3738 case FLOAT:
3739 case FIX:
3740 *total = 100;
3741 return true;
3743 default:
3744 return false;
3748 /* Determine the size of the fundamental move insn that will be used
3749 for the specified mode. */
3750 static inline int
3751 mov_insn_size (machine_mode mode, bool consider_sh2a)
3753 const int mode_sz = GET_MODE_SIZE (mode);
3755 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3756 || (TARGET_FMOVD && mode == DFmode))
3757 return mode_sz;
3758 else
3760 /* The max. available mode for actual move insns is SImode.
3761 Larger accesses will be split into multiple loads/stores. */
3762 const int max_mov_sz = GET_MODE_SIZE (SImode);
3763 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
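/* Illustrative values (a sketch derived from the logic above, not used by
   the compiler itself): without SH2A double moves or -mfmovd, mov_insn_size
   gives QImode -> 1, HImode -> 2, SImode/SFmode -> 4, and also 4 for DImode
   and DFmode, which are split into SImode moves; DFmode gives 8 only when
   TARGET_FMOVD or SH2A double-precision moves are in effect.  */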
3767 /* Determine the maximum possible displacement for a move insn for the
3768 specified mode. */
3770 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3772 /* The 4 byte displacement move insns are the same as the 2 byte
3773 versions but take a 12 bit displacement. All we need to do is to
3774 scale the max. displacement value accordingly. */
3775 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3777 /* SH2A supports FPU move insns with 12 bit displacements.
3778 Other variants do not support any kind of displacement for
3779 FPU move insns. */
3780 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3781 return 0;
3782 else
3784 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3785 const int mode_sz = GET_MODE_SIZE (mode);
3786 int r = 15 * mov_insn_sz * disp_scale;
3788 /* If the mov insn will be split into multiple loads/stores, the
3789 maximum possible displacement is a bit smaller. */
3790 if (mode_sz > mov_insn_sz)
3791 r -= mode_sz - mov_insn_sz;
3792 return r;
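/* Worked example of the formula above (for illustration only): for SImode
   without SH2A displacements, mov_insn_sz = 4 and disp_scale = 1, so the
   limit is 15 * 4 = 60 bytes.  With consider_sh2a, disp_scale = 4095 / 15
   = 273 and the limit becomes 15 * 4 * 273 = 16380.  For DImode the SImode
   limit is reduced by the split (8 - 4), giving 56.  */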
3796 /* Determine the alignment mask for a move insn of the
3797 specified mode. */
3798 static inline int
3799 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3801 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3802 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3805 /* Return the displacement value of a displacement address. */
3806 HOST_WIDE_INT
3807 sh_disp_addr_displacement (rtx x)
3809 gcc_assert (satisfies_constraint_Sdd (x));
3810 return INTVAL (XEXP (XEXP (x, 0), 1));
3813 /* Compute the cost of an address. */
3814 static int
3815 sh_address_cost (rtx x, machine_mode mode,
3816 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3818 /* 'GBR + 0'. Account one more because of R0 restriction. */
3819 if (REG_P (x) && REGNO (x) == GBR_REG)
3820 return 2;
3822 /* Simple reg, post-inc, pre-dec addressing. */
3823 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3824 return 1;
3826 /* 'reg + disp' addressing. */
3827 if (GET_CODE (x) == PLUS
3828 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3830 /* 'GBR + disp'. Account one more because of R0 restriction. */
3831 if (REGNO (XEXP (x, 0)) == GBR_REG
3832 && gbr_displacement (XEXP (x, 1), mode))
3833 return 2;
3835 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3837 if (offset == 0)
3838 return 1;
3840 /* The displacement would fit into a 2 byte move insn.
3841 HImode and QImode loads/stores with displacement put pressure on
3842 R0 which will most likely require another reg copy. Thus account
3843 a higher cost for that. */
3844 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3845 return (mode == HImode || mode == QImode) ? 2 : 1;
3847 /* The displacement would fit into a 4 byte move insn (SH2A). */
3848 if (TARGET_SH2A
3849 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3850 return 2;
3852 /* The displacement is probably out of range and will require extra
3853 calculations. */
3854 return 3;
3857 /* 'reg + reg' addressing. Account a slightly higher cost because of
3858 increased pressure on R0. */
3859 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3860 && ! TARGET_SHMEDIA)
3861 return 3;
3863 /* Not sure what it is - probably expensive. */
3864 return 10;
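/* Summary of the costs returned above (for illustration):
     @Rn, @Rn+, @-Rn                          -> 1
     @(0,Rn)                                  -> 1
     @(disp,Rn) within the 2-byte insn range  -> 1 (SImode), 2 (QI/HImode)
     @(disp,Rn) only within the SH2A range    -> 2
     @(disp,Rn) out of range                  -> 3
     @(R0,Rn)                                 -> 3
     @(0,GBR), @(disp,GBR)                    -> 2
     anything else                            -> 10  */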
3867 /* Code to expand a shift. */
3868 static void
3869 gen_ashift (int type, int n, rtx reg)
3871 rtx n_rtx;
3873 /* Negative values here come from the shift_amounts array. */
3874 if (n < 0)
3876 if (type == ASHIFT)
3877 type = LSHIFTRT;
3878 else
3879 type = ASHIFT;
3880 n = -n;
3883 n_rtx = GEN_INT (n);
3884 gcc_assert (satisfies_constraint_P27 (n_rtx));
3886 switch (type)
3888 case ASHIFTRT:
3889 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3890 break;
3891 case LSHIFTRT:
3892 if (n == 1)
3893 emit_insn (gen_shlr (reg, reg));
3894 else
3895 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3896 break;
3897 case ASHIFT:
3898 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3899 break;
3900 default:
3901 gcc_unreachable ();
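/* Usage sketch (hypothetical calls, for illustration only):
   gen_ashift (ASHIFT, 2, reg) emits an ashlsi3_k shifting REG left by 2,
   while gen_ashift (ASHIFT, -2, reg) flips the direction and emits an
   lshrsi3_k shifting right by 2, which is how the negative entries in the
   shift sequence tables are handled.  */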
3905 /* Code to expand a HImode shift. */
3906 static void
3907 gen_ashift_hi (int type, int n, rtx reg)
3909 /* Negative values here come from the shift_amounts array. */
3910 if (n < 0)
3912 if (type == ASHIFT)
3913 type = LSHIFTRT;
3914 else
3915 type = ASHIFT;
3916 n = -n;
3919 switch (type)
3921 case ASHIFTRT:
3922 case LSHIFTRT:
3923 /* We don't have HImode right shift operations because using the
3924 ordinary 32 bit shift instructions for that doesn't generate proper
3925 zero/sign extension.
3926 gen_ashift_hi is only called in contexts where we know that the
3927 sign extension works out correctly. */
3929 int offset = 0;
3930 if (GET_CODE (reg) == SUBREG)
3932 offset = SUBREG_BYTE (reg);
3933 reg = SUBREG_REG (reg);
3935 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3936 break;
3938 case ASHIFT:
3939 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3940 break;
3944 /* Output RTL to split a constant shift into its component SH constant
3945 shift instructions. */
3946 void
3947 gen_shifty_op (int code, rtx *operands)
3949 int value = INTVAL (operands[2]);
3950 int max, i;
3952 /* Truncate the shift count in case it is out of bounds. */
3953 value = value & 31;
3955 if (value == 31)
3957 if (code == LSHIFTRT)
3959 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3960 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3961 return;
3963 else if (code == ASHIFT)
3965 /* There is a two instruction sequence for 31 bit left shifts,
3966 but it requires r0. */
3967 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3969 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3970 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3971 return;
3975 else if (value == 0)
3977 /* This can happen even when optimizing, if there were subregs before
3978 reload. Don't output a nop here, as this is never optimized away;
3979 use a no-op move instead. */
3980 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3981 return;
3984 max = ashl_lshr_seq[value].insn_count;
3985 for (i = 0; i < max; i++)
3986 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
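/* Example (assuming the sequence table decomposes a shift by 10 into the
   single-instruction amounts 8 and 2): calling gen_shifty_op with
   code == ASHIFT and operands[2] == GEN_INT (10) would emit two insns via
   gen_ashift, a shift left by 8 followed by a shift left by 2.  */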
3989 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3990 don't matter. */
3991 void
3992 gen_shifty_hi_op (int code, rtx *operands)
3994 int value = INTVAL (operands[2]);
3995 int max, i;
3996 void (*gen_fun) (int, int, rtx);
3998 /* This operation is used by and_shl for SImode values with a few
3999 high bits known to be cleared. */
4000 value &= 31;
4001 if (value == 0)
4003 emit_insn (gen_nop ());
4004 return;
4007 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4008 if (code == ASHIFT)
4010 max = ext_ashl_lshr_seq[value].insn_count;
4011 for (i = 0; i < max; i++)
4012 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4014 else
4015 /* When shifting right, emit the shifts in reverse order, so that
4016 solitary negative values come first. */
4017 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4018 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4021 /* Output RTL for an arithmetic right shift.
4022 ??? Rewrite to use super-optimizer sequences. */
4023 bool
4024 expand_ashiftrt (rtx *operands)
4026 rtx wrk;
4027 char func[18];
4028 int value;
4030 if (TARGET_DYNSHIFT)
4032 if (!CONST_INT_P (operands[2]))
4034 rtx count = copy_to_mode_reg (SImode, operands[2]);
4035 emit_insn (gen_negsi2 (count, count));
4036 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4037 return true;
4039 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4040 > 1 + SH_DYNAMIC_SHIFT_COST)
4042 rtx count
4043 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4044 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4045 return true;
4048 if (!CONST_INT_P (operands[2]))
4049 return false;
4051 value = INTVAL (operands[2]) & 31;
4053 if (value == 31)
4055 /* If we are called from abs expansion, arrange things so that we
4056 can use a single MT instruction that doesn't clobber the source,
4057 if LICM can hoist out the load of the constant zero. */
4058 if (currently_expanding_to_rtl)
4060 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4061 operands[1]));
4062 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4063 return true;
4065 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4066 return true;
4068 else if (value >= 16 && value <= 19)
4070 wrk = gen_reg_rtx (SImode);
4071 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4072 value -= 16;
4073 while (value--)
4074 gen_ashift (ASHIFTRT, 1, wrk);
4075 emit_move_insn (operands[0], wrk);
4076 return true;
4078 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4079 else if (value <= 5)
4081 wrk = gen_reg_rtx (SImode);
4082 emit_move_insn (wrk, operands[1]);
4083 while (value--)
4084 gen_ashift (ASHIFTRT, 1, wrk);
4085 emit_move_insn (operands[0], wrk);
4086 return true;
4089 wrk = gen_reg_rtx (Pmode);
4091 /* Load the value into an arg reg and call a helper. */
4092 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4093 sprintf (func, "__ashiftrt_r4_%d", value);
4094 function_symbol (wrk, func, SFUNC_STATIC);
4095 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4096 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4097 return true;
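/* For illustration: on a target without dynamic shifts, an arithmetic
   right shift by 24 matches none of the special cases above, so the value
   is moved into r4, the static library routine __ashiftrt_r4_24 is called,
   and the result is read back from r4.  */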
4100 /* Try to find a good way to implement the combiner pattern
4101 [(set (match_operand:SI 0 "register_operand" "r")
4102 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4103 (match_operand:SI 2 "const_int_operand" "n"))
4104 (match_operand:SI 3 "const_int_operand" "n"))) .
4105 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4106 return 0 for simple right / left or left/right shift combination.
4107 return 1 for a combination of shifts with zero_extend.
4108 return 2 for a combination of shifts with an AND that needs r0.
4109 return 3 for a combination of shifts with an AND that needs an extra
4110 scratch register, when the three highmost bits of the AND mask are clear.
4111 return 4 for a combination of shifts with an AND that needs an extra
4112 scratch register, when any of the three highmost bits of the AND mask
4113 is set.
4114 If ATTRP is set, store an initial right shift width in ATTRP[0],
4115 and the instruction length in ATTRP[1] . These values are not valid
4116 when returning 0.
4117 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4118 shift_amounts for the last shift value that is to be used before the
4119 sign extend. */
4121 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4123 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4124 int left = INTVAL (left_rtx), right;
4125 int best = 0;
4126 int cost, best_cost = 10000;
4127 int best_right = 0, best_len = 0;
4128 int i;
4129 int can_ext;
4131 if (left < 0 || left > 31)
4132 return 0;
4133 if (CONST_INT_P (mask_rtx))
4134 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4135 else
4136 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4137 /* Can this be expressed as a right shift / left shift pair? */
4138 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4139 right = exact_log2 (lsb);
4140 mask2 = ~(mask + lsb - 1);
4141 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4142 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
4143 if (! mask2)
4144 best_cost = ashl_lshr_seq[right].insn_count
4145 + ashl_lshr_seq[right + left].insn_count;
4146 /* mask has no trailing zeroes <==> ! right */
4147 else if (! right && mask2 == ~(lsb2 - 1))
4149 int late_right = exact_log2 (lsb2);
4150 best_cost = ashl_lshr_seq[left + late_right].insn_count
4151 + ashl_lshr_seq[late_right].insn_count;
4153 /* Try to use zero extend. */
4154 if (mask2 == ~(lsb2 - 1))
4156 int width, first;
4158 for (width = 8; width <= 16; width += 8)
4160 /* Can we zero-extend right away? */
4161 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4163 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4164 + ext_ashl_lshr_seq[left + right].insn_count;
4165 if (cost < best_cost)
4167 best = 1;
4168 best_cost = cost;
4169 best_right = right;
4170 best_len = cost;
4171 if (attrp)
4172 attrp[2] = -1;
4174 continue;
4176 /* ??? Could try to put zero extend into initial right shift,
4177 or even shift a bit left before the right shift. */
4178 /* Determine value of first part of left shift, to get to the
4179 zero extend cut-off point. */
4180 first = width - exact_log2 (lsb2) + right;
4181 if (first >= 0 && right + left - first >= 0)
4183 cost = ext_ashl_lshr_seq[right].insn_count
4184 + ext_ashl_lshr_seq[first].insn_count + 1
4185 + ext_ashl_lshr_seq[right + left - first].insn_count;
4187 if (cost < best_cost)
4189 best = 1;
4190 best_cost = cost;
4191 best_right = right;
4192 best_len = cost;
4193 if (attrp)
4194 attrp[2] = first;
4199 /* Try to use r0 AND pattern */
4200 for (i = 0; i <= 2; i++)
4202 if (i > right)
4203 break;
4204 if (! CONST_OK_FOR_K08 (mask >> i))
4205 continue;
4206 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4207 if (cost < best_cost)
4209 best = 2;
4210 best_cost = cost;
4211 best_right = i;
4212 best_len = cost - 1;
4215 /* Try to use a scratch register to hold the AND operand. */
4216 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4217 for (i = 0; i <= 2; i++)
4219 if (i > right)
4220 break;
4221 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4222 + (can_ext
4223 ? ext_ashl_lshr_seq
4224 : ashl_lshr_seq)[left + i].insn_count;
4225 if (cost < best_cost)
4227 best = 4 - can_ext;
4228 best_cost = cost;
4229 best_right = i;
4230 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4234 if (attrp)
4236 attrp[0] = best_right;
4237 attrp[1] = best_len;
4239 return best;
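/* Usage sketch: callers such as gen_shl_and and shl_and_length below pass
   an int attributes[3] array; attributes[0] then holds the initial right
   shift width and attributes[1] the value used for the insn length
   attribute, while the return value selects the expansion method.  */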
4242 /* This is used in length attributes of the unnamed instructions
4243 corresponding to shl_and_kind return values of 1 and 2. */
4245 shl_and_length (rtx insn)
4247 rtx set_src, left_rtx, mask_rtx;
4248 int attributes[3];
4250 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4251 left_rtx = XEXP (XEXP (set_src, 0), 1);
4252 mask_rtx = XEXP (set_src, 1);
4253 shl_and_kind (left_rtx, mask_rtx, attributes);
4254 return attributes[1];
4257 /* This is used in length attribute of the and_shl_scratch instruction. */
4259 shl_and_scr_length (rtx insn)
4261 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4262 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4263 rtx op = XEXP (set_src, 0);
4264 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4265 op = XEXP (XEXP (op, 0), 0);
4266 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4269 /* Generate rtl for instructions for which shl_and_kind advised a particular
4270 method of generating them, i.e. returned a nonzero kind. */
4271 bool
4272 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4274 int attributes[3];
4275 unsigned HOST_WIDE_INT mask;
4276 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4277 int right, total_shift;
4278 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4280 right = attributes[0];
4281 total_shift = INTVAL (left_rtx) + right;
4282 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4283 switch (kind)
4285 default:
4286 return true;
4287 case 1:
4289 int first = attributes[2];
4290 rtx operands[3];
4292 if (first < 0)
4294 emit_insn ((mask << right) <= 0xff
4295 ? gen_zero_extendqisi2 (dest,
4296 gen_lowpart (QImode, source))
4297 : gen_zero_extendhisi2 (dest,
4298 gen_lowpart (HImode, source)));
4299 source = dest;
4301 if (source != dest)
4302 emit_insn (gen_movsi (dest, source));
4303 operands[0] = dest;
4304 if (right)
4306 operands[2] = GEN_INT (right);
4307 gen_shifty_hi_op (LSHIFTRT, operands);
4309 if (first > 0)
4311 operands[2] = GEN_INT (first);
4312 gen_shifty_hi_op (ASHIFT, operands);
4313 total_shift -= first;
4314 mask <<= first;
4316 if (first >= 0)
4317 emit_insn (mask <= 0xff
4318 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4319 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4320 if (total_shift > 0)
4322 operands[2] = GEN_INT (total_shift);
4323 gen_shifty_hi_op (ASHIFT, operands);
4325 break;
4327 case 4:
4328 shift_gen_fun = gen_shifty_op;
4329 case 3:
4330 /* If the topmost bit that matters is set, set the topmost bits
4331 that don't matter. This way, we might be able to get a shorter
4332 signed constant. */
4333 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4334 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4335 case 2:
4336 /* Don't expand fine-grained when combining, because that will
4337 make the pattern fail. */
4338 if (currently_expanding_to_rtl
4339 || reload_in_progress || reload_completed)
4341 rtx operands[3];
4343 /* Cases 3 and 4 should be handled by this split
4344 only while combining */
4345 gcc_assert (kind <= 2);
4346 if (right)
4348 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4349 source = dest;
4351 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4352 if (total_shift)
4354 operands[0] = dest;
4355 operands[1] = dest;
4356 operands[2] = GEN_INT (total_shift);
4357 shift_gen_fun (ASHIFT, operands);
4359 break;
4361 else
4363 int neg = 0;
4364 if (kind != 4 && total_shift < 16)
4366 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4367 if (neg > 0)
4368 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4369 else
4370 neg = 0;
4372 emit_insn (gen_and_shl_scratch (dest, source,
4373 GEN_INT (right),
4374 GEN_INT (mask),
4375 GEN_INT (total_shift + neg),
4376 GEN_INT (neg)));
4377 emit_insn (gen_movsi (dest, dest));
4378 break;
4381 return false;
4384 /* Try to find a good way to implement the combiner pattern
4385 [(set (match_operand:SI 0 "register_operand" "=r")
4386 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4387 (match_operand:SI 2 "const_int_operand" "n")
4388 (match_operand:SI 3 "const_int_operand" "n")
4389 (const_int 0)))
4390 (clobber (reg:SI T_REG))]
4391 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4392 return 0 for simple left / right shift combination.
4393 return 1 for left shift / 8 bit sign extend / left shift.
4394 return 2 for left shift / 16 bit sign extend / left shift.
4395 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4396 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4397 return 5 for left shift / 16 bit sign extend / right shift
4398 return 6 for < 8 bit sign extend / left shift.
4399 return 7 for < 8 bit sign extend / left shift / single right shift.
4400 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4402 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4404 int left, size, insize, ext;
4405 int cost = 0, best_cost;
4406 int kind;
4408 left = INTVAL (left_rtx);
4409 size = INTVAL (size_rtx);
4410 insize = size - left;
4411 gcc_assert (insize > 0);
4412 /* Default to left / right shift. */
4413 kind = 0;
4414 best_cost = ashl_lshr_seq[32 - insize].insn_count
4415 + ashl_lshr_seq[32 - size].insn_count;
4416 if (size <= 16)
4418 /* 16 bit shift / sign extend / 16 bit shift */
4419 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4420 + ashl_lshr_seq[16 - size].insn_count;
4421 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4422 below, by alternative 3 or something even better. */
4423 if (cost < best_cost)
4425 kind = 5;
4426 best_cost = cost;
4429 /* Try a plain sign extend between two shifts. */
4430 for (ext = 16; ext >= insize; ext -= 8)
4432 if (ext <= size)
4434 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4435 + ashl_lshr_seq[size - ext].insn_count;
4436 if (cost < best_cost)
4438 kind = ext / (unsigned) 8;
4439 best_cost = cost;
4442 /* Check if we can do a sloppy shift with a final signed shift
4443 restoring the sign. */
4444 if (EXT_SHIFT_SIGNED (size - ext))
4445 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4446 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4447 /* If not, maybe it's still cheaper to do the second shift sloppy,
4448 and do a final sign extend? */
4449 else if (size <= 16)
4450 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4451 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4452 + 1;
4453 else
4454 continue;
4455 if (cost < best_cost)
4457 kind = ext / (unsigned) 8 + 2;
4458 best_cost = cost;
4461 /* Check if we can sign extend in r0 */
4462 if (insize < 8)
4464 cost = 3 + ashl_lshr_seq[left].insn_count;
4465 if (cost < best_cost)
4467 kind = 6;
4468 best_cost = cost;
4470 /* Try the same with a final signed shift. */
4471 if (left < 31)
4473 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4474 if (cost < best_cost)
4476 kind = 7;
4477 best_cost = cost;
4481 if (TARGET_DYNSHIFT)
4483 /* Try to use a dynamic shift. */
4484 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4485 if (cost < best_cost)
4487 kind = 0;
4488 best_cost = cost;
4491 if (costp)
4492 *costp = cost;
4493 return kind;
4496 /* Function to be used in the length attribute of the instructions
4497 implementing this pattern. */
4499 shl_sext_length (rtx insn)
4501 rtx set_src, left_rtx, size_rtx;
4502 int cost;
4504 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4505 left_rtx = XEXP (XEXP (set_src, 0), 1);
4506 size_rtx = XEXP (set_src, 1);
4507 shl_sext_kind (left_rtx, size_rtx, &cost);
4508 return cost;
4511 /* Generate rtl for this pattern */
4512 bool
4513 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4515 int kind;
4516 int left, size, insize, cost;
4517 rtx operands[3];
4519 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4520 left = INTVAL (left_rtx);
4521 size = INTVAL (size_rtx);
4522 insize = size - left;
4523 switch (kind)
4525 case 1:
4526 case 2:
4527 case 3:
4528 case 4:
4530 int ext = kind & 1 ? 8 : 16;
4531 int shift2 = size - ext;
4533 /* Don't expand fine-grained when combining, because that will
4534 make the pattern fail. */
4535 if (! currently_expanding_to_rtl
4536 && ! reload_in_progress && ! reload_completed)
4538 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4539 emit_insn (gen_movsi (dest, source));
4540 break;
4542 if (dest != source)
4543 emit_insn (gen_movsi (dest, source));
4544 operands[0] = dest;
4545 if (ext - insize)
4547 operands[2] = GEN_INT (ext - insize);
4548 gen_shifty_hi_op (ASHIFT, operands);
4550 emit_insn (kind & 1
4551 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4552 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4553 if (kind <= 2)
4555 if (shift2)
4557 operands[2] = GEN_INT (shift2);
4558 gen_shifty_op (ASHIFT, operands);
4561 else
4563 if (shift2 > 0)
4565 if (EXT_SHIFT_SIGNED (shift2))
4567 operands[2] = GEN_INT (shift2 + 1);
4568 gen_shifty_op (ASHIFT, operands);
4569 operands[2] = const1_rtx;
4570 gen_shifty_op (ASHIFTRT, operands);
4571 break;
4573 operands[2] = GEN_INT (shift2);
4574 gen_shifty_hi_op (ASHIFT, operands);
4576 else if (shift2)
4578 operands[2] = GEN_INT (-shift2);
4579 gen_shifty_hi_op (LSHIFTRT, operands);
4581 emit_insn (size <= 8
4582 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4583 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4585 break;
4587 case 5:
4589 int i = 16 - size;
4590 if (! currently_expanding_to_rtl
4591 && ! reload_in_progress && ! reload_completed)
4592 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4593 else
4595 operands[0] = dest;
4596 operands[2] = GEN_INT (16 - insize);
4597 gen_shifty_hi_op (ASHIFT, operands);
4598 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4600 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4601 while (--i >= 0)
4602 gen_ashift (ASHIFTRT, 1, dest);
4603 break;
4605 case 6:
4606 case 7:
4607 /* Don't expand fine-grained when combining, because that will
4608 make the pattern fail. */
4609 if (! currently_expanding_to_rtl
4610 && ! reload_in_progress && ! reload_completed)
4612 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4613 emit_insn (gen_movsi (dest, source));
4614 break;
4616 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4617 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4618 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4619 operands[0] = dest;
4620 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4621 gen_shifty_op (ASHIFT, operands);
4622 if (kind == 7)
4623 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4624 break;
4625 default:
4626 return true;
4628 return false;
4631 /* Prefix a symbol_ref name with "datalabel". */
4633 gen_datalabel_ref (rtx sym)
4635 const char *str;
4637 if (GET_CODE (sym) == LABEL_REF)
4638 return gen_rtx_CONST (GET_MODE (sym),
4639 gen_rtx_UNSPEC (GET_MODE (sym),
4640 gen_rtvec (1, sym),
4641 UNSPEC_DATALABEL));
4643 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4645 str = XSTR (sym, 0);
4646 /* Share all SYMBOL_REF strings with the same value - that is important
4647 for cse. */
4648 str = IDENTIFIER_POINTER (get_identifier (str));
4649 XSTR (sym, 0) = str;
4651 return sym;
4655 typedef struct label_ref_list_d
4657 rtx_code_label *label;
4658 struct label_ref_list_d *next;
4660 /* Pool allocation new operator. */
4661 inline void *operator new (size_t)
4663 return pool.allocate ();
4666 /* Delete operator utilizing pool allocation. */
4667 inline void operator delete (void *ptr)
4669 pool.remove ((label_ref_list_d *) ptr);
4672 /* Memory allocation pool. */
4673 static pool_allocator<label_ref_list_d> pool;
4675 } *label_ref_list_t;
4677 pool_allocator<label_ref_list_d> label_ref_list_d::pool
4678 ("label references list", 30);
4680 /* The SH cannot load a large constant into a register, constants have to
4681 come from a pc relative load. The reference of a pc relative load
4682 instruction must be less than 1k in front of the instruction. This
4683 means that we often have to dump a constant inside a function, and
4684 generate code to branch around it.
4686 It is important to minimize this, since the branches will slow things
4687 down and make things bigger.
4689 Worst case code looks like:
4691 mov.l L1,rn
4692 bra L2
4694 align
4695 L1: .long value
4699 mov.l L3,rn
4700 bra L4
4702 align
4703 L3: .long value
4707 We fix this by performing a scan before scheduling, which notices which
4708 instructions need to have their operands fetched from the constant table
4709 and builds the table.
4711 The algorithm is:
4713 scan, find an instruction which needs a pcrel move. Look forward, find the
4714 last barrier which is within MAX_COUNT bytes of the requirement.
4715 If there isn't one, make one. Process all the instructions between
4716 the found instruction and the barrier.
4718 In the above example, we can tell that L3 is within 1k of L1, so
4719 the first move can be shrunk from the 3 insn+constant sequence into
4720 just 1 insn, and the constant moved to L3 to make:
4722 mov.l L1,rn
4724 mov.l L3,rn
4725 bra L4
4727 align
4728 L3:.long value
4729 L4:.long value
4731 Then the second move becomes the target for the shortening process. */
4733 typedef struct
4735 rtx value; /* Value in table. */
4736 rtx_code_label *label; /* Label of value. */
4737 label_ref_list_t wend; /* End of window. */
4738 machine_mode mode; /* Mode of value. */
4740 /* True if this constant is accessed as part of a post-increment
4741 sequence. Note that HImode constants are never accessed in this way. */
4742 bool part_of_sequence_p;
4743 } pool_node;
4745 /* The maximum number of constants that can fit into one pool, since
4746 constants in the range 0..510 are at least 2 bytes long, and in the
4747 range from there to 1018 at least 4 bytes. */
4749 #define MAX_POOL_SIZE 372
4750 static pool_node pool_vector[MAX_POOL_SIZE];
4751 static int pool_size;
4752 static rtx_code_label *pool_window_label;
4753 static int pool_window_last;
4755 static int max_labelno_before_reorg;
4757 /* ??? If we need a constant in HImode which is the truncated value of a
4758 constant we need in SImode, we could combine the two entries thus saving
4759 two bytes. Is this common enough to be worth the effort of implementing
4760 it? */
4762 /* ??? This stuff should be done at the same time that we shorten branches.
4763 As it is now, we must assume that all branches are the maximum size, and
4764 this causes us to almost always output constant pools sooner than
4765 necessary. */
4767 /* Add a constant to the pool and return its label. */
4768 static rtx_code_label *
4769 add_constant (rtx x, machine_mode mode, rtx last_value)
4771 int i;
4772 rtx_code_label *lab, *new_rtx;
4773 label_ref_list_t ref, newref;
4775 /* First see if we've already got it. */
4776 for (i = 0; i < pool_size; i++)
4778 if (x->code == pool_vector[i].value->code
4779 && mode == pool_vector[i].mode)
4781 if (x->code == CODE_LABEL)
4783 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4784 continue;
4786 if (rtx_equal_p (x, pool_vector[i].value))
4788 lab = new_rtx = 0;
4789 if (! last_value
4790 || ! i
4791 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4793 new_rtx = gen_label_rtx ();
4794 LABEL_REFS (new_rtx) = pool_vector[i].label;
4795 pool_vector[i].label = lab = new_rtx;
4797 if (lab && pool_window_label)
4799 newref = new label_ref_list_d;
4800 newref->label = pool_window_label;
4801 ref = pool_vector[pool_window_last].wend;
4802 newref->next = ref;
4803 pool_vector[pool_window_last].wend = newref;
4805 if (new_rtx)
4806 pool_window_label = new_rtx;
4807 pool_window_last = i;
4808 return lab;
4813 /* Need a new one. */
4814 pool_vector[pool_size].value = x;
4815 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4817 lab = 0;
4818 pool_vector[pool_size - 1].part_of_sequence_p = true;
4820 else
4821 lab = gen_label_rtx ();
4822 pool_vector[pool_size].mode = mode;
4823 pool_vector[pool_size].label = lab;
4824 pool_vector[pool_size].wend = NULL;
4825 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4826 if (lab && pool_window_label)
4828 newref = new label_ref_list_d;
4829 newref->label = pool_window_label;
4830 ref = pool_vector[pool_window_last].wend;
4831 newref->next = ref;
4832 pool_vector[pool_window_last].wend = newref;
4834 if (lab)
4835 pool_window_label = lab;
4836 pool_window_last = pool_size;
4837 pool_size++;
4838 return lab;
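/* The label returned here is what the pc-relative load will reference;
   dump_table below emits it in front of the pooled constant once a
   suitable barrier has been found.  If the same value is requested twice,
   both loads end up referring to one table entry - a further label is
   chained onto it via LABEL_REFS instead of duplicating the constant.  */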
4841 /* Output the literal table. START, if nonzero, is the first instruction
4842 this table is needed for, and also indicates that there is at least one
4843 casesi_worker_2 instruction; we have to emit the operand3 labels from
4844 these insns at a 4-byte aligned position. BARRIER is the barrier
4845 after which we are to place the table. */
4846 static void
4847 dump_table (rtx_insn *start, rtx_insn *barrier)
4849 rtx_insn *scan = barrier;
4850 int i;
4851 bool need_align = true;
4852 rtx lab;
4853 label_ref_list_t ref;
4854 bool have_df = false;
4856 /* Do two passes, first time dump out the HI sized constants. */
4858 for (i = 0; i < pool_size; i++)
4860 pool_node *p = &pool_vector[i];
4862 if (p->mode == HImode)
4864 if (need_align)
4866 scan = emit_insn_after (gen_align_2 (), scan);
4867 need_align = false;
4869 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4870 scan = emit_label_after (lab, scan);
4871 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4872 scan);
4873 for (ref = p->wend; ref; ref = ref->next)
4875 lab = ref->label;
4876 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4879 else if (p->mode == DFmode)
4880 have_df = true;
4883 need_align = true;
4885 if (start)
4887 scan = emit_insn_after (gen_align_4 (), scan);
4888 need_align = false;
4889 for (; start != barrier; start = NEXT_INSN (start))
4890 if (NONJUMP_INSN_P (start)
4891 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4893 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4894 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4896 scan = emit_label_after (lab, scan);
4899 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4901 rtx_insn *align_insn = NULL;
4903 scan = emit_label_after (gen_label_rtx (), scan);
4904 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4905 need_align = false;
4907 for (i = 0; i < pool_size; i++)
4909 pool_node *p = &pool_vector[i];
4911 switch (p->mode)
4913 case HImode:
4914 break;
4915 case SImode:
4916 case SFmode:
4917 if (align_insn && !p->part_of_sequence_p)
4919 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4920 emit_label_before (lab, align_insn);
4921 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4922 align_insn);
4923 for (ref = p->wend; ref; ref = ref->next)
4925 lab = ref->label;
4926 emit_insn_before (gen_consttable_window_end (lab),
4927 align_insn);
4929 delete_insn (align_insn);
4930 align_insn = NULL;
4931 continue;
4933 else
4935 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4936 scan = emit_label_after (lab, scan);
4937 scan = emit_insn_after (gen_consttable_4 (p->value,
4938 const0_rtx), scan);
4939 need_align = ! need_align;
4941 break;
4942 case DFmode:
4943 if (need_align)
4945 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4946 align_insn = scan;
4947 need_align = false;
4949 case DImode:
4950 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4951 scan = emit_label_after (lab, scan);
4952 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4953 scan);
4954 break;
4955 default:
4956 gcc_unreachable ();
4959 if (p->mode != HImode)
4961 for (ref = p->wend; ref; ref = ref->next)
4963 lab = ref->label;
4964 scan = emit_insn_after (gen_consttable_window_end (lab),
4965 scan);
4970 pool_size = 0;
4973 for (i = 0; i < pool_size; i++)
4975 pool_node *p = &pool_vector[i];
4977 switch (p->mode)
4979 case HImode:
4980 break;
4981 case SImode:
4982 case SFmode:
4983 if (need_align)
4985 need_align = false;
4986 scan = emit_label_after (gen_label_rtx (), scan);
4987 scan = emit_insn_after (gen_align_4 (), scan);
4989 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4990 scan = emit_label_after (lab, scan);
4991 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4992 scan);
4993 break;
4994 case DFmode:
4995 case DImode:
4996 if (need_align)
4998 need_align = false;
4999 scan = emit_label_after (gen_label_rtx (), scan);
5000 scan = emit_insn_after (gen_align_4 (), scan);
5002 for (lab = p->label; lab; lab = LABEL_REFS (lab))
5003 scan = emit_label_after (lab, scan);
5004 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
5005 scan);
5006 break;
5007 default:
5008 gcc_unreachable ();
5011 if (p->mode != HImode)
5013 for (ref = p->wend; ref; ref = ref->next)
5015 lab = ref->label;
5016 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5021 scan = emit_insn_after (gen_consttable_end (), scan);
5022 scan = emit_barrier_after (scan);
5023 pool_size = 0;
5024 pool_window_label = NULL;
5025 pool_window_last = 0;
5028 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
5030 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5032 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5033 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
5034 need to fix it if the input value is CONST_OK_FOR_I08. */
5035 static bool
5036 broken_move (rtx_insn *insn)
5038 if (NONJUMP_INSN_P (insn))
5040 rtx pat = PATTERN (insn);
5041 if (GET_CODE (pat) == PARALLEL)
5042 pat = XVECEXP (pat, 0, 0);
5043 if (GET_CODE (pat) == SET
5044 /* We can load any 8-bit value if we don't care what the high
5045 order bits end up as. */
5046 && GET_MODE (SET_DEST (pat)) != QImode
5047 && (CONSTANT_P (SET_SRC (pat))
5048 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5049 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5050 /* Match mova_const. */
5051 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5052 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5053 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5054 && ! (TARGET_SH2E
5055 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5056 && (fp_zero_operand (SET_SRC (pat))
5057 || fp_one_operand (SET_SRC (pat)))
5058 /* In general we don't know the current setting of fpscr, so
5059 disable fldi.
5060 There is an exception if this was a register-register move
5061 before reload - and hence it was ascertained that we have
5062 single precision setting - and in a post-reload optimization
5063 we changed this to do a constant load. In that case
5064 we don't have an r0 clobber, hence we must use fldi. */
5065 && (TARGET_FMOVD
5066 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5067 == SCRATCH))
5068 && REG_P (SET_DEST (pat))
5069 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5070 && ! (TARGET_SH2A
5071 && GET_MODE (SET_DEST (pat)) == SImode
5072 && (satisfies_constraint_I20 (SET_SRC (pat))
5073 || satisfies_constraint_I28 (SET_SRC (pat))))
5074 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5075 return true;
5078 return false;
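/* For illustration: a move such as (set (reg:SI r1) (const_int 0x12345))
   is a broken move on non-SH2A targets, since the constant satisfies
   neither I08 nor any of the exceptions above, and must therefore be
   rewritten as a pc-relative load from the constant pool.  */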
5081 /* Return true if the specified insn is a mova insn. */
5082 static bool
5083 mova_p (rtx_insn *insn)
5085 return (NONJUMP_INSN_P (insn)
5086 && GET_CODE (PATTERN (insn)) == SET
5087 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5088 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5089 /* Don't match mova_const. */
5090 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5093 /* Fix up a mova from a switch that went out of range. */
5094 static void
5095 fixup_mova (rtx_insn *mova)
5097 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5098 if (! flag_pic)
5100 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5101 INSN_CODE (mova) = -1;
5103 else
5105 rtx_insn *worker = mova;
5106 rtx_code_label *lab = gen_label_rtx ();
5107 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5111 worker = NEXT_INSN (worker);
5112 gcc_assert (worker
5113 && !LABEL_P (worker)
5114 && !JUMP_P (worker));
5115 } while (NOTE_P (worker)
5116 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5117 wpat = PATTERN (worker);
5118 wpat0 = XVECEXP (wpat, 0, 0);
5119 wpat1 = XVECEXP (wpat, 0, 1);
5120 wsrc = SET_SRC (wpat0);
5121 PATTERN (worker) = (gen_casesi_worker_2
5122 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5123 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5124 XEXP (wpat1, 0)));
5125 INSN_CODE (worker) = -1;
5126 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5127 base = gen_rtx_LABEL_REF (Pmode, lab);
5128 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5129 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5130 INSN_CODE (mova) = -1;
5134 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5135 *num_mova, and check if the new mova is not nested within the first one.
5136 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5137 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5138 static int
5139 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5141 int n_addr = 0; /* Initialization to shut up spurious warning. */
5142 int f_target, n_target = 0; /* Likewise. */
5144 if (optimize)
5146 /* If NEW_MOVA has no address yet, it will be handled later. */
5147 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5148 return -1;
5150 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5151 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5152 if (n_addr > n_target || n_addr + 1022 < n_target)
5154 /* Change the mova into a load.
5155 broken_move will then return true for it. */
5156 fixup_mova (new_mova);
5157 return 1;
5160 if (!(*num_mova)++)
5162 *first_mova = new_mova;
5163 return 2;
5165 if (!optimize
5166 || ((f_target
5167 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5168 >= n_target))
5169 return -1;
5171 (*num_mova)--;
5172 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5173 > n_target - n_addr)
5175 fixup_mova (*first_mova);
5176 return 0;
5178 else
5180 fixup_mova (new_mova);
5181 return 1;
5185 /* Find the last barrier from insn FROM which is close enough to hold the
5186 constant pool. If we can't find one, then create one near the end of
5187 the range. */
5188 static rtx_insn *
5189 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5191 int count_si = 0;
5192 int count_hi = 0;
5193 int found_hi = 0;
5194 int found_si = 0;
5195 int found_di = 0;
5196 int hi_align = 2;
5197 int si_align = 2;
5198 int leading_mova = num_mova;
5199 rtx_insn *barrier_before_mova = NULL;
5200 rtx_insn *found_barrier = NULL;
5201 rtx_insn *good_barrier = NULL;
5202 int si_limit;
5203 int hi_limit;
5204 rtx_insn *orig = from;
5205 rtx_insn *last_got = NULL;
5206 rtx_insn *last_symoff = NULL;
5208 /* For HImode: range is 510, add 4 because pc counts from address of
5209 second instruction after this one, subtract 2 for the jump instruction
5210 that we may need to emit before the table, subtract 2 for the instruction
5211 that fills the jump delay slot (in very rare cases, reorg will take an
5212 instruction from after the constant pool or will leave the delay slot
5213 empty). This gives 510.
5214 For SImode: range is 1020, add 4 because pc counts from address of
5215 second instruction after this one, subtract 2 in case pc is 2 byte
5216 aligned, subtract 2 for the jump instruction that we may need to emit
5217 before the table, subtract 2 for the instruction that fills the jump
5218 delay slot. This gives 1018. */
5220 /* The branch will always be shortened now that the reference address for
5221 forward branches is the successor address, thus we need no longer make
5222 adjustments to the [sh]i_limit for -O0. */
5224 si_limit = 1018;
5225 hi_limit = 510;
5227 while (from && count_si < si_limit && count_hi < hi_limit)
5229 int inc = get_attr_length (from);
5230 int new_align = 1;
5232 /* If this is a label that existed at the time of the compute_alignments
5233 call, determine the alignment. N.B. When find_barrier recurses for
5234 an out-of-reach mova, we might see labels at the start of previously
5235 inserted constant tables. */
5236 if (LABEL_P (from)
5237 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5239 if (optimize)
5240 new_align = 1 << label_to_alignment (from);
5241 else if (BARRIER_P (prev_nonnote_insn (from)))
5242 new_align = 1 << barrier_align (from);
5243 else
5244 new_align = 1;
5245 inc = 0;
5247 /* In case we are scanning a constant table because of recursion, check
5248 for explicit alignments. If the table is long, we might be forced
5249 to emit the new table in front of it; the length of the alignment
5250 might be the last straw. */
5251 else if (NONJUMP_INSN_P (from)
5252 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5253 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5254 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5255 /* When we find the end of a constant table, paste the new constant
5256 at the end. That is better than putting it in front because
5257 this way, we don't need extra alignment for adding a 4-byte-aligned
5258 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5259 else if (NONJUMP_INSN_P (from)
5260 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5261 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5262 return from;
5264 if (BARRIER_P (from))
5266 rtx_insn *next;
5268 found_barrier = from;
5270 /* If we are at the end of the function, or in front of an alignment
5271 instruction, we need not insert an extra alignment. We prefer
5272 this kind of barrier. */
5273 if (barrier_align (from) > 2)
5274 good_barrier = from;
5276 /* If we are at the end of a hot/cold block, dump the constants
5277 here. */
5278 next = NEXT_INSN (from);
5279 if (next
5280 && NOTE_P (next)
5281 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5282 break;
5285 if (broken_move (from))
5287 rtx pat, src, dst;
5288 machine_mode mode;
5290 pat = PATTERN (from);
5291 if (GET_CODE (pat) == PARALLEL)
5292 pat = XVECEXP (pat, 0, 0);
5293 src = SET_SRC (pat);
5294 dst = SET_DEST (pat);
5295 mode = GET_MODE (dst);
5297 /* GOT pc-relative setting comes in a pair of
5298 mova .L8,r0
5299 mov.l .L8,r12
5300 instructions (plus an add r0,r12).
5301 Remember if we see one without the other. */
5302 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5303 last_got = last_got ? NULL : from;
5304 else if (PIC_ADDR_P (src))
5305 last_got = last_got ? NULL : from;
5307 /* We must explicitly check the mode, because sometimes the
5308 front end will generate code to load unsigned constants into
5309 HImode targets without properly sign extending them. */
5310 if (mode == HImode
5311 || (mode == SImode && satisfies_constraint_I16 (src)
5312 && REGNO (dst) != FPUL_REG))
5314 found_hi += 2;
5315 /* We put the short constants before the long constants, so
5316 we must count the length of short constants in the range
5317 for the long constants. */
5318 /* ??? This isn't optimal, but is easy to do. */
5319 si_limit -= 2;
5321 else
5323 /* We dump DF/DI constants before SF/SI ones, because
5324 the limit is the same, but the alignment requirements
5325 are higher. We may waste up to 4 additional bytes
5326 for alignment, and the DF/DI constant may have
5327 another SF/SI constant placed before it. */
5328 if (TARGET_SHCOMPACT
5329 && ! found_di
5330 && (mode == DFmode || mode == DImode))
5332 found_di = 1;
5333 si_limit -= 8;
5335 while (si_align > 2 && found_si + si_align - 2 > count_si)
5336 si_align >>= 1;
5337 if (found_si > count_si)
5338 count_si = found_si;
5339 found_si += GET_MODE_SIZE (mode);
5340 if (num_mova)
5341 si_limit -= GET_MODE_SIZE (mode);
5345 if (mova_p (from))
5347 switch (untangle_mova (&num_mova, &mova, from))
5349 case 1:
5350 if (flag_pic)
5352 rtx src = SET_SRC (PATTERN (from));
5353 if (GET_CODE (src) == CONST
5354 && GET_CODE (XEXP (src, 0)) == UNSPEC
5355 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5356 last_symoff = from;
5358 break;
5359 case 0: return find_barrier (0, 0, mova);
5360 case 2:
5362 leading_mova = 0;
5363 barrier_before_mova
5364 = good_barrier ? good_barrier : found_barrier;
5366 default: break;
5368 if (found_si > count_si)
5369 count_si = found_si;
5371 else if (JUMP_TABLE_DATA_P (from)
5372 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5374 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5375 || (num_mova
5376 && (prev_nonnote_insn (from)
5377 == XEXP (MOVA_LABELREF (mova), 0))))
5378 num_mova--;
5379 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5381 /* We have just passed the barrier in front of the
5382 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5383 the ADDR_DIFF_VEC is accessed as data, just like our pool
5384 constants, this is a good opportunity to accommodate what
5385 we have gathered so far.
5386 If we waited any longer, we could end up at a barrier in
5387 front of code, which gives worse cache usage for separated
5388 instruction / data caches. */
5389 good_barrier = found_barrier;
5390 break;
5392 else
5394 rtx body = PATTERN (from);
5395 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5398 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5399 else if (JUMP_P (from)
5400 && ! TARGET_SH2
5401 && ! optimize_size)
5402 new_align = 4;
5404 /* There is a possibility that a bf is transformed into a bf/s by the
5405 delay slot scheduler. */
5406 if (JUMP_P (from)
5407 && get_attr_type (from) == TYPE_CBRANCH
5408 && ! sequence_insn_p (from))
5409 inc += 2;
5411 if (found_si)
5413 count_si += inc;
5414 if (new_align > si_align)
5416 si_limit -= (count_si - 1) & (new_align - si_align);
5417 si_align = new_align;
5419 count_si = (count_si + new_align - 1) & -new_align;
5421 if (found_hi)
5423 count_hi += inc;
5424 if (new_align > hi_align)
5426 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5427 hi_align = new_align;
5429 count_hi = (count_hi + new_align - 1) & -new_align;
5431 from = NEXT_INSN (from);
5434 if (num_mova)
5436 if (leading_mova)
5438 /* Try as we might, the leading mova is out of range. Change
5439 it into a load (which will become a pcload) and retry. */
5440 fixup_mova (mova);
5441 return find_barrier (0, 0, mova);
5443 else
5445 /* Insert the constant pool table before the mova instruction,
5446 to prevent the mova label reference from going out of range. */
5447 from = mova;
5448 good_barrier = found_barrier = barrier_before_mova;
5452 if (found_barrier)
5454 if (good_barrier && next_real_insn (found_barrier))
5455 found_barrier = good_barrier;
5457 else
5459 /* We didn't find a barrier in time to dump our stuff,
5460 so we'll make one. */
5461 rtx_code_label *label = gen_label_rtx ();
5463 /* Don't emit a constant table in the middle of insns for
5464 casesi_worker_2. This is a bit overkill but is enough
5465 because casesi_worker_2 does not appear very frequently. */
5466 if (last_symoff)
5467 from = last_symoff;
5469 /* If we exceeded the range, then we must back up over the last
5470 instruction we looked at. Otherwise, we just need to undo the
5471 NEXT_INSN at the end of the loop. */
5472 if (PREV_INSN (from) != orig
5473 && (count_hi > hi_limit || count_si > si_limit))
5474 from = PREV_INSN (PREV_INSN (from));
5475 else
5476 from = PREV_INSN (from);
5478 /* Don't emit a constant table in the middle of global pointer setting,
5479 since that would move the addressing base GOT into another table.
5480 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5481 in the pool anyway, so just move up the whole constant pool.
5483 However, avoid doing so when the last single GOT mov is the starting
5484 insn itself. Going back past the start insn would create a negative
5485 offset, causing errors. */
5486 if (last_got && last_got != orig)
5487 from = PREV_INSN (last_got);
5489 /* Don't insert the constant pool table at the position which
5490 may be the landing pad. */
5491 if (flag_exceptions
5492 && CALL_P (from)
5493 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5494 from = PREV_INSN (from);
5496 /* Walk back to be just before any jump or label.
5497 Putting it before a label reduces the number of times the branch
5498 around the constant pool table will be hit. Putting it before
5499 a jump makes it more likely that the bra delay slot will be
5500 filled. */
5501 while (NOTE_P (from) || JUMP_P (from)
5502 || LABEL_P (from))
5503 from = PREV_INSN (from);
5505 /* Make sure we do not split between a call and its corresponding
5506 CALL_ARG_LOCATION note. */
5507 if (CALL_P (from))
5509 rtx_insn *next = NEXT_INSN (from);
5510 if (next && NOTE_P (next)
5511 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5512 from = next;
5515 from = emit_jump_insn_after (gen_jump (label), from);
5516 JUMP_LABEL (from) = label;
5517 LABEL_NUSES (label) = 1;
5518 found_barrier = emit_barrier_after (from);
5519 emit_label_after (label, found_barrier);
5522 return found_barrier;
5525 /* If the instruction INSN is implemented by a special function, and we can
5526 positively find the register that is used to call the sfunc, and this
5527 register is not used anywhere else in this instruction - except as the
5528 destination of a set, return this register; else, return 0. */
5530 sfunc_uses_reg (rtx_insn *insn)
5532 int i;
5533 rtx pattern, part, reg_part, reg;
5535 if (!NONJUMP_INSN_P (insn))
5536 return NULL_RTX;
5537 pattern = PATTERN (insn);
5538 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5539 return NULL_RTX;
5541 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5543 part = XVECEXP (pattern, 0, i);
5544 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5545 reg_part = part;
5547 if (! reg_part)
5548 return NULL_RTX;
5549 reg = XEXP (reg_part, 0);
5550 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5552 part = XVECEXP (pattern, 0, i);
5553 if (part == reg_part || GET_CODE (part) == CLOBBER)
5554 continue;
5555 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5556 && REG_P (SET_DEST (part)))
5557 ? SET_SRC (part) : part)))
5558 return NULL_RTX;
5560 return reg;
5563 /* See if the only way in which INSN uses REG is by calling it, or by
5564 setting it while calling it. Set *SET to a SET rtx if the register
5565 is set by INSN. */
5566 static bool
5567 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5569 rtx pattern, reg2;
5571 *set = NULL_RTX;
5573 reg2 = sfunc_uses_reg (insn);
5574 if (reg2 && REGNO (reg2) == REGNO (reg))
5576 pattern = single_set (insn);
5577 if (pattern
5578 && REG_P (SET_DEST (pattern))
5579 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5580 *set = pattern;
5581 return false;
5583 if (!CALL_P (insn))
5585 /* We don't use rtx_equal_p because we don't care if the mode is
5586 different. */
5587 pattern = single_set (insn);
5588 if (pattern
5589 && REG_P (SET_DEST (pattern))
5590 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5592 rtx par, part;
5593 int i;
5595 *set = pattern;
5596 par = PATTERN (insn);
5597 if (GET_CODE (par) == PARALLEL)
5598 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5600 part = XVECEXP (par, 0, i);
5601 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5602 return true;
5604 return reg_mentioned_p (reg, SET_SRC (pattern));
5607 return true;
5610 pattern = PATTERN (insn);
5612 if (GET_CODE (pattern) == PARALLEL)
5614 int i;
5616 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5617 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5618 return true;
5619 pattern = XVECEXP (pattern, 0, 0);
5622 if (GET_CODE (pattern) == SET)
5624 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5626 /* We don't use rtx_equal_p, because we don't care if the
5627 mode is different. */
5628 if (!REG_P (SET_DEST (pattern))
5629 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5630 return true;
5632 *set = pattern;
5635 pattern = SET_SRC (pattern);
5638 if (GET_CODE (pattern) != CALL
5639 || !MEM_P (XEXP (pattern, 0))
5640 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5641 return true;
5643 return false;
5646 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5647 general registers. Bits 0..15 mean that the respective registers
5648 are used as inputs in the instruction. Bits 16..31 mean that the
5649 registers 0..15, respectively, are used as outputs, or are clobbered.
5650 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
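/* For example (a made-up SImode pattern):
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   yields 1 << 17 for the destination r1 plus bits 2 and 3 for the
   source registers, i.e. a mask of 0x0002000c.  */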
5652 regs_used (rtx x, int is_dest)
5654 enum rtx_code code;
5655 const char *fmt;
5656 int i, used = 0;
5658 if (! x)
5659 return used;
5660 code = GET_CODE (x);
5661 switch (code)
5663 case REG:
5664 if (REGNO (x) < 16)
5665 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5666 << (REGNO (x) + is_dest));
5667 return 0;
5668 case SUBREG:
5670 rtx y = SUBREG_REG (x);
5672 if (!REG_P (y))
5673 break;
5674 if (REGNO (y) < 16)
5675 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5676 << (REGNO (y) +
5677 subreg_regno_offset (REGNO (y),
5678 GET_MODE (y),
5679 SUBREG_BYTE (x),
5680 GET_MODE (x)) + is_dest));
5681 return 0;
5683 case SET:
5684 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5685 case RETURN:
5686 /* If there was a return value, it must have been indicated with USE. */
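/* Per the bit layout documented above, the constant below marks r8-r15
   as used (bits 8-15) and r0-r7 as set or clobbered (bits 16-23).  */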
5687 return 0x00ffff00;
5688 case CLOBBER:
5689 is_dest = 1;
5690 break;
5691 case MEM:
5692 is_dest = 0;
5693 break;
5694 case CALL:
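/* Per the same bit layout, 0x00ff00f0 marks r4-r7 (the argument
   registers) as used and r0-r7 as set or clobbered.  */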
5695 used |= 0x00ff00f0;
5696 break;
5697 default:
5698 break;
5701 fmt = GET_RTX_FORMAT (code);
5703 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5705 if (fmt[i] == 'E')
5707 int j;
5708 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5709 used |= regs_used (XVECEXP (x, i, j), is_dest);
5711 else if (fmt[i] == 'e')
5712 used |= regs_used (XEXP (x, i), is_dest);
5714 return used;
5717 /* Create an instruction that prevents redirection of a conditional branch
5718 to the destination of the JUMP with address ADDR.
5719 If the branch needs to be implemented as an indirect jump, try to find
5720 a scratch register for it.
5721 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5722 If any preceding insn that doesn't fit into a delay slot is good enough,
5723 pass 1. Pass 2 if a definite blocking insn is needed.
5724 -1 is used internally to avoid deep recursion.
5725 If a blocking instruction is made or recognized, return it. */
5726 static rtx_insn *
5727 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5729 int dead = 0;
5730 rtx_insn *prev = prev_nonnote_insn (jump);
5731 rtx dest;
5733 /* First, check if we already have an instruction that satisfies our need. */
5734 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5736 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5737 return prev;
5738 if (GET_CODE (PATTERN (prev)) == USE
5739 || GET_CODE (PATTERN (prev)) == CLOBBER
5740 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5741 prev = jump;
5742 else if ((need_block &= ~1) < 0)
5743 return prev;
5744 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5745 need_block = 0;
5747 if (GET_CODE (PATTERN (jump)) == RETURN)
5749 if (! need_block)
5750 return prev;
5751 /* Reorg even does nasty things with return insns that cause branches
5752 to go out of range - see find_end_label and callers. */
5753 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5755 /* We can't use JUMP_LABEL here because it might be undefined
5756 when not optimizing. */
5757 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5758 /* If the branch is out of range, try to find a scratch register for it. */
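/* The unsigned bias below folds a two-sided range test into a single
   compare: the branch is treated as out of range unless the displacement
   from ADDR to DEST lies within roughly [-4092, +4098] bytes.  */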
5759 if (optimize
5760 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5761 > 4092 + 4098))
5763 rtx_insn *scan;
5764 /* Don't look for the stack pointer as a scratch register,
5765 it would cause trouble if an interrupt occurred. */
5766 unsigned attempt = 0x7fff, used;
5767 int jump_left = flag_expensive_optimizations + 1;
5769 /* It is likely that the most recent eligible instruction is wanted for
5770 the delay slot. Therefore, find out which registers it uses, and
5771 try to avoid using them. */
5773 for (scan = jump; (scan = PREV_INSN (scan)); )
5775 enum rtx_code code;
5777 if (scan->deleted ())
5778 continue;
5779 code = GET_CODE (scan);
5780 if (code == CODE_LABEL || code == JUMP_INSN)
5781 break;
5782 if (code == INSN
5783 && GET_CODE (PATTERN (scan)) != USE
5784 && GET_CODE (PATTERN (scan)) != CLOBBER
5785 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5787 attempt &= ~regs_used (PATTERN (scan), 0);
5788 break;
5791 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5792 (scan = NEXT_INSN (scan)); )
5794 enum rtx_code code;
5796 if (scan->deleted ())
5797 continue;
5798 code = GET_CODE (scan);
5799 if (INSN_P (scan))
5801 used |= regs_used (PATTERN (scan), 0);
5802 if (code == CALL_INSN)
5803 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5804 dead |= (used >> 16) & ~used;
5805 if (dead & attempt)
5807 dead &= attempt;
5808 break;
5810 if (code == JUMP_INSN)
5812 if (jump_left-- && simplejump_p (scan))
5813 scan = JUMP_LABEL_AS_INSN (scan);
5814 else
5815 break;
5819 /* Mask out the stack pointer again, in case it was
5820 the only 'free' register we have found. */
5821 dead &= 0x7fff;
5823 /* If the immediate destination is still in range, check for possible
5824 threading with a jump beyond the delay slot insn.
5825 Don't check if we are called recursively; the jump has been or will be
5826 checked in a different invocation in that case. */
5828 else if (optimize && need_block >= 0)
5830 rtx_insn *next = next_active_insn (next_active_insn (dest));
5831 if (next && JUMP_P (next)
5832 && GET_CODE (PATTERN (next)) == SET
5833 && recog_memoized (next) == CODE_FOR_jump_compact)
5835 dest = JUMP_LABEL (next);
5836 if (dest
5837 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5838 > 4092 + 4098))
5839 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5843 if (dead)
5845 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5847 /* It would be nice if we could convert the jump into an indirect
5848 jump / far branch right now, thus exposing all constituent
5849 instructions to further optimization. However, reorg uses
5850 simplejump_p to determine if there is an unconditional jump where
5851 it should try to schedule instructions from the target of the
5852 branch; simplejump_p fails for indirect jumps even if they have
5853 a JUMP_LABEL. */
5854 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5855 (reg, GEN_INT (unspec_bbr_uid++)),
5856 jump);
5857 /* ??? We would like this to have the scope of the jump, but that
5858 scope will change when a delay slot insn of an inner scope is added.
5859 Hence, after delay slot scheduling, we'll have to expect
5860 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5861 the jump. */
5863 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5864 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5865 return insn;
5867 else if (need_block)
5868 /* We can't use JUMP_LABEL here because it might be undefined
5869 when not optimizing. */
5870 return emit_insn_before (gen_block_branch_redirect
5871 (GEN_INT (unspec_bbr_uid++)),
5872 jump);
5873 return prev;
5876 #define CONDJUMP_MIN -252
5877 #define CONDJUMP_MAX 262
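/* Rough byte-displacement window within which a conditional branch is
   assumed to reach its target, presumably with some slack for insn
   placement; split_branches uses these bounds below when deciding whether
   an existing near label can be reused or a new one must be placed.  */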
5878 struct far_branch
5880 /* A label (to be placed) in front of the jump
5881 that jumps to our ultimate destination. */
5882 rtx_insn *near_label;
5883 /* Where we are going to insert it if we cannot move the jump any farther,
5884 or the jump itself if we have picked up an existing jump. */
5885 rtx_insn *insert_place;
5886 /* The ultimate destination. */
5887 rtx_insn *far_label;
5888 struct far_branch *prev;
5889 /* If the branch has already been created, its address;
5890 else the address of its first prospective user. */
5891 int address;
5894 static void gen_far_branch (struct far_branch *);
5895 enum mdep_reorg_phase_e mdep_reorg_phase;
5896 static void
5897 gen_far_branch (struct far_branch *bp)
5899 rtx_insn *insn = bp->insert_place;
5900 rtx_jump_insn *jump;
5901 rtx_code_label *label = gen_label_rtx ();
5902 int ok;
5904 emit_label_after (label, insn);
5905 if (bp->far_label)
5907 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5908 LABEL_NUSES (bp->far_label)++;
5910 else
5911 jump = emit_jump_insn_after (gen_return (), insn);
5913 /* Emit a barrier so that reorg knows that any following instructions
5914 are not reachable via a fall-through path.
5915 But don't do this when not optimizing, since we wouldn't suppress the
5916 alignment for the barrier then, and could end up with out-of-range
5917 pc-relative loads. */
5918 if (optimize)
5919 emit_barrier_after (jump);
5920 emit_label_after (bp->near_label, insn);
5922 if (bp->far_label)
5923 JUMP_LABEL (jump) = bp->far_label;
5924 else
5926 rtx pat = PATTERN (jump);
5927 gcc_assert (ANY_RETURN_P (pat));
5928 JUMP_LABEL (jump) = pat;
5931 ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5932 gcc_assert (ok);
5934 /* If we are branching around a jump (rather than a return), prevent
5935 reorg from using an insn from the jump target as the delay slot insn -
5936 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5937 and it could cause branches to go out of range. */
5938 if (bp->far_label)
5939 (emit_insn_after
5940 (gen_stuff_delay_slot
5941 (GEN_INT (unspec_bbr_uid++),
5942 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5943 insn));
5944 /* Prevent reorg from undoing our splits. */
5945 gen_block_redirect (jump, bp->address += 2, 2);
5948 /* Fix up ADDR_DIFF_VECs. */
5949 void
5950 fixup_addr_diff_vecs (rtx_insn *first)
5952 rtx_insn *insn;
5954 for (insn = first; insn; insn = NEXT_INSN (insn))
5956 rtx vec_lab, pat, prevpat, x, braf_label;
5957 rtx_insn *prev;
5959 if (! JUMP_TABLE_DATA_P (insn)
5960 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5961 continue;
5962 pat = PATTERN (insn);
5963 vec_lab = XEXP (XEXP (pat, 0), 0);
5965 /* Search the matching casesi_jump_2. */
5966 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5968 if (!JUMP_P (prev))
5969 continue;
5970 prevpat = PATTERN (prev);
5971 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5972 continue;
5973 x = XVECEXP (prevpat, 0, 1);
5974 if (GET_CODE (x) != USE)
5975 continue;
5976 x = XEXP (x, 0);
5977 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5978 break;
5980 /* FIXME: This is a bug in the optimizer, but it seems harmless
5981 to just avoid panicking. */
5982 if (!prev)
5983 continue;
5985 /* Emit the reference label of the braf where it belongs, right after
5986 the casesi_jump_2 (i.e. braf). */
5987 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5988 emit_label_after (braf_label, prev);
5990 /* Fix up the ADDR_DIFF_VEC to be relative
5991 to the reference address of the braf. */
5992 XEXP (XEXP (pat, 0), 0) = braf_label;
5996 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5997 a barrier. Return the base 2 logarithm of the desired alignment. */
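/* E.g. a return value of 2 requests 2^2 = 4 byte alignment, while 0
   requests no particular alignment.  */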
5999 barrier_align (rtx_insn *barrier_or_label)
6001 rtx next, pat;
6003 if (! barrier_or_label)
6004 return 0;
6006 if (LABEL_P (barrier_or_label)
6007 && NEXT_INSN (barrier_or_label)
6008 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
6009 return 2;
6011 if (BARRIER_P (barrier_or_label)
6012 && PREV_INSN (barrier_or_label)
6013 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
6015 pat = PATTERN (PREV_INSN (barrier_or_label));
6016 /* If this is a very small table, we want to keep the alignment after
6017 the table to the minimum for proper code alignment. */
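/* That minimum works out to 1 << TARGET_SHMEDIA below (TARGET_SHMEDIA
   being 0 or 1): 2 byte alignment (log 1) normally, 4 byte alignment
   (log 2) on SHmedia, instead of the full align_jumps_log.  */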
6018 return ((optimize_size
6019 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6020 <= (unsigned) 1 << (CACHE_LOG - 2)))
6021 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6024 next = next_active_insn (barrier_or_label);
6026 if (! next)
6027 return 0;
6029 pat = PATTERN (next);
6031 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6032 /* This is a barrier in front of a constant table. */
6033 return 0;
6035 if (optimize_size)
6036 return 0;
6038 if (! TARGET_SH2 || ! optimize)
6039 return align_jumps_log;
6041 /* When fixing up pcloads, a constant table might be inserted just before
6042 the basic block that ends with the barrier. Thus, we can't trust the
6043 instruction lengths before that. */
6044 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6046 /* Check if there is an immediately preceding branch to the insn beyond
6047 the barrier. We must weigh the cost of discarding useful information
6048 from the current cache line when executing this branch and there is
6049 an alignment, against that of fetching unneeded insns in front of the
6050 branch target when there is no alignment. */
6052 /* There are two delay_slot cases to consider. One is the simple case
6053 where the preceding branch is to the insn beyond the barrier (simple
6054 delay slot filling), and the other is where the preceding branch has
6055 a delay slot that is a duplicate of the insn after the barrier
6056 (fill_eager_delay_slots) and the branch is to the insn after the insn
6057 after the barrier. */
6059 int slot, credit;
6060 bool jump_to_next = false;
6062 /* Skip to the insn before the JUMP_INSN before the barrier under
6063 investigation. */
6064 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6066 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6067 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6068 prev = prev_real_insn (prev))
6070 jump_to_next = false;
6071 if (GET_CODE (PATTERN (prev)) == USE
6072 || GET_CODE (PATTERN (prev)) == CLOBBER)
6073 continue;
6074 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6076 prev = prev_seq->insn (1);
6077 if (INSN_UID (prev) == INSN_UID (next))
6079 /* Delay slot was filled with insn at jump target. */
6080 jump_to_next = true;
6081 continue;
6085 if (slot &&
6086 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6087 slot = 0;
6088 credit -= get_attr_length (prev);
6090 if (prev && jump_to_label_p (prev))
6092 rtx_insn *x;
6093 if (jump_to_next
6094 || next_real_insn (JUMP_LABEL (prev)) == next
6095 /* If relax_delay_slots() decides NEXT was redundant
6096 with some previous instruction, it will have
6097 redirected PREV's jump to the following insn. */
6098 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6099 /* There is no upper bound on redundant instructions
6100 that might have been skipped, but we must not put an
6101 alignment where none had been before. */
6102 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6103 (INSN_P (x)
6104 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6105 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6106 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6108 rtx pat = PATTERN (prev);
6109 if (GET_CODE (pat) == PARALLEL)
6110 pat = XVECEXP (pat, 0, 0);
6111 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6112 return 0;
6117 return align_jumps_log;
6120 /* If we are inside a phony loop, almost any kind of label can turn up as the
6121 first one in the loop. Aligning a braf label causes incorrect switch
6122 destination addresses; we can detect braf labels because they are
6123 followed by a BARRIER.
6124 Applying loop alignment to small constant or switch tables is a waste
6125 of space, so we suppress this too. */
6127 sh_loop_align (rtx_insn *label)
6129 rtx_insn *next = label;
6131 if (! optimize || optimize_size)
6132 return 0;
6135 next = next_nonnote_insn (next);
6136 while (next && LABEL_P (next));
6138 if (! next
6139 || ! INSN_P (next)
6140 || recog_memoized (next) == CODE_FOR_consttable_2)
6141 return 0;
6143 return align_loops_log;
6146 /* Do a final pass over the function, just before delayed branch
6147 scheduling. */
6148 static void
6149 sh_reorg (void)
6151 rtx_insn *first, *insn, *mova = NULL;
6152 int num_mova;
6153 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6154 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6156 first = get_insns ();
6157 max_labelno_before_reorg = max_label_num ();
6159 /* We must split call insns before introducing `mova's. If we're
6160 optimizing, they'll have already been split. Otherwise, make
6161 sure we don't split them too late. */
6162 if (! optimize)
6163 split_all_insns_noflow ();
6165 if (TARGET_SHMEDIA)
6166 return;
6168 /* If relaxing, generate pseudo-ops to associate function calls with
6169 the symbols they call. It does no harm to not generate these
6170 pseudo-ops. However, when we can generate them, it enables the
6171 linker to potentially relax the jsr to a bsr, and eliminate the
6172 register load and, possibly, the constant pool entry. */
6174 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6175 if (TARGET_RELAX)
6177 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6178 own purposes. This works because none of the remaining passes
6179 need to look at them.
6181 ??? But it may break in the future. We should use a machine
6182 dependent REG_NOTE, or some other approach entirely. */
6183 for (insn = first; insn; insn = NEXT_INSN (insn))
6185 if (INSN_P (insn))
6187 rtx note;
6189 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6190 NULL_RTX)) != 0)
6191 remove_note (insn, note);
6195 for (insn = first; insn; insn = NEXT_INSN (insn))
6197 rtx pattern, reg, set, dies;
6198 rtx_code_label *label;
6199 rtx_insn *link, *scan;
6200 int rescan = 0, foundinsn = 0;
6202 if (CALL_P (insn))
6204 pattern = PATTERN (insn);
6206 if (GET_CODE (pattern) == PARALLEL)
6207 pattern = XVECEXP (pattern, 0, 0);
6208 if (GET_CODE (pattern) == SET)
6209 pattern = SET_SRC (pattern);
6211 if (GET_CODE (pattern) != CALL
6212 || !MEM_P (XEXP (pattern, 0)))
6213 continue;
6215 reg = XEXP (XEXP (pattern, 0), 0);
6217 else
6219 reg = sfunc_uses_reg (insn);
6220 if (! reg)
6221 continue;
6224 if (!REG_P (reg))
6225 continue;
6227 /* Try scanning backward to find where the register is set. */
6228 link = NULL;
6229 for (scan = PREV_INSN (insn);
6230 scan && !LABEL_P (scan);
6231 scan = PREV_INSN (scan))
6233 if (! INSN_P (scan))
6234 continue;
6236 if (! reg_mentioned_p (reg, scan))
6237 continue;
6239 if (noncall_uses_reg (reg, scan, &set))
6240 break;
6242 if (set)
6244 link = scan;
6245 break;
6249 if (! link)
6250 continue;
6252 /* The register is set at LINK. */
6254 /* We can only optimize the function call if the register is
6255 being set to a symbol. In theory, we could sometimes
6256 optimize calls to a constant location, but the assembler
6257 and linker do not support that at present. */
6258 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6259 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6260 continue;
6262 /* Scan forward from LINK to the place where REG dies, and
6263 make sure that the only insns which use REG are
6264 themselves function calls. */
6266 /* ??? This doesn't work for call targets that were allocated
6267 by reload, since there may not be a REG_DEAD note for the
6268 register. */
6270 dies = NULL_RTX;
6271 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6273 rtx scanset;
6275 /* Don't try to trace forward past a CODE_LABEL if we haven't
6276 seen INSN yet. Ordinarily, we will only find the setting insn
6277 if it is in the same basic block. However,
6278 cross-jumping can insert code labels in between the load and
6279 the call, and can result in situations where a single call
6280 insn may have two targets depending on where we came from. */
6282 if (LABEL_P (scan) && ! foundinsn)
6283 break;
6285 if (! INSN_P (scan))
6286 continue;
6288 /* Don't try to trace forward past a JUMP. To optimize
6289 safely, we would have to check that all the
6290 instructions at the jump destination did not use REG. */
6292 if (JUMP_P (scan))
6293 break;
6295 if (! reg_mentioned_p (reg, scan))
6296 continue;
6298 if (noncall_uses_reg (reg, scan, &scanset))
6299 break;
6301 if (scan == insn)
6302 foundinsn = 1;
6304 if (scan != insn
6305 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6307 /* There is a function call to this register other
6308 than the one we are checking. If we optimize
6309 this call, we need to rescan again below. */
6310 rescan = 1;
6313 /* ??? We shouldn't have to worry about SCANSET here.
6314 We should just be able to check for a REG_DEAD note
6315 on a function call. However, the REG_DEAD notes are
6316 apparently not dependable around libcalls; c-torture
6317 execute/920501-2 is a test case. If SCANSET is set,
6318 then this insn sets the register, so it must have
6319 died earlier. Unfortunately, this will only handle
6320 the cases in which the register is, in fact, set in a
6321 later insn. */
6323 /* ??? We shouldn't have to use FOUNDINSN here.
6324 This dates back to when we used LOG_LINKS to find
6325 the most recent insn which sets the register. */
6327 if (foundinsn
6328 && (scanset
6329 || find_reg_note (scan, REG_DEAD, reg)))
6331 dies = scan;
6332 break;
6336 if (! dies)
6338 /* Either there was a branch, or some insn used REG
6339 other than as a function call address. */
6340 continue;
6343 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6344 on the insn which sets the register, and on each call insn
6345 which uses the register. In final_prescan_insn we look for
6346 the REG_LABEL_OPERAND notes, and output the appropriate label
6347 or pseudo-op. */
6349 label = gen_label_rtx ();
6350 add_reg_note (link, REG_LABEL_OPERAND, label);
6351 add_reg_note (insn, REG_LABEL_OPERAND, label);
6352 if (rescan)
6354 scan = link;
6357 rtx reg2;
6359 scan = NEXT_INSN (scan);
6360 if (scan != insn
6361 && ((CALL_P (scan)
6362 && reg_mentioned_p (reg, scan))
6363 || ((reg2 = sfunc_uses_reg (scan))
6364 && REGNO (reg2) == REGNO (reg))))
6365 add_reg_note (scan, REG_LABEL_OPERAND, label);
6367 while (scan != dies);
6372 if (TARGET_SH2)
6373 fixup_addr_diff_vecs (first);
6375 if (optimize)
6377 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6378 shorten_branches (first);
6381 /* Scan the function looking for move instructions which have to be
6382 changed to pc-relative loads and insert the literal tables. */
6383 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6384 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6386 if (mova_p (insn))
6388 /* ??? basic block reordering can move a switch table dispatch
6389 below the switch table. Check if that has happened.
6390 We only have the addresses available when optimizing; but then,
6391 this check shouldn't be needed when not optimizing. */
6392 if (!untangle_mova (&num_mova, &mova, insn))
6394 insn = mova;
6395 num_mova = 0;
6398 else if (JUMP_TABLE_DATA_P (insn)
6399 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6400 && num_mova
6401 /* ??? loop invariant motion can also move a mova out of a
6402 loop. Since loop does this code motion anyway, maybe we
6403 should wrap UNSPEC_MOVA into a CONST, so that reload can
6404 move it back. */
6405 && ((num_mova > 1
6406 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6407 || (prev_nonnote_insn (insn)
6408 == XEXP (MOVA_LABELREF (mova), 0))))
6410 rtx_insn *scan;
6411 int total;
6413 num_mova--;
6415 /* Some code might have been inserted between the mova and
6416 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6417 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6418 total += get_attr_length (scan);
6420 /* The range of the mova is 1020; add 4 because the pc counts from the
6421 address of the second insn after this one, and subtract 2 in case
6422 the pc is 2 byte aligned, giving the 1022 limit below. Alignment
6423 needed for the ADDR_DIFF_VEC cancels out with that of the mova itself. */
6424 if (total > 1022)
6426 /* Change the mova into a load, and restart scanning
6427 there. broken_move will then return true for mova. */
6428 fixup_mova (mova);
6429 insn = mova;
6432 if (broken_move (insn)
6433 || (NONJUMP_INSN_P (insn)
6434 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6436 rtx_insn *scan;
6437 /* Scan ahead looking for a barrier to stick the constant table
6438 behind. */
6439 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6440 rtx_insn *last_float_move = NULL;
6441 rtx last_float = 0, *last_float_addr = NULL;
6442 int need_aligned_label = 0;
6444 if (num_mova && ! mova_p (mova))
6446 /* find_barrier had to change the first mova into a
6447 pcload; thus, we have to start with this new pcload. */
6448 insn = mova;
6449 num_mova = 0;
6451 /* Now find all the moves between the points and modify them. */
6452 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6454 if (LABEL_P (scan))
6455 last_float = 0;
6456 if (NONJUMP_INSN_P (scan)
6457 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6458 need_aligned_label = 1;
6459 if (broken_move (scan))
6461 rtx *patp = &PATTERN (scan), pat = *patp;
6462 rtx src, dst;
6463 rtx lab;
6464 rtx newsrc;
6465 machine_mode mode;
6467 if (GET_CODE (pat) == PARALLEL)
6468 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6469 src = SET_SRC (pat);
6470 dst = SET_DEST (pat);
6471 mode = GET_MODE (dst);
6473 if (mode == SImode && satisfies_constraint_I16 (src)
6474 && REGNO (dst) != FPUL_REG)
6476 int offset = 0;
6478 mode = HImode;
6479 while (GET_CODE (dst) == SUBREG)
6481 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6482 GET_MODE (SUBREG_REG (dst)),
6483 SUBREG_BYTE (dst),
6484 GET_MODE (dst));
6485 dst = SUBREG_REG (dst);
6487 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6489 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6491 /* This must be an insn that clobbers r0. */
6492 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6493 XVECLEN (PATTERN (scan), 0)
6494 - 1);
6495 rtx clobber = *clobberp;
6497 gcc_assert (GET_CODE (clobber) == CLOBBER
6498 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6500 if (last_float
6501 && reg_set_between_p (r0_rtx, last_float_move, scan))
6502 last_float = 0;
6503 if (last_float
6504 && TARGET_SHCOMPACT
6505 && GET_MODE_SIZE (mode) != 4
6506 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6507 last_float = 0;
6508 lab = add_constant (src, mode, last_float);
6509 if (lab)
6510 emit_insn_before (gen_mova (lab), scan);
6511 else
6513 /* There will be a REG_UNUSED note for r0 on
6514 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6515 otherwise reorg's mark_target_live_regs would not
6516 consider r0 to be used, and we could end up with a delay
6517 slot insn in front of SCAN that clobbers r0. */
6518 rtx note
6519 = find_regno_note (last_float_move, REG_UNUSED, 0);
6521 /* If we are not optimizing, then there may not be
6522 a note. */
6523 if (note)
6524 PUT_REG_NOTE_KIND (note, REG_INC);
6526 *last_float_addr = r0_inc_rtx;
6528 last_float_move = scan;
6529 last_float = src;
6530 newsrc = gen_const_mem (mode,
6531 (((TARGET_SH4 && ! TARGET_FMOVD)
6532 || REGNO (dst) == FPUL_REG)
6533 ? r0_inc_rtx
6534 : r0_rtx));
6535 last_float_addr = &XEXP (newsrc, 0);
6537 /* Remove the clobber of r0. */
6538 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6539 gen_rtx_SCRATCH (Pmode));
6541 /* This is a mova needing a label. Create it. */
6542 else if (GET_CODE (src) == UNSPEC
6543 && XINT (src, 1) == UNSPEC_MOVA
6544 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6546 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6547 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6548 newsrc = gen_rtx_UNSPEC (SImode,
6549 gen_rtvec (1, newsrc),
6550 UNSPEC_MOVA);
6552 else if (GET_CODE (src) == UNSPEC_VOLATILE
6553 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6555 newsrc = XVECEXP (src, 0, 0);
6556 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6557 INSN_CODE (scan) = -1;
6558 continue;
6560 else
6562 lab = add_constant (src, mode, 0);
6563 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6564 newsrc = gen_const_mem (mode, newsrc);
6566 *patp = gen_rtx_SET (dst, newsrc);
6567 INSN_CODE (scan) = -1;
6570 dump_table (need_aligned_label ? insn : 0, barrier);
6571 insn = barrier;
6574 label_ref_list_d::pool.release ();
6575 for (insn = first; insn; insn = NEXT_INSN (insn))
6576 PUT_MODE (insn, VOIDmode);
6578 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6579 INSN_ADDRESSES_FREE ();
6580 split_branches (first);
6582 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6583 also has an effect on the register that holds the address of the sfunc.
6584 Insert an extra dummy insn in front of each sfunc that pretends to
6585 use this register. */
6586 if (flag_delayed_branch)
6588 for (insn = first; insn; insn = NEXT_INSN (insn))
6590 rtx reg = sfunc_uses_reg (insn);
6592 if (! reg)
6593 continue;
6594 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6597 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6600 /* Return the UID of the insn that follows the specified label. */
6602 get_dest_uid (rtx label, int max_uid)
6604 rtx_insn *dest = next_real_insn (label);
6605 int dest_uid;
6606 if (! dest)
6607 /* This can happen for an undefined label. */
6608 return 0;
6609 dest_uid = INSN_UID (dest);
6610 /* If this is a newly created branch redirection blocking instruction,
6611 we cannot index the branch_uid or insn_addresses arrays with its
6612 uid. But then, we won't need to, because the actual destination is
6613 the following branch. */
6614 while (dest_uid >= max_uid)
6616 dest = NEXT_INSN (dest);
6617 dest_uid = INSN_UID (dest);
6619 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6620 return 0;
6621 return dest_uid;
6624 /* Split condbranches that are out of range. Also add clobbers for
6625 scratch registers that are needed in far jumps.
6626 We do this before delay slot scheduling, so that it can take our
6627 newly created instructions into account. It also allows us to
6628 find branches with common targets more easily. */
6629 static void
6630 split_branches (rtx_insn *first)
6632 rtx_insn *insn;
6633 struct far_branch **uid_branch, *far_branch_list = 0;
6634 int max_uid = get_max_uid ();
6635 int ok;
6637 /* Find out which branches are out of range. */
6638 shorten_branches (first);
6640 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6641 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6643 for (insn = first; insn; insn = NEXT_INSN (insn))
6644 if (! INSN_P (insn))
6645 continue;
6646 else if (insn->deleted ())
6648 /* Shorten_branches would split this instruction again,
6649 so transform it into a note. */
6650 SET_INSN_DELETED (insn);
6652 else if (JUMP_P (insn))
6654 enum attr_type type = get_attr_type (insn);
6655 if (type == TYPE_CBRANCH)
6657 rtx_insn *next, *beyond;
6659 if (get_attr_length (insn) > 4)
6661 rtx src = SET_SRC (PATTERN (insn));
6662 rtx olabel = XEXP (XEXP (src, 1), 0);
6663 int addr = INSN_ADDRESSES (INSN_UID (insn));
6664 rtx_insn *label = 0;
6665 int dest_uid = get_dest_uid (olabel, max_uid);
6666 struct far_branch *bp = uid_branch[dest_uid];
6668 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6669 the label if the LABEL_NUSES count drops to zero. There is
6670 always a jump_optimize pass that sets these values, but it
6671 proceeds to delete unreferenced code, and then if not
6672 optimizing, to un-delete the deleted instructions, thus
6673 leaving labels with use counts that are too low. */
6674 if (! optimize)
6676 JUMP_LABEL (insn) = olabel;
6677 LABEL_NUSES (olabel)++;
6679 if (! bp)
6681 bp = (struct far_branch *) alloca (sizeof *bp);
6682 uid_branch[dest_uid] = bp;
6683 bp->prev = far_branch_list;
6684 far_branch_list = bp;
6685 bp->far_label = as_a <rtx_insn *> (
6686 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6687 0));
6688 LABEL_NUSES (bp->far_label)++;
6690 else
6692 label = bp->near_label;
6693 if (! label && bp->address - addr >= CONDJUMP_MIN)
6695 rtx_insn *block = bp->insert_place;
6697 if (GET_CODE (PATTERN (block)) == RETURN)
6698 block = PREV_INSN (block);
6699 else
6700 block = gen_block_redirect (block,
6701 bp->address, 2);
6702 label = emit_label_after (gen_label_rtx (),
6703 PREV_INSN (block));
6704 bp->near_label = label;
6706 else if (label && ! NEXT_INSN (label))
6708 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6709 bp->insert_place = insn;
6710 else
6711 gen_far_branch (bp);
6714 if (! label
6715 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6717 bp->near_label = label = gen_label_rtx ();
6718 bp->insert_place = insn;
6719 bp->address = addr;
6721 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6722 gcc_assert (ok);
6724 else
6726 /* get_attr_length (insn) == 2 */
6727 /* Check if we have a pattern where reorg wants to redirect
6728 the branch to a label from an unconditional branch that
6729 is too far away. */
6730 /* We can't use JUMP_LABEL here because it might be undefined
6731 when not optimizing. */
6732 /* A syntax error might cause beyond to be NULL_RTX. */
6733 beyond
6734 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6735 0));
6737 if (beyond
6738 && (JUMP_P (beyond)
6739 || ((beyond = next_active_insn (beyond))
6740 && JUMP_P (beyond)))
6741 && GET_CODE (PATTERN (beyond)) == SET
6742 && recog_memoized (beyond) == CODE_FOR_jump_compact
6743 && ((INSN_ADDRESSES
6744 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6745 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6746 > 252 + 258 + 2))
6747 gen_block_redirect (beyond,
6748 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6751 next = next_active_insn (insn);
6753 if (next
6754 && (JUMP_P (next)
6755 || ((next = next_active_insn (next))
6756 && JUMP_P (next)))
6757 && GET_CODE (PATTERN (next)) == SET
6758 && recog_memoized (next) == CODE_FOR_jump_compact
6759 && ((INSN_ADDRESSES
6760 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6761 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6762 > 252 + 258 + 2))
6763 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6765 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6767 int addr = INSN_ADDRESSES (INSN_UID (insn));
6768 rtx_insn *far_label = 0;
6769 int dest_uid = 0;
6770 struct far_branch *bp;
6772 if (type == TYPE_JUMP)
6774 if (CROSSING_JUMP_P (insn))
6776 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6777 insn);
6778 continue;
6781 far_label = as_a <rtx_insn *> (
6782 XEXP (SET_SRC (PATTERN (insn)), 0));
6783 dest_uid = get_dest_uid (far_label, max_uid);
6784 if (! dest_uid)
6786 /* Parse errors can lead to labels outside
6787 the insn stream. */
6788 if (! NEXT_INSN (far_label))
6789 continue;
6791 if (! optimize)
6793 JUMP_LABEL (insn) = far_label;
6794 LABEL_NUSES (far_label)++;
6796 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6797 far_label = 0;
6800 bp = uid_branch[dest_uid];
6801 if (! bp)
6803 bp = (struct far_branch *) alloca (sizeof *bp);
6804 uid_branch[dest_uid] = bp;
6805 bp->prev = far_branch_list;
6806 far_branch_list = bp;
6807 bp->near_label = 0;
6808 bp->far_label = far_label;
6809 if (far_label)
6810 LABEL_NUSES (far_label)++;
6812 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6813 if (addr - bp->address <= CONDJUMP_MAX)
6814 emit_label_after (bp->near_label, PREV_INSN (insn));
6815 else
6817 gen_far_branch (bp);
6818 bp->near_label = 0;
6820 else
6821 bp->near_label = 0;
6822 bp->address = addr;
6823 bp->insert_place = insn;
6824 if (! far_label)
6825 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6826 else
6827 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6830 /* Generate all pending far branches,
6831 and free our references to the far labels. */
6832 while (far_branch_list)
6834 if (far_branch_list->near_label
6835 && ! NEXT_INSN (far_branch_list->near_label))
6836 gen_far_branch (far_branch_list);
6837 if (optimize
6838 && far_branch_list->far_label
6839 && ! --LABEL_NUSES (far_branch_list->far_label))
6840 delete_insn (far_branch_list->far_label);
6841 far_branch_list = far_branch_list->prev;
6844 /* Instruction length information is no longer valid due to the new
6845 instructions that have been generated. */
6846 init_insn_lengths ();
6849 /* Dump out instruction addresses, which is useful for debugging the
6850 constant pool table stuff.
6852 If relaxing, output the label and pseudo-ops used to link together
6853 calls and the instruction which set the registers.
6855 ??? The addresses printed by this routine for insns are nonsense for
6856 insns which are inside of a sequence where none of the inner insns have
6857 variable length. This is because the second pass of shorten_branches
6858 does not bother to update them. */
6859 void
6860 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6861 int noperands ATTRIBUTE_UNUSED)
6863 if (TARGET_DUMPISIZE)
6864 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6866 if (TARGET_RELAX)
6868 rtx note;
6870 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6871 if (note)
6873 rtx pattern;
6875 pattern = PATTERN (insn);
6876 if (GET_CODE (pattern) == PARALLEL)
6877 pattern = XVECEXP (pattern, 0, 0);
6878 switch (GET_CODE (pattern))
6880 case SET:
6881 if (GET_CODE (SET_SRC (pattern)) != CALL
6882 && get_attr_type (insn) != TYPE_SFUNC)
6884 targetm.asm_out.internal_label
6885 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6886 break;
6888 /* else FALLTHROUGH */
6889 case CALL:
6890 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6891 CODE_LABEL_NUMBER (XEXP (note, 0)));
6892 break;
6894 default:
6895 gcc_unreachable ();
6901 /* Dump out any constants accumulated in the final pass. These will
6902 only be labels. */
6903 const char *
6904 output_jump_label_table (void)
6906 int i;
6908 if (pool_size)
6910 fprintf (asm_out_file, "\t.align 2\n");
6911 for (i = 0; i < pool_size; i++)
6913 pool_node *p = &pool_vector[i];
6915 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6916 CODE_LABEL_NUMBER (p->label));
6917 output_asm_insn (".long %O0", &p->value);
6919 pool_size = 0;
6922 return "";
6925 /* A full frame looks like:
6927 arg-5
6928 arg-4
6929 [ if current_function_anonymous_args
6930 arg-3
6931 arg-2
6932 arg-1
6933 arg-0 ]
6934 saved-fp
6935 saved-r10
6936 saved-r11
6937 saved-r12
6938 saved-pr
6939 local-n
6941 local-1
6942 local-0 <- fp points here.
6944 Number of bytes pushed for anonymous args, used to pass information
6945 between expand_prologue and expand_epilogue.
6947 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6948 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6949 for an epilogue and a negative value means that it's for a sibcall
6950 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6951 all the registers that are about to be restored, and hence dead. */
6952 static void
6953 output_stack_adjust (int size, rtx reg, int epilogue_p,
6954 HARD_REG_SET *live_regs_mask, bool frame_p)
6956 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6957 if (size)
6959 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6961 /* This test is bogus, as output_stack_adjust is used to re-align the
6962 stack. */
6963 #if 0
6964 gcc_assert (!(size % align));
6965 #endif
6967 if (CONST_OK_FOR_ADD (size))
6968 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6969 /* Try to do it with two partial adjustments; however, we must make
6970 sure that the stack is properly aligned at all times, in case
6971 an interrupt occurs between the two partial adjustments. */
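/* A worked example, assuming a 4 byte alignment and the usual signed
   8-bit add-immediate range: size == 184 is too big for a single add,
   but 184 / 2 & -4 == 92 and the remaining 92 both fit, so two adds of
   92 are emitted, each of which keeps the stack aligned.  */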
6972 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6973 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6975 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6976 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6978 else
6980 rtx const_reg;
6981 rtx insn;
6982 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6983 int i;
6985 /* If TEMP is invalid, we could temporarily save a general
6986 register to MACL. However, there is currently no need
6987 to handle this case, so just die when we see it. */
6988 if (epilogue_p < 0
6989 || current_function_interrupt
6990 || ! call_really_used_regs[temp] || fixed_regs[temp])
6991 temp = -1;
6992 if (temp < 0 && ! current_function_interrupt
6993 && (TARGET_SHMEDIA || epilogue_p >= 0))
6995 HARD_REG_SET temps;
6996 COPY_HARD_REG_SET (temps, call_used_reg_set);
6997 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6998 if (epilogue_p > 0)
7000 int nreg = 0;
7001 if (crtl->return_rtx)
7003 machine_mode mode;
7004 mode = GET_MODE (crtl->return_rtx);
7005 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
7006 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
7008 for (i = 0; i < nreg; i++)
7009 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
7010 if (crtl->calls_eh_return)
7012 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
7013 for (i = 0; i <= 3; i++)
7014 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
7017 if (TARGET_SHMEDIA && epilogue_p < 0)
7018 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7019 CLEAR_HARD_REG_BIT (temps, i);
7020 if (epilogue_p <= 0)
7022 for (i = FIRST_PARM_REG;
7023 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7024 CLEAR_HARD_REG_BIT (temps, i);
7025 if (cfun->static_chain_decl != NULL)
7026 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7028 temp = scavenge_reg (&temps);
7030 if (temp < 0 && live_regs_mask)
7032 HARD_REG_SET temps;
7034 COPY_HARD_REG_SET (temps, *live_regs_mask);
7035 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7036 temp = scavenge_reg (&temps);
7038 if (temp < 0)
7040 rtx adj_reg, tmp_reg, mem;
7042 /* If we reached here, the most likely case is the (sibcall)
7043 epilogue for non-SHmedia. Put a special push/pop sequence
7044 for such a case as the last resort. This looks lengthy, but it
7045 would not be a problem because the case seems to be very
7046 rare. */
7048 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7051 /* ??? There is still the slight possibility that r4 or
7052 r5 have been reserved as fixed registers or assigned
7053 as global registers, and they change during an
7054 interrupt. There are possible ways to handle this:
7056 - If we are adjusting the frame pointer (r14), we can do
7057 with a single temp register and an ordinary push / pop
7058 on the stack.
7059 - Grab any call-used or call-saved registers (i.e. not
7060 fixed or globals) for the temps we need. We might
7061 also grab r14 if we are adjusting the stack pointer.
7062 If we can't find enough available registers, issue
7063 a diagnostic and die - the user must have reserved
7064 way too many registers.
7065 But since all this is rather unlikely to happen and
7066 would require extra testing, we just die if r4 / r5
7067 are not available. */
7068 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7069 && !global_regs[4] && !global_regs[5]);
7071 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7072 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7073 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7074 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7075 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7076 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7077 emit_move_insn (mem, tmp_reg);
7078 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7079 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7080 emit_move_insn (mem, tmp_reg);
7081 emit_move_insn (reg, adj_reg);
7082 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7083 emit_move_insn (adj_reg, mem);
7084 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7085 emit_move_insn (tmp_reg, mem);
7086 /* Tell flow the insns that pop r4/r5 aren't dead. */
7087 emit_use (tmp_reg);
7088 emit_use (adj_reg);
7089 return;
7091 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7093 /* If SIZE is negative, subtract the positive value.
7094 This sometimes allows a constant pool entry to be shared
7095 between prologue and epilogue code. */
7096 if (size < 0)
7098 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7099 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7101 else
7103 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7104 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7106 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7107 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7108 GEN_INT (size))));
7113 /* Emit the specified insn and mark it as frame related.
7114 FIXME: Rename this to emit_frame_insn. */
7115 static rtx_insn *
7116 frame_insn (rtx x)
7118 rtx_insn *insn = emit_insn (x);
7119 RTX_FRAME_RELATED_P (insn) = 1;
7120 return insn;
7123 /* Output RTL to push register RN onto the stack. */
7124 static rtx
7125 push (int rn)
7127 rtx x;
7128 if (rn == FPUL_REG)
7129 x = gen_push_fpul ();
7130 else if (rn == FPSCR_REG)
7131 x = gen_push_fpscr ();
7132 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7133 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7135 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7136 return NULL_RTX;
7137 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7139 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7140 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7141 else
7142 x = gen_push (gen_rtx_REG (SImode, rn));
7144 x = frame_insn (x);
7145 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7146 return x;
7149 /* Output RTL to pop register RN from the stack. */
7150 static void
7151 pop (int rn)
7153 rtx x, sp_reg, reg;
7154 if (rn == FPUL_REG)
7155 x = gen_pop_fpul ();
7156 else if (rn == FPSCR_REG)
7157 x = gen_pop_fpscr ();
7158 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7159 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7161 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7162 return;
7163 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7165 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7166 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7167 else
7168 x = gen_pop (gen_rtx_REG (SImode, rn));
7170 x = emit_insn (x);
7172 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7173 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7174 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7175 : SET_DEST (PATTERN (x)));
7176 add_reg_note (x, REG_CFA_RESTORE, reg);
7177 add_reg_note (x, REG_CFA_ADJUST_CFA,
7178 gen_rtx_SET (sp_reg,
7179 plus_constant (SImode, sp_reg,
7180 GET_MODE_SIZE (GET_MODE (reg)))));
7181 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7182 RTX_FRAME_RELATED_P (x) = 1;
7185 /* Generate code to push the regs specified in the mask. */
7186 static void
7187 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7189 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7190 int skip_fpscr = 0;
7192 /* Push PR last; this gives better latencies after the prologue, and
7193 candidates for the return delay slot when there are no general
7194 registers pushed. */
7195 for (; i < FIRST_PSEUDO_REGISTER; i++)
7197 /* If this is an interrupt handler, and the SZ bit varies,
7198 and we have to push any floating point register, we need
7199 to switch to the correct precision first. */
7200 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7201 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7203 HARD_REG_SET unsaved;
7205 push (FPSCR_REG);
7206 COMPL_HARD_REG_SET (unsaved, *mask);
7207 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7208 skip_fpscr = 1;
7210 if (i != PR_REG
7211 && (i != FPSCR_REG || ! skip_fpscr)
7212 && TEST_HARD_REG_BIT (*mask, i))
7214 /* If the ISR has RESBANK attribute assigned, don't push any of
7215 the following registers - R0-R14, MACH, MACL and GBR. */
7216 if (! (sh_cfun_resbank_handler_p ()
7217 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7218 || i == MACH_REG
7219 || i == MACL_REG
7220 || i == GBR_REG)))
7221 push (i);
7225 /* Push banked registers last to improve delay slot opportunities. */
7226 if (interrupt_handler)
7228 bool use_movml = false;
7230 if (TARGET_SH2A)
7232 unsigned int count = 0;
7234 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7235 if (TEST_HARD_REG_BIT (*mask, i))
7236 count++;
7237 else
7238 break;
7240 /* Use movml when all banked registers are pushed. */
7241 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7242 use_movml = true;
7245 if (sh_cfun_resbank_handler_p ())
7246 ; /* Do nothing. */
7247 else if (use_movml)
7249 rtx x, mem, reg, set;
7250 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7252 /* We must avoid scheduling the multiple-store insn together
7253 with other insns. */
7254 emit_insn (gen_blockage ());
7255 x = gen_movml_push_banked (sp_reg);
7256 x = frame_insn (x);
7257 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7259 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7260 reg = gen_rtx_REG (SImode, i);
7261 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7264 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7265 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7266 emit_insn (gen_blockage ());
7268 else
7269 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7270 if (TEST_HARD_REG_BIT (*mask, i))
7271 push (i);
7274 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7275 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7276 push (PR_REG);
7279 /* Calculate how much extra space is needed to save all callee-saved
7280 target registers.
7281 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7282 static int
7283 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7285 int reg;
7286 int stack_space = 0;
7287 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7289 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7290 if ((! call_really_used_regs[reg] || interrupt_handler)
7291 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7292 /* Leave space to save this target register on the stack,
7293 in case target register allocation wants to use it. */
7294 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7295 return stack_space;
7298 /* Decide whether we should reserve space for callee-save target registers,
7299 in case target register allocation wants to use them. REGS_SAVED is
7300 the space, in bytes, that is already required for register saves.
7301 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7302 static int
7303 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7304 HARD_REG_SET *live_regs_mask)
7306 if (optimize_size)
7307 return 0;
7308 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7311 /* Decide how much space to reserve for callee-save target registers
7312 in case target register allocation wants to use them.
7313 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7314 static int
7315 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7317 if (shmedia_space_reserved_for_target_registers)
7318 return shmedia_target_regs_stack_space (live_regs_mask);
7319 else
7320 return 0;
7323 /* Work out the registers which need to be saved, both as a mask and a
7324 count of saved words. Return the count.
7326 If doing a pragma interrupt function, then push all regs used by the
7327 function, and if we call another function (we can tell by looking at PR),
7328 make sure that all the regs it clobbers are safe too. */
7329 static int
7330 calc_live_regs (HARD_REG_SET *live_regs_mask)
7332 unsigned int reg;
7333 int count;
7334 tree attrs;
7335 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7336 bool nosave_low_regs;
7337 int pr_live, has_call;
7339 attrs = DECL_ATTRIBUTES (current_function_decl);
7340 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7341 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7342 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7343 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7345 CLEAR_HARD_REG_SET (*live_regs_mask);
7346 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7347 && df_regs_ever_live_p (FPSCR_REG))
7348 target_flags &= ~MASK_FPU_SINGLE;
7349 /* If we can avoid many save insns by switching to double mode, do that. */
7350 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7351 && TARGET_FPU_SINGLE)
7352 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7353 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7354 && (! call_really_used_regs[reg]
7355 || interrupt_handler)
7356 && ++count > 2)
7358 target_flags &= ~MASK_FPU_SINGLE;
7359 break;
7361 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7362 knows how to use it. That means the pseudo originally allocated for
7363 the initial value can become the PR_MEDIA_REG hard register, as seen for
7364 execute/20010122-1.c:test9. */
7365 if (TARGET_SHMEDIA)
7366 /* ??? this function is called from initial_elimination_offset, hence we
7367 can't use the result of sh_media_register_for_return here. */
7368 pr_live = sh_pr_n_sets ();
7369 else
7371 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7372 pr_live = (pr_initial
7373 ? (!REG_P (pr_initial)
7374 || REGNO (pr_initial) != (PR_REG))
7375 : df_regs_ever_live_p (PR_REG));
7376 /* For SHcompact, if not optimizing, we end up with a memory reference
7377 using the return address pointer for __builtin_return_address even
7378 though there is no actual need to put the PR register on the stack. */
7379 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7381 /* Force PR to be live if the prologue has to call the SHmedia
7382 argument decoder or register saver. */
7383 if (TARGET_SHCOMPACT
7384 && ((crtl->args.info.call_cookie
7385 & ~ CALL_COOKIE_RET_TRAMP (1))
7386 || crtl->saves_all_registers))
7387 pr_live = 1;
7388 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7389 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7391 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7392 ? pr_live
7393 : interrupt_handler
7394 ? (/* Need to save all the regs ever live. */
7395 (df_regs_ever_live_p (reg)
7396 || (call_really_used_regs[reg]
7397 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7398 || reg == PIC_OFFSET_TABLE_REGNUM)
7399 && has_call)
7400 || (TARGET_SHMEDIA && has_call
7401 && REGISTER_NATURAL_MODE (reg) == SImode
7402 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7403 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7404 && reg != RETURN_ADDRESS_POINTER_REGNUM
7405 && reg != T_REG && reg != GBR_REG
7406 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7407 /* Push fpscr only on targets which have an FPU. */
7408 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7409 : (/* Only push those regs which are used and need to be saved. */
7410 (TARGET_SHCOMPACT
7411 && flag_pic
7412 && crtl->args.info.call_cookie
7413 && reg == PIC_OFFSET_TABLE_REGNUM)
7414 || (df_regs_ever_live_p (reg)
7415 && ((!call_really_used_regs[reg]
7416 && !(reg != PIC_OFFSET_TABLE_REGNUM
7417 && fixed_regs[reg] && call_used_regs[reg]))
7418 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7419 || (crtl->calls_eh_return
7420 && (reg == EH_RETURN_DATA_REGNO (0)
7421 || reg == EH_RETURN_DATA_REGNO (1)
7422 || reg == EH_RETURN_DATA_REGNO (2)
7423 || reg == EH_RETURN_DATA_REGNO (3)))
7424 || ((reg == MACL_REG || reg == MACH_REG)
7425 && df_regs_ever_live_p (reg)
7426 && sh_cfun_attr_renesas_p ())
7429 SET_HARD_REG_BIT (*live_regs_mask, reg);
7430 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7432 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7433 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7435 if (FP_REGISTER_P (reg))
7437 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7439 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7443 else if (XD_REGISTER_P (reg))
7445 /* Must switch to double mode to access these registers. */
7446 target_flags &= ~MASK_FPU_SINGLE;
7450 if (nosave_low_regs && reg == R8_REG)
7451 break;
7453 /* If we have a target register optimization pass after prologue / epilogue
7454 threading, we need to assume all target registers will be live even if
7455 they aren't now. */
7456 if (flag_branch_target_load_optimize2
7457 && TARGET_SAVE_ALL_TARGET_REGS
7458 && shmedia_space_reserved_for_target_registers)
7459 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7460 if ((! call_really_used_regs[reg] || interrupt_handler)
7461 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7463 SET_HARD_REG_BIT (*live_regs_mask, reg);
7464 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7466 /* If this is an interrupt handler, we don't have any call-clobbered
7467 registers we can conveniently use for target register save/restore.
7468 Make sure we save at least one general purpose register when we need
7469 to save target registers. */
7470 if (interrupt_handler
7471 && hard_reg_set_intersect_p (*live_regs_mask,
7472 reg_class_contents[TARGET_REGS])
7473 && ! hard_reg_set_intersect_p (*live_regs_mask,
7474 reg_class_contents[GENERAL_REGS]))
7476 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7477 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7480 return count;
7483 /* Code to generate prologue and epilogue sequences */
7485 /* PUSHED is the number of bytes that are being pushed on the
7486 stack for register saves. Return the frame size, padded
7487 appropriately so that the stack stays properly aligned. */
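/* Illustrative arithmetic only (the values are hypothetical): with a 4-byte
   stack boundary (align = 4), a frame size of 10 and PUSHED = 8, the result
   is ((10 + 8 + 3) & -4) - 8 = 20 - 8 = 12, so the 12-byte frame adjustment
   plus the 8 pushed bytes keeps the stack pointer 4-byte aligned.  */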
7488 static HOST_WIDE_INT
7489 rounded_frame_size (int pushed)
7491 HOST_WIDE_INT size = get_frame_size ();
7492 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7494 if (ACCUMULATE_OUTGOING_ARGS)
7495 size += crtl->outgoing_args_size;
7497 return ((size + pushed + align - 1) & -align) - pushed;
7500 /* Choose a call-clobbered target-branch register that remains
7501 unchanged along the whole function. We set it up as the return
7502 value in the prologue. */
7503 int
7504 sh_media_register_for_return (void)
7506 int regno;
7507 int tr0_used;
7509 if (! crtl->is_leaf)
7510 return -1;
7511 if (lookup_attribute ("interrupt_handler",
7512 DECL_ATTRIBUTES (current_function_decl)))
7513 return -1;
7514 if (sh_cfun_interrupt_handler_p ())
7515 return -1;
7517 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7519 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7520 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7521 return regno;
7523 return -1;
7526 /* The maximum registers we need to save are:
7527 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7528 - 32 floating point registers (for each pair, we save none,
7529 one single precision value, or a double precision value).
7530 - 8 target registers
7531 - add 1 entry for a delimiter. */
7532 #define MAX_SAVED_REGS (62+32+8)
7534 typedef struct save_entry_s
7536 unsigned char reg;
7537 unsigned char mode;
7538 short offset;
7539 } save_entry;
7541 #define MAX_TEMPS 4
7543 /* There will be a delimiter entry with VOIDmode both at the start and the
7544 end of a filled in schedule. The end delimiter has the offset of the
7545 save with the smallest (i.e. most negative) offset. */
7546 typedef struct save_schedule_s
7548 save_entry entries[MAX_SAVED_REGS + 2];
7549 int temps[MAX_TEMPS+1];
7550 } save_schedule;
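/* Roughly, a filled-in schedule looks like this: entries[0] is the start
   delimiter (VOIDmode, offset == OFFSET_BASE), entries[1..n] are the saves
   in order of decreasing offset, and the entry after the last save is the
   end delimiter carrying the lowest offset.  temps[] holds up to MAX_TEMPS
   scratch general purpose registers and is terminated by -1.  */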
7552 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7553 use reverse order. Returns the last entry written to (not counting
7554 the delimiter). OFFSET_BASE is a number to be added to all offset
7555 entries. */
7556 static save_entry *
7557 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7558 int offset_base)
7560 int align, i;
7561 save_entry *entry = schedule->entries;
7562 int tmpx = 0;
7563 int offset;
7565 if (! current_function_interrupt)
7566 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7567 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7568 && ! FUNCTION_ARG_REGNO_P (i)
7569 && i != FIRST_RET_REG
7570 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7571 && ! (crtl->calls_eh_return
7572 && (i == EH_RETURN_STACKADJ_REGNO
7573 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7574 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7575 schedule->temps[tmpx++] = i;
7576 entry->reg = -1;
7577 entry->mode = VOIDmode;
7578 entry->offset = offset_base;
7579 entry++;
7580 /* We loop twice: first, we save 8-byte aligned registers in the
7581 higher addresses, that are known to be aligned. Then, we
7582 proceed to saving 32-bit registers that don't need 8-byte
7583 alignment.
7584 If this is an interrupt function, all registers that need saving
7585 need to be saved in full. Moreover, we need to postpone saving
7586 target registers till we have saved some general purpose registers
7587 we can then use as scratch registers. */
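/* For example (assuming an 8-byte stack boundary, as on SH5): DImode and
   DFmode saves satisfy GET_MODE_SIZE (mode) % 8 == 0 and are emitted in the
   ALIGN == 1 pass, while 4-byte SImode/SFmode saves are left for the
   ALIGN == 0 pass.  */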
7588 offset = offset_base;
7589 for (align = 1; align >= 0; align--)
7591 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7592 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7594 machine_mode mode = REGISTER_NATURAL_MODE (i);
7595 int reg = i;
7597 if (current_function_interrupt)
7599 if (TARGET_REGISTER_P (i))
7600 continue;
7601 if (GENERAL_REGISTER_P (i))
7602 mode = DImode;
7604 if (mode == SFmode && (i % 2) == 1
7605 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7606 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7608 mode = DFmode;
7609 i--;
7610 reg--;
7613 /* If we're doing the aligned pass and this is not aligned,
7614 or we're doing the unaligned pass and this is aligned,
7615 skip it. */
7616 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7617 != align)
7618 continue;
7620 if (current_function_interrupt
7621 && GENERAL_REGISTER_P (i)
7622 && tmpx < MAX_TEMPS)
7623 schedule->temps[tmpx++] = i;
7625 offset -= GET_MODE_SIZE (mode);
7626 entry->reg = i;
7627 entry->mode = mode;
7628 entry->offset = offset;
7629 entry++;
7631 if (align && current_function_interrupt)
7632 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7633 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7635 offset -= GET_MODE_SIZE (DImode);
7636 entry->reg = i;
7637 entry->mode = DImode;
7638 entry->offset = offset;
7639 entry++;
7642 entry->reg = -1;
7643 entry->mode = VOIDmode;
7644 entry->offset = offset;
7645 schedule->temps[tmpx] = -1;
7646 return entry - 1;
7649 /* Expand code for the function prologue. */
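/* A rough outline of what follows (see the code for the authoritative
   details): adjust the stack for pretend/stack-passed arguments, perform
   SHcompact/SHmedia specific setup, push argument registers for varargs
   functions, honour an sp_switch attribute if present, save the live
   registers computed by calc_live_regs (scheduled saves on SH5, push_regs
   otherwise), load the PIC register if needed, allocate the local frame
   and finally establish the frame pointer when one is required.  */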
7650 void
7651 sh_expand_prologue (void)
7653 HARD_REG_SET live_regs_mask;
7654 int d, i;
7655 int d_rounding = 0;
7656 int save_flags = target_flags;
7657 int pretend_args;
7658 int stack_usage;
7659 tree sp_switch_attr
7660 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7662 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7664 /* We have pretend args if we had an object sent partially in registers
7665 and partially on the stack, e.g. a large structure. */
7666 pretend_args = crtl->args.pretend_args_size;
7667 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7668 && (NPARM_REGS(SImode)
7669 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7670 pretend_args = 0;
7672 output_stack_adjust (-pretend_args
7673 - crtl->args.info.stack_regs * 8,
7674 stack_pointer_rtx, 0, NULL, true);
7675 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7677 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7678 /* We're going to use the PIC register to load the address of the
7679 incoming-argument decoder and/or of the return trampoline from
7680 the GOT, so make sure the PIC register is preserved and
7681 initialized. */
7682 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7684 if (TARGET_SHCOMPACT
7685 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7687 int reg;
7689 /* First, make all registers with incoming arguments that will
7690 be pushed onto the stack live, so that register renaming
7691 doesn't overwrite them. */
7692 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7693 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7694 >= NPARM_REGS (SImode) - reg)
7695 for (; reg < NPARM_REGS (SImode); reg++)
7696 emit_insn (gen_shcompact_preserve_incoming_args
7697 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7698 else if (CALL_COOKIE_INT_REG_GET
7699 (crtl->args.info.call_cookie, reg) == 1)
7700 emit_insn (gen_shcompact_preserve_incoming_args
7701 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7703 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7704 stack_pointer_rtx);
7705 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7706 GEN_INT (crtl->args.info.call_cookie));
7707 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7708 gen_rtx_REG (SImode, R0_REG));
7710 else if (TARGET_SHMEDIA)
7712 int tr = sh_media_register_for_return ();
7714 if (tr >= 0)
7715 emit_move_insn (gen_rtx_REG (DImode, tr),
7716 gen_rtx_REG (DImode, PR_MEDIA_REG));
7719 /* Emit the code for SETUP_VARARGS. */
7720 if (cfun->stdarg)
7722 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7724 /* Push arg regs as if they'd been provided by the caller on the stack. */
7725 for (i = 0; i < NPARM_REGS(SImode); i++)
7727 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7729 if (i >= (NPARM_REGS(SImode)
7730 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7732 break;
7733 push (rn);
7734 stack_usage += GET_MODE_SIZE (SImode);
7739 /* If we're supposed to switch stacks at function entry, do so now. */
7740 if (sp_switch_attr)
7742 rtx lab, newsrc;
7743 /* The argument specifies a variable holding the address of the
7744 stack the interrupt function should switch to/from at entry/exit. */
7745 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7746 const char *s
7747 = ggc_strdup (TREE_STRING_POINTER (arg));
7748 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7750 lab = add_constant (sp_switch, SImode, 0);
7751 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7753 emit_insn (gen_sp_switch_1 (newsrc));
7756 d = calc_live_regs (&live_regs_mask);
7757 /* ??? Maybe we could save some switching if we can move a mode switch
7758 that already happens to be at the function start into the prologue. */
7759 if (target_flags != save_flags && ! current_function_interrupt)
7760 emit_insn (gen_toggle_sz ());
7762 if (TARGET_SH5)
7764 int offset_base, offset;
7765 rtx r0 = NULL_RTX;
7766 int offset_in_r0 = -1;
7767 int sp_in_r0 = 0;
7768 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7769 int total_size, save_size;
7770 save_schedule schedule;
7771 save_entry *entry;
7772 int *tmp_pnt;
7774 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7775 && ! current_function_interrupt)
7776 r0 = gen_rtx_REG (Pmode, R0_REG);
7778 /* D is the actual number of bytes that we need for saving registers;
7779 however, in initial_elimination_offset we have committed to using
7780 an additional TREGS_SPACE amount of bytes - in order to keep both
7781 addresses to arguments supplied by the caller and local variables
7782 valid, we must keep this gap. Place it between the incoming
7783 arguments and the actually saved registers in a bid to optimize
7784 locality of reference. */
7785 total_size = d + tregs_space;
7786 total_size += rounded_frame_size (total_size);
7787 save_size = total_size - rounded_frame_size (d);
7788 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7789 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7790 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7792 /* If adjusting the stack in a single step costs nothing extra, do so.
7793 I.e. either if a single addi is enough, or we need a movi anyway,
7794 and we don't exceed the maximum offset range (the test for the
7795 latter is conservative for simplicity). */
7796 if (TARGET_SHMEDIA
7797 && (CONST_OK_FOR_I10 (-total_size)
7798 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7799 && total_size <= 2044)))
7800 d_rounding = total_size - save_size;
7802 offset_base = d + d_rounding;
7804 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7805 0, NULL, true);
7806 stack_usage += save_size + d_rounding;
7808 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7809 tmp_pnt = schedule.temps;
7810 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7812 machine_mode mode = (machine_mode) entry->mode;
7813 unsigned int reg = entry->reg;
7814 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7815 rtx orig_reg_rtx;
7817 offset = entry->offset;
7819 reg_rtx = gen_rtx_REG (mode, reg);
7821 mem_rtx = gen_frame_mem (mode,
7822 gen_rtx_PLUS (Pmode,
7823 stack_pointer_rtx,
7824 GEN_INT (offset)));
7826 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7828 gcc_assert (r0);
7829 mem_rtx = NULL_RTX;
7832 if (HAVE_PRE_DECREMENT
7833 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7834 || mem_rtx == NULL_RTX
7835 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7837 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7839 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7840 pre_dec = NULL_RTX;
7841 else
7843 mem_rtx = NULL_RTX;
7844 offset += GET_MODE_SIZE (mode);
7848 if (mem_rtx != NULL_RTX)
7849 goto addr_ok;
7851 if (offset_in_r0 == -1)
7853 emit_move_insn (r0, GEN_INT (offset));
7854 offset_in_r0 = offset;
7856 else if (offset != offset_in_r0)
7858 emit_move_insn (r0,
7859 gen_rtx_PLUS
7860 (Pmode, r0,
7861 GEN_INT (offset - offset_in_r0)));
7862 offset_in_r0 += offset - offset_in_r0;
7865 if (pre_dec != NULL_RTX)
7867 if (! sp_in_r0)
7869 emit_move_insn (r0,
7870 gen_rtx_PLUS
7871 (Pmode, r0, stack_pointer_rtx));
7872 sp_in_r0 = 1;
7875 offset -= GET_MODE_SIZE (mode);
7876 offset_in_r0 -= GET_MODE_SIZE (mode);
7878 mem_rtx = pre_dec;
7880 else if (sp_in_r0)
7881 mem_rtx = gen_frame_mem (mode, r0);
7882 else
7883 mem_rtx = gen_frame_mem (mode,
7884 gen_rtx_PLUS (Pmode,
7885 stack_pointer_rtx,
7886 r0));
7888 /* We must not use an r0-based address for target-branch
7889 registers or for special registers without pre-dec
7890 memory addresses, since we store their values in r0
7891 first. */
7892 gcc_assert (!TARGET_REGISTER_P (reg)
7893 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7894 || mem_rtx == pre_dec));
7896 addr_ok:
7897 orig_reg_rtx = reg_rtx;
7898 if (TARGET_REGISTER_P (reg)
7899 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7900 && mem_rtx != pre_dec))
7902 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7904 emit_move_insn (tmp_reg, reg_rtx);
7906 if (REGNO (tmp_reg) == R0_REG)
7908 offset_in_r0 = -1;
7909 sp_in_r0 = 0;
7910 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7913 if (*++tmp_pnt <= 0)
7914 tmp_pnt = schedule.temps;
7916 reg_rtx = tmp_reg;
7919 rtx insn;
7921 /* Mark as interesting for the DWARF CFI generator. */
7922 insn = emit_move_insn (mem_rtx, reg_rtx);
7923 RTX_FRAME_RELATED_P (insn) = 1;
7924 /* If we use an intermediate register for the save, we can't
7925 describe this exactly in cfi as a copy of the to-be-saved
7926 register into the temporary register and then the temporary
7927 register on the stack, because the temporary register can
7928 have a different natural size than the to-be-saved register.
7929 Thus, we gloss over the intermediate copy and pretend we do
7930 a direct save from the to-be-saved register. */
7931 if (REGNO (reg_rtx) != reg)
7933 rtx set;
7935 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7936 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7939 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7941 rtx reg_rtx = gen_rtx_REG (mode, reg);
7942 rtx set;
7943 rtx mem_rtx = gen_frame_mem (mode,
7944 gen_rtx_PLUS (Pmode,
7945 stack_pointer_rtx,
7946 GEN_INT (offset)));
7948 set = gen_rtx_SET (mem_rtx, reg_rtx);
7949 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7954 gcc_assert (entry->offset == d_rounding);
7956 else
7958 push_regs (&live_regs_mask, current_function_interrupt);
7959 stack_usage += d;
7962 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7963 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7965 if (SHMEDIA_REGS_STACK_ADJUST ())
7967 /* This must NOT go through the PLT, otherwise mach and macl
7968 may be clobbered. */
7969 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7970 (TARGET_FPU_ANY
7971 ? "__GCC_push_shmedia_regs"
7972 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7973 emit_insn (gen_shmedia_save_restore_regs_compact
7974 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7977 if (target_flags != save_flags && ! current_function_interrupt)
7978 emit_insn (gen_toggle_sz ());
7980 target_flags = save_flags;
7982 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7983 stack_pointer_rtx, 0, NULL, true);
7984 stack_usage += rounded_frame_size (d) - d_rounding;
7986 if (frame_pointer_needed)
7987 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7989 if (TARGET_SHCOMPACT
7990 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7992 /* This must NOT go through the PLT, otherwise mach and macl
7993 may be clobbered. */
7994 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7995 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7996 emit_insn (gen_shcompact_incoming_args ());
7999 /* If we are profiling, make sure no instructions are scheduled before
8000 the call to mcount. Similarly, if some call instructions are swapped
8001 before frame-related insns, it'll confuse the unwinder because
8002 currently SH has no unwind info for function epilogues. */
8003 if (crtl->profile || flag_exceptions || flag_unwind_tables)
8004 emit_insn (gen_blockage ());
8006 if (flag_stack_usage_info)
8007 current_function_static_stack_size = stack_usage;
8010 /* Expand code for the function epilogue. */
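/* A rough outline of what follows: deallocate the local frame (through the
   frame pointer when one was used, with scheduling blockages around the
   adjustment), pop the SHmedia register save area if any, restore the saved
   registers (scheduled restores on SH5; otherwise PR, the banked registers
   for interrupt handlers and then the remaining registers), undo the
   pretend-args adjustment, apply the EH return stack adjustment and switch
   back from an sp_switch stack when applicable.  */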
8011 void
8012 sh_expand_epilogue (bool sibcall_p)
8014 HARD_REG_SET live_regs_mask;
8015 int d, i;
8016 int d_rounding = 0;
8018 int save_flags = target_flags;
8019 int frame_size, save_size;
8020 int fpscr_deferred = 0;
8021 int e = sibcall_p ? -1 : 1;
8023 d = calc_live_regs (&live_regs_mask);
8025 save_size = d;
8026 frame_size = rounded_frame_size (d);
8028 if (TARGET_SH5)
8030 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8031 int total_size;
8032 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8033 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8034 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8036 total_size = d + tregs_space;
8037 total_size += rounded_frame_size (total_size);
8038 save_size = total_size - frame_size;
8040 /* If adjusting the stack in a single step costs nothing extra, do so.
8041 I.e. either if a single addi is enough, or we need a movi anyway,
8042 and we don't exceed the maximum offset range (the test for the
8043 latter is conservative for simplicity). */
8044 if (TARGET_SHMEDIA
8045 && ! frame_pointer_needed
8046 && (CONST_OK_FOR_I10 (total_size)
8047 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8048 && total_size <= 2044)))
8049 d_rounding = frame_size;
8051 frame_size -= d_rounding;
8054 if (frame_pointer_needed)
8056 /* We must avoid scheduling the epilogue with previous basic blocks.
8057 See PR/18032 and PR/40313. */
8058 emit_insn (gen_blockage ());
8059 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8060 &live_regs_mask, true);
8062 /* We must avoid moving the stack pointer adjustment past code
8063 which reads from the local frame, else an interrupt could
8064 occur after the SP adjustment and clobber data in the local
8065 frame. */
8066 emit_insn (gen_blockage ());
8067 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8069 else if (frame_size)
8071 /* We must avoid moving the stack pointer adjustment past code
8072 which reads from the local frame, else an interrupt could
8073 occur after the SP adjustment and clobber data in the local
8074 frame. */
8075 emit_insn (gen_blockage ());
8076 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8077 &live_regs_mask, true);
8080 if (SHMEDIA_REGS_STACK_ADJUST ())
8082 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8083 (TARGET_FPU_ANY
8084 ? "__GCC_pop_shmedia_regs"
8085 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8086 /* This must NOT go through the PLT, otherwise mach and macl
8087 may be clobbered. */
8088 emit_insn (gen_shmedia_save_restore_regs_compact
8089 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8092 /* Pop all the registers. */
8094 if (target_flags != save_flags && ! current_function_interrupt)
8095 emit_insn (gen_toggle_sz ());
8096 if (TARGET_SH5)
8098 int offset_base, offset;
8099 int offset_in_r0 = -1;
8100 int sp_in_r0 = 0;
8101 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8102 save_schedule schedule;
8103 save_entry *entry;
8104 int *tmp_pnt;
8106 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8107 offset_base = -entry[1].offset + d_rounding;
8108 tmp_pnt = schedule.temps;
8109 for (; entry->mode != VOIDmode; entry--)
8111 machine_mode mode = (machine_mode) entry->mode;
8112 int reg = entry->reg;
8113 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8115 offset = offset_base + entry->offset;
8116 reg_rtx = gen_rtx_REG (mode, reg);
8118 mem_rtx = gen_frame_mem (mode,
8119 gen_rtx_PLUS (Pmode,
8120 stack_pointer_rtx,
8121 GEN_INT (offset)));
8123 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8124 mem_rtx = NULL_RTX;
8126 if (HAVE_POST_INCREMENT
8127 && (offset == offset_in_r0
8128 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8129 && mem_rtx == NULL_RTX)
8130 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8132 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8134 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8135 post_inc = NULL_RTX;
8136 else
8137 mem_rtx = NULL_RTX;
8140 if (mem_rtx != NULL_RTX)
8141 goto addr_ok;
8143 if (offset_in_r0 == -1)
8145 emit_move_insn (r0, GEN_INT (offset));
8146 offset_in_r0 = offset;
8148 else if (offset != offset_in_r0)
8150 emit_move_insn (r0,
8151 gen_rtx_PLUS
8152 (Pmode, r0,
8153 GEN_INT (offset - offset_in_r0)));
8154 offset_in_r0 += offset - offset_in_r0;
8157 if (post_inc != NULL_RTX)
8159 if (! sp_in_r0)
8161 emit_move_insn (r0,
8162 gen_rtx_PLUS
8163 (Pmode, r0, stack_pointer_rtx));
8164 sp_in_r0 = 1;
8167 mem_rtx = post_inc;
8169 offset_in_r0 += GET_MODE_SIZE (mode);
8171 else if (sp_in_r0)
8172 mem_rtx = gen_frame_mem (mode, r0);
8173 else
8174 mem_rtx = gen_frame_mem (mode,
8175 gen_rtx_PLUS (Pmode,
8176 stack_pointer_rtx,
8177 r0));
8179 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8180 || mem_rtx == post_inc);
8182 addr_ok:
8183 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8184 && mem_rtx != post_inc)
8186 emit_move_insn (r0, mem_rtx);
8187 mem_rtx = r0;
8189 else if (TARGET_REGISTER_P (reg))
8191 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8193 /* Give the scheduler a bit of freedom by using up to
8194 MAX_TEMPS registers in a round-robin fashion. */
8195 emit_move_insn (tmp_reg, mem_rtx);
8196 mem_rtx = tmp_reg;
8197 if (*++tmp_pnt < 0)
8198 tmp_pnt = schedule.temps;
8201 emit_move_insn (reg_rtx, mem_rtx);
8204 gcc_assert (entry->offset + offset_base == d + d_rounding);
8206 else /* ! TARGET_SH5 */
8208 int last_reg;
8210 save_size = 0;
8211 /* For an ISR with RESBANK attribute assigned, don't pop the PR
8212 register. */
8213 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8214 && !sh_cfun_resbank_handler_p ())
8216 if (!frame_pointer_needed)
8217 emit_insn (gen_blockage ());
8218 pop (PR_REG);
8221 /* Banked registers are popped first to avoid being scheduled in the
8222 delay slot. RTE switches banks before the ds instruction. */
8223 if (current_function_interrupt)
8225 bool use_movml = false;
8227 if (TARGET_SH2A)
8229 unsigned int count = 0;
8231 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8232 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8233 count++;
8234 else
8235 break;
8237 /* Use movml when all banked registers are popped. */
8238 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8239 use_movml = true;
8242 if (sh_cfun_resbank_handler_p ())
8243 ; /* Do nothing. */
8244 else if (use_movml)
8246 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8248 /* We must avoid scheduling the multiple-load insn together with
8249 other insns. */
8250 emit_insn (gen_blockage ());
8251 emit_insn (gen_movml_pop_banked (sp_reg));
8252 emit_insn (gen_blockage ());
8254 else
8255 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8256 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8257 pop (i);
8259 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8261 else
8262 last_reg = FIRST_PSEUDO_REGISTER;
8264 for (i = 0; i < last_reg; i++)
8266 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8268 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8269 && hard_reg_set_intersect_p (live_regs_mask,
8270 reg_class_contents[DF_REGS]))
8271 fpscr_deferred = 1;
8272 /* For an ISR with RESBANK attribute assigned, don't pop
8273 the following registers: R0-R14, MACH, MACL and GBR. */
8274 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8275 && ! (sh_cfun_resbank_handler_p ()
8276 && ((j >= FIRST_GENERAL_REG
8277 && j < LAST_GENERAL_REG)
8278 || j == MACH_REG
8279 || j == MACL_REG
8280 || j == GBR_REG)))
8281 pop (j);
8283 if (j == FIRST_FP_REG && fpscr_deferred)
8284 pop (FPSCR_REG);
8287 if (target_flags != save_flags && ! current_function_interrupt)
8288 emit_insn (gen_toggle_sz ());
8289 target_flags = save_flags;
8291 output_stack_adjust (crtl->args.pretend_args_size
8292 + save_size + d_rounding
8293 + crtl->args.info.stack_regs * 8,
8294 stack_pointer_rtx, e, NULL, true);
8296 if (crtl->calls_eh_return)
8297 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8298 EH_RETURN_STACKADJ_RTX));
8300 /* Switch back to the normal stack if necessary. */
8301 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8302 emit_insn (gen_sp_switch_2 ());
8304 /* Tell flow the insn that pops PR isn't dead. */
8305 /* PR_REG will never be live in SHmedia mode, and we don't need to
8306 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8307 by the return pattern. */
8308 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8309 emit_use (gen_rtx_REG (SImode, PR_REG));
8312 /* Emit code to change the current function's return address to RA.
8313 TEMP is available as a scratch register, if needed. */
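/* In outline: if the return-address register (PR, or the SHmedia register
   chosen by sh_media_register_for_return) is not live, RA is moved into it
   directly; otherwise its save slot is located (via the SH5 save schedule
   when applicable) and RA is stored into that stack slot through TEMP.  */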
8314 void
8315 sh_set_return_address (rtx ra, rtx tmp)
8317 HARD_REG_SET live_regs_mask;
8318 int d;
8319 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8320 int pr_offset;
8322 d = calc_live_regs (&live_regs_mask);
8324 /* If pr_reg isn't live, we can set it (or the register given in
8325 sh_media_register_for_return) directly. */
8326 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8328 rtx rr;
8330 if (TARGET_SHMEDIA)
8332 int rr_regno = sh_media_register_for_return ();
8334 if (rr_regno < 0)
8335 rr_regno = pr_reg;
8337 rr = gen_rtx_REG (DImode, rr_regno);
8339 else
8340 rr = gen_rtx_REG (SImode, pr_reg);
8342 emit_insn (GEN_MOV (rr, ra));
8343 /* Tell flow the register for return isn't dead. */
8344 emit_use (rr);
8345 return;
8348 if (TARGET_SH5)
8350 int offset;
8351 save_schedule schedule;
8352 save_entry *entry;
8354 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8355 offset = entry[1].offset;
8356 for (; entry->mode != VOIDmode; entry--)
8357 if (entry->reg == pr_reg)
8358 goto found;
8360 /* We can't find the PR register. */
8361 gcc_unreachable ();
8363 found:
8364 offset = entry->offset - offset;
8365 pr_offset = (rounded_frame_size (d) + offset
8366 + SHMEDIA_REGS_STACK_ADJUST ());
8368 else
8369 pr_offset = rounded_frame_size (d);
8371 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8373 if (frame_pointer_needed)
8374 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8375 else
8376 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8378 tmp = gen_frame_mem (Pmode, tmp);
8379 emit_insn (GEN_MOV (tmp, ra));
8380 /* Tell flow this store isn't dead. */
8381 emit_use (tmp);
8384 /* Clear variables at function end. */
8385 static void
8386 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8387 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8391 static rtx
8392 sh_builtin_saveregs (void)
8394 /* First unnamed integer register. */
8395 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8396 /* Number of integer registers we need to save. */
8397 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8398 /* First unnamed SFmode float reg */
8399 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8400 /* Number of SFmode float regs to save. */
8401 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8402 rtx regbuf, fpregs;
8403 int bufsize, regno;
8404 alias_set_type alias_set;
8406 if (TARGET_SH5)
8408 if (n_intregs)
8410 int pushregs = n_intregs;
8412 while (pushregs < NPARM_REGS (SImode) - 1
8413 && (CALL_COOKIE_INT_REG_GET
8414 (crtl->args.info.call_cookie,
8415 NPARM_REGS (SImode) - pushregs)
8416 == 1))
8418 crtl->args.info.call_cookie
8419 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8420 - pushregs, 1);
8421 pushregs++;
8424 if (pushregs == NPARM_REGS (SImode))
8425 crtl->args.info.call_cookie
8426 |= (CALL_COOKIE_INT_REG (0, 1)
8427 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8428 else
8429 crtl->args.info.call_cookie
8430 |= CALL_COOKIE_STACKSEQ (pushregs);
8432 crtl->args.pretend_args_size += 8 * n_intregs;
8434 if (TARGET_SHCOMPACT)
8435 return const0_rtx;
8438 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8440 error ("__builtin_saveregs not supported by this subtarget");
8441 return const0_rtx;
8444 if (TARGET_SHMEDIA)
8445 n_floatregs = 0;
8447 /* Allocate block of memory for the regs. */
8448 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8449 Or can assign_stack_local accept a 0 SIZE argument? */
8450 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8452 if (TARGET_SHMEDIA)
8453 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8454 else if (n_floatregs & 1)
8456 rtx addr;
8458 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8459 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8460 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8461 regbuf = change_address (regbuf, BLKmode, addr);
8463 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8465 rtx addr, mask;
8467 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8468 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8469 XEXP (regbuf, 0), 4));
8470 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8471 emit_insn (gen_andsi3 (addr, addr, mask));
8472 regbuf = change_address (regbuf, BLKmode, addr);
8474 else
8475 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8476 alias_set = get_varargs_alias_set ();
8477 set_mem_alias_set (regbuf, alias_set);
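/* Layout of REGBUF as built below: the first N_FLOATREGS words receive the
   unnamed FP argument registers (stored in reverse order further down), and
   the N_INTREGS words that follow receive the unnamed integer argument
   registers.  sh_va_start later splits the buffer along the same line.  */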
8479 /* Save int args.
8480 This is optimized to only save the regs that are necessary. Explicitly
8481 named args need not be saved. */
8482 if (n_intregs > 0)
8483 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8484 adjust_address (regbuf, BLKmode,
8485 n_floatregs * UNITS_PER_WORD),
8486 n_intregs);
8488 if (TARGET_SHMEDIA)
8489 /* Return the address of the regbuf. */
8490 return XEXP (regbuf, 0);
8492 /* Save float args.
8493 This is optimized to only save the regs that are necessary. Explicitly
8494 named args need not be saved.
8495 We explicitly build a pointer to the buffer because it halves the insn
8496 count when not optimizing (otherwise the pointer is built for each reg
8497 saved).
8498 We emit the moves in reverse order so that we can use predecrement. */
8500 fpregs = copy_to_mode_reg (Pmode,
8501 plus_constant (Pmode, XEXP (regbuf, 0),
8502 n_floatregs * UNITS_PER_WORD));
8503 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8505 rtx mem;
8506 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8508 emit_insn (gen_addsi3 (fpregs, fpregs,
8509 GEN_INT (-2 * UNITS_PER_WORD)));
8510 mem = change_address (regbuf, DFmode, fpregs);
8511 emit_move_insn (mem,
8512 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8514 regno = first_floatreg;
8515 if (regno & 1)
8517 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8518 mem = change_address (regbuf, SFmode, fpregs);
8519 emit_move_insn (mem,
8520 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8521 + regno - SH_REG_MSW_OFFSET));
8524 else
8525 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8527 rtx mem;
8529 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8530 mem = change_address (regbuf, SFmode, fpregs);
8531 emit_move_insn (mem,
8532 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8535 /* Return the address of the regbuf. */
8536 return XEXP (regbuf, 0);
8539 /* Define the `__builtin_va_list' type for the ABI. */
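/* The record built below has five pointer fields: __va_next_o and
   __va_next_o_limit bound the saved integer argument registers,
   __va_next_fp and __va_next_fp_limit bound the saved FP argument
   registers, and __va_next_stack points at the arguments that were passed
   on the stack.  sh_va_start initializes them from __builtin_saveregs.  */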
8540 static tree
8541 sh_build_builtin_va_list (void)
8543 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8544 tree record, type_decl;
8546 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8547 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8548 return ptr_type_node;
8550 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8551 type_decl = build_decl (BUILTINS_LOCATION,
8552 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8554 f_next_o = build_decl (BUILTINS_LOCATION,
8555 FIELD_DECL, get_identifier ("__va_next_o"),
8556 ptr_type_node);
8557 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8558 FIELD_DECL,
8559 get_identifier ("__va_next_o_limit"),
8560 ptr_type_node);
8561 f_next_fp = build_decl (BUILTINS_LOCATION,
8562 FIELD_DECL, get_identifier ("__va_next_fp"),
8563 ptr_type_node);
8564 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8565 FIELD_DECL,
8566 get_identifier ("__va_next_fp_limit"),
8567 ptr_type_node);
8568 f_next_stack = build_decl (BUILTINS_LOCATION,
8569 FIELD_DECL, get_identifier ("__va_next_stack"),
8570 ptr_type_node);
8572 DECL_FIELD_CONTEXT (f_next_o) = record;
8573 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8574 DECL_FIELD_CONTEXT (f_next_fp) = record;
8575 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8576 DECL_FIELD_CONTEXT (f_next_stack) = record;
8578 TYPE_STUB_DECL (record) = type_decl;
8579 TYPE_NAME (record) = type_decl;
8580 TYPE_FIELDS (record) = f_next_o;
8581 DECL_CHAIN (f_next_o) = f_next_o_limit;
8582 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8583 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8584 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8586 layout_type (record);
8588 return record;
8591 /* Implement `va_start' for varargs and stdarg. */
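/* Sketch of the non-SH5 path below: __builtin_saveregs dumps the unnamed
   argument registers into a buffer; __va_next_fp is pointed at the start of
   that buffer, __va_next_fp_limit a further (8 - <named FP args>) words in,
   __va_next_o continues from there with (4 - <named int args>) words for
   the integer registers, and __va_next_stack is set to NEXTARG, the first
   stack-passed argument.  */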
8592 static void
8593 sh_va_start (tree valist, rtx nextarg)
8595 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8596 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8597 tree t, u;
8598 int nfp, nint;
8600 if (TARGET_SH5)
8602 expand_builtin_saveregs ();
8603 std_expand_builtin_va_start (valist, nextarg);
8604 return;
8607 if ((! TARGET_SH2E && ! TARGET_SH4)
8608 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8610 std_expand_builtin_va_start (valist, nextarg);
8611 return;
8614 f_next_o = TYPE_FIELDS (va_list_type_node);
8615 f_next_o_limit = DECL_CHAIN (f_next_o);
8616 f_next_fp = DECL_CHAIN (f_next_o_limit);
8617 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8618 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8620 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8621 NULL_TREE);
8622 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8623 valist, f_next_o_limit, NULL_TREE);
8624 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8625 NULL_TREE);
8626 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8627 valist, f_next_fp_limit, NULL_TREE);
8628 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8629 valist, f_next_stack, NULL_TREE);
8631 /* Call __builtin_saveregs. */
8632 u = make_tree (sizetype, expand_builtin_saveregs ());
8633 u = fold_convert (ptr_type_node, u);
8634 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8635 TREE_SIDE_EFFECTS (t) = 1;
8636 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8638 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8639 if (nfp < 8)
8640 nfp = 8 - nfp;
8641 else
8642 nfp = 0;
8643 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8644 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8645 TREE_SIDE_EFFECTS (t) = 1;
8646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8648 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8649 TREE_SIDE_EFFECTS (t) = 1;
8650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8652 nint = crtl->args.info.arg_count[SH_ARG_INT];
8653 if (nint < 4)
8654 nint = 4 - nint;
8655 else
8656 nint = 0;
8657 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8658 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8659 TREE_SIDE_EFFECTS (t) = 1;
8660 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8662 u = make_tree (ptr_type_node, nextarg);
8663 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8664 TREE_SIDE_EFFECTS (t) = 1;
8665 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8668 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8669 member, return it. */
8670 static tree
8671 find_sole_member (tree type)
8673 tree field, member = NULL_TREE;
8675 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8677 if (TREE_CODE (field) != FIELD_DECL)
8678 continue;
8679 if (!DECL_SIZE (field))
8680 return NULL_TREE;
8681 if (integer_zerop (DECL_SIZE (field)))
8682 continue;
8683 if (member)
8684 return NULL_TREE;
8685 member = field;
8687 return member;
8690 /* Implement `va_arg'. */
8691 static tree
8692 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8693 gimple_seq *post_p ATTRIBUTE_UNUSED)
8695 HOST_WIDE_INT size, rsize;
8696 tree tmp, pptr_type_node;
8697 tree addr, lab_over = NULL, result = NULL;
8698 bool pass_by_ref;
8699 tree eff_type;
8701 if (!VOID_TYPE_P (type))
8702 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8703 else
8704 pass_by_ref = false;
8706 if (pass_by_ref)
8707 type = build_pointer_type (type);
8709 size = int_size_in_bytes (type);
8710 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8711 pptr_type_node = build_pointer_type (ptr_type_node);
8713 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8714 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8716 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8717 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8718 int pass_as_float;
8719 tree lab_false;
8720 tree member;
8722 f_next_o = TYPE_FIELDS (va_list_type_node);
8723 f_next_o_limit = DECL_CHAIN (f_next_o);
8724 f_next_fp = DECL_CHAIN (f_next_o_limit);
8725 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8726 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8728 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8729 NULL_TREE);
8730 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8731 valist, f_next_o_limit, NULL_TREE);
8732 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8733 valist, f_next_fp, NULL_TREE);
8734 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8735 valist, f_next_fp_limit, NULL_TREE);
8736 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8737 valist, f_next_stack, NULL_TREE);
8739 /* Structures with a single member with a distinct mode are passed
8740 like their member. This is relevant if the latter has a REAL_TYPE
8741 or COMPLEX_TYPE type. */
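/* For example, a hypothetical struct { double d; } typically has the same
   mode as a plain double, so the loop below unwraps it and va_arg fetches
   it from the FP save area just as it would a bare double.  */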
8742 eff_type = type;
8743 while (TREE_CODE (eff_type) == RECORD_TYPE
8744 && (member = find_sole_member (eff_type))
8745 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8746 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8747 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8749 tree field_type = TREE_TYPE (member);
8751 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8752 eff_type = field_type;
8753 else
8755 gcc_assert ((TYPE_ALIGN (eff_type)
8756 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8757 || (TYPE_ALIGN (eff_type)
8758 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8759 break;
8763 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8765 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8766 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8767 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8768 && size <= 16));
8770 else
8772 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8775 addr = create_tmp_var (pptr_type_node);
8776 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8777 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8779 valist = build_simple_mem_ref (addr);
8781 if (pass_as_float)
8783 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8784 tree cmp;
8785 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8787 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8788 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8790 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8791 tmp = next_fp_limit;
8792 if (size > 4 && !is_double)
8793 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8794 tmp = build2 (GE_EXPR, boolean_type_node,
8795 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8796 cmp = build3 (COND_EXPR, void_type_node, tmp,
8797 build1 (GOTO_EXPR, void_type_node,
8798 unshare_expr (lab_false)), NULL_TREE);
8799 if (!is_double)
8800 gimplify_and_add (cmp, pre_p);
8802 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8803 || (is_double || size == 16))
8805 tmp = fold_convert (sizetype, next_fp_tmp);
8806 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8807 size_int (UNITS_PER_WORD));
8808 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8809 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8811 if (is_double)
8812 gimplify_and_add (cmp, pre_p);
8814 #ifdef FUNCTION_ARG_SCmode_WART
8815 if (TYPE_MODE (eff_type) == SCmode
8816 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8818 tree subtype = TREE_TYPE (eff_type);
8819 tree real, imag;
8821 imag
8822 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8823 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8825 real
8826 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8827 real = get_initialized_tmp_var (real, pre_p, NULL);
8829 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8830 if (type != eff_type)
8831 result = build1 (VIEW_CONVERT_EXPR, type, result);
8832 result = get_initialized_tmp_var (result, pre_p, NULL);
8834 #endif /* FUNCTION_ARG_SCmode_WART */
8836 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8837 gimplify_and_add (tmp, pre_p);
8839 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8840 gimplify_and_add (tmp, pre_p);
8842 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8843 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8844 gimplify_assign (unshare_expr (next_fp_tmp),
8845 unshare_expr (valist), pre_p);
8847 gimplify_assign (unshare_expr (valist),
8848 unshare_expr (next_fp_tmp), post_p);
8849 valist = next_fp_tmp;
8851 else
8853 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8854 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8855 unshare_expr (next_o_limit));
8856 tmp = build3 (COND_EXPR, void_type_node, tmp,
8857 build1 (GOTO_EXPR, void_type_node,
8858 unshare_expr (lab_false)),
8859 NULL_TREE);
8860 gimplify_and_add (tmp, pre_p);
8862 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8863 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8865 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8866 gimplify_and_add (tmp, pre_p);
8868 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8869 gimplify_and_add (tmp, pre_p);
8871 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8872 gimplify_assign (unshare_expr (next_o),
8873 unshare_expr (next_o_limit), pre_p);
8875 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8876 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8879 if (!result)
8881 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8882 gimplify_and_add (tmp, pre_p);
8886 /* ??? In va-sh.h, there had been code to make values larger than
8887 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8889 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8890 if (result)
8892 gimplify_assign (result, tmp, pre_p);
8893 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8894 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8895 gimplify_and_add (tmp, pre_p);
8897 else
8898 result = tmp;
8900 if (pass_by_ref)
8901 result = build_va_arg_indirect_ref (result);
8903 return result;
8906 /* 64-bit floating point memory transfers are paired single precision loads
8907 or stores. So DWARF information needs fixing in little endian mode (unless
8908 PR=SZ=1 in FPSCR). */
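/* For instance, a DFmode value living in an even/odd FP register pair
   (REGNO, REGNO + 1) is reported to DWARF as the two SFmode halves in the
   order (REGNO + 1, REGNO), matching how the paired single precision moves
   lay the value out in little endian mode.  */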
8909 static rtx
8910 sh_dwarf_register_span (rtx reg)
8912 unsigned regno = REGNO (reg);
8914 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8915 return NULL_RTX;
8917 return
8918 gen_rtx_PARALLEL (VOIDmode,
8919 gen_rtvec (2,
8920 gen_rtx_REG (SFmode, regno + 1),
8921 gen_rtx_REG (SFmode, regno)));
8924 static machine_mode
8925 sh_promote_function_mode (const_tree type, machine_mode mode,
8926 int *punsignedp, const_tree funtype,
8927 int for_return)
8929 if (sh_promote_prototypes (funtype))
8930 return promote_mode (type, mode, punsignedp);
8931 else
8932 return default_promote_function_mode (type, mode, punsignedp, funtype,
8933 for_return);
8936 static bool
8937 sh_promote_prototypes (const_tree type)
8939 if (TARGET_HITACHI)
8940 return false;
8941 if (! type)
8942 return true;
8943 return ! sh_attr_renesas_p (type);
8946 /* Whether an argument must be passed by reference. On SHcompact, we
8947 pretend arguments wider than 32 bits that would have been passed in
8948 registers are passed by reference, so that an SHmedia trampoline
8949 loads them into the full 64-bit registers. */
8950 static int
8951 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8952 const_tree type, bool named)
8954 unsigned HOST_WIDE_INT size;
8956 if (type)
8957 size = int_size_in_bytes (type);
8958 else
8959 size = GET_MODE_SIZE (mode);
8961 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8962 && (!named
8963 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8964 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8965 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8966 && size > 4
8967 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8968 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8969 return size;
8970 else
8971 return 0;
8974 static bool
8975 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8976 const_tree type, bool named)
8978 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8980 if (targetm.calls.must_pass_in_stack (mode, type))
8981 return true;
8983 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8984 wants to know about pass-by-reference semantics for incoming
8985 arguments. */
8986 if (! cum)
8987 return false;
8989 if (TARGET_SHCOMPACT)
8991 cum->byref = shcompact_byref (cum, mode, type, named);
8992 return cum->byref != 0;
8995 return false;
8998 static bool
8999 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
9000 const_tree type, bool named ATTRIBUTE_UNUSED)
9002 /* ??? How can it possibly be correct to return true only on the
9003 caller side of the equation? Is there someplace else in the
9004 sh backend that's magically producing the copies? */
9005 return (get_cumulative_args (cum)->outgoing
9006 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
9007 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
9010 /* Round a register number up to a proper boundary for an arg of mode
9011 MODE.
9012 The SH doesn't care about double alignment, so we only
9013 round doubles to even regs when explicitly asked to. */
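/* Put differently: the expression below bumps the argument register count
   to the next even number when a register argument wider than a word needs
   double alignment (TARGET_ALIGN_DOUBLE, or a DFmode/DCmode value that
   still fits in the FP argument registers on SH4/SH2A double-float
   targets); otherwise the raw count is returned unchanged.  */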
9014 static int
9015 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
9017 /* FIXME: This used to be a macro and has been copy-pasted into this
9018 function as-is. Make this more readable. */
9019 return
9020 (((TARGET_ALIGN_DOUBLE
9021 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9022 && (mode == DFmode || mode == DCmode)
9023 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9024 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9025 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9026 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9027 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
9030 /* Return true if arg of the specified mode should be passed in a register
9031 or false otherwise. */
9032 static bool
9033 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9034 const_tree type)
9036 /* FIXME: This used to be a macro and has been copy-pasted into this
9037 function as-is. Make this more readable. */
9038 return
9039 ((type == 0
9040 || (! TREE_ADDRESSABLE (type)
9041 && (! (TARGET_HITACHI || cum.renesas_abi)
9042 || ! (AGGREGATE_TYPE_P (type)
9043 || (!TARGET_FPU_ANY
9044 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9045 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9046 && ! cum.force_mem
9047 && (TARGET_SH2E
9048 ? ((mode) == BLKmode
9049 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9050 + int_size_in_bytes (type))
9051 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9052 : ((sh_round_reg (cum, mode)
9053 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9054 <= NPARM_REGS (mode)))
9055 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9058 static int
9059 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9060 tree type, bool named ATTRIBUTE_UNUSED)
9062 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9063 int words = 0;
9065 if (!TARGET_SH5
9066 && sh_pass_in_reg_p (*cum, mode, type)
9067 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9068 && (sh_round_reg (*cum, mode)
9069 + (mode != BLKmode
9070 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9071 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9072 > NPARM_REGS (mode)))
9073 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9075 else if (!TARGET_SHCOMPACT
9076 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9077 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9079 return words * UNITS_PER_WORD;
9083 /* Define where to put the arguments to a function.
9084 Value is zero to push the argument on the stack,
9085 or a hard register in which to store the argument.
9087 MODE is the argument's machine mode.
9088 TYPE is the data type of the argument (as a tree).
9089 This is null for libcalls where that information may
9090 not be available.
9091 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9092 the preceding args and about the function being called.
9093 NAMED is nonzero if this argument is a named parameter
9094 (otherwise it is an extra parameter matching an ellipsis).
9096 On SH the first args are normally in registers
9097 and the rest are pushed. Any arg that starts within the first
9098 NPARM_REGS words is at least partially passed in a register unless
9099 its data type forbids it. */
9100 static rtx
9101 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9102 const_tree type, bool named)
9104 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9106 if (! TARGET_SH5 && mode == VOIDmode)
9107 return GEN_INT (ca->renesas_abi ? 1 : 0);
9109 if (! TARGET_SH5
9110 && sh_pass_in_reg_p (*ca, mode, type)
9111 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9113 int regno;
9115 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9116 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9118 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9119 gen_rtx_REG (SFmode,
9120 BASE_ARG_REG (mode)
9121 + (sh_round_reg (*ca, mode) ^ 1)),
9122 const0_rtx);
9123 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9124 gen_rtx_REG (SFmode,
9125 BASE_ARG_REG (mode)
9126 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9127 GEN_INT (4));
9128 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9131 /* If the alignment of a DF value causes an SF register to be
9132 skipped, we will use that skipped register for the next SF
9133 value. */
9134 if ((TARGET_HITACHI || ca->renesas_abi)
9135 && ca->free_single_fp_reg
9136 && mode == SFmode)
9137 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9139 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9140 ^ (mode == SFmode && TARGET_SH4
9141 && TARGET_LITTLE_ENDIAN
9142 && ! TARGET_HITACHI && ! ca->renesas_abi);
9143 return gen_rtx_REG (mode, regno);
9147 if (TARGET_SH5)
9149 if (mode == VOIDmode && TARGET_SHCOMPACT)
9150 return GEN_INT (ca->call_cookie);
9152 /* The following test assumes unnamed arguments are promoted to
9153 DFmode. */
9154 if (mode == SFmode && ca->free_single_fp_reg)
9155 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9157 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9158 && (named || ! ca->prototype_p)
9159 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9161 if (! ca->prototype_p && TARGET_SHMEDIA)
9162 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9164 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9165 FIRST_FP_PARM_REG
9166 + ca->arg_count[(int) SH_ARG_FLOAT]);
9169 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9170 && (! TARGET_SHCOMPACT
9171 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9172 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9173 type, named))))
9175 return gen_rtx_REG (mode, (FIRST_PARM_REG
9176 + ca->arg_count[(int) SH_ARG_INT]));
9179 return NULL_RTX;
9182 return NULL_RTX;
9185 /* Update the data in CUM to advance over an argument
9186 of mode MODE and data type TYPE.
9187 (TYPE is null for libcalls where that information may not be
9188 available.) */
9189 static void
9190 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9191 const_tree type, bool named)
9193 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9195 if (ca->force_mem)
9196 ca->force_mem = 0;
9197 else if (TARGET_SH5)
9199 const_tree type2 = (ca->byref && type
9200 ? TREE_TYPE (type)
9201 : type);
9202 machine_mode mode2 = (ca->byref && type
9203 ? TYPE_MODE (type2)
9204 : mode);
9205 int dwords = ((ca->byref
9206 ? ca->byref
9207 : mode2 == BLKmode
9208 ? int_size_in_bytes (type2)
9209 : GET_MODE_SIZE (mode2)) + 7) / 8;
9210 int numregs = MIN (dwords, NPARM_REGS (SImode)
9211 - ca->arg_count[(int) SH_ARG_INT]);
9213 if (numregs)
9215 ca->arg_count[(int) SH_ARG_INT] += numregs;
9216 if (TARGET_SHCOMPACT
9217 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9219 ca->call_cookie
9220 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9221 - numregs, 1);
9222 /* N.B. We want this also for outgoing. */
9223 ca->stack_regs += numregs;
9225 else if (ca->byref)
9227 if (! ca->outgoing)
9228 ca->stack_regs += numregs;
9229 ca->byref_regs += numregs;
9230 ca->byref = 0;
9232 ca->call_cookie
9233 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9234 - numregs, 2);
9235 while (--numregs);
9236 ca->call_cookie
9237 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9238 - 1, 1);
9240 else if (dwords > numregs)
9242 int pushregs = numregs;
9244 if (TARGET_SHCOMPACT)
9245 ca->stack_regs += numregs;
9246 while (pushregs < NPARM_REGS (SImode) - 1
9247 && (CALL_COOKIE_INT_REG_GET
9248 (ca->call_cookie,
9249 NPARM_REGS (SImode) - pushregs)
9250 == 1))
9252 ca->call_cookie
9253 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9254 - pushregs, 1);
9255 pushregs++;
9257 if (numregs == NPARM_REGS (SImode))
9258 ca->call_cookie
9259 |= CALL_COOKIE_INT_REG (0, 1)
9260 | CALL_COOKIE_STACKSEQ (numregs - 1);
9261 else
9262 ca->call_cookie
9263 |= CALL_COOKIE_STACKSEQ (numregs);
9266 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9267 && (named || ! ca->prototype_p))
9269 if (mode2 == SFmode && ca->free_single_fp_reg)
9270 ca->free_single_fp_reg = 0;
9271 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9272 < NPARM_REGS (SFmode))
9274 int numfpregs
9275 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9276 NPARM_REGS (SFmode)
9277 - ca->arg_count[(int) SH_ARG_FLOAT]);
9279 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9281 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9283 if (ca->outgoing && numregs > 0)
9286 ca->call_cookie
9287 |= (CALL_COOKIE_INT_REG
9288 (ca->arg_count[(int) SH_ARG_INT]
9289 - numregs + ((numfpregs - 2) / 2),
9290 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9291 - numfpregs) / 2));
9293 while (numfpregs -= 2);
9295 else if (mode2 == SFmode && (named)
9296 && (ca->arg_count[(int) SH_ARG_FLOAT]
9297 < NPARM_REGS (SFmode)))
9298 ca->free_single_fp_reg
9299 = FIRST_FP_PARM_REG - numfpregs
9300 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9303 return;
9306 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9308 /* Note that we've used the skipped register. */
9309 if (mode == SFmode && ca->free_single_fp_reg)
9311 ca->free_single_fp_reg = 0;
9312 return;
9314 /* When we have a DF after an SF, there's an SF register that gets
9315 skipped in order to align the DF value. We note this skipped
9316 register, because the next SF value will use it, and not the
9317 SF that follows the DF. */
9318 if (mode == DFmode
9319 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9321 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9322 + BASE_ARG_REG (mode));
9326 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9327 || sh_pass_in_reg_p (*ca, mode, type))
9328 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9329 = (sh_round_reg (*ca, mode)
9330 + (mode == BLKmode
9331 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9332 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9335 /* The Renesas calling convention doesn't quite fit into this scheme since
9336 the address is passed like an invisible argument, but one that is always
9337 passed in memory. */
9338 static rtx
9339 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9341 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9342 return NULL_RTX;
9343 return gen_rtx_REG (Pmode, 2);
9346 /* Worker function for TARGET_FUNCTION_VALUE.
9348 For the SH, this is like LIBCALL_VALUE, except that we must change the
9349 mode like PROMOTE_MODE does.
9350 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9351 tested here has to be kept in sync with the one in
9352 explow.c:promote_mode. */
9353 static rtx
9354 sh_function_value (const_tree valtype,
9355 const_tree fn_decl_or_type,
9356 bool outgoing ATTRIBUTE_UNUSED)
9358 if (fn_decl_or_type
9359 && !DECL_P (fn_decl_or_type))
9360 fn_decl_or_type = NULL;
9362 return gen_rtx_REG (
9363 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9364 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9365 && (TREE_CODE (valtype) == INTEGER_TYPE
9366 || TREE_CODE (valtype) == ENUMERAL_TYPE
9367 || TREE_CODE (valtype) == BOOLEAN_TYPE
9368 || TREE_CODE (valtype) == REAL_TYPE
9369 || TREE_CODE (valtype) == OFFSET_TYPE))
9370 && sh_promote_prototypes (fn_decl_or_type)
9371 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9372 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9375 /* Worker function for TARGET_LIBCALL_VALUE. */
9376 static rtx
9377 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9379 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9382 /* Return true if N is a possible register number of function value. */
9383 static bool
9384 sh_function_value_regno_p (const unsigned int regno)
9386 return ((regno) == FIRST_RET_REG
9387 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9388 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9391 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9392 static bool
9393 sh_return_in_memory (const_tree type, const_tree fndecl)
9395 if (TARGET_SH5)
9397 if (TYPE_MODE (type) == BLKmode)
9398 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9399 else
9400 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9402 else
9404 return (TYPE_MODE (type) == BLKmode
9405 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9406 && TREE_CODE (type) == RECORD_TYPE));
9410 /* We actually emit the code in sh_expand_prologue. We used to use
9411 a static variable to flag that we need to emit this code, but that
9412 doesn't work when inlining, when functions are deferred and then emitted
9413 later. Fortunately, we already have two flags that are part of struct
9414 function that tell if a function uses varargs or stdarg. */
9415 static void
9416 sh_setup_incoming_varargs (cumulative_args_t ca,
9417 machine_mode mode,
9418 tree type,
9419 int *pretend_arg_size,
9420 int second_time ATTRIBUTE_UNUSED)
9422 gcc_assert (cfun->stdarg);
9423 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9425 int named_parm_regs, anon_parm_regs;
9427 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9428 + (mode == BLKmode
9429 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9430 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9431 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9432 if (anon_parm_regs > 0)
9433 *pretend_arg_size = anon_parm_regs * 4;
9437 static bool
9438 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9440 return TARGET_SH5;
9443 static bool
9444 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9446 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9448 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9452 /* Define the offset between two registers, one to be eliminated, and
9453 the other its replacement, at the start of a routine. */
9454 int
9455 initial_elimination_offset (int from, int to)
9457 int regs_saved;
9458 int regs_saved_rounding = 0;
9459 int total_saved_regs_space;
9460 int total_auto_space;
9461 int save_flags = target_flags;
9462 int copy_flags;
9463 HARD_REG_SET live_regs_mask;
9465 shmedia_space_reserved_for_target_registers = false;
9466 regs_saved = calc_live_regs (&live_regs_mask);
9467 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9469 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9471 shmedia_space_reserved_for_target_registers = true;
9472 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9475 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9476 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9477 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9479 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9480 copy_flags = target_flags;
9481 target_flags = save_flags;
9483 total_saved_regs_space = regs_saved + regs_saved_rounding;
9485 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9486 return total_saved_regs_space + total_auto_space
9487 + crtl->args.info.byref_regs * 8;
9489 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9490 return total_saved_regs_space + total_auto_space
9491 + crtl->args.info.byref_regs * 8;
9493 /* Initial gap between fp and sp is 0. */
9494 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9495 return 0;
9497 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9498 return rounded_frame_size (0);
9500 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9501 return rounded_frame_size (0);
9503 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9504 && (to == HARD_FRAME_POINTER_REGNUM
9505 || to == STACK_POINTER_REGNUM));
9506 if (TARGET_SH5)
9508 int n = total_saved_regs_space;
9509 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9510 save_schedule schedule;
9511 save_entry *entry;
9513 n += total_auto_space;
9515 /* If it wasn't saved, there's not much we can do. */
9516 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9517 return n;
9519 target_flags = copy_flags;
9521 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9522 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9523 if (entry->reg == pr_reg)
9525 target_flags = save_flags;
9526 return entry->offset;
9528 gcc_unreachable ();
9530 else
9531 return total_auto_space;
9534 /* Parse the -mfixed-range= option string. */
9535 void
9536 sh_fix_range (const char *const_str)
9538 int i, first, last;
9539 char *str, *dash, *comma;
9541 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9542 REG2 are either register names or register numbers. The effect
9543 of this option is to mark the registers in the range from REG1 to
9544 REG2 as ``fixed'' so they won't be used by the compiler. */
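/* Illustrative example (a sketch, not from the original sources): an
   option string such as
       -mfixed-range=r4-r7,r10-r10
   would mark r4..r7 and r10 as fixed and call-used.  The register names
   shown are assumptions; decode_reg_name below accepts whatever register
   names or numbers the target actually defines.  */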
9546 i = strlen (const_str);
9547 str = (char *) alloca (i + 1);
9548 memcpy (str, const_str, i + 1);
9550 while (1)
9552 dash = strchr (str, '-');
9553 if (!dash)
9555 warning (0, "value of -mfixed-range must have form REG1-REG2");
9556 return;
9558 *dash = '\0';
9559 comma = strchr (dash + 1, ',');
9560 if (comma)
9561 *comma = '\0';
9563 first = decode_reg_name (str);
9564 if (first < 0)
9566 warning (0, "unknown register name: %s", str);
9567 return;
9570 last = decode_reg_name (dash + 1);
9571 if (last < 0)
9573 warning (0, "unknown register name: %s", dash + 1);
9574 return;
9577 *dash = '-';
9579 if (first > last)
9581 warning (0, "%s-%s is an empty range", str, dash + 1);
9582 return;
9585 for (i = first; i <= last; ++i)
9586 fixed_regs[i] = call_used_regs[i] = 1;
9588 if (!comma)
9589 break;
9591 *comma = ',';
9592 str = comma + 1;
9596 /* Insert any deferred function attributes from earlier pragmas. */
9597 static void
9598 sh_insert_attributes (tree node, tree *attributes)
9600 tree attrs;
9602 if (TREE_CODE (node) != FUNCTION_DECL)
9603 return;
9605 /* We are only interested in declarations.  */
9606 if (!DECL_P (node))
9607 return;
9609 /* Append the attributes to the deferred attributes. */
9610 *sh_deferred_function_attributes_tail = *attributes;
9611 attrs = sh_deferred_function_attributes;
9612 if (!attrs)
9613 return;
9615 /* Some attributes imply or require the interrupt attribute. */
9616 if (!lookup_attribute ("interrupt_handler", attrs)
9617 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9619 /* If we have a trapa_handler, but no interrupt_handler attribute,
9620 insert an interrupt_handler attribute. */
9621 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9622 /* We can't use sh_pr_interrupt here because that's not in the
9623 java frontend. */
9624 attrs
9625 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9626 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9627 if the interrupt attribute is missing, we ignore the attribute
9628 and warn. */
9629 else if (lookup_attribute ("sp_switch", attrs)
9630 || lookup_attribute ("trap_exit", attrs)
9631 || lookup_attribute ("nosave_low_regs", attrs)
9632 || lookup_attribute ("resbank", attrs))
9634 tree *tail;
9636 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9638 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9639 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9640 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9641 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9642 warning (OPT_Wattributes,
9643 "%qE attribute only applies to interrupt functions",
9644 TREE_PURPOSE (attrs));
9645 else
9647 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9648 NULL_TREE);
9649 tail = &TREE_CHAIN (*tail);
9652 attrs = *attributes;
9656 /* Install the processed list. */
9657 *attributes = attrs;
9659 /* Clear deferred attributes. */
9660 sh_deferred_function_attributes = NULL_TREE;
9661 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9663 return;
9666 /*------------------------------------------------------------------------------
9667 Target specific attributes
9668 Supported attributes are:
9670 * interrupt_handler
9671 Specifies this function is an interrupt handler.
9673 * trapa_handler
9674 Like interrupt_handler, but don't save all registers.
9676 * sp_switch
9677 Specifies an alternate stack for an interrupt handler to run on.
9679 * trap_exit
9680 Use a trapa to exit an interrupt function instead of rte.
9682 * nosave_low_regs
9683 Don't save r0..r7 in an interrupt handler function.
9684 This is useful on SH3* and SH4*, which have a separate set of low
9685 regs for user and privileged modes.
9686 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9687 those that run with interrupts disabled and thus can't be
9688 interrupted themselves).
9690 * renesas
9691 Use Renesas calling/layout conventions (functions and structures).
9693 * resbank
9694 In case of an interrupt handler function, use a register bank to
9695 save registers R0-R14, MACH, MACL, GBR and PR.
9696 This is available only on SH2A targets.
9698 * function_vector
9699 Declares a function to be called using the TBR relative addressing
9700 mode. Takes an argument that specifies the slot number in the table
9701 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
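/* Illustrative sketch (not part of the original file) of how user code is
   assumed to combine these attributes; the string "alt_stack", the trap
   number 12 and the vector slot 18 are made-up example values:

     void timer_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (12)));

     void fast_call (void)
       __attribute__ ((function_vector (18)));

   The handlers below only verify the argument forms: sp_switch wants a
   string constant, trap_exit an integer constant, and function_vector an
   integer constant in the range 0..255.  */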
9704 /* Handle a 'resbank' attribute. */
9705 static tree
9706 sh_handle_resbank_handler_attribute (tree * node, tree name,
9707 tree args ATTRIBUTE_UNUSED,
9708 int flags ATTRIBUTE_UNUSED,
9709 bool * no_add_attrs)
9711 if (!TARGET_SH2A)
9713 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9714 name);
9715 *no_add_attrs = true;
9717 if (TREE_CODE (*node) != FUNCTION_DECL)
9719 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9720 name);
9721 *no_add_attrs = true;
9724 return NULL_TREE;
9727 /* Handle an "interrupt_handler" attribute; arguments as in
9728 struct attribute_spec.handler. */
9729 static tree
9730 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9731 tree args ATTRIBUTE_UNUSED,
9732 int flags ATTRIBUTE_UNUSED,
9733 bool *no_add_attrs)
9735 if (TREE_CODE (*node) != FUNCTION_DECL)
9737 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9738 name);
9739 *no_add_attrs = true;
9741 else if (TARGET_SHCOMPACT)
9743 error ("attribute interrupt_handler is not compatible with -m5-compact");
9744 *no_add_attrs = true;
9747 return NULL_TREE;
9750 /* Handle a 'function_vector' attribute; arguments as in
9751 struct attribute_spec.handler. */
9752 static tree
9753 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9754 tree args ATTRIBUTE_UNUSED,
9755 int flags ATTRIBUTE_UNUSED,
9756 bool * no_add_attrs)
9758 if (!TARGET_SH2A)
9760 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9761 name);
9762 *no_add_attrs = true;
9764 else if (TREE_CODE (*node) != FUNCTION_DECL)
9766 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9767 name);
9768 *no_add_attrs = true;
9770 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9772 /* The argument must be a constant integer. */
9773 warning (OPT_Wattributes,
9774 "%qE attribute argument not an integer constant",
9775 name);
9776 *no_add_attrs = true;
9778 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9780 /* The argument value must be between 0 and 255.  */
9781 warning (OPT_Wattributes,
9782 "%qE attribute argument should be between 0 to 255",
9783 name);
9784 *no_add_attrs = true;
9786 return NULL_TREE;
9789 /* Returns true if the rtx X is a SYMBOL_REF for a function that has been
9790 assigned the attribute 'function_vector'.  */
9791 bool
9792 sh2a_is_function_vector_call (rtx x)
9794 if (GET_CODE (x) == SYMBOL_REF
9795 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9797 tree tr = SYMBOL_REF_DECL (x);
9799 if (sh2a_function_vector_p (tr))
9800 return true;
9803 return false;
9806 /* Returns the function vector number, if the attribute
9807 'function_vector' is assigned, otherwise returns zero. */
9808 int
9809 sh2a_get_function_vector_number (rtx x)
9811 int num;
9812 tree list, t;
9814 if ((GET_CODE (x) == SYMBOL_REF)
9815 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9817 t = SYMBOL_REF_DECL (x);
9819 if (TREE_CODE (t) != FUNCTION_DECL)
9820 return 0;
9822 list = SH_ATTRIBUTES (t);
9823 while (list)
9825 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9827 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9828 return num;
9831 list = TREE_CHAIN (list);
9834 return 0;
9836 else
9837 return 0;
9840 /* Handle an "sp_switch" attribute; arguments as in
9841 struct attribute_spec.handler. */
9842 static tree
9843 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9844 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9846 if (TREE_CODE (*node) != FUNCTION_DECL)
9848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9849 name);
9850 *no_add_attrs = true;
9852 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9854 /* The argument must be a constant string. */
9855 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9856 name);
9857 *no_add_attrs = true;
9860 return NULL_TREE;
9863 /* Handle a "trap_exit" attribute; arguments as in
9864 struct attribute_spec.handler. */
9865 static tree
9866 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9867 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9869 if (TREE_CODE (*node) != FUNCTION_DECL)
9871 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9872 name);
9873 *no_add_attrs = true;
9875 /* The argument specifies a trap number to be used in a trapa instruction
9876 at function exit (instead of an rte instruction). */
9877 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9879 /* The argument must be a constant integer. */
9880 warning (OPT_Wattributes, "%qE attribute argument not an "
9881 "integer constant", name);
9882 *no_add_attrs = true;
9885 return NULL_TREE;
9888 static tree
9889 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9890 tree name ATTRIBUTE_UNUSED,
9891 tree args ATTRIBUTE_UNUSED,
9892 int flags ATTRIBUTE_UNUSED,
9893 bool *no_add_attrs ATTRIBUTE_UNUSED)
9895 return NULL_TREE;
9898 /* True if __attribute__((renesas)) or -mrenesas. */
9899 bool
9900 sh_attr_renesas_p (const_tree td)
9902 if (TARGET_HITACHI)
9903 return true;
9904 if (td == NULL_TREE)
9905 return false;
9906 if (DECL_P (td))
9907 td = TREE_TYPE (td);
9908 if (td == error_mark_node)
9909 return false;
9910 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9911 != NULL_TREE);
9914 /* True if __attribute__((renesas)) or -mrenesas, for the current
9915 function. */
9916 bool
9917 sh_cfun_attr_renesas_p (void)
9919 return sh_attr_renesas_p (current_function_decl);
9922 /* Returns true if the current function has the "interrupt_handler"
9923 attribute set. */
9924 bool
9925 sh_cfun_interrupt_handler_p (void)
9927 return (lookup_attribute ("interrupt_handler",
9928 DECL_ATTRIBUTES (current_function_decl))
9929 != NULL_TREE);
9932 /* Returns true if FUNC has been assigned the attribute
9933 "function_vector". */
9934 bool
9935 sh2a_function_vector_p (tree func)
9937 tree list;
9938 if (TREE_CODE (func) != FUNCTION_DECL)
9939 return false;
9941 list = SH_ATTRIBUTES (func);
9942 while (list)
9944 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9945 return true;
9947 list = TREE_CHAIN (list);
9949 return false;
9952 /* Returns true if the current function is an SH2A interrupt handler with
     the "resbank" attribute set.  */
9953 bool
9954 sh_cfun_resbank_handler_p (void)
9956 return ((lookup_attribute ("resbank",
9957 DECL_ATTRIBUTES (current_function_decl))
9958 != NULL_TREE)
9959 && (lookup_attribute ("interrupt_handler",
9960 DECL_ATTRIBUTES (current_function_decl))
9961 != NULL_TREE) && TARGET_SH2A);
9964 /* Returns true if the current function has a "trap_exit" attribute set. */
9965 bool
9966 sh_cfun_trap_exit_p (void)
9968 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9969 != NULL_TREE;
9972 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9973 static const char *
9974 sh_check_pch_target_flags (int old_flags)
9976 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9977 | MASK_SH_E | MASK_HARD_SH4
9978 | MASK_FPU_SINGLE | MASK_SH4))
9979 return _("created and used with different architectures / ABIs");
9980 if ((old_flags ^ target_flags) & MASK_HITACHI)
9981 return _("created and used with different ABIs");
9982 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9983 return _("created and used with different endianness");
9984 return NULL;
9987 /* Predicates used by the templates. */
9989 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9990 Used only in general_movsrc_operand. */
9991 bool
9992 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9994 switch (REGNO (op))
9996 case PR_REG:
9997 case MACL_REG:
9998 case MACH_REG:
9999 return true;
10001 return false;
10004 /* Returns true if OP is a floating point value with value 0.0. */
10005 bool
10006 fp_zero_operand (rtx op)
10008 REAL_VALUE_TYPE r;
10010 if (GET_MODE (op) != SFmode)
10011 return false;
10013 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10014 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
10017 /* Returns true if OP is a floating point value with value 1.0. */
10018 bool
10019 fp_one_operand (rtx op)
10021 REAL_VALUE_TYPE r;
10023 if (GET_MODE (op) != SFmode)
10024 return false;
10026 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10027 return REAL_VALUES_EQUAL (r, dconst1);
10030 /* Return the TLS type for TLS symbols. */
10031 enum tls_model
10032 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10034 if (GET_CODE (op) != SYMBOL_REF)
10035 return TLS_MODEL_NONE;
10036 return SYMBOL_REF_TLS_MODEL (op);
10039 /* Return the destination address of a branch. */
10040 static int
10041 branch_dest (rtx branch)
10043 rtx dest = SET_SRC (PATTERN (branch));
10044 int dest_uid;
10046 if (GET_CODE (dest) == IF_THEN_ELSE)
10047 dest = XEXP (dest, 1);
10048 dest = XEXP (dest, 0);
10049 dest_uid = INSN_UID (dest);
10050 return INSN_ADDRESSES (dest_uid);
10053 /* Return nonzero if REG is not used after INSN.
10054 We assume REG is a reload reg, and therefore does
10055 not live past labels. It may live past calls or jumps though. */
10056 bool
10057 reg_unused_after (rtx reg, rtx_insn *insn)
10059 enum rtx_code code;
10060 rtx set;
10062 /* If the reg is set by this instruction, then it is safe for our
10063 case. Disregard the case where this is a store to memory, since
10064 we are checking a register used in the store address. */
10065 set = single_set (insn);
10066 if (set && !MEM_P (SET_DEST (set))
10067 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10068 return true;
10070 while ((insn = NEXT_INSN (insn)))
10072 rtx set;
10073 if (!INSN_P (insn))
10074 continue;
10076 code = GET_CODE (insn);
10078 #if 0
10079 /* If this is a label that existed before reload, then the register
10080 is dead here. However, if this is a label added by reorg, then
10081 the register may still be live here. We can't tell the difference,
10082 so we just ignore labels completely. */
10083 if (code == CODE_LABEL)
10084 return 1;
10085 /* else */
10086 #endif
10088 if (code == JUMP_INSN)
10089 return false;
10091 /* If this is a sequence, we must handle them all at once.
10092 We could have for instance a call that sets the target register,
10093 and an insn in a delay slot that uses the register. In this case,
10094 we must return 0. */
10095 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10097 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10098 int i;
10099 int retval = 0;
10101 for (i = 0; i < seq->len (); i++)
10103 rtx_insn *this_insn = seq->insn (i);
10104 rtx set = single_set (this_insn);
10106 if (CALL_P (this_insn))
10107 code = CALL_INSN;
10108 else if (JUMP_P (this_insn))
10110 if (INSN_ANNULLED_BRANCH_P (this_insn))
10111 return false;
10112 code = JUMP_INSN;
10115 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10116 return false;
10117 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10119 if (!MEM_P (SET_DEST (set)))
10120 retval = true;
10121 else
10122 return false;
10124 if (set == NULL_RTX
10125 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10126 return false;
10128 if (retval == 1)
10129 return true;
10130 else if (code == JUMP_INSN)
10131 return false;
10134 set = single_set (insn);
10135 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10136 return false;
10137 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10138 return !MEM_P (SET_DEST (set));
10139 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10140 return false;
10142 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10143 return true;
10145 return true;
10149 static GTY(()) rtx t_reg_rtx;
10150 rtx
10151 get_t_reg_rtx (void)
10153 if (! t_reg_rtx)
10154 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10155 return t_reg_rtx;
10158 static GTY(()) tree fpscr_values;
10160 static void
10161 emit_fpu_switch (rtx scratch, int index)
10163 rtx src;
10165 if (fpscr_values == NULL)
10167 tree t;
10169 t = build_index_type (integer_one_node);
10170 t = build_array_type (integer_type_node, t);
10171 t = build_decl (BUILTINS_LOCATION,
10172 VAR_DECL, get_identifier ("__fpscr_values"), t);
10173 DECL_ARTIFICIAL (t) = 1;
10174 DECL_IGNORED_P (t) = 1;
10175 DECL_EXTERNAL (t) = 1;
10176 TREE_STATIC (t) = 1;
10177 TREE_PUBLIC (t) = 1;
10178 TREE_USED (t) = 1;
10180 fpscr_values = t;
10183 src = DECL_RTL (fpscr_values);
10184 if (!can_create_pseudo_p ())
10186 emit_move_insn (scratch, XEXP (src, 0));
10187 if (index != 0)
10188 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10189 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10191 else
10192 src = adjust_address (src, SImode, index * 4);
10194 emit_insn (gen_lds_fpscr (src));
10197 static rtx get_free_reg (HARD_REG_SET);
10199 /* This function returns a register to use to load the address to load
10200 the fpscr from. Currently it always returns r1 or r7, but when we are
10201 able to use pseudo registers after combine, or have a better mechanism
10202 for choosing a register, it should be done here. */
10203 /* REGS_LIVE is the liveness information for the point for which we
10204 need this allocation. In some bare-bones exit blocks, r1 is live at the
10205 start. We can even have all of r0..r3 being live:
10206 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9207 The new insns placed before INSN will clobber the register
10208 we return. If a basic block consists only of setting the return value
10209 register to a pseudo and using that register, the return value is not
9210 live before or after this block, yet we'll insert our insns right in
10211 the middle. */
10212 static rtx
10213 get_free_reg (HARD_REG_SET regs_live)
10215 if (! TEST_HARD_REG_BIT (regs_live, 1))
10216 return gen_rtx_REG (Pmode, 1);
10218 /* Hard reg 1 is live; since this is a small register classes target,
10219 there shouldn't be anything but a jump before the function end. */
10220 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10221 return gen_rtx_REG (Pmode, 7);
10224 /* This function will set the fpscr from memory.
10225 MODE is the mode we are setting it to. */
10226 void
10227 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10229 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10230 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10231 rtx addr_reg;
10233 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10234 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10237 /* Is the given character a logical line separator for the assembler? */
10238 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10239 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10240 #endif
10242 static bool
10243 sequence_insn_p (rtx_insn *insn)
10245 rtx_insn *prev, *next;
10247 prev = PREV_INSN (insn);
10248 if (prev == NULL)
10249 return false;
10251 next = NEXT_INSN (prev);
10252 if (next == NULL)
10253 return false;
10255 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10258 int
10259 sh_insn_length_adjustment (rtx_insn *insn)
10261 /* Instructions with unfilled delay slots take up an extra two bytes for
10262 the nop in the delay slot. */
10263 if (((NONJUMP_INSN_P (insn)
10264 && GET_CODE (PATTERN (insn)) != USE
10265 && GET_CODE (PATTERN (insn)) != CLOBBER)
10266 || CALL_P (insn) || JUMP_P (insn))
10267 && ! sequence_insn_p (insn)
10268 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10269 return 2;
10271 /* Increase the insn length of a cbranch without a delay slot insn to
10272 force a delay slot which will be stuffed with a nop. */
10273 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10274 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10275 && ! sequence_insn_p (insn))
10276 return 2;
10278 /* sh-dsp parallel processing insns take four bytes instead of two.  */
10280 if (NONJUMP_INSN_P (insn))
10282 int sum = 0;
10283 rtx body = PATTERN (insn);
10284 const char *templ;
10285 char c;
10286 bool maybe_label = true;
10288 if (GET_CODE (body) == ASM_INPUT)
10289 templ = XSTR (body, 0);
10290 else if (asm_noperands (body) >= 0)
10291 templ
10292 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10293 else
10294 return 0;
10297 int ppi_adjust = 0;
10300 c = *templ++;
10301 while (c == ' ' || c == '\t');
10302 /* all sh-dsp parallel-processing insns start with p.
10303 The only non-ppi sh insn starting with p is pref.
10304 The only ppi starting with pr is prnd. */
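/* For example (mnemonics assumed purely for illustration): an asm
   template starting with "padd" is treated as a ppi insn and gets the
   extra 2 bytes, while one starting with "pref" does not, because of
   the "re" check below.  */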
10305 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10306 ppi_adjust = 2;
10307 /* The repeat pseudo-insn expands to three insns, a total of
10308 six bytes in size. */
10309 else if ((c == 'r' || c == 'R')
10310 && ! strncasecmp ("epeat", templ, 5))
10311 ppi_adjust = 4;
10312 while (c && c != '\n'
10313 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10315 /* If this is a label, it is obviously not a ppi insn. */
10316 if (c == ':' && maybe_label)
10318 ppi_adjust = 0;
10319 break;
10321 else if (c == '\'' || c == '"')
10322 maybe_label = false;
10323 c = *templ++;
10325 sum += ppi_adjust;
10326 maybe_label = c != ':';
10328 while (c);
10329 return sum;
10331 return 0;
10334 /* Return TRUE for a valid displacement for the REG+disp addressing
10335 with MODE. */
10336 bool
10337 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10338 bool allow_zero)
10340 if (! CONST_INT_P (op))
10341 return false;
10343 if (TARGET_SHMEDIA)
10345 int size;
10347 /* Check if this is the address of an unaligned load / store. */
10348 if (mode == VOIDmode)
10349 return satisfies_constraint_I06 (op);
10351 size = GET_MODE_SIZE (mode);
10352 return (!(INTVAL (op) & (size - 1))
10353 && INTVAL (op) >= -512 * size
10354 && INTVAL (op) < 512 * size);
10356 else
10358 const HOST_WIDE_INT offset = INTVAL (op);
10359 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10360 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10362 /* If the mode does not support any displacement always return false.
10363 Even though an index of '0' is actually always valid, it will cause
10364 troubles when e.g. a DFmode move is split into two SFmode moves,
10365 where one SFmode move will have index '0' and the other move will
10366 have index '4'. */
10367 if (!allow_zero && max_disp < 1)
10368 return false;
10370 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10374 /* Recognize an RTL expression that is a valid memory address for
10375 an instruction.
10376 The MODE argument is the machine mode for the MEM expression
10377 that wants to use this address.
10378 Allow REG
10379 REG+disp
10380 REG+r0
10381 REG++
10382 --REG
10384 GBR+disp */
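/* As a rough illustration (SH assembler operand syntax, given here only
   as an assumed example), these forms correspond to operands such as
       @r4            REG
       @(8,r4)        REG+disp
       @(r0,r4)       REG+r0
       @r4+  /  @-r4  REG++  /  --REG
       @(40,gbr)      GBR+disp
   subject to the mode and displacement checks done below.  */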
10385 static bool
10386 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10388 if (! ALLOW_INDEXED_ADDRESS
10389 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10390 return false;
10392 if (REG_P (x) && REGNO (x) == GBR_REG)
10393 return true;
10395 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10396 return true;
10397 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10398 && ! TARGET_SHMEDIA
10399 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10400 return true;
10401 else if (GET_CODE (x) == PLUS)
10403 rtx xop0 = XEXP (x, 0);
10404 rtx xop1 = XEXP (x, 1);
10406 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10407 return gbr_displacement (xop1, mode);
10409 if (GET_MODE_SIZE (mode) <= 8
10410 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10411 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10412 return true;
10414 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10415 || ((xop0 == stack_pointer_rtx
10416 || xop0 == hard_frame_pointer_rtx)
10417 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10418 || ((xop1 == stack_pointer_rtx
10419 || xop1 == hard_frame_pointer_rtx)
10420 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10421 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10422 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10423 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10424 && TARGET_FMOVD && mode == DFmode)))
10426 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10427 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10428 return true;
10429 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10430 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10431 return true;
10435 return false;
10438 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10439 isn't protected by a PIC unspec. */
10440 bool
10441 nonpic_symbol_mentioned_p (rtx x)
10443 const char *fmt;
10444 int i;
10446 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10447 || GET_CODE (x) == PC)
10448 return true;
10450 /* We don't want to look into the possible MEM location of a
10451 CONST_DOUBLE, since we're not going to use it, in general. */
10452 if (GET_CODE (x) == CONST_DOUBLE)
10453 return false;
10455 if (GET_CODE (x) == UNSPEC
10456 && (XINT (x, 1) == UNSPEC_PIC
10457 || XINT (x, 1) == UNSPEC_GOT
10458 || XINT (x, 1) == UNSPEC_GOTOFF
10459 || XINT (x, 1) == UNSPEC_GOTPLT
10460 || XINT (x, 1) == UNSPEC_GOTTPOFF
10461 || XINT (x, 1) == UNSPEC_DTPOFF
10462 || XINT (x, 1) == UNSPEC_TPOFF
10463 || XINT (x, 1) == UNSPEC_PLT
10464 || XINT (x, 1) == UNSPEC_SYMOFF
10465 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10466 return false;
10468 fmt = GET_RTX_FORMAT (GET_CODE (x));
10469 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10471 if (fmt[i] == 'E')
10473 int j;
10474 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10475 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10476 return true;
10478 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10479 return true;
10482 return false;
10485 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10486 @GOTOFF in `reg'. */
10487 rtx
10488 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10489 rtx reg)
10491 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10492 return orig;
10494 if (GET_CODE (orig) == LABEL_REF
10495 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10497 if (reg == NULL_RTX)
10498 reg = gen_reg_rtx (Pmode);
10500 emit_insn (gen_symGOTOFF2reg (reg, orig));
10501 return reg;
10503 else if (GET_CODE (orig) == SYMBOL_REF)
10505 if (reg == NULL_RTX)
10506 reg = gen_reg_rtx (Pmode);
10508 emit_insn (gen_symGOT2reg (reg, orig));
10509 return reg;
10511 return orig;
10514 /* Given a (logical) mode size and an offset in bytes, try to find the
10515 appropriate displacement value for a mov insn. On SH the displacements
10516 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10517 15 bytes in QImode. To compensate this we create a new base address by
10518 adding an adjustment value to it.
10520 If the originally requested offset is greater than 127 we prefer using
10521 values 124..127 over 128..131 to increase opportunities to use the
10522 add #imm, Rn insn.
10524 In some cases it is possible that a requested offset might seem unaligned
10525 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10526 This is compensated by adjusting the base address so that the effective
10527 address of the displacement move insn will be aligned.
10529 This is not the best possible way of rebasing the base address, as it
10530 does not look at other present displacement addressings around it.
10531 In some cases this can create more base address adjustments than would
10532 actually be necessary. */
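/* A small worked example (assuming the 60 byte SImode limit quoted
   above): for mode = SImode and offset = 70, sh_find_mov_disp_adjust
   below yields offset_adjust = 66 and mov_disp = 4, i.e. the base
   address is advanced by 66 bytes and the move insn then addresses
   base + 66 + 4 = base + 70.  */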
10533 struct disp_adjust
10535 rtx offset_adjust;
10536 rtx mov_disp;
10539 static struct disp_adjust
10540 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10542 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10544 /* Do not try to use SH2A's large displacements here, because this would
10545 effectively disable the small displacement insns. */
10546 const int mode_sz = GET_MODE_SIZE (mode);
10547 const int mov_insn_sz = mov_insn_size (mode, false);
10548 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10549 const int max_disp_next = max_disp + mov_insn_sz;
10550 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10551 HOST_WIDE_INT offset_adjust;
10553 /* In some cases this actually does happen and we must check for it. */
10554 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10555 return res;
10557 /* Keeps the previous behavior for QImode displacement addressing.
10558 This just decides how the offset is re-based. Removing this special
10559 case will result in slightly bigger code on average, but it's not that
10560 bad actually. */
10561 if (mov_insn_sz == 1)
10562 align_modifier = 0;
10564 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10566 if (mode_sz + offset - offset_adjust <= max_disp_next)
10568 res.offset_adjust = GEN_INT (offset_adjust);
10569 res.mov_disp = GEN_INT (offset - offset_adjust);
10572 return res;
10575 /* Try to modify an illegitimate address and make it legitimate.
10576 If we find one, return the new, valid address.
10577 Otherwise, return the original address. */
10578 static rtx
10579 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10581 if (flag_pic)
10582 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10584 if (TARGET_SHMEDIA)
10585 return x;
10587 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10588 || (TARGET_SH2E && mode == SFmode))
10589 return x;
10591 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10592 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10594 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10595 INTVAL (XEXP (x, 1)));
10597 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10599 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10600 adj.offset_adjust, NULL_RTX, 0,
10601 OPTAB_LIB_WIDEN);
10602 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10605 return x;
10608 /* Attempt to replace *p, which is an address that needs reloading, with
10609 a valid memory address for an operand of mode MODE.
10610 Like for sh_legitimize_address, for the SH we try to get a normal form
10611 of the address. That will allow inheritance of the address reloads. */
10612 bool
10613 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10614 int itype)
10616 enum reload_type type = (enum reload_type) itype;
10617 const int mode_sz = GET_MODE_SIZE (mode);
10619 if (sh_lra_p ())
10620 return false;
10622 if (! ALLOW_INDEXED_ADDRESS
10623 && GET_CODE (*p) == PLUS
10624 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10626 *p = copy_rtx (*p);
10627 push_reload (*p, NULL_RTX, p, NULL,
10628 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10629 return true;
10632 if (! ALLOW_INDEXED_ADDRESS
10633 && GET_CODE (*p) == PLUS
10634 && GET_CODE (XEXP (*p, 0)) == PLUS)
10636 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10637 XEXP (XEXP (*p, 0), 1));
10638 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10639 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10640 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10641 return true;
10644 if (TARGET_SHMEDIA)
10645 return false;
10647 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10648 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10649 && (ALLOW_INDEXED_ADDRESS
10650 || XEXP (*p, 0) == stack_pointer_rtx
10651 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10653 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10654 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10656 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10658 push_reload (*p, NULL_RTX, p, NULL,
10659 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10660 return true;
10663 if (TARGET_SH2E && mode == SFmode)
10665 *p = copy_rtx (*p);
10666 push_reload (*p, NULL_RTX, p, NULL,
10667 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10668 return true;
10671 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10672 moves because then reload has a problem figuring out the constraint
10673 that the move insn target/source reg must be R0.
10674 Or maybe some handling is wrong in sh_secondary_reload for this
10675 to work properly? */
10676 if ((mode_sz == 4 || mode_sz == 8)
10677 && ! (TARGET_SH4 && mode == DFmode)
10678 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10680 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10681 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10682 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10683 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10684 return true;
10688 /* We must re-recognize what we created before. */
10689 if (GET_CODE (*p) == PLUS
10690 && (mode_sz == 4 || mode_sz == 8)
10691 && GET_CODE (XEXP (*p, 0)) == PLUS
10692 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10693 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10694 && CONST_INT_P (XEXP (*p, 1))
10695 && ! (TARGET_SH2E && mode == SFmode))
10697 /* Because this address is so complex, we know it must have
10698 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10699 it is already unshared, and needs no further unsharing. */
10700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10701 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10702 return true;
10705 return false;
10708 /* In the name of slightly smaller debug output, and to cater to
10709 general assembler lossage, recognize various UNSPEC sequences
10710 and turn them back into a direct symbol reference. */
10711 static rtx
10712 sh_delegitimize_address (rtx orig_x)
10714 rtx x, y;
10716 orig_x = delegitimize_mem_from_attrs (orig_x);
10718 x = orig_x;
10719 if (MEM_P (x))
10720 x = XEXP (x, 0);
10721 if (GET_CODE (x) == CONST)
10723 y = XEXP (x, 0);
10724 if (GET_CODE (y) == UNSPEC)
10726 if (XINT (y, 1) == UNSPEC_GOT
10727 || XINT (y, 1) == UNSPEC_GOTOFF
10728 || XINT (y, 1) == UNSPEC_SYMOFF)
10729 return XVECEXP (y, 0, 0);
10730 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10732 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10734 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10736 if (GET_CODE (symplt) == UNSPEC
10737 && XINT (symplt, 1) == UNSPEC_PLT)
10738 return XVECEXP (symplt, 0, 0);
10741 else if (TARGET_SHMEDIA
10742 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10743 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10745 rtx offset = XVECEXP (y, 0, 1);
10747 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10748 if (MEM_P (orig_x))
10749 x = replace_equiv_address_nv (orig_x, x);
10750 return x;
10755 return orig_x;
10758 /* Mark the use of a constant in the literal table. If the constant
10759 has multiple labels, make it unique. */
10760 static rtx
10761 mark_constant_pool_use (rtx x)
10763 rtx_insn *insn, *lab;
10764 rtx pattern;
10766 if (x == NULL_RTX)
10767 return x;
10769 switch (GET_CODE (x))
10771 case LABEL_REF:
10772 x = XEXP (x, 0);
10773 case CODE_LABEL:
10774 break;
10775 default:
10776 return x;
10779 /* Get the first label in the list of labels for the same constant
10780 and delete the other labels in the list.  */
10781 lab = as_a <rtx_insn *> (x);
10782 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10784 if (!LABEL_P (insn)
10785 || LABEL_REFS (insn) != NEXT_INSN (insn))
10786 break;
10787 lab = insn;
10790 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10791 as_a<rtx_insn *> (insn)->set_deleted ();
10793 /* Mark constants in a window. */
10794 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10796 if (!NONJUMP_INSN_P (insn))
10797 continue;
10799 pattern = PATTERN (insn);
10800 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10801 continue;
10803 switch (XINT (pattern, 1))
10805 case UNSPECV_CONST2:
10806 case UNSPECV_CONST4:
10807 case UNSPECV_CONST8:
10808 XVECEXP (pattern, 0, 1) = const1_rtx;
10809 break;
10810 case UNSPECV_WINDOW_END:
10811 if (XVECEXP (pattern, 0, 0) == x)
10812 return lab;
10813 break;
10814 case UNSPECV_CONST_END:
10815 return lab;
10816 default:
10817 break;
10821 return lab;
10824 /* Return true if it's possible to redirect BRANCH1 to the destination
10825 of an unconditional jump BRANCH2. We only want to do this if the
10826 resulting branch will have a short displacement. */
10827 static bool
10828 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10830 /* Don't follow if BRANCH2 could be a jump crossing between
10831 hot and cold partitions. */
10832 if (TARGET_SH1
10833 && flag_reorder_blocks_and_partition
10834 && simplejump_p (branch2)
10835 && CROSSING_JUMP_P (branch2))
10836 return false;
10838 if (flag_expensive_optimizations && simplejump_p (branch2))
10840 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10841 rtx_insn *insn;
10842 int distance;
10844 for (distance = 0, insn = NEXT_INSN (branch1);
10845 insn && distance < 256;
10846 insn = PREV_INSN (insn))
10848 if (insn == dest)
10849 return true;
10850 else
10851 distance += get_attr_length (insn);
10853 for (distance = 0, insn = NEXT_INSN (branch1);
10854 insn && distance < 256;
10855 insn = NEXT_INSN (insn))
10857 if (insn == dest)
10858 return true;
10859 else
10860 distance += get_attr_length (insn);
10863 return false;
10866 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10867 bool
10868 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10869 unsigned int new_reg)
10871 /* Interrupt functions can only use registers that have already been
10872 saved by the prologue, even if they would normally be
10873 call-clobbered. */
10874 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10875 return false;
10877 return true;
10880 /* Function to update the integer COST
10881 based on the relationship between INSN that is dependent on
10882 DEP_INSN through the dependence LINK. The default is to make no
10883 adjustment to COST. This can be used for example to specify to
10884 the scheduler that an output- or anti-dependence does not incur
10885 the same cost as a data-dependence. The return value should be
10886 the new value for COST. */
10887 static int
10888 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10889 rtx_insn *dep_insn, int cost)
10891 rtx reg, use_pat;
10893 if (TARGET_SHMEDIA)
10895 /* On SHmedia, if the dependence is an anti-dependence or
10896 output-dependence, there is no cost. */
10897 if (REG_NOTE_KIND (link) != 0)
10899 /* However, dependencies between target register loads and
10900 uses of the register in a subsequent block that are separated
10901 by a conditional branch are not modelled - we have to make do with
10902 the anti-dependency between the target register load and the
10903 conditional branch that ends the current block. */
10904 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10905 && GET_CODE (PATTERN (dep_insn)) == SET
10906 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10907 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10908 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10910 int orig_cost = cost;
10911 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10912 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10913 ? insn : JUMP_LABEL (insn));
10914 /* On the likely path, the branch costs 1, on the unlikely path,
10915 it costs 3. */
10916 cost--;
10918 target = next_active_insn (target);
10919 while (target && ! flow_dependent_p (target, dep_insn)
10920 && --cost > 0);
10921 /* If two branches are executed in immediate succession, with the
10922 first branch properly predicted, this causes a stall at the
10923 second branch, hence we won't need the target for the
10924 second branch for two cycles after the launch of the first
10925 branch. */
10926 if (cost > orig_cost - 2)
10927 cost = orig_cost - 2;
10929 else
10930 cost = 0;
10933 else if (get_attr_is_mac_media (insn)
10934 && get_attr_is_mac_media (dep_insn))
10935 cost = 1;
10937 else if (! reload_completed
10938 && GET_CODE (PATTERN (insn)) == SET
10939 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10940 && GET_CODE (PATTERN (dep_insn)) == SET
10941 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10942 && cost < 4)
10943 cost = 4;
10944 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10945 that is needed at the target. */
10946 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10947 && ! flow_dependent_p (insn, dep_insn))
10948 cost--;
10950 else if (REG_NOTE_KIND (link) == 0)
10952 enum attr_type type;
10953 rtx dep_set;
10955 if (recog_memoized (insn) < 0
10956 || recog_memoized (dep_insn) < 0)
10957 return cost;
10959 dep_set = single_set (dep_insn);
10961 /* The latency that we specify in the scheduling description refers
10962 to the actual output, not to an auto-increment register; for that,
10963 the latency is one. */
10964 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10966 rtx set = single_set (insn);
10968 if (set
10969 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10970 && (!MEM_P (SET_DEST (set))
10971 || !reg_mentioned_p (SET_DEST (dep_set),
10972 XEXP (SET_DEST (set), 0))))
10973 cost = 1;
10975 /* The only input for a call that is timing-critical is the
10976 function's address. */
10977 if (CALL_P (insn))
10979 rtx call = get_call_rtx_from (insn);
10980 if (call
10981 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10982 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10983 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10984 cost -= TARGET_SH4_300 ? 3 : 6;
10986 /* Likewise, the most timing critical input for an sfunc call
10987 is the function address. However, sfuncs typically start
10988 using their arguments pretty quickly.
10989 Assume a four cycle delay for SH4 before they are needed.
10990 Cached ST40-300 calls are quicker, so assume only a one
10991 cycle delay there.
10992 ??? Maybe we should encode the delays till input registers
10993 are needed by sfuncs into the sfunc call insn. */
10994 /* All sfunc calls are parallels with at least four components.
10995 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10996 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10997 && XVECLEN (PATTERN (insn), 0) >= 4
10998 && (reg = sfunc_uses_reg (insn)))
11000 if (! reg_set_p (reg, dep_insn))
11001 cost -= TARGET_SH4_300 ? 1 : 4;
11003 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
11005 enum attr_type dep_type = get_attr_type (dep_insn);
11007 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
11008 cost--;
11009 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
11010 && (type = get_attr_type (insn)) != TYPE_CALL
11011 && type != TYPE_SFUNC)
11012 cost--;
11013 /* When the preceding instruction loads the shift amount of
11014 the following SHAD/SHLD, the latency of the load is increased
11015 by 1 cycle. */
11016 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11017 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11018 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11019 XEXP (SET_SRC (single_set (insn)),
11020 1)))
11021 cost++;
11022 /* When an LS group instruction with a latency of less than
11023 3 cycles is followed by a double-precision floating-point
11024 instruction, FIPR, or FTRV, the latency of the first
11025 instruction is increased to 3 cycles. */
11026 else if (cost < 3
11027 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11028 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11029 cost = 3;
11030 /* The lsw register of a double-precision computation is ready one
11031 cycle earlier. */
11032 else if (reload_completed
11033 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11034 && (use_pat = single_set (insn))
11035 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11036 SET_SRC (use_pat)))
11037 cost -= 1;
11039 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11040 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11041 cost -= 1;
11043 else if (TARGET_SH4_300)
11045 /* Stores need their input register two cycles later. */
11046 if (dep_set && cost >= 1
11047 && ((type = get_attr_type (insn)) == TYPE_STORE
11048 || type == TYPE_PSTORE
11049 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11051 rtx set = single_set (insn);
11053 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11054 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11056 cost -= 2;
11057 /* But don't reduce the cost below 1 if the address depends
11058 on a side effect of dep_insn. */
11059 if (cost < 1
11060 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11061 cost = 1;
11066 /* An anti-dependence penalty of two applies if the first insn is a double
11067 precision fadd / fsub / fmul. */
11068 else if (!TARGET_SH4_300
11069 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11070 && recog_memoized (dep_insn) >= 0
11071 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11072 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11073 /* A lot of alleged anti-flow dependences are fake,
11074 so check this one is real. */
11075 && flow_dependent_p (dep_insn, insn))
11076 cost = 2;
11078 return cost;
11081 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11082 if DEP_INSN is anti-flow dependent on INSN. */
11083 static bool
11084 flow_dependent_p (rtx insn, rtx dep_insn)
11086 rtx tmp = PATTERN (insn);
11088 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11089 return tmp == NULL_RTX;
11092 /* A helper function for flow_dependent_p called through note_stores. */
11093 static void
11094 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11096 rtx * pinsn = (rtx *) data;
11098 if (*pinsn && reg_referenced_p (x, *pinsn))
11099 *pinsn = NULL_RTX;
11102 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11103 'special function' patterns (type sfunc) that clobber pr, but that
11104 do not look like function calls to leaf_function_p. Hence we must
11105 do this extra check. */
11106 static int
11107 sh_pr_n_sets (void)
11109 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11112 /* Return where to allocate pseudo for a given hard register initial
11113 value. */
11114 static rtx
11115 sh_allocate_initial_value (rtx hard_reg)
11117 rtx x;
11119 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11121 if (crtl->is_leaf
11122 && ! sh_pr_n_sets ()
11123 && ! (TARGET_SHCOMPACT
11124 && ((crtl->args.info.call_cookie
11125 & ~ CALL_COOKIE_RET_TRAMP (1))
11126 || crtl->saves_all_registers)))
11127 x = hard_reg;
11128 else
11129 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11131 else
11132 x = NULL_RTX;
11134 return x;
11137 /* This function returns "2" to indicate dual issue for the SH4
11138 processor. To be used by the DFA pipeline description. */
11139 static int
11140 sh_issue_rate (void)
11142 if (TARGET_SUPERSCALAR)
11143 return 2;
11144 else
11145 return 1;
11148 /* Functions for ready queue reordering for sched1. */
11150 /* Get weight for mode for a set x. */
11151 static short
11152 find_set_regmode_weight (rtx x, machine_mode mode)
11154 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11155 return 1;
11156 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11158 if (REG_P (SET_DEST (x)))
11160 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11161 return 1;
11162 else
11163 return 0;
11165 return 1;
11167 return 0;
11170 /* Get regmode weight for insn. */
11171 static short
11172 find_insn_regmode_weight (rtx insn, machine_mode mode)
11174 short reg_weight = 0;
11175 rtx x;
11177 /* Increment weight for each register born here. */
11178 x = PATTERN (insn);
11179 reg_weight += find_set_regmode_weight (x, mode);
11180 if (GET_CODE (x) == PARALLEL)
11182 int j;
11183 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11185 x = XVECEXP (PATTERN (insn), 0, j);
11186 reg_weight += find_set_regmode_weight (x, mode);
11189 /* Decrement weight for each register that dies here. */
11190 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11192 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11194 rtx note = XEXP (x, 0);
11195 if (REG_P (note) && GET_MODE (note) == mode)
11196 reg_weight--;
11199 return reg_weight;
11202 /* Calculate regmode weights for all insns of a basic block. */
11203 static void
11204 find_regmode_weight (basic_block b, machine_mode mode)
11206 rtx_insn *insn, *next_tail, *head, *tail;
11208 get_ebb_head_tail (b, b, &head, &tail);
11209 next_tail = NEXT_INSN (tail);
11211 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11213 /* Handle register life information. */
11214 if (!INSN_P (insn))
11215 continue;
11217 if (mode == SFmode)
11218 INSN_REGMODE_WEIGHT (insn, mode) =
11219 find_insn_regmode_weight (insn, mode)
11220 + 2 * find_insn_regmode_weight (insn, DFmode);
11221 else if (mode == SImode)
11222 INSN_REGMODE_WEIGHT (insn, mode) =
11223 find_insn_regmode_weight (insn, mode)
11224 + 2 * find_insn_regmode_weight (insn, DImode);
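/* For illustration: with the weighting above, a DFmode set counts twice
   toward the SFmode pressure and a DImode set twice toward the SImode
   pressure, since each occupies a pair of the single-width registers; an
   insn setting one DFmode and one SFmode value thus gets, roughly, an
   SFmode weight of 3. */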
11228 /* Comparison function for ready queue sorting. */
11229 static int
11230 rank_for_reorder (const void *x, const void *y)
11232 rtx_insn *tmp = *(rtx_insn * const *) y;
11233 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11235 /* The insn in a schedule group should be issued first. */
11236 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11237 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11239 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11240 minimizes instruction movement, thus minimizing sched's effect on
11241 register pressure. */
11242 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11245 /* Resort the array A in which only element at index N may be out of order. */
11246 static void
11247 swap_reorder (rtx_insn **a, int n)
11249 rtx_insn *insn = a[n - 1];
11250 int i = n - 2;
11252 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11254 a[i + 1] = a[i];
11255 i -= 1;
11257 a[i + 1] = insn;
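/* This is a single insertion-sort step; within this file, ready_reorder
   below only calls it for the two-element case, where one
   rank_for_reorder comparison is enough. */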
11260 /* Sort the ready list by ascending priority. */
11261 static void
11262 ready_reorder (rtx_insn **ready, int nready)
11264 if (nready == 2)
11265 swap_reorder (ready, nready);
11266 else if (nready > 2)
11267 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11270 /* Count life regions of r0 for a block. */
11271 static int
11272 find_r0_life_regions (basic_block b)
11274 rtx_insn *end, *insn;
11275 rtx pset;
11276 rtx r0_reg;
11277 int live;
11278 int set;
11279 int death = 0;
11281 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11283 set = 1;
11284 live = 1;
11286 else
11288 set = 0;
11289 live = 0;
11292 insn = BB_HEAD (b);
11293 end = BB_END (b);
11294 r0_reg = gen_rtx_REG (SImode, R0_REG);
11295 while (1)
11297 if (INSN_P (insn))
11299 if (find_regno_note (insn, REG_DEAD, R0_REG))
11301 death++;
11302 live = 0;
11304 if (!live
11305 && (pset = single_set (insn))
11306 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11307 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11309 set++;
11310 live = 1;
11313 if (insn == end)
11314 break;
11315 insn = NEXT_INSN (insn);
11317 return set - death;
11320 /* Calculate regmode weights for all insns of all basic blocks. */
11321 static void
11322 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11323 int verbose ATTRIBUTE_UNUSED,
11324 int old_max_uid)
11326 basic_block b;
11328 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11329 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11330 r0_life_regions = 0;
11332 FOR_EACH_BB_REVERSE_FN (b, cfun)
11334 find_regmode_weight (b, SImode);
11335 find_regmode_weight (b, SFmode);
11336 if (!reload_completed)
11337 r0_life_regions += find_r0_life_regions (b);
11340 CURR_REGMODE_PRESSURE (SImode) = 0;
11341 CURR_REGMODE_PRESSURE (SFmode) = 0;
11344 /* Cleanup. */
11345 static void
11346 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11347 int verbose ATTRIBUTE_UNUSED)
11349 if (regmode_weight[0])
11351 free (regmode_weight[0]);
11352 regmode_weight[0] = NULL;
11354 if (regmode_weight[1])
11356 free (regmode_weight[1]);
11357 regmode_weight[1] = NULL;
11361 /* The set of scalar modes supported differs from the default version only
11362 in TImode for 32-bit SHMEDIA. */
11363 static bool
11364 sh_scalar_mode_supported_p (machine_mode mode)
11366 if (TARGET_SHMEDIA32 && mode == TImode)
11367 return false;
11369 return default_scalar_mode_supported_p (mode);
11372 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11373 keep count of register pressures on SImode and SFmode. */
11374 static int
11375 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11376 int sched_verbose ATTRIBUTE_UNUSED,
11377 rtx_insn *insn,
11378 int can_issue_more)
11380 if (GET_CODE (PATTERN (insn)) != USE
11381 && GET_CODE (PATTERN (insn)) != CLOBBER)
11382 cached_can_issue_more = can_issue_more - 1;
11383 else
11384 cached_can_issue_more = can_issue_more;
11386 if (reload_completed)
11387 return cached_can_issue_more;
11389 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11390 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11392 return cached_can_issue_more;
11395 static void
11396 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11397 int verbose ATTRIBUTE_UNUSED,
11398 int veclen ATTRIBUTE_UNUSED)
11400 CURR_REGMODE_PRESSURE (SImode) = 0;
11401 CURR_REGMODE_PRESSURE (SFmode) = 0;
11404 /* Some magic numbers. */
11405 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11406 functions that already have high pressure on r0. */
11407 #define R0_MAX_LIFE_REGIONS 2
11408 /* Register Pressure thresholds for SImode and SFmode registers. */
11409 #define SIMODE_MAX_WEIGHT 5
11410 #define SFMODE_MAX_WEIGHT 10
11412 /* Return true if the pressure is high for MODE. */
11413 static bool
11414 high_pressure (machine_mode mode)
11416 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11417 functions that already have high pressure on r0. */
11418 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11419 return true;
11421 if (mode == SFmode)
11422 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11423 else
11424 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11427 /* Reorder ready queue if register pressure is high. */
11428 static int
11429 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11430 int sched_verbose ATTRIBUTE_UNUSED,
11431 rtx_insn **ready,
11432 int *n_readyp,
11433 int clock_var ATTRIBUTE_UNUSED)
11435 if (reload_completed)
11436 return sh_issue_rate ();
11438 if (high_pressure (SFmode) || high_pressure (SImode))
11440 ready_reorder (ready, *n_readyp);
11443 return sh_issue_rate ();
11446 /* Skip cycles if the current register pressure is high. */
11447 static int
11448 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11449 int sched_verbose ATTRIBUTE_UNUSED,
11450 rtx_insn **ready ATTRIBUTE_UNUSED,
11451 int *n_readyp ATTRIBUTE_UNUSED,
11452 int clock_var ATTRIBUTE_UNUSED)
11454 if (reload_completed)
11455 return cached_can_issue_more;
11457 if (high_pressure (SFmode) || high_pressure (SImode))
11458 skip_cycles = 1;
11460 return cached_can_issue_more;
11463 /* Skip cycles without sorting the ready queue. This will move insns from
11464 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11465 queue by sh_reorder. */
11467 /* Generally, skipping this many cycles is sufficient for all insns to move
11468 from Q -> R. */
11469 #define MAX_SKIPS 8
11471 static int
11472 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11473 int sched_verbose ATTRIBUTE_UNUSED,
11474 rtx_insn *insn ATTRIBUTE_UNUSED,
11475 int last_clock_var,
11476 int clock_var,
11477 int *sort_p)
11479 if (reload_completed)
11480 return 0;
11482 if (skip_cycles)
11484 if ((clock_var - last_clock_var) < MAX_SKIPS)
11486 *sort_p = 0;
11487 return 1;
11489 /* If this is the last cycle we are skipping, allow reordering of R. */
11490 if ((clock_var - last_clock_var) == MAX_SKIPS)
11492 *sort_p = 1;
11493 return 1;
11497 skip_cycles = 0;
11499 return 0;
11502 /* SHmedia requires registers for branches, so we can't generate new
11503 branches past reload. */
11504 static bool
11505 sh_cannot_modify_jumps_p (void)
11507 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11510 static reg_class_t
11511 sh_target_reg_class (void)
11513 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11516 static bool
11517 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11519 if (! shmedia_space_reserved_for_target_registers)
11520 return 0;
11521 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11522 return 0;
11524 HARD_REG_SET dummy;
11525 if (calc_live_regs (&dummy) >= 6 * 8)
11526 return 1;
11527 return 0;
11530 static bool
11531 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11533 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11536 /*
11537 On the SH1..SH4, the trampoline looks like
11538 2 0002 D202 mov.l l2,r2
11539 1 0000 D301 mov.l l1,r3
11540 3 0004 422B jmp @r2
11541 4 0006 0009 nop
11542 5 0008 00000000 l1: .long area
11543 6 000c 00000000 l2: .long function
11545 SH5 (compact) uses r1 instead of r3 for the static chain. */
11548 /* Emit RTL insns to initialize the variable parts of a trampoline.
11549 FNADDR is an RTX for the address of the function's pure code.
11550 CXT is an RTX for the static chain value for the function. */
11551 static void
11552 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11554 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11555 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11557 if (TARGET_SHMEDIA64)
11559 rtx tramp_templ;
11560 int fixed_len;
11562 rtx movi1 = GEN_INT (0xcc000010);
11563 rtx shori1 = GEN_INT (0xc8000010);
11564 rtx src, dst;
11566 /* The following trampoline works within a +- 128 KB range for cxt:
11567 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11568 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11569 gettr tr1,r1; blink tr0,r63 */
11570 /* Address rounding makes it hard to compute the exact bounds of the
11571 offset for this trampoline, but we have a rather generous offset
11572 range, so frame_offset should do fine as an upper bound. */
11573 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11575 /* ??? could optimize this trampoline initialization
11576 by writing DImode words with two insns each. */
11577 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11578 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11579 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11580 insn = gen_rtx_AND (DImode, insn, mask);
11581 /* Or in ptb/u .,tr1 pattern */
11582 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11583 insn = force_operand (insn, NULL_RTX);
11584 insn = gen_lowpart (SImode, insn);
11585 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11586 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11587 insn = gen_rtx_AND (DImode, insn, mask);
11588 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11589 insn = gen_lowpart (SImode, insn);
11590 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11591 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11592 insn = gen_rtx_AND (DImode, insn, mask);
11593 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11594 insn = gen_lowpart (SImode, insn);
11595 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11596 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11597 insn = gen_rtx_AND (DImode, insn, mask);
11598 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11599 insn = gen_lowpart (SImode, insn);
11600 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11601 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11602 insn = gen_rtx_AND (DImode, insn, mask);
11603 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11604 insn = gen_lowpart (SImode, insn);
11605 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11606 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11607 GEN_INT (0x6bf10600));
11608 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11609 GEN_INT (0x4415fc10));
11610 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11611 GEN_INT (0x4401fff0));
11612 emit_insn (gen_ic_invalidate_line (tramp));
11613 return;
11615 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11616 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11618 tramp_templ = gen_datalabel_ref (tramp_templ);
11619 dst = tramp_mem;
11620 src = gen_const_mem (BLKmode, tramp_templ);
11621 set_mem_align (dst, 256);
11622 set_mem_align (src, 64);
11623 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11625 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11626 emit_move_insn (adjust_address (tramp_mem, Pmode,
11627 fixed_len + GET_MODE_SIZE (Pmode)),
11628 cxt);
11629 emit_insn (gen_ic_invalidate_line (tramp));
11630 return;
11632 else if (TARGET_SHMEDIA)
11634 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11635 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11636 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11637 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11638 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11639 rotated 10 right, and higher 16 bit of every 32 selected. */
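/* Sanity check of the constant below: 0xcc000010_c8000010 rotated right by
   10 bits is 0x0433000004320000; the high 16 bits of each 32-bit half are
   0x0433 and 0x0432, which together form the 0x4330432 value. */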
11640 rtx movishori
11641 = force_reg (V2HImode, (simplify_gen_subreg
11642 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11643 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11644 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11646 fnaddr = force_reg (SImode, fnaddr);
11647 cxt = force_reg (SImode, cxt);
11648 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11649 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11650 movishori));
11651 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11652 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11653 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11654 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11655 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11656 gen_rtx_SUBREG (V2HImode, cxt, 0),
11657 movishori));
11658 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11659 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11660 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11661 if (TARGET_LITTLE_ENDIAN)
11663 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11664 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11666 else
11668 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11669 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11671 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11672 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11673 emit_insn (gen_ic_invalidate_line (tramp));
11674 return;
11676 else if (TARGET_SHCOMPACT)
11678 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11679 return;
11681 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11682 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11683 SImode));
11684 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11685 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11686 SImode));
11687 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11688 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11689 if (TARGET_HARD_SH4 || TARGET_SH5)
11691 if (!TARGET_INLINE_IC_INVALIDATE
11692 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11693 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11694 FUNCTION_ORDINARY),
11695 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11696 else
11697 emit_insn (gen_ic_invalidate_line (tramp));
11701 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11702 static rtx
11703 sh_trampoline_adjust_address (rtx tramp)
11705 if (TARGET_SHMEDIA)
11706 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11707 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11708 return tramp;
11711 /* FIXME: This is overly conservative. A SHcompact function that
11712 receives arguments ``by reference'' will have them stored in its
11713 own stack frame, so it must not pass pointers or references to
11714 these arguments to other functions by means of sibling calls. */
11715 /* If PIC, we cannot make sibling calls to global functions
11716 because the PLT requires r12 to be live. */
11717 static bool
11718 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11720 return (1
11721 && (! TARGET_SHCOMPACT
11722 || crtl->args.info.stack_regs == 0)
11723 && ! sh_cfun_interrupt_handler_p ()
11724 && (! flag_pic
11725 || (decl && ! TREE_PUBLIC (decl))
11726 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11729 /* Machine specific built-in functions. */
11731 struct builtin_description
11733 bool (* const is_enabled) (void);
11734 const enum insn_code icode;
11735 const char *const name;
11736 int signature;
11737 tree fndecl;
11740 static bool
11741 shmedia_builtin_p (void)
11743 return TARGET_SHMEDIA;
11746 /* This function is used for built-ins that are not SHmedia specific,
11747 such as the __builtin_sh_{get,set}_fpscr built-ins below. */
11748 static bool
11749 sh1_builtin_p (void)
11751 return TARGET_SH1;
11754 /* Describe number and signedness of arguments; arg[0] == result
11755 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11756 /* 9: 64-bit pointer, 10: 32-bit pointer */
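/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: the result and the first
   argument take the insn's own mode, and the shift count is unsigned int.
   SH_BLTIN_PSSV is { 0, 8, 2, 2 }: no result (void), a pointer and two
   signed ints. sh_init_builtins decodes these entries into the function
   types passed to add_builtin_function. */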
11757 static const char signature_args[][4] =
11759 #define SH_BLTIN_V2SI2 0
11760 { 4, 4 },
11761 #define SH_BLTIN_V4HI2 1
11762 { 4, 4 },
11763 #define SH_BLTIN_V2SI3 2
11764 { 4, 4, 4 },
11765 #define SH_BLTIN_V4HI3 3
11766 { 4, 4, 4 },
11767 #define SH_BLTIN_V8QI3 4
11768 { 4, 4, 4 },
11769 #define SH_BLTIN_MAC_HISI 5
11770 { 1, 4, 4, 1 },
11771 #define SH_BLTIN_SH_HI 6
11772 { 4, 4, 1 },
11773 #define SH_BLTIN_SH_SI 7
11774 { 4, 4, 1 },
11775 #define SH_BLTIN_V4HI2V2SI 8
11776 { 4, 4, 4 },
11777 #define SH_BLTIN_V4HI2V8QI 9
11778 { 4, 4, 4 },
11779 #define SH_BLTIN_SISF 10
11780 { 4, 2 },
11781 #define SH_BLTIN_LDUA_L 11
11782 { 2, 10 },
11783 #define SH_BLTIN_LDUA_Q 12
11784 { 1, 10 },
11785 #define SH_BLTIN_STUA_L 13
11786 { 0, 10, 2 },
11787 #define SH_BLTIN_STUA_Q 14
11788 { 0, 10, 1 },
11789 #define SH_BLTIN_LDUA_L64 15
11790 { 2, 9 },
11791 #define SH_BLTIN_LDUA_Q64 16
11792 { 1, 9 },
11793 #define SH_BLTIN_STUA_L64 17
11794 { 0, 9, 2 },
11795 #define SH_BLTIN_STUA_Q64 18
11796 { 0, 9, 1 },
11797 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11798 #define SH_BLTIN_2 19
11799 #define SH_BLTIN_SU 19
11800 { 1, 2 },
11801 #define SH_BLTIN_3 20
11802 #define SH_BLTIN_SUS 20
11803 { 2, 2, 1 },
11804 #define SH_BLTIN_PSSV 21
11805 { 0, 8, 2, 2 },
11806 #define SH_BLTIN_XXUU 22
11807 #define SH_BLTIN_UUUU 22
11808 { 1, 1, 1, 1 },
11809 #define SH_BLTIN_PV 23
11810 { 0, 8 },
11811 #define SH_BLTIN_VP 24
11812 { 8, 0 },
11813 #define SH_BLTIN_UV 25
11814 { 1, 0 },
11815 #define SH_BLTIN_VU 26
11816 { 0, 1 },
11818 /* mcmv: operands considered unsigned. */
11819 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11820 /* mperm: control value considered unsigned int. */
11821 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11822 /* mshards_q: returns signed short. */
11823 /* nsb: takes long long arg, returns unsigned char. */
11824 static struct builtin_description bdesc[] =
11826 { shmedia_builtin_p,
11827 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11880 { shmedia_builtin_p,
11881 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11882 { shmedia_builtin_p,
11883 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11884 { shmedia_builtin_p,
11885 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11886 { shmedia_builtin_p,
11887 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11888 { shmedia_builtin_p,
11889 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11890 { shmedia_builtin_p,
11891 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11892 { shmedia_builtin_p,
11893 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11894 { shmedia_builtin_p,
11895 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11896 { shmedia_builtin_p,
11897 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11898 { shmedia_builtin_p,
11899 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11900 { shmedia_builtin_p,
11901 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11902 { shmedia_builtin_p,
11903 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11904 { shmedia_builtin_p,
11905 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11906 { shmedia_builtin_p,
11907 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11908 { shmedia_builtin_p,
11909 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11910 { shmedia_builtin_p,
11911 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11912 { shmedia_builtin_p,
11913 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11914 { shmedia_builtin_p,
11915 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11916 { shmedia_builtin_p,
11917 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11918 { shmedia_builtin_p,
11919 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11920 { shmedia_builtin_p,
11921 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11922 { shmedia_builtin_p,
11923 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11924 { shmedia_builtin_p,
11925 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11926 { shmedia_builtin_p,
11927 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11928 { shmedia_builtin_p,
11929 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11930 { shmedia_builtin_p,
11931 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11932 { shmedia_builtin_p,
11933 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11934 { shmedia_builtin_p,
11935 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11936 { shmedia_builtin_p,
11937 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11938 { shmedia_builtin_p,
11939 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11940 { shmedia_builtin_p,
11941 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11942 { shmedia_builtin_p,
11943 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11944 { shmedia_builtin_p,
11945 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11946 { shmedia_builtin_p,
11947 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11948 { shmedia_builtin_p,
11949 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11950 { shmedia_builtin_p,
11951 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11952 { shmedia_builtin_p,
11953 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11954 { shmedia_builtin_p,
11955 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11956 { shmedia_builtin_p,
11957 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11958 { shmedia_builtin_p,
11959 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11960 { shmedia_builtin_p,
11961 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11962 { shmedia_builtin_p,
11963 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11964 { shmedia_builtin_p,
11965 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11966 { shmedia_builtin_p,
11967 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11968 { shmedia_builtin_p,
11969 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11970 { shmedia_builtin_p,
11971 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11972 { shmedia_builtin_p,
11973 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11974 { shmedia_builtin_p,
11975 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11976 { shmedia_builtin_p,
11977 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11978 { shmedia_builtin_p,
11979 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11980 { shmedia_builtin_p,
11981 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11982 { shmedia_builtin_p,
11983 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11984 { shmedia_builtin_p,
11985 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11986 { shmedia_builtin_p,
11987 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11988 { shmedia_builtin_p,
11989 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11990 { shmedia_builtin_p,
11991 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11993 { sh1_builtin_p,
11994 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11995 { sh1_builtin_p,
11996 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11999 static tree sh_builtin_get_fpscr;
12000 static tree sh_builtin_set_fpscr;
12002 static void
12003 sh_init_builtins (void)
12005 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
12006 memset (shared, 0, sizeof shared);
12008 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12010 builtin_description* d = &bdesc[di];
12012 if (!d->is_enabled ())
12013 continue;
12015 tree type, arg_type = NULL_TREE;
12016 int signature = d->signature;
12018 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12019 type = shared[signature];
12020 else
12022 int has_result = signature_args[signature][0] != 0;
12023 tree args[3];
12025 if ((signature_args[signature][1] & 8)
12026 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12027 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12028 continue;
12029 if (! TARGET_FPU_ANY
12030 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12031 continue;
12032 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12033 args[i] = NULL_TREE;
12034 for (int i = 3; ; i--)
12036 int arg = signature_args[signature][i];
12037 int opno = i - 1 + has_result;
12039 if (arg & 8)
12040 arg_type = ptr_type_node;
12041 else if (arg)
12042 arg_type = (*lang_hooks.types.type_for_mode)
12043 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12044 else if (i)
12045 continue;
12046 else
12047 arg_type = void_type_node;
12048 if (i == 0)
12049 break;
12050 args[i-1] = arg_type;
12052 type = build_function_type_list (arg_type, args[0], args[1],
12053 args[2], NULL_TREE);
12054 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12055 shared[signature] = type;
12057 d->fndecl =
12058 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12059 NULL, NULL_TREE);
12060 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12061 if (d->icode == CODE_FOR_sts_fpscr)
12062 sh_builtin_get_fpscr = d->fndecl;
12063 else if (d->icode == CODE_FOR_set_fpscr)
12064 sh_builtin_set_fpscr = d->fndecl;
12068 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12070 static void
12071 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12073 const unsigned SH_FE_INVALID = 64;
12074 const unsigned SH_FE_DIVBYZERO = 32;
12075 const unsigned SH_FE_OVERFLOW = 16;
12076 const unsigned SH_FE_UNDERFLOW = 8;
12077 const unsigned SH_FE_INEXACT = 4;
12078 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12079 | SH_FE_DIVBYZERO
12080 | SH_FE_OVERFLOW
12081 | SH_FE_UNDERFLOW
12082 | SH_FE_INEXACT);
12083 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
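/* These constants appear to follow the SH-4 FPSCR layout: the exception flag
   field occupies bits 2..6, and shifting by SH_FE_EXCEPT_SHIFT (5) reaches
   the matching enable bits 7..11, so the mask built below clears both
   fields at once. */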
12084 tree fenv_var, mask, ld_fenv, masked_fenv;
12085 tree new_fenv_var, reload_fenv, restore_fnenv;
12086 tree update_call, atomic_feraiseexcept, hold_fnclex;
12088 if (! TARGET_FPU_ANY)
12089 return;
12091 /* Generate the equivalent of :
12092 unsigned int fenv_var;
12093 fenv_var = __builtin_sh_get_fpscr ();
12095 unsigned int masked_fenv;
12096 masked_fenv = fenv_var & mask;
12098 __builtin_sh_set_fpscr (masked_fenv); */
12100 fenv_var = create_tmp_var (unsigned_type_node);
12101 mask = build_int_cst (unsigned_type_node,
12102 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12103 | SH_FE_ALL_EXCEPT));
12104 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12105 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12106 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12107 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12108 *hold = build2 (COMPOUND_EXPR, void_type_node,
12109 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12110 hold_fnclex);
12112 /* Store the value of masked_fenv to clear the exceptions:
12113 __builtin_sh_set_fpscr (masked_fenv); */
12115 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12117 /* Generate the equivalent of :
12118 unsigned int new_fenv_var;
12119 new_fenv_var = __builtin_sh_get_fpscr ();
12121 __builtin_sh_set_fpscr (fenv_var);
12123 __atomic_feraiseexcept (new_fenv_var); */
12125 new_fenv_var = create_tmp_var (unsigned_type_node);
12126 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12127 build_call_expr (sh_builtin_get_fpscr, 0));
12128 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12129 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12130 update_call = build_call_expr (atomic_feraiseexcept, 1,
12131 fold_convert (integer_type_node,
12132 new_fenv_var));
12133 *update = build2 (COMPOUND_EXPR, void_type_node,
12134 build2 (COMPOUND_EXPR, void_type_node,
12135 reload_fenv, restore_fnenv), update_call);
12138 /* Implements target hook vector_mode_supported_p. */
12139 bool
12140 sh_vector_mode_supported_p (machine_mode mode)
12142 if (TARGET_FPU_ANY
12143 && ((mode == V2SFmode)
12144 || (mode == V4SFmode)
12145 || (mode == V16SFmode)))
12146 return true;
12148 else if (TARGET_SHMEDIA
12149 && ((mode == V8QImode)
12150 || (mode == V2HImode)
12151 || (mode == V4HImode)
12152 || (mode == V2SImode)))
12153 return true;
12155 return false;
12158 bool
12159 sh_frame_pointer_required (void)
12161 /* If needed override this in other tm.h files to cope with various OS
12162 lossage requiring a frame pointer. */
12163 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12164 return true;
12166 if (crtl->profile)
12167 return true;
12169 return false;
12172 /* Implements target hook dwarf_calling_convention. Return an enum
12173 of dwarf_calling_convention. */
12174 int
12175 sh_dwarf_calling_convention (const_tree func)
12177 if (sh_attr_renesas_p (func))
12178 return DW_CC_GNU_renesas_sh;
12180 return DW_CC_normal;
12183 /* Returns the sh builtin decl for CODE. */
12184 static tree
12185 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12187 if (code >= ARRAY_SIZE (bdesc))
12188 return error_mark_node;
12190 if (!bdesc[code].is_enabled ())
12191 return error_mark_node;
12193 return bdesc[code].fndecl;
12196 /* Expand an expression EXP that calls a built-in function,
12197 with result going to TARGET if that's convenient
12198 (and in mode MODE if that's convenient).
12199 SUBTARGET may be used as the target for computing one of EXP's operands.
12200 IGNORE is nonzero if the value is to be ignored. */
12201 static rtx
12202 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12203 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12205 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12206 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12207 const struct builtin_description *d = &bdesc[fcode];
12208 enum insn_code icode = d->icode;
12209 int signature = d->signature;
12210 int nop = 0;
12211 rtx op[4];
12213 if (signature_args[signature][0])
12215 if (ignore)
12216 return NULL_RTX;
12218 machine_mode tmode = insn_data[icode].operand[0].mode;
12219 if (! target || GET_MODE (target) != tmode
12220 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12221 target = gen_reg_rtx (tmode);
12222 op[nop++] = target;
12224 else
12225 target = NULL_RTX;
12227 for (int i = 1; i <= 3; i++, nop++)
12229 tree arg;
12230 machine_mode opmode, argmode;
12231 tree optype;
12233 if (! signature_args[signature][i])
12234 break;
12235 arg = CALL_EXPR_ARG (exp, i - 1);
12236 if (arg == error_mark_node)
12237 return const0_rtx;
12238 if (signature_args[signature][i] & 8)
12240 opmode = ptr_mode;
12241 optype = ptr_type_node;
12243 else
12245 opmode = insn_data[icode].operand[nop].mode;
12246 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12248 argmode = TYPE_MODE (TREE_TYPE (arg));
12249 if (argmode != opmode)
12250 arg = build1 (NOP_EXPR, optype, arg);
12251 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12252 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12253 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12256 rtx pat = NULL_RTX;
12258 switch (nop)
12260 case 1:
12261 pat = (*insn_data[d->icode].genfun) (op[0]);
12262 break;
12263 case 2:
12264 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12265 break;
12266 case 3:
12267 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12268 break;
12269 case 4:
12270 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12271 break;
12272 default:
12273 gcc_unreachable ();
12275 if (! pat)
12276 return NULL_RTX;
12277 emit_insn (pat);
12278 return target;
12281 void
12282 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12284 rtx sel0 = const0_rtx;
12285 rtx sel1 = const1_rtx;
12286 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12287 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12289 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12290 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12293 void
12294 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12296 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12298 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12299 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12302 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12303 We can allow any mode in any general register. The special registers
12304 only allow SImode. Don't allow any mode in the PR.
12306 We cannot hold DCmode values in the XD registers because alter_reg
12307 handles subregs of them incorrectly. We could work around this by
12308 spacing the XD registers like the DR registers, but this would require
12309 additional memory in every compilation to hold larger register vectors.
12310 We could hold SFmode / SCmode values in XD registers, but that
12311 would require a tertiary reload when reloading from / to memory,
12312 and a secondary reload to reload from / to general regs; that
12313 seems to be a losing proposition.
12315 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12316 it won't be ferried through GP registers first. */
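/* Two concrete consequences of the rules below, for illustration: on SH4,
   DFmode may start only at an even-numbered FP register (fr0, fr2, ...),
   and V4SFmode needs an FP register number that is a multiple of four from
   FIRST_FP_REG, or else a general register. */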
12317 bool
12318 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12320 if (SPECIAL_REGISTER_P (regno))
12321 return mode == SImode;
12323 if (regno == FPUL_REG)
12324 return (mode == SImode || mode == SFmode);
12326 if (FP_REGISTER_P (regno) && mode == SFmode)
12327 return true;
12329 if (mode == V2SFmode)
12331 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12332 || GENERAL_REGISTER_P (regno)))
12333 return true;
12334 else
12335 return false;
12338 if (mode == V4SFmode)
12340 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12341 || GENERAL_REGISTER_P (regno))
12342 return true;
12343 else
12344 return false;
12347 if (mode == V16SFmode)
12349 if (TARGET_SHMEDIA)
12351 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12352 return true;
12353 else
12354 return false;
12356 else
12357 return regno == FIRST_XD_REG;
12360 if (FP_REGISTER_P (regno))
12362 if (mode == SFmode
12363 || mode == SImode
12364 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12365 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12366 || mode == DCmode
12367 || (TARGET_SHMEDIA
12368 && (mode == DFmode || mode == DImode
12369 || mode == V2SFmode || mode == TImode)))
12370 && ((regno - FIRST_FP_REG) & 1) == 0)
12371 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12372 && ((regno - FIRST_FP_REG) & 3) == 0))
12373 return true;
12374 else
12375 return false;
12378 if (XD_REGISTER_P (regno))
12379 return mode == DFmode;
12381 if (TARGET_REGISTER_P (regno))
12382 return (mode == DImode || mode == SImode || mode == PDImode);
12384 if (regno == PR_REG)
12385 return mode == SImode;
12387 if (regno == FPSCR_REG)
12388 return mode == SImode;
12390 /* FIXME. This works around PR target/37633 for -O0. */
12391 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12393 unsigned int n = GET_MODE_SIZE (mode) / 8;
12395 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12396 && regno <= FIRST_GENERAL_REG + 14)
12397 return false;
12400 return true;
12403 /* Specify the modes required to caller save a given hard regno.
12404 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12405 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12406 permits integer modes on them. That makes LRA's split process
12407 unhappy. See PR55212. */
12409 machine_mode
12410 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12411 machine_mode mode)
12413 if (FP_REGISTER_P (regno)
12414 && (mode == SFmode
12415 || mode == SCmode
12416 || ((mode == DFmode || mode == DCmode)
12417 && ((regno - FIRST_FP_REG) & 1) == 0)))
12418 return mode;
12420 return choose_hard_reg_mode (regno, nregs, false);
12423 /* Return true if a mode change from FROM to TO is not allowed for
12424 registers in class RCLASS. */
12425 bool
12426 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12427 enum reg_class rclass)
12429 /* We want to enable the use of SUBREGs as a means to
12430 VEC_SELECT a single element of a vector. */
12432 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12433 This can be problematic when SFmode vector subregs need to be accessed
12434 on the stack with displacement addressing, as it happens with -O0.
12435 Thus we disallow the mode change for -O0. */
12436 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12437 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12439 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12441 if (TARGET_LITTLE_ENDIAN)
12443 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12444 return reg_classes_intersect_p (DF_REGS, rclass);
12446 else
12448 if (GET_MODE_SIZE (from) < 8)
12449 return reg_classes_intersect_p (DF_REGS, rclass);
12452 return false;
12455 /* Return true if registers in machine mode MODE will likely be
12456 allocated to registers in small register classes. */
12457 bool
12458 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12460 return (! TARGET_SHMEDIA);
12463 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12464 that label is used. */
12465 void
12466 sh_mark_label (rtx address, int nuses)
12468 if (GOTOFF_P (address))
12470 /* Extract the label or symbol. */
12471 address = XEXP (address, 0);
12472 if (GET_CODE (address) == PLUS)
12473 address = XEXP (address, 0);
12474 address = XVECEXP (address, 0, 0);
12476 if (GET_CODE (address) == LABEL_REF
12477 && LABEL_P (XEXP (address, 0)))
12478 LABEL_NUSES (XEXP (address, 0)) += nuses;
12481 /* Compute extra cost of moving data between one register class
12482 and another.
12484 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12485 uses this information. Hence, the general register <-> floating point
12486 register information here is not used for SFmode. */
12487 static int
12488 sh_register_move_cost (machine_mode mode,
12489 reg_class_t srcclass, reg_class_t dstclass)
12491 if (dstclass == T_REGS || dstclass == PR_REGS)
12492 return 10;
12494 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12495 return 4;
12497 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12498 && REGCLASS_HAS_FP_REG (srcclass)
12499 && REGCLASS_HAS_FP_REG (dstclass))
12500 return 4;
12502 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12503 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12505 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12506 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12507 return 9;
12509 if ((REGCLASS_HAS_FP_REG (dstclass)
12510 && REGCLASS_HAS_GENERAL_REG (srcclass))
12511 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12512 && REGCLASS_HAS_FP_REG (srcclass)))
12514 /* Discourage trying to use fp regs for a pointer. This also
12515 discourages fp regs with SImode because Pmode is an alias
12516 of SImode on this target. See PR target/48596. */
12517 int addend = (mode == Pmode) ? 40 : 0;
12519 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12520 * ((GET_MODE_SIZE (mode) + 7) / 8U));
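/* Rough example of the formula above, with neither SHMEDIA nor FMOVD:
   moving a DFmode value between a general and an FP register costs
   (12 + 0) * 1 = 12, while an SImode pointer (Pmode) costs
   (12 + 40) * 1 = 52, steering pointers away from FP registers. */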
12523 if ((dstclass == FPUL_REGS
12524 && REGCLASS_HAS_GENERAL_REG (srcclass))
12525 || (srcclass == FPUL_REGS
12526 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12527 return 5;
12529 if ((dstclass == FPUL_REGS
12530 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12531 || (srcclass == FPUL_REGS
12532 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12533 return 7;
12535 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12536 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12537 return 20;
12539 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12540 if (TARGET_SHMEDIA
12541 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12543 if (sh_gettrcost >= 0)
12544 return sh_gettrcost;
12545 else if (!TARGET_PT_FIXED)
12546 return 100;
12549 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12550 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12551 return 4;
12553 if (TARGET_SHMEDIA
12554 || (TARGET_FMOVD
12555 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12556 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12557 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12559 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12562 static rtx
12563 emit_load_ptr (rtx reg, rtx addr)
12565 rtx mem = gen_const_mem (ptr_mode, addr);
12567 if (Pmode != ptr_mode)
12568 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12569 return emit_move_insn (reg, mem);
12572 static void
12573 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12574 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12575 tree function)
12577 CUMULATIVE_ARGS cum;
12578 int structure_value_byref = 0;
12579 rtx this_rtx, this_value, sibcall, funexp;
12580 rtx_insn *insns;
12581 tree funtype = TREE_TYPE (function);
12582 int simple_add = CONST_OK_FOR_ADD (delta);
12583 int did_load = 0;
12584 rtx scratch0, scratch1, scratch2;
12585 unsigned i;
12587 reload_completed = 1;
12588 epilogue_completed = 1;
12589 crtl->uses_only_leaf_regs = 1;
12591 emit_note (NOTE_INSN_PROLOGUE_END);
12593 /* Find the "this" pointer. We have such a wide range of ABIs for the
12594 SH that it's best to do this completely machine independently.
12595 "this" is passed as first argument, unless a structure return pointer
12596 comes first, in which case "this" comes second. */
12597 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12598 #ifndef PCC_STATIC_STRUCT_RETURN
12599 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12600 structure_value_byref = 1;
12601 #endif /* not PCC_STATIC_STRUCT_RETURN */
12602 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12604 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12606 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12608 this_rtx
12609 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12611 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12612 static chain pointer (even if you can't have nested virtual functions
12613 right now, someone might implement them sometime), and the rest of the
12614 registers are used for argument passing, are callee-saved, or reserved. */
12615 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12616 -ffixed-reg has been used. */
12617 if (! call_used_regs[0] || fixed_regs[0])
12618 error ("r0 needs to be available as a call-clobbered register");
12619 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12620 if (! TARGET_SH5)
12622 if (call_used_regs[1] && ! fixed_regs[1])
12623 scratch1 = gen_rtx_REG (ptr_mode, 1);
12624 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12625 pointing where to return struct values. */
12626 if (call_used_regs[3] && ! fixed_regs[3])
12627 scratch2 = gen_rtx_REG (Pmode, 3);
12629 else if (TARGET_SHMEDIA)
12631 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12632 if (i != REGNO (scratch0) &&
12633 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12635 scratch1 = gen_rtx_REG (ptr_mode, i);
12636 break;
12638 if (scratch1 == scratch0)
12639 error ("need a second call-clobbered general purpose register");
12640 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12641 if (call_used_regs[i] && ! fixed_regs[i])
12643 scratch2 = gen_rtx_REG (Pmode, i);
12644 break;
12646 if (scratch2 == scratch0)
12647 error ("need a call-clobbered target register");
12650 this_value = plus_constant (Pmode, this_rtx, delta);
12651 if (vcall_offset
12652 && (simple_add || scratch0 != scratch1)
12653 && strict_memory_address_p (ptr_mode, this_value))
12655 emit_load_ptr (scratch0, this_value);
12656 did_load = 1;
12659 if (!delta)
12660 ; /* Do nothing. */
12661 else if (simple_add)
12662 emit_move_insn (this_rtx, this_value);
12663 else
12665 emit_move_insn (scratch1, GEN_INT (delta));
12666 emit_insn (gen_add2_insn (this_rtx, scratch1));
12669 if (vcall_offset)
12671 rtx offset_addr;
12673 if (!did_load)
12674 emit_load_ptr (scratch0, this_rtx);
12676 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12677 if (strict_memory_address_p (ptr_mode, offset_addr))
12678 ; /* Do nothing. */
12679 else if (! TARGET_SH5 && scratch0 != scratch1)
12681 /* scratch0 != scratch1, and we have indexed loads. Get better
12682 schedule by loading the offset into r1 and using an indexed
12683 load - then the load of r1 can issue before the load from
12684 (this_rtx + delta) finishes. */
12685 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12686 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12688 else if (CONST_OK_FOR_ADD (vcall_offset))
12690 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12691 offset_addr = scratch0;
12693 else if (scratch0 != scratch1)
12695 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12696 emit_insn (gen_add2_insn (scratch0, scratch1));
12697 offset_addr = scratch0;
12699 else
12700 gcc_unreachable (); /* FIXME */
12701 emit_load_ptr (scratch0, offset_addr);
12703 if (Pmode != ptr_mode)
12704 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12705 emit_insn (gen_add2_insn (this_rtx, scratch0));
12708 /* Generate a tail call to the target function. */
12709 if (! TREE_USED (function))
12711 assemble_external (function);
12712 TREE_USED (function) = 1;
12714 funexp = XEXP (DECL_RTL (function), 0);
12715 /* If the function is overridden, so is the thunk, hence we don't
12716 need GOT addressing even if this is a public symbol. */
12717 #if 0
12718 if (TARGET_SH1 && ! flag_weak)
12719 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12720 else
12721 #endif
12722 if (TARGET_SH2 && flag_pic)
12724 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12725 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12727 else
12729 if (TARGET_SHMEDIA && flag_pic)
12731 funexp = gen_sym2PIC (funexp);
12732 PUT_MODE (funexp, Pmode);
12734 emit_move_insn (scratch2, funexp);
12735 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12736 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12738 sibcall = emit_call_insn (sibcall);
12739 SIBLING_CALL_P (sibcall) = 1;
12740 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12741 emit_barrier ();
12743 /* Run just enough of rest_of_compilation to do scheduling and get
12744 the insns emitted. Note that use_thunk calls
12745 assemble_start_function and assemble_end_function. */
12747 insns = get_insns ();
12749 if (optimize > 0)
12751 if (! cfun->cfg)
12752 init_flow (cfun);
12753 split_all_insns_noflow ();
12756 sh_reorg ();
12757 shorten_branches (insns);
12758 final_start_function (insns, file, 1);
12759 final (insns, file, 1);
12760 final_end_function ();
12762 reload_completed = 0;
12763 epilogue_completed = 0;
12766 rtx
12767 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12769 rtx sym;
12771 /* If this is not an ordinary function, the name usually comes from a
12772 string literal or an sprintf buffer. Make sure we use the same
12773 string consistently, so that cse will be able to unify address loads. */
12774 if (kind != FUNCTION_ORDINARY)
12775 name = IDENTIFIER_POINTER (get_identifier (name));
12776 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12777 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12778 if (flag_pic)
12779 switch (kind)
12781 case FUNCTION_ORDINARY:
12782 break;
12783 case SFUNC_GOT:
12785 rtx reg = target ? target : gen_reg_rtx (Pmode);
12787 emit_insn (gen_symGOT2reg (reg, sym));
12788 sym = reg;
12789 break;
12791 case SFUNC_STATIC:
12793 /* ??? To allow cse to work, we use GOTOFF relocations.
12794 We could add combiner patterns to transform this into
12795 straight pc-relative calls with sym2PIC / bsrf when
12796 label load and function call are still 1:1 and in the
12797 same basic block during combine. */
12798 rtx reg = target ? target : gen_reg_rtx (Pmode);
12800 emit_insn (gen_symGOTOFF2reg (reg, sym));
12801 sym = reg;
12802 break;
12805 if (target && sym != target)
12807 emit_move_insn (target, sym);
12808 return target;
12810 return sym;
12813 /* Find the number of a general purpose register in S. */
12814 static int
12815 scavenge_reg (HARD_REG_SET *s)
12817 int r;
12818 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12819 if (TEST_HARD_REG_BIT (*s, r))
12820 return r;
12821 return -1;
12824 rtx
12825 sh_get_pr_initial_val (void)
12827 rtx val;
12829 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12830 PR register on SHcompact, because it might be clobbered by the prologue.
12831 We check first if that is known to be the case. */
12832 if (TARGET_SHCOMPACT
12833 && ((crtl->args.info.call_cookie
12834 & ~ CALL_COOKIE_RET_TRAMP (1))
12835 || crtl->saves_all_registers))
12836 return gen_frame_mem (SImode, return_address_pointer_rtx);
12838 /* If we haven't finished rtl generation, there might be a nonlocal label
12839 that we haven't seen yet.
12840 ??? get_hard_reg_initial_val fails if it is called after register
12841 allocation has started, unless it has been called before for the
12842 same register. And even then, we end in trouble if we didn't use
12843 the register in the same basic block before. So call
12844 get_hard_reg_initial_val now and wrap it in an unspec if we might
12845 need to replace it. */
12846 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12847 combine can put the pseudo returned by get_hard_reg_initial_val into
12848 instructions that need a general purpose registers, which will fail to
12849 be recognized when the pseudo becomes allocated to PR. */
12850 val
12851 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12852 if (TARGET_SH1)
12853 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12854 return val;
12857 bool
12858 sh_expand_t_scc (rtx operands[])
12860 enum rtx_code code = GET_CODE (operands[1]);
12861 rtx target = operands[0];
12862 rtx op0 = operands[2];
12863 rtx op1 = operands[3];
12864 rtx result = target;
12865 HOST_WIDE_INT val;
12867 if (!REG_P (op0) || REGNO (op0) != T_REG
12868 || !CONST_INT_P (op1))
12869 return false;
12870 if (!REG_P (result))
12871 result = gen_reg_rtx (SImode);
12872 val = INTVAL (op1);
12873 if ((code == EQ && val == 1) || (code == NE && val == 0))
12874 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12875 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12876 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12877 else if (code == EQ || code == NE)
12878 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12879 else
12880 return false;
12881 if (result != target)
12882 emit_move_insn (target, result);
12883 return true;
12886 /* INSN is an sfunc; return the rtx that describes the address used. */
12887 static rtx
12888 extract_sfunc_addr (rtx insn)
12890 rtx pattern, part = NULL_RTX;
12891 int len, i;
12893 pattern = PATTERN (insn);
12894 len = XVECLEN (pattern, 0);
12895 for (i = 0; i < len; i++)
12897 part = XVECEXP (pattern, 0, i);
12898 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12899 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12900 return XEXP (part, 0);
12902 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12903 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12906 /* Verify that the register in use_sfunc_addr still agrees with the address
12907 used in the sfunc. This prevents fill_slots_from_thread from changing
12908 use_sfunc_addr.
12909 INSN is the use_sfunc_addr instruction, and REG is the register it
12910 guards. */
12911 bool
12912 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12914 /* Search for the sfunc. It should really come right after INSN. */
12915 while ((insn = NEXT_INSN (insn)))
12917 if (LABEL_P (insn) || JUMP_P (insn))
12918 break;
12919 if (! INSN_P (insn))
12920 continue;
12922 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12923 insn = seq->insn (0);
12924 if (GET_CODE (PATTERN (insn)) != PARALLEL
12925 || get_attr_type (insn) != TYPE_SFUNC)
12926 continue;
12927 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12929 gcc_unreachable ();
12932 /* This function returns a constant rtx that represents 2**15 / pi in
12933 SFmode. It's used to scale an SFmode angle, in radians, to a fixed-point
12934 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12935 static GTY(()) rtx sh_fsca_sf2int_rtx;
12938 sh_fsca_sf2int (void)
12940 if (! sh_fsca_sf2int_rtx)
12942 REAL_VALUE_TYPE rv;
12944 real_from_string (&rv, "10430.378350470453");
12945 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12948 return sh_fsca_sf2int_rtx;
12951 /* This function returns a constant rtx that represents pi / 2**15 in
12952 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12953 of a full circle back to an SFmode angle in radians, i.e. 0x10000
12954 maps to 2*pi. */
12955 static GTY(()) rtx sh_fsca_int2sf_rtx;
12958 sh_fsca_int2sf (void)
12960 if (! sh_fsca_int2sf_rtx)
12962 REAL_VALUE_TYPE rv;
12964 real_from_string (&rv, "9.587379924285257e-5");
12965 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12968 return sh_fsca_int2sf_rtx;
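/* A quick check of the two constants above, using the fixed-point angle
   format described in the comments (0x10000 represents one full turn):

     2**15 / pi = 32768 / 3.14159265...  ~= 10430.378350470453
     pi / 2**15 = 3.14159265... / 32768  ~= 9.587379924285257e-5

   Multiplying an angle of 2*pi radians by the first constant gives
   2*pi * 2**15 / pi = 65536 = 0x10000, and the second constant is its
   reciprocal for the reverse conversion.  */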
12971 /* Initialize the CUMULATIVE_ARGS structure. */
12972 void
12973 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12974 tree fntype,
12975 rtx libname ATTRIBUTE_UNUSED,
12976 tree fndecl,
12977 signed int n_named_args,
12978 machine_mode mode)
12980 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12981 pcum->free_single_fp_reg = 0;
12982 pcum->stack_regs = 0;
12983 pcum->byref_regs = 0;
12984 pcum->byref = 0;
12985 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12987 /* XXX - Should we check TARGET_HITACHI here ??? */
12988 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12990 if (fntype)
12992 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12993 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12994 pcum->prototype_p = prototype_p (fntype);
12995 pcum->arg_count [(int) SH_ARG_INT]
12996 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12998 pcum->call_cookie
12999 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13000 && pcum->arg_count [(int) SH_ARG_INT] == 0
13001 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
13002 ? int_size_in_bytes (TREE_TYPE (fntype))
13003 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
13004 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
13005 == FIRST_RET_REG));
13007 else
13009 pcum->arg_count [(int) SH_ARG_INT] = 0;
13010 pcum->prototype_p = FALSE;
13011 if (mode != VOIDmode)
13013 pcum->call_cookie =
13014 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13015 && GET_MODE_SIZE (mode) > 4
13016 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13018 /* If the default ABI is the Renesas ABI then all library
13019 calls must assume that the library will be using the
13020 Renesas ABI. So if the function would return its result
13021 in memory then we must force the address of this memory
13022 block onto the stack. Ideally we would like to call
13023 targetm.calls.return_in_memory() here but we do not have
13024 the TYPE or the FNDECL available so we synthesize the
13025 contents of that function as best we can. */
13026 pcum->force_mem =
13027 (TARGET_DEFAULT & MASK_HITACHI)
13028 && (mode == BLKmode
13029 || (GET_MODE_SIZE (mode) > 4
13030 && !(mode == DFmode
13031 && TARGET_FPU_DOUBLE)));
13033 else
13035 pcum->call_cookie = 0;
13036 pcum->force_mem = FALSE;
13042 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13044 enum rtx_code code = TRUNCATE;
13046 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13048 rtx inner = XEXP (x, 0);
13049 machine_mode inner_mode = GET_MODE (inner);
13051 if (inner_mode == mode)
13052 return inner;
13053 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13054 x = inner;
13055 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13056 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13058 code = GET_CODE (x);
13059 x = inner;
13062 return gen_rtx_fmt_e (code, mode, x);
13065 /* Look through X cleaning up truncates of registers that span multiple
13066 actual hard registers. Return the number of changes made. */
13068 shmedia_cleanup_truncate (rtx x)
13070 int n_changes = 0;
13071 subrtx_var_iterator::array_type array;
13072 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13074 rtx x = *iter;
13075 if (GET_CODE (x) == TRUNCATE)
13077 rtx reg = XEXP (x, 0);
13078 machine_mode reg_mode = GET_MODE (reg);
13079 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13081 int offset = subreg_lowpart_offset (DImode, reg_mode);
13082 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13083 n_changes += 1;
13084 iter.skip_subrtxes ();
13088 return n_changes;
13091 /* Load and store depend on the highpart of the address. However,
13092 set_attr_alternative does not give well-defined results before reload,
13093 so we must look at the rtl ourselves to see if any of the feeding
13094 registers is used in a memref.
13096 Return true iff INSN contains a MEM. */
13097 bool
13098 sh_contains_memref_p (rtx insn)
13100 subrtx_iterator::array_type array;
13101 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13102 if (MEM_P (*iter))
13103 return true;
13104 return false;
13107 /* Return true iff INSN loads a banked register. */
13108 bool
13109 sh_loads_bankedreg_p (rtx insn)
13111 if (GET_CODE (PATTERN (insn)) == SET)
13113 rtx op = SET_DEST (PATTERN(insn));
13114 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13115 return true;
13118 return false;
13121 /* FNADDR is the MEM expression from a call expander. Return an address
13122 to use in an SHmedia insn pattern. */
13124 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13126 int is_sym;
13128 fnaddr = XEXP (fnaddr, 0);
13129 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13130 if (flag_pic && is_sym)
13132 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13134 rtx reg = gen_reg_rtx (Pmode);
13136 /* We must not use GOTPLT for sibcalls, because PIC_REG
13137 must be restored before the PLT code gets to run. */
13138 if (is_sibcall)
13139 emit_insn (gen_symGOT2reg (reg, fnaddr));
13140 else
13141 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13142 fnaddr = reg;
13144 else
13146 fnaddr = gen_sym2PIC (fnaddr);
13147 PUT_MODE (fnaddr, Pmode);
13150 /* If ptabs might trap, make this visible to the rest of the compiler.
13151 We generally assume that symbols pertain to valid locations, but
13152 it is possible to generate invalid symbols with asm or linker tricks.
13153 In a list of functions where each returns its successor, an invalid
13154 symbol might denote an empty list. */
13155 if (!TARGET_PT_FIXED
13156 && (!is_sym || TARGET_INVALID_SYMBOLS)
13157 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13159 rtx tr = gen_reg_rtx (PDImode);
13161 emit_insn (gen_ptabs (tr, fnaddr));
13162 fnaddr = tr;
13164 else if (! target_reg_operand (fnaddr, Pmode))
13165 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13166 return fnaddr;
13169 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13170 static reg_class_t
13171 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13173 if (rclass == NO_REGS
13174 && TARGET_SHMEDIA
13175 && (CONST_DOUBLE_P (x)
13176 || GET_CODE (x) == SYMBOL_REF
13177 || PIC_ADDR_P (x)))
13178 return GENERAL_REGS;
13180 return rclass;
13183 /* Implement TARGET_SECONDARY_RELOAD. */
13184 static reg_class_t
13185 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13186 machine_mode mode, secondary_reload_info *sri)
13188 enum reg_class rclass = (enum reg_class) rclass_i;
13190 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13191 && REG_P (XEXP (XEXP (x, 0), 0))
13192 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13193 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13195 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13196 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13198 if (REG_P (x) && REGNO (x) == GBR_REG)
13199 return NO_REGS;
13201 if (in_p)
13203 if (REGCLASS_HAS_FP_REG (rclass)
13204 && ! TARGET_SHMEDIA
13205 && immediate_operand ((x), mode)
13206 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13207 switch (mode)
13209 case SFmode:
13210 sri->icode = CODE_FOR_reload_insf__frn;
13211 return NO_REGS;
13212 case DFmode:
13213 sri->icode = CODE_FOR_reload_indf__frn;
13214 return NO_REGS;
13215 case SImode:
13216 /* ??? If we knew that we are in the appropriate mode -
13217 single precision - we could use a reload pattern directly. */
13218 return FPUL_REGS;
13219 default:
13220 abort ();
13222 if (rclass == FPUL_REGS
13223 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13224 || REGNO (x) == T_REG))
13225 || GET_CODE (x) == PLUS))
13226 return GENERAL_REGS;
13227 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13229 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13230 return GENERAL_REGS;
13231 else if (mode == SFmode)
13232 return FP_REGS;
13233 sri->icode = CODE_FOR_reload_insi__i_fpul;
13234 return NO_REGS;
13236 if (rclass == FPSCR_REGS
13237 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13238 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13239 return GENERAL_REGS;
13240 if (REGCLASS_HAS_FP_REG (rclass)
13241 && TARGET_SHMEDIA
13242 && immediate_operand (x, mode)
13243 && x != CONST0_RTX (GET_MODE (x))
13244 && GET_MODE (x) != V4SFmode)
13245 return GENERAL_REGS;
13246 if ((mode == QImode || mode == HImode)
13247 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13249 sri->icode = ((mode == QImode)
13250 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13251 return NO_REGS;
13253 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13254 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13255 return TARGET_REGS;
13256 } /* end of input-only processing. */
13258 if (((REGCLASS_HAS_FP_REG (rclass)
13259 && (REG_P (x)
13260 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13261 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13262 && TARGET_FMOVD))))
13263 || (REGCLASS_HAS_GENERAL_REG (rclass)
13264 && REG_P (x)
13265 && FP_REGISTER_P (REGNO (x))))
13266 && ! TARGET_SHMEDIA
13267 && (mode == SFmode || mode == SImode))
13268 return FPUL_REGS;
13269 if ((rclass == FPUL_REGS
13270 || (REGCLASS_HAS_FP_REG (rclass)
13271 && ! TARGET_SHMEDIA && mode == SImode))
13272 && (MEM_P (x)
13273 || (REG_P (x)
13274 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13275 || REGNO (x) == T_REG
13276 || system_reg_operand (x, VOIDmode)))))
13278 if (rclass == FPUL_REGS)
13279 return GENERAL_REGS;
13280 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
13282 if ((rclass == TARGET_REGS
13283 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13284 && !satisfies_constraint_Csy (x)
13285 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13286 return GENERAL_REGS;
13287 if ((rclass == MAC_REGS || rclass == PR_REGS)
13288 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13289 && rclass != REGNO_REG_CLASS (REGNO (x)))
13290 return GENERAL_REGS;
13291 if (rclass != GENERAL_REGS && REG_P (x)
13292 && TARGET_REGISTER_P (REGNO (x)))
13293 return GENERAL_REGS;
13295 /* If we get here, fall back to loading the FPUL register through general registers.
13296 This case can happen when movsi_ie insn is picked initially to
13297 load/store the FPUL register from/to another register, and then the
13298 other register is allocated on the stack. */
13299 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13300 return GENERAL_REGS;
13302 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13303 the other operand.
13304 On SH2A we could also just leave it alone here, which would result in a
13305 4 byte move insn being generated instead. However, for this to work
13306 the insns must have the appropriate alternatives. */
13307 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13308 && satisfies_constraint_Sdd (x)
13309 && sh_disp_addr_displacement (x)
13310 <= sh_max_mov_insn_displacement (mode, false))
13311 return R0_REGS;
13313 /* When reload is trying to address a QImode or HImode subreg on the stack,
13314 force any subreg byte into R0_REGS, as this is going to become a
13315 displacement address.
13316 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13317 is on the stack, the memref to it might already require a displacement
13318 and that has to be added to the final address. At this point we don't
13319 know the cumulative displacement so we assume the worst case. */
13320 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13321 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13322 return R0_REGS;
13324 return NO_REGS;
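/* A rough illustration of the FPUL handling above (a sketch, assuming
   ordinary SH code generation): an SImode constant that has to end up in
   FPUL cannot be loaded there directly, so GENERAL_REGS is requested as
   the secondary reload class and the final sequence looks roughly like

     mov   #42,r1    ! constant into a general register first
     lds   r1,fpul   ! then move it to FPUL

   Likewise, the GBR based addresses handled at the top force an R0
   secondary reload, since only R0 can be moved to or from a
   @(disp,GBR) memory operand.  */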
13327 /* Return true if SUBST can't safely replace its equivalent during RA. */
13328 static bool
13329 sh_cannot_substitute_mem_equiv_p (rtx)
13331 if (TARGET_SHMEDIA)
13332 return false;
13334 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13335 uses R0 and may cause spill failure when R0 is already used.
13336 We have to return true for that case at least.
13337 Moreover, SH insns have a strong affinity for R0 and there are not
13338 enough hard registers to make the equiv substitution a win in size
13339 or speed on average working sets. The pseudos produced to
13340 hold the equiv values can't get good hard registers in bad cases
13341 and end up in memory save/restore insns, which makes the code worse. */
13342 return true;
13345 /* Return true if DISP can be legitimized. */
13346 static bool
13347 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13348 machine_mode mode)
13350 if (TARGET_SHMEDIA)
13351 return false;
13353 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13354 || (TARGET_SH2E && mode == SFmode))
13355 return false;
13357 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13358 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13360 *disp = adj.mov_disp;
13361 *offs = adj.offset_adjust;
13362 return true;
13365 return false;
13368 /* Return true if the movsf insn should be split with an additional
13369 register. */
13370 bool
13371 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13373 /* op0 == op1 */
13374 if (rtx_equal_p (op0, op1))
13375 return true;
13376 /* fy, FQ, reg */
13377 if (GET_CODE (op1) == CONST_DOUBLE
13378 && ! satisfies_constraint_G (op1)
13379 && ! satisfies_constraint_H (op1)
13380 && REG_P (op0)
13381 && REG_P (op2))
13382 return true;
13383 /* f, r, y */
13384 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13385 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13386 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13387 return true;
13388 /* r, f, y */
13389 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13390 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13391 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13392 return true;
13394 return false;
13397 static void
13398 sh_conditional_register_usage (void)
13400 int regno;
13401 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13402 if (! VALID_REGISTER_P (regno))
13403 fixed_regs[regno] = call_used_regs[regno] = 1;
13404 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13405 if (TARGET_SH5)
13407 call_used_regs[FIRST_GENERAL_REG + 8]
13408 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13409 call_really_used_regs[FIRST_GENERAL_REG + 8]
13410 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13412 if (TARGET_SHMEDIA)
13414 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13415 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13416 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13418 if (flag_pic)
13420 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13421 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13423 /* Renesas saves and restores mac registers on call. */
13424 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13426 call_really_used_regs[MACH_REG] = 0;
13427 call_really_used_regs[MACL_REG] = 0;
13430 if (TARGET_SHMEDIA)
13432 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13433 if (! fixed_regs[regno] && call_really_used_regs[regno])
13434 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13436 else
13437 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13438 if (! fixed_regs[regno] && call_really_used_regs[regno])
13439 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13441 call_really_used_regs[FPSCR_MODES_REG] = 0;
13442 call_really_used_regs[FPSCR_STAT_REG] = 0;
13445 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13447 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13448 static bool
13449 sh_legitimate_constant_p (machine_mode mode, rtx x)
13451 return (TARGET_SHMEDIA
13452 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13453 || x == CONST0_RTX (mode)
13454 || !TARGET_SHMEDIA_FPU
13455 || TARGET_SHMEDIA64)
13456 : (GET_CODE (x) != CONST_DOUBLE
13457 || mode == DFmode || mode == SFmode
13458 || mode == DImode || GET_MODE (x) == VOIDmode));
13461 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13463 static void
13464 sh_init_sync_libfuncs (void)
13466 init_sync_libfuncs (UNITS_PER_WORD);
13469 /* Return true if it is appropriate to emit `ret' instructions in the
13470 body of a function. */
13471 bool
13472 sh_can_use_simple_return_p (void)
13474 HARD_REG_SET live_regs_mask;
13475 int d;
13477 /* Some targets require special return insns. */
13478 if (TARGET_SHMEDIA
13479 || (TARGET_SHCOMPACT
13480 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13481 return false;
13483 if (! reload_completed || frame_pointer_needed)
13484 return false;
13486 /* Moving the prologue around doesn't reduce the size. */
13487 if (optimize_function_for_size_p (cfun))
13488 return false;
13490 /* Finally, allow for pr save. */
13491 d = calc_live_regs (&live_regs_mask);
13493 if (rounded_frame_size (d) > 4)
13494 return false;
13496 return true;
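/* For illustration (a sketch, not an exhaustive description): a small
   leaf function that needs no stack frame and does not have to save PR,
   such as

     int add (int a, int b)
     {
       return a + b;
     }

   can return with a bare rts and is therefore a candidate for the
   simple_return optimization, whereas any epilogue that restores
   registers or adjusts the stack is rejected above.  */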
13499 /*------------------------------------------------------------------------------
13500 Address mode optimization support code
13503 typedef HOST_WIDE_INT disp_t;
13504 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13505 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13506 static const disp_t INVALID_DISP = MAX_DISP;
13508 /* A memory reference which is described by a base register and a
13509 displacement. */
13510 class base_reg_disp
13512 public:
13513 base_reg_disp (rtx br, disp_t d);
13515 bool is_reg (void) const;
13516 bool is_disp (void) const;
13517 rtx reg (void) const;
13518 disp_t disp (void) const;
13520 private:
13521 rtx reg_;
13522 disp_t disp_;
13525 inline
13526 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13527 : reg_ (br), disp_ (d)
13531 inline bool
13532 base_reg_disp::is_reg (void) const
13534 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13537 inline bool
13538 base_reg_disp::is_disp (void) const
13540 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13543 inline rtx
13544 base_reg_disp::reg (void) const
13546 return reg_;
13549 inline disp_t
13550 base_reg_disp::disp (void) const
13552 return disp_;
13555 /* Find the base register and calculate the displacement for a given
13556 address rtx 'x'. */
13557 static base_reg_disp
13558 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13559 rtx base_reg = NULL)
13561 if (REG_P (x))
13563 if (REGNO (x) == GBR_REG)
13564 return base_reg_disp (x, disp);
13566 /* We've reached a hard-reg. This is probably the point where
13567 function args are copied to pseudos. Do not go any further and
13568 stick to the pseudo. If the original mem addr was in a hard reg
13569 from the beginning, it will become the base reg. */
13570 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13571 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13573 /* Find the def of the reg and trace it. If there is more than one
13574 def and they are not the same, assume it's not safe to proceed. */
13575 rtx_insn* last_i = NULL;
13576 rtx last_set = NULL;
13577 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13578 d = DF_REF_NEXT_REG (d))
13580 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13582 /* Accept multiple defs, as long as they are equal. */
13583 if (last_set == NULL || rtx_equal_p (last_set, set))
13585 last_i = DF_REF_INSN (d);
13586 last_set = set;
13588 else
13590 last_i = NULL;
13591 last_set = NULL;
13592 break;
13596 if (last_set != NULL && last_i != NULL)
13597 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13598 XEXP (last_set, 0));
13600 /* If we get here, no previous insn was found that sets the reg.
13601 The input reg is already the base reg. */
13602 return base_reg_disp (x, disp);
13605 else if (GET_CODE (x) == PLUS)
13607 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13608 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13610 /* Either left or right val must be a reg.
13611 We don't handle the case of 'reg + reg' here. */
13612 if (left_val.is_reg () && right_val.is_disp ())
13613 return base_reg_disp (left_val.reg (), left_val.disp ()
13614 + right_val.disp () + disp);
13615 else if (right_val.is_reg () && left_val.is_disp ())
13616 return base_reg_disp (right_val.reg (), right_val.disp ()
13617 + left_val.disp () + disp);
13618 else
13619 return base_reg_disp (base_reg, disp);
13622 else if (CONST_INT_P (x))
13623 return base_reg_disp (NULL, disp + INTVAL (x));
13625 /* Didn't find anything useful. */
13626 return base_reg_disp (base_reg, disp);
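/* A small worked example (hypothetical pseudo RTL, for illustration
   only): given defs and uses along the lines of

     (set (reg 200) (plus (reg GBR) (const_int 4)))
     ... (mem (plus (reg 200) (const_int 8))) ...

   tracing the address of the mem through the def of reg 200 yields the
   base register GBR with an accumulated displacement of 4 + 8 = 12.  */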
13629 /* Given an insn and a memory operand, try to find an equivalent GBR
13630 based memory address and return the corresponding new memory address.
13631 Return NULL_RTX if not found. */
13633 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13635 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13636 return NULL_RTX;
13638 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13639 if (side_effects_p (XEXP (mem, 0)))
13640 return NULL_RTX;
13642 /* When not optimizing there might be no dataflow available. */
13643 if (df == NULL)
13644 return NULL_RTX;
13646 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13648 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13650 /* If GBR is marked as call clobbered we bail out if we see a call.
13651 FIXME: Actually should check if this mem refers to the gbr value
13652 before or after the call. If there is a store_gbr preceding this
13653 mem, it's safe to use GBR for this mem.
13655 If GBR is not marked as call clobbered, but there is some other
13656 def than a call, it's probably a load_gbr upon which we also
13657 bail out to be on the safe side.
13658 FIXME: Should check if we have a use-after-def case, such as
13659 the call case above. */
13660 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13661 d = DF_REF_NEXT_REG (d))
13663 if (CALL_P (DF_REF_INSN (d)))
13665 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13666 return NULL_RTX;
13667 else
13668 continue;
13670 else
13671 return NULL_RTX;
13674 rtx disp = GEN_INT (gbr_disp.disp ());
13675 if (gbr_displacement (disp, GET_MODE (mem)))
13676 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13679 return NULL_RTX;
13682 /*------------------------------------------------------------------------------
13683 Manual insn combine support code.
13686 /* Return true if the specified insn contains any UNSPECs or
13687 UNSPEC_VOLATILEs. */
13688 static bool
13689 sh_unspec_insn_p (rtx x)
13691 subrtx_iterator::array_type array;
13692 FOR_EACH_SUBRTX (i, array, x, ALL)
13693 if (*i != NULL
13694 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13695 return true;
13697 return false;
13700 /* Return true if the register operands of the specified insn are modified
13701 between the specified from and to insns (exclusive of those two). */
13702 bool
13703 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13704 const rtx_insn* from,
13705 const rtx_insn* to)
13707 /* FIXME: Return true for multiple sets for now. */
13708 rtx s = single_set (operands_insn);
13709 if (s == NULL_RTX)
13710 return true;
13712 subrtx_iterator::array_type array;
13713 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13714 if (*i != NULL
13715 && ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13716 return true;
13718 return false;
13721 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13722 negates the T bit and stores the result in the T bit. */
13723 bool
13724 sh_is_nott_insn (const rtx_insn* i)
13726 return i != NULL && GET_CODE (PATTERN (i)) == SET
13727 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13728 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
13732 sh_movt_set_dest (const rtx_insn* i)
13734 if (i == NULL)
13735 return NULL;
13737 const_rtx p = PATTERN (i);
13738 return GET_CODE (p) == SET
13739 && arith_reg_dest (XEXP (p, 0), SImode)
13740 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13743 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13744 that stores the negated T bit in a register, and return the destination
13745 register rtx, or null. */
13747 sh_movrt_set_dest (const rtx_insn* i)
13749 if (i == NULL)
13750 return NULL;
13752 const_rtx p = PATTERN (i);
13754 /* The negc movrt replacement is inside a parallel. */
13755 if (GET_CODE (p) == PARALLEL)
13756 p = XVECEXP (p, 0, 0);
13758 return GET_CODE (p) == SET
13759 && arith_reg_dest (XEXP (p, 0), SImode)
13760 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13763 /* Given an insn and a reg number, tell whether the reg dies or is unused
13764 after the insn. */
13765 bool
13766 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13768 return find_regno_note (i, REG_DEAD, regno) != NULL
13769 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13772 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13773 mark it as being used after the insn. */
13774 void
13775 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13777 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13778 remove_note (i, n);
13779 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13780 remove_note (i, n);
13783 /* Given an insn check if it contains any post/pre inc/dec mem operands and
13784 add the REG_INC notes accordingly.
13785 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13786 FIXME: This function is currently used by peephole2 patterns because
13787 the peephole2 pass does not preserve REG_INC notes. If the notes
13788 are dropped the following passes will do wrong things. */
13789 rtx_insn*
13790 sh_check_add_incdec_notes (rtx_insn* i)
13792 struct for_each_inc_dec_clb
13794 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13795 rtx dest, rtx src ATTRIBUTE_UNUSED,
13796 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13798 gcc_assert (REG_P (dest));
13800 rtx_insn* i = (rtx_insn*)arg;
13801 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13802 add_reg_note (i, REG_INC, dest);
13804 return 0;
13808 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13809 return i;
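/* For example (illustrative only), a post-increment load of the rough
   form

     (set (reg 201) (mem (post_inc (reg 202))))

   must carry a REG_INC note for (reg 202); the helper above re-adds such
   notes after peephole2 has produced auto-inc/dec addresses without
   them.  */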
13812 /* Given an op rtx and an insn, try to find out whether the result of the
13813 specified op consists only of logical operations on T bit stores. */
13814 bool
13815 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13817 if (!logical_operator (op, SImode))
13818 return false;
13820 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13821 int op_is_t_count = 0;
13823 for (int i = 0; i < 2; ++i)
13825 if (t_reg_operand (ops[i], VOIDmode)
13826 || negt_reg_operand (ops[i], VOIDmode))
13827 op_is_t_count++;
13829 else
13831 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13832 prev_nonnote_insn_bb);
13833 if (op_set.set_src == NULL_RTX)
13834 continue;
13836 if (t_reg_operand (op_set.set_src, VOIDmode)
13837 || negt_reg_operand (op_set.set_src, VOIDmode)
13838 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13839 op_is_t_count++;
13843 return op_is_t_count == 2;
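/* To illustrate (a sketch only): in a function such as

     int f (int a, int b, int c, int d)
     {
       return (a == b) | (c == d);
     }

   both operands of the IOR originate from T bit stores, so the whole
   expression satisfies the check above.  This in turn allows a following
   zero or sign extension of the result to be omitted, see
   sh_try_omit_signzero_extend below.  */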
13846 /* Given the operand that is extended in a sign/zero extend insn, and the
13847 insn, try to figure out whether the sign/zero extension can be replaced
13848 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13849 NULL_RTX otherwise. */
13851 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13853 if (REG_P (extended_op))
13854 extended_op = extended_op;
13855 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13856 extended_op = SUBREG_REG (extended_op);
13857 else
13858 return NULL_RTX;
13860 /* Reg moves must be of the same mode. */
13861 if (GET_MODE (extended_op) != SImode)
13862 return NULL_RTX;
13864 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13865 if (s.set_src == NULL_RTX)
13866 return NULL_RTX;
13868 if (t_reg_operand (s.set_src, VOIDmode)
13869 || negt_reg_operand (s.set_src, VOIDmode))
13870 return extended_op;
13872 /* If the zero extended reg was formed by a logical operation, check the
13873 operands of the logical operation. If both originated from T bit
13874 stores the zero extension can be eliminated. */
13875 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13876 return extended_op;
13878 return NULL_RTX;
13881 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13882 figure out whether it should be converted into a movt-xor sequence in
13883 the movrt_negc splitter.
13884 Returns true if insns have been modified and the splitter has succeeded. */
13885 bool
13886 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13888 /* In cases such as
13889 tst r4,r4
13890 mov #-1,r1
13891 negc r1,r1
13892 tst r4,r4
13893 we can replace the T bit clobbering negc with a movt-xor sequence and
13894 eliminate the redundant comparison.
13895 Because the xor insn depends on register allocation results, allow this
13896 only before reload. */
13897 if (!can_create_pseudo_p ())
13898 return false;
13900 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13901 prev_nonnote_insn_bb);
13902 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13903 next_nonnote_insn_bb);
13905 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13906 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13907 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13908 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13909 t_before_negc.insn,
13910 t_after_negc.insn)
13911 && !sh_unspec_insn_p (t_after_negc.insn)
13912 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13913 && !side_effects_p (PATTERN (t_after_negc.insn))
13914 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13916 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13917 set_insn_deleted (t_after_negc.insn);
13918 return true;
13920 else
13921 return false;
13924 /* Given a reg and the current insn, see if the value of the reg originated
13925 from a sign or zero extension and return the discovered information. */
13926 sh_extending_set_of_reg
13927 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13929 if (reg == NULL)
13930 return sh_extending_set_of_reg (curr_insn);
13932 if (SUBREG_P (reg))
13933 reg = SUBREG_REG (reg);
13935 if (!REG_P (reg))
13936 return sh_extending_set_of_reg (curr_insn);
13938 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13939 only the adjacent predecessor blocks would cover most of the cases.
13940 Also try to look through the first extension that we hit. There are some
13941 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13942 fails to see the sign_extend. */
13943 sh_extending_set_of_reg result =
13944 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13946 if (result.set_src != NULL)
13948 if (GET_CODE (result.set_src) == SIGN_EXTEND
13949 || GET_CODE (result.set_src) == ZERO_EXTEND)
13951 if (dump_file)
13952 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13953 "explicitly sign/zero extended in insn %d\n",
13954 REGNO (reg), INSN_UID (result.insn));
13955 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13956 result.ext_code = GET_CODE (result.set_src);
13958 else if (MEM_P (result.set_src)
13959 && (GET_MODE (result.set_src) == QImode
13960 || GET_MODE (result.set_src) == HImode)
13961 && !sh_unspec_insn_p (result.insn))
13963 /* On SH QIHImode memory loads always sign extend. However, in
13964 some cases where it seems that the higher bits are not
13965 interesting, the loads will not be expanded as sign extending
13966 insns, but as QIHImode loads into QIHImode regs. We report that
13967 the reg has been sign extended by the mem load. When it is used
13968 as such, we must convert the mem load into a sign extending insn,
13969 see also sh_extending_set_of_reg::use_as_extended_reg. */
13970 if (dump_file)
13971 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13972 "implicitly sign extended in insn %d\n",
13973 REGNO (reg), INSN_UID (result.insn));
13974 result.from_mode = GET_MODE (result.set_src);
13975 result.ext_code = SIGN_EXTEND;
13979 return result;
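/* Note for illustration: SH byte and word loads sign extend into the
   full register, e.g.

     mov.b  @r4,r1   ! r1 = sign-extended byte
     mov.w  @r4,r1   ! r1 = sign-extended word

   which is why a plain QImode/HImode mem load found above can be treated
   as an implicit sign extension of the loaded value.  */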
13982 /* Given a reg that is known to be sign or zero extended at some insn,
13983 take the appropriate measures so that the extended value can be used as
13984 a reg at the specified insn and return the resulting reg rtx. */
13986 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
13988 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
13989 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
13990 gcc_assert (from_mode == QImode || from_mode == HImode);
13992 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
13994 if (dump_file)
13995 fprintf (dump_file,
13996 "use_as_extended_reg: converting non-extending mem load in "
13997 "insn %d into sign-extending load\n", INSN_UID (insn));
13999 rtx r = gen_reg_rtx (SImode);
14000 rtx_insn* i0;
14001 if (from_mode == QImode)
14002 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14003 else if (from_mode == HImode)
14004 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14005 else
14006 gcc_unreachable ();
14008 emit_insn_after (
14009 gen_move_insn (XEXP (set_rtx, 0),
14010 gen_lowpart (GET_MODE (set_src), r)), i0);
14011 set_insn_deleted (insn);
14012 return r;
14014 else
14016 rtx extension_dst = XEXP (set_rtx, 0);
14017 if (modified_between_p (extension_dst, insn, use_at_insn))
14019 if (dump_file)
14020 fprintf (dump_file,
14021 "use_as_extended_reg: dest reg %d of extending insn %d is "
14022 "modified, inserting a reg-reg copy\n",
14023 REGNO (extension_dst), INSN_UID (insn));
14025 rtx r = gen_reg_rtx (SImode);
14026 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14027 return r;
14029 else
14031 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14032 return extension_dst;
14037 bool
14038 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14040 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14041 && (from_mode == QImode || from_mode == HImode)
14042 && set_src != NULL)
14043 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14044 else
14045 return false;
14049 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14051 gcc_assert (can_use_as_unextended_reg ());
14053 rtx r = XEXP (set_src, 0);
14054 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14056 if (modified_between_p (r, insn, use_at_insn))
14058 rtx r1 = gen_reg_rtx (SImode);
14059 emit_insn_after (gen_move_insn (r1, r0), insn);
14060 return r1;
14062 else
14064 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14065 ? REGNO (SUBREG_REG (r))
14066 : REGNO (r));
14067 return r0;
14071 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14072 perform the necessary checks on the operands and split it accordingly. */
14073 void
14074 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14075 int subreg_offset, rtx operands[])
14077 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14079 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14080 curr_insn);
14081 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14082 curr_insn);
14084 /* If one of the operands is known to be zero extended, that's already
14085 sufficient to mask out the unwanted high bits. */
14086 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14088 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14089 operands[1]));
14090 return;
14092 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14094 emit_insn (gen_tstsi_t (operands[0],
14095 eop1.use_as_extended_reg (curr_insn)));
14096 return;
14099 /* None of the operands seem to be zero extended.
14100 If both are sign extended it's OK, too. */
14101 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14102 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14104 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14105 eop1.use_as_extended_reg (curr_insn)));
14106 return;
14109 /* Otherwise we have to insert a zero extension on one of the operands to
14110 mask out the unwanted high bits.
14111 Prefer the operand that has no known extension. */
14112 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14113 std::swap (operands[0], operands[1]);
14115 rtx tmp0 = gen_reg_rtx (SImode);
14116 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14117 GET_MODE (operands[0]), subreg_offset);
14118 emit_insn (subreg_mode == QImode
14119 ? gen_zero_extendqisi2 (tmp0, tmp1)
14120 : gen_zero_extendhisi2 (tmp0, tmp1));
14121 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14124 /* A helper class to increment/decrement a counter variable each time a
14125 function is entered/left. */
14126 class scope_counter
14128 public:
14129 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14131 ~scope_counter (void)
14133 --m_counter;
14134 gcc_assert (m_counter >= 0);
14137 int count (void) const { return m_counter; }
14139 private:
14140 int& m_counter;
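/* A minimal usage sketch of scope_counter (hypothetical names, for
   illustration only):

     static int g_depth = 0;

     static bool
     some_recursive_check (void)
     {
       scope_counter depth (g_depth);
       if (depth.count () > 1)
         return false;  // give up on re-entrant invocations
       // ... code that may re-enter some_recursive_check ...
       return true;
     }

   The counter is incremented on entry and decremented again whenever the
   scope is left, including early returns.  */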
14143 /* Given an rtx x, determine whether the expression can be used to create
14144 an insn that calculates x and stores the result in the T bit.
14145 This is used by the 'treg_set_expr' predicate to construct insn sequences
14146 where T bit results are fed into other insns, such as addc, subc, negc
14147 insns.
14149 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14150 distinguish between 'positive' and 'negative' forms. For now this has to
14151 be done in the preparation code. We could also introduce
14152 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14153 two different patterns for the 'positive' and 'negative' forms. However,
14154 the total amount of lines of code seems to be about the same and the
14155 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14156 recog function would need to look inside the expression by temporarily
14157 splitting it. */
14158 static int sh_recog_treg_set_expr_reent_count = 0;
14160 bool
14161 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14163 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14165 /* Limit the recursion count to avoid nested expressions which we can't
14166 resolve to a single treg set insn. */
14167 if (recursion.count () > 1)
14168 return false;
14170 /* Early accept known possible operands before doing recog. */
14171 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14172 return true;
14174 /* Early reject impossible operands before doing recog.
14175 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14176 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14177 such as lower-subreg will bail out. Some insns such as SH4A movua are
14178 done with UNSPEC, so must reject those, too, or else it would result
14179 in an invalid reg -> treg move. */
14180 if (register_operand (op, mode) || memory_operand (op, mode)
14181 || sh_unspec_insn_p (op))
14182 return false;
14184 if (!can_create_pseudo_p ())
14185 return false;
14187 /* We are going to invoke recog in a re-entrant way and thus
14188 have to capture its current state and restore it afterwards. */
14189 recog_data_d prev_recog_data = recog_data;
14191 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14192 SET_PREV_INSN (i) = NULL;
14193 SET_NEXT_INSN (i) = NULL;
14195 int result = recog (PATTERN (i), i, 0);
14197 /* It seems there is no insn like that. Create a simple negated
14198 version and try again. If we hit a negated form, we'll allow that
14199 and append a nott sequence when splitting out the insns. Insns that
14200 do the split can then remove the trailing nott if they know how to
14201 deal with it. */
14202 if (result < 0 && GET_CODE (op) == EQ)
14204 PUT_CODE (op, NE);
14205 result = recog (PATTERN (i), i, 0);
14206 PUT_CODE (op, EQ);
14208 if (result < 0 && GET_CODE (op) == NE)
14210 PUT_CODE (op, EQ);
14211 result = recog (PATTERN (i), i, 0);
14212 PUT_CODE (op, NE);
14215 recog_data = prev_recog_data;
14216 return result >= 0;
14219 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14220 This can be used as a condition for insn/split patterns to allow certain
14221 T bit setting patterns only to be matched as sub expressions of other
14222 patterns. */
14223 bool
14224 sh_in_recog_treg_set_expr (void)
14226 return sh_recog_treg_set_expr_reent_count > 0;
14229 /* Given an rtx x, which is assumed to be some expression that has been
14230 matched by the 'treg_set_expr' predicate before, split and emit the
14231 insns that are necessary to calculate the expression and store the result
14232 in the T bit.
14233 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14234 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14235 'delete_insn' which then causes the DF parts to bail out, because we
14236 currently are inside another gen_split* function and would invoke
14237 'try_split' in a reentrant way. */
14238 static std::pair<rtx_insn*, rtx_insn*>
14239 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14241 if (dump_file)
14243 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14244 print_rtl_single (dump_file, i);
14245 fprintf (dump_file, "\n");
14248 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
14250 if (seq == NULL)
14251 return std::make_pair (i, i);
14253 /* Avoid infinite splitter loops if any insn of the result matches
14254 the original pattern. */
14255 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14256 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14257 return std::make_pair (i, i);
14259 unshare_all_rtl_in_chain (seq);
14261 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14262 a linked list, replace the single insn with the new insns. */
14263 rtx_insn* seqlast = seq;
14264 while (NEXT_INSN (seqlast) != NULL)
14265 seqlast = NEXT_INSN (seqlast);
14267 if (rtx_insn* iprev = PREV_INSN (i))
14268 SET_NEXT_INSN (iprev) = seq;
14269 if (rtx_insn* inext = NEXT_INSN (i))
14270 SET_PREV_INSN (inext) = seqlast;
14272 SET_PREV_INSN (seq) = PREV_INSN (i);
14273 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14275 SET_PREV_INSN (i) = NULL;
14276 SET_NEXT_INSN (i) = NULL;
14278 /* Recursively split all insns. */
14279 for (i = seq; ; i = NEXT_INSN (i))
14281 std::pair<rtx_insn*, rtx_insn*> ii =
14282 sh_try_split_insn_simple (i, curr_insn, n + 1);
14283 if (i == seq)
14284 seq = ii.first;
14285 if (i == seqlast)
14287 seqlast = ii.second;
14288 break;
14290 i = ii.first;
14293 return std::make_pair (seq, seqlast);
14296 sh_treg_insns
14297 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14299 if (t_reg_operand (x, VOIDmode))
14300 return sh_treg_insns ();
14302 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14304 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14305 SET_PREV_INSN (i) = NULL;
14306 SET_NEXT_INSN (i) = NULL;
14308 if (dump_file)
14310 fprintf (dump_file, "split_treg_set_expr insn:\n");
14311 print_rtl (dump_file, i);
14312 fprintf (dump_file, "\n");
14315 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14316 have to capture its current state and restore it afterwards. */
14317 recog_data_d prev_recog_data = recog_data;
14319 int insn_code = recog (PATTERN (i), i, 0);
14321 /* If the insn was not found, see if we matched the negated form before
14322 and append a nott. */
14323 bool append_nott = false;
14325 if (insn_code < 0 && GET_CODE (x) == EQ)
14327 PUT_CODE (x, NE);
14328 insn_code = recog (PATTERN (i), i, 0);
14329 if (insn_code >= 0)
14330 append_nott = true;
14331 else
14332 PUT_CODE (x, EQ);
14334 if (insn_code < 0 && GET_CODE (x) == NE)
14336 PUT_CODE (x, EQ);
14337 insn_code = recog (PATTERN (i), i, 0);
14338 if (insn_code >= 0)
14339 append_nott = true;
14340 else
14341 PUT_CODE (x, NE);
14344 gcc_assert (insn_code >= 0);
14346 /* Try to recursively split the insn. Some insns might refuse to split
14347 any further while we are in the treg_set_expr splitting phase. They
14348 will be emitted as part of the outer insn and then split again. */
14349 std::pair<rtx_insn*, rtx_insn*> insnlist =
14350 sh_try_split_insn_simple (i, curr_insn);
14352 /* Restore recog state. */
14353 recog_data = prev_recog_data;
14355 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14356 ? insnlist.second
14357 : NULL;
14358 if (dump_file)
14360 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14361 print_rtl (dump_file, insnlist.first);
14362 fprintf (dump_file, "\n");
14364 if (nott_insn != NULL)
14365 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14368 emit_insn (insnlist.first);
14370 if (nott_insn != NULL && append_nott)
14372 if (dump_file)
14373 fprintf (dump_file, "removing trailing nott\n");
14374 remove_insn (nott_insn);
14375 nott_insn = NULL;
14376 append_nott = false;
14379 if (append_nott)
14380 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14382 rtx_insn* first_insn = get_insns ();
14384 if (dump_file)
14386 fprintf (dump_file, "resulting insns:\n");
14387 print_rtl (dump_file, first_insn);
14388 fprintf (dump_file, "\n");
14391 return sh_treg_insns (first_insn, nott_insn);
14394 /*------------------------------------------------------------------------------
14395 Mode switching support code.
14398 static void
14399 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14400 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14402 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14403 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14405 emit_insn (gen_toggle_pr ());
14406 if (TARGET_FMOVD)
14407 emit_insn (gen_toggle_sz ());
14409 else if (mode != FP_MODE_NONE)
14411 rtx tmp = gen_reg_rtx (SImode);
14412 emit_insn (gen_sts_fpscr (tmp));
14413 rtx i = NULL;
14415 const unsigned HOST_WIDE_INT fpbits =
14416 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14418 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14419 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14420 else if (mode == FP_MODE_SINGLE)
14421 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14422 else if (mode == FP_MODE_DOUBLE)
14423 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14424 else
14425 gcc_unreachable ();
14427 emit_insn (i);
14428 emit_insn (gen_lds_fpscr (tmp));
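/* A small worked example of the non-toggling path above, assuming the
   usual SH-4 FPSCR layout (PR = bit 19, SZ = bit 20, i.e.
   FPSCR_PR = 0x00080000 and FPSCR_SZ = 0x00100000): switching to
   FP_MODE_SINGLE clears those bits with an AND of ~fpbits, switching to
   FP_MODE_DOUBLE sets them with an OR of fpbits, and when the previous
   mode is known a single XOR of fpbits flips between the two modes.
   The register is read and written back with the sts/lds fpscr insns
   emitted above.  */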
14432 static int
14433 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14435 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14438 static int
14439 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14441 if (TARGET_HITACHI && recog_memoized (insn) >= 0
14442 && get_attr_fp_set (insn) != FP_SET_NONE)
14443 return (int) get_attr_fp_set (insn);
14444 else
14445 return mode;
14448 static int
14449 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14451 return NORMAL_MODE (entity);
14454 static int
14455 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14457 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14460 static int
14461 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14463 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14466 /*------------------------------------------------------------------------------
14467 Misc
14470 /* Return true if we use LRA instead of reload pass. */
14471 static bool
14472 sh_lra_p (void)
14474 return sh_lra_flag;
14477 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14479 static bool
14480 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14481 unsigned int align,
14482 enum by_pieces_operation op,
14483 bool speed_p)
14485 switch (op)
14487 case MOVE_BY_PIECES:
14488 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14489 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14490 case STORE_BY_PIECES:
14491 case SET_BY_PIECES:
14492 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14493 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14494 default:
14495 return default_use_by_pieces_infrastructure_p (size, align,
14496 op, speed_p);
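/* A rough illustration of the thresholds above (the exact instruction
   counts depend on move_by_pieces_ninsns): when optimizing for size only
   a copy or store that fits in a single move insn is done by pieces,
   while when optimizing for speed with a 32-bit aligned destination up
   to 15 move insns are allowed before the by-pieces strategy is
   rejected in favor of the generic handling.  */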
14500 #include "gt-sh.h"