gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "hash-set.h"
32 #include "machmode.h"
33 #include "vec.h"
34 #include "double-int.h"
35 #include "input.h"
36 #include "alias.h"
37 #include "symtab.h"
38 #include "wide-int.h"
39 #include "inchash.h"
40 #include "tree.h"
41 #include "fold-const.h"
42 #include "stringpool.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "flags.h"
47 #include "hashtab.h"
48 #include "hard-reg-set.h"
49 #include "function.h"
50 #include "statistics.h"
51 #include "real.h"
52 #include "fixed-value.h"
53 #include "expmed.h"
54 #include "dojump.h"
55 #include "explow.h"
56 #include "emit-rtl.h"
57 #include "stmt.h"
58 #include "expr.h"
59 #include "insn-codes.h"
60 #include "optabs.h"
61 #include "reload.h"
62 #include "regs.h"
63 #include "output.h"
64 #include "insn-attr.h"
65 #include "diagnostic-core.h"
66 #include "recog.h"
67 #include "dwarf2.h"
68 #include "tm_p.h"
69 #include "target.h"
70 #include "target-def.h"
71 #include "langhooks.h"
72 #include "predict.h"
73 #include "dominance.h"
74 #include "cfg.h"
75 #include "cfgrtl.h"
76 #include "cfganal.h"
77 #include "lcm.h"
78 #include "cfgbuild.h"
79 #include "cfgcleanup.h"
80 #include "basic-block.h"
81 #include "df.h"
82 #include "intl.h"
83 #include "sched-int.h"
84 #include "params.h"
85 #include "ggc.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "is-a.h"
93 #include "gimple.h"
94 #include "gimplify.h"
95 #include "cfgloop.h"
96 #include "alloc-pool.h"
97 #include "tm-constrs.h"
98 #include "opts.h"
99 #include "tree-pass.h"
100 #include "pass_manager.h"
101 #include "context.h"
102 #include "builtins.h"
103 #include "rtl-iter.h"
105 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
107 /* These are some macros to abstract register modes. */
108 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
109 && ((HOST_WIDE_INT)(VALUE)) <= 511)
111 #define CONST_OK_FOR_ADD(size) \
112 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
113 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
114 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
115 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
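/* For illustration: on a SHmedia target, CONST_OK_FOR_ADD (511) is true and
   CONST_OK_FOR_ADD (512) is false, since CONST_OK_FOR_I10 accepts the signed
   10-bit range -512 .. 511.  On other targets the same expression checks the
   signed 8-bit CONST_OK_FOR_I08 range instead, matching the immediate field
   of the add #imm,Rn instruction.  */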
117 /* Used to simplify the logic below. Find the attributes wherever
118 they may be. */
119 #define SH_ATTRIBUTES(decl) \
120 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
121 : DECL_ATTRIBUTES (decl) \
122 ? (DECL_ATTRIBUTES (decl)) \
123 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
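/* For example, SH_ATTRIBUTES applied to a FUNCTION_DECL whose
   DECL_ATTRIBUTES list is empty falls back to the TYPE_ATTRIBUTES of the
   decl's type, so attributes attached through a typedef'ed function type
   are still found; a TYPE_P node is handled via TYPE_ATTRIBUTES directly.  */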
125 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
126 int current_function_interrupt;
128 tree sh_deferred_function_attributes;
129 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
131 /* Global variables for machine-dependent things. */
134 /* Which cpu we are scheduling for. */
134 enum processor_type sh_cpu;
136 /* Definitions used in ready queue reordering for first scheduling pass. */
138 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
139 static short *regmode_weight[2];
141 /* Total SFmode and SImode weights of scheduled insns. */
142 static int curr_regmode_pressure[2];
144 /* Number of r0 life regions. */
145 static int r0_life_regions;
147 /* If true, skip cycles for Q -> R movement. */
148 static int skip_cycles = 0;
150 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
151 and returned from sh_reorder2. */
152 static short cached_can_issue_more;
154 /* Unique number for UNSPEC_BBR pattern. */
155 static unsigned int unspec_bbr_uid = 1;
157 /* Provides the class number of the smallest class containing
158 reg number. */
159 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
161 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
165 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
166 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
167 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
168 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
169 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
170 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
171 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
172 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
173 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
174 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
175 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
176 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
180 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
181 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
182 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
183 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
184 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
185 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
186 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
187 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
188 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
189 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
190 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
191 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
192 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
193 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
194 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
195 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
196 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
197 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
198 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
199 GENERAL_REGS, GENERAL_REGS,
202 char sh_register_names[FIRST_PSEUDO_REGISTER] \
203 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
205 char sh_additional_register_names[ADDREGNAMES_SIZE] \
206 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
207 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
209 int assembler_dialect;
211 static bool shmedia_space_reserved_for_target_registers;
213 static void split_branches (rtx_insn *);
214 static int branch_dest (rtx);
215 static void print_slot (rtx_sequence *);
216 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
217 static void dump_table (rtx_insn *, rtx_insn *);
218 static bool broken_move (rtx_insn *);
219 static bool mova_p (rtx_insn *);
220 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
221 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
222 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
223 static void sh_reorg (void);
224 static void sh_option_override (void);
225 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
226 static rtx_insn *frame_insn (rtx);
227 static rtx push (int);
228 static void pop (int);
229 static void push_regs (HARD_REG_SET *, int);
230 static int calc_live_regs (HARD_REG_SET *);
231 static HOST_WIDE_INT rounded_frame_size (int);
232 static bool sh_frame_pointer_required (void);
233 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
234 static int sh_mode_needed (int, rtx_insn *);
235 static int sh_mode_after (int, int, rtx_insn *);
236 static int sh_mode_entry (int);
237 static int sh_mode_exit (int);
238 static int sh_mode_priority (int entity, int n);
239 static bool sh_lra_p (void);
241 static rtx mark_constant_pool_use (rtx);
242 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
243 int, bool *);
244 static tree sh_handle_resbank_handler_attribute (tree *, tree,
245 tree, int, bool *);
246 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
247 tree, int, bool *);
248 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
249 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
250 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
251 static void sh_print_operand (FILE *, rtx, int);
252 static void sh_print_operand_address (FILE *, rtx);
253 static bool sh_print_operand_punct_valid_p (unsigned char code);
254 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
255 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
256 static void sh_insert_attributes (tree, tree *);
257 static const char *sh_check_pch_target_flags (int);
258 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
259 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
260 static int sh_issue_rate (void);
261 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
262 static short find_set_regmode_weight (rtx, machine_mode);
263 static short find_insn_regmode_weight (rtx, machine_mode);
264 static void find_regmode_weight (basic_block, machine_mode);
265 static int find_r0_life_regions (basic_block);
266 static void sh_md_init_global (FILE *, int, int);
267 static void sh_md_finish_global (FILE *, int);
268 static int rank_for_reorder (const void *, const void *);
269 static void swap_reorder (rtx_insn **, int);
270 static void ready_reorder (rtx_insn **, int);
271 static bool high_pressure (machine_mode);
272 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
273 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
274 static void sh_md_init (FILE *, int, int);
275 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
277 static bool sh_function_ok_for_sibcall (tree, tree);
279 static bool sh_cannot_modify_jumps_p (void);
280 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
281 static reg_class_t sh_target_reg_class (void);
282 static bool sh_optimize_target_register_callee_saved (bool);
283 static bool sh_ms_bitfield_layout_p (const_tree);
285 static void sh_init_builtins (void);
286 static tree sh_builtin_decl (unsigned, bool);
287 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
288 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
289 HOST_WIDE_INT, tree);
290 static void sh_file_start (void);
291 static bool flow_dependent_p (rtx, rtx);
292 static void flow_dependent_p_1 (rtx, const_rtx, void *);
293 static int shiftcosts (rtx);
294 static int and_xor_ior_costs (rtx, int);
295 static int addsubcosts (rtx);
296 static int multcosts (rtx);
297 static bool unspec_caller_rtx_p (rtx);
298 static bool sh_cannot_copy_insn_p (rtx_insn *);
299 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
300 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
301 static int sh_pr_n_sets (void);
302 static rtx sh_allocate_initial_value (rtx);
303 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
304 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
305 machine_mode,
306 struct secondary_reload_info *);
307 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
308 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
309 static rtx sh_delegitimize_address (rtx);
310 static bool sh_cannot_substitute_mem_equiv_p (rtx);
311 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
312 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
313 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
314 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
315 static int scavenge_reg (HARD_REG_SET *s);
316 struct save_schedule_s;
317 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
318 struct save_schedule_s *, int);
320 static rtx sh_struct_value_rtx (tree, int);
321 static rtx sh_function_value (const_tree, const_tree, bool);
322 static bool sh_function_value_regno_p (const unsigned int);
323 static rtx sh_libcall_value (machine_mode, const_rtx);
324 static bool sh_return_in_memory (const_tree, const_tree);
325 static rtx sh_builtin_saveregs (void);
326 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
327 tree, int *, int);
328 static bool sh_strict_argument_naming (cumulative_args_t);
329 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
330 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
331 static tree sh_build_builtin_va_list (void);
332 static void sh_va_start (tree, rtx);
333 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
334 static bool sh_promote_prototypes (const_tree);
335 static machine_mode sh_promote_function_mode (const_tree type,
336 machine_mode,
337 int *punsignedp,
338 const_tree funtype,
339 int for_return);
340 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
341 const_tree, bool);
342 static bool sh_callee_copies (cumulative_args_t, machine_mode,
343 const_tree, bool);
344 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
345 tree, bool);
346 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
347 const_tree, bool);
348 static rtx sh_function_arg (cumulative_args_t, machine_mode,
349 const_tree, bool);
350 static bool sh_scalar_mode_supported_p (machine_mode);
351 static int sh_dwarf_calling_convention (const_tree);
352 static void sh_encode_section_info (tree, rtx, int);
353 static bool sh2a_function_vector_p (tree);
354 static void sh_trampoline_init (rtx, tree, rtx);
355 static rtx sh_trampoline_adjust_address (rtx);
356 static void sh_conditional_register_usage (void);
357 static bool sh_legitimate_constant_p (machine_mode, rtx);
358 static int mov_insn_size (machine_mode, bool);
359 static int mov_insn_alignment_mask (machine_mode, bool);
360 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
361 unsigned int,
362 enum by_pieces_operation,
363 bool);
364 static bool sequence_insn_p (rtx_insn *);
365 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
366 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
367 machine_mode, bool);
368 static bool sh_legitimate_combined_insn (rtx_insn* insn);
370 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
372 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
374 static const struct attribute_spec sh_attribute_table[] =
376 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
377 affects_type_identity } */
378 { "interrupt_handler", 0, 0, true, false, false,
379 sh_handle_interrupt_handler_attribute, false },
380 { "sp_switch", 1, 1, true, false, false,
381 sh_handle_sp_switch_attribute, false },
382 { "trap_exit", 1, 1, true, false, false,
383 sh_handle_trap_exit_attribute, false },
384 { "renesas", 0, 0, false, true, false,
385 sh_handle_renesas_attribute, false },
386 { "trapa_handler", 0, 0, true, false, false,
387 sh_handle_interrupt_handler_attribute, false },
388 { "nosave_low_regs", 0, 0, true, false, false,
389 sh_handle_interrupt_handler_attribute, false },
390 { "resbank", 0, 0, true, false, false,
391 sh_handle_resbank_handler_attribute, false },
392 { "function_vector", 1, 1, true, false, false,
393 sh2a_handle_function_vector_handler_attribute, false },
394 { NULL, 0, 0, false, false, false, NULL, false }
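/* For example, the source-level uses that the handlers above validate look
   like the following (declarations are illustrative only):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (4)));
     void vfun (void) __attribute__ ((function_vector (18)));   (SH2A only)

   sp_switch, trap_exit and function_vector take exactly one argument and
   apply to declarations, while renesas applies to function types, matching
   the min_len/max_len and decl_req/type_req fields above.  */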
397 /* Initialize the GCC target structure. */
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
401 /* The next two are used for debug info when compiling with -gdwarf. */
402 #undef TARGET_ASM_UNALIGNED_HI_OP
403 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
404 #undef TARGET_ASM_UNALIGNED_SI_OP
405 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
407 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
408 #undef TARGET_ASM_UNALIGNED_DI_OP
409 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
410 #undef TARGET_ASM_ALIGNED_DI_OP
411 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
413 #undef TARGET_OPTION_OVERRIDE
414 #define TARGET_OPTION_OVERRIDE sh_option_override
416 #undef TARGET_PRINT_OPERAND
417 #define TARGET_PRINT_OPERAND sh_print_operand
418 #undef TARGET_PRINT_OPERAND_ADDRESS
419 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
420 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
421 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
422 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
423 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
428 #undef TARGET_ASM_OUTPUT_MI_THUNK
429 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
431 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
432 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
433 hook_bool_const_tree_hwi_hwi_const_tree_true
435 #undef TARGET_ASM_FILE_START
436 #define TARGET_ASM_FILE_START sh_file_start
437 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
438 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
440 #undef TARGET_REGISTER_MOVE_COST
441 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
443 #undef TARGET_INSERT_ATTRIBUTES
444 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
446 #undef TARGET_SCHED_ADJUST_COST
447 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
449 #undef TARGET_SCHED_ISSUE_RATE
450 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
452 /* The next 5 hooks have been implemented to re-enable sched1. With the
453 help of these hooks we limit the movement of insns in sched1 in order to
454 reduce register pressure. The overall idea is to keep a count of the SImode
455 and SFmode regs required by already scheduled insns. When these counts
456 cross certain threshold values, give priority to insns that free registers.
457 The insn that frees registers is most likely the insn with the lowest
458 LUID (original insn order), but such an insn might be sitting in the stalled
459 queue (Q) instead of the ready queue (R). To solve this, we skip up to a
460 maximum of 8 cycles so that such insns may move from Q -> R.
462 The descriptions of the hooks are as follows:
464 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
465 scheduler; it is called inside the sched_init function just after the
466 call to find_insn_reg_weights. It is used to calculate the SImode
467 and SFmode weights of the insns in the basic blocks, much like what
468 find_insn_reg_weights does.
469 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
471 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
472 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
473 (Q)->(R).
475 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
476 high, reorder the ready queue so that the insn with the lowest LUID will
477 be issued next.
479 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
480 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
482 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
483 can be returned from TARGET_SCHED_REORDER2.
485 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
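/* A rough sketch of the mechanism described above, in terms of the
   high_pressure and ready_reorder helpers declared earlier (the real
   conditions in sh_reorder / sh_reorder2 are more involved; this is
   illustrative only, and the argument names are assumed):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   ready_reorder sorts the ready queue so that the insn with the lowest
   LUID is issued first, and sh_dfa_new_cycle skips up to 8 cycles while
   sh_reorder2 keeps reporting high pressure, giving a register-freeing
   insn in the stalled queue (Q) a chance to reach the ready queue (R).  */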
487 #undef TARGET_SCHED_DFA_NEW_CYCLE
488 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
490 #undef TARGET_SCHED_INIT_GLOBAL
491 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
493 #undef TARGET_SCHED_FINISH_GLOBAL
494 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
496 #undef TARGET_SCHED_VARIABLE_ISSUE
497 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
499 #undef TARGET_SCHED_REORDER
500 #define TARGET_SCHED_REORDER sh_reorder
502 #undef TARGET_SCHED_REORDER2
503 #define TARGET_SCHED_REORDER2 sh_reorder2
505 #undef TARGET_SCHED_INIT
506 #define TARGET_SCHED_INIT sh_md_init
508 #undef TARGET_DELEGITIMIZE_ADDRESS
509 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
511 #undef TARGET_LEGITIMIZE_ADDRESS
512 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
514 #undef TARGET_CANNOT_MODIFY_JUMPS_P
515 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
516 #undef TARGET_CAN_FOLLOW_JUMP
517 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
518 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
519 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
520 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
521 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
522 sh_optimize_target_register_callee_saved
524 #undef TARGET_MS_BITFIELD_LAYOUT_P
525 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
527 #undef TARGET_INIT_BUILTINS
528 #define TARGET_INIT_BUILTINS sh_init_builtins
529 #undef TARGET_BUILTIN_DECL
530 #define TARGET_BUILTIN_DECL sh_builtin_decl
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
534 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
535 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
537 #undef TARGET_CANNOT_COPY_INSN_P
538 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
539 #undef TARGET_RTX_COSTS
540 #define TARGET_RTX_COSTS sh_rtx_costs
541 #undef TARGET_ADDRESS_COST
542 #define TARGET_ADDRESS_COST sh_address_cost
543 #undef TARGET_ALLOCATE_INITIAL_VALUE
544 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
549 #undef TARGET_DWARF_REGISTER_SPAN
550 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
552 #ifdef HAVE_AS_TLS
553 #undef TARGET_HAVE_TLS
554 #define TARGET_HAVE_TLS true
555 #endif
557 #undef TARGET_PROMOTE_PROTOTYPES
558 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
562 #undef TARGET_FUNCTION_VALUE
563 #define TARGET_FUNCTION_VALUE sh_function_value
564 #undef TARGET_FUNCTION_VALUE_REGNO_P
565 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
566 #undef TARGET_LIBCALL_VALUE
567 #define TARGET_LIBCALL_VALUE sh_libcall_value
568 #undef TARGET_STRUCT_VALUE_RTX
569 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
570 #undef TARGET_RETURN_IN_MEMORY
571 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
573 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
574 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
575 #undef TARGET_SETUP_INCOMING_VARARGS
576 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
577 #undef TARGET_STRICT_ARGUMENT_NAMING
578 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
579 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
580 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
581 #undef TARGET_MUST_PASS_IN_STACK
582 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
583 #undef TARGET_PASS_BY_REFERENCE
584 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
585 #undef TARGET_CALLEE_COPIES
586 #define TARGET_CALLEE_COPIES sh_callee_copies
587 #undef TARGET_ARG_PARTIAL_BYTES
588 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
589 #undef TARGET_FUNCTION_ARG
590 #define TARGET_FUNCTION_ARG sh_function_arg
591 #undef TARGET_FUNCTION_ARG_ADVANCE
592 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
594 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
595 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
597 #undef TARGET_BUILD_BUILTIN_VA_LIST
598 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
599 #undef TARGET_EXPAND_BUILTIN_VA_START
600 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
601 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
602 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
604 #undef TARGET_SCALAR_MODE_SUPPORTED_P
605 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
606 #undef TARGET_VECTOR_MODE_SUPPORTED_P
607 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
609 #undef TARGET_CHECK_PCH_TARGET_FLAGS
610 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
612 #undef TARGET_DWARF_CALLING_CONVENTION
613 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
615 #undef TARGET_FRAME_POINTER_REQUIRED
616 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
618 #undef TARGET_MODE_EMIT
619 #define TARGET_MODE_EMIT sh_emit_mode_set
621 #undef TARGET_MODE_NEEDED
622 #define TARGET_MODE_NEEDED sh_mode_needed
624 #undef TARGET_MODE_AFTER
625 #define TARGET_MODE_AFTER sh_mode_after
627 #undef TARGET_MODE_ENTRY
628 #define TARGET_MODE_ENTRY sh_mode_entry
630 #undef TARGET_MODE_EXIT
631 #define TARGET_MODE_EXIT sh_mode_exit
633 #undef TARGET_MODE_PRIORITY
634 #define TARGET_MODE_PRIORITY sh_mode_priority
636 /* Return regmode weight for insn. */
637 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
638 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
640 /* Return current register pressure for regmode. */
641 #define CURR_REGMODE_PRESSURE(MODE)\
642 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
644 #undef TARGET_ENCODE_SECTION_INFO
645 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
647 #undef TARGET_LRA_P
648 #define TARGET_LRA_P sh_lra_p
650 #undef TARGET_SECONDARY_RELOAD
651 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
653 #undef TARGET_PREFERRED_RELOAD_CLASS
654 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
656 #undef TARGET_CONDITIONAL_REGISTER_USAGE
657 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
659 #undef TARGET_LEGITIMATE_ADDRESS_P
660 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
662 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
663 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
665 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
666 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
667 sh_legitimize_address_displacement
669 #undef TARGET_TRAMPOLINE_INIT
670 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
671 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
672 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
674 #undef TARGET_LEGITIMATE_CONSTANT_P
675 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
677 #undef TARGET_CANONICALIZE_COMPARISON
678 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
680 #undef TARGET_LEGITIMATE_COMBINED_INSN
681 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
683 #undef TARGET_FIXED_CONDITION_CODE_REGS
684 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
686 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
687 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
688 sh_use_by_pieces_infrastructure_p
690 /* Machine-specific symbol_ref flags. */
691 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
693 /* The tas.b instruction sets bit 7 of the byte, i.e. 0x80. This value
694 is used by optabs.c atomic op expansion code as well as in sync.md. */
695 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
696 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
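/* For example, the hardware sequence behind __atomic_test_and_set on a
   byte at @r4 is built around (sketch only; the actual patterns live in
   sync.md):

     tas.b  @r4      ! T = (byte == 0), then byte |= 0x80

   so a "set" flag is left in memory as 0x80 rather than 1, which is what
   the trueval above tells the middle-end to expect.  */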
698 struct gcc_target targetm = TARGET_INITIALIZER;
701 /* Information on the currently selected atomic model.
702 This is initialized in sh_option_override. */
703 static sh_atomic_model selected_atomic_model_;
705 const sh_atomic_model&
706 selected_atomic_model (void)
708 return selected_atomic_model_;
711 static sh_atomic_model
712 parse_validate_atomic_model_option (const char* str)
714 const char* model_names[sh_atomic_model::num_models];
715 model_names[sh_atomic_model::none] = "none";
716 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
717 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
718 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
719 model_names[sh_atomic_model::soft_imask] = "soft-imask";
721 const char* model_cdef_names[sh_atomic_model::num_models];
722 model_cdef_names[sh_atomic_model::none] = "NONE";
723 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
724 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
725 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
726 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
728 sh_atomic_model ret;
729 ret.type = sh_atomic_model::none;
730 ret.name = model_names[sh_atomic_model::none];
731 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
732 ret.strict = false;
733 ret.tcb_gbr_offset = -1;
735 /* Handle empty string as 'none'. */
736 if (str == NULL || *str == '\0')
737 return ret;
739 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
741 std::vector<std::string> tokens;
742 for (std::stringstream ss (str); ss.good (); )
744 tokens.push_back (std::string ());
745 std::getline (ss, tokens.back (), ',');
748 if (tokens.empty ())
749 err_ret ("invalid atomic model option");
751 /* The first token must be the atomic model name. */
753 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
754 if (tokens.front () == model_names[i])
756 ret.type = (sh_atomic_model::enum_type)i;
757 ret.name = model_names[i];
758 ret.cdef_name = model_cdef_names[i];
759 goto got_mode_name;
762 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
763 got_mode_name:;
766 /* Go through the remaining tokens. */
767 for (size_t i = 1; i < tokens.size (); ++i)
769 if (tokens[i] == "strict")
770 ret.strict = true;
771 else if (tokens[i].find ("gbr-offset=") == 0)
773 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
774 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
775 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
776 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
777 "option", offset_str.c_str ());
779 else
780 err_ret ("unknown parameter \"%s\" in atomic model option",
781 tokens[i].c_str ());
784 /* Check that the selection makes sense. */
785 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
786 err_ret ("atomic operations are not supported on SHmedia");
788 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
789 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
790 ret.name);
792 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
793 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
795 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
796 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
798 if (ret.type == sh_atomic_model::soft_tcb
799 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
800 || (ret.tcb_gbr_offset & 3) != 0))
801 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
802 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
803 ret.name);
805 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
806 err_ret ("cannot use atomic model %s in user mode", ret.name);
808 return ret;
810 #undef err_ret
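/* For illustration, option strings accepted by the parser above look like
   the following (values are examples only):

     -matomic-model=soft-gusa
     -matomic-model=soft-tcb,gbr-offset=128
     -matomic-model=hard-llcs,strict

   The first token selects the model; "strict" and "gbr-offset=<n>" are the
   only extra parameters, and for soft-tcb the gbr-offset must be a multiple
   of 4 in the range 0-1020.  */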
813 /* Register SH specific RTL passes. */
814 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
815 const char* name);
816 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
817 const char* name);
818 static void
819 register_sh_passes (void)
821 if (!TARGET_SH1)
822 return;
824 /* Running the sh_treg_combine pass after ce1 generates better code when
825 comparisons are combined and reg-reg moves are introduced, because
826 reg-reg moves will be eliminated afterwards. However, there are quite a
827 few cases where combine will be unable to fold comparison-related insns,
828 so for now we don't do it.
829 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
830 PASS_POS_INSERT_AFTER, "ce1", 1);
831 */
833 /* Run sh_treg_combine pass after combine but before register allocation. */
834 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
835 PASS_POS_INSERT_AFTER, "split1", 1);
837 /* Run sh_treg_combine pass after register allocation and basic block
838 reordering as this sometimes creates new opportunities. */
839 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
840 PASS_POS_INSERT_AFTER, "split4", 1);
842 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
843 is known after a conditional branch.
844 This must be done after basic blocks and branch conditions have
845 stabilized and won't be changed by further passes. */
846 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
847 PASS_POS_INSERT_BEFORE, "sched2", 1);
850 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
851 various options, and do some machine dependent initialization. */
852 static void
853 sh_option_override (void)
855 int regno;
857 SUBTARGET_OVERRIDE_OPTIONS;
858 if (optimize > 1 && !optimize_size)
859 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
861 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
862 TARGET_CBRANCHDI4 = 1;
863 TARGET_CMPEQDI_T = 0;
865 sh_cpu = PROCESSOR_SH1;
866 assembler_dialect = 0;
867 if (TARGET_SH2)
868 sh_cpu = PROCESSOR_SH2;
869 if (TARGET_SH2E)
870 sh_cpu = PROCESSOR_SH2E;
871 if (TARGET_SH2A)
872 sh_cpu = PROCESSOR_SH2A;
873 if (TARGET_SH3)
874 sh_cpu = PROCESSOR_SH3;
875 if (TARGET_SH3E)
876 sh_cpu = PROCESSOR_SH3E;
877 if (TARGET_SH4)
879 assembler_dialect = 1;
880 sh_cpu = PROCESSOR_SH4;
882 if (TARGET_SH4A)
884 assembler_dialect = 1;
885 sh_cpu = PROCESSOR_SH4A;
887 if (TARGET_SH5)
889 sh_cpu = PROCESSOR_SH5;
890 target_flags |= MASK_ALIGN_DOUBLE;
891 if (TARGET_SHMEDIA_FPU)
892 target_flags |= MASK_FMOVD;
893 if (TARGET_SHMEDIA)
895 /* There are no delay slots on SHmedia. */
896 flag_delayed_branch = 0;
897 /* Relaxation isn't yet supported for SHmedia. */
898 target_flags &= ~MASK_RELAX;
899 /* After reload, if-conversion does little good but can cause
900 ICEs:
901 - find_if_block doesn't do anything for SH because we don't
902 have conditional execution patterns. (We use conditional
903 move patterns, which are handled differently, and only
904 before reload).
905 - find_cond_trap doesn't do anything for the SH because we
906 don't have conditional traps.
907 - find_if_case_1 uses redirect_edge_and_branch_force in
908 the only path that does an optimization, and this causes
909 an ICE when branch targets are in registers.
910 - find_if_case_2 doesn't do anything for the SHmedia after
911 reload except when it can redirect a tablejump - and
912 that's rather rare. */
913 flag_if_conversion2 = 0;
914 if (! strcmp (sh_div_str, "call"))
915 sh_div_strategy = SH_DIV_CALL;
916 else if (! strcmp (sh_div_str, "call2"))
917 sh_div_strategy = SH_DIV_CALL2;
918 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
919 sh_div_strategy = SH_DIV_FP;
920 else if (! strcmp (sh_div_str, "inv"))
921 sh_div_strategy = SH_DIV_INV;
922 else if (! strcmp (sh_div_str, "inv:minlat"))
923 sh_div_strategy = SH_DIV_INV_MINLAT;
924 else if (! strcmp (sh_div_str, "inv20u"))
925 sh_div_strategy = SH_DIV_INV20U;
926 else if (! strcmp (sh_div_str, "inv20l"))
927 sh_div_strategy = SH_DIV_INV20L;
928 else if (! strcmp (sh_div_str, "inv:call2"))
929 sh_div_strategy = SH_DIV_INV_CALL2;
930 else if (! strcmp (sh_div_str, "inv:call"))
931 sh_div_strategy = SH_DIV_INV_CALL;
932 else if (! strcmp (sh_div_str, "inv:fp"))
934 if (TARGET_FPU_ANY)
935 sh_div_strategy = SH_DIV_INV_FP;
936 else
937 sh_div_strategy = SH_DIV_INV;
939 TARGET_CBRANCHDI4 = 0;
940 /* Assembler CFI isn't yet fully supported for SHmedia. */
941 flag_dwarf2_cfi_asm = 0;
944 else
946 /* Only the sh64-elf assembler fully supports .quad properly. */
947 targetm.asm_out.aligned_op.di = NULL;
948 targetm.asm_out.unaligned_op.di = NULL;
951 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
952 Disable it for everything else. */
953 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
954 TARGET_USERMODE = false;
956 if (TARGET_SH1)
958 if (! strcmp (sh_div_str, "call-div1"))
959 sh_div_strategy = SH_DIV_CALL_DIV1;
960 else if (! strcmp (sh_div_str, "call-fp")
961 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
962 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
963 sh_div_strategy = SH_DIV_CALL_FP;
964 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
965 sh_div_strategy = SH_DIV_CALL_TABLE;
966 else
967 /* Pick one that makes the most sense for the target in general.
968 It is not very useful to use different functions depending
969 on -Os, since then we'll end up with two different functions
970 when some of the code is compiled for size, and some for
971 speed. */
973 /* SH4 tends to emphasize speed. */
974 if (TARGET_HARD_SH4)
975 sh_div_strategy = SH_DIV_CALL_TABLE;
976 /* These have their own way of doing things. */
977 else if (TARGET_SH2A)
978 sh_div_strategy = SH_DIV_INTRINSIC;
979 /* ??? Should we use the integer SHmedia function instead? */
980 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
981 sh_div_strategy = SH_DIV_CALL_FP;
982 /* SH1 .. SH3 cores often go into small-footprint systems, so
983 default to the smallest implementation available. */
984 else
985 sh_div_strategy = SH_DIV_CALL_DIV1;
987 if (!TARGET_SH1)
988 TARGET_PRETEND_CMOVE = 0;
989 if (sh_divsi3_libfunc[0])
990 ; /* User supplied - leave it alone. */
991 else if (TARGET_DIVIDE_CALL_FP)
992 sh_divsi3_libfunc = "__sdivsi3_i4";
993 else if (TARGET_DIVIDE_CALL_TABLE)
994 sh_divsi3_libfunc = "__sdivsi3_i4i";
995 else if (TARGET_SH5)
996 sh_divsi3_libfunc = "__sdivsi3_1";
997 else
998 sh_divsi3_libfunc = "__sdivsi3";
1000 if (sh_branch_cost == -1)
1002 /* The SH1 does not have delay slots, hence we get a pipeline stall
1003 at every branch. The SH4 is superscalar, so the single delay slot
1004 is not sufficient to keep both pipelines filled.
1005 In any case, set the default branch cost to '2', as it results in
1006 slightly smaller code overall and also enables some if-conversions
1007 that are required for matching special T-bit related insns. */
1008 sh_branch_cost = 2;
1011 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
1012 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
1013 TARGET_ZDCBRANCH = 1;
1015 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1016 if (! VALID_REGISTER_P (regno))
1017 sh_register_names[regno][0] = '\0';
1019 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1020 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1021 sh_additional_register_names[regno][0] = '\0';
1023 if ((flag_pic && ! TARGET_PREFERGOT)
1024 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1025 flag_no_function_cse = 1;
1027 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1029 /* Never run scheduling before reload, since that can
1030 break global alloc, and generates slower code anyway due
1031 to the pressure on R0. */
1032 /* Enable sched1 for SH4 if the user explicitly requests it.
1033 When sched1 is enabled, the ready queue will be reordered by
1034 the target hooks if pressure is high. We cannot do this for
1035 PIC, SH3 and lower as they give spill failures for R0. */
1036 if (!TARGET_HARD_SH4 || flag_pic)
1037 flag_schedule_insns = 0;
1038 /* ??? Current exception handling places basic block boundaries
1039 after call_insns. This causes high pressure on R0 and gives
1040 spill failures for R0 in reload. See PR 22553 and the thread
1041 on gcc-patches
1042 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1043 else if (flag_exceptions)
1045 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1046 warning (0, "ignoring -fschedule-insns because of exception "
1047 "handling bug");
1048 flag_schedule_insns = 0;
1050 else if (flag_schedule_insns
1051 && !global_options_set.x_flag_schedule_insns)
1052 flag_schedule_insns = 0;
1055 /* Unwind info is not correct around the CFG unless either a frame
1056 pointer is present or M_A_O_A (-maccumulate-outgoing-args) is set. Fixing this requires rewriting
1057 unwind info generation to be aware of the CFG and propagating states
1058 around edges. */
1059 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1060 || flag_exceptions || flag_non_call_exceptions)
1061 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1063 warning (0, "unwind tables currently require either a frame pointer "
1064 "or -maccumulate-outgoing-args for correctness");
1065 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1068 /* Adjust loop, jump and function alignment values (in bytes), if those
1069 were not specified by the user using -falign-loops, -falign-jumps
1070 and -falign-functions options.
1071 32 bit alignment is better for speed, because instructions can be
1072 fetched as a pair from a longword boundary. For size use 16 bit
1073 alignment to get more compact code.
1074 Aligning all jumps increases the code size, even if it might
1075 result in slightly faster code. Thus, it is set to the smallest
1076 alignment possible if not specified by the user. */
1077 if (align_loops == 0)
1079 if (TARGET_SH5)
1080 align_loops = 8;
1081 else
1082 align_loops = optimize_size ? 2 : 4;
1085 if (align_jumps == 0)
1087 if (TARGET_SHMEDIA)
1088 align_jumps = 1 << CACHE_LOG;
1089 else
1090 align_jumps = 2;
1092 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1093 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1095 if (align_functions == 0)
1097 if (TARGET_SHMEDIA)
1098 align_functions = optimize_size
1099 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1100 else
1101 align_functions = optimize_size ? 2 : 4;
1104 /* The linker relaxation code breaks when a function contains
1105 alignments that are larger than that at the start of a
1106 compilation unit. */
1107 if (TARGET_RELAX)
1109 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1111 /* Also take possible .long constants / mova tables into account. */
1112 if (min_align < 4)
1113 min_align = 4;
1114 if (align_functions < min_align)
1115 align_functions = min_align;
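/* For example, with -mrelax on a non-SH5 speed build the defaults computed
   above are align_loops = 4 and align_jumps = 2, so min_align becomes 4 and
   align_functions (already 4 by default) ends up at least min_align, keeping
   every alignment inside a function no larger than the alignment at its
   start.  */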
1118 if (flag_unsafe_math_optimizations)
1120 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1121 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1122 TARGET_FSCA = 1;
1124 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1125 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1126 TARGET_FSRRA = 1;
1129 /* Allow fsrra insn only if -funsafe-math-optimizations and
1130 -ffinite-math-only are enabled. */
1131 TARGET_FSRRA = TARGET_FSRRA
1132 && flag_unsafe_math_optimizations
1133 && flag_finite_math_only;
1135 /* If the -mieee option was not explicitly set by the user, turn it on
1136 unless -ffinite-math-only was specified. See also PR 33135. */
1137 if (! global_options_set.x_TARGET_IEEE)
1138 TARGET_IEEE = ! flag_finite_math_only;
1140 if (sh_fixed_range_str)
1141 sh_fix_range (sh_fixed_range_str);
1143 /* This target defaults to strict volatile bitfields. */
1144 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1145 flag_strict_volatile_bitfields = 1;
1147 /* Parse atomic model option and make sure it is valid for the current
1148 target CPU. */
1149 selected_atomic_model_
1150 = parse_validate_atomic_model_option (sh_atomic_model_str);
1152 register_sh_passes ();
1155 /* Print the operand address in x to the stream. */
1156 static void
1157 sh_print_operand_address (FILE *stream, rtx x)
1159 switch (GET_CODE (x))
1161 case REG:
1162 case SUBREG:
1163 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1164 break;
1166 case PLUS:
1168 rtx base = XEXP (x, 0);
1169 rtx index = XEXP (x, 1);
1171 switch (GET_CODE (index))
1173 case CONST_INT:
1174 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1175 reg_names[true_regnum (base)]);
1176 break;
1178 case REG:
1179 case SUBREG:
1181 int base_num = true_regnum (base);
1182 int index_num = true_regnum (index);
1184 fprintf (stream, "@(r0,%s)",
1185 reg_names[MAX (base_num, index_num)]);
1186 break;
1189 default:
1190 gcc_unreachable ();
1193 break;
1195 case PRE_DEC:
1196 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1197 break;
1199 case POST_INC:
1200 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1201 break;
1203 default:
1204 x = mark_constant_pool_use (x);
1205 output_addr_const (stream, x);
1206 break;
1210 /* Print operand x (an rtx) in assembler syntax to file stream
1211 according to modifier code.
1213 '.' print a .s if insn needs delay slot
1214 ',' print LOCAL_LABEL_PREFIX
1215 '@' print trapa, rte or rts depending on whether the function has a trap_exit attribute or is an interrupt handler
1216 '#' output a nop if there is nothing to put in the delay slot
1217 ''' print likelihood suffix (/u for unlikely).
1218 '>' print branch target if -fverbose-asm
1219 'O' print a constant without the #
1220 'R' print the LSW of a dp value - changes if in little endian
1221 'S' print the MSW of a dp value - changes if in little endian
1222 'T' print the next word of a dp value - same as 'R' in big endian mode.
1223 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1224 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1225 'N' print 'r63' if the operand is (const_int 0).
1226 'd' print a V2SF reg as dN instead of fpN.
1227 'm' print a pair `base,offset' or `base,index', for LD and ST.
1228 'U' Likewise for {LD,ST}{HI,LO}.
1229 'V' print the position of a single bit set.
1230 'W' print the position of a single bit cleared.
1231 't' print a memory address which is a register.
1232 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1233 'o' output an operator. */
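/* A few illustrative cases (operands assumed for exposition):

     %V on (const_int 64)          prints "#6"
     %W on (const_int -65)         prints "#6"
     %u on (const_int 0x12345678)  prints "22136"
     %# emits "\n\tnop" only when there is nothing in the delay slot.  */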
1234 static void
1235 sh_print_operand (FILE *stream, rtx x, int code)
1237 int regno;
1238 machine_mode mode;
1240 switch (code)
1242 tree trapa_attr;
1244 case '.':
1245 if (final_sequence
1246 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1247 && get_attr_length (final_sequence->insn (1)))
1248 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1249 break;
1250 case ',':
1251 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1252 break;
1253 case '@':
1254 trapa_attr = lookup_attribute ("trap_exit",
1255 DECL_ATTRIBUTES (current_function_decl));
1256 if (trapa_attr)
1257 fprintf (stream, "trapa #%ld",
1258 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1259 else if (sh_cfun_interrupt_handler_p ())
1261 if (sh_cfun_resbank_handler_p ())
1262 fprintf (stream, "resbank\n");
1263 fprintf (stream, "rte");
1265 else
1266 fprintf (stream, "rts");
1267 break;
1268 case '#':
1269 /* Output a nop if there's nothing in the delay slot. */
1270 if (dbr_sequence_length () == 0)
1271 fprintf (stream, "\n\tnop");
1272 break;
1273 case '\'':
1275 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1277 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1278 fputs ("/u", stream);
1279 break;
1281 case '>':
1282 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1284 fputs ("\t! target: ", stream);
1285 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1287 break;
1288 case 'O':
1289 x = mark_constant_pool_use (x);
1290 output_addr_const (stream, x);
1291 break;
1292 /* N.B.: %R / %S / %T adjust memory addresses by four.
1293 For SHMEDIA, that means they can be used to access the first and
1294 second 32 bit part of a 64 bit (or larger) value that
1295 might be held in floating point registers or memory.
1296 While they can be used to access 64 bit parts of a larger value
1297 held in general purpose registers, that won't work with memory -
1298 nor for fp registers, since the frxx names are used. */
1299 case 'R':
1300 if (REG_P (x) || GET_CODE (x) == SUBREG)
1302 regno = true_regnum (x);
1303 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1304 fputs (reg_names[regno], (stream));
1306 else if (MEM_P (x))
1308 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1309 sh_print_operand_address (stream, XEXP (x, 0));
1311 else
1313 rtx sub = NULL_RTX;
1315 mode = GET_MODE (x);
1316 if (mode == VOIDmode)
1317 mode = DImode;
1318 if (GET_MODE_SIZE (mode) >= 8)
1319 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1320 if (sub)
1321 sh_print_operand (stream, sub, 0);
1322 else
1323 output_operand_lossage ("invalid operand to %%R");
1325 break;
1326 case 'S':
1327 if (REG_P (x) || GET_CODE (x) == SUBREG)
1329 regno = true_regnum (x);
1330 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1331 fputs (reg_names[regno], (stream));
1333 else if (MEM_P (x))
1335 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1336 sh_print_operand_address (stream, XEXP (x, 0));
1338 else
1340 rtx sub = NULL_RTX;
1342 mode = GET_MODE (x);
1343 if (mode == VOIDmode)
1344 mode = DImode;
1345 if (GET_MODE_SIZE (mode) >= 8)
1346 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1347 if (sub)
1348 sh_print_operand (stream, sub, 0);
1349 else
1350 output_operand_lossage ("invalid operand to %%S");
1352 break;
1353 case 'T':
1354 /* Next word of a double. */
1355 switch (GET_CODE (x))
1357 case REG:
1358 fputs (reg_names[REGNO (x) + 1], (stream));
1359 break;
1360 case MEM:
1361 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1362 && GET_CODE (XEXP (x, 0)) != POST_INC)
1363 x = adjust_address (x, SImode, 4);
1364 sh_print_operand_address (stream, XEXP (x, 0));
1365 break;
1366 default:
1367 break;
1369 break;
1371 case 't':
1372 gcc_assert (MEM_P (x));
1373 x = XEXP (x, 0);
1374 switch (GET_CODE (x))
1376 case REG:
1377 case SUBREG:
1378 sh_print_operand (stream, x, 0);
1379 break;
1380 default:
1381 break;
1383 break;
1385 case 'o':
1386 switch (GET_CODE (x))
1388 case PLUS: fputs ("add", stream); break;
1389 case MINUS: fputs ("sub", stream); break;
1390 case MULT: fputs ("mul", stream); break;
1391 case DIV: fputs ("div", stream); break;
1392 case EQ: fputs ("eq", stream); break;
1393 case NE: fputs ("ne", stream); break;
1394 case GT: case LT: fputs ("gt", stream); break;
1395 case GE: case LE: fputs ("ge", stream); break;
1396 case GTU: case LTU: fputs ("gtu", stream); break;
1397 case GEU: case LEU: fputs ("geu", stream); break;
1398 default:
1399 break;
1401 break;
1402 case 'M':
1403 if (TARGET_SHMEDIA)
1405 if (MEM_P (x)
1406 && GET_CODE (XEXP (x, 0)) == PLUS
1407 && (REG_P (XEXP (XEXP (x, 0), 1))
1408 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1409 fputc ('x', stream);
1411 else
1413 if (MEM_P (x))
1415 switch (GET_MODE (x))
1417 case QImode: fputs (".b", stream); break;
1418 case HImode: fputs (".w", stream); break;
1419 case SImode: fputs (".l", stream); break;
1420 case SFmode: fputs (".s", stream); break;
1421 case DFmode: fputs (".d", stream); break;
1422 default: gcc_unreachable ();
1426 break;
1428 case 'm':
1429 gcc_assert (MEM_P (x));
1430 x = XEXP (x, 0);
1431 /* Fall through. */
1432 case 'U':
1433 switch (GET_CODE (x))
1435 case REG:
1436 case SUBREG:
1437 sh_print_operand (stream, x, 0);
1438 fputs (", 0", stream);
1439 break;
1441 case PLUS:
1442 sh_print_operand (stream, XEXP (x, 0), 0);
1443 fputs (", ", stream);
1444 sh_print_operand (stream, XEXP (x, 1), 0);
1445 break;
1447 default:
1448 gcc_unreachable ();
1450 break;
1452 case 'V':
1454 int num = exact_log2 (INTVAL (x));
1455 gcc_assert (num >= 0);
1456 fprintf (stream, "#%d", num);
1458 break;
1460 case 'W':
1462 int num = exact_log2 (~INTVAL (x));
1463 gcc_assert (num >= 0);
1464 fprintf (stream, "#%d", num);
1466 break;
1468 case 'd':
1469 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1471 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1472 break;
1474 case 'N':
1475 if (x == CONST0_RTX (GET_MODE (x)))
1477 fprintf ((stream), "r63");
1478 break;
1480 goto default_output;
1481 case 'u':
1482 if (CONST_INT_P (x))
1484 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1485 break;
1487 /* Fall through. */
1489 default_output:
1490 default:
1491 regno = 0;
1492 mode = GET_MODE (x);
1494 switch (GET_CODE (x))
1496 case TRUNCATE:
1498 rtx inner = XEXP (x, 0);
1499 int offset = 0;
1500 machine_mode inner_mode;
1502 /* We might see SUBREGs with vector mode registers inside. */
1503 if (GET_CODE (inner) == SUBREG
1504 && (GET_MODE_SIZE (GET_MODE (inner))
1505 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1506 && subreg_lowpart_p (inner))
1507 inner = SUBREG_REG (inner);
1508 if (CONST_INT_P (inner))
1510 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1511 goto default_output;
1513 inner_mode = GET_MODE (inner);
1514 if (GET_CODE (inner) == SUBREG
1515 && (GET_MODE_SIZE (GET_MODE (inner))
1516 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1517 && REG_P (SUBREG_REG (inner)))
1519 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1520 GET_MODE (SUBREG_REG (inner)),
1521 SUBREG_BYTE (inner),
1522 GET_MODE (inner));
1523 inner = SUBREG_REG (inner);
1525 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1526 abort ();
1527 /* Floating point register pairs are always big endian;
1528 general purpose registers are 64 bit wide. */
1529 regno = REGNO (inner);
1530 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1531 - HARD_REGNO_NREGS (regno, mode))
1532 + offset;
1533 x = inner;
1534 goto reg;
1536 case SIGN_EXTEND:
1537 x = XEXP (x, 0);
1538 goto reg;
1539 /* FIXME: We need this on SHmedia32 because reload generates
1540 some sign-extended HI or QI loads into DImode registers
1541 but, because Pmode is SImode, the address ends up with a
1542 subreg:SI of the DImode register. Maybe reload should be
1543 fixed so as to apply alter_subreg to such loads? */
1544 case IF_THEN_ELSE:
1545 gcc_assert (trapping_target_operand (x, VOIDmode));
1546 x = XEXP (XEXP (x, 2), 0);
1547 goto default_output;
1548 case SUBREG:
1549 gcc_assert (SUBREG_BYTE (x) == 0
1550 && REG_P (SUBREG_REG (x)));
1552 x = SUBREG_REG (x);
1553 /* Fall through. */
1555 reg:
1556 case REG:
1557 regno += REGNO (x);
1558 if (FP_REGISTER_P (regno)
1559 && mode == V16SFmode)
1560 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1561 else if (FP_REGISTER_P (REGNO (x))
1562 && mode == V4SFmode)
1563 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1564 else if (REG_P (x)
1565 && mode == V2SFmode)
1566 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1567 else if (FP_REGISTER_P (REGNO (x))
1568 && GET_MODE_SIZE (mode) > 4)
1569 fprintf ((stream), "d%s", reg_names[regno] + 1);
1570 else
1571 fputs (reg_names[regno], (stream));
1572 break;
1574 case MEM:
1575 output_address (XEXP (x, 0));
1576 break;
1578 default:
1579 if (TARGET_SH1)
1580 fputc ('#', stream);
1581 output_addr_const (stream, x);
1582 break;
1584 break;
1588 static bool
1589 sh_print_operand_punct_valid_p (unsigned char code)
1591 return (code == '.' || code == '#' || code == '@' || code == ','
1592 || code == '$' || code == '\'' || code == '>');
1595 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1596 static bool
1597 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1599 if (GET_CODE (x) == UNSPEC)
1601 switch (XINT (x, 1))
1603 case UNSPEC_DATALABEL:
1604 fputs ("datalabel ", file);
1605 output_addr_const (file, XVECEXP (x, 0, 0));
1606 break;
1607 case UNSPEC_PIC:
1608 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1609 output_addr_const (file, XVECEXP (x, 0, 0));
1610 break;
1611 case UNSPEC_GOT:
1612 output_addr_const (file, XVECEXP (x, 0, 0));
1613 fputs ("@GOT", file);
1614 break;
1615 case UNSPEC_GOTOFF:
1616 output_addr_const (file, XVECEXP (x, 0, 0));
1617 fputs ("@GOTOFF", file);
1618 break;
1619 case UNSPEC_PLT:
1620 output_addr_const (file, XVECEXP (x, 0, 0));
1621 fputs ("@PLT", file);
1622 break;
1623 case UNSPEC_GOTPLT:
1624 output_addr_const (file, XVECEXP (x, 0, 0));
1625 fputs ("@GOTPLT", file);
1626 break;
1627 case UNSPEC_DTPOFF:
1628 output_addr_const (file, XVECEXP (x, 0, 0));
1629 fputs ("@DTPOFF", file);
1630 break;
1631 case UNSPEC_GOTTPOFF:
1632 output_addr_const (file, XVECEXP (x, 0, 0));
1633 fputs ("@GOTTPOFF", file);
1634 break;
1635 case UNSPEC_TPOFF:
1636 output_addr_const (file, XVECEXP (x, 0, 0));
1637 fputs ("@TPOFF", file);
1638 break;
1639 case UNSPEC_CALLER:
1641 char name[32];
1642 /* LPCS stands for Label for PIC Call Site. */
1643 targetm.asm_out.generate_internal_label (name, "LPCS",
1644 INTVAL (XVECEXP (x, 0, 0)));
1645 assemble_name (file, name);
1647 break;
1648 case UNSPEC_EXTRACT_S16:
1649 case UNSPEC_EXTRACT_U16:
1651 rtx val, shift;
1653 val = XVECEXP (x, 0, 0);
1654 shift = XVECEXP (x, 0, 1);
1655 fputc ('(', file);
1656 if (shift != const0_rtx)
1657 fputc ('(', file);
1658 if (GET_CODE (val) == CONST
1659 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1661 fputc ('(', file);
1662 output_addr_const (file, val);
1663 fputc (')', file);
1665 else
1666 output_addr_const (file, val);
1667 if (shift != const0_rtx)
1669 fputs (" >> ", file);
1670 output_addr_const (file, shift);
1671 fputc (')', file);
1673 fputs (" & 65535)", file);
1675 break;
1676 case UNSPEC_SYMOFF:
1677 output_addr_const (file, XVECEXP (x, 0, 0));
1678 fputc ('-', file);
1679 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1681 fputc ('(', file);
1682 output_addr_const (file, XVECEXP (x, 0, 1));
1683 fputc (')', file);
1685 else
1686 output_addr_const (file, XVECEXP (x, 0, 1));
1687 break;
1688 case UNSPEC_PCREL_SYMOFF:
1689 output_addr_const (file, XVECEXP (x, 0, 0));
1690 fputs ("-(", file);
1691 output_addr_const (file, XVECEXP (x, 0, 1));
1692 fputs ("-.)", file);
1693 break;
1694 default:
1695 return false;
1697 return true;
1699 else
1700 return false;
1703 /* Encode symbol attributes of a SYMBOL_REF into its
1704 SYMBOL_REF_FLAGS. */
1705 static void
1706 sh_encode_section_info (tree decl, rtx rtl, int first)
1708 default_encode_section_info (decl, rtl, first);
1710 if (TREE_CODE (decl) == FUNCTION_DECL
1711 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1712 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1715 /* Prepare operands for a move define_expand; specifically, one of the
1716 operands must be in a register. */
1717 void
1718 prepare_move_operands (rtx operands[], machine_mode mode)
1720 if ((mode == SImode || mode == DImode)
1721 && flag_pic
1722 && ! ((mode == Pmode || mode == ptr_mode)
1723 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1725 rtx temp;
1726 if (SYMBOLIC_CONST_P (operands[1]))
1728 if (MEM_P (operands[0]))
1729 operands[1] = force_reg (Pmode, operands[1]);
1730 else if (TARGET_SHMEDIA
1731 && GET_CODE (operands[1]) == LABEL_REF
1732 && target_reg_operand (operands[0], mode))
1733 /* It's ok. */;
1734 else
1736 temp = (!can_create_pseudo_p ()
1737 ? operands[0]
1738 : gen_reg_rtx (Pmode));
1739 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1742 else if (GET_CODE (operands[1]) == CONST
1743 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1744 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1746 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1747 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1748 mode, temp);
1749 operands[1] = expand_binop (mode, add_optab, temp,
1750 XEXP (XEXP (operands[1], 0), 1),
1751 (!can_create_pseudo_p ()
1752 ? temp
1753 : gen_reg_rtx (Pmode)),
1754 0, OPTAB_LIB_WIDEN);
1758 if (! reload_in_progress && ! reload_completed)
1760 /* Copy the source to a register if both operands aren't registers. */
1761 if (! register_operand (operands[0], mode)
1762 && ! sh_register_operand (operands[1], mode))
1763 operands[1] = copy_to_mode_reg (mode, operands[1]);
1765 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1767 /* This is like change_address_1 (operands[0], mode, 0, 1),
1768 except that we can't use that function because it is static. */
1769 rtx new_rtx = change_address (operands[0], mode, 0);
1770 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1771 operands[0] = new_rtx;
1774 /* This case can happen while generating code to move the result
1775 of a library call to the target. Reject `st r0,@(rX,rY)' because
1776 reload will fail to find a spill register for rX, since r0 is already
1777 being used for the source. */
1778 else if (TARGET_SH1
1779 && refers_to_regno_p (R0_REG, operands[1])
1780 && MEM_P (operands[0])
1781 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1782 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1783 operands[1] = copy_to_mode_reg (mode, operands[1]);
1785 /* When displacement addressing is used, RA will assign r0 to
1786 the pseudo register operand for the QI/HImode load/store.
1787 This tends to make a long live range for R0 and might cause
1788 anomalous register spills in some cases with LRA. See PR
1789 target/55212.
1790 We split such a load/store into two move insns via r0 so as to
1791 shorten R0's live range. It will make some code worse but will
1792 win on average for LRA. */
1793 else if (sh_lra_p ()
1794 && TARGET_SH1 && ! TARGET_SH2A
1795 && (mode == QImode || mode == HImode)
1796 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1797 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1799 bool load_p = REG_P (operands[0]);
1800 rtx reg = operands[load_p ? 0 : 1];
1801 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1803 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1804 && GET_CODE (adr) == PLUS
1805 && REG_P (XEXP (adr, 0))
1806 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1807 && CONST_INT_P (XEXP (adr, 1))
1808 && INTVAL (XEXP (adr, 1)) != 0
1809 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1811 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1812 emit_move_insn (r0_rtx, operands[1]);
1813 operands[1] = r0_rtx;
1818 if (mode == Pmode || mode == ptr_mode)
1820 rtx op0, op1, opc;
1821 enum tls_model tls_kind;
1823 op0 = operands[0];
1824 op1 = operands[1];
1825 if (GET_CODE (op1) == CONST
1826 && GET_CODE (XEXP (op1, 0)) == PLUS
1827 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1828 != TLS_MODEL_NONE))
1830 opc = XEXP (XEXP (op1, 0), 1);
1831 op1 = XEXP (XEXP (op1, 0), 0);
1833 else
1834 opc = NULL_RTX;
1836 if (! reload_in_progress && ! reload_completed
1837 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1839 rtx tga_op1, tga_ret, tmp, tmp2;
1841 if (! flag_pic
1842 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1843 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1844 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1846 /* Don't schedule insns for getting the GOT address when
1847 the first scheduling pass is enabled, to avoid spill
1848 failures for R0. */
1849 if (flag_schedule_insns)
1850 emit_insn (gen_blockage ());
1851 emit_insn (gen_GOTaddr2picreg ());
1852 emit_use (gen_rtx_REG (SImode, PIC_REG));
1853 if (flag_schedule_insns)
1854 emit_insn (gen_blockage ());
1857 switch (tls_kind)
1859 case TLS_MODEL_GLOBAL_DYNAMIC:
1860 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1861 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1862 tmp = gen_reg_rtx (Pmode);
1863 emit_move_insn (tmp, tga_ret);
1864 op1 = tmp;
1865 break;
1867 case TLS_MODEL_LOCAL_DYNAMIC:
1868 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1869 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1871 tmp = gen_reg_rtx (Pmode);
1872 emit_move_insn (tmp, tga_ret);
1874 if (register_operand (op0, Pmode))
1875 tmp2 = op0;
1876 else
1877 tmp2 = gen_reg_rtx (Pmode);
1879 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1880 op1 = tmp2;
1881 break;
1883 case TLS_MODEL_INITIAL_EXEC:
1884 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1885 tmp = gen_sym2GOTTPOFF (op1);
1886 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1887 op1 = tga_op1;
1888 break;
1890 case TLS_MODEL_LOCAL_EXEC:
1891 tmp2 = gen_reg_rtx (Pmode);
1892 emit_insn (gen_store_gbr (tmp2));
1893 tmp = gen_reg_rtx (Pmode);
1894 emit_insn (gen_symTPOFF2reg (tmp, op1));
1896 if (register_operand (op0, Pmode))
1897 op1 = op0;
1898 else
1899 op1 = gen_reg_rtx (Pmode);
1901 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1902 break;
1904 default:
1905 gcc_unreachable ();
1907 if (opc)
1908 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1909 operands[1] = op1;
1914 /* Implement the canonicalize_comparison target hook for the combine
1915 pass. For the target hook this function is invoked via
1916 sh_canonicalize_comparison. This function is also re-used to
1917 canonicalize comparisons in cbranch pattern expanders. */
1918 static void
1919 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1920 machine_mode mode,
1921 bool op0_preserve_value)
1923 /* When invoked from within the combine pass the mode is not specified,
1924 so try to get it from one of the operands. */
1925 if (mode == VOIDmode)
1926 mode = GET_MODE (op0);
1927 if (mode == VOIDmode)
1928 mode = GET_MODE (op1);
1930 // We need to have a mode to do something useful here.
1931 if (mode == VOIDmode)
1932 return;
1934 // Currently, we don't deal with floats here.
1935 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1936 return;
1938 // Make sure that the constant operand is the second operand.
1939 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1941 if (op0_preserve_value)
1942 return;
1944 std::swap (op0, op1);
1945 cmp = swap_condition (cmp);
1948 if (CONST_INT_P (op1))
1950 /* Try to adjust the constant operand in such a way that available
1951 comparison insns can be utilized better and the constant can be
1952 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1953 constant pool. */
1954 const HOST_WIDE_INT val = INTVAL (op1);
1956 /* x > -1 --> x >= 0
1957 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1958 x <= -1 --> x < 0
1959 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1960 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1962 cmp = cmp == GT ? GE : LT;
1963 op1 = gen_int_mode (val + 1, mode);
1966 /* x >= 1 --> x > 0
1967 x >= 0x80 --> x > 0x7F
1968 x < 1 --> x <= 0
1969 x < 0x80 --> x <= 0x7F */
1970 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1972 cmp = cmp == GE ? GT : LE;
1973 op1 = gen_int_mode (val - 1, mode);
1976 /* unsigned x >= 1 --> x != 0
1977 unsigned x < 1 --> x == 0 */
1978 else if (val == 1 && (cmp == GEU || cmp == LTU))
1980 cmp = cmp == GEU ? NE : EQ;
1981 op1 = CONST0_RTX (mode);
1984 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1985 unsigned x < 0x80 --> unsigned x < 0x7F */
1986 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1988 cmp = cmp == GEU ? GTU : LEU;
1989 op1 = gen_int_mode (val - 1, mode);
1992 /* unsigned x > 0 --> x != 0
1993 unsigned x <= 0 --> x == 0 */
1994 else if (val == 0 && (cmp == GTU || cmp == LEU))
1995 cmp = cmp == GTU ? NE : EQ;
1997 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1998 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1999 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2000 && val == 0x7FFFFFFF)
2002 cmp = cmp == GTU ? LT : GE;
2003 op1 = const0_rtx;
2006 /* unsigned x >= 0x80000000 --> signed x < 0
2007 unsigned x < 0x80000000 --> signed x >= 0 */
2008 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2009 && (unsigned HOST_WIDE_INT)val
2010 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2012 cmp = cmp == GEU ? LT : GE;
2013 op1 = const0_rtx;
2018 /* This function implements the canonicalize_comparison target hook.
2019 This wrapper around the internally used sh_canonicalize_comparison
2020 function is needed to do the enum rtx_code <-> int conversion.
2021 Target hooks cannot use enum rtx_code in their definitions. */
2022 static void
2023 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2024 bool op0_preserve_value)
2026 enum rtx_code tmp_code = (enum rtx_code)*code;
2027 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2028 VOIDmode, op0_preserve_value);
2029 *code = (int)tmp_code;
2032 /* This function implements the legitimate_combined_insn target hook,
2033 which the combine pass uses to reject combined insns early, before
2034 it tries to recog the insn and determine its cost. */
2035 static bool
2036 sh_legitimate_combined_insn (rtx_insn* insn)
2038 /* Reject combinations of memory loads and zero extensions, as these
2039 interfere with other combine patterns such as zero extracts and bit
2040 tests. The SH2A movu.{b|w} insns are formed later in the
2041 'sh_optimize_extu_exts' pass after combine/split1. */
2042 rtx p = PATTERN (insn);
2043 if (GET_CODE (p) == SET
2044 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2045 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2046 && MEM_P (XEXP (XEXP (p, 1), 0)))
2047 return false;
2049 return true;
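/* Implement TARGET_FIXED_CONDITION_CODE_REGS. The T bit is the only fixed
   condition code register on SH; there is no second one. */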
2052 bool
2053 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2055 *p1 = T_REG;
2056 *p2 = INVALID_REGNUM;
2057 return true;
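/* Prepare the operands of a conditional branch comparison in MODE:
   canonicalize the comparison and force the operands into registers where
   the available compare insns require it. Return the resulting comparison
   code. */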
2060 enum rtx_code
2061 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2062 enum rtx_code comparison)
2064 /* The scratch reg is only available when this is invoked from within
2065 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2066 rtx scratch = NULL_RTX;
2068 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2069 comparison = GET_CODE (operands[0]);
2070 else
2071 scratch = operands[4];
2073 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2074 mode, false);
2076 /* Notice that this function is also invoked after reload by
2077 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2078 rtx op1 = operands[1];
2080 if (can_create_pseudo_p ())
2081 operands[1] = force_reg (mode, op1);
2082 /* When we are handling DImode comparisons, we want to keep constants so
2083 that we can optimize the component comparisons; however, memory loads
2084 are better issued as a whole so that they can be scheduled well.
2085 SImode equality comparisons allow I08 constants, but only when they
2086 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2087 into a register, that register might as well be r0, and we allow the
2088 constant. If it is already in a register, this is likely to be
2089 allocated to a different hard register, thus we load the constant into
2090 a register unless it is zero. */
2091 if (!REG_P (operands[2])
2092 && (!CONST_INT_P (operands[2])
2093 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2094 && ((comparison != EQ && comparison != NE)
2095 || (REG_P (op1) && REGNO (op1) != R0_REG)
2096 || !satisfies_constraint_I08 (operands[2])))))
2098 if (scratch && GET_MODE (scratch) == mode)
2100 emit_move_insn (scratch, operands[2]);
2101 operands[2] = scratch;
2103 else if (can_create_pseudo_p ())
2104 operands[2] = force_reg (mode, operands[2]);
2106 return comparison;
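/* Expand an SImode conditional branch: emit the comparison into the T bit
   and a conditional jump to operands[3], attaching PROBABILITY as a
   REG_BR_PROB note when it is non-negative. */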
2109 void
2110 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2112 rtx (*branch_expander) (rtx) = gen_branch_true;
2113 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2114 switch (comparison)
2116 case NE: case LT: case LE: case LTU: case LEU:
2117 comparison = reverse_condition (comparison);
2118 branch_expander = gen_branch_false;
2119 default: ;
2121 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2122 gen_rtx_fmt_ee (comparison, SImode,
2123 operands[1], operands[2])));
2124 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2125 if (probability >= 0)
2126 add_int_reg_note (jump, REG_BR_PROB, probability);
2129 /* ??? How should we distribute probabilities when more than one branch
2130 is generated? So far we only have some ad-hoc observations:
2131 - If the operands are random, they are likely to differ in both parts.
2132 - If comparing items in a hash chain, the operands are random or equal;
2133 operation should be EQ or NE.
2134 - If items are searched in an ordered tree from the root, we can expect
2135 the highpart to be unequal about half of the time; operation should be
2136 an inequality comparison, operands non-constant, and overall probability
2137 about 50%. Likewise for quicksort.
2138 - Range checks will often be made against constants. Even if we assume for
2139 simplicity an even distribution of the non-constant operand over a
2140 sub-range here, the same probability could be generated with differently
2141 wide sub-ranges - as long as the ratio of the part of the subrange that
2142 is before the threshold to the part that comes after the threshold stays
2143 the same. Thus, we can't really tell anything here;
2144 assuming random distribution is at least simple.
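/* Expand a DImode conditional branch as separate comparisons and branches on
   the SImode high and low parts. Return true if the expansion succeeded. */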
2146 bool
2147 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2149 enum rtx_code msw_taken, msw_skip, lsw_taken;
2150 rtx_code_label *skip_label = NULL;
2151 rtx op1h, op1l, op2h, op2l;
2152 int num_branches;
2153 int prob, rev_prob;
2154 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2155 rtx scratch = operands[4];
2157 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2158 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2159 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2160 op1l = gen_lowpart (SImode, operands[1]);
2161 op2l = gen_lowpart (SImode, operands[2]);
2162 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2163 prob = split_branch_probability;
2164 rev_prob = REG_BR_PROB_BASE - prob;
2165 switch (comparison)
2167 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2168 That costs 1 cycle more when the first branch can be predicted taken,
2169 but saves us mispredicts because only one branch needs prediction.
2170 It also enables generating the cmpeqdi_t-1 pattern. */
2171 case EQ:
2172 if (TARGET_CMPEQDI_T)
2174 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2175 emit_jump_insn (gen_branch_true (operands[3]));
2176 return true;
2178 msw_skip = NE;
2179 lsw_taken = EQ;
2180 if (prob >= 0)
2182 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2183 msw_skip_prob = rev_prob;
2184 if (REG_BR_PROB_BASE <= 65535)
2185 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2186 else
2188 lsw_taken_prob
2189 = (prob
2190 ? (REG_BR_PROB_BASE
2191 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2192 / ((gcov_type) prob << 32)))
2193 : 0);
2196 break;
2197 case NE:
2198 if (TARGET_CMPEQDI_T)
2200 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2201 emit_jump_insn (gen_branch_false (operands[3]));
2202 return true;
2204 msw_taken = NE;
2205 msw_taken_prob = prob;
2206 lsw_taken = NE;
2207 lsw_taken_prob = 0;
2208 break;
2209 case GTU: case GT:
2210 msw_taken = comparison;
2211 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2212 break;
2213 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2214 msw_skip = swap_condition (msw_taken);
2215 lsw_taken = GTU;
2216 break;
2217 case GEU: case GE:
2218 if (op2l == CONST0_RTX (SImode))
2219 msw_taken = comparison;
2220 else
2222 msw_taken = comparison == GE ? GT : GTU;
2223 msw_skip = swap_condition (msw_taken);
2224 lsw_taken = GEU;
2226 break;
2227 case LTU: case LT:
2228 msw_taken = comparison;
2229 if (op2l == CONST0_RTX (SImode))
2230 break;
2231 msw_skip = swap_condition (msw_taken);
2232 lsw_taken = LTU;
2233 break;
2234 case LEU: case LE:
2235 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2236 msw_taken = comparison;
2237 else
2239 lsw_taken = LEU;
2240 if (comparison == LE)
2241 msw_taken = LT;
2242 else if (op2h != CONST0_RTX (SImode))
2243 msw_taken = LTU;
2244 else
2246 msw_skip = swap_condition (LTU);
2247 break;
2249 msw_skip = swap_condition (msw_taken);
2251 break;
2252 default: return false;
2254 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2255 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2256 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2257 if (comparison != EQ && comparison != NE && num_branches > 1)
2259 if (!CONSTANT_P (operands[2])
2260 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2261 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2263 msw_taken_prob = prob / 2U;
2264 msw_skip_prob
2265 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2266 lsw_taken_prob = prob;
2268 else
2270 msw_taken_prob = prob;
2271 msw_skip_prob = REG_BR_PROB_BASE;
2272 /* ??? If we have a constant op2h, should we use that when
2273 calculating lsw_taken_prob? */
2274 lsw_taken_prob = prob;
2277 operands[1] = op1h;
2278 operands[2] = op2h;
2279 operands[4] = NULL_RTX;
2280 if (reload_completed
2281 && ! arith_reg_or_0_operand (op2h, SImode)
2282 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2283 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2284 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2286 emit_move_insn (scratch, operands[2]);
2287 operands[2] = scratch;
2289 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2290 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2291 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2293 rtx taken_label = operands[3];
2295 /* Operands were possibly modified, but msw_skip doesn't expect this.
2296 Always use the original ones. */
2297 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2299 operands[1] = op1h;
2300 operands[2] = op2h;
2301 if (reload_completed
2302 && ! arith_reg_or_0_operand (op2h, SImode)
2303 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2305 emit_move_insn (scratch, operands[2]);
2306 operands[2] = scratch;
2310 operands[3] = skip_label = gen_label_rtx ();
2311 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2312 operands[3] = taken_label;
2314 operands[1] = op1l;
2315 operands[2] = op2l;
2316 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2318 if (reload_completed
2319 && ! arith_reg_or_0_operand (op2l, SImode)
2320 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2322 emit_move_insn (scratch, operands[2]);
2323 operands[2] = scratch;
2325 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2327 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2328 emit_label (skip_label);
2329 return true;
2332 /* Given an operand, return 1 if the evaluated operand plugged into an
2333 if_then_else will result in a branch_true, 0 if branch_false, or
2334 -1 if neither applies. The truth table goes like this:
2336 op | cmpval | code | result
2337 ---------+--------+---------+--------------------
2338 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2339 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2340 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2341 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2342 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2343 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2344 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2345 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2347 sh_eval_treg_value (rtx op)
2349 if (t_reg_operand (op, GET_MODE (op)))
2350 return 1;
2351 if (negt_reg_operand (op, GET_MODE (op)))
2352 return 0;
2354 rtx_code code = GET_CODE (op);
2355 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2356 return -1;
2358 int cmpop = code == EQ ? 1 : 0;
2359 int cmpval = INTVAL (XEXP (op, 1));
2360 if (cmpval != 0 && cmpval != 1)
2361 return -1;
2363 int t;
2364 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2365 t = 0;
2366 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2367 t = 1;
2368 else
2369 return -1;
2371 return t ^ (cmpval == cmpop);
2374 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2375 of floating-point comparisons. */
2376 static void
2377 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2379 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2380 && GET_CODE (insn) != PARALLEL)
2382 insn = gen_rtx_PARALLEL (VOIDmode,
2383 gen_rtvec (3, insn,
2384 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2385 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2387 emit_insn (insn);
2390 /* Prepare the operands for an scc instruction; make sure that the
2391 compare has been done and the result is in T_REG. */
2392 void
2393 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2395 rtx t_reg = get_t_reg_rtx ();
2396 enum rtx_code oldcode = code;
2397 machine_mode mode;
2399 /* First need a compare insn. */
2400 switch (code)
2402 case NE:
2403 /* It isn't possible to handle this case. */
2404 gcc_unreachable ();
2405 case LT:
2406 code = GT;
2407 break;
2408 case LE:
2409 code = GE;
2410 break;
2411 case LTU:
2412 code = GTU;
2413 break;
2414 case LEU:
2415 code = GEU;
2416 break;
2417 default:
2418 break;
2420 if (code != oldcode)
2421 std::swap (op0, op1);
2423 mode = GET_MODE (op0);
2424 if (mode == VOIDmode)
2425 mode = GET_MODE (op1);
2427 op0 = force_reg (mode, op0);
2428 if ((code != EQ && code != NE
2429 && (op1 != const0_rtx
2430 || code == GTU || code == GEU || code == LTU || code == LEU))
2431 || (mode == DImode && op1 != const0_rtx)
2432 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2433 op1 = force_reg (mode, op1);
2435 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2436 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2437 mode);
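/* Emit a store-flag style comparison for SHmedia and return an EQ/NE test of
   the result register against zero, or NULL_RTX if CODE cannot be handled
   this way. */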
2441 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2442 rtx op0, rtx op1)
2444 rtx target = gen_reg_rtx (SImode);
2445 rtx tmp;
2447 gcc_assert (TARGET_SHMEDIA);
2448 switch (code)
2450 case EQ:
2451 case GT:
2452 case LT:
2453 case UNORDERED:
2454 case GTU:
2455 case LTU:
2456 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2457 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2458 code = NE;
2459 break;
2461 case NE:
2462 case GE:
2463 case LE:
2464 case ORDERED:
2465 case GEU:
2466 case LEU:
2467 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2468 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2469 code = EQ;
2470 break;
2472 case UNEQ:
2473 case UNGE:
2474 case UNGT:
2475 case UNLE:
2476 case UNLT:
2477 case LTGT:
2478 return NULL_RTX;
2480 default:
2481 gcc_unreachable ();
2484 if (mode == DImode)
2486 rtx t2 = gen_reg_rtx (DImode);
2487 emit_insn (gen_extendsidi2 (t2, target));
2488 target = t2;
2491 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2494 /* Called from the md file, set up the operands of a compare instruction. */
2495 void
2496 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2498 enum rtx_code code = GET_CODE (operands[0]);
2499 enum rtx_code branch_code;
2500 rtx op0 = operands[1];
2501 rtx op1 = operands[2];
2502 rtx insn;
2503 bool need_ccmpeq = false;
2505 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2507 op0 = force_reg (mode, op0);
2508 op1 = force_reg (mode, op1);
2510 else
2512 if (code != EQ || mode == DImode)
2514 /* Force args into regs, since we can't use constants here. */
2515 op0 = force_reg (mode, op0);
2516 if (op1 != const0_rtx || code == GTU || code == GEU)
2517 op1 = force_reg (mode, op1);
2521 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2523 if (code == LT
2524 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2525 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2527 std::swap (op0, op1);
2528 code = swap_condition (code);
2531 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2532 if (code == GE)
2534 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2535 need_ccmpeq = true;
2536 code = GT;
2539 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2540 to EQ/GT respectively. */
2541 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2544 switch (code)
2546 case EQ:
2547 case GT:
2548 case GE:
2549 case GTU:
2550 case GEU:
2551 branch_code = code;
2552 break;
2553 case NE:
2554 case LT:
2555 case LE:
2556 case LTU:
2557 case LEU:
2558 branch_code = reverse_condition (code);
2559 break;
2560 default:
2561 gcc_unreachable ();
2564 insn = gen_rtx_SET (get_t_reg_rtx (),
2565 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2567 sh_emit_set_t_insn (insn, mode);
2568 if (need_ccmpeq)
2569 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2571 if (branch_code == code)
2572 emit_jump_insn (gen_branch_true (operands[3]));
2573 else
2574 emit_jump_insn (gen_branch_false (operands[3]));
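/* Called from the md file; expand a compare-and-set (cstore): emit the
   comparison into the T bit and copy it, possibly negated, into
   operands[0]. */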
2577 void
2578 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2580 enum rtx_code code = GET_CODE (operands[1]);
2581 rtx op0 = operands[2];
2582 rtx op1 = operands[3];
2583 rtx_code_label *lab = NULL;
2584 bool invert = false;
2586 op0 = force_reg (mode, op0);
2587 if ((code != EQ && code != NE
2588 && (op1 != const0_rtx
2589 || code == GTU || code == GEU || code == LTU || code == LEU))
2590 || (mode == DImode && op1 != const0_rtx)
2591 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2592 op1 = force_reg (mode, op1);
2594 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2596 if (code == LT || code == LE)
2598 std::swap (op0, op1);
2599 code = swap_condition (code);
2601 if (code == GE)
2603 if (TARGET_IEEE)
2605 lab = gen_label_rtx ();
2606 sh_emit_scc_to_t (EQ, op0, op1);
2607 emit_jump_insn (gen_branch_true (lab));
2608 code = GT;
2610 else
2612 code = LT;
2613 invert = true;
2618 if (code == NE)
2620 code = EQ;
2621 invert = true;
2624 sh_emit_scc_to_t (code, op0, op1);
2625 if (lab)
2626 emit_label (lab);
2627 if (invert)
2628 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2629 else
2630 emit_move_insn (operands[0], get_t_reg_rtx ());
2633 /* Functions to output assembly code. */
2635 /* Return a sequence of instructions to perform a DI or DF move.
2637 Since the SH cannot move a DI or DF in one instruction, we have
2638 to take care when we see overlapping source and dest registers. */
2639 const char *
2640 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2641 machine_mode mode)
2643 rtx dst = operands[0];
2644 rtx src = operands[1];
2646 if (MEM_P (dst)
2647 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2648 return "mov.l %T1,%0" "\n"
2649 " mov.l %1,%0";
2651 if (register_operand (dst, mode)
2652 && register_operand (src, mode))
2654 if (REGNO (src) == MACH_REG)
2655 return "sts mach,%S0" "\n"
2656 " sts macl,%R0";
2658 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2659 when mov.d r1,r0 do r1->r0 then r2->r1. */
2660 if (REGNO (src) + 1 == REGNO (dst))
2661 return "mov %T1,%T0" "\n"
2662 " mov %1,%0";
2663 else
2664 return "mov %1,%0" "\n"
2665 " mov %T1,%T0";
2667 else if (CONST_INT_P (src))
2669 if (INTVAL (src) < 0)
2670 output_asm_insn ("mov #-1,%S0", operands);
2671 else
2672 output_asm_insn ("mov #0,%S0", operands);
2674 return "mov %1,%R0";
2676 else if (MEM_P (src))
2678 int ptrreg = -1;
2679 int dreg = REGNO (dst);
2680 rtx inside = XEXP (src, 0);
2682 switch (GET_CODE (inside))
2684 case REG:
2685 ptrreg = REGNO (inside);
2686 break;
2688 case SUBREG:
2689 ptrreg = subreg_regno (inside);
2690 break;
2692 case PLUS:
2693 ptrreg = REGNO (XEXP (inside, 0));
2694 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2695 an offsettable address. Unfortunately, offsettable addresses use
2696 QImode to check the offset, and a QImode offsettable address
2697 requires r0 for the other operand, which is not currently
2698 supported, so we can't use the 'o' constraint.
2699 Thus we must check for and handle r0+REG addresses here.
2700 We punt for now, since this is likely very rare. */
2701 gcc_assert (!REG_P (XEXP (inside, 1)));
2702 break;
2704 case LABEL_REF:
2705 return "mov.l %1,%0" "\n"
2706 " mov.l %1+4,%T0";
2707 case POST_INC:
2708 return "mov.l %1,%0" "\n"
2709 " mov.l %1,%T0";
2710 default:
2711 gcc_unreachable ();
2714 /* Work out the safe way to copy. Copy into the second half first. */
2715 if (dreg == ptrreg)
2716 return "mov.l %T1,%T0" "\n"
2717 " mov.l %1,%0";
2720 return "mov.l %1,%0" "\n"
2721 " mov.l %T1,%T0";
2724 /* Print an instruction which would have gone into a delay slot after
2725 another instruction, but couldn't because the other instruction expanded
2726 into a sequence where putting the slot insn at the end wouldn't work. */
2727 static void
2728 print_slot (rtx_sequence *seq)
2730 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2732 seq->insn (1)->set_deleted ();
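/* Output a branch for INSN to a destination OP that is out of range of an
   ordinary conditional branch: on SH2 a short mov.w/braf sequence when the
   offset permits, otherwise the target address is loaded from an inline
   literal and reached with braf or jmp. */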
2735 const char *
2736 output_far_jump (rtx_insn *insn, rtx op)
2738 struct { rtx lab, reg, op; } this_jmp;
2739 rtx_code_label *braf_base_lab = NULL;
2740 const char *jump;
2741 int far;
2742 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2743 rtx_insn *prev;
2745 this_jmp.lab = gen_label_rtx ();
2747 if (TARGET_SH2
2748 && offset >= -32764
2749 && offset - get_attr_length (insn) <= 32766
2750 && ! CROSSING_JUMP_P (insn))
2752 far = 0;
2753 jump = "mov.w %O0,%1" "\n"
2754 " braf %1";
2756 else
2758 far = 1;
2759 if (flag_pic)
2761 if (TARGET_SH2)
2762 jump = "mov.l %O0,%1" "\n"
2763 " braf %1";
2764 else
2765 jump = "mov.l r0,@-r15" "\n"
2766 " mova %O0,r0" "\n"
2767 " mov.l @r0,%1" "\n"
2768 " add r0,%1" "\n"
2769 " mov.l @r15+,r0" "\n"
2770 " jmp @%1";
2772 else
2773 jump = "mov.l %O0,%1" "\n"
2774 " jmp @%1";
2776 /* If we have a scratch register available, use it. */
2777 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2778 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2780 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2781 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2782 jump = "mov.l r1,@-r15" "\n"
2783 " mova %O0,r0" "\n"
2784 " mov.l @r0,r1" "\n"
2785 " add r1,r0" "\n"
2786 " mov.l @r15+,r1" "\n"
2787 " jmp @%1";
2788 output_asm_insn (jump, &this_jmp.lab);
2789 if (dbr_sequence_length ())
2790 print_slot (final_sequence);
2791 else
2792 output_asm_insn ("nop", 0);
2794 else
2796 /* Output the delay slot insn first if any. */
2797 if (dbr_sequence_length ())
2798 print_slot (final_sequence);
2800 this_jmp.reg = gen_rtx_REG (SImode, 13);
2801 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2802 Fortunately, MACL is fixed and call-clobbered, and we never
2803 need its value across jumps, so save r13 in it instead of on
2804 the stack. */
2805 if (TARGET_SH5)
2806 output_asm_insn ("lds r13,macl", 0);
2807 else
2808 output_asm_insn ("mov.l r13,@-r15", 0);
2809 output_asm_insn (jump, &this_jmp.lab);
2810 if (TARGET_SH5)
2811 output_asm_insn ("sts macl,r13", 0);
2812 else
2813 output_asm_insn ("mov.l @r15+,r13", 0);
2815 if (far && flag_pic && TARGET_SH2)
2817 braf_base_lab = gen_label_rtx ();
2818 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2819 CODE_LABEL_NUMBER (braf_base_lab));
2821 if (far)
2822 output_asm_insn (".align 2", 0);
2823 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2824 this_jmp.op = op;
2825 if (far && flag_pic)
2827 if (TARGET_SH2)
2828 this_jmp.lab = braf_base_lab;
2829 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2831 else
2832 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2833 return "";
2836 /* Local label counter, used for constants in the pool and inside
2837 pattern branches. */
2838 static int lf = 100;
2840 /* Output code for ordinary branches. */
2841 const char *
2842 output_branch (int logic, rtx_insn *insn, rtx *operands)
2844 switch (get_attr_length (insn))
2846 case 6:
2847 /* This can happen if filling the delay slot has caused a forward
2848 branch to exceed its range (we could reverse it, but only
2849 when we know we won't overextend other branches; this should
2850 best be handled by relaxation).
2851 It can also happen when other condbranches hoist delay slot insns
2852 from their destinations, thus leading to a code size increase.
2853 But the branch will still be in the range -4092..+4098 bytes. */
2854 if (! TARGET_RELAX)
2856 int label = lf++;
2857 /* The call to print_slot will clobber the operands. */
2858 rtx op0 = operands[0];
2860 /* If the instruction in the delay slot is annulled (true), then
2861 there is no delay slot where we can put it now. The only safe
2862 place for it is after the label. final will do that by default. */
2864 if (final_sequence
2865 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2866 && get_attr_length (final_sequence->insn (1)))
2868 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2869 ASSEMBLER_DIALECT ? "/" : ".", label);
2870 print_slot (final_sequence);
2872 else
2873 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2875 output_asm_insn ("bra\t%l0", &op0);
2876 fprintf (asm_out_file, "\tnop\n");
2877 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2879 return "";
2881 /* When relaxing, handle this like a short branch. The linker
2882 will fix it up if it still doesn't fit after relaxation. */
2883 case 2:
2884 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2886 /* These are for SH2e, in which we have to account for the
2887 extra nop because of the hardware bug in annulled branches. */
2888 case 8:
2889 if (! TARGET_RELAX)
2891 int label = lf++;
2893 gcc_assert (!final_sequence
2894 || !(INSN_ANNULLED_BRANCH_P
2895 (XVECEXP (final_sequence, 0, 0))));
2896 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2897 logic ? "f" : "t",
2898 ASSEMBLER_DIALECT ? "/" : ".", label);
2899 fprintf (asm_out_file, "\tnop\n");
2900 output_asm_insn ("bra\t%l0", operands);
2901 fprintf (asm_out_file, "\tnop\n");
2902 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2904 return "";
2906 /* When relaxing, fall through. */
2907 case 4:
2909 char buffer[10];
2911 sprintf (buffer, "b%s%ss\t%%l0",
2912 logic ? "t" : "f",
2913 ASSEMBLER_DIALECT ? "/" : ".");
2914 output_asm_insn (buffer, &operands[0]);
2915 return "nop";
2918 default:
2919 /* There should be no branches longer than this by now - that would
2920 indicate that something has destroyed the branches set
2921 up in machine_dependent_reorg. */
2922 gcc_unreachable ();
2926 /* Output a code sequence for INSN using TEMPL with OPERANDS; but first,
2927 fill in operands[9] as a label to the successor insn.
2928 We try to use jump threading where possible.
2929 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2930 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2931 follow jmp and bt, if the address is in range. */
2932 const char *
2933 output_branchy_insn (enum rtx_code code, const char *templ,
2934 rtx_insn *insn, rtx *operands)
2936 rtx_insn *next_insn = NEXT_INSN (insn);
2938 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2940 rtx src = SET_SRC (PATTERN (next_insn));
2941 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2943 /* Following branch not taken */
2944 rtx_code_label *lab = gen_label_rtx ();
2945 emit_label_after (lab, next_insn);
2946 INSN_ADDRESSES_NEW (lab,
2947 INSN_ADDRESSES (INSN_UID (next_insn))
2948 + get_attr_length (next_insn));
2949 operands[9] = lab;
2950 return templ;
2952 else
2954 int offset = (branch_dest (next_insn)
2955 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2956 if (offset >= -252 && offset <= 258)
2958 if (GET_CODE (src) == IF_THEN_ELSE)
2959 /* branch_true */
2960 src = XEXP (src, 1);
2961 operands[9] = src;
2962 return templ;
2966 rtx_code_label *lab = gen_label_rtx ();
2967 emit_label_after (lab, insn);
2968 INSN_ADDRESSES_NEW (lab,
2969 INSN_ADDRESSES (INSN_UID (insn))
2970 + get_attr_length (insn));
2971 operands[9] = lab;
2972 return templ;
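/* Output the IEEE fcmp/eq retry sequence: branch over a second fcmp/eq when
   the T bit is already set, using output_branchy_insn to supply the label. */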
2975 const char *
2976 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2978 return output_branchy_insn (NE, "bt %l9" "\n"
2979 " fcmp/eq %1,%0",
2980 insn, operands);
2983 /* Output the start of the assembler file. */
2984 static void
2985 sh_file_start (void)
2987 default_file_start ();
2989 if (TARGET_ELF)
2990 /* We need to show the text section with the proper
2991 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2992 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2993 will complain. We can teach GAS specifically about the
2994 default attributes for our choice of text section, but
2995 then we would have to change GAS again if/when we change
2996 the text section name. */
2997 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2998 else
2999 /* Switch to the data section so that the coffsem symbol
3000 isn't in the text section. */
3001 switch_to_section (data_section);
3003 if (TARGET_LITTLE_ENDIAN)
3004 fputs ("\t.little\n", asm_out_file);
3006 if (!TARGET_ELF)
3008 if (TARGET_SHCOMPACT)
3009 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3010 else if (TARGET_SHMEDIA)
3011 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3012 TARGET_SHMEDIA64 ? 64 : 32);
3016 /* Check if PAT includes the UNSPEC_CALLER unspec pattern. */
3017 static bool
3018 unspec_caller_rtx_p (rtx pat)
3020 rtx base, offset;
3021 int i;
3023 split_const (pat, &base, &offset);
3024 if (GET_CODE (base) == UNSPEC)
3026 if (XINT (base, 1) == UNSPEC_CALLER)
3027 return true;
3028 for (i = 0; i < XVECLEN (base, 0); i++)
3029 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3030 return true;
3032 return false;
3035 /* Indicate that INSN cannot be duplicated. This is true for insns
3036 that generate a unique label. */
3037 static bool
3038 sh_cannot_copy_insn_p (rtx_insn *insn)
3040 rtx pat;
3042 if (!reload_completed || !flag_pic)
3043 return false;
3045 if (!NONJUMP_INSN_P (insn))
3046 return false;
3047 if (asm_noperands (insn) >= 0)
3048 return false;
3050 pat = PATTERN (insn);
3051 if (GET_CODE (pat) != SET)
3052 return false;
3053 pat = SET_SRC (pat);
3055 if (unspec_caller_rtx_p (pat))
3056 return true;
3058 return false;
3061 /* Number of instructions used to make an arithmetic right shift by N. */
3062 static const char ashiftrt_insns[] =
3063 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3065 /* Description of a logical left or right shift, when expanded to a sequence
3066 of 1/2/8/16 shifts.
3067 Notice that one bit right shifts clobber the T bit. One bit left shifts
3068 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
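/* For example, entry 5 below is { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }: a shift
   by 5 is done as shifts by 2, 1 and 2, and the logical right shift variant
   clobbers the T bit because of the 1-bit shift. */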
3069 enum
3071 ASHL_CLOBBERS_T = 1 << 0,
3072 LSHR_CLOBBERS_T = 1 << 1
3075 struct ashl_lshr_sequence
3077 char insn_count;
3078 signed char amount[6];
3079 char clobbers_t;
3082 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3084 { 0, { 0 }, 0 }, // 0
3085 { 1, { 1 }, LSHR_CLOBBERS_T },
3086 { 1, { 2 }, 0 },
3087 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3088 { 2, { 2, 2 }, 0 }, // 4
3089 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3090 { 3, { 2, 2, 2 }, 0 },
3091 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3092 { 1, { 8 }, 0 }, // 8
3093 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3094 { 2, { 8, 2 }, 0 },
3095 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3096 { 3, { 8, 2, 2 }, 0 }, // 12
3097 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3098 { 3, { 8, -2, 8 }, 0 },
3099 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3100 { 1, { 16 }, 0 }, // 16
3101 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3102 { 2, { 16, 2 }, 0 },
3103 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3104 { 3, { 16, 2, 2 }, 0 }, // 20
3105 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3106 { 3, { 16, -2, 8 }, 0 },
3107 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3108 { 2, { 16, 8 }, 0 }, // 24
3109 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3110 { 3, { 16, 8, 2 }, 0 },
3111 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3112 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3113 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3114 { 3, { 16, -2, 16 }, 0 },
3116 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3117 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3118 However, the shift-and combiner code needs this entry here to be in
3119 terms of real shift insns. */
3120 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3123 /* Individual shift amounts for shift amounts < 16, where up to the three
3124 highmost bits might be clobbered. This is typically used when combined
3125 with some kind of sign or zero extension. */
3126 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3128 { 0, { 0 }, 0 }, // 0
3129 { 1, { 1 }, LSHR_CLOBBERS_T },
3130 { 1, { 2 }, 0 },
3131 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3132 { 2, { 2, 2 }, 0 }, // 4
3133 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3134 { 2, { 8, -2 }, 0 },
3135 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3136 { 1, { 8 }, 0 }, // 8
3137 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3138 { 2, { 8, 2 }, 0 },
3139 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3140 { 3, { 8, 2, 2 }, 0 }, // 12
3141 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3142 { 2, { 16, -2 }, 0 },
3143 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3144 { 1, { 16 }, 0 }, // 16
3145 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3146 { 2, { 16, 2 }, 0 },
3147 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3148 { 3, { 16, 2, 2 }, 0 }, // 20
3149 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3150 { 3, { 16, -2, 8 }, 0 },
3151 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3152 { 2, { 16, 8 }, 0 }, // 24
3153 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3154 { 3, { 16, 8, 2 }, 0 },
3155 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3156 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3157 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3158 { 3, { 16, -2, 16 }, 0 },
3159 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3162 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3163 will clobber the T bit. */
3164 bool
3165 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3167 gcc_assert (CONST_INT_P (shift_amount));
3169 const int shift_amount_i = INTVAL (shift_amount) & 31;
3171 /* Special case for shift count of 31: use and-rotl sequence. */
3172 if (shift_amount_i == 31)
3173 return true;
3175 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3176 & ASHL_CLOBBERS_T) != 0;
3179 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3180 instructions will clobber the T bit. */
3181 bool
3182 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3184 gcc_assert (CONST_INT_P (shift_amount));
3186 const int shift_amount_i = INTVAL (shift_amount) & 31;
3188 /* Special case for shift count of 31: use shll-movt sequence. */
3189 if (shift_amount_i == 31)
3190 return true;
3192 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3193 & LSHR_CLOBBERS_T) != 0;
3196 /* Return true if it is potentially beneficial to use a dynamic shift
3197 instruction (shad / shar) instead of a combination of 1/2/8/16
3198 shift instructions for the specified shift count.
3199 If dynamic shifts are not available, always return false. */
3200 bool
3201 sh_dynamicalize_shift_p (rtx count)
3203 gcc_assert (CONST_INT_P (count));
3205 const int shift_amount_i = INTVAL (count) & 31;
3206 int insn_count;
3208 /* For left and right shifts, there are shorter 2 insn sequences for
3209 shift amounts of 31. */
3210 if (shift_amount_i == 31)
3211 insn_count = 2;
3212 else
3213 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3215 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3218 /* Assuming we have a value that has been sign-extended by at least one bit,
3219 can we use the ext_shift_amounts with the last shift turned to an
3220 arithmetic shift to shift it by N without data loss, and quicker than by
3221 other means? */
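/* I.e. this is true exactly for shift counts of 7 and 15. */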
3222 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3224 /* Return the cost of a shift. */
3225 static inline int
3226 shiftcosts (rtx x)
3228 int value;
3230 if (TARGET_SHMEDIA)
3231 return 1;
3233 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3235 if (GET_MODE (x) == DImode
3236 && CONST_INT_P (XEXP (x, 1))
3237 && INTVAL (XEXP (x, 1)) == 1)
3238 return 2;
3240 /* Everything else is invalid, because there is no pattern for it. */
3241 return -1;
3243 /* If shifting by a non-constant, then this will be expensive. */
3244 if (!CONST_INT_P (XEXP (x, 1)))
3245 return SH_DYNAMIC_SHIFT_COST;
3247 /* Otherwise, return the true cost in instructions. Cope with out of range
3248 shift counts more or less arbitrarily. */
3249 value = INTVAL (XEXP (x, 1)) & 31;
3251 if (GET_CODE (x) == ASHIFTRT)
3253 int cost = ashiftrt_insns[value];
3254 /* If dynamic shifts are available and profitable in this case, then we
3255 put the constant in a reg and use shad. */
3256 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3257 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3258 return cost;
3260 else
3261 return ashl_lshr_seq[value].insn_count;
3264 /* Return the cost of an AND/XOR/IOR operation. */
3265 static inline int
3266 and_xor_ior_costs (rtx x, int code)
3268 /* On SH1-4 we have only max. SImode operations.
3269 Double the cost for modes > SImode. */
3270 const int cost_scale = !TARGET_SHMEDIA
3271 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3272 ? 2 : 1;
3274 /* A logical operation with two registers is a single cycle
3275 instruction. */
3276 if (!CONST_INT_P (XEXP (x, 1)))
3277 return 1 * cost_scale;
3279 int i = INTVAL (XEXP (x, 1));
3281 if (TARGET_SHMEDIA)
3283 if (satisfies_constraint_I10 (XEXP (x, 1))
3284 || satisfies_constraint_J16 (XEXP (x, 1)))
3285 return 1;
3286 else
3287 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3290 /* These constants are single cycle extu.[bw] instructions. */
3291 if ((i == 0xff || i == 0xffff) && code == AND)
3292 return 1 * cost_scale;
3293 /* Constants that can be used in an instruction as an immediate take
3294 a single cycle, but this requires r0, so make it a little more
3295 expensive. */
3296 if (CONST_OK_FOR_K08 (i))
3297 return 2 * cost_scale;
3298 /* Constants that can be loaded with a mov immediate need one more cycle.
3299 This case is probably unnecessary. */
3300 if (CONST_OK_FOR_I08 (i))
3301 return 2 * cost_scale;
3302 /* Any other constant requires an additional 2 cycle pc-relative load.
3303 This case is probably unnecessary. */
3304 return 3 * cost_scale;
3307 /* Return the cost of an addition or a subtraction. */
3308 static inline int
3309 addsubcosts (rtx x)
3311 if (GET_MODE (x) == SImode)
3313 /* The addc or subc patterns will eventually become one or two
3314 instructions. Below are some costs for some of the patterns
3315 which combine would reject because the costs of the individual
3316 insns in the patterns are lower.
3318 FIXME: It would be much easier if we had something like insn cost
3319 attributes and the cost calculation machinery used those attributes
3320 in the first place. This would eliminate redundant recog-like C
3321 code to calculate costs of complex patterns. */
3322 rtx op0 = XEXP (x, 0);
3323 rtx op1 = XEXP (x, 1);
3325 if (GET_CODE (x) == PLUS)
3327 if (GET_CODE (op0) == AND
3328 && XEXP (op0, 1) == const1_rtx
3329 && (GET_CODE (op1) == PLUS
3330 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3331 return 1;
3333 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3334 && GET_CODE (op1) == LSHIFTRT
3335 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3336 return 1;
3338 /* Let's assume that adding the result of an insn that stores into
3339 the T bit is cheap. */
3340 if (treg_set_expr (op1, SImode))
3341 return 1;
3342 if (treg_set_expr (op0, SImode))
3343 return 1;
3346 /* On SH1-4 we have only max. SImode operations.
3347 Double the cost for modes > SImode. */
3348 const int cost_scale = !TARGET_SHMEDIA
3349 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3350 ? 2 : 1;
3352 /* Adding a register is a single cycle insn. */
3353 if (REG_P (XEXP (x, 1))
3354 || GET_CODE (XEXP (x, 1)) == SUBREG)
3355 return 1 * cost_scale;
3357 /* Likewise for small constants. */
3358 if (CONST_INT_P (XEXP (x, 1))
3359 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3360 return 1 * cost_scale;
3362 if (TARGET_SHMEDIA)
3363 switch (GET_CODE (XEXP (x, 1)))
3365 case CONST:
3366 case LABEL_REF:
3367 case SYMBOL_REF:
3368 return TARGET_SHMEDIA64 ? 5 : 3;
3370 case CONST_INT:
3371 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3372 return 2;
3373 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3374 return 3;
3375 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3376 return 4;
3378 /* Fall through. */
3379 default:
3380 return 5;
3383 /* Any other constant requires a 2 cycle pc-relative load plus an
3384 addition. */
3385 return 3 * cost_scale;
3388 /* Return the cost of a multiply. */
3389 static inline int
3390 multcosts (rtx x ATTRIBUTE_UNUSED)
3392 if (sh_multcost >= 0)
3393 return sh_multcost;
3394 if (TARGET_SHMEDIA)
3395 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3396 accept constants. Ideally, we would use a cost of one or two and
3397 add the cost of the operand, but disregard the latter when inside loops
3398 and loop invariant code motion is still to follow.
3399 Using a multiply first and splitting it later if it's a loss
3400 doesn't work because of different sign / zero extension semantics
3401 of multiplies vs. shifts. */
3402 return optimize_size ? 2 : 3;
3404 if (TARGET_SH2)
3406 /* We have a mul insn, so we can never take more than the mul and the
3407 read of the mac reg, but count more because of the latency and extra
3408 reg usage. */
3409 if (optimize_size)
3410 return 2;
3411 return 3;
3414 /* If we're aiming at small code, then just count the number of
3415 insns in a multiply call sequence. */
3416 if (optimize_size)
3417 return 5;
3419 /* Otherwise count all the insns in the routine we'd be calling too. */
3420 return 20;
3423 /* Compute a (partial) cost for rtx X. Return true if the complete
3424 cost has been computed, and false if subexpressions should be
3425 scanned. In either case, *TOTAL contains the cost result. */
3426 static bool
3427 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3428 int *total, bool speed ATTRIBUTE_UNUSED)
3430 switch (code)
3432 /* The lower-subreg pass decides whether to split multi-word regs
3433 into individual regs by looking at the cost for a SET of certain
3434 modes with the following patterns:
3435 (set (reg) (reg))
3436 (set (reg) (const_int 0))
3437 On machines that support vector-move operations a multi-word move
3438 is the same cost as individual reg move. On SH there is no
3439 vector-move, so we have to provide the correct cost in the number
3440 of move insns to load/store the reg of the mode in question. */
3441 case SET:
3442 if (register_operand (SET_DEST (x), VOIDmode)
3443 && (register_operand (SET_SRC (x), VOIDmode)
3444 || satisfies_constraint_Z (SET_SRC (x))))
3446 const machine_mode mode = GET_MODE (SET_DEST (x));
3447 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3448 / mov_insn_size (mode, TARGET_SH2A));
3449 return true;
3451 return false;
3453 /* The cost of a mem access is mainly the cost of the address mode. */
3454 case MEM:
3455 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3456 true);
3457 return true;
3459 case IF_THEN_ELSE:
3460 /* This case is required for the if_then_else negc pattern. */
3461 if (treg_set_expr (XEXP (x, 0), SImode))
3463 *total = COSTS_N_INSNS (1);
3464 return true;
3466 else
3467 return false;
3469 /* Zero extracts of single bits are usually combine patterns for the
3470 tst insns. */
3471 case ZERO_EXTRACT:
3472 if (GET_CODE (XEXP (x, 0)) == XOR
3473 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3474 && XEXP (x, 1) == const1_rtx
3475 && CONST_INT_P (XEXP (x, 2))
3476 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3477 /* Check that the xor constant overlaps with the extracted bit. */
3478 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3480 *total = 1; //COSTS_N_INSNS (1);
3481 return true;
3483 return false;
3485 /* The cost of a sign or zero extend depends on whether the source is a
3486 reg or a mem. In case of a mem, take the address into account. */
3487 case SIGN_EXTEND:
3488 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3490 *total = COSTS_N_INSNS (1);
3491 return true;
3493 if (MEM_P (XEXP (x, 0)))
3495 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3496 GET_MODE (XEXP (x, 0)),
3497 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3498 return true;
3500 return false;
3502 case ZERO_EXTEND:
3503 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3505 *total = COSTS_N_INSNS (1);
3506 return true;
3508 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3509 && (GET_MODE (XEXP (x, 0)) == QImode
3510 || GET_MODE (XEXP (x, 0)) == HImode))
3512 /* Handle SH2A's movu.b and movu.w insns. */
3513 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3514 GET_MODE (XEXP (x, 0)),
3515 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3516 return true;
3518 return false;
3520 /* MEMs for SFmode and DFmode can be inside a PARALLEL due to
3521 the way the FPSCR is handled. */
3522 case PARALLEL:
3523 for (int i = 0; i < XVECLEN (x, 0); i++)
3525 rtx xx = XVECEXP (x, 0, i);
3526 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3528 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3529 GET_MODE (XEXP (xx, 0)),
3530 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3531 return true;
3533 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3535 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3536 GET_MODE (XEXP (xx, 1)),
3537 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3538 return true;
3542 if (sh_1el_vec (x, VOIDmode))
3543 *total = outer_code != SET;
3544 else if (sh_rep_vec (x, VOIDmode))
3545 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3546 + (outer_code != SET));
3547 else
3548 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3549 return true;
3551 case CONST_INT:
3552 if (TARGET_SHMEDIA)
3554 if (INTVAL (x) == 0)
3555 *total = 0;
3556 else if (outer_code == AND && and_operand ((x), DImode))
3557 *total = 0;
3558 else if ((outer_code == IOR || outer_code == XOR
3559 || outer_code == PLUS)
3560 && CONST_OK_FOR_I10 (INTVAL (x)))
3561 *total = 0;
3562 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3563 *total = COSTS_N_INSNS (outer_code != SET);
3564 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3565 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3566 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3567 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3568 else
3569 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3570 return true;
3572 if (CONST_OK_FOR_I08 (INTVAL (x)))
3573 *total = 0;
3574 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3575 && CONST_OK_FOR_K08 (INTVAL (x)))
3576 *total = 1;
3577 /* prepare_cmp_insn will force costly constants into registers before
3578 the cbranch[sd]i4 patterns can see them, so preserve potentially
3579 interesting ones not covered by I08 above. */
3580 else if (outer_code == COMPARE
3581 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3582 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3583 || INTVAL (x) == 0x7fffffff
3584 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3585 *total = 1;
3586 else
3587 *total = 8;
3588 return true;
3590 case EQ:
3591 /* An and with a constant compared against zero is
3592 most likely going to be a TST #imm, R0 instruction.
3593 Notice that this does not catch the zero_extract variants from
3594 the md file. */
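/* E.g. (eq (and (reg) (const_int 64)) (const_int 0)) usually ends up
   as a single 'tst #64,r0'.  */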
3595 if (XEXP (x, 1) == const0_rtx
3596 && (GET_CODE (XEXP (x, 0)) == AND
3597 || (SUBREG_P (XEXP (x, 0))
3598 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3600 *total = 1;
3601 return true;
3604 else if (XEXP (x, 1) == const0_rtx
3605 && GET_CODE (XEXP (x, 0)) == AND
3606 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3608 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3609 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3611 *total = 1;
3612 return true;
3614 else
3615 return false;
3617 case SMIN:
3618 case SMAX:
3619 /* This is most likely a clips.b or clips.w insn that is being made up
3620 by combine. */
3621 if (TARGET_SH2A
3622 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3623 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3624 && REG_P (XEXP (XEXP (x, 0), 0))
3625 && CONST_INT_P (XEXP (x, 1)))
3627 *total = COSTS_N_INSNS (1);
3628 return true;
3630 else
3631 return false;
3633 case CONST:
3634 case LABEL_REF:
3635 case SYMBOL_REF:
3636 if (TARGET_SHMEDIA64)
3637 *total = COSTS_N_INSNS (4);
3638 else if (TARGET_SHMEDIA32)
3639 *total = COSTS_N_INSNS (2);
3640 else
3641 *total = 5;
3642 return true;
3644 case CONST_DOUBLE:
3645 if (TARGET_SHMEDIA)
3646 *total = COSTS_N_INSNS (4);
3647 /* prepare_cmp_insn will force costly constants into registers before
3648 the cbranchdi4 pattern can see them, so preserve potentially
3649 interesting ones. */
3650 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3651 *total = 1;
3652 else
3653 *total = 10;
3654 return true;
3656 case CONST_VECTOR:
3657 /* FIXME: This looks broken. Only the last statement has any effect.
3658 Probably this could be folded with the PARALLEL case? */
3659 if (x == CONST0_RTX (GET_MODE (x)))
3660 *total = 0;
3661 else if (sh_1el_vec (x, VOIDmode))
3662 *total = outer_code != SET;
3663 if (sh_rep_vec (x, VOIDmode))
3664 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3665 + (outer_code != SET));
3666 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3667 return true;
3669 case PLUS:
3670 case MINUS:
3671 *total = COSTS_N_INSNS (addsubcosts (x));
3672 return true;
3674 case AND:
3675 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3676 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3678 *total = COSTS_N_INSNS (1);
3679 return true;
3681 /* Fall through. */
3683 case XOR:
3684 case IOR:
3685 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3686 return true;
3688 case MULT:
3689 *total = COSTS_N_INSNS (multcosts (x));
3690 return true;
3692 case LT:
3693 case GE:
3694 /* div0s sign comparison. */
3695 if (GET_CODE (XEXP (x, 0)) == XOR
3696 && REG_P ((XEXP (XEXP (x, 0), 0)))
3697 && REG_P ((XEXP (XEXP (x, 0), 1)))
3698 && satisfies_constraint_Z (XEXP (x, 1)))
3700 *total = COSTS_N_INSNS (1);
3701 return true;
3703 else
3704 return false;
3706 case LSHIFTRT:
3707 /* div0s sign comparison. */
3708 if (GET_CODE (XEXP (x, 0)) == XOR
3709 && REG_P ((XEXP (XEXP (x, 0), 0)))
3710 && REG_P ((XEXP (XEXP (x, 0), 1)))
3711 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3713 *total = COSTS_N_INSNS (1);
3714 return true;
3716 /* Fall through to shiftcosts. */
3717 case ASHIFT:
3718 case ASHIFTRT:
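/* A negative result from shiftcosts means the shift is not handled
   here; return false so that the generic costs are used instead.  */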
3720 int cost = shiftcosts (x);
3721 if (cost < 0)
3722 return false;
3723 *total = COSTS_N_INSNS (cost);
3724 return true;
3727 case DIV:
3728 case UDIV:
3729 case MOD:
3730 case UMOD:
3731 *total = COSTS_N_INSNS (20);
3732 return true;
3734 case FLOAT:
3735 case FIX:
3736 *total = 100;
3737 return true;
3739 default:
3740 return false;
3744 /* Determine the size of the fundamental move insn that will be used
3745 for the specified mode. */
3746 static inline int
3747 mov_insn_size (machine_mode mode, bool consider_sh2a)
3749 const int mode_sz = GET_MODE_SIZE (mode);
3751 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3752 || (TARGET_FMOVD && mode == DFmode))
3753 return mode_sz;
3754 else
3756 /* The max. available mode for actual move insns is SImode.
3757 Larger accesses will be split into multiple loads/stores. */
3758 const int max_mov_sz = GET_MODE_SIZE (SImode);
3759 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3763 /* Determine the maximum possible displacement for a move insn for the
3764 specified mode. */
3766 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3768 /* The 4 byte displacement move insns are the same as the 2 byte
3769 versions but take a 12 bit displacement. All we need to do is to
3770 scale the max. displacement value accordingly. */
3771 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3773 /* SH2A supports FPU move insns with 12 bit displacements.
3774 Other variants do not support any kind of displacements for
3775 FPU move insns. */
3776 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3777 return 0;
3778 else
3780 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3781 const int mode_sz = GET_MODE_SIZE (mode);
3782 int r = 15 * mov_insn_sz * disp_scale;
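/* E.g. for an SImode move on non-SH2A this gives 15 * 4 * 1 = 60 bytes.  */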
3784 /* If the mov insn will be split into multiple loads/stores, the
3785 maximum possible displacement is a bit smaller. */
3786 if (mode_sz > mov_insn_sz)
3787 r -= mode_sz - mov_insn_sz;
3788 return r;
3792 /* Determine the alignment mask for a move insn of the
3793 specified mode. */
3794 static inline int
3795 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3797 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3798 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3801 /* Return the displacement value of a displacement address. */
3802 HOST_WIDE_INT
3803 sh_disp_addr_displacement (rtx x)
3805 gcc_assert (satisfies_constraint_Sdd (x));
3806 return INTVAL (XEXP (XEXP (x, 0), 1));
3809 /* Compute the cost of an address. */
3810 static int
3811 sh_address_cost (rtx x, machine_mode mode,
3812 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3814 /* 'GBR + 0'. Account one more because of R0 restriction. */
3815 if (REG_P (x) && REGNO (x) == GBR_REG)
3816 return 2;
3818 /* Simple reg, post-inc, pre-dec addressing. */
3819 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3820 return 1;
3822 /* 'reg + disp' addressing. */
3823 if (GET_CODE (x) == PLUS
3824 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3826 /* 'GBR + disp'. Account one more because of R0 restriction. */
3827 if (REGNO (XEXP (x, 0)) == GBR_REG
3828 && gbr_displacement (XEXP (x, 1), mode))
3829 return 2;
3831 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3833 if (offset == 0)
3834 return 1;
3836 /* The displacement would fit into a 2 byte move insn.
3837 HImode and QImode loads/stores with displacement put pressure on
3838 R0 which will most likely require another reg copy. Thus account
3839 a higher cost for that. */
3840 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3841 return (mode == HImode || mode == QImode) ? 2 : 1;
3843 /* The displacement would fit into a 4 byte move insn (SH2A). */
3844 if (TARGET_SH2A
3845 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3846 return 2;
3848 /* The displacement is probably out of range and will require extra
3849 calculations. */
3850 return 3;
3853 /* 'reg + reg' addressing. Account a slightly higher cost because of
3854 increased pressure on R0. */
3855 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3856 && ! TARGET_SHMEDIA)
3857 return 3;
3859 /* Not sure what it is - probably expensive. */
3860 return 10;
3863 /* Code to expand a shift. */
3864 static void
3865 gen_ashift (int type, int n, rtx reg)
3867 rtx n_rtx;
3869 /* Negative values here come from the shift_amounts array. */
3870 if (n < 0)
3872 if (type == ASHIFT)
3873 type = LSHIFTRT;
3874 else
3875 type = ASHIFT;
3876 n = -n;
3879 n_rtx = GEN_INT (n);
3880 gcc_assert (satisfies_constraint_P27 (n_rtx));
3882 switch (type)
3884 case ASHIFTRT:
3885 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3886 break;
3887 case LSHIFTRT:
3888 if (n == 1)
3889 emit_insn (gen_shlr (reg, reg));
3890 else
3891 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3892 break;
3893 case ASHIFT:
3894 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3895 break;
3896 default:
3897 gcc_unreachable ();
3901 /* Code to expand a HImode shift. */
3902 static void
3903 gen_ashift_hi (int type, int n, rtx reg)
3905 /* Negative values here come from the shift_amounts array. */
3906 if (n < 0)
3908 if (type == ASHIFT)
3909 type = LSHIFTRT;
3910 else
3911 type = ASHIFT;
3912 n = -n;
3915 switch (type)
3917 case ASHIFTRT:
3918 case LSHIFTRT:
3919 /* We don't have HImode right shift operations because using the
3920 ordinary 32 bit shift instructions for that doesn't generate proper
3921 zero/sign extension.
3922 gen_ashift_hi is only called in contexts where we know that the
3923 sign extension works out correctly. */
3925 int offset = 0;
3926 if (GET_CODE (reg) == SUBREG)
3928 offset = SUBREG_BYTE (reg);
3929 reg = SUBREG_REG (reg);
3931 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3932 break;
3934 case ASHIFT:
3935 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3936 break;
3940 /* Output RTL to split a constant shift into its component SH constant
3941 shift instructions. */
3942 void
3943 gen_shifty_op (int code, rtx *operands)
3945 int value = INTVAL (operands[2]);
3946 int max, i;
3948 /* Truncate the shift count in case it is out of bounds. */
3949 value = value & 31;
3951 if (value == 31)
3953 if (code == LSHIFTRT)
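/* A logical right shift by 31 just extracts the MSB: rotate it into
   the T bit and copy T into the destination.  */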
3955 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3956 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3957 return;
3959 else if (code == ASHIFT)
3961 /* There is a two instruction sequence for 31 bit left shifts,
3962 but it requires r0. */
3963 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3965 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3966 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3967 return;
3971 else if (value == 0)
3973 /* This can happen even when optimizing, if there were subregs before
3974 reload. Don't output a nop here, as this is never optimized away;
3975 use a no-op move instead. */
3976 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3977 return;
3980 max = ashl_lshr_seq[value].insn_count;
3981 for (i = 0; i < max; i++)
3982 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3985 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3986 don't matter. */
3987 void
3988 gen_shifty_hi_op (int code, rtx *operands)
3990 int value = INTVAL (operands[2]);
3991 int max, i;
3992 void (*gen_fun) (int, int, rtx);
3994 /* This operation is used by and_shl for SImode values with a few
3995 high bits known to be cleared. */
3996 value &= 31;
3997 if (value == 0)
3999 emit_insn (gen_nop ());
4000 return;
4003 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4004 if (code == ASHIFT)
4006 max = ext_ashl_lshr_seq[value].insn_count;
4007 for (i = 0; i < max; i++)
4008 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4010 else
4011 /* When shifting right, emit the shifts in reverse order, so that
4012 solitary negative values come first. */
4013 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4014 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4017 /* Output RTL for an arithmetic right shift.
4018 ??? Rewrite to use super-optimizer sequences. */
4019 bool
4020 expand_ashiftrt (rtx *operands)
4022 rtx wrk;
4023 char func[18];
4024 int value;
4026 if (TARGET_DYNSHIFT)
4028 if (!CONST_INT_P (operands[2]))
4030 rtx count = copy_to_mode_reg (SImode, operands[2]);
4031 emit_insn (gen_negsi2 (count, count));
4032 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4033 return true;
4035 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4036 > 1 + SH_DYNAMIC_SHIFT_COST)
4038 rtx count
4039 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4040 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4041 return true;
4044 if (!CONST_INT_P (operands[2]))
4045 return false;
4047 value = INTVAL (operands[2]) & 31;
4049 if (value == 31)
4051 /* If we are called from abs expansion, arrange things so that we
4052 can use a single MT instruction that doesn't clobber the source,
4053 if LICM can hoist out the load of the constant zero. */
4054 if (currently_expanding_to_rtl)
4056 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4057 operands[1]));
4058 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4059 return true;
4061 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4062 return true;
4064 else if (value >= 16 && value <= 19)
4066 wrk = gen_reg_rtx (SImode);
4067 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4068 value -= 16;
4069 while (value--)
4070 gen_ashift (ASHIFTRT, 1, wrk);
4071 emit_move_insn (operands[0], wrk);
4072 return true;
4074 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4075 else if (value <= 5)
4077 wrk = gen_reg_rtx (SImode);
4078 emit_move_insn (wrk, operands[1]);
4079 while (value--)
4080 gen_ashift (ASHIFTRT, 1, wrk);
4081 emit_move_insn (operands[0], wrk);
4082 return true;
4085 wrk = gen_reg_rtx (Pmode);
4087 /* Load the value into an arg reg and call a helper. */
4088 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4089 sprintf (func, "__ashiftrt_r4_%d", value);
4090 function_symbol (wrk, func, SFUNC_STATIC);
4091 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4092 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4093 return true;
4096 /* Try to find a good way to implement the combiner pattern
4097 [(set (match_operand:SI 0 "register_operand" "r")
4098 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4099 (match_operand:SI 2 "const_int_operand" "n"))
4100 (match_operand:SI 3 "const_int_operand" "n"))) .
4101 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4102 return 0 for simple right / left or left/right shift combination.
4103 return 1 for a combination of shifts with zero_extend.
4104 return 2 for a combination of shifts with an AND that needs r0.
4105 return 3 for a combination of shifts with an AND that needs an extra
4106 scratch register, when the three highmost bits of the AND mask are clear.
4107 return 4 for a combination of shifts with an AND that needs an extra
4108 scratch register, when any of the three highmost bits of the AND mask
4109 is set.
4110 If ATTRP is set, store an initial right shift width in ATTRP[0],
4111 and the instruction length in ATTRP[1] . These values are not valid
4112 when returning 0.
4113 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4114 shift_amounts for the last shift value that is to be used before the
4115 sign extend. */
4117 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4119 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4120 int left = INTVAL (left_rtx), right;
4121 int best = 0;
4122 int cost, best_cost = 10000;
4123 int best_right = 0, best_len = 0;
4124 int i;
4125 int can_ext;
4127 if (left < 0 || left > 31)
4128 return 0;
4129 if (CONST_INT_P (mask_rtx))
4130 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4131 else
4132 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4133 /* Can this be expressed as a right shift / left shift pair? */
4134 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4135 right = exact_log2 (lsb);
4136 mask2 = ~(mask + lsb - 1);
4137 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4138 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4139 if (! mask2)
4140 best_cost = ashl_lshr_seq[right].insn_count
4141 + ashl_lshr_seq[right + left].insn_count;
4142 /* mask has no trailing zeroes <==> ! right */
4143 else if (! right && mask2 == ~(lsb2 - 1))
4145 int late_right = exact_log2 (lsb2);
4146 best_cost = ashl_lshr_seq[left + late_right].insn_count
4147 + ashl_lshr_seq[late_right].insn_count;
4149 /* Try to use zero extend. */
4150 if (mask2 == ~(lsb2 - 1))
4152 int width, first;
4154 for (width = 8; width <= 16; width += 8)
4156 /* Can we zero-extend right away? */
4157 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4159 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4160 + ext_ashl_lshr_seq[left + right].insn_count;
4161 if (cost < best_cost)
4163 best = 1;
4164 best_cost = cost;
4165 best_right = right;
4166 best_len = cost;
4167 if (attrp)
4168 attrp[2] = -1;
4170 continue;
4172 /* ??? Could try to put zero extend into initial right shift,
4173 or even shift a bit left before the right shift. */
4174 /* Determine value of first part of left shift, to get to the
4175 zero extend cut-off point. */
4176 first = width - exact_log2 (lsb2) + right;
4177 if (first >= 0 && right + left - first >= 0)
4179 cost = ext_ashl_lshr_seq[right].insn_count
4180 + ext_ashl_lshr_seq[first].insn_count + 1
4181 + ext_ashl_lshr_seq[right + left - first].insn_count;
4183 if (cost < best_cost)
4185 best = 1;
4186 best_cost = cost;
4187 best_right = right;
4188 best_len = cost;
4189 if (attrp)
4190 attrp[2] = first;
4195 /* Try to use r0 AND pattern */
4196 for (i = 0; i <= 2; i++)
4198 if (i > right)
4199 break;
4200 if (! CONST_OK_FOR_K08 (mask >> i))
4201 continue;
4202 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4203 if (cost < best_cost)
4205 best = 2;
4206 best_cost = cost;
4207 best_right = i;
4208 best_len = cost - 1;
4211 /* Try to use a scratch register to hold the AND operand. */
4212 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4213 for (i = 0; i <= 2; i++)
4215 if (i > right)
4216 break;
4217 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4218 + (can_ext
4219 ? ext_ashl_lshr_seq
4220 : ashl_lshr_seq)[left + i].insn_count;
4221 if (cost < best_cost)
4223 best = 4 - can_ext;
4224 best_cost = cost;
4225 best_right = i;
4226 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4230 if (attrp)
4232 attrp[0] = best_right;
4233 attrp[1] = best_len;
4235 return best;
4238 /* This is used in length attributes of the unnamed instructions
4239 corresponding to shl_and_kind return values of 1 and 2. */
4241 shl_and_length (rtx insn)
4243 rtx set_src, left_rtx, mask_rtx;
4244 int attributes[3];
4246 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4247 left_rtx = XEXP (XEXP (set_src, 0), 1);
4248 mask_rtx = XEXP (set_src, 1);
4249 shl_and_kind (left_rtx, mask_rtx, attributes);
4250 return attributes[1];
4253 /* This is used in length attribute of the and_shl_scratch instruction. */
4255 shl_and_scr_length (rtx insn)
4257 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4258 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4259 rtx op = XEXP (set_src, 0);
4260 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4261 op = XEXP (XEXP (op, 0), 0);
4262 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4265 /* Generate rtl for instructions for which shl_and_kind advised a particular
4266 method of generating them, i.e. returned nonzero. */
4267 bool
4268 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4270 int attributes[3];
4271 unsigned HOST_WIDE_INT mask;
4272 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4273 int right, total_shift;
4274 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4276 right = attributes[0];
4277 total_shift = INTVAL (left_rtx) + right;
4278 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4279 switch (kind)
4281 default:
4282 return true;
4283 case 1:
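/* Kind 1: implement the AND via an 8 or 16 bit zero extension placed
   within the shift sequence; a negative attributes[2] means the source
   is zero extended right away.  */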
4285 int first = attributes[2];
4286 rtx operands[3];
4288 if (first < 0)
4290 emit_insn ((mask << right) <= 0xff
4291 ? gen_zero_extendqisi2 (dest,
4292 gen_lowpart (QImode, source))
4293 : gen_zero_extendhisi2 (dest,
4294 gen_lowpart (HImode, source)));
4295 source = dest;
4297 if (source != dest)
4298 emit_insn (gen_movsi (dest, source));
4299 operands[0] = dest;
4300 if (right)
4302 operands[2] = GEN_INT (right);
4303 gen_shifty_hi_op (LSHIFTRT, operands);
4305 if (first > 0)
4307 operands[2] = GEN_INT (first);
4308 gen_shifty_hi_op (ASHIFT, operands);
4309 total_shift -= first;
4310 mask <<= first;
4312 if (first >= 0)
4313 emit_insn (mask <= 0xff
4314 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4315 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4316 if (total_shift > 0)
4318 operands[2] = GEN_INT (total_shift);
4319 gen_shifty_hi_op (ASHIFT, operands);
4321 break;
4323 case 4:
4324 shift_gen_fun = gen_shifty_op;
4325 case 3:
4326 /* If the topmost bit that matters is set, set the topmost bits
4327 that don't matter. This way, we might be able to get a shorter
4328 signed constant. */
4329 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4330 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4331 case 2:
4332 /* Don't expand fine-grained when combining, because that will
4333 make the pattern fail. */
4334 if (currently_expanding_to_rtl
4335 || reload_in_progress || reload_completed)
4337 rtx operands[3];
4339 /* Cases 3 and 4 should be handled by this split
4340 only while combining */
4341 gcc_assert (kind <= 2);
4342 if (right)
4344 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4345 source = dest;
4347 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4348 if (total_shift)
4350 operands[0] = dest;
4351 operands[1] = dest;
4352 operands[2] = GEN_INT (total_shift);
4353 shift_gen_fun (ASHIFT, operands);
4355 break;
4357 else
4359 int neg = 0;
4360 if (kind != 4 && total_shift < 16)
4362 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4363 if (neg > 0)
4364 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4365 else
4366 neg = 0;
4368 emit_insn (gen_and_shl_scratch (dest, source,
4369 GEN_INT (right),
4370 GEN_INT (mask),
4371 GEN_INT (total_shift + neg),
4372 GEN_INT (neg)));
4373 emit_insn (gen_movsi (dest, dest));
4374 break;
4377 return false;
4380 /* Try to find a good way to implement the combiner pattern
4381 [(set (match_operand:SI 0 "register_operand" "=r")
4382 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4383 (match_operand:SI 2 "const_int_operand" "n")
4384 (match_operand:SI 3 "const_int_operand" "n")
4385 (const_int 0)))
4386 (clobber (reg:SI T_REG))]
4387 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4388 return 0 for simple left / right shift combination.
4389 return 1 for left shift / 8 bit sign extend / left shift.
4390 return 2 for left shift / 16 bit sign extend / left shift.
4391 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4392 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4393 return 5 for left shift / 16 bit sign extend / right shift
4394 return 6 for < 8 bit sign extend / left shift.
4395 return 7 for < 8 bit sign extend / left shift / single right shift.
4396 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4398 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4400 int left, size, insize, ext;
4401 int cost = 0, best_cost;
4402 int kind;
4404 left = INTVAL (left_rtx);
4405 size = INTVAL (size_rtx);
4406 insize = size - left;
4407 gcc_assert (insize > 0);
4408 /* Default to left / right shift. */
4409 kind = 0;
4410 best_cost = ashl_lshr_seq[32 - insize].insn_count
4411 + ashl_lshr_seq[32 - size].insn_count;
4412 if (size <= 16)
4414 /* 16 bit shift / sign extend / 16 bit shift */
4415 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4416 + ashl_lshr_seq[16 - size].insn_count;
4417 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4418 below, by alternative 3 or something even better. */
4419 if (cost < best_cost)
4421 kind = 5;
4422 best_cost = cost;
4425 /* Try a plain sign extend between two shifts. */
4426 for (ext = 16; ext >= insize; ext -= 8)
4428 if (ext <= size)
4430 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4431 + ashl_lshr_seq[size - ext].insn_count;
4432 if (cost < best_cost)
4434 kind = ext / (unsigned) 8;
4435 best_cost = cost;
4438 /* Check if we can do a sloppy shift with a final signed shift
4439 restoring the sign. */
4440 if (EXT_SHIFT_SIGNED (size - ext))
4441 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4442 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4443 /* If not, maybe it's still cheaper to do the second shift sloppy,
4444 and do a final sign extend? */
4445 else if (size <= 16)
4446 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4447 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4448 + 1;
4449 else
4450 continue;
4451 if (cost < best_cost)
4453 kind = ext / (unsigned) 8 + 2;
4454 best_cost = cost;
4457 /* Check if we can sign extend in r0 */
4458 if (insize < 8)
4460 cost = 3 + ashl_lshr_seq[left].insn_count;
4461 if (cost < best_cost)
4463 kind = 6;
4464 best_cost = cost;
4466 /* Try the same with a final signed shift. */
4467 if (left < 31)
4469 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4470 if (cost < best_cost)
4472 kind = 7;
4473 best_cost = cost;
4477 if (TARGET_DYNSHIFT)
4479 /* Try to use a dynamic shift. */
4480 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4481 if (cost < best_cost)
4483 kind = 0;
4484 best_cost = cost;
4487 if (costp)
4488 *costp = cost;
4489 return kind;
4492 /* Function to be used in the length attribute of the instructions
4493 implementing this pattern. */
4495 shl_sext_length (rtx insn)
4497 rtx set_src, left_rtx, size_rtx;
4498 int cost;
4500 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4501 left_rtx = XEXP (XEXP (set_src, 0), 1);
4502 size_rtx = XEXP (set_src, 1);
4503 shl_sext_kind (left_rtx, size_rtx, &cost);
4504 return cost;
4507 /* Generate rtl for this pattern */
4508 bool
4509 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4511 int kind;
4512 int left, size, insize, cost;
4513 rtx operands[3];
4515 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4516 left = INTVAL (left_rtx);
4517 size = INTVAL (size_rtx);
4518 insize = size - left;
4519 switch (kind)
4521 case 1:
4522 case 2:
4523 case 3:
4524 case 4:
4526 int ext = kind & 1 ? 8 : 16;
4527 int shift2 = size - ext;
4529 /* Don't expand fine-grained when combining, because that will
4530 make the pattern fail. */
4531 if (! currently_expanding_to_rtl
4532 && ! reload_in_progress && ! reload_completed)
4534 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4535 emit_insn (gen_movsi (dest, source));
4536 break;
4538 if (dest != source)
4539 emit_insn (gen_movsi (dest, source));
4540 operands[0] = dest;
4541 if (ext - insize)
4543 operands[2] = GEN_INT (ext - insize);
4544 gen_shifty_hi_op (ASHIFT, operands);
4546 emit_insn (kind & 1
4547 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4548 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4549 if (kind <= 2)
4551 if (shift2)
4553 operands[2] = GEN_INT (shift2);
4554 gen_shifty_op (ASHIFT, operands);
4557 else
4559 if (shift2 > 0)
4561 if (EXT_SHIFT_SIGNED (shift2))
4563 operands[2] = GEN_INT (shift2 + 1);
4564 gen_shifty_op (ASHIFT, operands);
4565 operands[2] = const1_rtx;
4566 gen_shifty_op (ASHIFTRT, operands);
4567 break;
4569 operands[2] = GEN_INT (shift2);
4570 gen_shifty_hi_op (ASHIFT, operands);
4572 else if (shift2)
4574 operands[2] = GEN_INT (-shift2);
4575 gen_shifty_hi_op (LSHIFTRT, operands);
4577 emit_insn (size <= 8
4578 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4579 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4581 break;
4583 case 5:
4585 int i = 16 - size;
4586 if (! currently_expanding_to_rtl
4587 && ! reload_in_progress && ! reload_completed)
4588 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4589 else
4591 operands[0] = dest;
4592 operands[2] = GEN_INT (16 - insize);
4593 gen_shifty_hi_op (ASHIFT, operands);
4594 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4596 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4597 while (--i >= 0)
4598 gen_ashift (ASHIFTRT, 1, dest);
4599 break;
4601 case 6:
4602 case 7:
4603 /* Don't expand fine-grained when combining, because that will
4604 make the pattern fail. */
4605 if (! currently_expanding_to_rtl
4606 && ! reload_in_progress && ! reload_completed)
4608 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4609 emit_insn (gen_movsi (dest, source));
4610 break;
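/* Sign extend the low INSIZE bits of the source using the identity
   ((x & mask) ^ msb) - msb, then shift the result into place.  */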
4612 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4613 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4614 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4615 operands[0] = dest;
4616 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4617 gen_shifty_op (ASHIFT, operands);
4618 if (kind == 7)
4619 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4620 break;
4621 default:
4622 return true;
4624 return false;
4627 /* Prefix a symbol_ref name with "datalabel". */
4629 gen_datalabel_ref (rtx sym)
4631 const char *str;
4633 if (GET_CODE (sym) == LABEL_REF)
4634 return gen_rtx_CONST (GET_MODE (sym),
4635 gen_rtx_UNSPEC (GET_MODE (sym),
4636 gen_rtvec (1, sym),
4637 UNSPEC_DATALABEL));
4639 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4641 str = XSTR (sym, 0);
4642 /* Share all SYMBOL_REF strings with the same value - that is important
4643 for cse. */
4644 str = IDENTIFIER_POINTER (get_identifier (str));
4645 XSTR (sym, 0) = str;
4647 return sym;
4651 static alloc_pool label_ref_list_pool;
4653 typedef struct label_ref_list_d
4655 rtx_code_label *label;
4656 struct label_ref_list_d *next;
4657 } *label_ref_list_t;
4659 /* The SH cannot load a large constant into a register; constants have to
4660 come from a pc relative load. The reference of a pc relative load
4661 instruction must be less than 1k in front of the instruction. This
4662 means that we often have to dump a constant inside a function, and
4663 generate code to branch around it.
4665 It is important to minimize this, since the branches will slow things
4666 down and make things bigger.
4668 Worst case code looks like:
4670 mov.l L1,rn
4671 bra L2
4673 align
4674 L1: .long value
4678 mov.l L3,rn
4679 bra L4
4681 align
4682 L3: .long value
4686 We fix this by performing a scan before scheduling, which notices which
4687 instructions need to have their operands fetched from the constant table
4688 and builds the table.
4690 The algorithm is:
4692 Scan to find an instruction which needs a pc-relative move. Look forward,
4693 and find the last barrier which is within MAX_COUNT bytes of the requirement.
4694 If there isn't one, make one. Process all the instructions between
4695 the found instruction and the barrier.
4697 In the above example, we can tell that L3 is within 1k of L1, so
4698 the first move can be shrunk from the 3 insn+constant sequence into
4699 just 1 insn, and the constant moved to L3 to make:
4701 mov.l L1,rn
4703 mov.l L3,rn
4704 bra L4
4706 align
4707 L3:.long value
4708 L4:.long value
4710 Then the second move becomes the target for the shortening process. */
4712 typedef struct
4714 rtx value; /* Value in table. */
4715 rtx_code_label *label; /* Label of value. */
4716 label_ref_list_t wend; /* End of window. */
4717 machine_mode mode; /* Mode of value. */
4719 /* True if this constant is accessed as part of a post-increment
4720 sequence. Note that HImode constants are never accessed in this way. */
4721 bool part_of_sequence_p;
4722 } pool_node;
4724 /* The maximum number of constants that can fit into one pool, since
4725 constants in the range 0..510 are at least 2 bytes long, and in the
4726 range from there to 1018 at least 4 bytes. */
4728 #define MAX_POOL_SIZE 372
4729 static pool_node pool_vector[MAX_POOL_SIZE];
4730 static int pool_size;
4731 static rtx_code_label *pool_window_label;
4732 static int pool_window_last;
4734 static int max_labelno_before_reorg;
4736 /* ??? If we need a constant in HImode which is the truncated value of a
4737 constant we need in SImode, we could combine the two entries thus saving
4738 two bytes. Is this common enough to be worth the effort of implementing
4739 it? */
4741 /* ??? This stuff should be done at the same time that we shorten branches.
4742 As it is now, we must assume that all branches are the maximum size, and
4743 this causes us to almost always output constant pools sooner than
4744 necessary. */
4746 /* Add a constant to the pool and return its label. */
4747 static rtx_code_label *
4748 add_constant (rtx x, machine_mode mode, rtx last_value)
4750 int i;
4751 rtx_code_label *lab, *new_rtx;
4752 label_ref_list_t ref, newref;
4754 /* First see if we've already got it. */
4755 for (i = 0; i < pool_size; i++)
4757 if (x->code == pool_vector[i].value->code
4758 && mode == pool_vector[i].mode)
4760 if (x->code == CODE_LABEL)
4762 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4763 continue;
4765 if (rtx_equal_p (x, pool_vector[i].value))
4767 lab = new_rtx = 0;
4768 if (! last_value
4769 || ! i
4770 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4772 new_rtx = gen_label_rtx ();
4773 LABEL_REFS (new_rtx) = pool_vector[i].label;
4774 pool_vector[i].label = lab = new_rtx;
4776 if (lab && pool_window_label)
4778 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4779 newref->label = pool_window_label;
4780 ref = pool_vector[pool_window_last].wend;
4781 newref->next = ref;
4782 pool_vector[pool_window_last].wend = newref;
4784 if (new_rtx)
4785 pool_window_label = new_rtx;
4786 pool_window_last = i;
4787 return lab;
4792 /* Need a new one. */
4793 pool_vector[pool_size].value = x;
4794 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4796 lab = 0;
4797 pool_vector[pool_size - 1].part_of_sequence_p = true;
4799 else
4800 lab = gen_label_rtx ();
4801 pool_vector[pool_size].mode = mode;
4802 pool_vector[pool_size].label = lab;
4803 pool_vector[pool_size].wend = NULL;
4804 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4805 if (lab && pool_window_label)
4807 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4808 newref->label = pool_window_label;
4809 ref = pool_vector[pool_window_last].wend;
4810 newref->next = ref;
4811 pool_vector[pool_window_last].wend = newref;
4813 if (lab)
4814 pool_window_label = lab;
4815 pool_window_last = pool_size;
4816 pool_size++;
4817 return lab;
4820 /* Output the literal table. START, if nonzero, is the first instruction
4821 this table is needed for, and also indicates that there is at least one
4822 casesi_worker_2 instruction; we have to emit the operand3 labels from
4823 these insns at a 4-byte aligned position. BARRIER is the barrier
4824 after which we are to place the table. */
4825 static void
4826 dump_table (rtx_insn *start, rtx_insn *barrier)
4828 rtx_insn *scan = barrier;
4829 int i;
4830 bool need_align = true;
4831 rtx lab;
4832 label_ref_list_t ref;
4833 bool have_df = false;
4835 /* Do two passes; the first time, dump out the HI sized constants. */
4837 for (i = 0; i < pool_size; i++)
4839 pool_node *p = &pool_vector[i];
4841 if (p->mode == HImode)
4843 if (need_align)
4845 scan = emit_insn_after (gen_align_2 (), scan);
4846 need_align = false;
4848 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4849 scan = emit_label_after (lab, scan);
4850 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4851 scan);
4852 for (ref = p->wend; ref; ref = ref->next)
4854 lab = ref->label;
4855 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4858 else if (p->mode == DFmode)
4859 have_df = true;
4862 need_align = true;
4864 if (start)
4866 scan = emit_insn_after (gen_align_4 (), scan);
4867 need_align = false;
4868 for (; start != barrier; start = NEXT_INSN (start))
4869 if (NONJUMP_INSN_P (start)
4870 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4872 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4873 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4875 scan = emit_label_after (lab, scan);
4878 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4880 rtx_insn *align_insn = NULL;
4882 scan = emit_label_after (gen_label_rtx (), scan);
4883 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4884 need_align = false;
4886 for (i = 0; i < pool_size; i++)
4888 pool_node *p = &pool_vector[i];
4890 switch (p->mode)
4892 case HImode:
4893 break;
4894 case SImode:
4895 case SFmode:
4896 if (align_insn && !p->part_of_sequence_p)
4898 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4899 emit_label_before (lab, align_insn);
4900 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4901 align_insn);
4902 for (ref = p->wend; ref; ref = ref->next)
4904 lab = ref->label;
4905 emit_insn_before (gen_consttable_window_end (lab),
4906 align_insn);
4908 delete_insn (align_insn);
4909 align_insn = NULL;
4910 continue;
4912 else
4914 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4915 scan = emit_label_after (lab, scan);
4916 scan = emit_insn_after (gen_consttable_4 (p->value,
4917 const0_rtx), scan);
4918 need_align = ! need_align;
4920 break;
4921 case DFmode:
4922 if (need_align)
4924 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4925 align_insn = scan;
4926 need_align = false;
4928 case DImode:
4929 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4930 scan = emit_label_after (lab, scan);
4931 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4932 scan);
4933 break;
4934 default:
4935 gcc_unreachable ();
4938 if (p->mode != HImode)
4940 for (ref = p->wend; ref; ref = ref->next)
4942 lab = ref->label;
4943 scan = emit_insn_after (gen_consttable_window_end (lab),
4944 scan);
4949 pool_size = 0;
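/* Output the SImode/SFmode and DImode/DFmode constants.  */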
4952 for (i = 0; i < pool_size; i++)
4954 pool_node *p = &pool_vector[i];
4956 switch (p->mode)
4958 case HImode:
4959 break;
4960 case SImode:
4961 case SFmode:
4962 if (need_align)
4964 need_align = false;
4965 scan = emit_label_after (gen_label_rtx (), scan);
4966 scan = emit_insn_after (gen_align_4 (), scan);
4968 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4969 scan = emit_label_after (lab, scan);
4970 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4971 scan);
4972 break;
4973 case DFmode:
4974 case DImode:
4975 if (need_align)
4977 need_align = false;
4978 scan = emit_label_after (gen_label_rtx (), scan);
4979 scan = emit_insn_after (gen_align_4 (), scan);
4981 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4982 scan = emit_label_after (lab, scan);
4983 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4984 scan);
4985 break;
4986 default:
4987 gcc_unreachable ();
4990 if (p->mode != HImode)
4992 for (ref = p->wend; ref; ref = ref->next)
4994 lab = ref->label;
4995 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5000 scan = emit_insn_after (gen_consttable_end (), scan);
5001 scan = emit_barrier_after (scan);
5002 pool_size = 0;
5003 pool_window_label = NULL;
5004 pool_window_last = 0;
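/* The operand of a mova's UNSPEC source, i.e. the label (or constant)
   it refers to.  */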
5007 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
5009 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5011 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5012 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
5013 need to fix it if the input value is CONST_OK_FOR_I08. */
5014 static bool
5015 broken_move (rtx_insn *insn)
5017 if (NONJUMP_INSN_P (insn))
5019 rtx pat = PATTERN (insn);
5020 if (GET_CODE (pat) == PARALLEL)
5021 pat = XVECEXP (pat, 0, 0);
5022 if (GET_CODE (pat) == SET
5023 /* We can load any 8-bit value if we don't care what the high
5024 order bits end up as. */
5025 && GET_MODE (SET_DEST (pat)) != QImode
5026 && (CONSTANT_P (SET_SRC (pat))
5027 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5028 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5029 /* Match mova_const. */
5030 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5031 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5032 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5033 && ! (TARGET_SH2E
5034 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5035 && (fp_zero_operand (SET_SRC (pat))
5036 || fp_one_operand (SET_SRC (pat)))
5037 /* In general we don't know the current setting of fpscr, so
5038 disable fldi.
5039 There is an exception if this was a register-register move
5040 before reload - and hence it was ascertained that we have
5041 single precision setting - and in a post-reload optimization
5042 we changed this to do a constant load. In that case
5043 we don't have an r0 clobber, hence we must use fldi. */
5044 && (TARGET_FMOVD
5045 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5046 == SCRATCH))
5047 && REG_P (SET_DEST (pat))
5048 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5049 && ! (TARGET_SH2A
5050 && GET_MODE (SET_DEST (pat)) == SImode
5051 && (satisfies_constraint_I20 (SET_SRC (pat))
5052 || satisfies_constraint_I28 (SET_SRC (pat))))
5053 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5054 return true;
5057 return false;
5060 /* Return true if the specified insn is a mova insn. */
5061 static bool
5062 mova_p (rtx_insn *insn)
5064 return (NONJUMP_INSN_P (insn)
5065 && GET_CODE (PATTERN (insn)) == SET
5066 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5067 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5068 /* Don't match mova_const. */
5069 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5072 /* Fix up a mova from a switch that went out of range. */
5073 static void
5074 fixup_mova (rtx_insn *mova)
5076 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5077 if (! flag_pic)
5079 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5080 INSN_CODE (mova) = -1;
5082 else
5084 rtx_insn *worker = mova;
5085 rtx_code_label *lab = gen_label_rtx ();
5086 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5090 worker = NEXT_INSN (worker);
5091 gcc_assert (worker
5092 && !LABEL_P (worker)
5093 && !JUMP_P (worker));
5094 } while (NOTE_P (worker)
5095 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5096 wpat = PATTERN (worker);
5097 wpat0 = XVECEXP (wpat, 0, 0);
5098 wpat1 = XVECEXP (wpat, 0, 1);
5099 wsrc = SET_SRC (wpat0);
5100 PATTERN (worker) = (gen_casesi_worker_2
5101 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5102 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5103 XEXP (wpat1, 0)));
5104 INSN_CODE (worker) = -1;
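/* Now make the mova load the difference between its original target
   label and the new label LAB (UNSPEC_SYMOFF).  */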
5105 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5106 base = gen_rtx_LABEL_REF (Pmode, lab);
5107 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5108 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5109 INSN_CODE (mova) = -1;
5113 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5114 *num_mova, and check if the new mova is not nested within the first one.
5115 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5116 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5117 static int
5118 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5120 int n_addr = 0; /* Initialization to shut up spurious warning. */
5121 int f_target, n_target = 0; /* Likewise. */
5123 if (optimize)
5125 /* If NEW_MOVA has no address yet, it will be handled later. */
5126 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5127 return -1;
5129 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5130 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5131 if (n_addr > n_target || n_addr + 1022 < n_target)
5133 /* Change the mova into a load.
5134 broken_move will then return true for it. */
5135 fixup_mova (new_mova);
5136 return 1;
5139 if (!(*num_mova)++)
5141 *first_mova = new_mova;
5142 return 2;
5144 if (!optimize
5145 || ((f_target
5146 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5147 >= n_target))
5148 return -1;
5150 (*num_mova)--;
5151 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5152 > n_target - n_addr)
5154 fixup_mova (*first_mova);
5155 return 0;
5157 else
5159 fixup_mova (new_mova);
5160 return 1;
5164 /* Find the last barrier from insn FROM which is close enough to hold the
5165 constant pool. If we can't find one, then create one near the end of
5166 the range. */
5167 static rtx_insn *
5168 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5170 int count_si = 0;
5171 int count_hi = 0;
5172 int found_hi = 0;
5173 int found_si = 0;
5174 int found_di = 0;
5175 int hi_align = 2;
5176 int si_align = 2;
5177 int leading_mova = num_mova;
5178 rtx_insn *barrier_before_mova = NULL;
5179 rtx_insn *found_barrier = NULL;
5180 rtx_insn *good_barrier = NULL;
5181 int si_limit;
5182 int hi_limit;
5183 rtx_insn *orig = from;
5184 rtx_insn *last_got = NULL;
5185 rtx_insn *last_symoff = NULL;
5187 /* For HImode: range is 510, add 4 because pc counts from address of
5188 second instruction after this one, subtract 2 for the jump instruction
5189 that we may need to emit before the table, subtract 2 for the instruction
5190 that fills the jump delay slot (in very rare cases, reorg will take an
5191 instruction from after the constant pool or will leave the delay slot
5192 empty). This gives 510.
5193 For SImode: range is 1020, add 4 because pc counts from address of
5194 second instruction after this one, subtract 2 in case pc is 2 byte
5195 aligned, subtract 2 for the jump instruction that we may need to emit
5196 before the table, subtract 2 for the instruction that fills the jump
5197 delay slot. This gives 1018. */
5199 /* The branch will always be shortened now that the reference address for
5200 forward branches is the successor address, thus we need no longer make
5201 adjustments to the [sh]i_limit for -O0. */
5203 si_limit = 1018;
5204 hi_limit = 510;
5206 while (from && count_si < si_limit && count_hi < hi_limit)
5208 int inc = get_attr_length (from);
5209 int new_align = 1;
5211 /* If this is a label that existed at the time of the compute_alignments
5212 call, determine the alignment. N.B. When find_barrier recurses for
5213 an out-of-reach mova, we might see labels at the start of previously
5214 inserted constant tables. */
5215 if (LABEL_P (from)
5216 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5218 if (optimize)
5219 new_align = 1 << label_to_alignment (from);
5220 else if (BARRIER_P (prev_nonnote_insn (from)))
5221 new_align = 1 << barrier_align (from);
5222 else
5223 new_align = 1;
5224 inc = 0;
5226 /* In case we are scanning a constant table because of recursion, check
5227 for explicit alignments. If the table is long, we might be forced
5228 to emit the new table in front of it; the length of the alignment
5229 might be the last straw. */
5230 else if (NONJUMP_INSN_P (from)
5231 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5232 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5233 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5234 /* When we find the end of a constant table, paste the new constant
5235 at the end. That is better than putting it in front because
5236 this way, we don't need extra alignment for adding a 4-byte-aligned
5237 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5238 else if (NONJUMP_INSN_P (from)
5239 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5240 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5241 return from;
5243 if (BARRIER_P (from))
5245 rtx_insn *next;
5247 found_barrier = from;
5249 /* If we are at the end of the function, or in front of an alignment
5250 instruction, we need not insert an extra alignment. We prefer
5251 this kind of barrier. */
5252 if (barrier_align (from) > 2)
5253 good_barrier = from;
5255 /* If we are at the end of a hot/cold block, dump the constants
5256 here. */
5257 next = NEXT_INSN (from);
5258 if (next
5259 && NOTE_P (next)
5260 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5261 break;
5264 if (broken_move (from))
5266 rtx pat, src, dst;
5267 machine_mode mode;
5269 pat = PATTERN (from);
5270 if (GET_CODE (pat) == PARALLEL)
5271 pat = XVECEXP (pat, 0, 0);
5272 src = SET_SRC (pat);
5273 dst = SET_DEST (pat);
5274 mode = GET_MODE (dst);
5276 /* GOT pc-relative setting comes in a pair of
5277 mova .L8,r0
5278 mov.l .L8,r12
5279 instructions. (plus add r0,r12).
5280 Remember if we see one without the other. */
5281 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5282 last_got = last_got ? NULL : from;
5283 else if (PIC_ADDR_P (src))
5284 last_got = last_got ? NULL : from;
5286 /* We must explicitly check the mode, because sometimes the
5287 front end will generate code to load unsigned constants into
5288 HImode targets without properly sign extending them. */
5289 if (mode == HImode
5290 || (mode == SImode && satisfies_constraint_I16 (src)
5291 && REGNO (dst) != FPUL_REG))
5293 found_hi += 2;
5294 /* We put the short constants before the long constants, so
5295 we must count the length of short constants in the range
5296 for the long constants. */
5297 /* ??? This isn't optimal, but is easy to do. */
5298 si_limit -= 2;
5300 else
5302 /* We dump DF/DI constants before SF/SI ones, because
5303 the limit is the same, but the alignment requirements
5304 are higher. We may waste up to 4 additional bytes
5305 for alignment, and the DF/DI constant may have
5306 another SF/SI constant placed before it. */
5307 if (TARGET_SHCOMPACT
5308 && ! found_di
5309 && (mode == DFmode || mode == DImode))
5311 found_di = 1;
5312 si_limit -= 8;
5314 while (si_align > 2 && found_si + si_align - 2 > count_si)
5315 si_align >>= 1;
5316 if (found_si > count_si)
5317 count_si = found_si;
5318 found_si += GET_MODE_SIZE (mode);
5319 if (num_mova)
5320 si_limit -= GET_MODE_SIZE (mode);
5324 if (mova_p (from))
5326 switch (untangle_mova (&num_mova, &mova, from))
5328 case 1:
5329 if (flag_pic)
5331 rtx src = SET_SRC (PATTERN (from));
5332 if (GET_CODE (src) == CONST
5333 && GET_CODE (XEXP (src, 0)) == UNSPEC
5334 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5335 last_symoff = from;
5337 break;
5338 case 0: return find_barrier (0, 0, mova);
5339 case 2:
5341 leading_mova = 0;
5342 barrier_before_mova
5343 = good_barrier ? good_barrier : found_barrier;
5345 default: break;
5347 if (found_si > count_si)
5348 count_si = found_si;
5350 else if (JUMP_TABLE_DATA_P (from)
5351 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5353 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5354 || (num_mova
5355 && (prev_nonnote_insn (from)
5356 == XEXP (MOVA_LABELREF (mova), 0))))
5357 num_mova--;
5358 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5360 /* We have just passed the barrier in front of the
5361 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5362 the ADDR_DIFF_VEC is accessed as data, just like our pool
5363 constants, this is a good opportunity to accommodate what
5364 we have gathered so far.
5365 If we waited any longer, we could end up at a barrier in
5366 front of code, which gives worse cache usage for separated
5367 instruction / data caches. */
5368 good_barrier = found_barrier;
5369 break;
5371 else
5373 rtx body = PATTERN (from);
5374 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5377 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5378 else if (JUMP_P (from)
5379 && ! TARGET_SH2
5380 && ! optimize_size)
5381 new_align = 4;
5383 /* There is a possibility that a bf is transformed into a bf/s by the
5384 delay slot scheduler. */
5385 if (JUMP_P (from)
5386 && get_attr_type (from) == TYPE_CBRANCH
5387 && ! sequence_insn_p (from))
5388 inc += 2;
5390 if (found_si)
5392 count_si += inc;
5393 if (new_align > si_align)
5395 si_limit -= (count_si - 1) & (new_align - si_align);
5396 si_align = new_align;
5398 count_si = (count_si + new_align - 1) & -new_align;
5400 if (found_hi)
5402 count_hi += inc;
5403 if (new_align > hi_align)
5405 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5406 hi_align = new_align;
5408 count_hi = (count_hi + new_align - 1) & -new_align;
5410 from = NEXT_INSN (from);
5413 if (num_mova)
5415 if (leading_mova)
5417 /* Try as we might, the leading mova is out of range. Change
5418 it into a load (which will become a pcload) and retry. */
5419 fixup_mova (mova);
5420 return find_barrier (0, 0, mova);
5422 else
5424 /* Insert the constant pool table before the mova instruction,
5425 to prevent the mova label reference from going out of range. */
5426 from = mova;
5427 good_barrier = found_barrier = barrier_before_mova;
5431 if (found_barrier)
5433 if (good_barrier && next_real_insn (found_barrier))
5434 found_barrier = good_barrier;
5436 else
5438 /* We didn't find a barrier in time to dump our stuff,
5439 so we'll make one. */
5440 rtx_code_label *label = gen_label_rtx ();
5442 /* Don't emit a constant table in the middle of insns for
5443 casesi_worker_2. This is a bit overkill but is enough
5444 because casesi_worker_2 wouldn't appear so frequently. */
5445 if (last_symoff)
5446 from = last_symoff;
5448 /* If we exceeded the range, then we must back up over the last
5449 instruction we looked at. Otherwise, we just need to undo the
5450 NEXT_INSN at the end of the loop. */
5451 if (PREV_INSN (from) != orig
5452 && (count_hi > hi_limit || count_si > si_limit))
5453 from = PREV_INSN (PREV_INSN (from));
5454 else
5455 from = PREV_INSN (from);
5457 /* Don't emit a constant table in the middle of global pointer setting,
5458 since that would move the addressing base GOT into another table.
5459 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5460 in the pool anyway, so just move up the whole constant pool.
5462 However, avoid doing so when the last single GOT mov is the starting
5463 insn itself. Going above the start insn would create a negative
5464 offset, causing errors. */
5465 if (last_got && last_got != orig)
5466 from = PREV_INSN (last_got);
5468 /* Don't insert the constant pool table at the position which
5469 may be the landing pad. */
5470 if (flag_exceptions
5471 && CALL_P (from)
5472 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5473 from = PREV_INSN (from);
5475 /* Walk back to be just before any jump or label.
5476 Putting it before a label reduces the number of times the branch
5477 around the constant pool table will be hit. Putting it before
5478 a jump makes it more likely that the bra delay slot will be
5479 filled. */
5480 while (NOTE_P (from) || JUMP_P (from)
5481 || LABEL_P (from))
5482 from = PREV_INSN (from);
5484 /* Make sure we do not split between a call and its corresponding
5485 CALL_ARG_LOCATION note. */
5486 if (CALL_P (from))
5488 rtx_insn *next = NEXT_INSN (from);
5489 if (next && NOTE_P (next)
5490 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5491 from = next;
5494 from = emit_jump_insn_after (gen_jump (label), from);
5495 JUMP_LABEL (from) = label;
5496 LABEL_NUSES (label) = 1;
5497 found_barrier = emit_barrier_after (from);
5498 emit_label_after (label, found_barrier);
5501 return found_barrier;
5504 /* If the instruction INSN is implemented by a special function, and we can
5505 positively find the register that is used to call the sfunc, and this
5506 register is not used anywhere else in this instruction - except as the
5507 destination of a set, return this register; else, return 0. */
5509 sfunc_uses_reg (rtx_insn *insn)
5511 int i;
5512 rtx pattern, part, reg_part, reg;
5514 if (!NONJUMP_INSN_P (insn))
5515 return NULL_RTX;
5516 pattern = PATTERN (insn);
5517 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5518 return NULL_RTX;
5520 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5522 part = XVECEXP (pattern, 0, i);
5523 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5524 reg_part = part;
5526 if (! reg_part)
5527 return NULL_RTX;
5528 reg = XEXP (reg_part, 0);
5529 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5531 part = XVECEXP (pattern, 0, i);
5532 if (part == reg_part || GET_CODE (part) == CLOBBER)
5533 continue;
5534 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5535 && REG_P (SET_DEST (part)))
5536 ? SET_SRC (part) : part)))
5537 return NULL_RTX;
5539 return reg;
5542 /* See if the only way in which INSN uses REG is by calling it, or by
5543 setting it while calling it. Set *SET to a SET rtx if the register
5544 is set by INSN. */
5545 static bool
5546 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5548 rtx pattern, reg2;
5550 *set = NULL_RTX;
5552 reg2 = sfunc_uses_reg (insn);
5553 if (reg2 && REGNO (reg2) == REGNO (reg))
5555 pattern = single_set (insn);
5556 if (pattern
5557 && REG_P (SET_DEST (pattern))
5558 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5559 *set = pattern;
5560 return false;
5562 if (!CALL_P (insn))
5564 /* We don't use rtx_equal_p because we don't care if the mode is
5565 different. */
5566 pattern = single_set (insn);
5567 if (pattern
5568 && REG_P (SET_DEST (pattern))
5569 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5571 rtx par, part;
5572 int i;
5574 *set = pattern;
5575 par = PATTERN (insn);
5576 if (GET_CODE (par) == PARALLEL)
5577 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5579 part = XVECEXP (par, 0, i);
5580 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5581 return true;
5583 return reg_mentioned_p (reg, SET_SRC (pattern));
5586 return true;
5589 pattern = PATTERN (insn);
5591 if (GET_CODE (pattern) == PARALLEL)
5593 int i;
5595 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5596 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5597 return true;
5598 pattern = XVECEXP (pattern, 0, 0);
5601 if (GET_CODE (pattern) == SET)
5603 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5605 /* We don't use rtx_equal_p, because we don't care if the
5606 mode is different. */
5607 if (!REG_P (SET_DEST (pattern))
5608 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5609 return true;
5611 *set = pattern;
5614 pattern = SET_SRC (pattern);
5617 if (GET_CODE (pattern) != CALL
5618 || !MEM_P (XEXP (pattern, 0))
5619 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5620 return true;
5622 return false;
5625 /* Given X, a pattern of an insn or a part of it, return a mask of used
5626 general registers. Bits 0..15 mean that the respective registers
5627 are used as inputs in the instruction. Bits 16..31 mean that the
5628 registers 0..15, respectively, are used as outputs, or are clobbered.
5629 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
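/* For example (illustrative, assuming SImode hard registers, for which
   HARD_REGNO_NREGS is 1): the pattern (set (reg:SI 1) (reg:SI 2)) yields
   (1 << 2) | (1 << (1 + 16)) == 0x00020004, i.e. r2 read and r1 written.  */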
5631 regs_used (rtx x, int is_dest)
5633 enum rtx_code code;
5634 const char *fmt;
5635 int i, used = 0;
5637 if (! x)
5638 return used;
5639 code = GET_CODE (x);
5640 switch (code)
5642 case REG:
5643 if (REGNO (x) < 16)
5644 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5645 << (REGNO (x) + is_dest));
5646 return 0;
5647 case SUBREG:
5649 rtx y = SUBREG_REG (x);
5651 if (!REG_P (y))
5652 break;
5653 if (REGNO (y) < 16)
5654 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5655 << (REGNO (y) +
5656 subreg_regno_offset (REGNO (y),
5657 GET_MODE (y),
5658 SUBREG_BYTE (x),
5659 GET_MODE (x)) + is_dest));
5660 return 0;
5662 case SET:
5663 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5664 case RETURN:
5665 /* If there was a return value, it must have been indicated with USE. */
5666 return 0x00ffff00;
5667 case CLOBBER:
5668 is_dest = 1;
5669 break;
5670 case MEM:
5671 is_dest = 0;
5672 break;
5673 case CALL:
5674 used |= 0x00ff00f0;
5675 break;
5676 default:
5677 break;
5680 fmt = GET_RTX_FORMAT (code);
5682 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5684 if (fmt[i] == 'E')
5686 int j;
5687 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5688 used |= regs_used (XVECEXP (x, i, j), is_dest);
5690 else if (fmt[i] == 'e')
5691 used |= regs_used (XEXP (x, i), is_dest);
5693 return used;
5696 /* Create an instruction that prevents redirection of a conditional branch
5697 to the destination of the JUMP with address ADDR.
5698 If the branch needs to be implemented as an indirect jump, try to find
5699 a scratch register for it.
5700 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5701 Pass 1 if any preceding insn that doesn't fit into a delay slot is good
5702 enough; pass 2 if a definite blocking insn is needed.
5703 -1 is used internally to avoid deep recursion.
5704 If a blocking instruction is made or recognized, return it. */
5705 static rtx_insn *
5706 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5708 int dead = 0;
5709 rtx_insn *prev = prev_nonnote_insn (jump);
5710 rtx dest;
5712 /* First, check if we already have an instruction that satisfies our need. */
5713 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5715 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5716 return prev;
5717 if (GET_CODE (PATTERN (prev)) == USE
5718 || GET_CODE (PATTERN (prev)) == CLOBBER
5719 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5720 prev = jump;
5721 else if ((need_block &= ~1) < 0)
5722 return prev;
5723 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5724 need_block = 0;
5726 if (GET_CODE (PATTERN (jump)) == RETURN)
5728 if (! need_block)
5729 return prev;
5730 /* Reorg even does nasty things with return insns that cause branches
5731 to go out of range - see find_end_label and callers. */
5732 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5734 /* We can't use JUMP_LABEL here because it might be undefined
5735 when not optimizing. */
5736 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5737 /* If the branch is out of range, try to find a scratch register for it. */
5738 if (optimize
5739 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5740 > 4092 + 4098))
5742 rtx_insn *scan;
5743 /* Don't look for the stack pointer as a scratch register;
5744 it would cause trouble if an interrupt occurred. */
5745 unsigned attempt = 0x7fff, used;
5746 int jump_left = flag_expensive_optimizations + 1;
5748 /* It is likely that the most recent eligible instruction is wanted for
5749 the delay slot. Therefore, find out which registers it uses, and
5750 try to avoid using them. */
5752 for (scan = jump; (scan = PREV_INSN (scan)); )
5754 enum rtx_code code;
5756 if (scan->deleted ())
5757 continue;
5758 code = GET_CODE (scan);
5759 if (code == CODE_LABEL || code == JUMP_INSN)
5760 break;
5761 if (code == INSN
5762 && GET_CODE (PATTERN (scan)) != USE
5763 && GET_CODE (PATTERN (scan)) != CLOBBER
5764 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5766 attempt &= ~regs_used (PATTERN (scan), 0);
5767 break;
5770 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5771 (scan = NEXT_INSN (scan)); )
5773 enum rtx_code code;
5775 if (scan->deleted ())
5776 continue;
5777 code = GET_CODE (scan);
5778 if (INSN_P (scan))
5780 used |= regs_used (PATTERN (scan), 0);
5781 if (code == CALL_INSN)
5782 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5783 dead |= (used >> 16) & ~used;
5784 if (dead & attempt)
5786 dead &= attempt;
5787 break;
5789 if (code == JUMP_INSN)
5791 if (jump_left-- && simplejump_p (scan))
5792 scan = JUMP_LABEL_AS_INSN (scan);
5793 else
5794 break;
5798 /* Mask out the stack pointer again, in case it was
5799 the only 'free' register we have found. */
5800 dead &= 0x7fff;
5802 /* If the immediate destination is still in range, check for possible
5803 threading with a jump beyond the delay slot insn.
5804 Don't check if we are called recursively; the jump has been or will be
5805 checked in a separate invocation. */
5807 else if (optimize && need_block >= 0)
5809 rtx_insn *next = next_active_insn (next_active_insn (dest));
5810 if (next && JUMP_P (next)
5811 && GET_CODE (PATTERN (next)) == SET
5812 && recog_memoized (next) == CODE_FOR_jump_compact)
5814 dest = JUMP_LABEL (next);
5815 if (dest
5816 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5817 > 4092 + 4098))
5818 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5822 if (dead)
5824 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5826 /* It would be nice if we could convert the jump into an indirect
5827 jump / far branch right now, and thus expose all constituent
5828 instructions to further optimization. However, reorg uses
5829 simplejump_p to determine if there is an unconditional jump where
5830 it should try to schedule instructions from the target of the
5831 branch; simplejump_p fails for indirect jumps even if they have
5832 a JUMP_LABEL. */
5833 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5834 (reg, GEN_INT (unspec_bbr_uid++)),
5835 jump);
5836 /* ??? We would like this to have the scope of the jump, but that
5837 scope will change when a delay slot insn of an inner scope is added.
5838 Hence, after delay slot scheduling, we'll have to expect
5839 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5840 the jump. */
5842 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5843 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5844 return insn;
5846 else if (need_block)
5847 /* We can't use JUMP_LABEL here because it might be undefined
5848 when not optimizing. */
5849 return emit_insn_before (gen_block_branch_redirect
5850 (GEN_INT (unspec_bbr_uid++)),
5851 jump);
5852 return prev;
5855 #define CONDJUMP_MIN -252
5856 #define CONDJUMP_MAX 262
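/* These constants appear to bound, in bytes, how far a conditional branch
   may be from the label it is redirected to; split_branches below uses them
   to decide whether an existing near label or insert place is still within
   reach of the branch currently being processed.  */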
5857 struct far_branch
5859 /* A label (to be placed) in front of the jump
5860 that jumps to our ultimate destination. */
5861 rtx_insn *near_label;
5862 /* Where we are going to insert it if we cannot move the jump any farther,
5863 or the jump itself if we have picked up an existing jump. */
5864 rtx_insn *insert_place;
5865 /* The ultimate destination. */
5866 rtx_insn *far_label;
5867 struct far_branch *prev;
5868 /* If the branch has already been created, its address;
5869 else the address of its first prospective user. */
5870 int address;
5873 static void gen_far_branch (struct far_branch *);
5874 enum mdep_reorg_phase_e mdep_reorg_phase;
5875 static void
5876 gen_far_branch (struct far_branch *bp)
5878 rtx_insn *insn = bp->insert_place;
5879 rtx_insn *jump;
5880 rtx_code_label *label = gen_label_rtx ();
5881 int ok;
5883 emit_label_after (label, insn);
5884 if (bp->far_label)
5886 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5887 LABEL_NUSES (bp->far_label)++;
5889 else
5890 jump = emit_jump_insn_after (gen_return (), insn);
5892 /* Emit a barrier so that reorg knows that any following instructions
5893 are not reachable via a fall-through path.
5894 But don't do this when not optimizing, since we wouldn't suppress the
5895 alignment for the barrier then, and could end up with out-of-range
5896 pc-relative loads. */
5897 if (optimize)
5898 emit_barrier_after (jump);
5899 emit_label_after (bp->near_label, insn);
5901 if (bp->far_label)
5902 JUMP_LABEL (jump) = bp->far_label;
5903 else
5905 rtx pat = PATTERN (jump);
5906 gcc_assert (ANY_RETURN_P (pat));
5907 JUMP_LABEL (jump) = pat;
5910 ok = invert_jump (insn, label, 1);
5911 gcc_assert (ok);
5913 /* If we are branching around a jump (rather than a return), prevent
5914 reorg from using an insn from the jump target as the delay slot insn -
5915 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5916 and it could cause branches to go out of range. */
5917 if (bp->far_label)
5918 (emit_insn_after
5919 (gen_stuff_delay_slot
5920 (GEN_INT (unspec_bbr_uid++),
5921 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5922 insn));
5923 /* Prevent reorg from undoing our splits. */
5924 gen_block_redirect (jump, bp->address += 2, 2);
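/* Recap of the transformation performed above (illustrative sketch only;
   the actual mnemonics depend on the original condition and on later delay
   slot filling): an out-of-range conditional branch

       bt      .Lfar

   is rewritten as

       bf      .Lskip        ! inverted condition, short hop
   .Lnear:                    ! reusable by other branches to .Lfar
       bra     .Lfar
       nop
   .Lskip:                                                                 */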
5927 /* Fix up ADDR_DIFF_VECs. */
5928 void
5929 fixup_addr_diff_vecs (rtx_insn *first)
5931 rtx_insn *insn;
5933 for (insn = first; insn; insn = NEXT_INSN (insn))
5935 rtx vec_lab, pat, prevpat, x, braf_label;
5936 rtx_insn *prev;
5938 if (! JUMP_TABLE_DATA_P (insn)
5939 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5940 continue;
5941 pat = PATTERN (insn);
5942 vec_lab = XEXP (XEXP (pat, 0), 0);
5944 /* Search for the matching casesi_jump_2. */
5945 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5947 if (!JUMP_P (prev))
5948 continue;
5949 prevpat = PATTERN (prev);
5950 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5951 continue;
5952 x = XVECEXP (prevpat, 0, 1);
5953 if (GET_CODE (x) != USE)
5954 continue;
5955 x = XEXP (x, 0);
5956 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5957 break;
5959 /* FIXME: This is a bug in the optimizer, but it seems harmless
5960 to just avoid panicking. */
5961 if (!prev)
5962 continue;
5964 /* Emit the reference label of the braf where it belongs, right after
5965 the casesi_jump_2 (i.e. braf). */
5966 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5967 emit_label_after (braf_label, prev);
5969 /* Fix up the ADDR_DIFF_VEC to be relative
5970 to the reference address of the braf. */
5971 XEXP (XEXP (pat, 0), 0) = braf_label;
5975 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5976 a barrier. Return the base 2 logarithm of the desired alignment. */
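/* Orientation summary, derived from the code below: this returns 2 for a
   label that starts a jump table, 0 when no extra alignment is wanted
   (constant table barriers, -Os, or a preceding short branch that makes
   alignment a net loss), and otherwise align_jumps_log (or a small value
   after a very small jump table).  */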
5978 barrier_align (rtx_insn *barrier_or_label)
5980 rtx next, pat;
5982 if (! barrier_or_label)
5983 return 0;
5985 if (LABEL_P (barrier_or_label)
5986 && NEXT_INSN (barrier_or_label)
5987 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5988 return 2;
5990 if (BARRIER_P (barrier_or_label)
5991 && PREV_INSN (barrier_or_label)
5992 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5994 pat = PATTERN (PREV_INSN (barrier_or_label));
5995 /* If this is a very small table, we want to keep the alignment after
5996 the table to the minimum for proper code alignment. */
5997 return ((optimize_size
5998 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5999 <= (unsigned) 1 << (CACHE_LOG - 2)))
6000 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6003 next = next_active_insn (barrier_or_label);
6005 if (! next)
6006 return 0;
6008 pat = PATTERN (next);
6010 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6011 /* This is a barrier in front of a constant table. */
6012 return 0;
6014 if (optimize_size)
6015 return 0;
6017 if (! TARGET_SH2 || ! optimize)
6018 return align_jumps_log;
6020 /* When fixing up pcloads, a constant table might be inserted just before
6021 the basic block that ends with the barrier. Thus, we can't trust the
6022 instruction lengths before that. */
6023 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6025 /* Check if there is an immediately preceding branch to the insn beyond
6026 the barrier. We must weigh the cost of discarding useful information
6027 from the current cache line when executing this branch and there is
6028 an alignment, against that of fetching unneeded insns in front of the
6029 branch target when there is no alignment. */
6031 /* There are two delay_slot cases to consider. One is the simple case
6032 where the preceding branch is to the insn beyond the barrier (simple
6033 delay slot filling), and the other is where the preceding branch has
6034 a delay slot that is a duplicate of the insn after the barrier
6035 (fill_eager_delay_slots) and the branch is to the insn after the insn
6036 after the barrier. */
6038 int slot, credit;
6039 bool jump_to_next = false;
6041 /* Skip to the insn before the JUMP_INSN before the barrier under
6042 investigation. */
6043 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6045 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6046 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6047 prev = prev_real_insn (prev))
6049 jump_to_next = false;
6050 if (GET_CODE (PATTERN (prev)) == USE
6051 || GET_CODE (PATTERN (prev)) == CLOBBER)
6052 continue;
6053 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6055 prev = prev_seq->insn (1);
6056 if (INSN_UID (prev) == INSN_UID (next))
6058 /* Delay slot was filled with insn at jump target. */
6059 jump_to_next = true;
6060 continue;
6064 if (slot &&
6065 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6066 slot = 0;
6067 credit -= get_attr_length (prev);
6069 if (prev && jump_to_label_p (prev))
6071 rtx_insn *x;
6072 if (jump_to_next
6073 || next_real_insn (JUMP_LABEL (prev)) == next
6074 /* If relax_delay_slots() decides NEXT was redundant
6075 with some previous instruction, it will have
6076 redirected PREV's jump to the following insn. */
6077 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6078 /* There is no upper bound on redundant instructions
6079 that might have been skipped, but we must not put an
6080 alignment where none had been before. */
6081 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6082 (INSN_P (x)
6083 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6084 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6085 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6087 rtx pat = PATTERN (prev);
6088 if (GET_CODE (pat) == PARALLEL)
6089 pat = XVECEXP (pat, 0, 0);
6090 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6091 return 0;
6096 return align_jumps_log;
6099 /* If we are inside a phony loop, almost any kind of label can turn up as the
6100 first one in the loop. Aligning a braf label causes incorrect switch
6101 destination addresses; we can detect braf labels because they are
6102 followed by a BARRIER.
6103 Applying loop alignment to small constant or switch tables is a waste
6104 of space, so we suppress this too. */
6106 sh_loop_align (rtx_insn *label)
6108 rtx_insn *next = label;
6110 if (! optimize || optimize_size)
6111 return 0;
6114 next = next_nonnote_insn (next);
6115 while (next && LABEL_P (next));
6117 if (! next
6118 || ! INSN_P (next)
6119 || recog_memoized (next) == CODE_FOR_consttable_2)
6120 return 0;
6122 return align_loops_log;
6125 /* Do a final pass over the function, just before delayed branch
6126 scheduling. */
6127 static void
6128 sh_reorg (void)
6130 rtx_insn *first, *insn, *mova = NULL;
6131 int num_mova;
6132 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6133 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6135 first = get_insns ();
6136 max_labelno_before_reorg = max_label_num ();
6138 /* We must split call insns before introducing `mova's. If we're
6139 optimizing, they'll have already been split. Otherwise, make
6140 sure we don't split them too late. */
6141 if (! optimize)
6142 split_all_insns_noflow ();
6144 if (TARGET_SHMEDIA)
6145 return;
6147 /* If relaxing, generate pseudo-ops to associate function calls with
6148 the symbols they call. It does no harm to not generate these
6149 pseudo-ops. However, when we can generate them, it enables the
6150 linker to potentially relax the jsr to a bsr, and eliminate the
6151 register load and, possibly, the constant pool entry. */
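   /* Illustrative sketch of the effect (assembler syntax only indicative):
      final_prescan_insn later emits an internal label at the insn that
      loads the call target and a ".uses" pseudo-op at the call, e.g.

      .L4:
              mov.l   .L5,r1          ! load address of the callee
              ...
              .uses   .L4
              jsr     @r1
              nop

      so the linker can relax the jsr into a bsr and drop the load.  */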
6153 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6154 if (TARGET_RELAX)
6156 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6157 own purposes. This works because none of the remaining passes
6158 need to look at them.
6160 ??? But it may break in the future. We should use a machine
6161 dependent REG_NOTE, or some other approach entirely. */
6162 for (insn = first; insn; insn = NEXT_INSN (insn))
6164 if (INSN_P (insn))
6166 rtx note;
6168 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6169 NULL_RTX)) != 0)
6170 remove_note (insn, note);
6174 for (insn = first; insn; insn = NEXT_INSN (insn))
6176 rtx pattern, reg, set, dies;
6177 rtx_code_label *label;
6178 rtx_insn *link, *scan;
6179 int rescan = 0, foundinsn = 0;
6181 if (CALL_P (insn))
6183 pattern = PATTERN (insn);
6185 if (GET_CODE (pattern) == PARALLEL)
6186 pattern = XVECEXP (pattern, 0, 0);
6187 if (GET_CODE (pattern) == SET)
6188 pattern = SET_SRC (pattern);
6190 if (GET_CODE (pattern) != CALL
6191 || !MEM_P (XEXP (pattern, 0)))
6192 continue;
6194 reg = XEXP (XEXP (pattern, 0), 0);
6196 else
6198 reg = sfunc_uses_reg (insn);
6199 if (! reg)
6200 continue;
6203 if (!REG_P (reg))
6204 continue;
6206 /* Try scanning backward to find where the register is set. */
6207 link = NULL;
6208 for (scan = PREV_INSN (insn);
6209 scan && !LABEL_P (scan);
6210 scan = PREV_INSN (scan))
6212 if (! INSN_P (scan))
6213 continue;
6215 if (! reg_mentioned_p (reg, scan))
6216 continue;
6218 if (noncall_uses_reg (reg, scan, &set))
6219 break;
6221 if (set)
6223 link = scan;
6224 break;
6228 if (! link)
6229 continue;
6231 /* The register is set at LINK. */
6233 /* We can only optimize the function call if the register is
6234 being set to a symbol. In theory, we could sometimes
6235 optimize calls to a constant location, but the assembler
6236 and linker do not support that at present. */
6237 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6238 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6239 continue;
6241 /* Scan forward from LINK to the place where REG dies, and
6242 make sure that the only insns which use REG are
6243 themselves function calls. */
6245 /* ??? This doesn't work for call targets that were allocated
6246 by reload, since there may not be a REG_DEAD note for the
6247 register. */
6249 dies = NULL_RTX;
6250 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6252 rtx scanset;
6254 /* Don't try to trace forward past a CODE_LABEL if we haven't
6255 seen INSN yet. Ordinarily, we will only find the setting insn
6256 if it is in the same basic block. However,
6257 cross-jumping can insert code labels in between the load and
6258 the call, and can result in situations where a single call
6259 insn may have two targets depending on where we came from. */
6261 if (LABEL_P (scan) && ! foundinsn)
6262 break;
6264 if (! INSN_P (scan))
6265 continue;
6267 /* Don't try to trace forward past a JUMP. To optimize
6268 safely, we would have to check that all the
6269 instructions at the jump destination did not use REG. */
6271 if (JUMP_P (scan))
6272 break;
6274 if (! reg_mentioned_p (reg, scan))
6275 continue;
6277 if (noncall_uses_reg (reg, scan, &scanset))
6278 break;
6280 if (scan == insn)
6281 foundinsn = 1;
6283 if (scan != insn
6284 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6286 /* There is a function call to this register other
6287 than the one we are checking. If we optimize
6288 this call, we need to rescan again below. */
6289 rescan = 1;
6292 /* ??? We shouldn't have to worry about SCANSET here.
6293 We should just be able to check for a REG_DEAD note
6294 on a function call. However, the REG_DEAD notes are
6295 apparently not dependable around libcalls; c-torture
6296 execute/920501-2 is a test case. If SCANSET is set,
6297 then this insn sets the register, so it must have
6298 died earlier. Unfortunately, this will only handle
6299 the cases in which the register is, in fact, set in a
6300 later insn. */
6302 /* ??? We shouldn't have to use FOUNDINSN here.
6303 This dates back to when we used LOG_LINKS to find
6304 the most recent insn which sets the register. */
6306 if (foundinsn
6307 && (scanset
6308 || find_reg_note (scan, REG_DEAD, reg)))
6310 dies = scan;
6311 break;
6315 if (! dies)
6317 /* Either there was a branch, or some insn used REG
6318 other than as a function call address. */
6319 continue;
6322 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6323 on the insn which sets the register, and on each call insn
6324 which uses the register. In final_prescan_insn we look for
6325 the REG_LABEL_OPERAND notes, and output the appropriate label
6326 or pseudo-op. */
6328 label = gen_label_rtx ();
6329 add_reg_note (link, REG_LABEL_OPERAND, label);
6330 add_reg_note (insn, REG_LABEL_OPERAND, label);
6331 if (rescan)
6333 scan = link;
6336 rtx reg2;
6338 scan = NEXT_INSN (scan);
6339 if (scan != insn
6340 && ((CALL_P (scan)
6341 && reg_mentioned_p (reg, scan))
6342 || ((reg2 = sfunc_uses_reg (scan))
6343 && REGNO (reg2) == REGNO (reg))))
6344 add_reg_note (scan, REG_LABEL_OPERAND, label);
6346 while (scan != dies);
6351 if (TARGET_SH2)
6352 fixup_addr_diff_vecs (first);
6354 if (optimize)
6356 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6357 shorten_branches (first);
6360 /* Scan the function looking for move instructions which have to be
6361 changed to pc-relative loads and insert the literal tables. */
6362 label_ref_list_pool = create_alloc_pool ("label references list",
6363 sizeof (struct label_ref_list_d),
6364 30);
6365 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6366 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6368 if (mova_p (insn))
6370 /* ??? basic block reordering can move a switch table dispatch
6371 below the switch table. Check if that has happened.
6372 We only have the addresses available when optimizing; but then,
6373 this check shouldn't be needed when not optimizing. */
6374 if (!untangle_mova (&num_mova, &mova, insn))
6376 insn = mova;
6377 num_mova = 0;
6380 else if (JUMP_TABLE_DATA_P (insn)
6381 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6382 && num_mova
6383 /* ??? loop invariant motion can also move a mova out of a
6384 loop. Since loop does this code motion anyway, maybe we
6385 should wrap UNSPEC_MOVA into a CONST, so that reload can
6386 move it back. */
6387 && ((num_mova > 1
6388 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6389 || (prev_nonnote_insn (insn)
6390 == XEXP (MOVA_LABELREF (mova), 0))))
6392 rtx_insn *scan;
6393 int total;
6395 num_mova--;
6397 /* Some code might have been inserted between the mova and
6398 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6399 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6400 total += get_attr_length (scan);
6402 /* The range of the mova is 1020 bytes; add 4 because the pc counts from the
6403 address of the second instruction after this one, and subtract 2 in case
6404 the pc is 2-byte aligned, giving the 1022 limit checked below. Alignment
6405 needed for the ADDR_DIFF_VEC cancels out with that of the mova itself. */
6406 if (total > 1022)
6408 /* Change the mova into a load, and restart scanning
6409 there. broken_move will then return true for mova. */
6410 fixup_mova (mova);
6411 insn = mova;
6414 if (broken_move (insn)
6415 || (NONJUMP_INSN_P (insn)
6416 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6418 rtx_insn *scan;
6419 /* Scan ahead looking for a barrier to stick the constant table
6420 behind. */
6421 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6422 rtx_insn *last_float_move = NULL;
6423 rtx last_float = 0, *last_float_addr = NULL;
6424 int need_aligned_label = 0;
6426 if (num_mova && ! mova_p (mova))
6428 /* find_barrier had to change the first mova into a
6429 pcload; thus, we have to start with this new pcload. */
6430 insn = mova;
6431 num_mova = 0;
6433 /* Now find all the moves between the points and modify them. */
6434 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6436 if (LABEL_P (scan))
6437 last_float = 0;
6438 if (NONJUMP_INSN_P (scan)
6439 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6440 need_aligned_label = 1;
6441 if (broken_move (scan))
6443 rtx *patp = &PATTERN (scan), pat = *patp;
6444 rtx src, dst;
6445 rtx lab;
6446 rtx newsrc;
6447 machine_mode mode;
6449 if (GET_CODE (pat) == PARALLEL)
6450 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6451 src = SET_SRC (pat);
6452 dst = SET_DEST (pat);
6453 mode = GET_MODE (dst);
6455 if (mode == SImode && satisfies_constraint_I16 (src)
6456 && REGNO (dst) != FPUL_REG)
6458 int offset = 0;
6460 mode = HImode;
6461 while (GET_CODE (dst) == SUBREG)
6463 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6464 GET_MODE (SUBREG_REG (dst)),
6465 SUBREG_BYTE (dst),
6466 GET_MODE (dst));
6467 dst = SUBREG_REG (dst);
6469 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6471 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6473 /* This must be an insn that clobbers r0. */
6474 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6475 XVECLEN (PATTERN (scan), 0)
6476 - 1);
6477 rtx clobber = *clobberp;
6479 gcc_assert (GET_CODE (clobber) == CLOBBER
6480 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6482 if (last_float
6483 && reg_set_between_p (r0_rtx, last_float_move, scan))
6484 last_float = 0;
6485 if (last_float
6486 && TARGET_SHCOMPACT
6487 && GET_MODE_SIZE (mode) != 4
6488 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6489 last_float = 0;
6490 lab = add_constant (src, mode, last_float);
6491 if (lab)
6492 emit_insn_before (gen_mova (lab), scan);
6493 else
6495 /* There will be a REG_UNUSED note for r0 on
6496 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6497 otherwise reorg:mark_target_live_regs will not
6498 consider r0 to be used, and we could end up with a delay
6499 slot insn in front of SCAN that clobbers r0. */
6500 rtx note
6501 = find_regno_note (last_float_move, REG_UNUSED, 0);
6503 /* If we are not optimizing, then there may not be
6504 a note. */
6505 if (note)
6506 PUT_REG_NOTE_KIND (note, REG_INC);
6508 *last_float_addr = r0_inc_rtx;
6510 last_float_move = scan;
6511 last_float = src;
6512 newsrc = gen_const_mem (mode,
6513 (((TARGET_SH4 && ! TARGET_FMOVD)
6514 || REGNO (dst) == FPUL_REG)
6515 ? r0_inc_rtx
6516 : r0_rtx));
6517 last_float_addr = &XEXP (newsrc, 0);
6519 /* Remove the clobber of r0. */
6520 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6521 gen_rtx_SCRATCH (Pmode));
6523 /* This is a mova needing a label. Create it. */
6524 else if (GET_CODE (src) == UNSPEC
6525 && XINT (src, 1) == UNSPEC_MOVA
6526 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6528 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6529 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6530 newsrc = gen_rtx_UNSPEC (SImode,
6531 gen_rtvec (1, newsrc),
6532 UNSPEC_MOVA);
6534 else if (GET_CODE (src) == UNSPEC_VOLATILE
6535 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6537 newsrc = XVECEXP (src, 0, 0);
6538 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6539 INSN_CODE (scan) = -1;
6540 continue;
6542 else
6544 lab = add_constant (src, mode, 0);
6545 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6546 newsrc = gen_const_mem (mode, newsrc);
6548 *patp = gen_rtx_SET (dst, newsrc);
6549 INSN_CODE (scan) = -1;
6552 dump_table (need_aligned_label ? insn : 0, barrier);
6553 insn = barrier;
6556 free_alloc_pool (label_ref_list_pool);
6557 for (insn = first; insn; insn = NEXT_INSN (insn))
6558 PUT_MODE (insn, VOIDmode);
6560 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6561 INSN_ADDRESSES_FREE ();
6562 split_branches (first);
6564 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6565 also has an effect on the register that holds the address of the sfunc.
6566 Insert an extra dummy insn in front of each sfunc that pretends to
6567 use this register. */
6568 if (flag_delayed_branch)
6570 for (insn = first; insn; insn = NEXT_INSN (insn))
6572 rtx reg = sfunc_uses_reg (insn);
6574 if (! reg)
6575 continue;
6576 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6579 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6582 /* Return the UID of the insn that follows the specified label. */
6584 get_dest_uid (rtx label, int max_uid)
6586 rtx_insn *dest = next_real_insn (label);
6587 int dest_uid;
6588 if (! dest)
6589 /* This can happen for an undefined label. */
6590 return 0;
6591 dest_uid = INSN_UID (dest);
6592 /* If this is a newly created branch redirection blocking instruction,
6593 we cannot index the branch_uid or insn_addresses arrays with its
6594 uid. But then, we won't need to, because the actual destination is
6595 the following branch. */
6596 while (dest_uid >= max_uid)
6598 dest = NEXT_INSN (dest);
6599 dest_uid = INSN_UID (dest);
6601 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6602 return 0;
6603 return dest_uid;
6606 /* Split condbranches that are out of range. Also add clobbers for
6607 scratch registers that are needed in far jumps.
6608 We do this before delay slot scheduling, so that it can take our
6609 newly created instructions into account. It also allows us to
6610 find branches with common targets more easily. */
6611 static void
6612 split_branches (rtx_insn *first)
6614 rtx_insn *insn;
6615 struct far_branch **uid_branch, *far_branch_list = 0;
6616 int max_uid = get_max_uid ();
6617 int ok;
6619 /* Find out which branches are out of range. */
6620 shorten_branches (first);
6622 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6623 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6625 for (insn = first; insn; insn = NEXT_INSN (insn))
6626 if (! INSN_P (insn))
6627 continue;
6628 else if (insn->deleted ())
6630 /* Shorten_branches would split this instruction again,
6631 so transform it into a note. */
6632 SET_INSN_DELETED (insn);
6634 else if (JUMP_P (insn))
6636 enum attr_type type = get_attr_type (insn);
6637 if (type == TYPE_CBRANCH)
6639 rtx_insn *next, *beyond;
6641 if (get_attr_length (insn) > 4)
6643 rtx src = SET_SRC (PATTERN (insn));
6644 rtx olabel = XEXP (XEXP (src, 1), 0);
6645 int addr = INSN_ADDRESSES (INSN_UID (insn));
6646 rtx_insn *label = 0;
6647 int dest_uid = get_dest_uid (olabel, max_uid);
6648 struct far_branch *bp = uid_branch[dest_uid];
6650 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6651 the label if the LABEL_NUSES count drops to zero. There is
6652 always a jump_optimize pass that sets these values, but it
6653 proceeds to delete unreferenced code, and then if not
6654 optimizing, to un-delete the deleted instructions, thus
6655 leaving labels with use counts that are too low. */
6656 if (! optimize)
6658 JUMP_LABEL (insn) = olabel;
6659 LABEL_NUSES (olabel)++;
6661 if (! bp)
6663 bp = (struct far_branch *) alloca (sizeof *bp);
6664 uid_branch[dest_uid] = bp;
6665 bp->prev = far_branch_list;
6666 far_branch_list = bp;
6667 bp->far_label = as_a <rtx_insn *> (
6668 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6669 0));
6670 LABEL_NUSES (bp->far_label)++;
6672 else
6674 label = bp->near_label;
6675 if (! label && bp->address - addr >= CONDJUMP_MIN)
6677 rtx_insn *block = bp->insert_place;
6679 if (GET_CODE (PATTERN (block)) == RETURN)
6680 block = PREV_INSN (block);
6681 else
6682 block = gen_block_redirect (block,
6683 bp->address, 2);
6684 label = emit_label_after (gen_label_rtx (),
6685 PREV_INSN (block));
6686 bp->near_label = label;
6688 else if (label && ! NEXT_INSN (label))
6690 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6691 bp->insert_place = insn;
6692 else
6693 gen_far_branch (bp);
6696 if (! label
6697 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6699 bp->near_label = label = gen_label_rtx ();
6700 bp->insert_place = insn;
6701 bp->address = addr;
6703 ok = redirect_jump (insn, label, 0);
6704 gcc_assert (ok);
6706 else
6708 /* get_attr_length (insn) == 2 */
6709 /* Check if we have a pattern where reorg wants to redirect
6710 the branch to a label from an unconditional branch that
6711 is too far away. */
6712 /* We can't use JUMP_LABEL here because it might be undefined
6713 when not optimizing. */
6714 /* A syntax error might cause beyond to be NULL_RTX. */
6715 beyond
6716 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6717 0));
6719 if (beyond
6720 && (JUMP_P (beyond)
6721 || ((beyond = next_active_insn (beyond))
6722 && JUMP_P (beyond)))
6723 && GET_CODE (PATTERN (beyond)) == SET
6724 && recog_memoized (beyond) == CODE_FOR_jump_compact
6725 && ((INSN_ADDRESSES
6726 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6727 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6728 > 252 + 258 + 2))
6729 gen_block_redirect (beyond,
6730 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6733 next = next_active_insn (insn);
6735 if (next
6736 && (JUMP_P (next)
6737 || ((next = next_active_insn (next))
6738 && JUMP_P (next)))
6739 && GET_CODE (PATTERN (next)) == SET
6740 && recog_memoized (next) == CODE_FOR_jump_compact
6741 && ((INSN_ADDRESSES
6742 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6743 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6744 > 252 + 258 + 2))
6745 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6747 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6749 int addr = INSN_ADDRESSES (INSN_UID (insn));
6750 rtx_insn *far_label = 0;
6751 int dest_uid = 0;
6752 struct far_branch *bp;
6754 if (type == TYPE_JUMP)
6756 if (CROSSING_JUMP_P (insn))
6758 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6759 insn);
6760 continue;
6763 far_label = as_a <rtx_insn *> (
6764 XEXP (SET_SRC (PATTERN (insn)), 0));
6765 dest_uid = get_dest_uid (far_label, max_uid);
6766 if (! dest_uid)
6768 /* Parse errors can lead to labels outside
6769 the insn stream. */
6770 if (! NEXT_INSN (far_label))
6771 continue;
6773 if (! optimize)
6775 JUMP_LABEL (insn) = far_label;
6776 LABEL_NUSES (far_label)++;
6778 redirect_jump (insn, ret_rtx, 1);
6779 far_label = 0;
6782 bp = uid_branch[dest_uid];
6783 if (! bp)
6785 bp = (struct far_branch *) alloca (sizeof *bp);
6786 uid_branch[dest_uid] = bp;
6787 bp->prev = far_branch_list;
6788 far_branch_list = bp;
6789 bp->near_label = 0;
6790 bp->far_label = far_label;
6791 if (far_label)
6792 LABEL_NUSES (far_label)++;
6794 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6795 if (addr - bp->address <= CONDJUMP_MAX)
6796 emit_label_after (bp->near_label, PREV_INSN (insn));
6797 else
6799 gen_far_branch (bp);
6800 bp->near_label = 0;
6802 else
6803 bp->near_label = 0;
6804 bp->address = addr;
6805 bp->insert_place = insn;
6806 if (! far_label)
6807 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6808 else
6809 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6812 /* Generate all pending far branches,
6813 and free our references to the far labels. */
6814 while (far_branch_list)
6816 if (far_branch_list->near_label
6817 && ! NEXT_INSN (far_branch_list->near_label))
6818 gen_far_branch (far_branch_list);
6819 if (optimize
6820 && far_branch_list->far_label
6821 && ! --LABEL_NUSES (far_branch_list->far_label))
6822 delete_insn (far_branch_list->far_label);
6823 far_branch_list = far_branch_list->prev;
6826 /* Instruction length information is no longer valid due to the new
6827 instructions that have been generated. */
6828 init_insn_lengths ();
6831 /* Dump out instruction addresses, which is useful for debugging the
6832 constant pool table stuff.
6834 If relaxing, output the label and pseudo-ops used to link together
6835 calls and the instructions which set the registers.
6837 ??? The addresses printed by this routine for insns are nonsense for
6838 insns which are inside of a sequence where none of the inner insns have
6839 variable length. This is because the second pass of shorten_branches
6840 does not bother to update them. */
6841 void
6842 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6843 int noperands ATTRIBUTE_UNUSED)
6845 if (TARGET_DUMPISIZE)
6846 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6848 if (TARGET_RELAX)
6850 rtx note;
6852 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6853 if (note)
6855 rtx pattern;
6857 pattern = PATTERN (insn);
6858 if (GET_CODE (pattern) == PARALLEL)
6859 pattern = XVECEXP (pattern, 0, 0);
6860 switch (GET_CODE (pattern))
6862 case SET:
6863 if (GET_CODE (SET_SRC (pattern)) != CALL
6864 && get_attr_type (insn) != TYPE_SFUNC)
6866 targetm.asm_out.internal_label
6867 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6868 break;
6870 /* else FALLTHROUGH */
6871 case CALL:
6872 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6873 CODE_LABEL_NUMBER (XEXP (note, 0)));
6874 break;
6876 default:
6877 gcc_unreachable ();
6883 /* Dump out any constants accumulated in the final pass. These will
6884 only be labels. */
6885 const char *
6886 output_jump_label_table (void)
6888 int i;
6890 if (pool_size)
6892 fprintf (asm_out_file, "\t.align 2\n");
6893 for (i = 0; i < pool_size; i++)
6895 pool_node *p = &pool_vector[i];
6897 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6898 CODE_LABEL_NUMBER (p->label));
6899 output_asm_insn (".long %O0", &p->value);
6901 pool_size = 0;
6904 return "";
6907 /* A full frame looks like:
6909 arg-5
6910 arg-4
6911 [ if current_function_anonymous_args
6912 arg-3
6913 arg-2
6914 arg-1
6915 arg-0 ]
6916 saved-fp
6917 saved-r10
6918 saved-r11
6919 saved-r12
6920 saved-pr
6921 local-n
6923 local-1
6924 local-0 <- fp points here.
6926 Number of bytes pushed for anonymous args, used to pass information
6927 between expand_prologue and expand_epilogue.
6929 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6930 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6931 for an epilogue and a negative value means that it's for a sibcall
6932 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6933 all the registers that are about to be restored, and hence dead. */
6934 static void
6935 output_stack_adjust (int size, rtx reg, int epilogue_p,
6936 HARD_REG_SET *live_regs_mask, bool frame_p)
6938 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6939 if (size)
6941 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6943 /* This test is bogus, as output_stack_adjust is used to re-align the
6944 stack. */
6945 #if 0
6946 gcc_assert (!(size % align));
6947 #endif
6949 if (CONST_OK_FOR_ADD (size))
6950 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6951 /* Try to do it with two partial adjustments; however, we must make
6952 sure that the stack is properly aligned at all times, in case
6953 an interrupt occurs between the two partial adjustments. */
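      /* For example (assuming the non-SHmedia 8-bit add-immediate range of
         -128..127 and a 4-byte STACK_BOUNDARY): a SIZE of 192 is emitted as
         two "add #96" adjustments, each of which keeps the stack aligned.  */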
6954 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6955 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6957 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6958 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6960 else
6962 rtx const_reg;
6963 rtx insn;
6964 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6965 int i;
6967 /* If TEMP is invalid, we could temporarily save a general
6968 register to MACL. However, there is currently no need
6969 to handle this case, so just die when we see it. */
6970 if (epilogue_p < 0
6971 || current_function_interrupt
6972 || ! call_really_used_regs[temp] || fixed_regs[temp])
6973 temp = -1;
6974 if (temp < 0 && ! current_function_interrupt
6975 && (TARGET_SHMEDIA || epilogue_p >= 0))
6977 HARD_REG_SET temps;
6978 COPY_HARD_REG_SET (temps, call_used_reg_set);
6979 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6980 if (epilogue_p > 0)
6982 int nreg = 0;
6983 if (crtl->return_rtx)
6985 machine_mode mode;
6986 mode = GET_MODE (crtl->return_rtx);
6987 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6988 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6990 for (i = 0; i < nreg; i++)
6991 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6992 if (crtl->calls_eh_return)
6994 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6995 for (i = 0; i <= 3; i++)
6996 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6999 if (TARGET_SHMEDIA && epilogue_p < 0)
7000 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7001 CLEAR_HARD_REG_BIT (temps, i);
7002 if (epilogue_p <= 0)
7004 for (i = FIRST_PARM_REG;
7005 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7006 CLEAR_HARD_REG_BIT (temps, i);
7007 if (cfun->static_chain_decl != NULL)
7008 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7010 temp = scavenge_reg (&temps);
7012 if (temp < 0 && live_regs_mask)
7014 HARD_REG_SET temps;
7016 COPY_HARD_REG_SET (temps, *live_regs_mask);
7017 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7018 temp = scavenge_reg (&temps);
7020 if (temp < 0)
7022 rtx adj_reg, tmp_reg, mem;
7024 /* If we reached here, the most likely case is the (sibcall)
7025 epilogue for non SHmedia. Put a special push/pop sequence
7026 for such a case as a last resort. This looks lengthy but
7027 would not be a problem because it seems to be very
7028 rare. */
7030 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7033 /* ??? There is still the slight possibility that r4 or
7034 r5 have been reserved as fixed registers or assigned
7035 as global registers, and they change during an
7036 interrupt. There are possible ways to handle this:
7038 - If we are adjusting the frame pointer (r14), we can do
7039 with a single temp register and an ordinary push / pop
7040 on the stack.
7041 - Grab any call-used or call-saved registers (i.e. not
7042 fixed or globals) for the temps we need. We might
7043 also grab r14 if we are adjusting the stack pointer.
7044 If we can't find enough available registers, issue
7045 a diagnostic and die - the user must have reserved
7046 way too many registers.
7047 But since all this is rather unlikely to happen and
7048 would require extra testing, we just die if r4 / r5
7049 are not available. */
7050 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7051 && !global_regs[4] && !global_regs[5]);
7053 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7054 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7055 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7056 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7057 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7058 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7059 emit_move_insn (mem, tmp_reg);
7060 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7061 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7062 emit_move_insn (mem, tmp_reg);
7063 emit_move_insn (reg, adj_reg);
7064 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7065 emit_move_insn (adj_reg, mem);
7066 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7067 emit_move_insn (tmp_reg, mem);
7068 /* Tell flow the insns that pop r4/r5 aren't dead. */
7069 emit_use (tmp_reg);
7070 emit_use (adj_reg);
7071 return;
7073 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7075 /* If SIZE is negative, subtract the positive value.
7076 This sometimes allows a constant pool entry to be shared
7077 between prologue and epilogue code. */
7078 if (size < 0)
7080 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7081 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7083 else
7085 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7086 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7088 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7089 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7090 GEN_INT (size))));
7095 /* Emit the specified insn and mark it as frame related.
7096 FIXME: Rename this to emit_frame_insn. */
7097 static rtx_insn *
7098 frame_insn (rtx x)
7100 rtx_insn *insn = emit_insn (x);
7101 RTX_FRAME_RELATED_P (insn) = 1;
7102 return insn;
7105 /* Output RTL to push register RN onto the stack. */
7106 static rtx
7107 push (int rn)
7109 rtx x;
7110 if (rn == FPUL_REG)
7111 x = gen_push_fpul ();
7112 else if (rn == FPSCR_REG)
7113 x = gen_push_fpscr ();
7114 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7115 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7117 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7118 return NULL_RTX;
7119 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7121 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7122 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7123 else
7124 x = gen_push (gen_rtx_REG (SImode, rn));
7126 x = frame_insn (x);
7127 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7128 return x;
7131 /* Output RTL to pop register RN from the stack. */
7132 static void
7133 pop (int rn)
7135 rtx x, sp_reg, reg;
7136 if (rn == FPUL_REG)
7137 x = gen_pop_fpul ();
7138 else if (rn == FPSCR_REG)
7139 x = gen_pop_fpscr ();
7140 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7141 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7143 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7144 return;
7145 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7147 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7148 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7149 else
7150 x = gen_pop (gen_rtx_REG (SImode, rn));
7152 x = emit_insn (x);
7154 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7155 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7156 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7157 : SET_DEST (PATTERN (x)));
7158 add_reg_note (x, REG_CFA_RESTORE, reg);
7159 add_reg_note (x, REG_CFA_ADJUST_CFA,
7160 gen_rtx_SET (sp_reg,
7161 plus_constant (SImode, sp_reg,
7162 GET_MODE_SIZE (GET_MODE (reg)))));
7163 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7164 RTX_FRAME_RELATED_P (x) = 1;
7167 /* Generate code to push the regs specified in the mask. */
7168 static void
7169 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7171 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7172 int skip_fpscr = 0;
7174 /* Push PR last; this gives better latencies after the prologue, and
7175 candidates for the return delay slot when there are no general
7176 registers pushed. */
7177 for (; i < FIRST_PSEUDO_REGISTER; i++)
7179 /* If this is an interrupt handler, and the SZ bit varies,
7180 and we have to push any floating point register, we need
7181 to switch to the correct precision first. */
7182 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7183 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7185 HARD_REG_SET unsaved;
7187 push (FPSCR_REG);
7188 COMPL_HARD_REG_SET (unsaved, *mask);
7189 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7190 skip_fpscr = 1;
7192 if (i != PR_REG
7193 && (i != FPSCR_REG || ! skip_fpscr)
7194 && TEST_HARD_REG_BIT (*mask, i))
7196 /* If the ISR has RESBANK attribute assigned, don't push any of
7197 the following registers - R0-R14, MACH, MACL and GBR. */
7198 if (! (sh_cfun_resbank_handler_p ()
7199 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7200 || i == MACH_REG
7201 || i == MACL_REG
7202 || i == GBR_REG)))
7203 push (i);
7207 /* Push banked registers last to improve delay slot opportunities. */
7208 if (interrupt_handler)
7210 bool use_movml = false;
7212 if (TARGET_SH2A)
7214 unsigned int count = 0;
7216 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7217 if (TEST_HARD_REG_BIT (*mask, i))
7218 count++;
7219 else
7220 break;
7222 /* Use movml when all banked registers are pushed. */
7223 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7224 use_movml = true;
7227 if (sh_cfun_resbank_handler_p ())
7228 ; /* Do nothing. */
7229 else if (use_movml)
7231 rtx x, mem, reg, set;
7232 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7234 /* We must avoid scheduling the multiple-store insn together
7235 with other insns. */
7236 emit_insn (gen_blockage ());
7237 x = gen_movml_push_banked (sp_reg);
7238 x = frame_insn (x);
7239 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7241 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7242 reg = gen_rtx_REG (SImode, i);
7243 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7246 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7247 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7248 emit_insn (gen_blockage ());
7250 else
7251 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7252 if (TEST_HARD_REG_BIT (*mask, i))
7253 push (i);
7256 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7257 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7258 push (PR_REG);
7261 /* Calculate how much extra space is needed to save all callee-saved
7262 target registers.
7263 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7264 static int
7265 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7267 int reg;
7268 int stack_space = 0;
7269 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7271 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7272 if ((! call_really_used_regs[reg] || interrupt_handler)
7273 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7274 /* Leave space to save this target register on the stack,
7275 in case target register allocation wants to use it. */
7276 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7277 return stack_space;
7280 /* Decide whether we should reserve space for callee-save target registers,
7281 in case target register allocation wants to use them. REGS_SAVED is
7282 the space, in bytes, that is already required for register saves.
7283 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7284 static int
7285 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7286 HARD_REG_SET *live_regs_mask)
7288 if (optimize_size)
7289 return 0;
7290 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7293 /* Decide how much space to reserve for callee-save target registers
7294 in case target register allocation wants to use them.
7295 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7296 static int
7297 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7299 if (shmedia_space_reserved_for_target_registers)
7300 return shmedia_target_regs_stack_space (live_regs_mask);
7301 else
7302 return 0;
7305 /* Work out the registers which need to be saved, both as a mask and a
7306 count of bytes needed to save them. Return the count.
7308 If doing a pragma interrupt function, then push all regs used by the
7309 function, and if we call another function (we can tell by looking at PR),
7310 make sure that all the regs it clobbers are safe too. */
7311 static int
7312 calc_live_regs (HARD_REG_SET *live_regs_mask)
7314 unsigned int reg;
7315 int count;
7316 tree attrs;
7317 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7318 bool nosave_low_regs;
7319 int pr_live, has_call;
7321 attrs = DECL_ATTRIBUTES (current_function_decl);
7322 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7323 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7324 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7325 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7327 CLEAR_HARD_REG_SET (*live_regs_mask);
7328 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7329 && df_regs_ever_live_p (FPSCR_REG))
7330 target_flags &= ~MASK_FPU_SINGLE;
7331 /* If switching to double mode lets us avoid a lot of saves, do that. */
7332 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7333 && TARGET_FPU_SINGLE)
7334 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7335 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7336 && (! call_really_used_regs[reg]
7337 || interrupt_handler)
7338 && ++count > 2)
7340 target_flags &= ~MASK_FPU_SINGLE;
7341 break;
7343 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7344 knows how to use it. That means the pseudo originally allocated for
7345 the initial value can become the PR_MEDIA_REG hard register, as seen for
7346 execute/20010122-1.c:test9. */
7347 if (TARGET_SHMEDIA)
7348 /* ??? this function is called from initial_elimination_offset, hence we
7349 can't use the result of sh_media_register_for_return here. */
7350 pr_live = sh_pr_n_sets ();
7351 else
7353 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7354 pr_live = (pr_initial
7355 ? (!REG_P (pr_initial)
7356 || REGNO (pr_initial) != (PR_REG))
7357 : df_regs_ever_live_p (PR_REG));
7358 /* For Shcompact, if not optimizing, we end up with a memory reference
7359 using the return address pointer for __builtin_return_address even
7360 though there is no actual need to put the PR register on the stack. */
7361 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7363 /* Force PR to be live if the prologue has to call the SHmedia
7364 argument decoder or register saver. */
7365 if (TARGET_SHCOMPACT
7366 && ((crtl->args.info.call_cookie
7367 & ~ CALL_COOKIE_RET_TRAMP (1))
7368 || crtl->saves_all_registers))
7369 pr_live = 1;
7370 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7371 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7373 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7374 ? pr_live
7375 : interrupt_handler
7376 ? (/* Need to save all the regs ever live. */
7377 (df_regs_ever_live_p (reg)
7378 || (call_really_used_regs[reg]
7379 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7380 || reg == PIC_OFFSET_TABLE_REGNUM)
7381 && has_call)
7382 || (TARGET_SHMEDIA && has_call
7383 && REGISTER_NATURAL_MODE (reg) == SImode
7384 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7385 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7386 && reg != RETURN_ADDRESS_POINTER_REGNUM
7387 && reg != T_REG && reg != GBR_REG
7388 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7389 /* Push fpscr only on targets which have FPU */
7390 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7391 : (/* Only push those regs which are used and need to be saved. */
7392 (TARGET_SHCOMPACT
7393 && flag_pic
7394 && crtl->args.info.call_cookie
7395 && reg == PIC_OFFSET_TABLE_REGNUM)
7396 || (df_regs_ever_live_p (reg)
7397 && ((!call_really_used_regs[reg]
7398 && !(reg != PIC_OFFSET_TABLE_REGNUM
7399 && fixed_regs[reg] && call_used_regs[reg]))
7400 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7401 || (crtl->calls_eh_return
7402 && (reg == EH_RETURN_DATA_REGNO (0)
7403 || reg == EH_RETURN_DATA_REGNO (1)
7404 || reg == EH_RETURN_DATA_REGNO (2)
7405 || reg == EH_RETURN_DATA_REGNO (3)))
7406 || ((reg == MACL_REG || reg == MACH_REG)
7407 && df_regs_ever_live_p (reg)
7408 && sh_cfun_attr_renesas_p ())
7411 SET_HARD_REG_BIT (*live_regs_mask, reg);
7412 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7414 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7415 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7417 if (FP_REGISTER_P (reg))
7419 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7421 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7422 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7425 else if (XD_REGISTER_P (reg))
7427 /* Must switch to double mode to access these registers. */
7428 target_flags &= ~MASK_FPU_SINGLE;
7432 if (nosave_low_regs && reg == R8_REG)
7433 break;
7435 /* If we have a target register optimization pass after prologue / epilogue
7436 threading, we need to assume all target registers will be live even if
7437 they aren't now. */
7438 if (flag_branch_target_load_optimize2
7439 && TARGET_SAVE_ALL_TARGET_REGS
7440 && shmedia_space_reserved_for_target_registers)
7441 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7442 if ((! call_really_used_regs[reg] || interrupt_handler)
7443 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7445 SET_HARD_REG_BIT (*live_regs_mask, reg);
7446 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7448 /* If this is an interrupt handler, we don't have any call-clobbered
7449 registers we can conveniently use for target register save/restore.
7450 Make sure we save at least one general purpose register when we need
7451 to save target registers. */
7452 if (interrupt_handler
7453 && hard_reg_set_intersect_p (*live_regs_mask,
7454 reg_class_contents[TARGET_REGS])
7455 && ! hard_reg_set_intersect_p (*live_regs_mask,
7456 reg_class_contents[GENERAL_REGS]))
7458 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7459 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7462 return count;
7465 /* Code to generate prologue and epilogue sequences */
7467 /* PUSHED is the number of bytes that are being pushed on the
7468 stack for register saves. Return the frame size, padded
7469 appropriately so that the stack stays properly aligned. */
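/* A worked example with hypothetical numbers: if STACK_BOUNDARY is 32 bits
   (align == 4), get_frame_size () returns 22 and PUSHED is 12, then
   ((22 + 12 + 4 - 1) & -4) - 12 == 36 - 12 == 24, i.e. the frame is padded
   by 2 bytes so that the pushed registers plus the frame keep the stack
   4-byte aligned.  */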
7470 static HOST_WIDE_INT
7471 rounded_frame_size (int pushed)
7473 HOST_WIDE_INT size = get_frame_size ();
7474 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7476 if (ACCUMULATE_OUTGOING_ARGS)
7477 size += crtl->outgoing_args_size;
7479 return ((size + pushed + align - 1) & -align) - pushed;
7482 /* Choose a call-clobbered target-branch register that remains
7483 unchanged along the whole function. We set it up as the return
7484 value in the prologue. */
7485 static int
7486 sh_media_register_for_return (void)
7488 int regno;
7489 int tr0_used;
7491 if (! crtl->is_leaf)
7492 return -1;
7493 if (lookup_attribute ("interrupt_handler",
7494 DECL_ATTRIBUTES (current_function_decl)))
7495 return -1;
7496 if (sh_cfun_interrupt_handler_p ())
7497 return -1;
7499 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7501 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7502 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7503 return regno;
7505 return -1;
7508 /* The maximum number of registers we need to save is:
7509 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7510 - 32 floating point registers (for each pair, we save none,
7511 one single precision value, or a double precision value).
7512 - 8 target registers
7513 - add 1 entry for a delimiter. */
7514 #define MAX_SAVED_REGS (62+32+8)
7516 typedef struct save_entry_s
7518 unsigned char reg;
7519 unsigned char mode;
7520 short offset;
7521 } save_entry;
7523 #define MAX_TEMPS 4
7525 /* There will be a delimiter entry with VOIDmode both at the start and the
7526 end of a filled in schedule. The end delimiter has the offset of the
7527 save with the smallest (i.e. most negative) offset. */
7528 typedef struct save_schedule_s
7530 save_entry entries[MAX_SAVED_REGS + 2];
7531 int temps[MAX_TEMPS+1];
7532 } save_schedule;
7534 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7535 use reverse order. Returns the last entry written to (not counting
7536 the delimiter). OFFSET_BASE is a number to be added to all offset
7537 entries. */
7538 static save_entry *
7539 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7540 int offset_base)
7542 int align, i;
7543 save_entry *entry = schedule->entries;
7544 int tmpx = 0;
7545 int offset;
7547 if (! current_function_interrupt)
7548 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7549 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7550 && ! FUNCTION_ARG_REGNO_P (i)
7551 && i != FIRST_RET_REG
7552 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7553 && ! (crtl->calls_eh_return
7554 && (i == EH_RETURN_STACKADJ_REGNO
7555 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7556 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7557 schedule->temps[tmpx++] = i;
7558 entry->reg = -1;
7559 entry->mode = VOIDmode;
7560 entry->offset = offset_base;
7561 entry++;
7562 /* We loop twice: first, we save 8-byte aligned registers in the
7563 higher addresses, that are known to be aligned. Then, we
7564 proceed to saving 32-bit registers that don't need 8-byte
7565 alignment.
7566 If this is an interrupt function, all registers that need saving
7567 need to be saved in full.  Moreover, we need to postpone saving
7568 target registers till we have saved some general purpose registers
7569 we can then use as scratch registers. */
7570 offset = offset_base;
7571 for (align = 1; align >= 0; align--)
7573 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7574 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7576 machine_mode mode = REGISTER_NATURAL_MODE (i);
7577 int reg = i;
7579 if (current_function_interrupt)
7581 if (TARGET_REGISTER_P (i))
7582 continue;
7583 if (GENERAL_REGISTER_P (i))
7584 mode = DImode;
7586 if (mode == SFmode && (i % 2) == 1
7587 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7588 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7590 mode = DFmode;
7591 i--;
7592 reg--;
7595 /* If we're doing the aligned pass and this is not aligned,
7596 or we're doing the unaligned pass and this is aligned,
7597 skip it. */
7598 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7599 != align)
7600 continue;
7602 if (current_function_interrupt
7603 && GENERAL_REGISTER_P (i)
7604 && tmpx < MAX_TEMPS)
7605 schedule->temps[tmpx++] = i;
7607 offset -= GET_MODE_SIZE (mode);
7608 entry->reg = i;
7609 entry->mode = mode;
7610 entry->offset = offset;
7611 entry++;
7613 if (align && current_function_interrupt)
7614 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7615 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7617 offset -= GET_MODE_SIZE (DImode);
7618 entry->reg = i;
7619 entry->mode = DImode;
7620 entry->offset = offset;
7621 entry++;
7624 entry->reg = -1;
7625 entry->mode = VOIDmode;
7626 entry->offset = offset;
7627 schedule->temps[tmpx] = -1;
7628 return entry - 1;
7631 /* Expand code for the function prologue. */
7632 void
7633 sh_expand_prologue (void)
7635 HARD_REG_SET live_regs_mask;
7636 int d, i;
7637 int d_rounding = 0;
7638 int save_flags = target_flags;
7639 int pretend_args;
7640 int stack_usage;
7641 tree sp_switch_attr
7642 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7644 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7646 /* We have pretend args if we had an object sent partially in registers
7647 and partially on the stack, e.g. a large structure. */
7648 pretend_args = crtl->args.pretend_args_size;
7649 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7650 && (NPARM_REGS(SImode)
7651 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7652 pretend_args = 0;
7654 output_stack_adjust (-pretend_args
7655 - crtl->args.info.stack_regs * 8,
7656 stack_pointer_rtx, 0, NULL, true);
7657 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7659 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7660 /* We're going to use the PIC register to load the address of the
7661 incoming-argument decoder and/or of the return trampoline from
7662 the GOT, so make sure the PIC register is preserved and
7663 initialized. */
7664 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7666 if (TARGET_SHCOMPACT
7667 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7669 int reg;
7671 /* First, make all registers with incoming arguments that will
7672 be pushed onto the stack live, so that register renaming
7673 doesn't overwrite them. */
7674 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7675 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7676 >= NPARM_REGS (SImode) - reg)
7677 for (; reg < NPARM_REGS (SImode); reg++)
7678 emit_insn (gen_shcompact_preserve_incoming_args
7679 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7680 else if (CALL_COOKIE_INT_REG_GET
7681 (crtl->args.info.call_cookie, reg) == 1)
7682 emit_insn (gen_shcompact_preserve_incoming_args
7683 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7685 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7686 stack_pointer_rtx);
7687 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7688 GEN_INT (crtl->args.info.call_cookie));
7689 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7690 gen_rtx_REG (SImode, R0_REG));
7692 else if (TARGET_SHMEDIA)
7694 int tr = sh_media_register_for_return ();
7696 if (tr >= 0)
7697 emit_move_insn (gen_rtx_REG (DImode, tr),
7698 gen_rtx_REG (DImode, PR_MEDIA_REG));
7701 /* Emit the code for SETUP_VARARGS. */
7702 if (cfun->stdarg)
7704 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7706 /* Push arg regs as if they'd been provided by the caller on the stack.  */
7707 for (i = 0; i < NPARM_REGS(SImode); i++)
7709 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7711 if (i >= (NPARM_REGS(SImode)
7712 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7714 break;
7715 push (rn);
7716 stack_usage += GET_MODE_SIZE (SImode);
7721 /* If we're supposed to switch stacks at function entry, do so now. */
7722 if (sp_switch_attr)
7724 rtx lab, newsrc;
7725 /* The argument specifies a variable holding the address of the
7726 stack the interrupt function should switch to/from at entry/exit. */
7727 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7728 const char *s
7729 = ggc_strdup (TREE_STRING_POINTER (arg));
7730 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7732 lab = add_constant (sp_switch, SImode, 0);
7733 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7735 emit_insn (gen_sp_switch_1 (newsrc));
7738 d = calc_live_regs (&live_regs_mask);
7739 /* ??? Maybe we could save some switching if we can move a mode switch
7740 that already happens to be at the function start into the prologue. */
7741 if (target_flags != save_flags && ! current_function_interrupt)
7742 emit_insn (gen_toggle_sz ());
7744 if (TARGET_SH5)
7746 int offset_base, offset;
7747 rtx r0 = NULL_RTX;
7748 int offset_in_r0 = -1;
7749 int sp_in_r0 = 0;
7750 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7751 int total_size, save_size;
7752 save_schedule schedule;
7753 save_entry *entry;
7754 int *tmp_pnt;
7756 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7757 && ! current_function_interrupt)
7758 r0 = gen_rtx_REG (Pmode, R0_REG);
7760 /* D is the actual number of bytes that we need for saving registers,
7761 however, in initial_elimination_offset we have committed to using
7762 an additional TREGS_SPACE amount of bytes - in order to keep both
7763 addresses to arguments supplied by the caller and local variables
7764 valid, we must keep this gap. Place it between the incoming
7765 arguments and the actually saved registers in a bid to optimize
7766 locality of reference. */
7767 total_size = d + tregs_space;
7768 total_size += rounded_frame_size (total_size);
7769 save_size = total_size - rounded_frame_size (d);
7770 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7771 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7772 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
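	  /* Illustration with assumed values: if the stack boundary is 8 bytes
	     and save_size is 20, then save_size % 8 == 4 and d_rounding becomes
	     8 - 4 == 4, so the register save area is padded to end on an 8-byte
	     boundary.  */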
7774 /* If adjusting the stack in a single step costs nothing extra, do so.
7775 I.e. either if a single addi is enough, or we need a movi anyway,
7776 and we don't exceed the maximum offset range (the test for the
7777 latter is conservative for simplicity). */
7778 if (TARGET_SHMEDIA
7779 && (CONST_OK_FOR_I10 (-total_size)
7780 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7781 && total_size <= 2044)))
7782 d_rounding = total_size - save_size;
7784 offset_base = d + d_rounding;
7786 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7787 0, NULL, true);
7788 stack_usage += save_size + d_rounding;
7790 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7791 tmp_pnt = schedule.temps;
7792 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7794 machine_mode mode = (machine_mode) entry->mode;
7795 unsigned int reg = entry->reg;
7796 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7797 rtx orig_reg_rtx;
7799 offset = entry->offset;
7801 reg_rtx = gen_rtx_REG (mode, reg);
7803 mem_rtx = gen_frame_mem (mode,
7804 gen_rtx_PLUS (Pmode,
7805 stack_pointer_rtx,
7806 GEN_INT (offset)));
7808 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7810 gcc_assert (r0);
7811 mem_rtx = NULL_RTX;
7814 if (HAVE_PRE_DECREMENT
7815 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7816 || mem_rtx == NULL_RTX
7817 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7819 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7821 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7822 pre_dec = NULL_RTX;
7823 else
7825 mem_rtx = NULL_RTX;
7826 offset += GET_MODE_SIZE (mode);
7830 if (mem_rtx != NULL_RTX)
7831 goto addr_ok;
7833 if (offset_in_r0 == -1)
7835 emit_move_insn (r0, GEN_INT (offset));
7836 offset_in_r0 = offset;
7838 else if (offset != offset_in_r0)
7840 emit_move_insn (r0,
7841 gen_rtx_PLUS
7842 (Pmode, r0,
7843 GEN_INT (offset - offset_in_r0)));
7844 offset_in_r0 += offset - offset_in_r0;
7847 if (pre_dec != NULL_RTX)
7849 if (! sp_in_r0)
7851 emit_move_insn (r0,
7852 gen_rtx_PLUS
7853 (Pmode, r0, stack_pointer_rtx));
7854 sp_in_r0 = 1;
7857 offset -= GET_MODE_SIZE (mode);
7858 offset_in_r0 -= GET_MODE_SIZE (mode);
7860 mem_rtx = pre_dec;
7862 else if (sp_in_r0)
7863 mem_rtx = gen_frame_mem (mode, r0);
7864 else
7865 mem_rtx = gen_frame_mem (mode,
7866 gen_rtx_PLUS (Pmode,
7867 stack_pointer_rtx,
7868 r0));
7870 /* We must not use an r0-based address for target-branch
7871 registers or for special registers without pre-dec
7872 memory addresses, since we store their values in r0
7873 first. */
7874 gcc_assert (!TARGET_REGISTER_P (reg)
7875 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7876 || mem_rtx == pre_dec));
7878 addr_ok:
7879 orig_reg_rtx = reg_rtx;
7880 if (TARGET_REGISTER_P (reg)
7881 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7882 && mem_rtx != pre_dec))
7884 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7886 emit_move_insn (tmp_reg, reg_rtx);
7888 if (REGNO (tmp_reg) == R0_REG)
7890 offset_in_r0 = -1;
7891 sp_in_r0 = 0;
7892 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7895 if (*++tmp_pnt <= 0)
7896 tmp_pnt = schedule.temps;
7898 reg_rtx = tmp_reg;
7901 rtx insn;
7903 /* Mark as interesting for dwarf cfi generator */
7904 insn = emit_move_insn (mem_rtx, reg_rtx);
7905 RTX_FRAME_RELATED_P (insn) = 1;
7906 /* If we use an intermediate register for the save, we can't
7907 describe this exactly in cfi as a copy of the to-be-saved
7908 register into the temporary register and then the temporary
7909 register on the stack, because the temporary register can
7910 have a different natural size than the to-be-saved register.
7911 Thus, we gloss over the intermediate copy and pretend we do
7912 a direct save from the to-be-saved register. */
7913 if (REGNO (reg_rtx) != reg)
7915 rtx set;
7917 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7921 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7923 rtx reg_rtx = gen_rtx_REG (mode, reg);
7924 rtx set;
7925 rtx mem_rtx = gen_frame_mem (mode,
7926 gen_rtx_PLUS (Pmode,
7927 stack_pointer_rtx,
7928 GEN_INT (offset)));
7930 set = gen_rtx_SET (mem_rtx, reg_rtx);
7931 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7936 gcc_assert (entry->offset == d_rounding);
7938 else
7940 push_regs (&live_regs_mask, current_function_interrupt);
7941 stack_usage += d;
7944 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7945 emit_insn (gen_GOTaddr2picreg ());
7947 if (SHMEDIA_REGS_STACK_ADJUST ())
7949 /* This must NOT go through the PLT, otherwise mach and macl
7950 may be clobbered. */
7951 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7952 (TARGET_FPU_ANY
7953 ? "__GCC_push_shmedia_regs"
7954 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7955 emit_insn (gen_shmedia_save_restore_regs_compact
7956 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7959 if (target_flags != save_flags && ! current_function_interrupt)
7960 emit_insn (gen_toggle_sz ());
7962 target_flags = save_flags;
7964 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7965 stack_pointer_rtx, 0, NULL, true);
7966 stack_usage += rounded_frame_size (d) - d_rounding;
7968 if (frame_pointer_needed)
7969 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7971 if (TARGET_SHCOMPACT
7972 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7974 /* This must NOT go through the PLT, otherwise mach and macl
7975 may be clobbered. */
7976 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7977 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7978 emit_insn (gen_shcompact_incoming_args ());
7981 /* If we are profiling, make sure no instructions are scheduled before
7982 the call to mcount. Similarly if some call instructions are swapped
7983 before frame related insns, it'll confuse the unwinder because
7984 currently SH has no unwind info for function epilogues. */
7985 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7986 emit_insn (gen_blockage ());
7988 if (flag_stack_usage_info)
7989 current_function_static_stack_size = stack_usage;
7992 /* Expand code for the function epilogue. */
7993 void
7994 sh_expand_epilogue (bool sibcall_p)
7996 HARD_REG_SET live_regs_mask;
7997 int d, i;
7998 int d_rounding = 0;
8000 int save_flags = target_flags;
8001 int frame_size, save_size;
8002 int fpscr_deferred = 0;
8003 int e = sibcall_p ? -1 : 1;
8005 d = calc_live_regs (&live_regs_mask);
8007 save_size = d;
8008 frame_size = rounded_frame_size (d);
8010 if (TARGET_SH5)
8012 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8013 int total_size;
8014 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8015 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8016 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8018 total_size = d + tregs_space;
8019 total_size += rounded_frame_size (total_size);
8020 save_size = total_size - frame_size;
8022 /* If adjusting the stack in a single step costs nothing extra, do so.
8023 I.e. either if a single addi is enough, or we need a movi anyway,
8024 and we don't exceed the maximum offset range (the test for the
8025 latter is conservative for simplicity). */
8026 if (TARGET_SHMEDIA
8027 && ! frame_pointer_needed
8028 && (CONST_OK_FOR_I10 (total_size)
8029 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8030 && total_size <= 2044)))
8031 d_rounding = frame_size;
8033 frame_size -= d_rounding;
8036 if (frame_pointer_needed)
8038 /* We must avoid scheduling the epilogue with previous basic blocks.
8039 See PR/18032 and PR/40313. */
8040 emit_insn (gen_blockage ());
8041 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8042 &live_regs_mask, true);
8044 /* We must avoid moving the stack pointer adjustment past code
8045 which reads from the local frame, else an interrupt could
8046 occur after the SP adjustment and clobber data in the local
8047 frame. */
8048 emit_insn (gen_blockage ());
8049 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8051 else if (frame_size)
8053 /* We must avoid moving the stack pointer adjustment past code
8054 which reads from the local frame, else an interrupt could
8055 occur after the SP adjustment and clobber data in the local
8056 frame. */
8057 emit_insn (gen_blockage ());
8058 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8059 &live_regs_mask, true);
8062 if (SHMEDIA_REGS_STACK_ADJUST ())
8064 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8065 (TARGET_FPU_ANY
8066 ? "__GCC_pop_shmedia_regs"
8067 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8068 /* This must NOT go through the PLT, otherwise mach and macl
8069 may be clobbered. */
8070 emit_insn (gen_shmedia_save_restore_regs_compact
8071 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8074 /* Pop all the registers. */
8076 if (target_flags != save_flags && ! current_function_interrupt)
8077 emit_insn (gen_toggle_sz ());
8078 if (TARGET_SH5)
8080 int offset_base, offset;
8081 int offset_in_r0 = -1;
8082 int sp_in_r0 = 0;
8083 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8084 save_schedule schedule;
8085 save_entry *entry;
8086 int *tmp_pnt;
8088 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8089 offset_base = -entry[1].offset + d_rounding;
8090 tmp_pnt = schedule.temps;
8091 for (; entry->mode != VOIDmode; entry--)
8093 machine_mode mode = (machine_mode) entry->mode;
8094 int reg = entry->reg;
8095 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8097 offset = offset_base + entry->offset;
8098 reg_rtx = gen_rtx_REG (mode, reg);
8100 mem_rtx = gen_frame_mem (mode,
8101 gen_rtx_PLUS (Pmode,
8102 stack_pointer_rtx,
8103 GEN_INT (offset)));
8105 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8106 mem_rtx = NULL_RTX;
8108 if (HAVE_POST_INCREMENT
8109 && (offset == offset_in_r0
8110 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8111 && mem_rtx == NULL_RTX)
8112 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8114 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8116 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8117 post_inc = NULL_RTX;
8118 else
8119 mem_rtx = NULL_RTX;
8122 if (mem_rtx != NULL_RTX)
8123 goto addr_ok;
8125 if (offset_in_r0 == -1)
8127 emit_move_insn (r0, GEN_INT (offset));
8128 offset_in_r0 = offset;
8130 else if (offset != offset_in_r0)
8132 emit_move_insn (r0,
8133 gen_rtx_PLUS
8134 (Pmode, r0,
8135 GEN_INT (offset - offset_in_r0)));
8136 offset_in_r0 += offset - offset_in_r0;
8139 if (post_inc != NULL_RTX)
8141 if (! sp_in_r0)
8143 emit_move_insn (r0,
8144 gen_rtx_PLUS
8145 (Pmode, r0, stack_pointer_rtx));
8146 sp_in_r0 = 1;
8149 mem_rtx = post_inc;
8151 offset_in_r0 += GET_MODE_SIZE (mode);
8153 else if (sp_in_r0)
8154 mem_rtx = gen_frame_mem (mode, r0);
8155 else
8156 mem_rtx = gen_frame_mem (mode,
8157 gen_rtx_PLUS (Pmode,
8158 stack_pointer_rtx,
8159 r0));
8161 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8162 || mem_rtx == post_inc);
8164 addr_ok:
8165 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8166 && mem_rtx != post_inc)
8168 emit_move_insn (r0, mem_rtx);
8169 mem_rtx = r0;
8171 else if (TARGET_REGISTER_P (reg))
8173 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8175 /* Give the scheduler a bit of freedom by using up to
8176 MAX_TEMPS registers in a round-robin fashion. */
8177 emit_move_insn (tmp_reg, mem_rtx);
8178 mem_rtx = tmp_reg;
8179 if (*++tmp_pnt < 0)
8180 tmp_pnt = schedule.temps;
8183 emit_move_insn (reg_rtx, mem_rtx);
8186 gcc_assert (entry->offset + offset_base == d + d_rounding);
8188 else /* ! TARGET_SH5 */
8190 int last_reg;
8192 save_size = 0;
8193 /* For an ISR with RESBANK attribute assigned, don't pop PR
8194 register. */
8195 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8196 && !sh_cfun_resbank_handler_p ())
8198 if (!frame_pointer_needed)
8199 emit_insn (gen_blockage ());
8200 pop (PR_REG);
8203 /* Banked registers are popped first to avoid being scheduled in the
8204 delay slot.  RTE switches banks before the delay-slot instruction.  */
8205 if (current_function_interrupt)
8207 bool use_movml = false;
8209 if (TARGET_SH2A)
8211 unsigned int count = 0;
8213 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8214 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8215 count++;
8216 else
8217 break;
8219 /* Use movml when all banked registers are popped.  */
8220 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8221 use_movml = true;
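	  /* That is, the counting loop above stops at the first banked register
	     that is not live, so use_movml is set only when the entire bank was
	     saved, presumably because the movml pop below restores the banked
	     registers as one block rather than individually (intent as read
	     from the code, not an ISA reference).  */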
8224 if (sh_cfun_resbank_handler_p ())
8225 ; /* Do nothing. */
8226 else if (use_movml)
8228 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8230 /* We must avoid scheduling the multiple-register load insn
8231 together with other insns.  */
8232 emit_insn (gen_blockage ());
8233 emit_insn (gen_movml_pop_banked (sp_reg));
8234 emit_insn (gen_blockage ());
8236 else
8237 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8238 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8239 pop (i);
8241 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8243 else
8244 last_reg = FIRST_PSEUDO_REGISTER;
8246 for (i = 0; i < last_reg; i++)
8248 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8250 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8251 && hard_reg_set_intersect_p (live_regs_mask,
8252 reg_class_contents[DF_REGS]))
8253 fpscr_deferred = 1;
8254 /* For an ISR with RESBANK attribute assigned, don't pop
8255 following registers, R0-R14, MACH, MACL and GBR. */
8256 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8257 && ! (sh_cfun_resbank_handler_p ()
8258 && ((j >= FIRST_GENERAL_REG
8259 && j < LAST_GENERAL_REG)
8260 || j == MACH_REG
8261 || j == MACL_REG
8262 || j == GBR_REG)))
8263 pop (j);
8265 if (j == FIRST_FP_REG && fpscr_deferred)
8266 pop (FPSCR_REG);
8269 if (target_flags != save_flags && ! current_function_interrupt)
8270 emit_insn (gen_toggle_sz ());
8271 target_flags = save_flags;
8273 output_stack_adjust (crtl->args.pretend_args_size
8274 + save_size + d_rounding
8275 + crtl->args.info.stack_regs * 8,
8276 stack_pointer_rtx, e, NULL, true);
8278 if (crtl->calls_eh_return)
8279 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8280 EH_RETURN_STACKADJ_RTX));
8282 /* Switch back to the normal stack if necessary. */
8283 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8284 emit_insn (gen_sp_switch_2 ());
8286 /* Tell flow the insn that pops PR isn't dead. */
8287 /* PR_REG will never be live in SHmedia mode, and we don't need to
8288 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8289 by the return pattern. */
8290 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8291 emit_use (gen_rtx_REG (SImode, PR_REG));
8294 /* Emit code to change the current function's return address to RA.
8295 TMP is available as a scratch register, if needed.  */
8296 void
8297 sh_set_return_address (rtx ra, rtx tmp)
8299 HARD_REG_SET live_regs_mask;
8300 int d;
8301 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8302 int pr_offset;
8304 d = calc_live_regs (&live_regs_mask);
8306 /* If pr_reg isn't live, we can set it (or the register given in
8307 sh_media_register_for_return) directly. */
8308 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8310 rtx rr;
8312 if (TARGET_SHMEDIA)
8314 int rr_regno = sh_media_register_for_return ();
8316 if (rr_regno < 0)
8317 rr_regno = pr_reg;
8319 rr = gen_rtx_REG (DImode, rr_regno);
8321 else
8322 rr = gen_rtx_REG (SImode, pr_reg);
8324 emit_insn (GEN_MOV (rr, ra));
8325 /* Tell flow the register for return isn't dead. */
8326 emit_use (rr);
8327 return;
8330 if (TARGET_SH5)
8332 int offset;
8333 save_schedule schedule;
8334 save_entry *entry;
8336 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8337 offset = entry[1].offset;
8338 for (; entry->mode != VOIDmode; entry--)
8339 if (entry->reg == pr_reg)
8340 goto found;
8342 /* We can't find pr register. */
8343 gcc_unreachable ();
8345 found:
8346 offset = entry->offset - offset;
8347 pr_offset = (rounded_frame_size (d) + offset
8348 + SHMEDIA_REGS_STACK_ADJUST ());
8350 else
8351 pr_offset = rounded_frame_size (d);
8353 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8355 if (frame_pointer_needed)
8356 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8357 else
8358 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8360 tmp = gen_frame_mem (Pmode, tmp);
8361 emit_insn (GEN_MOV (tmp, ra));
8362 /* Tell flow this store isn't dead.  */
8363 emit_use (tmp);
8366 /* Clear variables at function end. */
8367 static void
8368 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8369 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8373 static rtx
8374 sh_builtin_saveregs (void)
8376 /* First unnamed integer register. */
8377 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8378 /* Number of integer registers we need to save. */
8379 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8380 /* First unnamed SFmode float reg */
8381 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8382 /* Number of SFmode float regs to save. */
8383 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8384 rtx regbuf, fpregs;
8385 int bufsize, regno;
8386 alias_set_type alias_set;
8388 if (TARGET_SH5)
8390 if (n_intregs)
8392 int pushregs = n_intregs;
8394 while (pushregs < NPARM_REGS (SImode) - 1
8395 && (CALL_COOKIE_INT_REG_GET
8396 (crtl->args.info.call_cookie,
8397 NPARM_REGS (SImode) - pushregs)
8398 == 1))
8400 crtl->args.info.call_cookie
8401 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8402 - pushregs, 1);
8403 pushregs++;
8406 if (pushregs == NPARM_REGS (SImode))
8407 crtl->args.info.call_cookie
8408 |= (CALL_COOKIE_INT_REG (0, 1)
8409 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8410 else
8411 crtl->args.info.call_cookie
8412 |= CALL_COOKIE_STACKSEQ (pushregs);
8414 crtl->args.pretend_args_size += 8 * n_intregs;
8416 if (TARGET_SHCOMPACT)
8417 return const0_rtx;
8420 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8422 error ("__builtin_saveregs not supported by this subtarget");
8423 return const0_rtx;
8426 if (TARGET_SHMEDIA)
8427 n_floatregs = 0;
8429 /* Allocate block of memory for the regs. */
8430 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8431 Or can assign_stack_local accept a 0 SIZE argument? */
8432 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8434 if (TARGET_SHMEDIA)
8435 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8436 else if (n_floatregs & 1)
8438 rtx addr;
8440 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8441 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8442 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8443 regbuf = change_address (regbuf, BLKmode, addr);
8445 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8447 rtx addr, mask;
8449 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8450 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8451 XEXP (regbuf, 0), 4));
8452 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8453 emit_insn (gen_andsi3 (addr, addr, mask));
8454 regbuf = change_address (regbuf, BLKmode, addr);
8456 else
8457 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8458 alias_set = get_varargs_alias_set ();
8459 set_mem_alias_set (regbuf, alias_set);
8461 /* Save int args.
8462 This is optimized to only save the regs that are necessary. Explicitly
8463 named args need not be saved. */
8464 if (n_intregs > 0)
8465 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8466 adjust_address (regbuf, BLKmode,
8467 n_floatregs * UNITS_PER_WORD),
8468 n_intregs);
8470 if (TARGET_SHMEDIA)
8471 /* Return the address of the regbuf. */
8472 return XEXP (regbuf, 0);
8474 /* Save float args.
8475 This is optimized to only save the regs that are necessary. Explicitly
8476 named args need not be saved.
8477 We explicitly build a pointer to the buffer because it halves the insn
8478 count when not optimizing (otherwise the pointer is built for each reg
8479 saved).
8480 We emit the moves in reverse order so that we can use predecrement. */
8482 fpregs = copy_to_mode_reg (Pmode,
8483 plus_constant (Pmode, XEXP (regbuf, 0),
8484 n_floatregs * UNITS_PER_WORD));
8485 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8487 rtx mem;
8488 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8490 emit_insn (gen_addsi3 (fpregs, fpregs,
8491 GEN_INT (-2 * UNITS_PER_WORD)));
8492 mem = change_address (regbuf, DFmode, fpregs);
8493 emit_move_insn (mem,
8494 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8496 regno = first_floatreg;
8497 if (regno & 1)
8499 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8500 mem = change_address (regbuf, SFmode, fpregs);
8501 emit_move_insn (mem,
8502 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8503 + regno - SH_REG_MSW_OFFSET));
8506 else
8507 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8509 rtx mem;
8511 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8512 mem = change_address (regbuf, SFmode, fpregs);
8513 emit_move_insn (mem,
8514 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8517 /* Return the address of the regbuf. */
8518 return XEXP (regbuf, 0);
8521 /* Define the `__builtin_va_list' type for the ABI. */
8522 static tree
8523 sh_build_builtin_va_list (void)
8525 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8526 tree record, type_decl;
8528 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8529 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8530 return ptr_type_node;
8532 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8533 type_decl = build_decl (BUILTINS_LOCATION,
8534 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8536 f_next_o = build_decl (BUILTINS_LOCATION,
8537 FIELD_DECL, get_identifier ("__va_next_o"),
8538 ptr_type_node);
8539 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8540 FIELD_DECL,
8541 get_identifier ("__va_next_o_limit"),
8542 ptr_type_node);
8543 f_next_fp = build_decl (BUILTINS_LOCATION,
8544 FIELD_DECL, get_identifier ("__va_next_fp"),
8545 ptr_type_node);
8546 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8547 FIELD_DECL,
8548 get_identifier ("__va_next_fp_limit"),
8549 ptr_type_node);
8550 f_next_stack = build_decl (BUILTINS_LOCATION,
8551 FIELD_DECL, get_identifier ("__va_next_stack"),
8552 ptr_type_node);
8554 DECL_FIELD_CONTEXT (f_next_o) = record;
8555 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8556 DECL_FIELD_CONTEXT (f_next_fp) = record;
8557 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8558 DECL_FIELD_CONTEXT (f_next_stack) = record;
8560 TYPE_STUB_DECL (record) = type_decl;
8561 TYPE_NAME (record) = type_decl;
8562 TYPE_FIELDS (record) = f_next_o;
8563 DECL_CHAIN (f_next_o) = f_next_o_limit;
8564 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8565 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8566 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8568 layout_type (record);
8570 return record;
8573 /* Implement `va_start' for varargs and stdarg. */
8574 static void
8575 sh_va_start (tree valist, rtx nextarg)
8577 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8578 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8579 tree t, u;
8580 int nfp, nint;
8582 if (TARGET_SH5)
8584 expand_builtin_saveregs ();
8585 std_expand_builtin_va_start (valist, nextarg);
8586 return;
8589 if ((! TARGET_SH2E && ! TARGET_SH4)
8590 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8592 std_expand_builtin_va_start (valist, nextarg);
8593 return;
8596 f_next_o = TYPE_FIELDS (va_list_type_node);
8597 f_next_o_limit = DECL_CHAIN (f_next_o);
8598 f_next_fp = DECL_CHAIN (f_next_o_limit);
8599 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8600 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8602 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8603 NULL_TREE);
8604 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8605 valist, f_next_o_limit, NULL_TREE);
8606 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8607 NULL_TREE);
8608 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8609 valist, f_next_fp_limit, NULL_TREE);
8610 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8611 valist, f_next_stack, NULL_TREE);
8613 /* Call __builtin_saveregs. */
8614 u = make_tree (sizetype, expand_builtin_saveregs ());
8615 u = fold_convert (ptr_type_node, u);
8616 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8617 TREE_SIDE_EFFECTS (t) = 1;
8618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
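  /* The lines below size the unnamed-FP-register area (an illustrative
     reading): e.g. if 3 of the 8 SFmode argument registers were used by
     named args, nfp becomes 8 - 3 == 5 and next_fp_limit is placed 5 words
     past the address returned by __builtin_saveregs.  */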
8620 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8621 if (nfp < 8)
8622 nfp = 8 - nfp;
8623 else
8624 nfp = 0;
8625 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8626 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8627 TREE_SIDE_EFFECTS (t) = 1;
8628 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8630 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8631 TREE_SIDE_EFFECTS (t) = 1;
8632 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8634 nint = crtl->args.info.arg_count[SH_ARG_INT];
8635 if (nint < 4)
8636 nint = 4 - nint;
8637 else
8638 nint = 0;
8639 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8640 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8641 TREE_SIDE_EFFECTS (t) = 1;
8642 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8644 u = make_tree (ptr_type_node, nextarg);
8645 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8646 TREE_SIDE_EFFECTS (t) = 1;
8647 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8650 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8651 member, return it. */
8652 static tree
8653 find_sole_member (tree type)
8655 tree field, member = NULL_TREE;
8657 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8659 if (TREE_CODE (field) != FIELD_DECL)
8660 continue;
8661 if (!DECL_SIZE (field))
8662 return NULL_TREE;
8663 if (integer_zerop (DECL_SIZE (field)))
8664 continue;
8665 if (member)
8666 return NULL_TREE;
8667 member = field;
8669 return member;
8672 /* Implement `va_arg'. */
8673 static tree
8674 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8675 gimple_seq *post_p ATTRIBUTE_UNUSED)
8677 HOST_WIDE_INT size, rsize;
8678 tree tmp, pptr_type_node;
8679 tree addr, lab_over = NULL, result = NULL;
8680 bool pass_by_ref;
8681 tree eff_type;
8683 if (!VOID_TYPE_P (type))
8684 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8685 else
8686 pass_by_ref = false;
8688 if (pass_by_ref)
8689 type = build_pointer_type (type);
8691 size = int_size_in_bytes (type);
8692 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
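  /* rsize is SIZE rounded up to a whole number of words; with 4-byte words
     (an assumed value) a 10-byte argument gives rsize == 12.  */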
8693 pptr_type_node = build_pointer_type (ptr_type_node);
8695 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8696 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8698 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8699 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8700 int pass_as_float;
8701 tree lab_false;
8702 tree member;
8704 f_next_o = TYPE_FIELDS (va_list_type_node);
8705 f_next_o_limit = DECL_CHAIN (f_next_o);
8706 f_next_fp = DECL_CHAIN (f_next_o_limit);
8707 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8708 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8710 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8711 NULL_TREE);
8712 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8713 valist, f_next_o_limit, NULL_TREE);
8714 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8715 valist, f_next_fp, NULL_TREE);
8716 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8717 valist, f_next_fp_limit, NULL_TREE);
8718 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8719 valist, f_next_stack, NULL_TREE);
8721 /* Structures with a single member with a distinct mode are passed
8722 like their member. This is relevant if the latter has a REAL_TYPE
8723 or COMPLEX_TYPE type. */
8724 eff_type = type;
8725 while (TREE_CODE (eff_type) == RECORD_TYPE
8726 && (member = find_sole_member (eff_type))
8727 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8728 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8729 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8731 tree field_type = TREE_TYPE (member);
8733 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8734 eff_type = field_type;
8735 else
8737 gcc_assert ((TYPE_ALIGN (eff_type)
8738 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8739 || (TYPE_ALIGN (eff_type)
8740 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8741 break;
8745 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8747 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8748 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8749 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8750 && size <= 16));
8752 else
8754 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8757 addr = create_tmp_var (pptr_type_node);
8758 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8759 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8761 valist = build_simple_mem_ref (addr);
8763 if (pass_as_float)
8765 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8766 tree cmp;
8767 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8769 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8770 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8772 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8773 tmp = next_fp_limit;
8774 if (size > 4 && !is_double)
8775 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8776 tmp = build2 (GE_EXPR, boolean_type_node,
8777 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8778 cmp = build3 (COND_EXPR, void_type_node, tmp,
8779 build1 (GOTO_EXPR, void_type_node,
8780 unshare_expr (lab_false)), NULL_TREE);
8781 if (!is_double)
8782 gimplify_and_add (cmp, pre_p);
8784 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8785 || (is_double || size == 16))
8787 tmp = fold_convert (sizetype, next_fp_tmp);
8788 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8789 size_int (UNITS_PER_WORD));
8790 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8791 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8793 if (is_double)
8794 gimplify_and_add (cmp, pre_p);
8796 #ifdef FUNCTION_ARG_SCmode_WART
8797 if (TYPE_MODE (eff_type) == SCmode
8798 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8800 tree subtype = TREE_TYPE (eff_type);
8801 tree real, imag;
8803 imag
8804 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8805 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8807 real
8808 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8809 real = get_initialized_tmp_var (real, pre_p, NULL);
8811 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8812 if (type != eff_type)
8813 result = build1 (VIEW_CONVERT_EXPR, type, result);
8814 result = get_initialized_tmp_var (result, pre_p, NULL);
8816 #endif /* FUNCTION_ARG_SCmode_WART */
8818 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8819 gimplify_and_add (tmp, pre_p);
8821 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8822 gimplify_and_add (tmp, pre_p);
8824 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8825 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8826 gimplify_assign (unshare_expr (next_fp_tmp),
8827 unshare_expr (valist), pre_p);
8829 gimplify_assign (unshare_expr (valist),
8830 unshare_expr (next_fp_tmp), post_p);
8831 valist = next_fp_tmp;
8833 else
8835 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8836 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8837 unshare_expr (next_o_limit));
8838 tmp = build3 (COND_EXPR, void_type_node, tmp,
8839 build1 (GOTO_EXPR, void_type_node,
8840 unshare_expr (lab_false)),
8841 NULL_TREE);
8842 gimplify_and_add (tmp, pre_p);
8844 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8845 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8847 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8848 gimplify_and_add (tmp, pre_p);
8850 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8851 gimplify_and_add (tmp, pre_p);
8853 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8854 gimplify_assign (unshare_expr (next_o),
8855 unshare_expr (next_o_limit), pre_p);
8857 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8858 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8861 if (!result)
8863 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8864 gimplify_and_add (tmp, pre_p);
8868 /* ??? In va-sh.h, there had been code to make values larger than
8869 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8871 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8872 if (result)
8874 gimplify_assign (result, tmp, pre_p);
8875 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8876 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8877 gimplify_and_add (tmp, pre_p);
8879 else
8880 result = tmp;
8882 if (pass_by_ref)
8883 result = build_va_arg_indirect_ref (result);
8885 return result;
8888 /* 64-bit floating point memory transfers are paired single precision
8889 loads or stores.  So DWARF information needs fixing in little endian
8890 (unless PR=SZ=1 in FPSCR).  */
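/* For example (little endian, DFmode value in the register pair fr0/fr1),
   the function below returns a PARALLEL of fr1 and fr0, telling the DWARF
   consumer which two single-precision halves actually hold the value.  */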
8891 static rtx
8892 sh_dwarf_register_span (rtx reg)
8894 unsigned regno = REGNO (reg);
8896 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8897 return NULL_RTX;
8899 return
8900 gen_rtx_PARALLEL (VOIDmode,
8901 gen_rtvec (2,
8902 gen_rtx_REG (SFmode, regno + 1),
8903 gen_rtx_REG (SFmode, regno)));
8906 static machine_mode
8907 sh_promote_function_mode (const_tree type, machine_mode mode,
8908 int *punsignedp, const_tree funtype,
8909 int for_return)
8911 if (sh_promote_prototypes (funtype))
8912 return promote_mode (type, mode, punsignedp);
8913 else
8914 return default_promote_function_mode (type, mode, punsignedp, funtype,
8915 for_return);
8918 static bool
8919 sh_promote_prototypes (const_tree type)
8921 if (TARGET_HITACHI)
8922 return false;
8923 if (! type)
8924 return true;
8925 return ! sh_attr_renesas_p (type);
8928 /* Whether an argument must be passed by reference. On SHcompact, we
8929 pretend arguments wider than 32 bits that would have been passed in
8930 registers are passed by reference, so that an SHmedia trampoline
8931 loads them into the full 64-bit registers.  */
8932 static int
8933 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8934 const_tree type, bool named)
8936 unsigned HOST_WIDE_INT size;
8938 if (type)
8939 size = int_size_in_bytes (type);
8940 else
8941 size = GET_MODE_SIZE (mode);
8943 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8944 && (!named
8945 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8946 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8947 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8948 && size > 4
8949 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8950 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8951 return size;
8952 else
8953 return 0;
8956 static bool
8957 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8958 const_tree type, bool named)
8960 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8962 if (targetm.calls.must_pass_in_stack (mode, type))
8963 return true;
8965 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8966 wants to know about pass-by-reference semantics for incoming
8967 arguments. */
8968 if (! cum)
8969 return false;
8971 if (TARGET_SHCOMPACT)
8973 cum->byref = shcompact_byref (cum, mode, type, named);
8974 return cum->byref != 0;
8977 return false;
8980 static bool
8981 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8982 const_tree type, bool named ATTRIBUTE_UNUSED)
8984 /* ??? How can it possibly be correct to return true only on the
8985 caller side of the equation? Is there someplace else in the
8986 sh backend that's magically producing the copies? */
8987 return (get_cumulative_args (cum)->outgoing
8988 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8989 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8992 /* Round a register number up to a proper boundary for an arg of mode
8993 MODE.
8994 The SH doesn't care about double alignment, so we only
8995 round doubles to even regs when asked to explicitly. */
8996 static int
8997 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8999 /* FIXME: This used to be a macro and has been copy pasted into this
9000 function as is. Make this more readable. */
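  /* A plain-language reading of the expression below: when double alignment
     is requested, or a DFmode/DCmode value still fits in the FP argument
     registers, and the mode is wider than one word, round the current
     argument-register count up to the next even number; otherwise return
     the count unchanged.  */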
9001 return
9002 (((TARGET_ALIGN_DOUBLE
9003 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9004 && (mode == DFmode || mode == DCmode)
9005 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9006 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9007 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9008 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9009 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
9012 /* Return true if an arg of the specified mode should be passed in a
9013 register, or false otherwise.  */
9014 static bool
9015 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9016 const_tree type)
9018 /* FIXME: This used to be a macro and has been copy pasted into this
9019 function as is. Make this more readable. */
9020 return
9021 ((type == 0
9022 || (! TREE_ADDRESSABLE (type)
9023 && (! (TARGET_HITACHI || cum.renesas_abi)
9024 || ! (AGGREGATE_TYPE_P (type)
9025 || (!TARGET_FPU_ANY
9026 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9027 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9028 && ! cum.force_mem
9029 && (TARGET_SH2E
9030 ? ((mode) == BLKmode
9031 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9032 + int_size_in_bytes (type))
9033 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9034 : ((sh_round_reg (cum, mode)
9035 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9036 <= NPARM_REGS (mode)))
9037 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
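/* Implement TARGET_ARG_PARTIAL_BYTES: return the number of bytes of an
   argument that are passed in registers when the remainder spills to the
   stack.  For example (assuming four SImode argument registers), if three
   are already in use and an 8-byte argument arrives, one word still fits,
   so 4 bytes are returned and the other 4 go on the stack.  */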
9040 static int
9041 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9042 tree type, bool named ATTRIBUTE_UNUSED)
9044 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9045 int words = 0;
9047 if (!TARGET_SH5
9048 && sh_pass_in_reg_p (*cum, mode, type)
9049 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9050 && (sh_round_reg (*cum, mode)
9051 + (mode != BLKmode
9052 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9053 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9054 > NPARM_REGS (mode)))
9055 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9057 else if (!TARGET_SHCOMPACT
9058 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9059 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9061 return words * UNITS_PER_WORD;
9065 /* Define where to put the arguments to a function.
9066 Value is zero to push the argument on the stack,
9067 or a hard register in which to store the argument.
9069 MODE is the argument's machine mode.
9070 TYPE is the data type of the argument (as a tree).
9071 This is null for libcalls where that information may
9072 not be available.
9073 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9074 the preceding args and about the function being called.
9075 NAMED is nonzero if this argument is a named parameter
9076 (otherwise it is an extra parameter matching an ellipsis).
9078 On SH the first args are normally in registers
9079 and the rest are pushed. Any arg that starts within the first
9080 NPARM_REGS words is at least partially passed in a register unless
9081 its data type forbids. */
9082 static rtx
9083 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9084 const_tree type, bool named)
9086 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9088 if (! TARGET_SH5 && mode == VOIDmode)
9089 return GEN_INT (ca->renesas_abi ? 1 : 0);
9091 if (! TARGET_SH5
9092 && sh_pass_in_reg_p (*ca, mode, type)
9093 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9095 int regno;
9097 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9098 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9100 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9101 gen_rtx_REG (SFmode,
9102 BASE_ARG_REG (mode)
9103 + (sh_round_reg (*ca, mode) ^ 1)),
9104 const0_rtx);
9105 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9106 gen_rtx_REG (SFmode,
9107 BASE_ARG_REG (mode)
9108 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9109 GEN_INT (4));
9110 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9113 /* If the alignment of a DF value causes an SF register to be
9114 skipped, we will use that skipped register for the next SF
9115 value. */
9116 if ((TARGET_HITACHI || ca->renesas_abi)
9117 && ca->free_single_fp_reg
9118 && mode == SFmode)
9119 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9121 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9122 ^ (mode == SFmode && TARGET_SH4
9123 && TARGET_LITTLE_ENDIAN
9124 && ! TARGET_HITACHI && ! ca->renesas_abi);
9125 return gen_rtx_REG (mode, regno);
9129 if (TARGET_SH5)
9131 if (mode == VOIDmode && TARGET_SHCOMPACT)
9132 return GEN_INT (ca->call_cookie);
9134 /* The following test assumes unnamed arguments are promoted to
9135 DFmode. */
9136 if (mode == SFmode && ca->free_single_fp_reg)
9137 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9139 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9140 && (named || ! ca->prototype_p)
9141 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9143 if (! ca->prototype_p && TARGET_SHMEDIA)
9144 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9146 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9147 FIRST_FP_PARM_REG
9148 + ca->arg_count[(int) SH_ARG_FLOAT]);
9151 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9152 && (! TARGET_SHCOMPACT
9153 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9154 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9155 type, named))))
9157 return gen_rtx_REG (mode, (FIRST_PARM_REG
9158 + ca->arg_count[(int) SH_ARG_INT]));
9161 return NULL_RTX;
9164 return NULL_RTX;
9167 /* Update the data in CUM to advance over an argument
9168 of mode MODE and data type TYPE.
9169 (TYPE is null for libcalls where that information may not be
9170 available.) */
9171 static void
9172 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9173 const_tree type, bool named)
9175 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9177 if (ca->force_mem)
9178 ca->force_mem = 0;
9179 else if (TARGET_SH5)
9181 const_tree type2 = (ca->byref && type
9182 ? TREE_TYPE (type)
9183 : type);
9184 machine_mode mode2 = (ca->byref && type
9185 ? TYPE_MODE (type2)
9186 : mode);
9187 int dwords = ((ca->byref
9188 ? ca->byref
9189 : mode2 == BLKmode
9190 ? int_size_in_bytes (type2)
9191 : GET_MODE_SIZE (mode2)) + 7) / 8;
9192 int numregs = MIN (dwords, NPARM_REGS (SImode)
9193 - ca->arg_count[(int) SH_ARG_INT]);
9195 if (numregs)
9197 ca->arg_count[(int) SH_ARG_INT] += numregs;
9198 if (TARGET_SHCOMPACT
9199 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9201 ca->call_cookie
9202 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9203 - numregs, 1);
9204 /* N.B. We want this also for outgoing. */
9205 ca->stack_regs += numregs;
9207 else if (ca->byref)
9209 if (! ca->outgoing)
9210 ca->stack_regs += numregs;
9211 ca->byref_regs += numregs;
9212 ca->byref = 0;
9214 ca->call_cookie
9215 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9216 - numregs, 2);
9217 while (--numregs);
9218 ca->call_cookie
9219 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9220 - 1, 1);
9222 else if (dwords > numregs)
9224 int pushregs = numregs;
9226 if (TARGET_SHCOMPACT)
9227 ca->stack_regs += numregs;
9228 while (pushregs < NPARM_REGS (SImode) - 1
9229 && (CALL_COOKIE_INT_REG_GET
9230 (ca->call_cookie,
9231 NPARM_REGS (SImode) - pushregs)
9232 == 1))
9234 ca->call_cookie
9235 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9236 - pushregs, 1);
9237 pushregs++;
9239 if (numregs == NPARM_REGS (SImode))
9240 ca->call_cookie
9241 |= CALL_COOKIE_INT_REG (0, 1)
9242 | CALL_COOKIE_STACKSEQ (numregs - 1);
9243 else
9244 ca->call_cookie
9245 |= CALL_COOKIE_STACKSEQ (numregs);
9248 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9249 && (named || ! ca->prototype_p))
9251 if (mode2 == SFmode && ca->free_single_fp_reg)
9252 ca->free_single_fp_reg = 0;
9253 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9254 < NPARM_REGS (SFmode))
9256 int numfpregs
9257 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9258 NPARM_REGS (SFmode)
9259 - ca->arg_count[(int) SH_ARG_FLOAT]);
9261 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9263 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9265 if (ca->outgoing && numregs > 0)
9268 ca->call_cookie
9269 |= (CALL_COOKIE_INT_REG
9270 (ca->arg_count[(int) SH_ARG_INT]
9271 - numregs + ((numfpregs - 2) / 2),
9272 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9273 - numfpregs) / 2));
9275 while (numfpregs -= 2);
9277 else if (mode2 == SFmode && (named)
9278 && (ca->arg_count[(int) SH_ARG_FLOAT]
9279 < NPARM_REGS (SFmode)))
9280 ca->free_single_fp_reg
9281 = FIRST_FP_PARM_REG - numfpregs
9282 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9285 return;
9288 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9290 /* Note that we've used the skipped register. */
9291 if (mode == SFmode && ca->free_single_fp_reg)
9293 ca->free_single_fp_reg = 0;
9294 return;
9296 /* When we have a DF after an SF, there's an SF register that gets
9297 skipped in order to align the DF value. We note this skipped
9298 register, because the next SF value will use it, and not the
9299 SF that follows the DF. */
9300 if (mode == DFmode
9301 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9303 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9304 + BASE_ARG_REG (mode));
9308 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9309 || sh_pass_in_reg_p (*ca, mode, type))
9310 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9311 = (sh_round_reg (*ca, mode)
9312 + (mode == BLKmode
9313 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9314 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9317 /* The Renesas calling convention doesn't quite fit into this scheme since
9318 the address is passed like an invisible argument, but one that is always
9319 passed in memory. */
9320 static rtx
9321 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9323 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9324 return NULL_RTX;
9325 return gen_rtx_REG (Pmode, 2);
9328 /* Worker function for TARGET_FUNCTION_VALUE.
9330 For the SH, this is like LIBCALL_VALUE, except that we must change the
9331 mode like PROMOTE_MODE does.
9332 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9333 tested here has to be kept in sync with the one in
9334 explow.c:promote_mode. */
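/* For instance (a sketch derived from the condition below, not a statement
   of the ABI): a function declared as returning 'short' or 'char' gets its
   value in the SImode return register (DImode on SHMEDIA64) when the
   promotion condition holds, rather than in HImode / QImode.  */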
9335 static rtx
9336 sh_function_value (const_tree valtype,
9337 const_tree fn_decl_or_type,
9338 bool outgoing ATTRIBUTE_UNUSED)
9340 if (fn_decl_or_type
9341 && !DECL_P (fn_decl_or_type))
9342 fn_decl_or_type = NULL;
9344 return gen_rtx_REG (
9345 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9346 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9347 && (TREE_CODE (valtype) == INTEGER_TYPE
9348 || TREE_CODE (valtype) == ENUMERAL_TYPE
9349 || TREE_CODE (valtype) == BOOLEAN_TYPE
9350 || TREE_CODE (valtype) == REAL_TYPE
9351 || TREE_CODE (valtype) == OFFSET_TYPE))
9352 && sh_promote_prototypes (fn_decl_or_type)
9353 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9354 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9357 /* Worker function for TARGET_LIBCALL_VALUE. */
9358 static rtx
9359 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9361 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9364 /* Return true if N is a possible register number of function value. */
9365 static bool
9366 sh_function_value_regno_p (const unsigned int regno)
9368 return ((regno) == FIRST_RET_REG
9369 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9370 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9373 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9374 static bool
9375 sh_return_in_memory (const_tree type, const_tree fndecl)
9377 if (TARGET_SH5)
9379 if (TYPE_MODE (type) == BLKmode)
9380 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9381 else
9382 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9384 else
9386 return (TYPE_MODE (type) == BLKmode
9387 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9388 && TREE_CODE (type) == RECORD_TYPE));
9392 /* We actually emit the code in sh_expand_prologue. We used to use
9393 a static variable to flag that we need to emit this code, but that
9394 doesn't work when inlining, when functions are deferred and then emitted
9395 later. Fortunately, we already have two flags that are part of struct
9396 function that tell if a function uses varargs or stdarg. */
9397 static void
9398 sh_setup_incoming_varargs (cumulative_args_t ca,
9399 machine_mode mode,
9400 tree type,
9401 int *pretend_arg_size,
9402 int second_time ATTRIBUTE_UNUSED)
9404 gcc_assert (cfun->stdarg);
9405 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9407 int named_parm_regs, anon_parm_regs;
9409 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9410 + (mode == BLKmode
9411 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9412 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9413 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9414 if (anon_parm_regs > 0)
9415 *pretend_arg_size = anon_parm_regs * 4;
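/* Worked example (a sketch assuming the usual four SImode argument
   registers r4..r7 of non-SH5 targets): for

     int sum (int n, ...);

   the named argument N occupies one register, so named_parm_regs is 1,
   anon_parm_regs is 3 and *pretend_arg_size becomes 12, which makes the
   prologue flush r5..r7 to the stack where va_arg can find them.  */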
9419 static bool
9420 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9422 return TARGET_SH5;
9425 static bool
9426 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9428 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9430 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9434 /* Define the offset between two registers, one to be eliminated, and
9435 the other its replacement, at the start of a routine. */
9437 initial_elimination_offset (int from, int to)
9439 int regs_saved;
9440 int regs_saved_rounding = 0;
9441 int total_saved_regs_space;
9442 int total_auto_space;
9443 int save_flags = target_flags;
9444 int copy_flags;
9445 HARD_REG_SET live_regs_mask;
9447 shmedia_space_reserved_for_target_registers = false;
9448 regs_saved = calc_live_regs (&live_regs_mask);
9449 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9451 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9453 shmedia_space_reserved_for_target_registers = true;
9454 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9457 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9458 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9459 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9461 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9462 copy_flags = target_flags;
9463 target_flags = save_flags;
9465 total_saved_regs_space = regs_saved + regs_saved_rounding;
9467 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9468 return total_saved_regs_space + total_auto_space
9469 + crtl->args.info.byref_regs * 8;
9471 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9472 return total_saved_regs_space + total_auto_space
9473 + crtl->args.info.byref_regs * 8;
9475 /* Initial gap between fp and sp is 0. */
9476 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9477 return 0;
9479 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9480 return rounded_frame_size (0);
9482 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9483 return rounded_frame_size (0);
9485 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9486 && (to == HARD_FRAME_POINTER_REGNUM
9487 || to == STACK_POINTER_REGNUM));
9488 if (TARGET_SH5)
9490 int n = total_saved_regs_space;
9491 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9492 save_schedule schedule;
9493 save_entry *entry;
9495 n += total_auto_space;
9497 /* If it wasn't saved, there's not much we can do. */
9498 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9499 return n;
9501 target_flags = copy_flags;
9503 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9504 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9505 if (entry->reg == pr_reg)
9507 target_flags = save_flags;
9508 return entry->offset;
9510 gcc_unreachable ();
9512 else
9513 return total_auto_space;
9516 /* Parse the -mfixed-range= option string. */
9517 void
9518 sh_fix_range (const char *const_str)
9520 int i, first, last;
9521 char *str, *dash, *comma;
9523 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
9524 REG2 are either register names or register numbers. The effect
9525 of this option is to mark the registers in the range from REG1 to
9526 REG2 as ``fixed'' so they won't be used by the compiler. */
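/* For example (an illustrative choice of registers, not a recommendation):

     -mfixed-range=r8-r10,r13-r13

   marks r8, r9, r10 and r13 as both fixed and call-used for this
   compilation.  */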
9528 i = strlen (const_str);
9529 str = (char *) alloca (i + 1);
9530 memcpy (str, const_str, i + 1);
9532 while (1)
9534 dash = strchr (str, '-');
9535 if (!dash)
9537 warning (0, "value of -mfixed-range must have form REG1-REG2");
9538 return;
9540 *dash = '\0';
9541 comma = strchr (dash + 1, ',');
9542 if (comma)
9543 *comma = '\0';
9545 first = decode_reg_name (str);
9546 if (first < 0)
9548 warning (0, "unknown register name: %s", str);
9549 return;
9552 last = decode_reg_name (dash + 1);
9553 if (last < 0)
9555 warning (0, "unknown register name: %s", dash + 1);
9556 return;
9559 *dash = '-';
9561 if (first > last)
9563 warning (0, "%s-%s is an empty range", str, dash + 1);
9564 return;
9567 for (i = first; i <= last; ++i)
9568 fixed_regs[i] = call_used_regs[i] = 1;
9570 if (!comma)
9571 break;
9573 *comma = ',';
9574 str = comma + 1;
9578 /* Insert any deferred function attributes from earlier pragmas. */
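/* For instance (a sketch of typical user code; the handler name is made
   up), a preceding pragma such as

     #pragma interrupt
     void timer_isr (void);

   queues the interrupt_handler attribute, and it gets attached to the
   following function declaration here.  */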
9579 static void
9580 sh_insert_attributes (tree node, tree *attributes)
9582 tree attrs;
9584 if (TREE_CODE (node) != FUNCTION_DECL)
9585 return;
9587 /* We are only interested in declarations. */
9588 if (!DECL_P (node))
9589 return;
9591 /* Append the attributes to the deferred attributes. */
9592 *sh_deferred_function_attributes_tail = *attributes;
9593 attrs = sh_deferred_function_attributes;
9594 if (!attrs)
9595 return;
9597 /* Some attributes imply or require the interrupt attribute. */
9598 if (!lookup_attribute ("interrupt_handler", attrs)
9599 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9601 /* If we have a trapa_handler, but no interrupt_handler attribute,
9602 insert an interrupt_handler attribute. */
9603 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9604 /* We can't use sh_pr_interrupt here because that's not in the
9605 java frontend. */
9606 attrs
9607 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9608 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9609 if the interrupt attribute is missing, we ignore the attribute
9610 and warn. */
9611 else if (lookup_attribute ("sp_switch", attrs)
9612 || lookup_attribute ("trap_exit", attrs)
9613 || lookup_attribute ("nosave_low_regs", attrs)
9614 || lookup_attribute ("resbank", attrs))
9616 tree *tail;
9618 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9620 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9621 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9622 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9623 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9624 warning (OPT_Wattributes,
9625 "%qE attribute only applies to interrupt functions",
9626 TREE_PURPOSE (attrs));
9627 else
9629 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9630 NULL_TREE);
9631 tail = &TREE_CHAIN (*tail);
9634 attrs = *attributes;
9638 /* Install the processed list. */
9639 *attributes = attrs;
9641 /* Clear deferred attributes. */
9642 sh_deferred_function_attributes = NULL_TREE;
9643 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9645 return;
9648 /*------------------------------------------------------------------------------
9649 Target specific attributes
9650 Supported attributes are:
9652 * interrupt_handler
9653 Specifies this function is an interrupt handler.
9655 * trapa_handler
9656 Like interrupt_handler, but don't save all registers.
9658 * sp_switch
9659 Specifies an alternate stack for an interrupt handler to run on.
9661 * trap_exit
9662 Use a trapa to exit an interrupt function instead of rte.
9664 * nosave_low_regs
9665 Don't save r0..r7 in an interrupt handler function.
9666 This is useful on SH3* and SH4*, which have a separate set of low
9667 regs for user and privileged modes.
9668 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9669 those that run with interrupts disabled and thus can't be
9670 interrupted themselves).
9672 * renesas
9673 Use Renesas calling/layout conventions (functions and structures).
9675 * resbank
9676 In case of an interrupt handler function, use a register bank to
9677 save registers R0-R14, MACH, MACL, GBR and PR.
9678 This is available only on SH2A targets.
9680 * function_vector
9681 Declares a function to be called using the TBR relative addressing
9682 mode. Takes an argument that specifies the slot number in the table
9683 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
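/* Usage sketch (illustrative only; the handler names, the "alt_stack"
   symbol and the numeric arguments are made-up examples):

     void __attribute__ ((interrupt_handler, nosave_low_regs))
     dma_done_isr (void);

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
			  trap_exit (0x40)))
     watchdog_isr (void);

   and, on SH2A:

     void __attribute__ ((function_vector (12)))
     fast_call (void);  */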
9686 /* Handle a 'resbank' attribute. */
9687 static tree
9688 sh_handle_resbank_handler_attribute (tree * node, tree name,
9689 tree args ATTRIBUTE_UNUSED,
9690 int flags ATTRIBUTE_UNUSED,
9691 bool * no_add_attrs)
9693 if (!TARGET_SH2A)
9695 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9696 name);
9697 *no_add_attrs = true;
9699 if (TREE_CODE (*node) != FUNCTION_DECL)
9701 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9702 name);
9703 *no_add_attrs = true;
9706 return NULL_TREE;
9709 /* Handle an "interrupt_handler" attribute; arguments as in
9710 struct attribute_spec.handler. */
9711 static tree
9712 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9713 tree args ATTRIBUTE_UNUSED,
9714 int flags ATTRIBUTE_UNUSED,
9715 bool *no_add_attrs)
9717 if (TREE_CODE (*node) != FUNCTION_DECL)
9719 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9720 name);
9721 *no_add_attrs = true;
9723 else if (TARGET_SHCOMPACT)
9725 error ("attribute interrupt_handler is not compatible with -m5-compact");
9726 *no_add_attrs = true;
9729 return NULL_TREE;
9732 /* Handle a 'function_vector' attribute; arguments as in
9733 struct attribute_spec.handler. */
9734 static tree
9735 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9736 tree args ATTRIBUTE_UNUSED,
9737 int flags ATTRIBUTE_UNUSED,
9738 bool * no_add_attrs)
9740 if (!TARGET_SH2A)
9742 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9743 name);
9744 *no_add_attrs = true;
9746 else if (TREE_CODE (*node) != FUNCTION_DECL)
9748 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9749 name);
9750 *no_add_attrs = true;
9752 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9754 /* The argument must be a constant integer. */
9755 warning (OPT_Wattributes,
9756 "%qE attribute argument not an integer constant",
9757 name);
9758 *no_add_attrs = true;
9760 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9762 /* The argument value must be between 0 and 255. */
9763 warning (OPT_Wattributes,
9764 "%qE attribute argument should be between 0 and 255",
9765 name);
9766 *no_add_attrs = true;
9768 return NULL_TREE;
9771 /* Returns true if the call target X (a SYMBOL_REF) refers to a function
9772 that has been assigned the 'function_vector' attribute. */
9773 bool
9774 sh2a_is_function_vector_call (rtx x)
9776 if (GET_CODE (x) == SYMBOL_REF
9777 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9779 tree tr = SYMBOL_REF_DECL (x);
9781 if (sh2a_function_vector_p (tr))
9782 return true;
9785 return false;
9788 /* Returns the function vector number, if the attribute
9789 'function_vector' is assigned, otherwise returns zero. */
9791 sh2a_get_function_vector_number (rtx x)
9793 int num;
9794 tree list, t;
9796 if ((GET_CODE (x) == SYMBOL_REF)
9797 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9799 t = SYMBOL_REF_DECL (x);
9801 if (TREE_CODE (t) != FUNCTION_DECL)
9802 return 0;
9804 list = SH_ATTRIBUTES (t);
9805 while (list)
9807 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9809 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9810 return num;
9813 list = TREE_CHAIN (list);
9816 return 0;
9818 else
9819 return 0;
9822 /* Handle an "sp_switch" attribute; arguments as in
9823 struct attribute_spec.handler. */
9824 static tree
9825 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9828 if (TREE_CODE (*node) != FUNCTION_DECL)
9830 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9831 name);
9832 *no_add_attrs = true;
9834 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9836 /* The argument must be a constant string. */
9837 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9838 name);
9839 *no_add_attrs = true;
9842 return NULL_TREE;
9845 /* Handle a "trap_exit" attribute; arguments as in
9846 struct attribute_spec.handler. */
9847 static tree
9848 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9849 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9851 if (TREE_CODE (*node) != FUNCTION_DECL)
9853 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9854 name);
9855 *no_add_attrs = true;
9857 /* The argument specifies a trap number to be used in a trapa instruction
9858 at function exit (instead of an rte instruction). */
9859 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9861 /* The argument must be a constant integer. */
9862 warning (OPT_Wattributes, "%qE attribute argument not an "
9863 "integer constant", name);
9864 *no_add_attrs = true;
9867 return NULL_TREE;
9870 static tree
9871 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9872 tree name ATTRIBUTE_UNUSED,
9873 tree args ATTRIBUTE_UNUSED,
9874 int flags ATTRIBUTE_UNUSED,
9875 bool *no_add_attrs ATTRIBUTE_UNUSED)
9877 return NULL_TREE;
9880 /* True if __attribute__((renesas)) or -mrenesas. */
9881 bool
9882 sh_attr_renesas_p (const_tree td)
9884 if (TARGET_HITACHI)
9885 return true;
9886 if (td == NULL_TREE)
9887 return false;
9888 if (DECL_P (td))
9889 td = TREE_TYPE (td);
9890 if (td == error_mark_node)
9891 return false;
9892 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9893 != NULL_TREE);
9896 /* True if __attribute__((renesas)) or -mrenesas, for the current
9897 function. */
9898 bool
9899 sh_cfun_attr_renesas_p (void)
9901 return sh_attr_renesas_p (current_function_decl);
9904 /* Returns true if the current function has the "interrupt_handler"
9905 attribute set. */
9906 bool
9907 sh_cfun_interrupt_handler_p (void)
9909 return (lookup_attribute ("interrupt_handler",
9910 DECL_ATTRIBUTES (current_function_decl))
9911 != NULL_TREE);
9914 /* Returns true if FUNC has been assigned the attribute
9915 "function_vector". */
9916 bool
9917 sh2a_function_vector_p (tree func)
9919 tree list;
9920 if (TREE_CODE (func) != FUNCTION_DECL)
9921 return false;
9923 list = SH_ATTRIBUTES (func);
9924 while (list)
9926 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9927 return true;
9929 list = TREE_CHAIN (list);
9931 return false;
9934 /* Returns true if the current function has the "resbank" attribute set. */
9935 bool
9936 sh_cfun_resbank_handler_p (void)
9938 return ((lookup_attribute ("resbank",
9939 DECL_ATTRIBUTES (current_function_decl))
9940 != NULL_TREE)
9941 && (lookup_attribute ("interrupt_handler",
9942 DECL_ATTRIBUTES (current_function_decl))
9943 != NULL_TREE) && TARGET_SH2A);
9946 /* Returns true if the current function has a "trap_exit" attribute set. */
9947 bool
9948 sh_cfun_trap_exit_p (void)
9950 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9951 != NULL_TREE;
9954 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9955 static const char *
9956 sh_check_pch_target_flags (int old_flags)
9958 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9959 | MASK_SH_E | MASK_HARD_SH4
9960 | MASK_FPU_SINGLE | MASK_SH4))
9961 return _("created and used with different architectures / ABIs");
9962 if ((old_flags ^ target_flags) & MASK_HITACHI)
9963 return _("created and used with different ABIs");
9964 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9965 return _("created and used with different endianness");
9966 return NULL;
9969 /* Predicates used by the templates. */
9971 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9972 Used only in general_movsrc_operand. */
9973 bool
9974 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9976 switch (REGNO (op))
9978 case PR_REG:
9979 case MACL_REG:
9980 case MACH_REG:
9981 return true;
9983 return false;
9986 /* Returns true if OP is a floating point value with value 0.0. */
9987 bool
9988 fp_zero_operand (rtx op)
9990 REAL_VALUE_TYPE r;
9992 if (GET_MODE (op) != SFmode)
9993 return false;
9995 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9996 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9999 /* Returns true if OP is a floating point value with value 1.0. */
10000 bool
10001 fp_one_operand (rtx op)
10003 REAL_VALUE_TYPE r;
10005 if (GET_MODE (op) != SFmode)
10006 return false;
10008 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10009 return REAL_VALUES_EQUAL (r, dconst1);
10012 /* Return the TLS type for TLS symbols. */
10013 enum tls_model
10014 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10016 if (GET_CODE (op) != SYMBOL_REF)
10017 return TLS_MODEL_NONE;
10018 return SYMBOL_REF_TLS_MODEL (op);
10021 /* Return the destination address of a branch. */
10022 static int
10023 branch_dest (rtx branch)
10025 rtx dest = SET_SRC (PATTERN (branch));
10026 int dest_uid;
10028 if (GET_CODE (dest) == IF_THEN_ELSE)
10029 dest = XEXP (dest, 1);
10030 dest = XEXP (dest, 0);
10031 dest_uid = INSN_UID (dest);
10032 return INSN_ADDRESSES (dest_uid);
10035 /* Return nonzero if REG is not used after INSN.
10036 We assume REG is a reload reg, and therefore does
10037 not live past labels. It may live past calls or jumps though. */
10038 bool
10039 reg_unused_after (rtx reg, rtx_insn *insn)
10041 enum rtx_code code;
10042 rtx set;
10044 /* If the reg is set by this instruction, then it is safe for our
10045 case. Disregard the case where this is a store to memory, since
10046 we are checking a register used in the store address. */
10047 set = single_set (insn);
10048 if (set && !MEM_P (SET_DEST (set))
10049 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10050 return true;
10052 while ((insn = NEXT_INSN (insn)))
10054 rtx set;
10055 if (!INSN_P (insn))
10056 continue;
10058 code = GET_CODE (insn);
10060 #if 0
10061 /* If this is a label that existed before reload, then the register
10062 is dead here. However, if this is a label added by reorg, then
10063 the register may still be live here. We can't tell the difference,
10064 so we just ignore labels completely. */
10065 if (code == CODE_LABEL)
10066 return 1;
10067 /* else */
10068 #endif
10070 if (code == JUMP_INSN)
10071 return false;
10073 /* If this is a sequence, we must handle them all at once.
10074 We could have for instance a call that sets the target register,
10075 and an insn in a delay slot that uses the register. In this case,
10076 we must return 0. */
10077 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10079 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10080 int i;
10081 int retval = 0;
10083 for (i = 0; i < seq->len (); i++)
10085 rtx_insn *this_insn = seq->insn (i);
10086 rtx set = single_set (this_insn);
10088 if (CALL_P (this_insn))
10089 code = CALL_INSN;
10090 else if (JUMP_P (this_insn))
10092 if (INSN_ANNULLED_BRANCH_P (this_insn))
10093 return false;
10094 code = JUMP_INSN;
10097 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10098 return false;
10099 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10101 if (!MEM_P (SET_DEST (set)))
10102 retval = true;
10103 else
10104 return false;
10106 if (set == NULL_RTX
10107 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10108 return false;
10110 if (retval == 1)
10111 return true;
10112 else if (code == JUMP_INSN)
10113 return false;
10116 set = single_set (insn);
10117 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10118 return false;
10119 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10120 return !MEM_P (SET_DEST (set));
10121 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10122 return false;
10124 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10125 return true;
10127 return true;
10131 static GTY(()) rtx t_reg_rtx;
10133 get_t_reg_rtx (void)
10135 if (! t_reg_rtx)
10136 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10137 return t_reg_rtx;
10140 static GTY(()) tree fpscr_values;
10142 static void
10143 emit_fpu_switch (rtx scratch, int index)
10145 rtx src;
10147 if (fpscr_values == NULL)
10149 tree t;
10151 t = build_index_type (integer_one_node);
10152 t = build_array_type (integer_type_node, t);
10153 t = build_decl (BUILTINS_LOCATION,
10154 VAR_DECL, get_identifier ("__fpscr_values"), t);
10155 DECL_ARTIFICIAL (t) = 1;
10156 DECL_IGNORED_P (t) = 1;
10157 DECL_EXTERNAL (t) = 1;
10158 TREE_STATIC (t) = 1;
10159 TREE_PUBLIC (t) = 1;
10160 TREE_USED (t) = 1;
10162 fpscr_values = t;
10165 src = DECL_RTL (fpscr_values);
10166 if (!can_create_pseudo_p ())
10168 emit_move_insn (scratch, XEXP (src, 0));
10169 if (index != 0)
10170 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10171 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10173 else
10174 src = adjust_address (src, SImode, index * 4);
10176 emit_insn (gen_lds_fpscr (src));
10179 static rtx get_free_reg (HARD_REG_SET);
10181 /* This function returns a register to use for loading the address from which
10182 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
10183 able to use pseudo registers after combine, or have a better mechanism
10184 for choosing a register, it should be done here. */
10185 /* REGS_LIVE is the liveness information for the point for which we
10186 need this allocation. In some bare-bones exit blocks, r1 is live at the
10187 start. We can even have all of r0..r3 being live:
10188 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10189 The INSN before which new insns are placed will clobber the register
10190 we return. If a basic block consists only of setting the return value
10191 register to a pseudo and using that register, the return value is not
10192 live before or after this block, yet we'll insert our insns right in
10193 the middle. */
10194 static rtx
10195 get_free_reg (HARD_REG_SET regs_live)
10197 if (! TEST_HARD_REG_BIT (regs_live, 1))
10198 return gen_rtx_REG (Pmode, 1);
10200 /* Hard reg 1 is live; since this is a small register classes target,
10201 there shouldn't be anything but a jump before the function end. */
10202 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10203 return gen_rtx_REG (Pmode, 7);
10206 /* This function will set the fpscr from memory.
10207 MODE is the mode we are setting it to. */
10208 void
10209 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10211 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10212 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10213 rtx addr_reg;
10215 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10216 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10219 /* Is the given character a logical line separator for the assembler? */
10220 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10221 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10222 #endif
10224 static bool
10225 sequence_insn_p (rtx_insn *insn)
10227 rtx_insn *prev, *next;
10229 prev = PREV_INSN (insn);
10230 if (prev == NULL)
10231 return false;
10233 next = NEXT_INSN (prev);
10234 if (next == NULL)
10235 return false;
10237 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10241 sh_insn_length_adjustment (rtx_insn *insn)
10243 /* Instructions with unfilled delay slots take up an extra two bytes for
10244 the nop in the delay slot. */
10245 if (((NONJUMP_INSN_P (insn)
10246 && GET_CODE (PATTERN (insn)) != USE
10247 && GET_CODE (PATTERN (insn)) != CLOBBER)
10248 || CALL_P (insn) || JUMP_P (insn))
10249 && ! sequence_insn_p (insn)
10250 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10251 return 2;
10253 /* Increase the insn length of a cbranch without a delay slot insn to
10254 force a delay slot which will be stuffed with a nop. */
10255 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10256 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10257 && ! sequence_insn_p (insn))
10258 return 2;
10260 /* sh-dsp parallel processing insns take four bytes instead of two. */
10262 if (NONJUMP_INSN_P (insn))
10264 int sum = 0;
10265 rtx body = PATTERN (insn);
10266 const char *templ;
10267 char c;
10268 bool maybe_label = true;
10270 if (GET_CODE (body) == ASM_INPUT)
10271 templ = XSTR (body, 0);
10272 else if (asm_noperands (body) >= 0)
10273 templ
10274 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10275 else
10276 return 0;
10279 int ppi_adjust = 0;
10282 c = *templ++;
10283 while (c == ' ' || c == '\t');
10284 /* all sh-dsp parallel-processing insns start with p.
10285 The only non-ppi sh insn starting with p is pref.
10286 The only ppi starting with pr is prnd. */
10287 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10288 ppi_adjust = 2;
10289 /* The repeat pseudo-insn expands to three insns, a total of
10290 six bytes in size. */
10291 else if ((c == 'r' || c == 'R')
10292 && ! strncasecmp ("epeat", templ, 5))
10293 ppi_adjust = 4;
10294 while (c && c != '\n'
10295 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10297 /* If this is a label, it is obviously not a ppi insn. */
10298 if (c == ':' && maybe_label)
10300 ppi_adjust = 0;
10301 break;
10303 else if (c == '\'' || c == '"')
10304 maybe_label = false;
10305 c = *templ++;
10307 sum += ppi_adjust;
10308 maybe_label = c != ':';
10310 while (c);
10311 return sum;
10313 return 0;
10316 /* Return TRUE for a valid displacement for the REG+disp addressing
10317 with MODE. */
10318 bool
10319 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10320 bool allow_zero)
10322 if (! CONST_INT_P (op))
10323 return false;
10325 if (TARGET_SHMEDIA)
10327 int size;
10329 /* Check if this is the address of an unaligned load / store. */
10330 if (mode == VOIDmode)
10331 return satisfies_constraint_I06 (op);
10333 size = GET_MODE_SIZE (mode);
10334 return (!(INTVAL (op) & (size - 1))
10335 && INTVAL (op) >= -512 * size
10336 && INTVAL (op) < 512 * size);
10338 else
10340 const HOST_WIDE_INT offset = INTVAL (op);
10341 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10342 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10344 /* If the mode does not support any displacement always return false.
10345 Even though an index of '0' is actually always valid, it will cause
10346 troubles when e.g. a DFmode move is split into two SFmode moves,
10347 where one SFmode move will have index '0' and the other move will
10348 have index '4'. */
10349 if (!allow_zero && max_disp < 1)
10350 return false;
10352 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10356 /* Recognize an RTL expression that is a valid memory address for
10357 an instruction.
10358 The MODE argument is the machine mode for the MEM expression
10359 that wants to use this address.
10360 Allow REG
10361 REG+disp
10362 REG+r0
10363 REG++
10364 --REG
10366 GBR+disp */
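/* In SH assembler syntax the forms above correspond to (operands are
   illustrative):
     @rn          REG
     @(disp,rn)   REG+disp
     @(r0,rn)     REG+r0
     @rn+         REG++   (post-increment)
     @-rn         --REG   (pre-decrement)
     @(disp,gbr)  GBR+disp  */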
10367 static bool
10368 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10370 if (! ALLOW_INDEXED_ADDRESS
10371 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10372 return false;
10374 if (REG_P (x) && REGNO (x) == GBR_REG)
10375 return true;
10377 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10378 return true;
10379 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10380 && ! TARGET_SHMEDIA
10381 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10382 return true;
10383 else if (GET_CODE (x) == PLUS)
10385 rtx xop0 = XEXP (x, 0);
10386 rtx xop1 = XEXP (x, 1);
10388 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10389 return gbr_displacement (xop1, mode);
10391 if (GET_MODE_SIZE (mode) <= 8
10392 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10393 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10394 return true;
10396 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10397 || ((xop0 == stack_pointer_rtx
10398 || xop0 == hard_frame_pointer_rtx)
10399 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10400 || ((xop1 == stack_pointer_rtx
10401 || xop1 == hard_frame_pointer_rtx)
10402 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10403 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10404 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10405 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10406 && TARGET_FMOVD && mode == DFmode)))
10408 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10409 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10410 return true;
10411 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10412 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10413 return true;
10417 return false;
10420 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10421 isn't protected by a PIC unspec. */
10422 bool
10423 nonpic_symbol_mentioned_p (rtx x)
10425 const char *fmt;
10426 int i;
10428 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10429 || GET_CODE (x) == PC)
10430 return true;
10432 /* We don't want to look into the possible MEM location of a
10433 CONST_DOUBLE, since we're not going to use it, in general. */
10434 if (GET_CODE (x) == CONST_DOUBLE)
10435 return false;
10437 if (GET_CODE (x) == UNSPEC
10438 && (XINT (x, 1) == UNSPEC_PIC
10439 || XINT (x, 1) == UNSPEC_GOT
10440 || XINT (x, 1) == UNSPEC_GOTOFF
10441 || XINT (x, 1) == UNSPEC_GOTPLT
10442 || XINT (x, 1) == UNSPEC_GOTTPOFF
10443 || XINT (x, 1) == UNSPEC_DTPOFF
10444 || XINT (x, 1) == UNSPEC_TPOFF
10445 || XINT (x, 1) == UNSPEC_PLT
10446 || XINT (x, 1) == UNSPEC_SYMOFF
10447 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10448 return false;
10450 fmt = GET_RTX_FORMAT (GET_CODE (x));
10451 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10453 if (fmt[i] == 'E')
10455 int j;
10456 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10457 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10458 return true;
10460 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10461 return true;
10464 return false;
10467 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10468 @GOTOFF in `reg'. */
10470 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10471 rtx reg)
10473 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10474 return orig;
10476 if (GET_CODE (orig) == LABEL_REF
10477 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10479 if (reg == NULL_RTX)
10480 reg = gen_reg_rtx (Pmode);
10482 emit_insn (gen_symGOTOFF2reg (reg, orig));
10483 return reg;
10485 else if (GET_CODE (orig) == SYMBOL_REF)
10487 if (reg == NULL_RTX)
10488 reg = gen_reg_rtx (Pmode);
10490 emit_insn (gen_symGOT2reg (reg, orig));
10491 return reg;
10493 return orig;
10496 /* Given a (logical) mode size and an offset in bytes, try to find the
10497 appropriate displacement value for a mov insn. On SH the displacements
10498 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10499 15 bytes in QImode. To compensate this we create a new base address by
10500 adding an adjustment value to it.
10502 If the originally requested offset is greater than 127 we prefer using
10503 values 124..127 over 128..131 to increase opportunities to use the
10504 add #imm, Rn insn.
10506 In some cases it is possible that a requested offset might seem unaligned
10507 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10508 This is compensated by adjusting the base address so that the effective
10509 address of the displacement move insn will be aligned.
10511 This is not the best possible way of rebasing the base address, as it
10512 does not look at other present displacement addressings around it.
10513 In some cases this can create more base address adjustments than would
10514 actually be necessary. */
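/* Worked example (the numbers only illustrate the arithmetic below): for an
   SImode access at offset 68 from a base register the maximum mov.l
   displacement of 60 is exceeded, so the access is re-based with
   offset_adjust = 64 and mov_disp = 4, i.e. "add #64,rX" followed by a move
   using @(4,rX).  For a requested offset of 130 the preference for 124..127
   yields offset_adjust = 126 and mov_disp = 4, keeping the adjustment within
   reach of a single add #imm8.  */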
10515 struct disp_adjust
10517 rtx offset_adjust;
10518 rtx mov_disp;
10521 static struct disp_adjust
10522 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10524 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10526 /* Do not try to use SH2A's large displacements here, because this would
10527 effectively disable the small displacement insns. */
10528 const int mode_sz = GET_MODE_SIZE (mode);
10529 const int mov_insn_sz = mov_insn_size (mode, false);
10530 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10531 const int max_disp_next = max_disp + mov_insn_sz;
10532 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10533 HOST_WIDE_INT offset_adjust;
10535 /* In some cases this actually does happen and we must check for it. */
10536 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10537 return res;
10539 /* Keeps the previous behavior for QImode displacement addressing.
10540 This just decides how the offset is re-based. Removing this special
10541 case will result in slightly bigger code on average, but it's not that
10542 bad actually. */
10543 if (mov_insn_sz == 1)
10544 align_modifier = 0;
10546 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10548 if (mode_sz + offset - offset_adjust <= max_disp_next)
10550 res.offset_adjust = GEN_INT (offset_adjust);
10551 res.mov_disp = GEN_INT (offset - offset_adjust);
10554 return res;
10557 /* Try to modify an illegitimate address and make it legitimate.
10558 If we find one, return the new, valid address.
10559 Otherwise, return the original address. */
10560 static rtx
10561 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10563 if (flag_pic)
10564 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10566 if (TARGET_SHMEDIA)
10567 return x;
10569 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10570 || (TARGET_SH2E && mode == SFmode))
10571 return x;
10573 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10574 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10576 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10577 INTVAL (XEXP (x, 1)));
10579 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10581 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10582 adj.offset_adjust, NULL_RTX, 0,
10583 OPTAB_LIB_WIDEN);
10584 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10587 return x;
10590 /* Attempt to replace *p, which is an address that needs reloading, with
10591 a valid memory address for an operand of mode MODE.
10592 Like for sh_legitimize_address, for the SH we try to get a normal form
10593 of the address. That will allow inheritance of the address reloads. */
10594 bool
10595 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10596 int itype)
10598 enum reload_type type = (enum reload_type) itype;
10599 const int mode_sz = GET_MODE_SIZE (mode);
10601 if (sh_lra_p ())
10602 return false;
10604 if (! ALLOW_INDEXED_ADDRESS
10605 && GET_CODE (*p) == PLUS
10606 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10608 *p = copy_rtx (*p);
10609 push_reload (*p, NULL_RTX, p, NULL,
10610 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10611 return true;
10614 if (! ALLOW_INDEXED_ADDRESS
10615 && GET_CODE (*p) == PLUS
10616 && GET_CODE (XEXP (*p, 0)) == PLUS)
10618 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10619 XEXP (XEXP (*p, 0), 1));
10620 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10621 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10622 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10623 return true;
10626 if (TARGET_SHMEDIA)
10627 return false;
10629 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10630 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10631 && (ALLOW_INDEXED_ADDRESS
10632 || XEXP (*p, 0) == stack_pointer_rtx
10633 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10635 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10636 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10638 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10640 push_reload (*p, NULL_RTX, p, NULL,
10641 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10642 return true;
10645 if (TARGET_SH2E && mode == SFmode)
10647 *p = copy_rtx (*p);
10648 push_reload (*p, NULL_RTX, p, NULL,
10649 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10650 return true;
10653 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10654 moves because then reload has a problem figuring the constraint
10655 that the move insn target/source reg must be R0.
10656 Or maybe some handling is wrong in sh_secondary_reload for this
10657 to work properly? */
10658 if ((mode_sz == 4 || mode_sz == 8)
10659 && ! (TARGET_SH4 && mode == DFmode)
10660 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10662 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10663 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10664 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10665 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10666 return true;
10670 /* We must re-recognize what we created before. */
10671 if (GET_CODE (*p) == PLUS
10672 && (mode_sz == 4 || mode_sz == 8)
10673 && GET_CODE (XEXP (*p, 0)) == PLUS
10674 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10675 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10676 && CONST_INT_P (XEXP (*p, 1))
10677 && ! (TARGET_SH2E && mode == SFmode))
10679 /* Because this address is so complex, we know it must have
10680 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10681 it is already unshared, and needs no further unsharing. */
10682 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10683 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10684 return true;
10687 return false;
10690 /* In the name of slightly smaller debug output, and to cater to
10691 general assembler lossage, recognize various UNSPEC sequences
10692 and turn them back into a direct symbol reference. */
10693 static rtx
10694 sh_delegitimize_address (rtx orig_x)
10696 rtx x, y;
10698 orig_x = delegitimize_mem_from_attrs (orig_x);
10700 x = orig_x;
10701 if (MEM_P (x))
10702 x = XEXP (x, 0);
10703 if (GET_CODE (x) == CONST)
10705 y = XEXP (x, 0);
10706 if (GET_CODE (y) == UNSPEC)
10708 if (XINT (y, 1) == UNSPEC_GOT
10709 || XINT (y, 1) == UNSPEC_GOTOFF
10710 || XINT (y, 1) == UNSPEC_SYMOFF)
10711 return XVECEXP (y, 0, 0);
10712 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10714 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10716 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10718 if (GET_CODE (symplt) == UNSPEC
10719 && XINT (symplt, 1) == UNSPEC_PLT)
10720 return XVECEXP (symplt, 0, 0);
10723 else if (TARGET_SHMEDIA
10724 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10725 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10727 rtx offset = XVECEXP (y, 0, 1);
10729 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10730 if (MEM_P (orig_x))
10731 x = replace_equiv_address_nv (orig_x, x);
10732 return x;
10737 return orig_x;
10740 /* Mark the use of a constant in the literal table. If the constant
10741 has multiple labels, make it unique. */
10742 static rtx
10743 mark_constant_pool_use (rtx x)
10745 rtx_insn *insn, *lab;
10746 rtx pattern;
10748 if (x == NULL_RTX)
10749 return x;
10751 switch (GET_CODE (x))
10753 case LABEL_REF:
10754 x = XEXP (x, 0);
10755 case CODE_LABEL:
10756 break;
10757 default:
10758 return x;
10761 /* Get the first label in the list of labels for the same constant
10762 and delete the other labels in the list. */
10763 lab = as_a <rtx_insn *> (x);
10764 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10766 if (!LABEL_P (insn)
10767 || LABEL_REFS (insn) != NEXT_INSN (insn))
10768 break;
10769 lab = insn;
10772 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10773 as_a<rtx_insn *> (insn)->set_deleted ();
10775 /* Mark constants in a window. */
10776 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10778 if (!NONJUMP_INSN_P (insn))
10779 continue;
10781 pattern = PATTERN (insn);
10782 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10783 continue;
10785 switch (XINT (pattern, 1))
10787 case UNSPECV_CONST2:
10788 case UNSPECV_CONST4:
10789 case UNSPECV_CONST8:
10790 XVECEXP (pattern, 0, 1) = const1_rtx;
10791 break;
10792 case UNSPECV_WINDOW_END:
10793 if (XVECEXP (pattern, 0, 0) == x)
10794 return lab;
10795 break;
10796 case UNSPECV_CONST_END:
10797 return lab;
10798 default:
10799 break;
10803 return lab;
10806 /* Return true if it's possible to redirect BRANCH1 to the destination
10807 of an unconditional jump BRANCH2. We only want to do this if the
10808 resulting branch will have a short displacement. */
10809 static bool
10810 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10812 /* Don't follow if BRANCH2 may be a jump crossing between
10813 hot and cold partitions. */
10814 if (TARGET_SH1
10815 && flag_reorder_blocks_and_partition
10816 && simplejump_p (branch2)
10817 && CROSSING_JUMP_P (branch2))
10818 return false;
10820 if (flag_expensive_optimizations && simplejump_p (branch2))
10822 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10823 rtx_insn *insn;
10824 int distance;
10826 for (distance = 0, insn = NEXT_INSN (branch1);
10827 insn && distance < 256;
10828 insn = PREV_INSN (insn))
10830 if (insn == dest)
10831 return true;
10832 else
10833 distance += get_attr_length (insn);
10835 for (distance = 0, insn = NEXT_INSN (branch1);
10836 insn && distance < 256;
10837 insn = NEXT_INSN (insn))
10839 if (insn == dest)
10840 return true;
10841 else
10842 distance += get_attr_length (insn);
10845 return false;
10848 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10849 bool
10850 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10851 unsigned int new_reg)
10853 /* Interrupt functions can only use registers that have already been
10854 saved by the prologue, even if they would normally be
10855 call-clobbered. */
10856 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10857 return false;
10859 return true;
10862 /* Function to update the integer COST
10863 based on the relationship between INSN that is dependent on
10864 DEP_INSN through the dependence LINK. The default is to make no
10865 adjustment to COST. This can be used for example to specify to
10866 the scheduler that an output- or anti-dependence does not incur
10867 the same cost as a data-dependence. The return value should be
10868 the new value for COST. */
10869 static int
10870 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10871 rtx_insn *dep_insn, int cost)
10873 rtx reg, use_pat;
10875 if (TARGET_SHMEDIA)
10877 /* On SHmedia, if the dependence is an anti-dependence or
10878 output-dependence, there is no cost. */
10879 if (REG_NOTE_KIND (link) != 0)
10881 /* However, dependencies between target register loads and
10882 uses of the register in a subsequent block that are separated
10883 by a conditional branch are not modelled - we have to make do with
10884 the anti-dependency between the target register load and the
10885 conditional branch that ends the current block. */
10886 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10887 && GET_CODE (PATTERN (dep_insn)) == SET
10888 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10889 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10890 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10892 int orig_cost = cost;
10893 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10894 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10895 ? insn : JUMP_LABEL (insn));
10896 /* On the likely path, the branch costs 1, on the unlikely path,
10897 it costs 3. */
10898 cost--;
10900 target = next_active_insn (target);
10901 while (target && ! flow_dependent_p (target, dep_insn)
10902 && --cost > 0);
10903 /* If two branches are executed in immediate succession, with the
10904 first branch properly predicted, this causes a stall at the
10905 second branch, hence we won't need the target for the
10906 second branch for two cycles after the launch of the first
10907 branch. */
10908 if (cost > orig_cost - 2)
10909 cost = orig_cost - 2;
10911 else
10912 cost = 0;
10915 else if (get_attr_is_mac_media (insn)
10916 && get_attr_is_mac_media (dep_insn))
10917 cost = 1;
10919 else if (! reload_completed
10920 && GET_CODE (PATTERN (insn)) == SET
10921 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10922 && GET_CODE (PATTERN (dep_insn)) == SET
10923 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10924 && cost < 4)
10925 cost = 4;
10926 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10927 that is needed at the target. */
10928 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10929 && ! flow_dependent_p (insn, dep_insn))
10930 cost--;
10932 else if (REG_NOTE_KIND (link) == 0)
10934 enum attr_type type;
10935 rtx dep_set;
10937 if (recog_memoized (insn) < 0
10938 || recog_memoized (dep_insn) < 0)
10939 return cost;
10941 dep_set = single_set (dep_insn);
10943 /* The latency that we specify in the scheduling description refers
10944 to the actual output, not to an auto-increment register; for that,
10945 the latency is one. */
10946 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10948 rtx set = single_set (insn);
10950 if (set
10951 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10952 && (!MEM_P (SET_DEST (set))
10953 || !reg_mentioned_p (SET_DEST (dep_set),
10954 XEXP (SET_DEST (set), 0))))
10955 cost = 1;
10957 /* The only input for a call that is timing-critical is the
10958 function's address. */
10959 if (CALL_P (insn))
10961 rtx call = get_call_rtx_from (insn);
10962 if (call
10963 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10964 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10965 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10966 cost -= TARGET_SH4_300 ? 3 : 6;
10968 /* Likewise, the most timing critical input for an sfuncs call
10969 is the function address. However, sfuncs typically start
10970 using their arguments pretty quickly.
10971 Assume a four cycle delay for SH4 before they are needed.
10972 Cached ST40-300 calls are quicker, so assume only a one
10973 cycle delay there.
10974 ??? Maybe we should encode the delays till input registers
10975 are needed by sfuncs into the sfunc call insn. */
10976 /* All sfunc calls are parallels with at least four components.
10977 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10978 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10979 && XVECLEN (PATTERN (insn), 0) >= 4
10980 && (reg = sfunc_uses_reg (insn)))
10982 if (! reg_set_p (reg, dep_insn))
10983 cost -= TARGET_SH4_300 ? 1 : 4;
10985 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10987 enum attr_type dep_type = get_attr_type (dep_insn);
10989 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10990 cost--;
10991 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10992 && (type = get_attr_type (insn)) != TYPE_CALL
10993 && type != TYPE_SFUNC)
10994 cost--;
10995 /* When the preceding instruction loads the shift amount of
10996 the following SHAD/SHLD, the latency of the load is increased
10997 by 1 cycle. */
10998 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10999 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11000 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11001 XEXP (SET_SRC (single_set (insn)),
11002 1)))
11003 cost++;
11004 /* When an LS group instruction with a latency of less than
11005 3 cycles is followed by a double-precision floating-point
11006 instruction, FIPR, or FTRV, the latency of the first
11007 instruction is increased to 3 cycles. */
11008 else if (cost < 3
11009 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11010 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11011 cost = 3;
11012 /* The lsw register of a double-precision computation is ready one
11013 cycle earlier. */
11014 else if (reload_completed
11015 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11016 && (use_pat = single_set (insn))
11017 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11018 SET_SRC (use_pat)))
11019 cost -= 1;
11021 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11022 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11023 cost -= 1;
11025 else if (TARGET_SH4_300)
11027 /* Stores need their input register two cycles later. */
11028 if (dep_set && cost >= 1
11029 && ((type = get_attr_type (insn)) == TYPE_STORE
11030 || type == TYPE_PSTORE
11031 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11033 rtx set = single_set (insn);
11035 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11036 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11038 cost -= 2;
11039 /* But don't reduce the cost below 1 if the address depends
11040 on a side effect of dep_insn. */
11041 if (cost < 1
11042 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11043 cost = 1;
11048 /* An anti-dependence penalty of two applies if the first insn is a double
11049 precision fadd / fsub / fmul. */
11050 else if (!TARGET_SH4_300
11051 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11052 && recog_memoized (dep_insn) >= 0
11053 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11054 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11055 /* A lot of alleged anti-flow dependences are fake,
11056 so check this one is real. */
11057 && flow_dependent_p (dep_insn, insn))
11058 cost = 2;
11060 return cost;
11063 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11064 if DEP_INSN is anti-flow dependent on INSN. */
11065 static bool
11066 flow_dependent_p (rtx insn, rtx dep_insn)
11068 rtx tmp = PATTERN (insn);
11070 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11071 return tmp == NULL_RTX;
11074 /* A helper function for flow_dependent_p called through note_stores. */
11075 static void
11076 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11078 rtx * pinsn = (rtx *) data;
11080 if (*pinsn && reg_referenced_p (x, *pinsn))
11081 *pinsn = NULL_RTX;
11084 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11085 'special function' patterns (type sfunc) that clobber pr, but that
11086 do not look like function calls to leaf_function_p. Hence we must
11087 do this extra check. */
11088 static int
11089 sh_pr_n_sets (void)
11091 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11094 /* Return where to allocate pseudo for a given hard register initial
11095 value. */
11096 static rtx
11097 sh_allocate_initial_value (rtx hard_reg)
11099 rtx x;
11101 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11103 if (crtl->is_leaf
11104 && ! sh_pr_n_sets ()
11105 && ! (TARGET_SHCOMPACT
11106 && ((crtl->args.info.call_cookie
11107 & ~ CALL_COOKIE_RET_TRAMP (1))
11108 || crtl->saves_all_registers)))
11109 x = hard_reg;
11110 else
11111 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11113 else
11114 x = NULL_RTX;
11116 return x;
11119 /* This function returns "2" to indicate dual issue for the SH4
11120 processor. To be used by the DFA pipeline description. */
11121 static int
11122 sh_issue_rate (void)
11124 if (TARGET_SUPERSCALAR)
11125 return 2;
11126 else
11127 return 1;
11130 /* Functions for ready queue reordering for sched1. */
11132 /* Get weight for mode for a set x. */
11133 static short
11134 find_set_regmode_weight (rtx x, machine_mode mode)
11136 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11137 return 1;
11138 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11140 if (REG_P (SET_DEST (x)))
11142 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11143 return 1;
11144 else
11145 return 0;
11147 return 1;
11149 return 0;
11152 /* Get regmode weight for insn. */
11153 static short
11154 find_insn_regmode_weight (rtx insn, machine_mode mode)
11156 short reg_weight = 0;
11157 rtx x;
11159 /* Increment weight for each register born here. */
11160 x = PATTERN (insn);
11161 reg_weight += find_set_regmode_weight (x, mode);
11162 if (GET_CODE (x) == PARALLEL)
11164 int j;
11165 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11167 x = XVECEXP (PATTERN (insn), 0, j);
11168 reg_weight += find_set_regmode_weight (x, mode);
11171 /* Decrement weight for each register that dies here. */
11172 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11174 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11176 rtx note = XEXP (x, 0);
11177 if (REG_P (note) && GET_MODE (note) == mode)
11178 reg_weight--;
11181 return reg_weight;
11184 /* Calculate regmode weights for all insns of a basic block. */
11185 static void
11186 find_regmode_weight (basic_block b, machine_mode mode)
11188 rtx_insn *insn, *next_tail, *head, *tail;
11190 get_ebb_head_tail (b, b, &head, &tail);
11191 next_tail = NEXT_INSN (tail);
11193 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11195 /* Handle register life information. */
11196 if (!INSN_P (insn))
11197 continue;
11199 if (mode == SFmode)
11200 INSN_REGMODE_WEIGHT (insn, mode) =
11201 find_insn_regmode_weight (insn, mode)
11202 + 2 * find_insn_regmode_weight (insn, DFmode);
11203 else if (mode == SImode)
11204 INSN_REGMODE_WEIGHT (insn, mode) =
11205 find_insn_regmode_weight (insn, mode)
11206 + 2 * find_insn_regmode_weight (insn, DImode);
11210 /* Comparison function for ready queue sorting. */
11211 static int
11212 rank_for_reorder (const void *x, const void *y)
11214 rtx_insn *tmp = *(rtx_insn * const *) y;
11215 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11217 /* The insn in a schedule group should be issued first. */
11218 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11219 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11221 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11222 minimizes instruction movement, thus minimizing sched's effect on
11223 register pressure. */
11224 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11228 /* Resort the array A, in which only the element at index N - 1 may be out of order. */
11228 static void
11229 swap_reorder (rtx_insn **a, int n)
11231 rtx_insn *insn = a[n - 1];
11232 int i = n - 2;
11234 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11236 a[i + 1] = a[i];
11237 i -= 1;
11239 a[i + 1] = insn;
11242 /* Sort the ready list by ascending priority. */
11243 static void
11244 ready_reorder (rtx_insn **ready, int nready)
11246 if (nready == 2)
11247 swap_reorder (ready, nready);
11248 else if (nready > 2)
11249 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11252 /* Count life regions of r0 for a block. */
11253 static int
11254 find_r0_life_regions (basic_block b)
11256 rtx_insn *end, *insn;
11257 rtx pset;
11258 rtx r0_reg;
11259 int live;
11260 int set;
11261 int death = 0;
11263 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11265 set = 1;
11266 live = 1;
11268 else
11270 set = 0;
11271 live = 0;
11274 insn = BB_HEAD (b);
11275 end = BB_END (b);
11276 r0_reg = gen_rtx_REG (SImode, R0_REG);
11277 while (1)
11279 if (INSN_P (insn))
11281 if (find_regno_note (insn, REG_DEAD, R0_REG))
11283 death++;
11284 live = 0;
11286 if (!live
11287 && (pset = single_set (insn))
11288 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11289 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11291 set++;
11292 live = 1;
11295 if (insn == end)
11296 break;
11297 insn = NEXT_INSN (insn);
11299 return set - death;
11302 /* Calculate regmode weights for all insns of all basic blocks. */
11303 static void
11304 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11305 int verbose ATTRIBUTE_UNUSED,
11306 int old_max_uid)
11308 basic_block b;
11310 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11311 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11312 r0_life_regions = 0;
11314 FOR_EACH_BB_REVERSE_FN (b, cfun)
11316 find_regmode_weight (b, SImode);
11317 find_regmode_weight (b, SFmode);
11318 if (!reload_completed)
11319 r0_life_regions += find_r0_life_regions (b);
11322 CURR_REGMODE_PRESSURE (SImode) = 0;
11323 CURR_REGMODE_PRESSURE (SFmode) = 0;
11326 /* Cleanup. */
11327 static void
11328 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11329 int verbose ATTRIBUTE_UNUSED)
11331 if (regmode_weight[0])
11333 free (regmode_weight[0]);
11334 regmode_weight[0] = NULL;
11336 if (regmode_weight[1])
11338 free (regmode_weight[1]);
11339 regmode_weight[1] = NULL;
11343 /* The set of supported scalar modes differs from the default only in TImode
11344 for 32-bit SHMEDIA. */
11345 static bool
11346 sh_scalar_mode_supported_p (machine_mode mode)
11348 if (TARGET_SHMEDIA32 && mode == TImode)
11349 return false;
11351 return default_scalar_mode_supported_p (mode);
11354 /* Cache can_issue_more so that we can return it from reorder2.  Also,
11355 keep count of register pressure for SImode and SFmode. */
11356 static int
11357 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11358 int sched_verbose ATTRIBUTE_UNUSED,
11359 rtx_insn *insn,
11360 int can_issue_more)
11362 if (GET_CODE (PATTERN (insn)) != USE
11363 && GET_CODE (PATTERN (insn)) != CLOBBER)
11364 cached_can_issue_more = can_issue_more - 1;
11365 else
11366 cached_can_issue_more = can_issue_more;
11368 if (reload_completed)
11369 return cached_can_issue_more;
11371 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11372 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11374 return cached_can_issue_more;
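/* Reset the SImode / SFmode register pressure counters before a new block
   is scheduled (presumably installed as the scheduler's per-block init
   hook).  */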
11377 static void
11378 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11379 int verbose ATTRIBUTE_UNUSED,
11380 int veclen ATTRIBUTE_UNUSED)
11382 CURR_REGMODE_PRESSURE (SImode) = 0;
11383 CURR_REGMODE_PRESSURE (SFmode) = 0;
11386 /* Some magic numbers. */
11387 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11388 functions that already have high pressure on r0. */
11389 #define R0_MAX_LIFE_REGIONS 2
11390 /* Register Pressure thresholds for SImode and SFmode registers. */
11391 #define SIMODE_MAX_WEIGHT 5
11392 #define SFMODE_MAX_WEIGHT 10
11394 /* Return true if the pressure is high for MODE. */
11395 static bool
11396 high_pressure (machine_mode mode)
11398 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11399 functions that already have high pressure on r0. */
11400 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11401 return true;
11403 if (mode == SFmode)
11404 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11405 else
11406 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11409 /* Reorder ready queue if register pressure is high. */
11410 static int
11411 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11412 int sched_verbose ATTRIBUTE_UNUSED,
11413 rtx_insn **ready,
11414 int *n_readyp,
11415 int clock_var ATTRIBUTE_UNUSED)
11417 if (reload_completed)
11418 return sh_issue_rate ();
11420 if (high_pressure (SFmode) || high_pressure (SImode))
11422 ready_reorder (ready, *n_readyp);
11425 return sh_issue_rate ();
11428 /* Skip cycles if the current register pressure is high. */
11429 static int
11430 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11431 int sched_verbose ATTRIBUTE_UNUSED,
11432 rtx_insn **ready ATTRIBUTE_UNUSED,
11433 int *n_readyp ATTRIBUTE_UNUSED,
11434 int clock_var ATTRIBUTE_UNUSED)
11436 if (reload_completed)
11437 return cached_can_issue_more;
11439 if (high_pressure (SFmode) || high_pressure (SImode))
11440 skip_cycles = 1;
11442 return cached_can_issue_more;
11445 /* Skip cycles without sorting the ready queue. This will move insns from
11446 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11447 queue by sh_reorder. */
11449 /* Generally, skipping this many cycles is sufficient for all insns to move
11450 from Q -> R. */
11451 #define MAX_SKIPS 8
11453 static int
11454 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11455 int sched_verbose ATTRIBUTE_UNUSED,
11456 rtx_insn *insn ATTRIBUTE_UNUSED,
11457 int last_clock_var,
11458 int clock_var,
11459 int *sort_p)
11461 if (reload_completed)
11462 return 0;
11464 if (skip_cycles)
11466 if ((clock_var - last_clock_var) < MAX_SKIPS)
11468 *sort_p = 0;
11469 return 1;
11471 /* If this is the last cycle we are skipping, allow reordering of R. */
11472 if ((clock_var - last_clock_var) == MAX_SKIPS)
11474 *sort_p = 1;
11475 return 1;
11479 skip_cycles = 0;
11481 return 0;
11484 /* SHmedia requires registers for branches, so we can't generate new
11485 branches past reload. */
11486 static bool
11487 sh_cannot_modify_jumps_p (void)
11489 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
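/* Return the register class to be used for branch target registers:
   TARGET_REGS on SHmedia, otherwise NO_REGS.  */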
11492 static reg_class_t
11493 sh_target_reg_class (void)
11495 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
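/* Return true if it seems worthwhile to use callee-saved branch target
   registers: space must have been reserved for them, and the function must
   already be saving a fairly large set of registers.  */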
11498 static bool
11499 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11501 if (! shmedia_space_reserved_for_target_registers)
11502 return 0;
11503 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11504 return 0;
11506 HARD_REG_SET dummy;
11507 if (calc_live_regs (&dummy) >= 6 * 8)
11508 return 1;
11509 return 0;
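/* Return true if RECORD_TYPE should use the MS bitfield layout, i.e. on
   SH5 or when the Hitachi / Renesas ABI is in effect.  */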
11512 static bool
11513 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11515 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11519 /* On the SH1..SH4, the trampoline looks like
11520 2 0002 D202 mov.l l2,r2
11521 1 0000 D301 mov.l l1,r3
11522 3 0004 422B jmp @r2
11523 4 0006 0009 nop
11524 5 0008 00000000 l1: .long area
11525 6 000c 00000000 l2: .long function
11527 SH5 (compact) uses r1 instead of r3 for the static chain. */
11530 /* Emit RTL insns to initialize the variable parts of a trampoline.
11531 FNADDR is an RTX for the address of the function's pure code.
11532 CXT is an RTX for the static chain value for the function. */
11533 static void
11534 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11536 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11537 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11539 if (TARGET_SHMEDIA64)
11541 rtx tramp_templ;
11542 int fixed_len;
11544 rtx movi1 = GEN_INT (0xcc000010);
11545 rtx shori1 = GEN_INT (0xc8000010);
11546 rtx src, dst;
11548 /* The following trampoline works within a +- 128 KB range for cxt:
11549 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11550 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11551 gettr tr1,r1; blink tr0,r63 */
11552 /* Address rounding makes it hard to compute the exact bounds of the
11553 offset for this trampoline, but we have a rather generous offset
11554 range, so frame_offset should do fine as an upper bound. */
11555 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11557 /* ??? could optimize this trampoline initialization
11558 by writing DImode words with two insns each. */
11559 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11560 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11561 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11562 insn = gen_rtx_AND (DImode, insn, mask);
11563 /* Or in ptb/u .,tr1 pattern */
11564 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11565 insn = force_operand (insn, NULL_RTX);
11566 insn = gen_lowpart (SImode, insn);
11567 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11568 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11569 insn = gen_rtx_AND (DImode, insn, mask);
11570 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11571 insn = gen_lowpart (SImode, insn);
11572 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11573 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11574 insn = gen_rtx_AND (DImode, insn, mask);
11575 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11576 insn = gen_lowpart (SImode, insn);
11577 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11578 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11579 insn = gen_rtx_AND (DImode, insn, mask);
11580 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11581 insn = gen_lowpart (SImode, insn);
11582 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11583 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11584 insn = gen_rtx_AND (DImode, insn, mask);
11585 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11586 insn = gen_lowpart (SImode, insn);
11587 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11588 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11589 GEN_INT (0x6bf10600));
11590 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11591 GEN_INT (0x4415fc10));
11592 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11593 GEN_INT (0x4401fff0));
11594 emit_insn (gen_ic_invalidate_line (tramp));
11595 return;
11597 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11598 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11600 tramp_templ = gen_datalabel_ref (tramp_templ);
11601 dst = tramp_mem;
11602 src = gen_const_mem (BLKmode, tramp_templ);
11603 set_mem_align (dst, 256);
11604 set_mem_align (src, 64);
11605 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11607 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11608 emit_move_insn (adjust_address (tramp_mem, Pmode,
11609 fixed_len + GET_MODE_SIZE (Pmode)),
11610 cxt);
11611 emit_insn (gen_ic_invalidate_line (tramp));
11612 return;
11614 else if (TARGET_SHMEDIA)
11616 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11617 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11618 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11619 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11620 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
11621 rotated 10 right, and the upper 16 bits of every 32 selected. */
11622 rtx movishori
11623 = force_reg (V2HImode, (simplify_gen_subreg
11624 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11625 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11626 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11628 fnaddr = force_reg (SImode, fnaddr);
11629 cxt = force_reg (SImode, cxt);
11630 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11631 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11632 movishori));
11633 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11634 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11635 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11636 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11637 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11638 gen_rtx_SUBREG (V2HImode, cxt, 0),
11639 movishori));
11640 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11641 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11642 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11643 if (TARGET_LITTLE_ENDIAN)
11645 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11646 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11648 else
11650 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11651 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11653 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11654 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11655 emit_insn (gen_ic_invalidate_line (tramp));
11656 return;
11658 else if (TARGET_SHCOMPACT)
11660 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11661 return;
11663 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11664 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11665 SImode));
11666 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11667 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11668 SImode));
11669 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11670 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11671 if (TARGET_HARD_SH4 || TARGET_SH5)
11673 if (!TARGET_INLINE_IC_INVALIDATE
11674 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11675 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11676 FUNCTION_ORDINARY),
11677 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11678 else
11679 emit_insn (gen_ic_invalidate_line (tramp));
11683 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11684 static rtx
11685 sh_trampoline_adjust_address (rtx tramp)
11687 if (TARGET_SHMEDIA)
11688 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11689 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11690 return tramp;
11693 /* FIXME: This is overly conservative. A SHcompact function that
11694 receives arguments ``by reference'' will have them stored in its
11695 own stack frame, so it must not pass pointers or references to
11696 these arguments to other functions by means of sibling calls. */
11697 /* If PIC, we cannot make sibling calls to global functions
11698 because the PLT requires r12 to be live. */
11699 static bool
11700 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11702 return (1
11703 && (! TARGET_SHCOMPACT
11704 || crtl->args.info.stack_regs == 0)
11705 && ! sh_cfun_interrupt_handler_p ()
11706 && (! flag_pic
11707 || (decl && ! TREE_PUBLIC (decl))
11708 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11711 /* Machine specific built-in functions. */
11713 struct builtin_description
11715 bool (* const is_enabled) (void);
11716 const enum insn_code icode;
11717 const char *const name;
11718 int signature;
11719 tree fndecl;
11722 static bool
11723 shmedia_builtin_p (void)
11725 return TARGET_SHMEDIA;
11728 /* This function can be used if there are any built-ins that are not for
11729 SHmedia; it is used by the fpscr built-ins below. */
11730 static bool
11731 sh1_builtin_p (void)
11733 return TARGET_SH1;
11736 /* Describe number and signedness of arguments; arg[0] == result
11737 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
11738 /* 9: 64-bit pointer, 10: 32-bit pointer */
11739 static const char signature_args[][4] =
11741 #define SH_BLTIN_V2SI2 0
11742 { 4, 4 },
11743 #define SH_BLTIN_V4HI2 1
11744 { 4, 4 },
11745 #define SH_BLTIN_V2SI3 2
11746 { 4, 4, 4 },
11747 #define SH_BLTIN_V4HI3 3
11748 { 4, 4, 4 },
11749 #define SH_BLTIN_V8QI3 4
11750 { 4, 4, 4 },
11751 #define SH_BLTIN_MAC_HISI 5
11752 { 1, 4, 4, 1 },
11753 #define SH_BLTIN_SH_HI 6
11754 { 4, 4, 1 },
11755 #define SH_BLTIN_SH_SI 7
11756 { 4, 4, 1 },
11757 #define SH_BLTIN_V4HI2V2SI 8
11758 { 4, 4, 4 },
11759 #define SH_BLTIN_V4HI2V8QI 9
11760 { 4, 4, 4 },
11761 #define SH_BLTIN_SISF 10
11762 { 4, 2 },
11763 #define SH_BLTIN_LDUA_L 11
11764 { 2, 10 },
11765 #define SH_BLTIN_LDUA_Q 12
11766 { 1, 10 },
11767 #define SH_BLTIN_STUA_L 13
11768 { 0, 10, 2 },
11769 #define SH_BLTIN_STUA_Q 14
11770 { 0, 10, 1 },
11771 #define SH_BLTIN_LDUA_L64 15
11772 { 2, 9 },
11773 #define SH_BLTIN_LDUA_Q64 16
11774 { 1, 9 },
11775 #define SH_BLTIN_STUA_L64 17
11776 { 0, 9, 2 },
11777 #define SH_BLTIN_STUA_Q64 18
11778 { 0, 9, 1 },
11779 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11780 #define SH_BLTIN_2 19
11781 #define SH_BLTIN_SU 19
11782 { 1, 2 },
11783 #define SH_BLTIN_3 20
11784 #define SH_BLTIN_SUS 20
11785 { 2, 2, 1 },
11786 #define SH_BLTIN_PSSV 21
11787 { 0, 8, 2, 2 },
11788 #define SH_BLTIN_XXUU 22
11789 #define SH_BLTIN_UUUU 22
11790 { 1, 1, 1, 1 },
11791 #define SH_BLTIN_PV 23
11792 { 0, 8 },
11793 #define SH_BLTIN_VP 24
11794 { 8, 0 },
11795 #define SH_BLTIN_UV 25
11796 { 1, 0 },
11797 #define SH_BLTIN_VU 26
11798 { 0, 1 },
11800 /* mcmv: operands considered unsigned. */
11801 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11802 /* mperm: control value considered unsigned int. */
11803 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11804 /* mshards_q: returns signed short. */
11805 /* nsb: takes long long arg, returns unsigned char. */
11806 static struct builtin_description bdesc[] =
11808 { shmedia_builtin_p,
11809 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11810 { shmedia_builtin_p,
11811 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11812 { shmedia_builtin_p,
11813 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11814 { shmedia_builtin_p,
11815 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11816 { shmedia_builtin_p,
11817 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11818 { shmedia_builtin_p,
11819 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11820 { shmedia_builtin_p,
11821 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11822 { shmedia_builtin_p,
11823 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11824 { shmedia_builtin_p,
11825 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11880 { shmedia_builtin_p,
11881 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11882 { shmedia_builtin_p,
11883 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11884 { shmedia_builtin_p,
11885 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11886 { shmedia_builtin_p,
11887 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11888 { shmedia_builtin_p,
11889 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11890 { shmedia_builtin_p,
11891 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11892 { shmedia_builtin_p,
11893 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11894 { shmedia_builtin_p,
11895 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11896 { shmedia_builtin_p,
11897 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11898 { shmedia_builtin_p,
11899 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11900 { shmedia_builtin_p,
11901 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11902 { shmedia_builtin_p,
11903 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11904 { shmedia_builtin_p,
11905 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11906 { shmedia_builtin_p,
11907 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11908 { shmedia_builtin_p,
11909 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11910 { shmedia_builtin_p,
11911 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11912 { shmedia_builtin_p,
11913 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11914 { shmedia_builtin_p,
11915 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11916 { shmedia_builtin_p,
11917 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11918 { shmedia_builtin_p,
11919 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11920 { shmedia_builtin_p,
11921 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11922 { shmedia_builtin_p,
11923 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11924 { shmedia_builtin_p,
11925 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11926 { shmedia_builtin_p,
11927 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11928 { shmedia_builtin_p,
11929 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11930 { shmedia_builtin_p,
11931 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11932 { shmedia_builtin_p,
11933 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11934 { shmedia_builtin_p,
11935 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11936 { shmedia_builtin_p,
11937 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11938 { shmedia_builtin_p,
11939 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11940 { shmedia_builtin_p,
11941 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11942 { shmedia_builtin_p,
11943 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11944 { shmedia_builtin_p,
11945 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11946 { shmedia_builtin_p,
11947 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11948 { shmedia_builtin_p,
11949 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11950 { shmedia_builtin_p,
11951 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11952 { shmedia_builtin_p,
11953 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11954 { shmedia_builtin_p,
11955 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11956 { shmedia_builtin_p,
11957 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11958 { shmedia_builtin_p,
11959 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11960 { shmedia_builtin_p,
11961 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11962 { shmedia_builtin_p,
11963 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11964 { shmedia_builtin_p,
11965 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11966 { shmedia_builtin_p,
11967 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11968 { shmedia_builtin_p,
11969 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11970 { shmedia_builtin_p,
11971 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11972 { shmedia_builtin_p,
11973 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11975 { sh1_builtin_p,
11976 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11977 { sh1_builtin_p,
11978 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11981 static tree sh_builtin_get_fpscr;
11982 static tree sh_builtin_set_fpscr;
11984 static void
11985 sh_init_builtins (void)
11987 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11988 memset (shared, 0, sizeof shared);
11990 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11992 builtin_description* d = &bdesc[di];
11994 if (!d->is_enabled ())
11995 continue;
11997 tree type, arg_type = NULL_TREE;
11998 int signature = d->signature;
12000 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12001 type = shared[signature];
12002 else
12004 int has_result = signature_args[signature][0] != 0;
12005 tree args[3];
12007 if ((signature_args[signature][1] & 8)
12008 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12009 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12010 continue;
12011 if (! TARGET_FPU_ANY
12012 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12013 continue;
12014 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12015 args[i] = NULL_TREE;
12016 for (int i = 3; ; i--)
12018 int arg = signature_args[signature][i];
12019 int opno = i - 1 + has_result;
12021 if (arg & 8)
12022 arg_type = ptr_type_node;
12023 else if (arg)
12024 arg_type = (*lang_hooks.types.type_for_mode)
12025 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12026 else if (i)
12027 continue;
12028 else
12029 arg_type = void_type_node;
12030 if (i == 0)
12031 break;
12032 args[i-1] = arg_type;
12034 type = build_function_type_list (arg_type, args[0], args[1],
12035 args[2], NULL_TREE);
12036 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12037 shared[signature] = type;
12039 d->fndecl =
12040 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12041 NULL, NULL_TREE);
12042 /* Record {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12043 if (d->icode == CODE_FOR_sts_fpscr)
12044 sh_builtin_get_fpscr = d->fndecl;
12045 else if (d->icode == CODE_FOR_set_fpscr)
12046 sh_builtin_set_fpscr = d->fndecl;
12050 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12052 static void
12053 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12055 const unsigned SH_FE_INVALID = 64;
12056 const unsigned SH_FE_DIVBYZERO = 32;
12057 const unsigned SH_FE_OVERFLOW = 16;
12058 const unsigned SH_FE_UNDERFLOW = 8;
12059 const unsigned SH_FE_INEXACT = 4;
12060 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12061 | SH_FE_DIVBYZERO
12062 | SH_FE_OVERFLOW
12063 | SH_FE_UNDERFLOW
12064 | SH_FE_INEXACT);
12065 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
12066 tree fenv_var, mask, ld_fenv, masked_fenv;
12067 tree new_fenv_var, reload_fenv, restore_fnenv;
12068 tree update_call, atomic_feraiseexcept, hold_fnclex;
12070 if (! TARGET_FPU_ANY)
12071 return;
12073 /* Generate the equivalent of :
12074 unsigned int fenv_var;
12075 fenv_var = __builtin_sh_get_fpscr ();
12077 unsigned int masked_fenv;
12078 masked_fenv = fenv_var & mask;
12080 __builtin_sh_set_fpscr (masked_fenv); */
12082 fenv_var = create_tmp_var (unsigned_type_node);
12083 mask = build_int_cst (unsigned_type_node,
12084 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12085 | SH_FE_ALL_EXCEPT));
12086 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12087 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12088 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12089 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12090 *hold = build2 (COMPOUND_EXPR, void_type_node,
12091 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12092 hold_fnclex);
12094 /* Store the value of masked_fenv to clear the exceptions:
12095 __builtin_sh_set_fpscr (masked_fenv); */
12097 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12099 /* Generate the equivalent of :
12100 unsigned int new_fenv_var;
12101 new_fenv_var = __builtin_sh_get_fpscr ();
12103 __builtin_sh_set_fpscr (fenv_var);
12105 __atomic_feraiseexcept (new_fenv_var); */
12107 new_fenv_var = create_tmp_var (unsigned_type_node);
12108 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12109 build_call_expr (sh_builtin_get_fpscr, 0));
12110 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12111 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12112 update_call = build_call_expr (atomic_feraiseexcept, 1,
12113 fold_convert (integer_type_node,
12114 new_fenv_var));
12115 *update = build2 (COMPOUND_EXPR, void_type_node,
12116 build2 (COMPOUND_EXPR, void_type_node,
12117 reload_fenv, restore_fnenv), update_call);
12120 /* Implements target hook vector_mode_supported_p. */
12121 bool
12122 sh_vector_mode_supported_p (machine_mode mode)
12124 if (TARGET_FPU_ANY
12125 && ((mode == V2SFmode)
12126 || (mode == V4SFmode)
12127 || (mode == V16SFmode)))
12128 return true;
12130 else if (TARGET_SHMEDIA
12131 && ((mode == V8QImode)
12132 || (mode == V2HImode)
12133 || (mode == V4HImode)
12134 || (mode == V2SImode)))
12135 return true;
12137 return false;
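/* Return true if the current function needs a frame pointer set up.  */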
12140 bool
12141 sh_frame_pointer_required (void)
12143 /* If needed, override this in other tm.h files to cope with various OS
12144 lossage requiring a frame pointer. */
12145 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12146 return true;
12148 if (crtl->profile)
12149 return true;
12151 return false;
12154 /* Implements target hook dwarf_calling_convention. Return an enum
12155 of dwarf_calling_convention. */
12156 int
12157 sh_dwarf_calling_convention (const_tree func)
12159 if (sh_attr_renesas_p (func))
12160 return DW_CC_GNU_renesas_sh;
12162 return DW_CC_normal;
12165 /* Returns the sh builtin decl for CODE. */
12166 static tree
12167 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12169 if (code >= ARRAY_SIZE (bdesc))
12170 return error_mark_node;
12172 if (!bdesc[code].is_enabled ())
12173 return error_mark_node;
12175 return bdesc[code].fndecl;
12178 /* Expand an expression EXP that calls a built-in function,
12179 with result going to TARGET if that's convenient
12180 (and in mode MODE if that's convenient).
12181 SUBTARGET may be used as the target for computing one of EXP's operands.
12182 IGNORE is nonzero if the value is to be ignored. */
12183 static rtx
12184 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12185 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12187 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12188 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12189 const struct builtin_description *d = &bdesc[fcode];
12190 enum insn_code icode = d->icode;
12191 int signature = d->signature;
12192 int nop = 0;
12193 rtx op[4];
12195 if (signature_args[signature][0])
12197 if (ignore)
12198 return NULL_RTX;
12200 machine_mode tmode = insn_data[icode].operand[0].mode;
12201 if (! target || GET_MODE (target) != tmode
12202 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12203 target = gen_reg_rtx (tmode);
12204 op[nop++] = target;
12206 else
12207 target = NULL_RTX;
12209 for (int i = 1; i <= 3; i++, nop++)
12211 tree arg;
12212 machine_mode opmode, argmode;
12213 tree optype;
12215 if (! signature_args[signature][i])
12216 break;
12217 arg = CALL_EXPR_ARG (exp, i - 1);
12218 if (arg == error_mark_node)
12219 return const0_rtx;
12220 if (signature_args[signature][i] & 8)
12222 opmode = ptr_mode;
12223 optype = ptr_type_node;
12225 else
12227 opmode = insn_data[icode].operand[nop].mode;
12228 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12230 argmode = TYPE_MODE (TREE_TYPE (arg));
12231 if (argmode != opmode)
12232 arg = build1 (NOP_EXPR, optype, arg);
12233 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12234 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12235 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12238 rtx pat = NULL_RTX;
12240 switch (nop)
12242 case 1:
12243 pat = (*insn_data[d->icode].genfun) (op[0]);
12244 break;
12245 case 2:
12246 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12247 break;
12248 case 3:
12249 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12250 break;
12251 case 4:
12252 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12253 break;
12254 default:
12255 gcc_unreachable ();
12257 if (! pat)
12258 return NULL_RTX;
12259 emit_insn (pat);
12260 return target;
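/* Expand a V2SFmode unary operation CODE on OP1 into OP0 by applying the
   corresponding SFmode operation to each of the two vector lanes.  */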
12263 void
12264 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12266 rtx sel0 = const0_rtx;
12267 rtx sel1 = const1_rtx;
12268 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12269 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12271 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12272 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
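/* Likewise for a V2SFmode binary operation CODE on OP1 and OP2, with the
   result stored in OP0.  */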
12275 void
12276 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12278 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12280 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12281 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12284 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12285 We can allow any mode in any general register. The special registers
12286 only allow SImode. Don't allow any mode in the PR.
12288 We cannot hold DCmode values in the XD registers because alter_reg
12289 handles subregs of them incorrectly. We could work around this by
12290 spacing the XD registers like the DR registers, but this would require
12291 additional memory in every compilation to hold larger register vectors.
12292 We could hold SFmode / SCmode values in XD registers, but that
12293 would require a tertiary reload when reloading from / to memory,
12294 and a secondary reload to reload from / to general regs; that
12295 seems to be a losing proposition.
12297 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12298 it won't be ferried through GP registers first. */
12299 bool
12300 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12302 if (SPECIAL_REGISTER_P (regno))
12303 return mode == SImode;
12305 if (regno == FPUL_REG)
12306 return (mode == SImode || mode == SFmode);
12308 if (FP_REGISTER_P (regno) && mode == SFmode)
12309 return true;
12311 if (mode == V2SFmode)
12313 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12314 || GENERAL_REGISTER_P (regno)))
12315 return true;
12316 else
12317 return false;
12320 if (mode == V4SFmode)
12322 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12323 || GENERAL_REGISTER_P (regno))
12324 return true;
12325 else
12326 return false;
12329 if (mode == V16SFmode)
12331 if (TARGET_SHMEDIA)
12333 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12334 return true;
12335 else
12336 return false;
12338 else
12339 return regno == FIRST_XD_REG;
12342 if (FP_REGISTER_P (regno))
12344 if (mode == SFmode
12345 || mode == SImode
12346 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12347 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12348 || mode == DCmode
12349 || (TARGET_SHMEDIA
12350 && (mode == DFmode || mode == DImode
12351 || mode == V2SFmode || mode == TImode)))
12352 && ((regno - FIRST_FP_REG) & 1) == 0)
12353 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12354 && ((regno - FIRST_FP_REG) & 3) == 0))
12355 return true;
12356 else
12357 return false;
12360 if (XD_REGISTER_P (regno))
12361 return mode == DFmode;
12363 if (TARGET_REGISTER_P (regno))
12364 return (mode == DImode || mode == SImode || mode == PDImode);
12366 if (regno == PR_REG)
12367 return mode == SImode;
12369 if (regno == FPSCR_REG)
12370 return mode == SImode;
12372 /* FIXME. This works around PR target/37633 for -O0. */
12373 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12375 unsigned int n = GET_MODE_SIZE (mode) / 8;
12377 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12378 && regno <= FIRST_GENERAL_REG + 14)
12379 return false;
12382 return true;
12385 /* Specify the modes required to caller save a given hard regno.
12386 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12387 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12388 permits integer modes on them. That makes LRA's split process
12389 unhappy. See PR55212. */
12391 machine_mode
12392 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12393 machine_mode mode)
12395 if (FP_REGISTER_P (regno)
12396 && (mode == SFmode
12397 || mode == SCmode
12398 || ((mode == DFmode || mode == DCmode)
12399 && ((regno - FIRST_FP_REG) & 1) == 0)))
12400 return mode;
12402 return choose_hard_reg_mode (regno, nregs, false);
12405 /* Return true if a mode change from FROM to TO is invalid for registers
12406 in class RCLASS. */
12407 bool
12408 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12409 enum reg_class rclass)
12411 /* We want to enable the use of SUBREGs as a means to
12412 VEC_SELECT a single element of a vector. */
12414 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12415 This can be problematic when SFmode vector subregs need to be accessed
12416 on the stack with displacement addressing, as it happens with -O0.
12417 Thus we disallow the mode change for -O0. */
12418 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12419 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12421 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12423 if (TARGET_LITTLE_ENDIAN)
12425 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12426 return reg_classes_intersect_p (DF_REGS, rclass);
12428 else
12430 if (GET_MODE_SIZE (from) < 8)
12431 return reg_classes_intersect_p (DF_REGS, rclass);
12434 return false;
12437 /* Return true if registers in machine mode MODE will likely be
12438 allocated to registers in small register classes. */
12439 bool
12440 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12442 return (! TARGET_SHMEDIA);
12445 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12446 that label is used. */
12447 void
12448 sh_mark_label (rtx address, int nuses)
12450 if (GOTOFF_P (address))
12452 /* Extract the label or symbol. */
12453 address = XEXP (address, 0);
12454 if (GET_CODE (address) == PLUS)
12455 address = XEXP (address, 0);
12456 address = XVECEXP (address, 0, 0);
12458 if (GET_CODE (address) == LABEL_REF
12459 && LABEL_P (XEXP (address, 0)))
12460 LABEL_NUSES (XEXP (address, 0)) += nuses;
12463 /* Compute extra cost of moving data between one register class
12464 and another.
12466 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12467 uses this information. Hence, the general register <-> floating point
12468 register information here is not used for SFmode. */
12469 static int
12470 sh_register_move_cost (machine_mode mode,
12471 reg_class_t srcclass, reg_class_t dstclass)
12473 if (dstclass == T_REGS || dstclass == PR_REGS)
12474 return 10;
12476 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12477 return 4;
12479 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12480 && REGCLASS_HAS_FP_REG (srcclass)
12481 && REGCLASS_HAS_FP_REG (dstclass))
12482 return 4;
12484 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12485 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12487 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12488 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12489 return 9;
12491 if ((REGCLASS_HAS_FP_REG (dstclass)
12492 && REGCLASS_HAS_GENERAL_REG (srcclass))
12493 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12494 && REGCLASS_HAS_FP_REG (srcclass)))
12496 /* Discourage trying to use fp regs for a pointer. This also
12497 discourages fp regs with SImode because Pmode is an alias
12498 of SImode on this target. See PR target/48596. */
12499 int addend = (mode == Pmode) ? 40 : 0;
12501 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12502 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12505 if ((dstclass == FPUL_REGS
12506 && REGCLASS_HAS_GENERAL_REG (srcclass))
12507 || (srcclass == FPUL_REGS
12508 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12509 return 5;
12511 if ((dstclass == FPUL_REGS
12512 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12513 || (srcclass == FPUL_REGS
12514 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12515 return 7;
12517 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12518 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12519 return 20;
12521 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12522 if (TARGET_SHMEDIA
12523 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12525 if (sh_gettrcost >= 0)
12526 return sh_gettrcost;
12527 else if (!TARGET_PT_FIXED)
12528 return 100;
12531 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12532 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12533 return 4;
12535 if (TARGET_SHMEDIA
12536 || (TARGET_FMOVD
12537 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12538 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12539 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12541 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
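/* Load a pointer-sized value from memory at ADDR into REG, sign-extending
   the value to Pmode when Pmode is wider than ptr_mode.  */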
12544 static rtx
12545 emit_load_ptr (rtx reg, rtx addr)
12547 rtx mem = gen_const_mem (ptr_mode, addr);
12549 if (Pmode != ptr_mode)
12550 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12551 return emit_move_insn (reg, mem);
12554 static void
12555 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12556 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12557 tree function)
12559 CUMULATIVE_ARGS cum;
12560 int structure_value_byref = 0;
12561 rtx this_rtx, this_value, sibcall, funexp;
12562 rtx_insn *insns;
12563 tree funtype = TREE_TYPE (function);
12564 int simple_add = CONST_OK_FOR_ADD (delta);
12565 int did_load = 0;
12566 rtx scratch0, scratch1, scratch2;
12567 unsigned i;
12569 reload_completed = 1;
12570 epilogue_completed = 1;
12571 crtl->uses_only_leaf_regs = 1;
12573 emit_note (NOTE_INSN_PROLOGUE_END);
12575 /* Find the "this" pointer. We have such a wide range of ABIs for the
12576 SH that it's best to do this completely machine independently.
12577 "this" is passed as first argument, unless a structure return pointer
12578 comes first, in which case "this" comes second. */
12579 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12580 #ifndef PCC_STATIC_STRUCT_RETURN
12581 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12582 structure_value_byref = 1;
12583 #endif /* not PCC_STATIC_STRUCT_RETURN */
12584 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12586 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12588 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12590 this_rtx
12591 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12593 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12594 static chain pointer (even if you can't have nested virtual functions
12595 right now, someone might implement them sometime), and the rest of the
12596 registers are used for argument passing, are callee-saved, or reserved. */
12597 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12598 -ffixed-reg has been used. */
12599 if (! call_used_regs[0] || fixed_regs[0])
12600 error ("r0 needs to be available as a call-clobbered register");
12601 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12602 if (! TARGET_SH5)
12604 if (call_used_regs[1] && ! fixed_regs[1])
12605 scratch1 = gen_rtx_REG (ptr_mode, 1);
12606 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12607 to the location where struct values are to be returned. */
12608 if (call_used_regs[3] && ! fixed_regs[3])
12609 scratch2 = gen_rtx_REG (Pmode, 3);
12611 else if (TARGET_SHMEDIA)
12613 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12614 if (i != REGNO (scratch0) &&
12615 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12617 scratch1 = gen_rtx_REG (ptr_mode, i);
12618 break;
12620 if (scratch1 == scratch0)
12621 error ("need a second call-clobbered general purpose register");
12622 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12623 if (call_used_regs[i] && ! fixed_regs[i])
12625 scratch2 = gen_rtx_REG (Pmode, i);
12626 break;
12628 if (scratch2 == scratch0)
12629 error ("need a call-clobbered target register");
12632 this_value = plus_constant (Pmode, this_rtx, delta);
12633 if (vcall_offset
12634 && (simple_add || scratch0 != scratch1)
12635 && strict_memory_address_p (ptr_mode, this_value))
12637 emit_load_ptr (scratch0, this_value);
12638 did_load = 1;
12641 if (!delta)
12642 ; /* Do nothing. */
12643 else if (simple_add)
12644 emit_move_insn (this_rtx, this_value);
12645 else
12647 emit_move_insn (scratch1, GEN_INT (delta));
12648 emit_insn (gen_add2_insn (this_rtx, scratch1));
12651 if (vcall_offset)
12653 rtx offset_addr;
12655 if (!did_load)
12656 emit_load_ptr (scratch0, this_rtx);
12658 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12659 if (strict_memory_address_p (ptr_mode, offset_addr))
12660 ; /* Do nothing. */
12661 else if (! TARGET_SH5 && scratch0 != scratch1)
12663 /* scratch0 != scratch1, and we have indexed loads. Get better
12664 schedule by loading the offset into r1 and using an indexed
12665 load - then the load of r1 can issue before the load from
12666 (this_rtx + delta) finishes. */
12667 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12668 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12670 else if (CONST_OK_FOR_ADD (vcall_offset))
12672 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12673 offset_addr = scratch0;
12675 else if (scratch0 != scratch1)
12677 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12678 emit_insn (gen_add2_insn (scratch0, scratch1));
12679 offset_addr = scratch0;
12681 else
12682 gcc_unreachable (); /* FIXME */
12683 emit_load_ptr (scratch0, offset_addr);
12685 if (Pmode != ptr_mode)
12686 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12687 emit_insn (gen_add2_insn (this_rtx, scratch0));
12690 /* Generate a tail call to the target function. */
12691 if (! TREE_USED (function))
12693 assemble_external (function);
12694 TREE_USED (function) = 1;
12696 funexp = XEXP (DECL_RTL (function), 0);
12697 /* If the function is overridden, so is the thunk, hence we don't
12698 need GOT addressing even if this is a public symbol. */
12699 #if 0
12700 if (TARGET_SH1 && ! flag_weak)
12701 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12702 else
12703 #endif
12704 if (TARGET_SH2 && flag_pic)
12706 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12707 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12709 else
12711 if (TARGET_SHMEDIA && flag_pic)
12713 funexp = gen_sym2PIC (funexp);
12714 PUT_MODE (funexp, Pmode);
12716 emit_move_insn (scratch2, funexp);
12717 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12718 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12720 sibcall = emit_call_insn (sibcall);
12721 SIBLING_CALL_P (sibcall) = 1;
12722 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12723 emit_barrier ();
12725 /* Run just enough of rest_of_compilation to do scheduling and get
12726 the insns emitted. Note that use_thunk calls
12727 assemble_start_function and assemble_end_function. */
12729 insns = get_insns ();
12731 if (optimize > 0)
12733 if (! cfun->cfg)
12734 init_flow (cfun);
12735 split_all_insns_noflow ();
12738 sh_reorg ();
12739 shorten_branches (insns);
12740 final_start_function (insns, file, 1);
12741 final (insns, file, 1);
12742 final_end_function ();
12744 reload_completed = 0;
12745 epilogue_completed = 0;
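/* Return an rtx holding the address of the function NAME of the given
   KIND, loading it into TARGET if TARGET is nonnull.  For PIC, SFUNC_GOT
   and SFUNC_STATIC addresses are loaded through GOT / GOTOFF relocations.  */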
12748 rtx
12749 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12751 rtx sym;
12753 /* If this is not an ordinary function, the name usually comes from a
12754 string literal or an sprintf buffer. Make sure we use the same
12755 string consistently, so that cse will be able to unify address loads. */
12756 if (kind != FUNCTION_ORDINARY)
12757 name = IDENTIFIER_POINTER (get_identifier (name));
12758 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12759 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12760 if (flag_pic)
12761 switch (kind)
12763 case FUNCTION_ORDINARY:
12764 break;
12765 case SFUNC_GOT:
12767 rtx reg = target ? target : gen_reg_rtx (Pmode);
12769 emit_insn (gen_symGOT2reg (reg, sym));
12770 sym = reg;
12771 break;
12773 case SFUNC_STATIC:
12775 /* ??? To allow cse to work, we use GOTOFF relocations.
12776 We could add combiner patterns to transform this into
12777 straight pc-relative calls with sym2PIC / bsrf when
12778 label load and function call are still 1:1 and in the
12779 same basic block during combine. */
12780 rtx reg = target ? target : gen_reg_rtx (Pmode);
12782 emit_insn (gen_symGOTOFF2reg (reg, sym));
12783 sym = reg;
12784 break;
12787 if (target && sym != target)
12789 emit_move_insn (target, sym);
12790 return target;
12792 return sym;
12795 /* Find the number of a general purpose register in S. */
12796 static int
12797 scavenge_reg (HARD_REG_SET *s)
12799 int r;
12800 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12801 if (TEST_HARD_REG_BIT (*s, r))
12802 return r;
12803 return -1;
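/* Return an rtx for the value that the PR (return address) register had
   on entry to the current function.  */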
12806 rtx
12807 sh_get_pr_initial_val (void)
12809 rtx val;
12811 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12812 PR register on SHcompact, because it might be clobbered by the prologue.
12813 We check first if that is known to be the case. */
12814 if (TARGET_SHCOMPACT
12815 && ((crtl->args.info.call_cookie
12816 & ~ CALL_COOKIE_RET_TRAMP (1))
12817 || crtl->saves_all_registers))
12818 return gen_frame_mem (SImode, return_address_pointer_rtx);
12820 /* If we haven't finished rtl generation, there might be a nonlocal label
12821 that we haven't seen yet.
12822 ??? get_hard_reg_initial_val fails if it is called after register
12823 allocation has started, unless it has been called before for the
12824 same register. And even then, we end in trouble if we didn't use
12825 the register in the same basic block before. So call
12826 get_hard_reg_initial_val now and wrap it in an unspec if we might
12827 need to replace it. */
12828 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12829 combine can put the pseudo returned by get_hard_reg_initial_val into
12830 instructions that need a general purpose registers, which will fail to
12831 be recognized when the pseudo becomes allocated to PR. */
12833 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12834 if (TARGET_SH1)
12835 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12836 return val;
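/* Expand an scc pattern whose comparison OPERANDS[1] tests the T register
   (OPERANDS[2]) against the constant OPERANDS[3], storing the result in
   OPERANDS[0].  Return false if the combination is not handled here.  */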
12839 bool
12840 sh_expand_t_scc (rtx operands[])
12842 enum rtx_code code = GET_CODE (operands[1]);
12843 rtx target = operands[0];
12844 rtx op0 = operands[2];
12845 rtx op1 = operands[3];
12846 rtx result = target;
12847 HOST_WIDE_INT val;
12849 if (!REG_P (op0) || REGNO (op0) != T_REG
12850 || !CONST_INT_P (op1))
12851 return false;
12852 if (!REG_P (result))
12853 result = gen_reg_rtx (SImode);
12854 val = INTVAL (op1);
12855 if ((code == EQ && val == 1) || (code == NE && val == 0))
12856 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12857 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12858 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12859 else if (code == EQ || code == NE)
12860 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12861 else
12862 return false;
12863 if (result != target)
12864 emit_move_insn (target, result);
12865 return true;
12868 /* INSN is an sfunc; return the rtx that describes the address used. */
12869 static rtx
12870 extract_sfunc_addr (rtx insn)
12872 rtx pattern, part = NULL_RTX;
12873 int len, i;
12875 pattern = PATTERN (insn);
12876 len = XVECLEN (pattern, 0);
12877 for (i = 0; i < len; i++)
12879 part = XVECEXP (pattern, 0, i);
12880 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12881 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12882 return XEXP (part, 0);
12884 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12885 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12888 /* Verify that the register in use_sfunc_addr still agrees with the address
12889 used in the sfunc. This prevents fill_slots_from_thread from changing
12890 use_sfunc_addr.
12891 INSN is the use_sfunc_addr instruction, and REG is the register it
12892 guards. */
12893 bool
12894 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12896 /* Search for the sfunc. It should really come right after INSN. */
12897 while ((insn = NEXT_INSN (insn)))
12899 if (LABEL_P (insn) || JUMP_P (insn))
12900 break;
12901 if (! INSN_P (insn))
12902 continue;
12904 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12905 insn = seq->insn (0);
12906 if (GET_CODE (PATTERN (insn)) != PARALLEL
12907 || get_attr_type (insn) != TYPE_SFUNC)
12908 continue;
12909 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12911 gcc_unreachable ();
12914 /* This function returns a constant rtx that represents 2**15 / pi in
12915 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12916 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
12917 static GTY(()) rtx sh_fsca_sf2int_rtx;
12919 rtx
12920 sh_fsca_sf2int (void)
12922 if (! sh_fsca_sf2int_rtx)
12924 REAL_VALUE_TYPE rv;
12926 real_from_string (&rv, "10430.378350470453");
12927 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12930 return sh_fsca_sf2int_rtx;
12933 /* This function returns a constant rtx that represents pi / 2**15 in
12934 SFmode. It's used to scale SFmode angles, in radians, to a
12935 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
12936 maps to 0x10000. */
12937 static GTY(()) rtx sh_fsca_int2sf_rtx;
12939 rtx
12940 sh_fsca_int2sf (void)
12942 if (! sh_fsca_int2sf_rtx)
12944 REAL_VALUE_TYPE rv;
12946 real_from_string (&rv, "9.587379924285257e-5");
12947 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12950 return sh_fsca_int2sf_rtx;
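/* A quick numeric sanity check of the two fsca scaling constants above
   (an illustrative sketch only; the small program below is not part of GCC).
   Since 0x10000 corresponds to 2*pi, the radians <-> fixed-point conversion
   factors are 2**16 / (2*pi) = 2**15 / pi and its reciprocal pi / 2**15:

     #include <stdio.h>
     #include <math.h>

     int main (void)
     {
       printf ("%.12f\n", 32768.0 / M_PI);   // about 10430.378350470453
       printf ("%.15e\n", M_PI / 32768.0);   // about 9.587379924285257e-05
       return 0;
     }
*/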
12953 /* Initialize the CUMULATIVE_ARGS structure. */
12954 void
12955 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12956 tree fntype,
12957 rtx libname ATTRIBUTE_UNUSED,
12958 tree fndecl,
12959 signed int n_named_args,
12960 machine_mode mode)
12962 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12963 pcum->free_single_fp_reg = 0;
12964 pcum->stack_regs = 0;
12965 pcum->byref_regs = 0;
12966 pcum->byref = 0;
12967 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12969 /* XXX - Should we check TARGET_HITACHI here ??? */
12970 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12972 if (fntype)
12974 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12975 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12976 pcum->prototype_p = prototype_p (fntype);
12977 pcum->arg_count [(int) SH_ARG_INT]
12978 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12980 pcum->call_cookie
12981 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12982 && pcum->arg_count [(int) SH_ARG_INT] == 0
12983 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12984 ? int_size_in_bytes (TREE_TYPE (fntype))
12985 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12986 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12987 == FIRST_RET_REG));
12989 else
12991 pcum->arg_count [(int) SH_ARG_INT] = 0;
12992 pcum->prototype_p = FALSE;
12993 if (mode != VOIDmode)
12995 pcum->call_cookie =
12996 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12997 && GET_MODE_SIZE (mode) > 4
12998 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13000 /* If the default ABI is the Renesas ABI then all library
13001 calls must assume that the library will be using the
13002 Renesas ABI. So if the function would return its result
13003 in memory then we must force the address of this memory
13004 block onto the stack. Ideally we would like to call
13005 targetm.calls.return_in_memory() here but we do not have
13006 the TYPE or the FNDECL available so we synthesize the
13007 contents of that function as best we can. */
13008 pcum->force_mem =
13009 (TARGET_DEFAULT & MASK_HITACHI)
13010 && (mode == BLKmode
13011 || (GET_MODE_SIZE (mode) > 4
13012 && !(mode == DFmode
13013 && TARGET_FPU_DOUBLE)));
13015 else
13017 pcum->call_cookie = 0;
13018 pcum->force_mem = FALSE;
13024 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13026 enum rtx_code code = TRUNCATE;
13028 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13030 rtx inner = XEXP (x, 0);
13031 machine_mode inner_mode = GET_MODE (inner);
13033 if (inner_mode == mode)
13034 return inner;
13035 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13036 x = inner;
13037 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13038 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13040 code = GET_CODE (x);
13041 x = inner;
13044 return gen_rtx_fmt_e (code, mode, x);
13047 /* Look through X cleaning up truncates of registers that span multiple
13048 actual hard registers. Return the number of changes made. */
13049 int
13050 shmedia_cleanup_truncate (rtx x)
13052 int n_changes = 0;
13053 subrtx_var_iterator::array_type array;
13054 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13056 rtx x = *iter;
13057 if (GET_CODE (x) == TRUNCATE)
13059 rtx reg = XEXP (x, 0);
13060 machine_mode reg_mode = GET_MODE (reg);
13061 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13063 int offset = subreg_lowpart_offset (DImode, reg_mode);
13064 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13065 n_changes += 1;
13066 iter.skip_subrtxes ();
13070 return n_changes;
13073 /* Load and store depend on the highpart of the address. However,
13074 set_attr_alternative does not give well-defined results before reload,
13075 so we must look at the rtl ourselves to see if any of the feeding
13076 registers is used in a memref.
13078 Return true iff INSN contains a MEM. */
13079 bool
13080 sh_contains_memref_p (rtx insn)
13082 subrtx_iterator::array_type array;
13083 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13084 if (MEM_P (*iter))
13085 return true;
13086 return false;
13089 /* Return true iff INSN loads a banked register. */
13090 bool
13091 sh_loads_bankedreg_p (rtx insn)
13093 if (GET_CODE (PATTERN (insn)) == SET)
13095 rtx op = SET_DEST (PATTERN(insn));
13096 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13097 return true;
13100 return false;
13103 /* FNADDR is the MEM expression from a call expander. Return an address
13104 to use in an SHmedia insn pattern. */
13105 rtx
13106 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13108 int is_sym;
13110 fnaddr = XEXP (fnaddr, 0);
13111 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13112 if (flag_pic && is_sym)
13114 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13116 rtx reg = gen_reg_rtx (Pmode);
13118 /* We must not use GOTPLT for sibcalls, because PIC_REG
13119 must be restored before the PLT code gets to run. */
13120 if (is_sibcall)
13121 emit_insn (gen_symGOT2reg (reg, fnaddr));
13122 else
13123 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13124 fnaddr = reg;
13126 else
13128 fnaddr = gen_sym2PIC (fnaddr);
13129 PUT_MODE (fnaddr, Pmode);
13132 /* If ptabs might trap, make this visible to the rest of the compiler.
13133 We generally assume that symbols pertain to valid locations, but
13134 it is possible to generate invalid symbols with asm or linker tricks.
13135 In a list of functions where each returns its successor, an invalid
13136 symbol might denote an empty list. */
13137 if (!TARGET_PT_FIXED
13138 && (!is_sym || TARGET_INVALID_SYMBOLS)
13139 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13141 rtx tr = gen_reg_rtx (PDImode);
13143 emit_insn (gen_ptabs (tr, fnaddr));
13144 fnaddr = tr;
13146 else if (! target_reg_operand (fnaddr, Pmode))
13147 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13148 return fnaddr;
13151 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13152 static reg_class_t
13153 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13155 if (rclass == NO_REGS
13156 && TARGET_SHMEDIA
13157 && (CONST_DOUBLE_P (x)
13158 || GET_CODE (x) == SYMBOL_REF
13159 || PIC_ADDR_P (x)))
13160 return GENERAL_REGS;
13162 return rclass;
13165 /* Implement TARGET_SECONDARY_RELOAD. */
13166 static reg_class_t
13167 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13168 machine_mode mode, secondary_reload_info *sri)
13170 enum reg_class rclass = (enum reg_class) rclass_i;
13172 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13173 && REG_P (XEXP (XEXP (x, 0), 0))
13174 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13175 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13177 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13178 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13180 if (REG_P (x) && REGNO (x) == GBR_REG)
13181 return NO_REGS;
13183 if (in_p)
13185 if (REGCLASS_HAS_FP_REG (rclass)
13186 && ! TARGET_SHMEDIA
13187 && immediate_operand ((x), mode)
13188 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13189 switch (mode)
13191 case SFmode:
13192 sri->icode = CODE_FOR_reload_insf__frn;
13193 return NO_REGS;
13194 case DFmode:
13195 sri->icode = CODE_FOR_reload_indf__frn;
13196 return NO_REGS;
13197 case SImode:
13198 /* ??? If we knew that we are in the appropriate mode -
13199 single precision - we could use a reload pattern directly. */
13200 return FPUL_REGS;
13201 default:
13202 abort ();
13204 if (rclass == FPUL_REGS
13205 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13206 || REGNO (x) == T_REG))
13207 || GET_CODE (x) == PLUS))
13208 return GENERAL_REGS;
13209 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13211 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13212 return GENERAL_REGS;
13213 else if (mode == SFmode)
13214 return FP_REGS;
13215 sri->icode = CODE_FOR_reload_insi__i_fpul;
13216 return NO_REGS;
13218 if (rclass == FPSCR_REGS
13219 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13220 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13221 return GENERAL_REGS;
13222 if (REGCLASS_HAS_FP_REG (rclass)
13223 && TARGET_SHMEDIA
13224 && immediate_operand (x, mode)
13225 && x != CONST0_RTX (GET_MODE (x))
13226 && GET_MODE (x) != V4SFmode)
13227 return GENERAL_REGS;
13228 if ((mode == QImode || mode == HImode)
13229 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13231 sri->icode = ((mode == QImode)
13232 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13233 return NO_REGS;
13235 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13236 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13237 return TARGET_REGS;
13238 } /* end of input-only processing. */
13240 if (((REGCLASS_HAS_FP_REG (rclass)
13241 && (REG_P (x)
13242 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13243 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13244 && TARGET_FMOVD))))
13245 || (REGCLASS_HAS_GENERAL_REG (rclass)
13246 && REG_P (x)
13247 && FP_REGISTER_P (REGNO (x))))
13248 && ! TARGET_SHMEDIA
13249 && (mode == SFmode || mode == SImode))
13250 return FPUL_REGS;
13251 if ((rclass == FPUL_REGS
13252 || (REGCLASS_HAS_FP_REG (rclass)
13253 && ! TARGET_SHMEDIA && mode == SImode))
13254 && (MEM_P (x)
13255 || (REG_P (x)
13256 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13257 || REGNO (x) == T_REG
13258 || system_reg_operand (x, VOIDmode)))))
13260 if (rclass == FPUL_REGS)
13261 return GENERAL_REGS;
13262 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
13264 if ((rclass == TARGET_REGS
13265 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13266 && !satisfies_constraint_Csy (x)
13267 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13268 return GENERAL_REGS;
13269 if ((rclass == MAC_REGS || rclass == PR_REGS)
13270 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13271 && rclass != REGNO_REG_CLASS (REGNO (x)))
13272 return GENERAL_REGS;
13273 if (rclass != GENERAL_REGS && REG_P (x)
13274 && TARGET_REGISTER_P (REGNO (x)))
13275 return GENERAL_REGS;
13277 /* If we get here, fall back to loading the FPUL register through general
13278 registers.  This case can happen when the movsi_ie insn is picked initially to
13279 load/store the FPUL register from/to another register, and then the
13280 other register is allocated on the stack. */
13281 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13282 return GENERAL_REGS;
13284 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13285 the other operand.
13286 On SH2A we could also just leave it alone here, which would result in a
13287 4 byte move insn being generated instead. However, for this to work
13288 the insns must have the appropriate alternatives. */
13289 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13290 && satisfies_constraint_Sdd (x)
13291 && sh_disp_addr_displacement (x)
13292 <= sh_max_mov_insn_displacement (mode, false))
13293 return R0_REGS;
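/* For illustration (SH1..SH4; SH2A additionally has longer encodings that
   accept other registers): the short displacement forms of the byte and
   word moves only exist with R0 as the data register, e.g.

     mov.b  @(4,r5),r0     ! encodable
     mov.b  @(4,r5),r3     ! no such encoding on non-SH2A parts

   which is why such addresses are steered into R0_REGS above.  */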
13295 /* When reload is trying to address a QImode or HImode subreg on the stack,
13296 force any subreg byte into R0_REGS, as this is going to become a
13297 displacement address.
13298 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13299 is on the stack, the memref to it might already require a displacement
13300 and that has to be added to the final address. At this point we don't
13301 know the cumulative displacement so we assume the worst case. */
13302 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13303 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13304 return R0_REGS;
13306 return NO_REGS;
13309 /* Return true if SUBST can't safely replace its equivalent during RA. */
13310 static bool
13311 sh_cannot_substitute_mem_equiv_p (rtx)
13313 if (TARGET_SHMEDIA)
13314 return false;
13316 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13317 uses R0 and may cause spill failure when R0 is already used.
13318 We have to return true for that case at least.
13319 Moreover, SH has strong R0 parity and also does not have enough hard
13320 registers to make the equiv substitution win in size and speed on
13321 average working sets.  The pseudos produced to hold the equiv values
13322 can't get good hard registers in bad cases and end up as memory
13323 save/restore insns, which make the code worse.  */
13324 return true;
13327 /* Return true if DISP can be legitimized. */
13328 static bool
13329 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13330 machine_mode mode)
13332 if (TARGET_SHMEDIA)
13333 return false;
13335 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13336 || (TARGET_SH2E && mode == SFmode))
13337 return false;
13339 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13340 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13342 *disp = adj.mov_disp;
13343 *offs = adj.offset_adjust;
13344 return true;
13347 return false;
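/* As a rough illustration of what the displacement splitting above deals
   with: the 4-bit scaled displacements of the SH move insns only cover
   0..15 bytes for mov.b, 0..30 for mov.w and 0..60 for mov.l.  A
   hypothetical SImode access at offset 100 therefore cannot be encoded
   directly; sh_find_mov_disp_adjust splits it into a constant adjustment
   of the base register plus a displacement that is in range.  */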
13350 /* Return true if the movsf insn should be split using an additional
13351 register. */
13352 bool
13353 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13355 /* op0 == op1 */
13356 if (rtx_equal_p (op0, op1))
13357 return true;
13358 /* fy, FQ, reg */
13359 if (GET_CODE (op1) == CONST_DOUBLE
13360 && ! satisfies_constraint_G (op1)
13361 && ! satisfies_constraint_H (op1)
13362 && REG_P (op0)
13363 && REG_P (op2))
13364 return true;
13365 /* f, r, y */
13366 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13367 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13368 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13369 return true;
13370 /* r, f, y */
13371 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13372 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13373 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13374 return true;
13376 return false;
13379 static void
13380 sh_conditional_register_usage (void)
13382 int regno;
13383 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13384 if (! VALID_REGISTER_P (regno))
13385 fixed_regs[regno] = call_used_regs[regno] = 1;
13386 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13387 if (TARGET_SH5)
13389 call_used_regs[FIRST_GENERAL_REG + 8]
13390 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13391 call_really_used_regs[FIRST_GENERAL_REG + 8]
13392 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13394 if (TARGET_SHMEDIA)
13396 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13397 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13398 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13400 if (flag_pic)
13402 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13403 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13405 /* Renesas saves and restores mac registers on call. */
13406 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13408 call_really_used_regs[MACH_REG] = 0;
13409 call_really_used_regs[MACL_REG] = 0;
13412 if (TARGET_SHMEDIA)
13414 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13415 if (! fixed_regs[regno] && call_really_used_regs[regno])
13416 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13418 else
13419 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13420 if (! fixed_regs[regno] && call_really_used_regs[regno])
13421 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13423 call_really_used_regs[FPSCR_MODES_REG] = 0;
13424 call_really_used_regs[FPSCR_STAT_REG] = 0;
13427 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13429 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13430 static bool
13431 sh_legitimate_constant_p (machine_mode mode, rtx x)
13433 return (TARGET_SHMEDIA
13434 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13435 || x == CONST0_RTX (mode)
13436 || !TARGET_SHMEDIA_FPU
13437 || TARGET_SHMEDIA64)
13438 : (GET_CODE (x) != CONST_DOUBLE
13439 || mode == DFmode || mode == SFmode
13440 || mode == DImode || GET_MODE (x) == VOIDmode));
13443 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13445 static void
13446 sh_init_sync_libfuncs (void)
13448 init_sync_libfuncs (UNITS_PER_WORD);
13451 /* Return true if it is appropriate to emit `ret' instructions in the
13452 body of a function. */
13453 bool
13454 sh_can_use_simple_return_p (void)
13456 HARD_REG_SET live_regs_mask;
13457 int d;
13459 /* Some targets require special return insns. */
13460 if (TARGET_SHMEDIA
13461 || (TARGET_SHCOMPACT
13462 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13463 return false;
13465 if (! reload_completed || frame_pointer_needed)
13466 return false;
13468 /* Moving the prologue around doesn't reduce the size. */
13469 if (optimize_function_for_size_p (cfun))
13470 return false;
13472 /* Finally, allow for pr save. */
13473 d = calc_live_regs (&live_regs_mask);
13475 if (rounded_frame_size (d) > 4)
13476 return false;
13478 return true;
13481 /*------------------------------------------------------------------------------
13482 Address mode optimization support code
13483 */
13485 typedef HOST_WIDE_INT disp_t;
13486 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13487 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13488 static const disp_t INVALID_DISP = MAX_DISP;
13490 /* A memory reference which is described by a base register and a
13491 displacement. */
13492 class base_reg_disp
13494 public:
13495 base_reg_disp (rtx br, disp_t d);
13497 bool is_reg (void) const;
13498 bool is_disp (void) const;
13499 rtx reg (void) const;
13500 disp_t disp (void) const;
13502 private:
13503 rtx reg_;
13504 disp_t disp_;
13507 inline
13508 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13509 : reg_ (br), disp_ (d)
13513 inline bool
13514 base_reg_disp::is_reg (void) const
13516 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13519 inline bool
13520 base_reg_disp::is_disp (void) const
13522 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13525 inline rtx
13526 base_reg_disp::reg (void) const
13528 return reg_;
13531 inline disp_t
13532 base_reg_disp::disp (void) const
13534 return disp_;
13537 /* Find the base register and calculate the displacement for a given
13538 address rtx 'x'. */
13539 static base_reg_disp
13540 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13541 rtx base_reg = NULL)
13543 if (REG_P (x))
13545 if (REGNO (x) == GBR_REG)
13546 return base_reg_disp (x, disp);
13548 /* We've reached a hard-reg. This is probably the point where
13549 function args are copied to pseudos. Do not go any further and
13550 stick to the pseudo. If the original mem addr was in a hard reg
13551 from the beginning, it will become the base reg. */
13552 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13553 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13555 /* Find the def of the reg and trace it.  If there is more than one
13556 def and they are not all the same, assume it's not safe to proceed. */
13557 rtx_insn* last_i = NULL;
13558 rtx last_set = NULL;
13559 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13560 d = DF_REF_NEXT_REG (d))
13562 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13564 /* Accept multiple defs, as long as they are equal. */
13565 if (last_set == NULL || rtx_equal_p (last_set, set))
13567 last_i = DF_REF_INSN (d);
13568 last_set = set;
13570 else
13572 last_i = NULL;
13573 last_set = NULL;
13574 break;
13578 if (last_set != NULL && last_i != NULL)
13579 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13580 XEXP (last_set, 0));
13582 /* If we get here, no previous insn was found that sets the reg.
13583 The input reg is already the base reg. */
13584 return base_reg_disp (x, disp);
13587 else if (GET_CODE (x) == PLUS)
13589 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13590 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13592 /* Either left or right val must be a reg.
13593 We don't handle the case of 'reg + reg' here. */
13594 if (left_val.is_reg () && right_val.is_disp ())
13595 return base_reg_disp (left_val.reg (), left_val.disp ()
13596 + right_val.disp () + disp);
13597 else if (right_val.is_reg () && left_val.is_disp ())
13598 return base_reg_disp (right_val.reg (), right_val.disp ()
13599 + left_val.disp () + disp);
13600 else
13601 return base_reg_disp (base_reg, disp);
13604 else if (CONST_INT_P (x))
13605 return base_reg_disp (NULL, disp + INTVAL (x));
13607 /* Didn't find anything useful. */
13608 return base_reg_disp (base_reg, disp);
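/* A hedged example of what the def-chain walk above computes: given a
   purely hypothetical insn sequence such as

     (set (reg 200) (reg GBR))
     (set (reg 201) (plus (reg 200) (const_int 40)))
     ... (mem (plus (reg 201) (const_int 4))) ...

   the recursion traces reg 201 back through reg 200 to GBR and sums the
   constants along the way, yielding base reg GBR and displacement 44.  */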
13611 /* Given an insn and a memory operand, try to find an equivalent GBR
13612 based memory address and return the corresponding new memory address.
13613 Return NULL_RTX if not found. */
13614 rtx
13615 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13617 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13618 return NULL_RTX;
13620 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13621 if (side_effects_p (XEXP (mem, 0)))
13622 return NULL_RTX;
13624 /* When not optimizing there might be no dataflow available. */
13625 if (df == NULL)
13626 return NULL_RTX;
13628 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13630 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13632 /* If GBR is marked as call clobbered we bail out if we see a call.
13633 FIXME: Actually we should check if this mem refers to the gbr value
13634 before or after the call.  If there is a store_gbr preceding this
13635 mem, it's safe to use GBR for this mem.
13637 If GBR is not marked as call clobbered, but there is some def other
13638 than a call, it's probably a load_gbr, in which case we also
13639 bail out to be on the safe side.
13640 FIXME: Should check if we have a use-after-def case, such as
13641 the call case above. */
13642 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13643 d = DF_REF_NEXT_REG (d))
13645 if (CALL_P (DF_REF_INSN (d)))
13647 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13648 return NULL_RTX;
13649 else
13650 continue;
13652 else
13653 return NULL_RTX;
13656 rtx disp = GEN_INT (gbr_disp.disp ());
13657 if (gbr_displacement (disp, GET_MODE (mem)))
13658 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13661 return NULL_RTX;
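/* For reference, the GBR-relative address computed above can only be used
   by the R0-based GBR moves, whose scaled displacements cover 0..255 bytes
   for mov.b, 0..510 for mov.w and 0..1020 for mov.l, e.g.

     mov.l  @(44,gbr),r0

   The gbr_displacement check above rejects displacements outside the
   range of the given mode.  */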
13664 /*------------------------------------------------------------------------------
13665 Manual insn combine support code.
13666 */
13668 /* Return true if the specified insn contains any UNSPECs or
13669 UNSPEC_VOLATILEs. */
13670 static bool
13671 sh_unspec_insn_p (rtx x)
13673 subrtx_iterator::array_type array;
13674 FOR_EACH_SUBRTX (i, array, x, ALL)
13675 if (*i != NULL
13676 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13677 return true;
13679 return false;
13682 /* Return true if the register operands of the specified insn are modified
13683 between the specified from and to insns (exclusive of those two). */
13684 bool
13685 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13686 const rtx_insn* from,
13687 const rtx_insn* to)
13689 /* FIXME: Return true for multiple sets for now. */
13690 rtx s = single_set (operands_insn);
13691 if (s == NULL_RTX)
13692 return true;
13694 subrtx_iterator::array_type array;
13695 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13696 if (*i != NULL &&
13697 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13698 return true;
13700 return false;
13703 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13704 negates the T bit and stores the result in the T bit. */
13705 bool
13706 sh_is_nott_insn (const rtx_insn* i)
13708 return i != NULL && GET_CODE (PATTERN (i)) == SET
13709 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13710 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
13713 rtx
13714 sh_movt_set_dest (const rtx_insn* i)
13716 if (i == NULL)
13717 return NULL;
13719 const_rtx p = PATTERN (i);
13720 return GET_CODE (p) == SET
13721 && arith_reg_dest (XEXP (p, 0), SImode)
13722 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13725 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13726 that stores the negated T bit in a register, and return the destination
13727 register rtx, or null. */
13728 rtx
13729 sh_movrt_set_dest (const rtx_insn* i)
13731 if (i == NULL)
13732 return NULL;
13734 const_rtx p = PATTERN (i);
13736 /* The negc movrt replacement is inside a parallel. */
13737 if (GET_CODE (p) == PARALLEL)
13738 p = XVECEXP (p, 0, 0);
13740 return GET_CODE (p) == SET
13741 && arith_reg_dest (XEXP (p, 0), SImode)
13742 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
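/* Roughly, the shapes the three helpers above look for are (illustrative
   RTL only, modulo the exact operand predicates):

     nott:   (set (reg T) (xor (reg T) (const_int 1)))
     movt:   (set (reg Rn) (reg T))
     movrt:  (set (reg Rn) (xor (reg T) (const_int 1)))

   where the movrt form may also appear as the first element of a PARALLEL
   when it comes from the negc-based replacement.  */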
13745 /* Given an insn and a reg number, tell whether the reg dies or is unused
13746 after the insn. */
13747 bool
13748 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13750 return find_regno_note (i, REG_DEAD, regno) != NULL
13751 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13754 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13755 mark it as being used after the insn. */
13756 void
13757 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13759 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13760 remove_note (i, n);
13761 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13762 remove_note (i, n);
13765 /* Given an insn, check if it contains any post/pre inc/dec mem operands and
13766 add the REG_INC notes accordingly.
13767 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13768 FIXME: This function is currently used by peephole2 patterns because
13769 the peephole2 pass does not preserve REG_INC notes. If the notes
13770 are dropped the following passes will do wrong things. */
13771 rtx_insn*
13772 sh_check_add_incdec_notes (rtx_insn* i)
13774 struct for_each_inc_dec_clb
13776 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13777 rtx dest, rtx src ATTRIBUTE_UNUSED,
13778 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13780 gcc_assert (REG_P (dest));
13782 rtx_insn* i = (rtx_insn*)arg;
13783 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13784 add_reg_note (i, REG_INC, dest);
13786 return 0;
13790 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13791 return i;
13794 /* Given an op rtx and an insn, try to find out whether the result of the
13795 specified op consists only of logical operations on T bit stores. */
13796 bool
13797 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13799 if (!logical_operator (op, SImode))
13800 return false;
13802 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13803 int op_is_t_count = 0;
13805 for (int i = 0; i < 2; ++i)
13807 if (t_reg_operand (ops[i], VOIDmode)
13808 || negt_reg_operand (ops[i], VOIDmode))
13809 op_is_t_count++;
13811 else
13813 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13814 prev_nonnote_insn_bb);
13815 if (op_set.set_src == NULL_RTX)
13816 continue;
13818 if (t_reg_operand (op_set.set_src, VOIDmode)
13819 || negt_reg_operand (op_set.set_src, VOIDmode)
13820 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13821 op_is_t_count++;
13825 return op_is_t_count == 2;
13828 /* Given the operand that is extended in a sign/zero extend insn, and the
13829 insn, try to figure out whether the sign/zero extension can be replaced
13830 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13831 NULL_RTX otherwise. */
13832 rtx
13833 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13835 if (REG_P (extended_op))
13836 extended_op = extended_op;
13837 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13838 extended_op = SUBREG_REG (extended_op);
13839 else
13840 return NULL_RTX;
13842 /* Reg moves must be of the same mode. */
13843 if (GET_MODE (extended_op) != SImode)
13844 return NULL_RTX;
13846 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13847 if (s.set_src == NULL_RTX)
13848 return NULL_RTX;
13850 if (t_reg_operand (s.set_src, VOIDmode)
13851 || negt_reg_operand (s.set_src, VOIDmode))
13852 return extended_op;
13854 /* If the zero extended reg was formed by a logical operation, check the
13855 operands of the logical operation. If both originated from T bit
13856 stores the zero extension can be eliminated. */
13857 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13858 return extended_op;
13860 return NULL_RTX;
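/* A small worked example of the reasoning above (hypothetical pseudos):

     r1 = (a == b);                  // T bit store, value is 0 or 1
     r2 = (c == d);                  // T bit store, value is 0 or 1
     r3 = r1 | r2;                   // logical op on 0/1 values, still 0 or 1
     r4 = zero_extend (subreg:QI r3);

   Because every path into r3 only ever produces 0 or 1, the high bits are
   already zero and the zero_extend can be replaced by a plain reg copy.  */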
13863 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13864 figure out whether it should be converted into a movt-xor sequence in
13865 the movrt_negc splitter.
13866 Returns true if insns have been modified and the splitter has succeeded. */
13867 bool
13868 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13870 /* In cases such as
13871 tst r4,r4
13872 mov #-1,r1
13873 negc r1,r1
13874 tst r4,r4
13875 we can replace the T bit clobbering negc with a movt-xor sequence and
13876 eliminate the redundant comparison.
13877 Because the xor insn depends on register allocation results, allow this
13878 only before reload. */
13879 if (!can_create_pseudo_p ())
13880 return false;
13882 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13883 prev_nonnote_insn_bb);
13884 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13885 next_nonnote_insn_bb);
13887 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13888 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13889 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13890 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13891 t_before_negc.insn,
13892 t_after_negc.insn)
13893 && !sh_unspec_insn_p (t_after_negc.insn)
13894 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13895 && !side_effects_p (PATTERN (t_after_negc.insn))
13896 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13898 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13899 set_insn_deleted (t_after_negc.insn);
13900 return true;
13902 else
13903 return false;
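/* Why the replacement is equivalent (a sketch): with r1 preloaded to -1,
   "negc r1,r1" computes r1 = 0 - (-1) - T = 1 - T and clobbers T with the
   borrow, whereas movt (which copies T into a register) followed by an xor
   with 1 also yields 1 - T for T in {0,1} but leaves T itself untouched,
   so the second tst in the sequence above becomes redundant and is the
   insn deleted by the code above.  */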
13906 /* Given a reg and the current insn, see if the value of the reg originated
13907 from a sign or zero extension and return the discovered information. */
13908 sh_extending_set_of_reg
13909 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13911 if (reg == NULL)
13912 return sh_extending_set_of_reg (curr_insn);
13914 if (SUBREG_P (reg))
13915 reg = SUBREG_REG (reg);
13917 if (!REG_P (reg))
13918 return sh_extending_set_of_reg (curr_insn);
13920 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13921 only the adjacent predecessor blocks would cover most of the cases.
13922 Also try to look through the first extension that we hit.  There are some
13923 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13924 fails to see the sign_extend. */
13925 sh_extending_set_of_reg result =
13926 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13928 if (result.set_src != NULL)
13930 if (GET_CODE (result.set_src) == SIGN_EXTEND
13931 || GET_CODE (result.set_src) == ZERO_EXTEND)
13933 if (dump_file)
13934 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13935 "explicitly sign/zero extended in insn %d\n",
13936 REGNO (reg), INSN_UID (result.insn));
13937 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13938 result.ext_code = GET_CODE (result.set_src);
13940 else if (MEM_P (result.set_src)
13941 && (GET_MODE (result.set_src) == QImode
13942 || GET_MODE (result.set_src) == HImode)
13943 && !sh_unspec_insn_p (result.insn))
13945 /* On SH, QImode and HImode memory loads always sign extend.  However, in
13946 some cases where it seems that the higher bits are not interesting,
13947 the loads will not be expanded as sign extending insns, but as plain
13948 QImode / HImode loads into QImode / HImode regs.  We report that
13949 the reg has been sign extended by the mem load. When it is used
13950 as such, we must convert the mem load into a sign extending insn,
13951 see also sh_extending_set_of_reg::use_as_extended_reg. */
13952 if (dump_file)
13953 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13954 "implicitly sign extended in insn %d\n",
13955 REGNO (reg), INSN_UID (result.insn));
13956 result.from_mode = GET_MODE (result.set_src);
13957 result.ext_code = SIGN_EXTEND;
13961 return result;
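/* Illustrative example of the implicit extension case: on SH the memory
   loads

     mov.b  @r4,r1      ! r1 = sign-extended byte
     mov.w  @r4,r1      ! r1 = sign-extended word

   always sign extend into the full 32-bit register, so a QImode/HImode
   load that was expanded as a plain narrow move already carries the
   sign-extended value; the code above records it as a SIGN_EXTEND.  */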
13964 /* Given a reg that is known to be sign or zero extended at some insn,
13965 take the appropriate measures so that the extended value can be used as
13966 a reg at the specified insn and return the resulting reg rtx. */
13967 rtx
13968 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
13970 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
13971 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
13972 gcc_assert (from_mode == QImode || from_mode == HImode);
13974 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
13976 if (dump_file)
13977 fprintf (dump_file,
13978 "use_as_extended_reg: converting non-extending mem load in "
13979 "insn %d into sign-extending load\n", INSN_UID (insn));
13981 rtx r = gen_reg_rtx (SImode);
13982 rtx_insn* i0;
13983 if (from_mode == QImode)
13984 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
13985 else if (from_mode == HImode)
13986 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
13987 else
13988 gcc_unreachable ();
13990 emit_insn_after (
13991 gen_move_insn (XEXP (set_rtx, 0),
13992 gen_lowpart (GET_MODE (set_src), r)), i0);
13993 set_insn_deleted (insn);
13994 return r;
13996 else
13998 rtx extension_dst = XEXP (set_rtx, 0);
13999 if (modified_between_p (extension_dst, insn, use_at_insn))
14001 if (dump_file)
14002 fprintf (dump_file,
14003 "use_as_extended_reg: dest reg %d of extending insn %d is "
14004 "modified, inserting a reg-reg copy\n",
14005 REGNO (extension_dst), INSN_UID (insn));
14007 rtx r = gen_reg_rtx (SImode);
14008 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14009 return r;
14011 else
14013 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14014 return extension_dst;
14019 bool
14020 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14022 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14023 && (from_mode == QImode || from_mode == HImode)
14024 && set_src != NULL)
14025 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14026 else
14027 return false;
14030 rtx
14031 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14033 gcc_assert (can_use_as_unextended_reg ());
14035 rtx r = XEXP (set_src, 0);
14036 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14038 if (modified_between_p (r, insn, use_at_insn))
14040 rtx r1 = gen_reg_rtx (SImode);
14041 emit_insn_after (gen_move_insn (r1, r0), insn);
14042 return r1;
14044 else
14046 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14047 ? REGNO (SUBREG_REG (r))
14048 : REGNO (r));
14049 return r0;
14053 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14054 perform the necessary checks on the operands and split it accordingly. */
14055 void
14056 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14057 int subreg_offset, rtx operands[])
14059 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14061 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14062 curr_insn);
14063 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14064 curr_insn);
14066 /* If one of the operands is known to be zero extended, that's already
14067 sufficient to mask out the unwanted high bits. */
14068 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14070 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14071 operands[1]));
14072 return;
14074 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14076 emit_insn (gen_tstsi_t (operands[0],
14077 eop1.use_as_extended_reg (curr_insn)));
14078 return;
14081 /* None of the operands seem to be zero extended.
14082 If both are sign extended it's OK, too. */
14083 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14084 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14086 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14087 eop1.use_as_extended_reg (curr_insn)));
14088 return;
14091 /* Otherwise we have to insert a zero extension on one of the operands to
14092 mask out the unwanted high bits.
14093 Prefer the operand that has no known extension. */
14094 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14095 std::swap (operands[0], operands[1]);
14097 rtx tmp0 = gen_reg_rtx (SImode);
14098 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14099 GET_MODE (operands[0]), subreg_offset);
14100 emit_insn (subreg_mode == QImode
14101 ? gen_zero_extendqisi2 (tmp0, tmp1)
14102 : gen_zero_extendhisi2 (tmp0, tmp1));
14103 emit_insn (gen_tstsi_t (tmp0, operands[1]));
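/* An illustrative result of the splitting above (hypothetical registers):
   to test the low bytes of r4 and r5 when neither is known to be zero or
   sign extended, one operand is explicitly zero extended first,

     extu.b  r4,r1
     tst     r1,r5      ! T = ((r1 & r5) == 0)

   which is enough to mask the unwanted high bits of the AND, whereas if
   one operand is already known to be zero extended from the subreg mode
   the extu.b is not needed.  */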
14106 /* A helper class to increment/decrement a counter variable each time a
14107 function is entered/left. */
14108 class scope_counter
14110 public:
14111 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14113 ~scope_counter (void)
14115 --m_counter;
14116 gcc_assert (m_counter >= 0);
14119 int count (void) const { return m_counter; }
14121 private:
14122 int& m_counter;
14125 /* Given an rtx x, determine whether the expression can be used to create
14126 an insn that calculates x and stores the result in the T bit.
14127 This is used by the 'treg_set_expr' predicate to construct insn sequences
14128 where T bit results are fed into other insns, such as addc, subc, negc
14129 insns.
14131 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14132 distinguish between 'positive' and 'negative' forms. For now this has to
14133 be done in the preparation code. We could also introduce
14134 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14135 two different patterns for the 'positive' and 'negative' forms.  However,
14136 the total amount of lines of code seems to be about the same and the
14137 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14138 recog function would need to look inside the expression by temporarily
14139 splitting it. */
14140 static int sh_recog_treg_set_expr_reent_count = 0;
14142 bool
14143 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14145 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14147 /* Limit the recursion count to avoid nested expressions which we can't
14148 resolve to a single treg set insn. */
14149 if (recursion.count () > 1)
14150 return false;
14152 /* Early accept known possible operands before doing recog. */
14153 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14154 return true;
14156 /* Early reject impossible operands before doing recog.
14157 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14158 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14159 such as lower-subreg will bail out. Some insns such as SH4A movua are
14160 done with UNSPEC, so we must reject those, too, or else it would result
14161 in an invalid reg -> treg move. */
14162 if (register_operand (op, mode) || memory_operand (op, mode)
14163 || sh_unspec_insn_p (op))
14164 return false;
14166 if (!can_create_pseudo_p ())
14167 return false;
14169 /* We are going to invoke recog in a re-entrant way and thus
14170 have to capture its current state and restore it afterwards. */
14171 recog_data_d prev_recog_data = recog_data;
14173 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14174 SET_PREV_INSN (i) = NULL;
14175 SET_NEXT_INSN (i) = NULL;
14177 int result = recog (PATTERN (i), i, 0);
14179 /* It seems there is no insn like that. Create a simple negated
14180 version and try again. If we hit a negated form, we'll allow that
14181 and append a nott sequence when splitting out the insns. Insns that
14182 do the split can then remove the trailing nott if they know how to
14183 deal with it. */
14184 if (result < 0 && GET_CODE (op) == EQ)
14186 PUT_CODE (op, NE);
14187 result = recog (PATTERN (i), i, 0);
14188 PUT_CODE (op, EQ);
14190 if (result < 0 && GET_CODE (op) == NE)
14192 PUT_CODE (op, EQ);
14193 result = recog (PATTERN (i), i, 0);
14194 PUT_CODE (op, NE);
14197 recog_data = prev_recog_data;
14198 return result >= 0;
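/* A hedged usage sketch: an expression such as (eq (reg r4) (const_int 0))
   is accepted here because recog finds an insn (a tst) that can compute it
   directly into the T bit.  A consumer pattern such as addc can then take
   the expression as a 'treg_set_expr' operand, and sh_split_treg_set_expr
   below will emit something along the lines of

     tst    r4,r4       ! T = (r4 == 0)
     addc   r2,r1       ! r1 = r1 + r2 + T

   possibly followed by a nott when only the negated form was matched.  */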
14201 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14202 This can be used as a condition for insn/split patterns to allow certain
14203 T bit setting patterns only to be matched as sub expressions of other
14204 patterns. */
14205 bool
14206 sh_in_recog_treg_set_expr (void)
14208 return sh_recog_treg_set_expr_reent_count > 0;
14211 /* Given an rtx x, which is assumed to be some expression that has been
14212 matched by the 'treg_set_expr' predicate before, split and emit the
14213 insns that are necessary to calculate the expression and store the result
14214 in the T bit.
14215 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14216 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14217 'delete_insn' which then causes the DF parts to bail out, because we
14218 currently are inside another gen_split* function and would invoke
14219 'try_split' in a reentrant way. */
14220 static std::pair<rtx_insn*, rtx_insn*>
14221 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14223 if (dump_file)
14225 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14226 print_rtl_single (dump_file, i);
14227 fprintf (dump_file, "\n");
14230 rtx_insn* seq = safe_as_a<rtx_insn*> (split_insns (PATTERN (i), curr_insn));
14232 if (seq == NULL)
14233 return std::make_pair (i, i);
14235 /* Avoid infinite splitter loops if any insn of the result matches
14236 the original pattern. */
14237 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14238 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14239 return std::make_pair (i, i);
14241 unshare_all_rtl_in_chain (seq);
14243 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14244 a linked list, replace the single insn with the new insns. */
14245 rtx_insn* seqlast = seq;
14246 while (NEXT_INSN (seqlast) != NULL)
14247 seqlast = NEXT_INSN (seqlast);
14249 if (rtx_insn* iprev = PREV_INSN (i))
14250 SET_NEXT_INSN (iprev) = seq;
14251 if (rtx_insn* inext = NEXT_INSN (i))
14252 SET_PREV_INSN (inext) = seqlast;
14254 SET_PREV_INSN (seq) = PREV_INSN (i);
14255 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14257 SET_PREV_INSN (i) = NULL;
14258 SET_NEXT_INSN (i) = NULL;
14260 /* Recursively split all insns. */
14261 for (i = seq; ; i = NEXT_INSN (i))
14263 std::pair<rtx_insn*, rtx_insn*> ii =
14264 sh_try_split_insn_simple (i, curr_insn, n + 1);
14265 if (i == seq)
14266 seq = ii.first;
14267 if (i == seqlast)
14269 seqlast = ii.second;
14270 break;
14272 i = ii.first;
14275 return std::make_pair (seq, seqlast);
14278 sh_treg_insns
14279 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14281 if (t_reg_operand (x, VOIDmode))
14282 return sh_treg_insns ();
14284 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14286 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14287 SET_PREV_INSN (i) = NULL;
14288 SET_NEXT_INSN (i) = NULL;
14290 if (dump_file)
14292 fprintf (dump_file, "split_treg_set_expr insn:\n");
14293 print_rtl (dump_file, i);
14294 fprintf (dump_file, "\n");
14297 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14298 have to capture its current state and restore it afterwards. */
14299 recog_data_d prev_recog_data = recog_data;
14301 int insn_code = recog (PATTERN (i), i, 0);
14303 /* If the insn was not found, see if we matched the negated form before
14304 and append a nott. */
14305 bool append_nott = false;
14307 if (insn_code < 0 && GET_CODE (x) == EQ)
14309 PUT_CODE (x, NE);
14310 insn_code = recog (PATTERN (i), i, 0);
14311 if (insn_code >= 0)
14312 append_nott = true;
14313 else
14314 PUT_CODE (x, EQ);
14316 if (insn_code < 0 && GET_CODE (x) == NE)
14318 PUT_CODE (x, EQ);
14319 insn_code = recog (PATTERN (i), i, 0);
14320 if (insn_code >= 0)
14321 append_nott = true;
14322 else
14323 PUT_CODE (x, NE);
14326 gcc_assert (insn_code >= 0);
14328 /* Try to recursively split the insn. Some insns might refuse to split
14329 any further while we are in the treg_set_expr splitting phase. They
14330 will be emitted as part of the outer insn and then split again. */
14331 std::pair<rtx_insn*, rtx_insn*> insnlist =
14332 sh_try_split_insn_simple (i, curr_insn);
14334 /* Restore recog state. */
14335 recog_data = prev_recog_data;
14337 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14338 ? insnlist.second
14339 : NULL;
14340 if (dump_file)
14342 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14343 print_rtl (dump_file, insnlist.first);
14344 fprintf (dump_file, "\n");
14346 if (nott_insn != NULL)
14347 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14350 emit_insn (insnlist.first);
14352 if (nott_insn != NULL && append_nott)
14354 if (dump_file)
14355 fprintf (dump_file, "removing trailing nott\n");
14356 remove_insn (nott_insn);
14357 nott_insn = NULL;
14358 append_nott = false;
14361 if (append_nott)
14362 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14364 rtx_insn* first_insn = get_insns ();
14366 if (dump_file)
14368 fprintf (dump_file, "resulting insns:\n");
14369 print_rtl (dump_file, first_insn);
14370 fprintf (dump_file, "\n");
14373 return sh_treg_insns (first_insn, nott_insn);
14376 /*------------------------------------------------------------------------------
14377 Mode switching support code.
14378 */
14380 static void
14381 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14382 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14384 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14385 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14387 emit_insn (gen_toggle_pr ());
14388 if (TARGET_FMOVD)
14389 emit_insn (gen_toggle_sz ());
14391 else if (mode != FP_MODE_NONE)
14393 rtx tmp = gen_reg_rtx (SImode);
14394 emit_insn (gen_sts_fpscr (tmp));
14395 rtx i = NULL;
14397 const unsigned HOST_WIDE_INT fpbits =
14398 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14400 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14401 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14402 else if (mode == FP_MODE_SINGLE)
14403 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14404 else if (mode == FP_MODE_DOUBLE)
14405 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14406 else
14407 gcc_unreachable ();
14409 emit_insn (i);
14410 emit_insn (gen_lds_fpscr (tmp));
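/* For illustration, the read-modify-write sequence built above boils down
   to something like the following (assuming FMOVD, where both PR and SZ
   are toggled; the register numbers are arbitrary):

     sts    fpscr,r1
     xor    r2,r1        ! r2 holds FPSCR_PR | FPSCR_SZ, forced into a reg
     lds    r1,fpscr

   On SH-4 the relevant FPSCR bits are PR (double precision) at bit 19 and
   SZ (64-bit transfer size) at bit 20; SH4A / SH4-300 instead use the
   toggle_pr / toggle_sz insns emitted in the first branch above.  */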
14414 static int
14415 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14417 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14420 static int
14421 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14423 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
14424 get_attr_fp_set (insn) != FP_SET_NONE)
14425 return (int) get_attr_fp_set (insn);
14426 else
14427 return mode;
14430 static int
14431 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14433 return NORMAL_MODE (entity);
14436 static int
14437 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14439 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14442 static int
14443 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14445 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14448 /*------------------------------------------------------------------------------
14449 Misc
14450 */
14452 /* Return true if we use LRA instead of reload pass. */
14453 static bool
14454 sh_lra_p (void)
14456 return sh_lra_flag;
14459 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14461 static bool
14462 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14463 unsigned int align,
14464 enum by_pieces_operation op,
14465 bool speed_p)
14467 switch (op)
14469 case MOVE_BY_PIECES:
14470 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14471 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14472 case STORE_BY_PIECES:
14473 case SET_BY_PIECES:
14474 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14475 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14476 default:
14477 return default_use_by_pieces_infrastructure_p (size, align,
14478 op, speed_p);
14482 #include "gt-sh.h"