[SH] Split QI/HImode load/store via r0 when LRA is enabled.
gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
34 #include "calls.h"
35 #include "varasm.h"
36 #include "flags.h"
37 #include "expr.h"
38 #include "insn-codes.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "hard-reg-set.h"
46 #include "input.h"
47 #include "function.h"
48 #include "regs.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "dwarf2.h"
54 #include "tm_p.h"
55 #include "target.h"
56 #include "target-def.h"
57 #include "langhooks.h"
58 #include "predict.h"
59 #include "dominance.h"
60 #include "cfg.h"
61 #include "cfgrtl.h"
62 #include "cfganal.h"
63 #include "lcm.h"
64 #include "cfgbuild.h"
65 #include "cfgcleanup.h"
66 #include "basic-block.h"
67 #include "df.h"
68 #include "intl.h"
69 #include "sched-int.h"
70 #include "params.h"
71 #include "ggc.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "is-a.h"
79 #include "gimple.h"
80 #include "gimplify.h"
81 #include "cfgloop.h"
82 #include "alloc-pool.h"
83 #include "tm-constrs.h"
84 #include "opts.h"
85 #include "tree-pass.h"
86 #include "pass_manager.h"
87 #include "context.h"
88 #include "builtins.h"
89 #include "rtl-iter.h"
91 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
93 /* These are some macros to abstract register modes. */
94 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
95 && ((HOST_WIDE_INT)(VALUE)) <= 511)
97 #define CONST_OK_FOR_ADD(size) \
98 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
99 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
100 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
101 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
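/* Illustrative note (not part of the original source): CONST_OK_FOR_ADD
   checks whether an adjustment fits a single immediate add.  On SHmedia the
   10-bit signed range defined above applies (-512..511); otherwise the usual
   SH "add #imm8,Rn" range of -128..127 (CONST_OK_FOR_I08) is assumed.  For
   example, output_stack_adjust can emit "add #-104,r15" directly, while a
   512-byte frame needs the constant loaded into a scratch register first.  */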
103 /* Used to simplify the logic below. Find the attributes wherever
104 they may be. */
105 #define SH_ATTRIBUTES(decl) \
106 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
107 : DECL_ATTRIBUTES (decl) \
108 ? (DECL_ATTRIBUTES (decl)) \
109 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
111 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
112 int current_function_interrupt;
114 tree sh_deferred_function_attributes;
115 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
117 /* Global variables for machine-dependent things. */
119 /* Which cpu are we scheduling for. */
120 enum processor_type sh_cpu;
122 /* Definitions used in ready queue reordering for first scheduling pass. */
124 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
125 static short *regmode_weight[2];
127 /* Total SFmode and SImode weights of scheduled insns. */
128 static int curr_regmode_pressure[2];
130 /* Number of r0 life regions. */
131 static int r0_life_regions;
133 /* If true, skip cycles for Q -> R movement. */
134 static int skip_cycles = 0;
136 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
137 and returned from sh_reorder2. */
138 static short cached_can_issue_more;
140 /* Unique number for UNSPEC_BBR pattern. */
141 static unsigned int unspec_bbr_uid = 1;
143 /* Provides the class number of the smallest class containing
144 reg number. */
145 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
147 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
174 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
175 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
176 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
177 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
180 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
181 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
182 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
183 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
184 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
185 GENERAL_REGS, GENERAL_REGS,
188 char sh_register_names[FIRST_PSEUDO_REGISTER] \
189 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
191 char sh_additional_register_names[ADDREGNAMES_SIZE] \
192 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
193 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
195 int assembler_dialect;
197 static bool shmedia_space_reserved_for_target_registers;
199 static void split_branches (rtx_insn *);
200 static int branch_dest (rtx);
201 static void print_slot (rtx_sequence *);
202 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
203 static void dump_table (rtx_insn *, rtx_insn *);
204 static bool broken_move (rtx_insn *);
205 static bool mova_p (rtx_insn *);
206 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
207 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
208 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
209 static void sh_reorg (void);
210 static void sh_option_override (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
212 static rtx_insn *frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static HOST_WIDE_INT rounded_frame_size (int);
218 static bool sh_frame_pointer_required (void);
219 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
220 static int sh_mode_needed (int, rtx_insn *);
221 static int sh_mode_after (int, int, rtx_insn *);
222 static int sh_mode_entry (int);
223 static int sh_mode_exit (int);
224 static int sh_mode_priority (int entity, int n);
225 static bool sh_lra_p (void);
227 static rtx mark_constant_pool_use (rtx);
228 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
229 int, bool *);
230 static tree sh_handle_resbank_handler_attribute (tree *, tree,
231 tree, int, bool *);
232 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
233 tree, int, bool *);
234 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
235 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
236 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
237 static void sh_print_operand (FILE *, rtx, int);
238 static void sh_print_operand_address (FILE *, rtx);
239 static bool sh_print_operand_punct_valid_p (unsigned char code);
240 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
241 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
242 static void sh_insert_attributes (tree, tree *);
243 static const char *sh_check_pch_target_flags (int);
244 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
245 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
246 static int sh_issue_rate (void);
247 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
248 static short find_set_regmode_weight (rtx, machine_mode);
249 static short find_insn_regmode_weight (rtx, machine_mode);
250 static void find_regmode_weight (basic_block, machine_mode);
251 static int find_r0_life_regions (basic_block);
252 static void sh_md_init_global (FILE *, int, int);
253 static void sh_md_finish_global (FILE *, int);
254 static int rank_for_reorder (const void *, const void *);
255 static void swap_reorder (rtx_insn **, int);
256 static void ready_reorder (rtx_insn **, int);
257 static bool high_pressure (machine_mode);
258 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
259 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
260 static void sh_md_init (FILE *, int, int);
261 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
263 static bool sh_function_ok_for_sibcall (tree, tree);
265 static bool sh_cannot_modify_jumps_p (void);
266 static reg_class_t sh_target_reg_class (void);
267 static bool sh_optimize_target_register_callee_saved (bool);
268 static bool sh_ms_bitfield_layout_p (const_tree);
270 static void sh_init_builtins (void);
271 static tree sh_builtin_decl (unsigned, bool);
272 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
273 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
274 HOST_WIDE_INT, tree);
275 static void sh_file_start (void);
276 static bool flow_dependent_p (rtx, rtx);
277 static void flow_dependent_p_1 (rtx, const_rtx, void *);
278 static int shiftcosts (rtx);
279 static int and_xor_ior_costs (rtx, int);
280 static int addsubcosts (rtx);
281 static int multcosts (rtx);
282 static bool unspec_caller_rtx_p (rtx);
283 static bool sh_cannot_copy_insn_p (rtx_insn *);
284 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
285 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
286 static int sh_pr_n_sets (void);
287 static rtx sh_allocate_initial_value (rtx);
288 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
289 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
290 machine_mode,
291 struct secondary_reload_info *);
292 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
293 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
294 static rtx sh_delegitimize_address (rtx);
295 static bool sh_cannot_substitute_mem_equiv_p (rtx);
296 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
297 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
298 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
299 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
300 static int scavenge_reg (HARD_REG_SET *s);
301 struct save_schedule_s;
302 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
303 struct save_schedule_s *, int);
305 static rtx sh_struct_value_rtx (tree, int);
306 static rtx sh_function_value (const_tree, const_tree, bool);
307 static bool sh_function_value_regno_p (const unsigned int);
308 static rtx sh_libcall_value (machine_mode, const_rtx);
309 static bool sh_return_in_memory (const_tree, const_tree);
310 static rtx sh_builtin_saveregs (void);
311 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
312 tree, int *, int);
313 static bool sh_strict_argument_naming (cumulative_args_t);
314 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
315 static tree sh_build_builtin_va_list (void);
316 static void sh_va_start (tree, rtx);
317 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
318 static bool sh_promote_prototypes (const_tree);
319 static machine_mode sh_promote_function_mode (const_tree type,
320 machine_mode,
321 int *punsignedp,
322 const_tree funtype,
323 int for_return);
324 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
325 const_tree, bool);
326 static bool sh_callee_copies (cumulative_args_t, machine_mode,
327 const_tree, bool);
328 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
329 tree, bool);
330 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
331 const_tree, bool);
332 static rtx sh_function_arg (cumulative_args_t, machine_mode,
333 const_tree, bool);
334 static bool sh_scalar_mode_supported_p (machine_mode);
335 static int sh_dwarf_calling_convention (const_tree);
336 static void sh_encode_section_info (tree, rtx, int);
337 static bool sh2a_function_vector_p (tree);
338 static void sh_trampoline_init (rtx, tree, rtx);
339 static rtx sh_trampoline_adjust_address (rtx);
340 static void sh_conditional_register_usage (void);
341 static bool sh_legitimate_constant_p (machine_mode, rtx);
342 static int mov_insn_size (machine_mode, bool);
343 static int mov_insn_alignment_mask (machine_mode, bool);
344 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
345 unsigned int,
346 enum by_pieces_operation,
347 bool);
348 static bool sequence_insn_p (rtx_insn *);
349 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
350 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
351 machine_mode, bool);
352 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
354 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
356 static const struct attribute_spec sh_attribute_table[] =
358 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
359 affects_type_identity } */
360 { "interrupt_handler", 0, 0, true, false, false,
361 sh_handle_interrupt_handler_attribute, false },
362 { "sp_switch", 1, 1, true, false, false,
363 sh_handle_sp_switch_attribute, false },
364 { "trap_exit", 1, 1, true, false, false,
365 sh_handle_trap_exit_attribute, false },
366 { "renesas", 0, 0, false, true, false,
367 sh_handle_renesas_attribute, false },
368 { "trapa_handler", 0, 0, true, false, false,
369 sh_handle_interrupt_handler_attribute, false },
370 { "nosave_low_regs", 0, 0, true, false, false,
371 sh_handle_interrupt_handler_attribute, false },
372 { "resbank", 0, 0, true, false, false,
373 sh_handle_resbank_handler_attribute, false },
374 { "function_vector", 1, 1, true, false, false,
375 sh2a_handle_function_vector_handler_attribute, false },
376 { NULL, 0, 0, false, false, false, NULL, false }
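/* Example use of these attributes in user code (illustrative sketch based on
   the GCC manual, not taken from this file):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack")));
     void isr3 (void) __attribute__ ((interrupt_handler, trap_exit (4)));

   sp_switch names a variable holding the alternate stack pointer and
   trap_exit gives the trap number used when returning from the handler.  */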
379 /* Initialize the GCC target structure. */
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
383 /* The next two are used for debug info when compiling with -gdwarf. */
384 #undef TARGET_ASM_UNALIGNED_HI_OP
385 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
386 #undef TARGET_ASM_UNALIGNED_SI_OP
387 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
389 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
390 #undef TARGET_ASM_UNALIGNED_DI_OP
391 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
392 #undef TARGET_ASM_ALIGNED_DI_OP
393 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE sh_option_override
398 #undef TARGET_PRINT_OPERAND
399 #define TARGET_PRINT_OPERAND sh_print_operand
400 #undef TARGET_PRINT_OPERAND_ADDRESS
401 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
402 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
403 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
404 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
405 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
407 #undef TARGET_ASM_FUNCTION_EPILOGUE
408 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
410 #undef TARGET_ASM_OUTPUT_MI_THUNK
411 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
413 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
414 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
415 hook_bool_const_tree_hwi_hwi_const_tree_true
417 #undef TARGET_ASM_FILE_START
418 #define TARGET_ASM_FILE_START sh_file_start
419 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
420 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
422 #undef TARGET_REGISTER_MOVE_COST
423 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
425 #undef TARGET_INSERT_ATTRIBUTES
426 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
428 #undef TARGET_SCHED_ADJUST_COST
429 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
431 #undef TARGET_SCHED_ISSUE_RATE
432 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
434 /* The next 5 hooks have been implemented for reenabling sched1. With the
435 help of these macros we are limiting the movement of insns in sched1 to
436 reduce the register pressure. The overall idea is to keep count of SImode
437 and SFmode regs required by already scheduled insns. When these counts
438 cross some threshold values, give priority to insns that free registers.
439 The insn that frees registers is most likely to be the insn with lowest
440 LUID (original insn order), but such an insn might be in the stalled
441 queue (Q) instead of the ready queue (R). To solve this, we skip up to
442 a maximum of 8 cycles so that such insns may move from Q -> R.
444 The descriptions of the hooks are as follows:
446 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
447 scheduler; it is called inside the sched_init function just after the
448 find_insn_reg_weights function call. It is used to calculate the SImode
449 and SFmode weights of insns of basic blocks, much like what
450 find_insn_reg_weights does.
451 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
453 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
454 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
455 (Q)->(R).
457 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
458 high; reorder the ready queue so that the insn with lowest LUID will be
459 issued next.
461 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
462 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
464 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
465 can be returned from TARGET_SCHED_REORDER2.
467 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
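/* Rough flow of the scheme described above (an informal sketch added here,
   not part of the original comment):

     sh_md_init_global  -> compute per-insn SImode/SFmode weights
     sh_md_init         -> reset the CURR_REGMODE_PRESSURE counters
     sh_reorder         -> if pressure is high, put the lowest-LUID insn first
     sh_reorder2        -> cache can_issue_more and flag high pressure
     sh_dfa_new_cycle   -> skip up to 8 cycles so stalled insns reach R  */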
469 #undef TARGET_SCHED_DFA_NEW_CYCLE
470 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
472 #undef TARGET_SCHED_INIT_GLOBAL
473 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
475 #undef TARGET_SCHED_FINISH_GLOBAL
476 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
478 #undef TARGET_SCHED_VARIABLE_ISSUE
479 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
481 #undef TARGET_SCHED_REORDER
482 #define TARGET_SCHED_REORDER sh_reorder
484 #undef TARGET_SCHED_REORDER2
485 #define TARGET_SCHED_REORDER2 sh_reorder2
487 #undef TARGET_SCHED_INIT
488 #define TARGET_SCHED_INIT sh_md_init
490 #undef TARGET_DELEGITIMIZE_ADDRESS
491 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
493 #undef TARGET_LEGITIMIZE_ADDRESS
494 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
496 #undef TARGET_CANNOT_MODIFY_JUMPS_P
497 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
498 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
499 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
500 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
501 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
502 sh_optimize_target_register_callee_saved
504 #undef TARGET_MS_BITFIELD_LAYOUT_P
505 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS sh_init_builtins
509 #undef TARGET_BUILTIN_DECL
510 #define TARGET_BUILTIN_DECL sh_builtin_decl
511 #undef TARGET_EXPAND_BUILTIN
512 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
514 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
515 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
517 #undef TARGET_CANNOT_COPY_INSN_P
518 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
519 #undef TARGET_RTX_COSTS
520 #define TARGET_RTX_COSTS sh_rtx_costs
521 #undef TARGET_ADDRESS_COST
522 #define TARGET_ADDRESS_COST sh_address_cost
523 #undef TARGET_ALLOCATE_INITIAL_VALUE
524 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
529 #undef TARGET_DWARF_REGISTER_SPAN
530 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
532 #ifdef HAVE_AS_TLS
533 #undef TARGET_HAVE_TLS
534 #define TARGET_HAVE_TLS true
535 #endif
537 #undef TARGET_PROMOTE_PROTOTYPES
538 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
542 #undef TARGET_FUNCTION_VALUE
543 #define TARGET_FUNCTION_VALUE sh_function_value
544 #undef TARGET_FUNCTION_VALUE_REGNO_P
545 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
546 #undef TARGET_LIBCALL_VALUE
547 #define TARGET_LIBCALL_VALUE sh_libcall_value
548 #undef TARGET_STRUCT_VALUE_RTX
549 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
550 #undef TARGET_RETURN_IN_MEMORY
551 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
553 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
554 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
555 #undef TARGET_SETUP_INCOMING_VARARGS
556 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
557 #undef TARGET_STRICT_ARGUMENT_NAMING
558 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
559 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
560 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
561 #undef TARGET_MUST_PASS_IN_STACK
562 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
565 #undef TARGET_CALLEE_COPIES
566 #define TARGET_CALLEE_COPIES sh_callee_copies
567 #undef TARGET_ARG_PARTIAL_BYTES
568 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
569 #undef TARGET_FUNCTION_ARG
570 #define TARGET_FUNCTION_ARG sh_function_arg
571 #undef TARGET_FUNCTION_ARG_ADVANCE
572 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
574 #undef TARGET_BUILD_BUILTIN_VA_LIST
575 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
576 #undef TARGET_EXPAND_BUILTIN_VA_START
577 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
578 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
579 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
581 #undef TARGET_SCALAR_MODE_SUPPORTED_P
582 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
583 #undef TARGET_VECTOR_MODE_SUPPORTED_P
584 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
586 #undef TARGET_CHECK_PCH_TARGET_FLAGS
587 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
589 #undef TARGET_DWARF_CALLING_CONVENTION
590 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
592 #undef TARGET_FRAME_POINTER_REQUIRED
593 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
595 #undef TARGET_MODE_EMIT
596 #define TARGET_MODE_EMIT sh_emit_mode_set
598 #undef TARGET_MODE_NEEDED
599 #define TARGET_MODE_NEEDED sh_mode_needed
601 #undef TARGET_MODE_AFTER
602 #define TARGET_MODE_AFTER sh_mode_after
604 #undef TARGET_MODE_ENTRY
605 #define TARGET_MODE_ENTRY sh_mode_entry
607 #undef TARGET_MODE_EXIT
608 #define TARGET_MODE_EXIT sh_mode_exit
610 #undef TARGET_MODE_PRIORITY
611 #define TARGET_MODE_PRIORITY sh_mode_priority
613 /* Return regmode weight for insn. */
614 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
615 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
617 /* Return current register pressure for regmode. */
618 #define CURR_REGMODE_PRESSURE(MODE)\
619 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
621 #undef TARGET_ENCODE_SECTION_INFO
622 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
624 #undef TARGET_LRA_P
625 #define TARGET_LRA_P sh_lra_p
627 #undef TARGET_SECONDARY_RELOAD
628 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
636 #undef TARGET_LEGITIMATE_ADDRESS_P
637 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
639 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
640 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
642 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
643 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
644 sh_legitimize_address_displacement
646 #undef TARGET_TRAMPOLINE_INIT
647 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
648 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
649 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
654 #undef TARGET_CANONICALIZE_COMPARISON
655 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
657 #undef TARGET_FIXED_CONDITION_CODE_REGS
658 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
660 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
661 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
662 sh_use_by_pieces_infrastructure_p
664 /* Machine-specific symbol_ref flags. */
665 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
667 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
668 is used by optabs.c atomic op expansion code as well as in sync.md. */
669 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
670 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
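/* Consequence of the above (illustrative): for
     bool was_set = __atomic_test_and_set (&flag, __ATOMIC_SEQ_CST);
   the middle end knows the pattern yields 0x80 for a previously-set byte
   rather than 1, and emits the conversion to bool accordingly instead of
   assuming a 0/1 result.  */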
672 struct gcc_target targetm = TARGET_INITIALIZER;
675 /* Information on the currently selected atomic model.
676 This is initialized in sh_option_override. */
677 static sh_atomic_model selected_atomic_model_;
679 const sh_atomic_model&
680 selected_atomic_model (void)
682 return selected_atomic_model_;
685 static sh_atomic_model
686 parse_validate_atomic_model_option (const char* str)
688 const char* model_names[sh_atomic_model::num_models];
689 model_names[sh_atomic_model::none] = "none";
690 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
691 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
692 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
693 model_names[sh_atomic_model::soft_imask] = "soft-imask";
695 const char* model_cdef_names[sh_atomic_model::num_models];
696 model_cdef_names[sh_atomic_model::none] = "NONE";
697 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
698 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
699 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
700 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
702 sh_atomic_model ret;
703 ret.type = sh_atomic_model::none;
704 ret.name = model_names[sh_atomic_model::none];
705 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
706 ret.strict = false;
707 ret.tcb_gbr_offset = -1;
709 /* Handle empty string as 'none'. */
710 if (str == NULL || *str == '\0')
711 return ret;
713 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
715 std::vector<std::string> tokens;
716 for (std::stringstream ss (str); ss.good (); )
718 tokens.push_back (std::string ());
719 std::getline (ss, tokens.back (), ',');
722 if (tokens.empty ())
723 err_ret ("invalid atomic model option");
725 /* The first token must be the atomic model name. */
727 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
728 if (tokens.front () == model_names[i])
730 ret.type = (sh_atomic_model::enum_type)i;
731 ret.name = model_names[i];
732 ret.cdef_name = model_cdef_names[i];
733 goto got_mode_name;
736 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
737 got_mode_name:;
740 /* Go through the remaining tokens. */
741 for (size_t i = 1; i < tokens.size (); ++i)
743 if (tokens[i] == "strict")
744 ret.strict = true;
745 else if (tokens[i].find ("gbr-offset=") == 0)
747 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
748 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
749 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
750 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
751 "option", offset_str.c_str ());
753 else
754 err_ret ("unknown parameter \"%s\" in atomic model option",
755 tokens[i].c_str ());
758 /* Check that the selection makes sense. */
759 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
760 err_ret ("atomic operations are not supported on SHmedia");
762 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
763 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
764 ret.name);
766 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
767 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
769 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
770 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
772 if (ret.type == sh_atomic_model::soft_tcb
773 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
774 || (ret.tcb_gbr_offset & 3) != 0))
775 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
776 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
777 ret.name);
779 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
780 err_ret ("cannot use atomic model %s in user mode", ret.name);
782 return ret;
784 #undef err_ret
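/* Example -matomic-model= strings accepted by the parser above
   (illustrative, mirroring the token handling in this function):
     "soft-gusa"                gUSA software atomics (SH3 and SH4 targets)
     "soft-tcb,gbr-offset=16"   thread control block slot at GBR + 16
                                (must be a multiple of 4 in 0..1020)
     "hard-llcs,strict"         SH4A movli.l/movco.l based atomics  */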
787 /* Register SH specific RTL passes. */
788 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
789 const char* name);
790 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
791 const char* name);
792 static void
793 register_sh_passes (void)
795 if (!TARGET_SH1)
796 return;
798 /* Running the sh_treg_combine pass after ce1 generates better code when
799 comparisons are combined and reg-reg moves are introduced, because
800 reg-reg moves will be eliminated afterwards. However, there are quite
801 a few cases where combine will be unable to fold comparison related insns,
802 thus for now don't do it.
803 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
804 PASS_POS_INSERT_AFTER, "ce1", 1);
805 */
807 /* Run sh_treg_combine pass after combine but before register allocation. */
808 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
809 PASS_POS_INSERT_AFTER, "split1", 1);
811 /* Run sh_treg_combine pass after register allocation and basic block
812 reordering as this sometimes creates new opportunities. */
813 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
814 PASS_POS_INSERT_AFTER, "split4", 1);
816 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
817 is known after a conditional branch.
818 This must be done after basic blocks and branch conditions have
819 stabilized and won't be changed by further passes. */
820 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
821 PASS_POS_INSERT_BEFORE, "sched2", 1);
824 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
825 various options, and do some machine dependent initialization. */
826 static void
827 sh_option_override (void)
829 int regno;
831 SUBTARGET_OVERRIDE_OPTIONS;
832 if (optimize > 1 && !optimize_size)
833 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
835 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
836 TARGET_CBRANCHDI4 = 1;
837 TARGET_CMPEQDI_T = 0;
839 sh_cpu = PROCESSOR_SH1;
840 assembler_dialect = 0;
841 if (TARGET_SH2)
842 sh_cpu = PROCESSOR_SH2;
843 if (TARGET_SH2E)
844 sh_cpu = PROCESSOR_SH2E;
845 if (TARGET_SH2A)
846 sh_cpu = PROCESSOR_SH2A;
847 if (TARGET_SH3)
848 sh_cpu = PROCESSOR_SH3;
849 if (TARGET_SH3E)
850 sh_cpu = PROCESSOR_SH3E;
851 if (TARGET_SH4)
853 assembler_dialect = 1;
854 sh_cpu = PROCESSOR_SH4;
856 if (TARGET_SH4A)
858 assembler_dialect = 1;
859 sh_cpu = PROCESSOR_SH4A;
861 if (TARGET_SH5)
863 sh_cpu = PROCESSOR_SH5;
864 target_flags |= MASK_ALIGN_DOUBLE;
865 if (TARGET_SHMEDIA_FPU)
866 target_flags |= MASK_FMOVD;
867 if (TARGET_SHMEDIA)
869 /* There are no delay slots on SHmedia. */
870 flag_delayed_branch = 0;
871 /* Relaxation isn't yet supported for SHmedia. */
872 target_flags &= ~MASK_RELAX;
873 /* After reload, if conversion does little good but can cause
874 ICEs:
875 - find_if_block doesn't do anything for SH because we don't
876 have conditional execution patterns. (We use conditional
877 move patterns, which are handled differently, and only
878 before reload).
879 - find_cond_trap doesn't do anything for the SH because we
880 don't have conditional traps.
881 - find_if_case_1 uses redirect_edge_and_branch_force in
882 the only path that does an optimization, and this causes
883 an ICE when branch targets are in registers.
884 - find_if_case_2 doesn't do anything for the SHmedia after
885 reload except when it can redirect a tablejump - and
886 that's rather rare. */
887 flag_if_conversion2 = 0;
888 if (! strcmp (sh_div_str, "call"))
889 sh_div_strategy = SH_DIV_CALL;
890 else if (! strcmp (sh_div_str, "call2"))
891 sh_div_strategy = SH_DIV_CALL2;
892 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
893 sh_div_strategy = SH_DIV_FP;
894 else if (! strcmp (sh_div_str, "inv"))
895 sh_div_strategy = SH_DIV_INV;
896 else if (! strcmp (sh_div_str, "inv:minlat"))
897 sh_div_strategy = SH_DIV_INV_MINLAT;
898 else if (! strcmp (sh_div_str, "inv20u"))
899 sh_div_strategy = SH_DIV_INV20U;
900 else if (! strcmp (sh_div_str, "inv20l"))
901 sh_div_strategy = SH_DIV_INV20L;
902 else if (! strcmp (sh_div_str, "inv:call2"))
903 sh_div_strategy = SH_DIV_INV_CALL2;
904 else if (! strcmp (sh_div_str, "inv:call"))
905 sh_div_strategy = SH_DIV_INV_CALL;
906 else if (! strcmp (sh_div_str, "inv:fp"))
908 if (TARGET_FPU_ANY)
909 sh_div_strategy = SH_DIV_INV_FP;
910 else
911 sh_div_strategy = SH_DIV_INV;
913 TARGET_CBRANCHDI4 = 0;
914 /* Assembler CFI isn't yet fully supported for SHmedia. */
915 flag_dwarf2_cfi_asm = 0;
918 else
920 /* Only the sh64-elf assembler fully supports .quad properly. */
921 targetm.asm_out.aligned_op.di = NULL;
922 targetm.asm_out.unaligned_op.di = NULL;
925 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
926 Disable it for everything else. */
927 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
928 TARGET_USERMODE = false;
930 if (TARGET_SH1)
932 if (! strcmp (sh_div_str, "call-div1"))
933 sh_div_strategy = SH_DIV_CALL_DIV1;
934 else if (! strcmp (sh_div_str, "call-fp")
935 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
936 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
937 sh_div_strategy = SH_DIV_CALL_FP;
938 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
939 sh_div_strategy = SH_DIV_CALL_TABLE;
940 else
941 /* Pick one that makes most sense for the target in general.
942 It is not much good to use different functions depending
943 on -Os, since then we'll end up with two different functions
944 when some of the code is compiled for size, and some for
945 speed. */
947 /* SH4 tends to emphasize speed. */
948 if (TARGET_HARD_SH4)
949 sh_div_strategy = SH_DIV_CALL_TABLE;
950 /* These have their own way of doing things. */
951 else if (TARGET_SH2A)
952 sh_div_strategy = SH_DIV_INTRINSIC;
953 /* ??? Should we use the integer SHmedia function instead? */
954 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
955 sh_div_strategy = SH_DIV_CALL_FP;
956 /* SH1 .. SH3 cores often go into small-footprint systems, so
957 default to the smallest implementation available. */
958 else
959 sh_div_strategy = SH_DIV_CALL_DIV1;
961 if (!TARGET_SH1)
962 TARGET_PRETEND_CMOVE = 0;
963 if (sh_divsi3_libfunc[0])
964 ; /* User supplied - leave it alone. */
965 else if (TARGET_DIVIDE_CALL_FP)
966 sh_divsi3_libfunc = "__sdivsi3_i4";
967 else if (TARGET_DIVIDE_CALL_TABLE)
968 sh_divsi3_libfunc = "__sdivsi3_i4i";
969 else if (TARGET_SH5)
970 sh_divsi3_libfunc = "__sdivsi3_1";
971 else
972 sh_divsi3_libfunc = "__sdivsi3";
974 if (sh_branch_cost == -1)
976 /* The SH1 does not have delay slots, hence we get a pipeline stall
977 at every branch. The SH4 is superscalar, so the single delay slot
978 is not sufficient to keep both pipelines filled.
979 In any case, set the default branch cost to '2', as it results in
980 slightly overall smaller code and also enables some if conversions
981 that are required for matching special T bit related insns. */
982 sh_branch_cost = 2;
985 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
986 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
987 TARGET_ZDCBRANCH = 1;
989 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
990 if (! VALID_REGISTER_P (regno))
991 sh_register_names[regno][0] = '\0';
993 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
994 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
995 sh_additional_register_names[regno][0] = '\0';
997 if ((flag_pic && ! TARGET_PREFERGOT)
998 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
999 flag_no_function_cse = 1;
1001 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1003 /* Never run scheduling before reload, since that can
1004 break global alloc, and generates slower code anyway due
1005 to the pressure on R0. */
1006 /* Enable sched1 for SH4 if the user explicitly requests it.
1007 When sched1 is enabled, the ready queue will be reordered by
1008 the target hooks if pressure is high. We cannot do this for
1009 PIC, SH3 and lower as they give spill failures for R0. */
1010 if (!TARGET_HARD_SH4 || flag_pic)
1011 flag_schedule_insns = 0;
1012 /* ??? Current exception handling places basic block boundaries
1013 after call_insns. This causes high pressure on R0 and gives
1014 spill failures for R0 in reload. See PR 22553 and the thread
1015 on gcc-patches
1016 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1017 else if (flag_exceptions)
1019 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1020 warning (0, "ignoring -fschedule-insns because of exception "
1021 "handling bug");
1022 flag_schedule_insns = 0;
1024 else if (flag_schedule_insns
1025 && !global_options_set.x_flag_schedule_insns)
1026 flag_schedule_insns = 0;
1029 /* Unwind info is not correct around the CFG unless either a frame
1030 pointer is present or M_A_O_A (-maccumulate-outgoing-args) is set. Fixing this requires rewriting
1031 unwind info generation to be aware of the CFG and propagating states
1032 around edges. */
1033 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1034 || flag_exceptions || flag_non_call_exceptions)
1035 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1037 warning (0, "unwind tables currently require either a frame pointer "
1038 "or -maccumulate-outgoing-args for correctness");
1039 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1042 /* Unwinding with -freorder-blocks-and-partition does not work on this
1043 architecture, because it requires far jumps to label crossing between
1044 hot/cold sections which are rejected on this architecture. */
1045 if (flag_reorder_blocks_and_partition)
1047 if (flag_exceptions)
1049 inform (input_location,
1050 "-freorder-blocks-and-partition does not work with "
1051 "exceptions on this architecture");
1052 flag_reorder_blocks_and_partition = 0;
1053 flag_reorder_blocks = 1;
1055 else if (flag_unwind_tables)
1057 inform (input_location,
1058 "-freorder-blocks-and-partition does not support unwind "
1059 "info on this architecture");
1060 flag_reorder_blocks_and_partition = 0;
1061 flag_reorder_blocks = 1;
1065 /* Adjust loop, jump and function alignment values (in bytes), if those
1066 were not specified by the user using -falign-loops, -falign-jumps
1067 and -falign-functions options.
1068 32 bit alignment is better for speed, because instructions can be
1069 fetched as a pair from a longword boundary. For size use 16 bit
1070 alignment to get more compact code.
1071 Aligning all jumps increases the code size, even if it might
1072 result in slightly faster code. Thus, it is set to the smallest
1073 alignment possible if not specified by the user. */
1074 if (align_loops == 0)
1076 if (TARGET_SH5)
1077 align_loops = 8;
1078 else
1079 align_loops = optimize_size ? 2 : 4;
1082 if (align_jumps == 0)
1084 if (TARGET_SHMEDIA)
1085 align_jumps = 1 << CACHE_LOG;
1086 else
1087 align_jumps = 2;
1089 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1090 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1092 if (align_functions == 0)
1094 if (TARGET_SHMEDIA)
1095 align_functions = optimize_size
1096 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1097 else
1098 align_functions = optimize_size ? 2 : 4;
1101 /* The linker relaxation code breaks when a function contains
1102 alignments that are larger than that at the start of a
1103 compilation unit. */
1104 if (TARGET_RELAX)
1106 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1108 /* Also take possible .long constants / mova tables into account. */
1109 if (min_align < 4)
1110 min_align = 4;
1111 if (align_functions < min_align)
1112 align_functions = min_align;
1115 if (flag_unsafe_math_optimizations)
1117 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1118 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1119 TARGET_FSCA = 1;
1121 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1122 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1123 TARGET_FSRRA = 1;
1126 /* Allow fsrra insn only if -funsafe-math-optimizations and
1127 -ffinite-math-only are enabled. */
1128 TARGET_FSRRA = TARGET_FSRRA
1129 && flag_unsafe_math_optimizations
1130 && flag_finite_math_only;
1132 /* If the -mieee option was not explicitly set by the user, turn it on
1133 unless -ffinite-math-only was specified. See also PR 33135. */
1134 if (! global_options_set.x_TARGET_IEEE)
1135 TARGET_IEEE = ! flag_finite_math_only;
1137 if (sh_fixed_range_str)
1138 sh_fix_range (sh_fixed_range_str);
1140 /* This target defaults to strict volatile bitfields. */
1141 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1142 flag_strict_volatile_bitfields = 1;
1144 /* Parse atomic model option and make sure it is valid for the current
1145 target CPU. */
1146 selected_atomic_model_
1147 = parse_validate_atomic_model_option (sh_atomic_model_str);
1149 register_sh_passes ();
1152 /* Print the operand address in x to the stream. */
1153 static void
1154 sh_print_operand_address (FILE *stream, rtx x)
1156 switch (GET_CODE (x))
1158 case REG:
1159 case SUBREG:
1160 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1161 break;
1163 case PLUS:
1165 rtx base = XEXP (x, 0);
1166 rtx index = XEXP (x, 1);
1168 switch (GET_CODE (index))
1170 case CONST_INT:
1171 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1172 reg_names[true_regnum (base)]);
1173 break;
1175 case REG:
1176 case SUBREG:
1178 int base_num = true_regnum (base);
1179 int index_num = true_regnum (index);
1181 fprintf (stream, "@(r0,%s)",
1182 reg_names[MAX (base_num, index_num)]);
1183 break;
1186 default:
1187 gcc_unreachable ();
1190 break;
1192 case PRE_DEC:
1193 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1194 break;
1196 case POST_INC:
1197 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1198 break;
1200 default:
1201 x = mark_constant_pool_use (x);
1202 output_addr_const (stream, x);
1203 break;
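/* Examples of the address syntax emitted above (illustrative only):
     (reg r4)                         -> @r4
     (plus (reg r1) (const_int 8))    -> @(8,r1)
     (plus (reg r0) (reg r2))         -> @(r0,r2)
     (pre_dec (reg r15))              -> @-r15
     (post_inc (reg r4))              -> @r4+
   Anything else falls through to output_addr_const, e.g. a constant pool
   label.  */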
1207 /* Print operand x (an rtx) in assembler syntax to file stream
1208 according to modifier code.
1210 '.' print a .s if insn needs delay slot
1211 ',' print LOCAL_LABEL_PREFIX
1212 '@' print trapa, rte or rts depending on the function's interrupt attributes
1213 '#' output a nop if there is nothing to put in the delay slot
1214 ''' print likelihood suffix (/u for unlikely).
1215 '>' print branch target if -fverbose-asm
1216 'O' print a constant without the #
1217 'R' print the LSW of a dp value - changes if in little endian
1218 'S' print the MSW of a dp value - changes if in little endian
1219 'T' print the next word of a dp value - same as 'R' in big endian mode.
1220 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1221 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1222 'N' print 'r63' if the operand is (const_int 0).
1223 'd' print a V2SF reg as dN instead of fpN.
1224 'm' print a pair `base,offset' or `base,index', for LD and ST.
1225 'U' Likewise for {LD,ST}{HI,LO}.
1226 'V' print the position of a single bit set.
1227 'W' print the position of a single bit cleared.
1228 't' print a memory address which is a register.
1229 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1230 'o' output an operator. */
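/* Illustrative use of these modifiers in insn output templates (examples
   only; see sh.md for the actual patterns):
     "jmp\t@%0%#"  jump through a register; '#' emits a nop if nothing ends
                   up in the delay slot.
     "%@\t%#"      function return; '@' picks rts, rte or trapa depending on
                   the interrupt / trap_exit attributes of the function.  */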
1231 static void
1232 sh_print_operand (FILE *stream, rtx x, int code)
1234 int regno;
1235 machine_mode mode;
1237 switch (code)
1239 tree trapa_attr;
1241 case '.':
1242 if (final_sequence
1243 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1244 && get_attr_length (final_sequence->insn (1)))
1245 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1246 break;
1247 case ',':
1248 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1249 break;
1250 case '@':
1251 trapa_attr = lookup_attribute ("trap_exit",
1252 DECL_ATTRIBUTES (current_function_decl));
1253 if (trapa_attr)
1254 fprintf (stream, "trapa #%ld",
1255 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1256 else if (sh_cfun_interrupt_handler_p ())
1258 if (sh_cfun_resbank_handler_p ())
1259 fprintf (stream, "resbank\n");
1260 fprintf (stream, "rte");
1262 else
1263 fprintf (stream, "rts");
1264 break;
1265 case '#':
1266 /* Output a nop if there's nothing in the delay slot. */
1267 if (dbr_sequence_length () == 0)
1268 fprintf (stream, "\n\tnop");
1269 break;
1270 case '\'':
1272 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1274 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1275 fputs ("/u", stream);
1276 break;
1278 case '>':
1279 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1281 fputs ("\t! target: ", stream);
1282 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1284 break;
1285 case 'O':
1286 x = mark_constant_pool_use (x);
1287 output_addr_const (stream, x);
1288 break;
1289 /* N.B.: %R / %S / %T adjust memory addresses by four.
1290 For SHMEDIA, that means they can be used to access the first and
1291 second 32 bit part of a 64 bit (or larger) value that
1292 might be held in floating point registers or memory.
1293 While they can be used to access 64 bit parts of a larger value
1294 held in general purpose registers, that won't work with memory, nor
1295 with fp registers, since the frxx names are used. */
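/* For example (an illustrative note, not in the original comment): with a
   DFmode value in the register pair fr0/fr1, "%S0" prints fr0 and "%R0"
   prints fr1, since FP register pairs are always most significant word
   first.  For general registers the offsets come from SH_REG_MSW_OFFSET
   and SH_REG_LSW_OFFSET and thus depend on the target endianness.  */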
1296 case 'R':
1297 if (REG_P (x) || GET_CODE (x) == SUBREG)
1299 regno = true_regnum (x);
1300 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1301 fputs (reg_names[regno], (stream));
1303 else if (MEM_P (x))
1305 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1306 sh_print_operand_address (stream, XEXP (x, 0));
1308 else
1310 rtx sub = NULL_RTX;
1312 mode = GET_MODE (x);
1313 if (mode == VOIDmode)
1314 mode = DImode;
1315 if (GET_MODE_SIZE (mode) >= 8)
1316 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1317 if (sub)
1318 sh_print_operand (stream, sub, 0);
1319 else
1320 output_operand_lossage ("invalid operand to %%R");
1322 break;
1323 case 'S':
1324 if (REG_P (x) || GET_CODE (x) == SUBREG)
1326 regno = true_regnum (x);
1327 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1328 fputs (reg_names[regno], (stream));
1330 else if (MEM_P (x))
1332 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1333 sh_print_operand_address (stream, XEXP (x, 0));
1335 else
1337 rtx sub = NULL_RTX;
1339 mode = GET_MODE (x);
1340 if (mode == VOIDmode)
1341 mode = DImode;
1342 if (GET_MODE_SIZE (mode) >= 8)
1343 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1344 if (sub)
1345 sh_print_operand (stream, sub, 0);
1346 else
1347 output_operand_lossage ("invalid operand to %%S");
1349 break;
1350 case 'T':
1351 /* Next word of a double. */
1352 switch (GET_CODE (x))
1354 case REG:
1355 fputs (reg_names[REGNO (x) + 1], (stream));
1356 break;
1357 case MEM:
1358 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1359 && GET_CODE (XEXP (x, 0)) != POST_INC)
1360 x = adjust_address (x, SImode, 4);
1361 sh_print_operand_address (stream, XEXP (x, 0));
1362 break;
1363 default:
1364 break;
1366 break;
1368 case 't':
1369 gcc_assert (MEM_P (x));
1370 x = XEXP (x, 0);
1371 switch (GET_CODE (x))
1373 case REG:
1374 case SUBREG:
1375 sh_print_operand (stream, x, 0);
1376 break;
1377 default:
1378 break;
1380 break;
1382 case 'o':
1383 switch (GET_CODE (x))
1385 case PLUS: fputs ("add", stream); break;
1386 case MINUS: fputs ("sub", stream); break;
1387 case MULT: fputs ("mul", stream); break;
1388 case DIV: fputs ("div", stream); break;
1389 case EQ: fputs ("eq", stream); break;
1390 case NE: fputs ("ne", stream); break;
1391 case GT: case LT: fputs ("gt", stream); break;
1392 case GE: case LE: fputs ("ge", stream); break;
1393 case GTU: case LTU: fputs ("gtu", stream); break;
1394 case GEU: case LEU: fputs ("geu", stream); break;
1395 default:
1396 break;
1398 break;
1399 case 'M':
1400 if (TARGET_SHMEDIA)
1402 if (MEM_P (x)
1403 && GET_CODE (XEXP (x, 0)) == PLUS
1404 && (REG_P (XEXP (XEXP (x, 0), 1))
1405 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1406 fputc ('x', stream);
1408 else
1410 if (MEM_P (x))
1412 switch (GET_MODE (x))
1414 case QImode: fputs (".b", stream); break;
1415 case HImode: fputs (".w", stream); break;
1416 case SImode: fputs (".l", stream); break;
1417 case SFmode: fputs (".s", stream); break;
1418 case DFmode: fputs (".d", stream); break;
1419 default: gcc_unreachable ();
1423 break;
1425 case 'm':
1426 gcc_assert (MEM_P (x));
1427 x = XEXP (x, 0);
1428 /* Fall through. */
1429 case 'U':
1430 switch (GET_CODE (x))
1432 case REG:
1433 case SUBREG:
1434 sh_print_operand (stream, x, 0);
1435 fputs (", 0", stream);
1436 break;
1438 case PLUS:
1439 sh_print_operand (stream, XEXP (x, 0), 0);
1440 fputs (", ", stream);
1441 sh_print_operand (stream, XEXP (x, 1), 0);
1442 break;
1444 default:
1445 gcc_unreachable ();
1447 break;
1449 case 'V':
1451 int num = exact_log2 (INTVAL (x));
1452 gcc_assert (num >= 0);
1453 fprintf (stream, "#%d", num);
1455 break;
1457 case 'W':
1459 int num = exact_log2 (~INTVAL (x));
1460 gcc_assert (num >= 0);
1461 fprintf (stream, "#%d", num);
1463 break;
1465 case 'd':
1466 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1468 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1469 break;
1471 case 'N':
1472 if (x == CONST0_RTX (GET_MODE (x)))
1474 fprintf ((stream), "r63");
1475 break;
1477 goto default_output;
1478 case 'u':
1479 if (CONST_INT_P (x))
1481 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1482 break;
1484 /* Fall through. */
1486 default_output:
1487 default:
1488 regno = 0;
1489 mode = GET_MODE (x);
1491 switch (GET_CODE (x))
1493 case TRUNCATE:
1495 rtx inner = XEXP (x, 0);
1496 int offset = 0;
1497 machine_mode inner_mode;
1499 /* We might see SUBREGs with vector mode registers inside. */
1500 if (GET_CODE (inner) == SUBREG
1501 && (GET_MODE_SIZE (GET_MODE (inner))
1502 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1503 && subreg_lowpart_p (inner))
1504 inner = SUBREG_REG (inner);
1505 if (CONST_INT_P (inner))
1507 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1508 goto default_output;
1510 inner_mode = GET_MODE (inner);
1511 if (GET_CODE (inner) == SUBREG
1512 && (GET_MODE_SIZE (GET_MODE (inner))
1513 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1514 && REG_P (SUBREG_REG (inner)))
1516 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1517 GET_MODE (SUBREG_REG (inner)),
1518 SUBREG_BYTE (inner),
1519 GET_MODE (inner));
1520 inner = SUBREG_REG (inner);
1522 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1523 abort ();
1524 /* Floating point register pairs are always big endian;
1525 general purpose registers are 64 bit wide. */
1526 regno = REGNO (inner);
1527 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1528 - HARD_REGNO_NREGS (regno, mode))
1529 + offset;
1530 x = inner;
1531 goto reg;
1533 case SIGN_EXTEND:
1534 x = XEXP (x, 0);
1535 goto reg;
1536 /* FIXME: We need this on SHmedia32 because reload generates
1537 some sign-extended HI or QI loads into DImode registers
1538 but, because Pmode is SImode, the address ends up with a
1539 subreg:SI of the DImode register. Maybe reload should be
1540 fixed so as to apply alter_subreg to such loads? */
1541 case IF_THEN_ELSE:
1542 gcc_assert (trapping_target_operand (x, VOIDmode));
1543 x = XEXP (XEXP (x, 2), 0);
1544 goto default_output;
1545 case SUBREG:
1546 gcc_assert (SUBREG_BYTE (x) == 0
1547 && REG_P (SUBREG_REG (x)));
1549 x = SUBREG_REG (x);
1550 /* Fall through. */
1552 reg:
1553 case REG:
1554 regno += REGNO (x);
1555 if (FP_REGISTER_P (regno)
1556 && mode == V16SFmode)
1557 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1558 else if (FP_REGISTER_P (REGNO (x))
1559 && mode == V4SFmode)
1560 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1561 else if (REG_P (x)
1562 && mode == V2SFmode)
1563 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1564 else if (FP_REGISTER_P (REGNO (x))
1565 && GET_MODE_SIZE (mode) > 4)
1566 fprintf ((stream), "d%s", reg_names[regno] + 1);
1567 else
1568 fputs (reg_names[regno], (stream));
1569 break;
1571 case MEM:
1572 output_address (XEXP (x, 0));
1573 break;
1575 default:
1576 if (TARGET_SH1)
1577 fputc ('#', stream);
1578 output_addr_const (stream, x);
1579 break;
1581 break;
1585 static bool
1586 sh_print_operand_punct_valid_p (unsigned char code)
1588 return (code == '.' || code == '#' || code == '@' || code == ','
1589 || code == '$' || code == '\'' || code == '>');
1592 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1593 static bool
1594 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1596 if (GET_CODE (x) == UNSPEC)
1598 switch (XINT (x, 1))
1600 case UNSPEC_DATALABEL:
1601 fputs ("datalabel ", file);
1602 output_addr_const (file, XVECEXP (x, 0, 0));
1603 break;
1604 case UNSPEC_PIC:
1605 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1606 output_addr_const (file, XVECEXP (x, 0, 0));
1607 break;
1608 case UNSPEC_GOT:
1609 output_addr_const (file, XVECEXP (x, 0, 0));
1610 fputs ("@GOT", file);
1611 break;
1612 case UNSPEC_GOTOFF:
1613 output_addr_const (file, XVECEXP (x, 0, 0));
1614 fputs ("@GOTOFF", file);
1615 break;
1616 case UNSPEC_PLT:
1617 output_addr_const (file, XVECEXP (x, 0, 0));
1618 fputs ("@PLT", file);
1619 break;
1620 case UNSPEC_GOTPLT:
1621 output_addr_const (file, XVECEXP (x, 0, 0));
1622 fputs ("@GOTPLT", file);
1623 break;
1624 case UNSPEC_DTPOFF:
1625 output_addr_const (file, XVECEXP (x, 0, 0));
1626 fputs ("@DTPOFF", file);
1627 break;
1628 case UNSPEC_GOTTPOFF:
1629 output_addr_const (file, XVECEXP (x, 0, 0));
1630 fputs ("@GOTTPOFF", file);
1631 break;
1632 case UNSPEC_TPOFF:
1633 output_addr_const (file, XVECEXP (x, 0, 0));
1634 fputs ("@TPOFF", file);
1635 break;
1636 case UNSPEC_CALLER:
1638 char name[32];
1639 /* LPCS stands for Label for PIC Call Site. */
1640 targetm.asm_out.generate_internal_label (name, "LPCS",
1641 INTVAL (XVECEXP (x, 0, 0)));
1642 assemble_name (file, name);
1644 break;
1645 case UNSPEC_EXTRACT_S16:
1646 case UNSPEC_EXTRACT_U16:
1648 rtx val, shift;
1650 val = XVECEXP (x, 0, 0);
1651 shift = XVECEXP (x, 0, 1);
1652 fputc ('(', file);
1653 if (shift != const0_rtx)
1654 fputc ('(', file);
1655 if (GET_CODE (val) == CONST
1656 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1658 fputc ('(', file);
1659 output_addr_const (file, val);
1660 fputc (')', file);
1662 else
1663 output_addr_const (file, val);
1664 if (shift != const0_rtx)
1666 fputs (" >> ", file);
1667 output_addr_const (file, shift);
1668 fputc (')', file);
1670 fputs (" & 65535)", file);
1672 break;
1673 case UNSPEC_SYMOFF:
1674 output_addr_const (file, XVECEXP (x, 0, 0));
1675 fputc ('-', file);
1676 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1678 fputc ('(', file);
1679 output_addr_const (file, XVECEXP (x, 0, 1));
1680 fputc (')', file);
1682 else
1683 output_addr_const (file, XVECEXP (x, 0, 1));
1684 break;
1685 case UNSPEC_PCREL_SYMOFF:
1686 output_addr_const (file, XVECEXP (x, 0, 0));
1687 fputs ("-(", file);
1688 output_addr_const (file, XVECEXP (x, 0, 1));
1689 fputs ("-.)", file);
1690 break;
1691 default:
1692 return false;
1694 return true;
1696 else
1697 return false;
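/* Illustrative note (not from the original sources): for an address constant
   like (const (unspec [(symbol_ref "foo")] UNSPEC_GOT)) the handler above
   prints "foo@GOT"; likewise UNSPEC_GOTOFF, UNSPEC_PLT and the TLS unspecs
   get their corresponding "@..." relocation suffixes.  */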
1700 /* Encode symbol attributes of a SYMBOL_REF into its
1701 SYMBOL_REF_FLAGS. */
1702 static void
1703 sh_encode_section_info (tree decl, rtx rtl, int first)
1705 default_encode_section_info (decl, rtl, first);
1707 if (TREE_CODE (decl) == FUNCTION_DECL
1708 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1709 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1712 /* Prepare operands for a move define_expand; specifically, one of the
1713 operands must be in a register. */
1714 void
1715 prepare_move_operands (rtx operands[], machine_mode mode)
1717 if ((mode == SImode || mode == DImode)
1718 && flag_pic
1719 && ! ((mode == Pmode || mode == ptr_mode)
1720 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1722 rtx temp;
1723 if (SYMBOLIC_CONST_P (operands[1]))
1725 if (MEM_P (operands[0]))
1726 operands[1] = force_reg (Pmode, operands[1]);
1727 else if (TARGET_SHMEDIA
1728 && GET_CODE (operands[1]) == LABEL_REF
1729 && target_reg_operand (operands[0], mode))
1730 /* It's ok. */;
1731 else
1733 temp = (!can_create_pseudo_p ()
1734 ? operands[0]
1735 : gen_reg_rtx (Pmode));
1736 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1739 else if (GET_CODE (operands[1]) == CONST
1740 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1741 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1743 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1744 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1745 mode, temp);
1746 operands[1] = expand_binop (mode, add_optab, temp,
1747 XEXP (XEXP (operands[1], 0), 1),
1748 (!can_create_pseudo_p ()
1749 ? temp
1750 : gen_reg_rtx (Pmode)),
1751 0, OPTAB_LIB_WIDEN);
1755 if (! reload_in_progress && ! reload_completed)
1757 /* Copy the source to a register if neither operand is a register. */
1758 if (! register_operand (operands[0], mode)
1759 && ! sh_register_operand (operands[1], mode))
1760 operands[1] = copy_to_mode_reg (mode, operands[1]);
1762 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1764 /* This is like change_address_1 (operands[0], mode, 0, 1),
1765 except that we can't use that function because it is static. */
1766 rtx new_rtx = change_address (operands[0], mode, 0);
1767 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1768 operands[0] = new_rtx;
1771 /* This case can happen while generating code to move the result
1772 of a library call to the target. Reject `st r0,@(rX,rY)' because
1773 reload will fail to find a spill register for rX, since r0 is already
1774 being used for the source. */
1775 else if (TARGET_SH1
1776 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1777 && MEM_P (operands[0])
1778 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1779 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1780 operands[1] = copy_to_mode_reg (mode, operands[1]);
1782 /* When displacement addressing is used, the register allocator will
1783 assign r0 to the pseudo register operand of a QI/HImode load/store.
1784 This tends to create a long live range for r0 and can cause
1785 anomalous register spills in some cases with LRA. See PR
1786 target/55212.
1787 We split such a load/store into two move insns via r0 so as to
1788 shorten the r0 live range. This makes some code worse but wins
1789 on average with LRA. */
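/* Hypothetical example of the split (illustration only): a HImode store
       mov.w   rX,@(2,rY)
   is emitted as
       mov     rX,r0
       mov.w   r0,@(2,rY)
   so that r0 is live only across the final store; loads are handled
   symmetrically by loading into r0 first.  */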
1790 else if (sh_lra_p ()
1791 && TARGET_SH1 && ! TARGET_SH2A
1792 && (mode == QImode || mode == HImode)
1793 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1794 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1796 bool load_p = REG_P (operands[0]);
1797 rtx reg = operands[load_p ? 0 : 1];
1798 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1800 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1801 && GET_CODE (adr) == PLUS
1802 && REG_P (XEXP (adr, 0))
1803 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1804 && CONST_INT_P (XEXP (adr, 1))
1805 && INTVAL (XEXP (adr, 1)) != 0
1806 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1808 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1809 emit_move_insn (r0_rtx, operands[1]);
1810 operands[1] = r0_rtx;
1815 if (mode == Pmode || mode == ptr_mode)
1817 rtx op0, op1, opc;
1818 enum tls_model tls_kind;
1820 op0 = operands[0];
1821 op1 = operands[1];
1822 if (GET_CODE (op1) == CONST
1823 && GET_CODE (XEXP (op1, 0)) == PLUS
1824 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1825 != TLS_MODEL_NONE))
1827 opc = XEXP (XEXP (op1, 0), 1);
1828 op1 = XEXP (XEXP (op1, 0), 0);
1830 else
1831 opc = NULL_RTX;
1833 if (! reload_in_progress && ! reload_completed
1834 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1836 rtx tga_op1, tga_ret, tmp, tmp2;
1838 if (! flag_pic
1839 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1840 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1841 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1843 /* Don't schedule insns for getting GOT address when
1844 the first scheduling is enabled, to avoid spill
1845 failures for R0. */
1846 if (flag_schedule_insns)
1847 emit_insn (gen_blockage ());
1848 emit_insn (gen_GOTaddr2picreg ());
1849 emit_use (gen_rtx_REG (SImode, PIC_REG));
1850 if (flag_schedule_insns)
1851 emit_insn (gen_blockage ());
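/* Rough sketch of the sequences generated below (illustrative, not
   normative; the exact insns come from the named patterns in sh.md):
     global/local dynamic: call __tls_get_addr via the tls_global_dynamic
       or tls_local_dynamic patterns, result in r0, plus a DTPOFF add for
       the local-dynamic case;
     initial exec: load the thread-pointer offset from the GOT via
       sym2GOTTPOFF / tls_initial_exec;
     local exec: read GBR and add the TPOFF of the symbol.  */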
1854 switch (tls_kind)
1856 case TLS_MODEL_GLOBAL_DYNAMIC:
1857 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1858 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1859 tmp = gen_reg_rtx (Pmode);
1860 emit_move_insn (tmp, tga_ret);
1861 op1 = tmp;
1862 break;
1864 case TLS_MODEL_LOCAL_DYNAMIC:
1865 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1866 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1868 tmp = gen_reg_rtx (Pmode);
1869 emit_move_insn (tmp, tga_ret);
1871 if (register_operand (op0, Pmode))
1872 tmp2 = op0;
1873 else
1874 tmp2 = gen_reg_rtx (Pmode);
1876 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1877 op1 = tmp2;
1878 break;
1880 case TLS_MODEL_INITIAL_EXEC:
1881 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1882 tmp = gen_sym2GOTTPOFF (op1);
1883 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1884 op1 = tga_op1;
1885 break;
1887 case TLS_MODEL_LOCAL_EXEC:
1888 tmp2 = gen_reg_rtx (Pmode);
1889 emit_insn (gen_store_gbr (tmp2));
1890 tmp = gen_reg_rtx (Pmode);
1891 emit_insn (gen_symTPOFF2reg (tmp, op1));
1893 if (register_operand (op0, Pmode))
1894 op1 = op0;
1895 else
1896 op1 = gen_reg_rtx (Pmode);
1898 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1899 break;
1901 default:
1902 gcc_unreachable ();
1904 if (opc)
1905 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1906 operands[1] = op1;
1911 /* Implement the canonicalize_comparison target hook for the combine
1912 pass. For the target hook this function is invoked via
1913 sh_canonicalize_comparison. This function is also re-used to
1914 canonicalize comparisons in cbranch pattern expanders. */
1915 static void
1916 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1917 machine_mode mode,
1918 bool op0_preserve_value)
1920 /* When invoked from within the combine pass the mode is not specified,
1921 so try to get it from one of the operands. */
1922 if (mode == VOIDmode)
1923 mode = GET_MODE (op0);
1924 if (mode == VOIDmode)
1925 mode = GET_MODE (op1);
1927 // We need to have a mode to do something useful here.
1928 if (mode == VOIDmode)
1929 return;
1931 // Currently, we don't deal with floats here.
1932 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1933 return;
1935 // Make sure that the constant operand is the second operand.
1936 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1938 if (op0_preserve_value)
1939 return;
1941 std::swap (op0, op1);
1942 cmp = swap_condition (cmp);
1945 if (CONST_INT_P (op1))
1947 /* Try to adjust the constant operand in such a way that available
1948 comparison insns can be utilized better and the constant can be
1949 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1950 constant pool. */
1951 const HOST_WIDE_INT val = INTVAL (op1);
1953 /* x > -1 --> x >= 0
1954 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1955 x <= -1 --> x < 0
1956 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1957 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1959 cmp = cmp == GT ? GE : LT;
1960 op1 = gen_int_mode (val + 1, mode);
1963 /* x >= 1 --> x > 0
1964 x >= 0x80 --> x > 0x7F
1965 x < 1 --> x <= 0
1966 x < 0x80 --> x <= 0x7F */
1967 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1969 cmp = cmp == GE ? GT : LE;
1970 op1 = gen_int_mode (val - 1, mode);
1973 /* unsigned x >= 1 --> x != 0
1974 unsigned x < 1 --> x == 0 */
1975 else if (val == 1 && (cmp == GEU || cmp == LTU))
1977 cmp = cmp == GEU ? NE : EQ;
1978 op1 = CONST0_RTX (mode);
1981 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1982 unsigned x < 0x80 --> unsigned x < 0x7F */
1983 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1985 cmp = cmp == GEU ? GTU : LEU;
1986 op1 = gen_int_mode (val - 1, mode);
1989 /* unsigned x > 0 --> x != 0
1990 unsigned x <= 0 --> x == 0 */
1991 else if (val == 0 && (cmp == GTU || cmp == LEU))
1992 cmp = cmp == GTU ? NE : EQ;
1994 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1995 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1996 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1997 && val == 0x7FFFFFFF)
1999 cmp = cmp == GTU ? LT : GE;
2000 op1 = const0_rtx;
2003 /* unsigned x >= 0x80000000 --> signed x < 0
2004 unsigned x < 0x80000000 --> signed x >= 0 */
2005 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2006 && (unsigned HOST_WIDE_INT)val
2007 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2009 cmp = cmp == GEU ? LT : GE;
2010 op1 = const0_rtx;
2015 /* This function implements the canonicalize_comparison target hook.
2016 This wrapper around the internally used sh_canonicalize_comparison
2017 function is needed to do the enum rtx_code <-> int conversion.
2018 Target hooks cannot use enum rtx_code in their definitions. */
2019 static void
2020 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2021 bool op0_preserve_value)
2023 enum rtx_code tmp_code = (enum rtx_code)*code;
2024 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2025 VOIDmode, op0_preserve_value);
2026 *code = (int)tmp_code;
2029 bool
2030 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2032 *p1 = T_REG;
2033 *p2 = INVALID_REGNUM;
2034 return true;
2037 enum rtx_code
2038 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2039 enum rtx_code comparison)
2041 /* The scratch reg is only available when this is invoked from within
2042 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2043 rtx scratch = NULL_RTX;
2045 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2046 comparison = GET_CODE (operands[0]);
2047 else
2048 scratch = operands[4];
2050 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2051 mode, false);
2053 /* Notice that this function is also invoked after reload by
2054 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2055 rtx op1 = operands[1];
2057 if (can_create_pseudo_p ())
2058 operands[1] = force_reg (mode, op1);
2059 /* When we are handling DImode comparisons, we want to keep constants so
2060 that we can optimize the component comparisons; however, memory loads
2061 are better issued as a whole so that they can be scheduled well.
2062 SImode equality comparisons allow I08 constants, but only when they
2063 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2064 into a register, that register might as well be r0, and we allow the
2065 constant. If it is already in a register, this is likely to be
2066 allocated to a different hard register, thus we load the constant into
2067 a register unless it is zero. */
2068 if (!REG_P (operands[2])
2069 && (!CONST_INT_P (operands[2])
2070 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2071 && ((comparison != EQ && comparison != NE)
2072 || (REG_P (op1) && REGNO (op1) != R0_REG)
2073 || !satisfies_constraint_I08 (operands[2])))))
2075 if (scratch && GET_MODE (scratch) == mode)
2077 emit_move_insn (scratch, operands[2]);
2078 operands[2] = scratch;
2080 else if (can_create_pseudo_p ())
2081 operands[2] = force_reg (mode, operands[2]);
2083 return comparison;
2086 void
2087 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2089 rtx (*branch_expander) (rtx) = gen_branch_true;
2090 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2091 switch (comparison)
2093 case NE: case LT: case LE: case LTU: case LEU:
2094 comparison = reverse_condition (comparison);
2095 branch_expander = gen_branch_false;
2096 default: ;
2098 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2099 gen_rtx_fmt_ee (comparison, SImode,
2100 operands[1], operands[2])));
2101 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2102 if (probability >= 0)
2103 add_int_reg_note (jump, REG_BR_PROB, probability);
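/* For illustration (a sketch, not from the sources): this typically ends up
   as a compare that sets the T bit followed by a conditional branch, roughly
       cmp/gt  r5,r4
       bt      .Ltarget
   where NE/LT/LE/LTU/LEU are reversed first so that only the cmp/eq,
   cmp/gt, cmp/ge, cmp/hi and cmp/hs style patterns are needed.  */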
2106 /* ??? How should we distribute probabilities when more than one branch
2107 is generated? So far we only have some ad-hoc observations:
2108 - If the operands are random, they are likely to differ in both parts.
2109 - If comparing items in a hash chain, the operands are random or equal;
2110 operation should be EQ or NE.
2111 - If items are searched in an ordered tree from the root, we can expect
2112 the highpart to be unequal about half of the time; operation should be
2113 an inequality comparison, operands non-constant, and overall probability
2114 about 50%. Likewise for quicksort.
2115 - Range checks will often be made against constants. Even if we assume for
2116 simplicity an even distribution of the non-constant operand over a
2117 sub-range here, the same probability could be generated with differently
2118 wide sub-ranges - as long as the ratio of the part of the subrange that
2119 is before the threshold to the part that comes after the threshold stays
2120 the same. Thus, we can't really tell anything here;
2121 assuming random distribution is at least simple.
2123 bool
2124 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2126 enum rtx_code msw_taken, msw_skip, lsw_taken;
2127 rtx_code_label *skip_label = NULL;
2128 rtx op1h, op1l, op2h, op2l;
2129 int num_branches;
2130 int prob, rev_prob;
2131 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2132 rtx scratch = operands[4];
2134 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2135 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2136 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2137 op1l = gen_lowpart (SImode, operands[1]);
2138 op2l = gen_lowpart (SImode, operands[2]);
2139 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2140 prob = split_branch_probability;
2141 rev_prob = REG_BR_PROB_BASE - prob;
2142 switch (comparison)
2144 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2145 That costs 1 cycle more when the first branch can be predicted taken,
2146 but saves us mispredicts because only one branch needs prediction.
2147 It also enables generating the cmpeqdi_t-1 pattern. */
2148 case EQ:
2149 if (TARGET_CMPEQDI_T)
2151 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2152 emit_jump_insn (gen_branch_true (operands[3]));
2153 return true;
2155 msw_skip = NE;
2156 lsw_taken = EQ;
2157 if (prob >= 0)
2159 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2160 msw_skip_prob = rev_prob;
2161 if (REG_BR_PROB_BASE <= 65535)
2162 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2163 else
2165 lsw_taken_prob
2166 = (prob
2167 ? (REG_BR_PROB_BASE
2168 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2169 / ((gcov_type) prob << 32)))
2170 : 0);
2173 break;
2174 case NE:
2175 if (TARGET_CMPEQDI_T)
2177 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2178 emit_jump_insn (gen_branch_false (operands[3]));
2179 return true;
2181 msw_taken = NE;
2182 msw_taken_prob = prob;
2183 lsw_taken = NE;
2184 lsw_taken_prob = 0;
2185 break;
2186 case GTU: case GT:
2187 msw_taken = comparison;
2188 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2189 break;
2190 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2191 msw_skip = swap_condition (msw_taken);
2192 lsw_taken = GTU;
2193 break;
2194 case GEU: case GE:
2195 if (op2l == CONST0_RTX (SImode))
2196 msw_taken = comparison;
2197 else
2199 msw_taken = comparison == GE ? GT : GTU;
2200 msw_skip = swap_condition (msw_taken);
2201 lsw_taken = GEU;
2203 break;
2204 case LTU: case LT:
2205 msw_taken = comparison;
2206 if (op2l == CONST0_RTX (SImode))
2207 break;
2208 msw_skip = swap_condition (msw_taken);
2209 lsw_taken = LTU;
2210 break;
2211 case LEU: case LE:
2212 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2213 msw_taken = comparison;
2214 else
2216 lsw_taken = LEU;
2217 if (comparison == LE)
2218 msw_taken = LT;
2219 else if (op2h != CONST0_RTX (SImode))
2220 msw_taken = LTU;
2221 else
2223 msw_skip = swap_condition (LTU);
2224 break;
2226 msw_skip = swap_condition (msw_taken);
2228 break;
2229 default: return false;
2231 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2232 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2233 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2234 if (comparison != EQ && comparison != NE && num_branches > 1)
2236 if (!CONSTANT_P (operands[2])
2237 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2238 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2240 msw_taken_prob = prob / 2U;
2241 msw_skip_prob
2242 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2243 lsw_taken_prob = prob;
2245 else
2247 msw_taken_prob = prob;
2248 msw_skip_prob = REG_BR_PROB_BASE;
2249 /* ??? If we have a constant op2h, should we use that when
2250 calculating lsw_taken_prob? */
2251 lsw_taken_prob = prob;
2254 operands[1] = op1h;
2255 operands[2] = op2h;
2256 operands[4] = NULL_RTX;
2257 if (reload_completed
2258 && ! arith_reg_or_0_operand (op2h, SImode)
2259 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2260 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2261 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2263 emit_move_insn (scratch, operands[2]);
2264 operands[2] = scratch;
2266 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2267 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2268 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2270 rtx taken_label = operands[3];
2272 /* Operands were possibly modified, but msw_skip doesn't expect this.
2273 Always use the original ones. */
2274 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2276 operands[1] = op1h;
2277 operands[2] = op2h;
2278 if (reload_completed
2279 && ! arith_reg_or_0_operand (op2h, SImode)
2280 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2282 emit_move_insn (scratch, operands[2]);
2283 operands[2] = scratch;
2287 operands[3] = skip_label = gen_label_rtx ();
2288 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2289 operands[3] = taken_label;
2291 operands[1] = op1l;
2292 operands[2] = op2l;
2293 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2295 if (reload_completed
2296 && ! arith_reg_or_0_operand (op2l, SImode)
2297 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2299 emit_move_insn (scratch, operands[2]);
2300 operands[2] = scratch;
2302 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2304 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2305 emit_label (skip_label);
2306 return true;
2309 /* Given an operand, return 1 if the evaluated operand plugged into an
2310 if_then_else will result in a branch_true, 0 if branch_false, or
2311 -1 if neither applies. The truth table goes like this:
2313 op | cmpval | code | result
2314 ---------+--------+---------+--------------------
2315 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2316 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2317 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2318 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2319 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2320 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2321 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2322 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2324 sh_eval_treg_value (rtx op)
2326 if (t_reg_operand (op, GET_MODE (op)))
2327 return 1;
2328 if (negt_reg_operand (op, GET_MODE (op)))
2329 return 0;
2331 rtx_code code = GET_CODE (op);
2332 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2333 return -1;
2335 int cmpop = code == EQ ? 1 : 0;
2336 int cmpval = INTVAL (XEXP (op, 1));
2337 if (cmpval != 0 && cmpval != 1)
2338 return -1;
2340 int t;
2341 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2342 t = 0;
2343 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2344 t = 1;
2345 else
2346 return -1;
2348 return t ^ (cmpval == cmpop);
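/* Worked example (illustration): for op = (eq (reg T) (const_int 1)) we get
   t = 0, cmpval = 1, cmpop = 1, so the result is 0 ^ (1 == 1) = 1, i.e. the
   operand behaves like a branch_true condition.  */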
2351 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2352 of floating-point comparisons. */
2353 static void
2354 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2356 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2357 && GET_CODE (insn) != PARALLEL)
2359 insn = gen_rtx_PARALLEL (VOIDmode,
2360 gen_rtvec (3, insn,
2361 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2362 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2364 emit_insn (insn);
2367 /* Prepare the operands for an scc instruction; make sure that the
2368 compare has been done and the result is in T_REG. */
2369 void
2370 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2372 rtx t_reg = get_t_reg_rtx ();
2373 enum rtx_code oldcode = code;
2374 machine_mode mode;
2376 /* First need a compare insn. */
2377 switch (code)
2379 case NE:
2380 /* It isn't possible to handle this case. */
2381 gcc_unreachable ();
2382 case LT:
2383 code = GT;
2384 break;
2385 case LE:
2386 code = GE;
2387 break;
2388 case LTU:
2389 code = GTU;
2390 break;
2391 case LEU:
2392 code = GEU;
2393 break;
2394 default:
2395 break;
2397 if (code != oldcode)
2398 std::swap (op0, op1);
2400 mode = GET_MODE (op0);
2401 if (mode == VOIDmode)
2402 mode = GET_MODE (op1);
2404 op0 = force_reg (mode, op0);
2405 if ((code != EQ && code != NE
2406 && (op1 != const0_rtx
2407 || code == GTU || code == GEU || code == LTU || code == LEU))
2408 || (mode == DImode && op1 != const0_rtx)
2409 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2410 op1 = force_reg (mode, op1);
2412 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2413 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2414 mode);
2418 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2419 rtx op0, rtx op1)
2421 rtx target = gen_reg_rtx (SImode);
2422 rtx tmp;
2424 gcc_assert (TARGET_SHMEDIA);
2425 switch (code)
2427 case EQ:
2428 case GT:
2429 case LT:
2430 case UNORDERED:
2431 case GTU:
2432 case LTU:
2433 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2434 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2435 code = NE;
2436 break;
2438 case NE:
2439 case GE:
2440 case LE:
2441 case ORDERED:
2442 case GEU:
2443 case LEU:
2444 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2445 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2446 code = EQ;
2447 break;
2449 case UNEQ:
2450 case UNGE:
2451 case UNGT:
2452 case UNLE:
2453 case UNLT:
2454 case LTGT:
2455 return NULL_RTX;
2457 default:
2458 gcc_unreachable ();
2461 if (mode == DImode)
2463 rtx t2 = gen_reg_rtx (DImode);
2464 emit_insn (gen_extendsidi2 (t2, target));
2465 target = t2;
2468 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2471 /* Called from the md file, set up the operands of a compare instruction. */
2472 void
2473 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2475 enum rtx_code code = GET_CODE (operands[0]);
2476 enum rtx_code branch_code;
2477 rtx op0 = operands[1];
2478 rtx op1 = operands[2];
2479 rtx insn;
2480 bool need_ccmpeq = false;
2482 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2484 op0 = force_reg (mode, op0);
2485 op1 = force_reg (mode, op1);
2487 else
2489 if (code != EQ || mode == DImode)
2491 /* Force args into regs, since we can't use constants here. */
2492 op0 = force_reg (mode, op0);
2493 if (op1 != const0_rtx || code == GTU || code == GEU)
2494 op1 = force_reg (mode, op1);
2498 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2500 if (code == LT
2501 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2502 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2504 std::swap (op0, op1);
2505 code = swap_condition (code);
2508 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2509 if (code == GE)
2511 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2512 need_ccmpeq = true;
2513 code = GT;
2516 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2517 to EQ/GT respectively. */
2518 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2521 switch (code)
2523 case EQ:
2524 case GT:
2525 case GE:
2526 case GTU:
2527 case GEU:
2528 branch_code = code;
2529 break;
2530 case NE:
2531 case LT:
2532 case LE:
2533 case LTU:
2534 case LEU:
2535 branch_code = reverse_condition (code);
2536 break;
2537 default:
2538 gcc_unreachable ();
2541 insn = gen_rtx_SET (VOIDmode,
2542 get_t_reg_rtx (),
2543 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2545 sh_emit_set_t_insn (insn, mode);
2546 if (need_ccmpeq)
2547 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2549 if (branch_code == code)
2550 emit_jump_insn (gen_branch_true (operands[3]));
2551 else
2552 emit_jump_insn (gen_branch_false (operands[3]));
2555 void
2556 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2558 enum rtx_code code = GET_CODE (operands[1]);
2559 rtx op0 = operands[2];
2560 rtx op1 = operands[3];
2561 rtx_code_label *lab = NULL;
2562 bool invert = false;
2564 op0 = force_reg (mode, op0);
2565 if ((code != EQ && code != NE
2566 && (op1 != const0_rtx
2567 || code == GTU || code == GEU || code == LTU || code == LEU))
2568 || (mode == DImode && op1 != const0_rtx)
2569 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2570 op1 = force_reg (mode, op1);
2572 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2574 if (code == LT || code == LE)
2576 std::swap (op0, op1);
2577 code = swap_condition (code);
2579 if (code == GE)
2581 if (TARGET_IEEE)
2583 lab = gen_label_rtx ();
2584 sh_emit_scc_to_t (EQ, op0, op1);
2585 emit_jump_insn (gen_branch_true (lab));
2586 code = GT;
2588 else
2590 code = LT;
2591 invert = true;
2596 if (code == NE)
2598 code = EQ;
2599 invert = true;
2602 sh_emit_scc_to_t (code, op0, op1);
2603 if (lab)
2604 emit_label (lab);
2605 if (invert)
2606 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2607 else
2608 emit_move_insn (operands[0], get_t_reg_rtx ());
2611 /* Functions to output assembly code. */
2613 /* Return a sequence of instructions to perform DI or DF move.
2615 Since the SH cannot move a DI or DF in one instruction, we have
2616 to take care when we see overlapping source and dest registers. */
2617 const char *
2618 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2619 machine_mode mode)
2621 rtx dst = operands[0];
2622 rtx src = operands[1];
2624 if (MEM_P (dst)
2625 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2626 return "mov.l %T1,%0" "\n"
2627 " mov.l %1,%0";
2629 if (register_operand (dst, mode)
2630 && register_operand (src, mode))
2632 if (REGNO (src) == MACH_REG)
2633 return "sts mach,%S0" "\n"
2634 " sts macl,%R0";
2636 /* Order the two word moves so that an overlapping register is read before
2637 it is written: mov.d r1,r2 does r2->r3 then r1->r2, while mov.d r1,r0 does r1->r0 then r2->r1. */
2638 if (REGNO (src) + 1 == REGNO (dst))
2639 return "mov %T1,%T0" "\n"
2640 " mov %1,%0";
2641 else
2642 return "mov %1,%0" "\n"
2643 " mov %T1,%T0";
2645 else if (CONST_INT_P (src))
2647 if (INTVAL (src) < 0)
2648 output_asm_insn ("mov #-1,%S0", operands);
2649 else
2650 output_asm_insn ("mov #0,%S0", operands);
2652 return "mov %1,%R0";
2654 else if (MEM_P (src))
2656 int ptrreg = -1;
2657 int dreg = REGNO (dst);
2658 rtx inside = XEXP (src, 0);
2660 switch (GET_CODE (inside))
2662 case REG:
2663 ptrreg = REGNO (inside);
2664 break;
2666 case SUBREG:
2667 ptrreg = subreg_regno (inside);
2668 break;
2670 case PLUS:
2671 ptrreg = REGNO (XEXP (inside, 0));
2672 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2673 an offsettable address. Unfortunately, offsettable addresses use
2674 QImode to check the offset, and a QImode offsettable address
2675 requires r0 for the other operand, which is not currently
2676 supported, so we can't use the 'o' constraint.
2677 Thus we must check for and handle r0+REG addresses here.
2678 We punt for now, since this is likely very rare. */
2679 gcc_assert (!REG_P (XEXP (inside, 1)));
2680 break;
2682 case LABEL_REF:
2683 return "mov.l %1,%0" "\n"
2684 " mov.l %1+4,%T0";
2685 case POST_INC:
2686 return "mov.l %1,%0" "\n"
2687 " mov.l %1,%T0";
2688 default:
2689 gcc_unreachable ();
2692 /* Work out the safe way to copy. Copy into the second half first. */
2693 if (dreg == ptrreg)
2694 return "mov.l %T1,%T0" "\n"
2695 " mov.l %1,%0";
2698 return "mov.l %1,%0" "\n"
2699 " mov.l %T1,%T0";
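/* Illustrative example (not from the original sources): loading a DImode
   value at @r1 into the register pair r1/r2 takes the dreg == ptrreg path
   above and is emitted as
       mov.l   @(4,r1),r2
       mov.l   @r1,r1
   so the pointer register is overwritten only by the last move.  */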
2702 /* Print an instruction which would have gone into a delay slot after
2703 another instruction, but couldn't because the other instruction expanded
2704 into a sequence where putting the slot insn at the end wouldn't work. */
2705 static void
2706 print_slot (rtx_sequence *seq)
2708 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2710 seq->insn (1)->set_deleted ();
2713 const char *
2714 output_far_jump (rtx_insn *insn, rtx op)
2716 struct { rtx lab, reg, op; } this_jmp;
2717 rtx_code_label *braf_base_lab = NULL;
2718 const char *jump;
2719 int far;
2720 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2721 rtx_insn *prev;
2723 this_jmp.lab = gen_label_rtx ();
2725 if (TARGET_SH2
2726 && offset >= -32764
2727 && offset - get_attr_length (insn) <= 32766)
2729 far = 0;
2730 jump = "mov.w %O0,%1" "\n"
2731 " braf %1";
2733 else
2735 far = 1;
2736 if (flag_pic)
2738 if (TARGET_SH2)
2739 jump = "mov.l %O0,%1" "\n"
2740 " braf %1";
2741 else
2742 jump = "mov.l r0,@-r15" "\n"
2743 " mova %O0,r0" "\n"
2744 " mov.l @r0,%1" "\n"
2745 " add r0,%1" "\n"
2746 " mov.l @r15+,r0" "\n"
2747 " jmp @%1";
2749 else
2750 jump = "mov.l %O0,%1" "\n"
2751 " jmp @%1";
2753 /* If we have a scratch register available, use it. */
2754 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2755 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2757 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2758 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2759 jump = "mov.l r1,@-r15" "\n"
2760 " mova %O0,r0" "\n"
2761 " mov.l @r0,r1" "\n"
2762 " add r1,r0" "\n"
2763 " mov.l @r15+,r1" "\n"
2764 " jmp @%1";
2765 output_asm_insn (jump, &this_jmp.lab);
2766 if (dbr_sequence_length ())
2767 print_slot (final_sequence);
2768 else
2769 output_asm_insn ("nop", 0);
2771 else
2773 /* Output the delay slot insn first if any. */
2774 if (dbr_sequence_length ())
2775 print_slot (final_sequence);
2777 this_jmp.reg = gen_rtx_REG (SImode, 13);
2778 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2779 Fortunately, MACL is fixed and call-clobbered, and we never
2780 need its value across jumps, so save r13 in it instead of on
2781 the stack. */
2782 if (TARGET_SH5)
2783 output_asm_insn ("lds r13,macl", 0);
2784 else
2785 output_asm_insn ("mov.l r13,@-r15", 0);
2786 output_asm_insn (jump, &this_jmp.lab);
2787 if (TARGET_SH5)
2788 output_asm_insn ("sts macl,r13", 0);
2789 else
2790 output_asm_insn ("mov.l @r15+,r13", 0);
2792 if (far && flag_pic && TARGET_SH2)
2794 braf_base_lab = gen_label_rtx ();
2795 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2796 CODE_LABEL_NUMBER (braf_base_lab));
2798 if (far)
2799 output_asm_insn (".align 2", 0);
2800 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2801 this_jmp.op = op;
2802 if (far && flag_pic)
2804 if (TARGET_SH2)
2805 this_jmp.lab = braf_base_lab;
2806 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2808 else
2809 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2810 return "";
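/* Illustrative shape of the emitted code for the simple non-PIC far case
   (a sketch; the scratch register and delay slot contents vary):
       mov.l   .LJxx,rN
       jmp     @rN
        <delay slot insn or nop>
       .align  2
   .LJxx:
       .long   <branch target>
   The TARGET_SH2 short-range case uses mov.w/braf with a .word offset
   instead.  */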
2813 /* Local label counter, used for constants in the pool and inside
2814 pattern branches. */
2815 static int lf = 100;
2817 /* Output code for ordinary branches. */
2818 const char *
2819 output_branch (int logic, rtx_insn *insn, rtx *operands)
2821 switch (get_attr_length (insn))
2823 case 6:
2824 /* This can happen if filling the delay slot has caused a forward
2825 branch to exceed its range (we could reverse it, but only
2826 when we know we won't overextend other branches; this should
2827 best be handled by relaxation).
2828 It can also happen when other condbranches hoist delay slot insns
2829 from their destination, thus leading to code size increase.
2830 But the branch will still be in the range -4092..+4098 bytes. */
2831 if (! TARGET_RELAX)
2833 int label = lf++;
2834 /* The call to print_slot will clobber the operands. */
2835 rtx op0 = operands[0];
2837 /* If the instruction in the delay slot is annulled (true), then
2838 there is no delay slot where we can put it now. The only safe
2839 place for it is after the label. final will do that by default. */
2841 if (final_sequence
2842 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2843 && get_attr_length (final_sequence->insn (1)))
2845 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2846 ASSEMBLER_DIALECT ? "/" : ".", label);
2847 print_slot (final_sequence);
2849 else
2850 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2852 output_asm_insn ("bra\t%l0", &op0);
2853 fprintf (asm_out_file, "\tnop\n");
2854 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2856 return "";
2858 /* When relaxing, handle this like a short branch. The linker
2859 will fix it up if it still doesn't fit after relaxation. */
2860 case 2:
2861 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2863 /* These are for SH2e, in which we have to account for the
2864 extra nop because of the hardware bug in annulled branches. */
2865 case 8:
2866 if (! TARGET_RELAX)
2868 int label = lf++;
2870 gcc_assert (!final_sequence
2871 || !(INSN_ANNULLED_BRANCH_P
2872 (XVECEXP (final_sequence, 0, 0))));
2873 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2874 logic ? "f" : "t",
2875 ASSEMBLER_DIALECT ? "/" : ".", label);
2876 fprintf (asm_out_file, "\tnop\n");
2877 output_asm_insn ("bra\t%l0", operands);
2878 fprintf (asm_out_file, "\tnop\n");
2879 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2881 return "";
2883 /* When relaxing, fall through. */
2884 case 4:
2886 char buffer[10];
2888 sprintf (buffer, "b%s%ss\t%%l0",
2889 logic ? "t" : "f",
2890 ASSEMBLER_DIALECT ? "/" : ".");
2891 output_asm_insn (buffer, &operands[0]);
2892 return "nop";
2895 default:
2896 /* There should be no longer branches now - that would
2897 indicate that something has destroyed the branches set
2898 up in machine_dependent_reorg. */
2899 gcc_unreachable ();
2903 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2904 fill in operand 9 as a label for the successor insn.
2905 We try to use jump threading where possible.
2906 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2907 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2908 follow jmp and bt, if the address is in range. */
2909 const char *
2910 output_branchy_insn (enum rtx_code code, const char *templ,
2911 rtx_insn *insn, rtx *operands)
2913 rtx_insn *next_insn = NEXT_INSN (insn);
2915 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2917 rtx src = SET_SRC (PATTERN (next_insn));
2918 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2920 /* Following branch not taken */
2921 rtx_code_label *lab = gen_label_rtx ();
2922 emit_label_after (lab, next_insn);
2923 INSN_ADDRESSES_NEW (lab,
2924 INSN_ADDRESSES (INSN_UID (next_insn))
2925 + get_attr_length (next_insn));
2926 operands[9] = lab;
2927 return templ;
2929 else
2931 int offset = (branch_dest (next_insn)
2932 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2933 if (offset >= -252 && offset <= 258)
2935 if (GET_CODE (src) == IF_THEN_ELSE)
2936 /* branch_true */
2937 src = XEXP (src, 1);
2938 operands[9] = src;
2939 return templ;
2943 rtx_code_label *lab = gen_label_rtx ();
2944 emit_label_after (lab, insn);
2945 INSN_ADDRESSES_NEW (lab,
2946 INSN_ADDRESSES (INSN_UID (insn))
2947 + get_attr_length (insn));
2948 operands[9] = lab;
2949 return templ;
2952 const char *
2953 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2955 return output_branchy_insn (NE, "bt %l9" "\n"
2956 " fcmp/eq %1,%0",
2957 insn, operands);
2960 /* Output the start of the assembler file. */
2961 static void
2962 sh_file_start (void)
2964 default_file_start ();
2966 if (TARGET_ELF)
2967 /* We need to show the text section with the proper
2968 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2969 emits it without attributes, else GAS
2970 will complain. We can teach GAS specifically about the
2971 default attributes for our choice of text section, but
2972 then we would have to change GAS again if/when we change
2973 the text section name. */
2974 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2975 else
2976 /* Switch to the data section so that the coffsem symbol
2977 isn't in the text section. */
2978 switch_to_section (data_section);
2980 if (TARGET_LITTLE_ENDIAN)
2981 fputs ("\t.little\n", asm_out_file);
2983 if (!TARGET_ELF)
2985 if (TARGET_SHCOMPACT)
2986 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2987 else if (TARGET_SHMEDIA)
2988 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2989 TARGET_SHMEDIA64 ? 64 : 32);
2993 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2994 static bool
2995 unspec_caller_rtx_p (rtx pat)
2997 rtx base, offset;
2998 int i;
3000 split_const (pat, &base, &offset);
3001 if (GET_CODE (base) == UNSPEC)
3003 if (XINT (base, 1) == UNSPEC_CALLER)
3004 return true;
3005 for (i = 0; i < XVECLEN (base, 0); i++)
3006 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3007 return true;
3009 return false;
3012 /* Indicate that INSN cannot be duplicated. This is true for an insn
3013 that generates a unique label. */
3014 static bool
3015 sh_cannot_copy_insn_p (rtx_insn *insn)
3017 rtx pat;
3019 if (!reload_completed || !flag_pic)
3020 return false;
3022 if (!NONJUMP_INSN_P (insn))
3023 return false;
3024 if (asm_noperands (insn) >= 0)
3025 return false;
3027 pat = PATTERN (insn);
3028 if (GET_CODE (pat) != SET)
3029 return false;
3030 pat = SET_SRC (pat);
3032 if (unspec_caller_rtx_p (pat))
3033 return true;
3035 return false;
3038 /* Number of instructions used to make an arithmetic right shift by N. */
3039 static const char ashiftrt_insns[] =
3040 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3042 /* Description of a logical left or right shift, when expanded to a sequence
3043 of 1/2/8/16 shifts.
3044 Notice that one bit right shifts clobber the T bit. One bit left shifts
3045 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
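/* Reading example for the tables below (illustration only): the entry for a
   shift count of 13, { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, means the shift
   is done as four insns shifting by 8, 2, 1 and 2 (8 + 2 + 1 + 2 = 13), and
   that the single-bit right shift in the sequence clobbers the T bit.  */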
3046 enum
3048 ASHL_CLOBBERS_T = 1 << 0,
3049 LSHR_CLOBBERS_T = 1 << 1
3052 struct ashl_lshr_sequence
3054 char insn_count;
3055 signed char amount[6];
3056 char clobbers_t;
3059 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3061 { 0, { 0 }, 0 }, // 0
3062 { 1, { 1 }, LSHR_CLOBBERS_T },
3063 { 1, { 2 }, 0 },
3064 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3065 { 2, { 2, 2 }, 0 }, // 4
3066 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3067 { 3, { 2, 2, 2 }, 0 },
3068 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3069 { 1, { 8 }, 0 }, // 8
3070 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3071 { 2, { 8, 2 }, 0 },
3072 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3073 { 3, { 8, 2, 2 }, 0 }, // 12
3074 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3075 { 3, { 8, -2, 8 }, 0 },
3076 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3077 { 1, { 16 }, 0 }, // 16
3078 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3079 { 2, { 16, 2 }, 0 },
3080 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3081 { 3, { 16, 2, 2 }, 0 }, // 20
3082 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3083 { 3, { 16, -2, 8 }, 0 },
3084 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3085 { 2, { 16, 8 }, 0 }, // 24
3086 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3087 { 3, { 16, 8, 2 }, 0 },
3088 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3089 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3090 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3091 { 3, { 16, -2, 16 }, 0 },
3093 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3094 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3095 However, the shift-and combiner code needs this entry here to be in
3096 terms of real shift insns. */
3097 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3100 /* Individual shift amounts for shift amounts < 16, where up to the three
3101 highmost bits might be clobbered. This is typically used when combined
3102 with some kind of sign or zero extension. */
3103 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3105 { 0, { 0 }, 0 }, // 0
3106 { 1, { 1 }, LSHR_CLOBBERS_T },
3107 { 1, { 2 }, 0 },
3108 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3109 { 2, { 2, 2 }, 0 }, // 4
3110 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3111 { 2, { 8, -2 }, 0 },
3112 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3113 { 1, { 8 }, 0 }, // 8
3114 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3115 { 2, { 8, 2 }, 0 },
3116 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3117 { 3, { 8, 2, 2 }, 0 }, // 12
3118 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3119 { 2, { 16, -2 }, 0 },
3120 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3121 { 1, { 16 }, 0 }, // 16
3122 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3123 { 2, { 16, 2 }, 0 },
3124 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3125 { 3, { 16, 2, 2 }, 0 }, // 20
3126 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3127 { 3, { 16, -2, 8 }, 0 },
3128 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3129 { 2, { 16, 8 }, 0 }, // 24
3130 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3131 { 3, { 16, 8, 2 }, 0 },
3132 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3133 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3134 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3135 { 3, { 16, -2, 16 }, 0 },
3136 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3139 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3140 will clobber the T bit. */
3141 bool
3142 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3144 gcc_assert (CONST_INT_P (shift_amount));
3146 const int shift_amount_i = INTVAL (shift_amount) & 31;
3148 /* Special case for shift count of 31: use and-rotl sequence. */
3149 if (shift_amount_i == 31)
3150 return true;
3152 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3153 & ASHL_CLOBBERS_T) != 0;
3156 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3157 instructions will clobber the T bit. */
3158 bool
3159 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3161 gcc_assert (CONST_INT_P (shift_amount));
3163 const int shift_amount_i = INTVAL (shift_amount) & 31;
3165 /* Special case for shift count of 31: use shll-movt sequence. */
3166 if (shift_amount_i == 31)
3167 return true;
3169 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3170 & LSHR_CLOBBERS_T) != 0;
3173 /* Return true if it is potentially beneficial to use a dynamic shift
3174 instruction (shad / shar) instead of a combination of 1/2/8/16
3175 shift instructions for the specified shift count.
3176 If dynamic shifts are not available, always return false. */
3177 bool
3178 sh_dynamicalize_shift_p (rtx count)
3180 gcc_assert (CONST_INT_P (count));
3182 const int shift_amount_i = INTVAL (count) & 31;
3183 int insn_count;
3185 /* For left and right shifts, there are shorter 2 insn sequences for
3186 shift amounts of 31. */
3187 if (shift_amount_i == 31)
3188 insn_count = 2;
3189 else
3190 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3192 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
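/* Example (illustrative): a shift by 13 needs 4 insns from ashl_lshr_seq, so
   on a target with dynamic shifts and an SH_DYNAMIC_SHIFT_COST of 1 the
   shad/shar form (one insn for the shift plus one to load the count) is
   preferred.  */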
3195 /* Assuming we have a value that has been sign-extended by at least one bit,
3196 can we use the ext_shift_amounts with the last shift turned to an
3197 arithmetic shift to shift it by N without data loss, and quicker than by
3198 other means? */
3199 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3201 /* Return the cost of a shift. */
3202 static inline int
3203 shiftcosts (rtx x)
3205 int value;
3207 if (TARGET_SHMEDIA)
3208 return 1;
3210 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3212 if (GET_MODE (x) == DImode
3213 && CONST_INT_P (XEXP (x, 1))
3214 && INTVAL (XEXP (x, 1)) == 1)
3215 return 2;
3217 /* Everything else is invalid, because there is no pattern for it. */
3218 return -1;
3220 /* If shifting by a non-constant amount, this will be expensive. */
3221 if (!CONST_INT_P (XEXP (x, 1)))
3222 return SH_DYNAMIC_SHIFT_COST;
3224 /* Otherwise, return the true cost in instructions. Cope with out of range
3225 shift counts more or less arbitrarily. */
3226 value = INTVAL (XEXP (x, 1)) & 31;
3228 if (GET_CODE (x) == ASHIFTRT)
3230 int cost = ashiftrt_insns[value];
3231 /* If dynamic shifts are available and profitable in this case, then we
3232 put the constant in a reg and use shad. */
3233 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3234 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3235 return cost;
3237 else
3238 return ashl_lshr_seq[value].insn_count;
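/* For instance (illustration only): an SImode left shift by 10 costs 2 insns
   here (shll8 followed by shll2), while an arithmetic right shift by 6 is
   capped at 1 + SH_DYNAMIC_SHIFT_COST whenever that is cheaper than the
   8-insn sequence from the ashiftrt_insns table.  */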
3241 /* Return the cost of an AND/XOR/IOR operation. */
3242 static inline int
3243 and_xor_ior_costs (rtx x, int code)
3245 /* On SH1-4 the widest available operations are SImode.
3246 Double the cost for modes wider than SImode. */
3247 const int cost_scale = !TARGET_SHMEDIA
3248 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3249 ? 2 : 1;
3251 /* A logical operation with two registers is a single cycle
3252 instruction. */
3253 if (!CONST_INT_P (XEXP (x, 1)))
3254 return 1 * cost_scale;
3256 int i = INTVAL (XEXP (x, 1));
3258 if (TARGET_SHMEDIA)
3260 if (satisfies_constraint_I10 (XEXP (x, 1))
3261 || satisfies_constraint_J16 (XEXP (x, 1)))
3262 return 1;
3263 else
3264 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3267 /* These constants are single cycle extu.[bw] instructions. */
3268 if ((i == 0xff || i == 0xffff) && code == AND)
3269 return 1 * cost_scale;
3270 /* Constants that can be used in an instruction as an immediate are
3271 a single cycle, but this requires r0, so make it a little more
3272 expensive. */
3273 if (CONST_OK_FOR_K08 (i))
3274 return 2 * cost_scale;
3275 /* Constants that can be loaded with a mov immediate need one more cycle.
3276 This case is probably unnecessary. */
3277 if (CONST_OK_FOR_I08 (i))
3278 return 2 * cost_scale;
3279 /* Any other constant requires an additional 2 cycle pc-relative load.
3280 This case is probably unnecessary. */
3281 return 3 * cost_scale;
3284 /* Return the cost of an addition or a subtraction. */
3285 static inline int
3286 addsubcosts (rtx x)
3288 if (GET_MODE (x) == SImode)
3290 /* The addc or subc patterns will eventually become one or two
3291 instructions. Below are some costs for some of the patterns
3292 which combine would reject because the costs of the individual
3293 insns in the patterns are lower.
3295 FIXME: It would be much easier if we had something like insn cost
3296 attributes and the cost calculation machinery used those attributes
3297 in the first place. This would eliminate redundant recog-like C
3298 code to calculate costs of complex patterns. */
3299 rtx op0 = XEXP (x, 0);
3300 rtx op1 = XEXP (x, 1);
3302 if (GET_CODE (x) == PLUS)
3304 if (GET_CODE (op0) == AND
3305 && XEXP (op0, 1) == const1_rtx
3306 && (GET_CODE (op1) == PLUS
3307 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3308 return 1;
3310 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3311 && GET_CODE (op1) == LSHIFTRT
3312 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3313 return 1;
3317 /* On SH1-4 the widest available operations are SImode.
3318 Double the cost for modes wider than SImode. */
3319 const int cost_scale = !TARGET_SHMEDIA
3320 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3321 ? 2 : 1;
3323 /* Adding a register is a single cycle insn. */
3324 if (REG_P (XEXP (x, 1))
3325 || GET_CODE (XEXP (x, 1)) == SUBREG)
3326 return 1 * cost_scale;
3328 /* Likewise for small constants. */
3329 if (CONST_INT_P (XEXP (x, 1))
3330 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3331 return 1 * cost_scale;
3333 if (TARGET_SHMEDIA)
3334 switch (GET_CODE (XEXP (x, 1)))
3336 case CONST:
3337 case LABEL_REF:
3338 case SYMBOL_REF:
3339 return TARGET_SHMEDIA64 ? 5 : 3;
3341 case CONST_INT:
3342 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3343 return 2;
3344 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3345 return 3;
3346 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3347 return 4;
3349 /* Fall through. */
3350 default:
3351 return 5;
3354 /* Any other constant requires a 2 cycle pc-relative load plus an
3355 addition. */
3356 return 3 * cost_scale;
3359 /* Return the cost of a multiply. */
3360 static inline int
3361 multcosts (rtx x ATTRIBUTE_UNUSED)
3363 if (sh_multcost >= 0)
3364 return sh_multcost;
3365 if (TARGET_SHMEDIA)
3366 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3367 accept constants. Ideally, we would use a cost of one or two and
3368 add the cost of the operand, but disregard the latter when inside loops
3369 and loop invariant code motion is still to follow.
3370 Using a multiply first and splitting it later if it's a loss
3371 doesn't work because of different sign / zero extension semantics
3372 of multiplies vs. shifts. */
3373 return optimize_size ? 2 : 3;
3375 if (TARGET_SH2)
3377 /* We have a mul insn, so we can never take more than the mul and the
3378 read of the mac reg, but count more because of the latency and extra
3379 reg usage. */
3380 if (optimize_size)
3381 return 2;
3382 return 3;
3385 /* If we're aiming at small code, then just count the number of
3386 insns in a multiply call sequence. */
3387 if (optimize_size)
3388 return 5;
3390 /* Otherwise count all the insns in the routine we'd be calling too. */
3391 return 20;
3394 /* Compute a (partial) cost for rtx X. Return true if the complete
3395 cost has been computed, and false if subexpressions should be
3396 scanned. In either case, *TOTAL contains the cost result. */
3397 static bool
3398 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3399 int *total, bool speed ATTRIBUTE_UNUSED)
3401 switch (code)
3403 /* The lower-subreg pass decides whether to split multi-word regs
3404 into individual regs by looking at the cost for a SET of certain
3405 modes with the following patterns:
3406 (set (reg) (reg))
3407 (set (reg) (const_int 0))
3408 On machines that support vector-move operations a multi-word move
3409 is the same cost as individual reg move. On SH there is no
3410 vector-move, so we have to provide the correct cost in the number
3411 of move insns to load/store the reg of the mode in question. */
3412 case SET:
3413 if (register_operand (SET_DEST (x), VOIDmode)
3414 && (register_operand (SET_SRC (x), VOIDmode)
3415 || satisfies_constraint_Z (SET_SRC (x))))
3417 const machine_mode mode = GET_MODE (SET_DEST (x));
3418 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3419 / mov_insn_size (mode, TARGET_SH2A));
3420 return true;
3422 return false;
3424 /* The cost of a mem access is mainly the cost of the address mode. */
3425 case MEM:
3426 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3427 true);
3428 return true;
3430 /* The cost of a sign or zero extend depends on whether the source is a
3431 reg or a mem. In case of a mem, take the address into account. */
3432 case SIGN_EXTEND:
3433 if (REG_P (XEXP (x, 0)))
3435 *total = COSTS_N_INSNS (1);
3436 return true;
3438 if (MEM_P (XEXP (x, 0)))
3440 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3441 GET_MODE (XEXP (x, 0)),
3442 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3443 return true;
3445 return false;
3447 case ZERO_EXTEND:
3448 if (REG_P (XEXP (x, 0)))
3450 *total = COSTS_N_INSNS (1);
3451 return true;
3453 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3454 && (GET_MODE (XEXP (x, 0)) == QImode
3455 || GET_MODE (XEXP (x, 0)) == HImode))
3457 /* Handle SH2A's movu.b and movu.w insn. */
3458 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3459 GET_MODE (XEXP (x, 0)),
3460 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3461 return true;
3463 return false;
3465 /* mems for SFmode and DFmode can be inside a parallel due to
3466 the way the fpscr is handled. */
3467 case PARALLEL:
3468 for (int i = 0; i < XVECLEN (x, 0); i++)
3470 rtx xx = XVECEXP (x, 0, i);
3471 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3473 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3474 GET_MODE (XEXP (xx, 0)),
3475 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3476 return true;
3478 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3480 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3481 GET_MODE (XEXP (xx, 1)),
3482 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3483 return true;
3487 if (sh_1el_vec (x, VOIDmode))
3488 *total = outer_code != SET;
3489 else if (sh_rep_vec (x, VOIDmode))
3490 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3491 + (outer_code != SET));
3492 else
3493 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3494 return true;
3496 case CONST_INT:
3497 if (TARGET_SHMEDIA)
3499 if (INTVAL (x) == 0)
3500 *total = 0;
3501 else if (outer_code == AND && and_operand ((x), DImode))
3502 *total = 0;
3503 else if ((outer_code == IOR || outer_code == XOR
3504 || outer_code == PLUS)
3505 && CONST_OK_FOR_I10 (INTVAL (x)))
3506 *total = 0;
3507 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3508 *total = COSTS_N_INSNS (outer_code != SET);
3509 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3510 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3511 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3512 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3513 else
3514 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3515 return true;
3517 if (CONST_OK_FOR_I08 (INTVAL (x)))
3518 *total = 0;
3519 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3520 && CONST_OK_FOR_K08 (INTVAL (x)))
3521 *total = 1;
3522 /* prepare_cmp_insn will force costly constants into registers before
3523 the cbranch[sd]i4 patterns can see them, so preserve potentially
3524 interesting ones not covered by I08 above. */
3525 else if (outer_code == COMPARE
3526 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3527 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3528 || INTVAL (x) == 0x7fffffff
3529 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3530 *total = 1;
3531 else
3532 *total = 8;
3533 return true;
3535 case EQ:
3536 /* An and with a constant compared against zero is
3537 most likely going to be a TST #imm, R0 instruction.
3538 Notice that this does not catch the zero_extract variants from
3539 the md file. */
3540 if (GET_CODE (XEXP (x, 0)) == AND
3541 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3543 *total = 1;
3544 return true;
3546 else
3547 return false;
3549 case SMIN:
3550 case SMAX:
3551 /* This is most likely a clips.b or clips.w insn that is being made up
3552 by combine. */
3553 if (TARGET_SH2A
3554 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3555 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3556 && REG_P (XEXP (XEXP (x, 0), 0))
3557 && CONST_INT_P (XEXP (x, 1)))
3559 *total = COSTS_N_INSNS (1);
3560 return true;
3562 else
3563 return false;
3565 case CONST:
3566 case LABEL_REF:
3567 case SYMBOL_REF:
3568 if (TARGET_SHMEDIA64)
3569 *total = COSTS_N_INSNS (4);
3570 else if (TARGET_SHMEDIA32)
3571 *total = COSTS_N_INSNS (2);
3572 else
3573 *total = 5;
3574 return true;
3576 case CONST_DOUBLE:
3577 if (TARGET_SHMEDIA)
3578 *total = COSTS_N_INSNS (4);
3579 /* prepare_cmp_insn will force costly constants into registers before
3580 the cbranchdi4 pattern can see them, so preserve potentially
3581 interesting ones. */
3582 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3583 *total = 1;
3584 else
3585 *total = 10;
3586 return true;
3588 case CONST_VECTOR:
3589 /* Chain the checks with else so that each case takes effect, mirroring
3590 the PARALLEL case above.  */
3591 if (x == CONST0_RTX (GET_MODE (x)))
3592 *total = 0;
3593 else if (sh_1el_vec (x, VOIDmode))
3594 *total = outer_code != SET;
3595 else if (sh_rep_vec (x, VOIDmode))
3596 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3597 + (outer_code != SET));
3598 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
3599 return true;
3601 case PLUS:
3602 case MINUS:
3603 *total = COSTS_N_INSNS (addsubcosts (x));
3604 return true;
3606 case AND:
3607 case XOR:
3608 case IOR:
3609 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3610 return true;
3612 case MULT:
3613 *total = COSTS_N_INSNS (multcosts (x));
3614 return true;
3616 case LT:
3617 case GE:
3618 /* div0s sign comparison. */
3619 if (GET_CODE (XEXP (x, 0)) == XOR
3620 && REG_P ((XEXP (XEXP (x, 0), 0)))
3621 && REG_P ((XEXP (XEXP (x, 0), 1)))
3622 && satisfies_constraint_Z (XEXP (x, 1)))
3624 *total = COSTS_N_INSNS (1);
3625 return true;
3627 else
3628 return false;
3630 case LSHIFTRT:
3631 /* div0s sign comparison. */
3632 if (GET_CODE (XEXP (x, 0)) == XOR
3633 && REG_P ((XEXP (XEXP (x, 0), 0)))
3634 && REG_P ((XEXP (XEXP (x, 0), 1)))
3635 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3637 *total = COSTS_N_INSNS (1);
3638 return true;
3640 /* Fall through to shiftcosts. */
3641 case ASHIFT:
3642 case ASHIFTRT:
3644 int cost = shiftcosts (x);
3645 if (cost < 0)
3646 return false;
3647 *total = COSTS_N_INSNS (cost);
3648 return true;
3651 case DIV:
3652 case UDIV:
3653 case MOD:
3654 case UMOD:
3655 *total = COSTS_N_INSNS (20);
3656 return true;
3658 case FLOAT:
3659 case FIX:
3660 *total = 100;
3661 return true;
3663 default:
3664 return false;
3668 /* Determine the size of the fundamental move insn that will be used
3669 for the specified mode. */
3670 static inline int
3671 mov_insn_size (machine_mode mode, bool consider_sh2a)
3673 const int mode_sz = GET_MODE_SIZE (mode);
3675 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3676 || (TARGET_FMOVD && mode == DFmode))
3677 return mode_sz;
3678 else
3680 /* The max. available mode for actual move insns is SImode.
3681 Larger accesses will be split into multiple loads/stores. */
3682 const int max_mov_sz = GET_MODE_SIZE (SImode);
3683 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3687 /* Determine the maximum possible displacement for a move insn for the
3688 specified mode. */
3690 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3692 /* The 4 byte displacement move insns are the same as the 2 byte
3693 versions but take a 12 bit displacement. All we need to do is to
3694 scale the max. displacement value accordingly. */
3695 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3697 /* SH2A supports FPU move insns with 12 bit displacements.
3698 Other variants do not support any kind of displacements for
3699 FPU move insns. */
3700 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3701 return 0;
3702 else
3704 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3705 const int mode_sz = GET_MODE_SIZE (mode);
3706 int r = 15 * mov_insn_sz * disp_scale;
3708 /* If the mov insn will be split into multiple loads/stores, the
3709 maximum possible displacement is a bit smaller. */
3710 if (mode_sz > mov_insn_sz)
3711 r -= mode_sz - mov_insn_sz;
3712 return r;
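/* A worked example of the formula above (illustrative only): for an SImode
   move on non-SH2A targets, mov_insn_size is 4 and disp_scale is 1, so the
   maximum displacement is 15 * 4 = 60 bytes.  A DImode access is split into
   two SImode moves, so mode_sz - mov_insn_sz = 4 bytes are subtracted,
   giving 15 * 4 - 4 = 56.  With the SH2A 12 bit displacement forms,
   disp_scale is 4095 / 15 = 273 and an SImode move can reach
   15 * 4 * 273 = 16380 bytes.  */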
3716 /* Determine the alignment mask for a move insn of the
3717 specified mode. */
3718 static inline int
3719 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3721 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3722 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3725 /* Return the displacement value of a displacement address. */
3726 HOST_WIDE_INT
3727 sh_disp_addr_displacement (rtx x)
3729 gcc_assert (satisfies_constraint_Sdd (x));
3730 return INTVAL (XEXP (XEXP (x, 0), 1));
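/* For example, for a displacement address such as
     (mem:SI (plus:SI (reg:SI 4) (const_int 8)))
   which satisfies the Sdd constraint, this returns 8.  */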
3733 /* Compute the cost of an address. */
3734 static int
3735 sh_address_cost (rtx x, machine_mode mode,
3736 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3738 /* 'GBR + 0'. Account one more because of R0 restriction. */
3739 if (REG_P (x) && REGNO (x) == GBR_REG)
3740 return 2;
3742 /* Simple reg, post-inc, pre-dec addressing. */
3743 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3744 return 1;
3746 /* 'reg + disp' addressing. */
3747 if (GET_CODE (x) == PLUS
3748 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3750 /* 'GBR + disp'. Account one more because of R0 restriction. */
3751 if (REGNO (XEXP (x, 0)) == GBR_REG
3752 && gbr_displacement (XEXP (x, 1), mode))
3753 return 2;
3755 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3757 if (offset == 0)
3758 return 1;
3760 /* The displacement would fit into a 2 byte move insn.
3761 HImode and QImode loads/stores with displacement put pressure on
3762 R0 which will most likely require another reg copy. Thus account
3763 a higher cost for that. */
3764 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3765 return (mode == HImode || mode == QImode) ? 2 : 1;
3767 /* The displacement would fit into a 4 byte move insn (SH2A). */
3768 if (TARGET_SH2A
3769 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3770 return 2;
3772 /* The displacement is probably out of range and will require extra
3773 calculations. */
3774 return 3;
3777 /* 'reg + reg' addressing. Account a slightly higher cost because of
3778 increased pressure on R0. */
3779 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3780 && ! TARGET_SHMEDIA)
3781 return 3;
3783 /* Not sure what it is - probably expensive. */
3784 return 10;
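/* To illustrate the ranking above (the numbers are heuristic weights, not
   cycle counts):
     @rn, @rn+, @-rn                           -> 1
     @(disp,rn) in range, SImode               -> 1
     @(disp,rn) in range, HImode/QImode        -> 2  (likely needs R0)
     @(0,GBR), @(disp,GBR)                     -> 2
     @(disp,rn) reachable only via SH2A forms  -> 2
     @(r0,rn)                                  -> 3
     out-of-range displacement                 -> 3
     anything else                             -> 10.  */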
3787 /* Code to expand a shift. */
3788 static void
3789 gen_ashift (int type, int n, rtx reg)
3791 rtx n_rtx;
3793 /* Negative values here come from the shift_amounts array. */
3794 if (n < 0)
3796 if (type == ASHIFT)
3797 type = LSHIFTRT;
3798 else
3799 type = ASHIFT;
3800 n = -n;
3803 n_rtx = GEN_INT (n);
3804 gcc_assert (satisfies_constraint_P27 (n_rtx));
3806 switch (type)
3808 case ASHIFTRT:
3809 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3810 break;
3811 case LSHIFTRT:
3812 if (n == 1)
3813 emit_insn (gen_shlr (reg, reg));
3814 else
3815 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3816 break;
3817 case ASHIFT:
3818 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3819 break;
3820 default:
3821 gcc_unreachable ();
3825 /* Code to expand a HImode shift. */
3826 static void
3827 gen_ashift_hi (int type, int n, rtx reg)
3829 /* Negative values here come from the shift_amounts array. */
3830 if (n < 0)
3832 if (type == ASHIFT)
3833 type = LSHIFTRT;
3834 else
3835 type = ASHIFT;
3836 n = -n;
3839 switch (type)
3841 case ASHIFTRT:
3842 case LSHIFTRT:
3843 /* We don't have HImode right shift operations because using the
3844 ordinary 32 bit shift instructions for that doesn't generate proper
3845 zero/sign extension.
3846 gen_ashift_hi is only called in contexts where we know that the
3847 sign extension works out correctly. */
3849 int offset = 0;
3850 if (GET_CODE (reg) == SUBREG)
3852 offset = SUBREG_BYTE (reg);
3853 reg = SUBREG_REG (reg);
3855 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3856 break;
3858 case ASHIFT:
3859 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3860 break;
3864 /* Output RTL to split a constant shift into its component SH constant
3865 shift instructions. */
3866 void
3867 gen_shifty_op (int code, rtx *operands)
3869 int value = INTVAL (operands[2]);
3870 int max, i;
3872 /* Truncate the shift count in case it is out of bounds. */
3873 value = value & 31;
3875 if (value == 31)
3877 if (code == LSHIFTRT)
3879 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3880 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3881 return;
3883 else if (code == ASHIFT)
3885 /* There is a two instruction sequence for 31 bit left shifts,
3886 but it requires r0. */
3887 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3889 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3890 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3891 return;
3895 else if (value == 0)
3897 /* This can happen even when optimizing, if there were subregs before
3898 reload. Don't output a nop here, as this is never optimized away;
3899 use a no-op move instead. */
3900 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3901 return;
3904 max = ashl_lshr_seq[value].insn_count;
3905 for (i = 0; i < max; i++)
3906 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
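/* For example (assuming the usual SH repertoire of 1, 2, 8 and 16 bit
   immediate shifts), a constant left shift by 10 is expected to come out as
   shll8 followed by shll2: gen_shifty_op simply walks the precomputed
   ashl_lshr_seq[10] entry and calls gen_ashift once per component amount.  */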
3909 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3910 don't matter. */
3911 void
3912 gen_shifty_hi_op (int code, rtx *operands)
3914 int value = INTVAL (operands[2]);
3915 int max, i;
3916 void (*gen_fun) (int, int, rtx);
3918 /* This operation is used by and_shl for SImode values with a few
3919 high bits known to be cleared. */
3920 value &= 31;
3921 if (value == 0)
3923 emit_insn (gen_nop ());
3924 return;
3927 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3928 if (code == ASHIFT)
3930 max = ext_ashl_lshr_seq[value].insn_count;
3931 for (i = 0; i < max; i++)
3932 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3934 else
3935 /* When shifting right, emit the shifts in reverse order, so that
3936 solitary negative values come first. */
3937 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3938 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3941 /* Output RTL for an arithmetic right shift.
3942 ??? Rewrite to use super-optimizer sequences. */
3943 bool
3944 expand_ashiftrt (rtx *operands)
3946 rtx wrk;
3947 char func[18];
3948 int value;
3950 if (TARGET_DYNSHIFT)
3952 if (!CONST_INT_P (operands[2]))
3954 rtx count = copy_to_mode_reg (SImode, operands[2]);
3955 emit_insn (gen_negsi2 (count, count));
3956 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3957 return true;
3959 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3960 > 1 + SH_DYNAMIC_SHIFT_COST)
3962 rtx count
3963 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3964 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3965 return true;
3968 if (!CONST_INT_P (operands[2]))
3969 return false;
3971 value = INTVAL (operands[2]) & 31;
3973 if (value == 31)
3975 /* If we are called from abs expansion, arrange things so that
3976 we can use a single MT instruction that doesn't clobber the source,
3977 if LICM can hoist out the load of the constant zero. */
3978 if (currently_expanding_to_rtl)
3980 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3981 operands[1]));
3982 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3983 return true;
3985 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3986 return true;
3988 else if (value >= 16 && value <= 19)
3990 wrk = gen_reg_rtx (SImode);
3991 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3992 value -= 16;
3993 while (value--)
3994 gen_ashift (ASHIFTRT, 1, wrk);
3995 emit_move_insn (operands[0], wrk);
3996 return true;
3998 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3999 else if (value <= 5)
4001 wrk = gen_reg_rtx (SImode);
4002 emit_move_insn (wrk, operands[1]);
4003 while (value--)
4004 gen_ashift (ASHIFTRT, 1, wrk);
4005 emit_move_insn (operands[0], wrk);
4006 return true;
4009 wrk = gen_reg_rtx (Pmode);
4011 /* Load the value into an arg reg and call a helper. */
4012 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4013 sprintf (func, "__ashiftrt_r4_%d", value);
4014 function_symbol (wrk, func, SFUNC_STATIC);
4015 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4016 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4017 return true;
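/* A rough picture of the dispatch above for constant counts without dynamic
   shifts: a count of 31 uses a special T-bit based sequence, counts of
   16..19 use the ashrsi2_16 pattern plus single-bit shifts, counts of 1..5
   are done with single-bit shifts only, and the remaining counts (6..15 and
   20..30) go through the __ashiftrt_r4_<n> library helpers with the value
   passed in r4.  */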
4020 /* Try to find a good way to implement the combiner pattern
4021 [(set (match_operand:SI 0 "register_operand" "r")
4022 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4023 (match_operand:SI 2 "const_int_operand" "n"))
4024 (match_operand:SI 3 "const_int_operand" "n"))) .
4025 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4026 return 0 for simple right / left or left/right shift combination.
4027 return 1 for a combination of shifts with zero_extend.
4028 return 2 for a combination of shifts with an AND that needs r0.
4029 return 3 for a combination of shifts with an AND that needs an extra
4030 scratch register, when the three highmost bits of the AND mask are clear.
4031 return 4 for a combination of shifts with an AND that needs an extra
4032 scratch register, when any of the three highmost bits of the AND mask
4033 is set.
4034 If ATTRP is set, store an initial right shift width in ATTRP[0],
4035 and the instruction length in ATTRP[1] . These values are not valid
4036 when returning 0.
4037 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4038 shift_amounts for the last shift value that is to be used before the
4039 sign extend. */
4041 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4043 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4044 int left = INTVAL (left_rtx), right;
4045 int best = 0;
4046 int cost, best_cost = 10000;
4047 int best_right = 0, best_len = 0;
4048 int i;
4049 int can_ext;
4051 if (left < 0 || left > 31)
4052 return 0;
4053 if (CONST_INT_P (mask_rtx))
4054 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4055 else
4056 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4057 /* Can this be expressed as a right shift / left shift pair? */
4058 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4059 right = exact_log2 (lsb);
4060 mask2 = ~(mask + lsb - 1);
4061 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4062 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
4063 if (! mask2)
4064 best_cost = ashl_lshr_seq[right].insn_count
4065 + ashl_lshr_seq[right + left].insn_count;
4066 /* mask has no trailing zeroes <==> ! right */
4067 else if (! right && mask2 == ~(lsb2 - 1))
4069 int late_right = exact_log2 (lsb2);
4070 best_cost = ashl_lshr_seq[left + late_right].insn_count
4071 + ashl_lshr_seq[late_right].insn_count;
4073 /* Try to use zero extend. */
4074 if (mask2 == ~(lsb2 - 1))
4076 int width, first;
4078 for (width = 8; width <= 16; width += 8)
4080 /* Can we zero-extend right away? */
4081 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4083 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4084 + ext_ashl_lshr_seq[left + right].insn_count;
4085 if (cost < best_cost)
4087 best = 1;
4088 best_cost = cost;
4089 best_right = right;
4090 best_len = cost;
4091 if (attrp)
4092 attrp[2] = -1;
4094 continue;
4096 /* ??? Could try to put zero extend into initial right shift,
4097 or even shift a bit left before the right shift. */
4098 /* Determine value of first part of left shift, to get to the
4099 zero extend cut-off point. */
4100 first = width - exact_log2 (lsb2) + right;
4101 if (first >= 0 && right + left - first >= 0)
4103 cost = ext_ashl_lshr_seq[right].insn_count
4104 + ext_ashl_lshr_seq[first].insn_count + 1
4105 + ext_ashl_lshr_seq[right + left - first].insn_count;
4107 if (cost < best_cost)
4109 best = 1;
4110 best_cost = cost;
4111 best_right = right;
4112 best_len = cost;
4113 if (attrp)
4114 attrp[2] = first;
4119 /* Try to use r0 AND pattern */
4120 for (i = 0; i <= 2; i++)
4122 if (i > right)
4123 break;
4124 if (! CONST_OK_FOR_K08 (mask >> i))
4125 continue;
4126 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4127 if (cost < best_cost)
4129 best = 2;
4130 best_cost = cost;
4131 best_right = i;
4132 best_len = cost - 1;
4135 /* Try to use a scratch register to hold the AND operand. */
4136 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4137 for (i = 0; i <= 2; i++)
4139 if (i > right)
4140 break;
4141 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4142 + (can_ext
4143 ? ext_ashl_lshr_seq
4144 : ashl_lshr_seq)[left + i].insn_count;
4145 if (cost < best_cost)
4147 best = 4 - can_ext;
4148 best_cost = cost;
4149 best_right = i;
4150 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4154 if (attrp)
4156 attrp[0] = best_right;
4157 attrp[1] = best_len;
4159 return best;
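/* A small worked example of the selection above (illustrative; the exact
   numbers come from the ashl_lshr_seq / ext_ashl_lshr_seq tables): for
   LEFT_RTX = 2 and MASK_RTX = 0x3fc the mask shifted right by 2 is 0xff,
   so the whole operation is equivalent to a byte zero-extension followed
   by a left shift by 2.  That costs about two instructions and should beat
   both the plain shift pair and the AND variants, so shl_and_kind is
   expected to return 1 here.  */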
4162 /* This is used in length attributes of the unnamed instructions
4163 corresponding to shl_and_kind return values of 1 and 2. */
4165 shl_and_length (rtx insn)
4167 rtx set_src, left_rtx, mask_rtx;
4168 int attributes[3];
4170 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4171 left_rtx = XEXP (XEXP (set_src, 0), 1);
4172 mask_rtx = XEXP (set_src, 1);
4173 shl_and_kind (left_rtx, mask_rtx, attributes);
4174 return attributes[1];
4177 /* This is used in length attribute of the and_shl_scratch instruction. */
4179 shl_and_scr_length (rtx insn)
4181 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4182 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4183 rtx op = XEXP (set_src, 0);
4184 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4185 op = XEXP (XEXP (op, 0), 0);
4186 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4189 /* Generate rtl for instructions for which shl_and_kind advised a particular
4190 method of generating them, i.e. returned zero. */
4191 bool
4192 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4194 int attributes[3];
4195 unsigned HOST_WIDE_INT mask;
4196 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4197 int right, total_shift;
4198 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4200 right = attributes[0];
4201 total_shift = INTVAL (left_rtx) + right;
4202 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4203 switch (kind)
4205 default:
4206 return true;
4207 case 1:
4209 int first = attributes[2];
4210 rtx operands[3];
4212 if (first < 0)
4214 emit_insn ((mask << right) <= 0xff
4215 ? gen_zero_extendqisi2 (dest,
4216 gen_lowpart (QImode, source))
4217 : gen_zero_extendhisi2 (dest,
4218 gen_lowpart (HImode, source)));
4219 source = dest;
4221 if (source != dest)
4222 emit_insn (gen_movsi (dest, source));
4223 operands[0] = dest;
4224 if (right)
4226 operands[2] = GEN_INT (right);
4227 gen_shifty_hi_op (LSHIFTRT, operands);
4229 if (first > 0)
4231 operands[2] = GEN_INT (first);
4232 gen_shifty_hi_op (ASHIFT, operands);
4233 total_shift -= first;
4234 mask <<= first;
4236 if (first >= 0)
4237 emit_insn (mask <= 0xff
4238 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4239 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4240 if (total_shift > 0)
4242 operands[2] = GEN_INT (total_shift);
4243 gen_shifty_hi_op (ASHIFT, operands);
4245 break;
4247 case 4:
4248 shift_gen_fun = gen_shifty_op;
4249 case 3:
4250 /* If the topmost bit that matters is set, set the topmost bits
4251 that don't matter. This way, we might be able to get a shorter
4252 signed constant. */
4253 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4254 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4255 case 2:
4256 /* Don't expand fine-grained when combining, because that will
4257 make the pattern fail. */
4258 if (currently_expanding_to_rtl
4259 || reload_in_progress || reload_completed)
4261 rtx operands[3];
4263 /* Cases 3 and 4 should be handled by this split
4264 only while combining */
4265 gcc_assert (kind <= 2);
4266 if (right)
4268 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4269 source = dest;
4271 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4272 if (total_shift)
4274 operands[0] = dest;
4275 operands[1] = dest;
4276 operands[2] = GEN_INT (total_shift);
4277 shift_gen_fun (ASHIFT, operands);
4279 break;
4281 else
4283 int neg = 0;
4284 if (kind != 4 && total_shift < 16)
4286 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4287 if (neg > 0)
4288 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4289 else
4290 neg = 0;
4292 emit_insn (gen_and_shl_scratch (dest, source,
4293 GEN_INT (right),
4294 GEN_INT (mask),
4295 GEN_INT (total_shift + neg),
4296 GEN_INT (neg)));
4297 emit_insn (gen_movsi (dest, dest));
4298 break;
4301 return false;
4304 /* Try to find a good way to implement the combiner pattern
4305 [(set (match_operand:SI 0 "register_operand" "=r")
4306 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4307 (match_operand:SI 2 "const_int_operand" "n")
4308 (match_operand:SI 3 "const_int_operand" "n")
4309 (const_int 0)))
4310 (clobber (reg:SI T_REG))]
4311 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4312 return 0 for simple left / right shift combination.
4313 return 1 for left shift / 8 bit sign extend / left shift.
4314 return 2 for left shift / 16 bit sign extend / left shift.
4315 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4316 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4317 return 5 for left shift / 16 bit sign extend / right shift
4318 return 6 for < 8 bit sign extend / left shift.
4319 return 7 for < 8 bit sign extend / left shift / single right shift.
4320 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4322 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4324 int left, size, insize, ext;
4325 int cost = 0, best_cost;
4326 int kind;
4328 left = INTVAL (left_rtx);
4329 size = INTVAL (size_rtx);
4330 insize = size - left;
4331 gcc_assert (insize > 0);
4332 /* Default to left / right shift. */
4333 kind = 0;
4334 best_cost = ashl_lshr_seq[32 - insize].insn_count
4335 + ashl_lshr_seq[32 - size].insn_count;
4336 if (size <= 16)
4338 /* 16 bit shift / sign extend / 16 bit shift */
4339 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4340 + ashl_lshr_seq[16 - size].insn_count;
4341 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4342 below, by alternative 3 or something even better. */
4343 if (cost < best_cost)
4345 kind = 5;
4346 best_cost = cost;
4349 /* Try a plain sign extend between two shifts. */
4350 for (ext = 16; ext >= insize; ext -= 8)
4352 if (ext <= size)
4354 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4355 + ashl_lshr_seq[size - ext].insn_count;
4356 if (cost < best_cost)
4358 kind = ext / (unsigned) 8;
4359 best_cost = cost;
4362 /* Check if we can do a sloppy shift with a final signed shift
4363 restoring the sign. */
4364 if (EXT_SHIFT_SIGNED (size - ext))
4365 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4366 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4367 /* If not, maybe it's still cheaper to do the second shift sloppy,
4368 and do a final sign extend? */
4369 else if (size <= 16)
4370 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4371 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4372 + 1;
4373 else
4374 continue;
4375 if (cost < best_cost)
4377 kind = ext / (unsigned) 8 + 2;
4378 best_cost = cost;
4381 /* Check if we can sign extend in r0 */
4382 if (insize < 8)
4384 cost = 3 + ashl_lshr_seq[left].insn_count;
4385 if (cost < best_cost)
4387 kind = 6;
4388 best_cost = cost;
4390 /* Try the same with a final signed shift. */
4391 if (left < 31)
4393 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4394 if (cost < best_cost)
4396 kind = 7;
4397 best_cost = cost;
4401 if (TARGET_DYNSHIFT)
4403 /* Try to use a dynamic shift. */
4404 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4405 if (cost < best_cost)
4407 kind = 0;
4408 best_cost = cost;
4411 if (costp)
4412 *costp = cost;
4413 return kind;
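/* As an illustration (assuming the usual shift cost tables): for
   LEFT_RTX = 2 and SIZE_RTX = 10 the extracted field is the low 8 bits of
   the source shifted left by 2, so an 8 bit sign extension followed by a
   left shift by 2 (roughly exts.b plus shll2) should win, and
   shl_sext_kind is expected to return 1.  */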
4416 /* Function to be used in the length attribute of the instructions
4417 implementing this pattern. */
4419 shl_sext_length (rtx insn)
4421 rtx set_src, left_rtx, size_rtx;
4422 int cost;
4424 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4425 left_rtx = XEXP (XEXP (set_src, 0), 1);
4426 size_rtx = XEXP (set_src, 1);
4427 shl_sext_kind (left_rtx, size_rtx, &cost);
4428 return cost;
4431 /* Generate rtl for this pattern */
4432 bool
4433 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4435 int kind;
4436 int left, size, insize, cost;
4437 rtx operands[3];
4439 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4440 left = INTVAL (left_rtx);
4441 size = INTVAL (size_rtx);
4442 insize = size - left;
4443 switch (kind)
4445 case 1:
4446 case 2:
4447 case 3:
4448 case 4:
4450 int ext = kind & 1 ? 8 : 16;
4451 int shift2 = size - ext;
4453 /* Don't expand fine-grained when combining, because that will
4454 make the pattern fail. */
4455 if (! currently_expanding_to_rtl
4456 && ! reload_in_progress && ! reload_completed)
4458 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4459 emit_insn (gen_movsi (dest, source));
4460 break;
4462 if (dest != source)
4463 emit_insn (gen_movsi (dest, source));
4464 operands[0] = dest;
4465 if (ext - insize)
4467 operands[2] = GEN_INT (ext - insize);
4468 gen_shifty_hi_op (ASHIFT, operands);
4470 emit_insn (kind & 1
4471 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4472 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4473 if (kind <= 2)
4475 if (shift2)
4477 operands[2] = GEN_INT (shift2);
4478 gen_shifty_op (ASHIFT, operands);
4481 else
4483 if (shift2 > 0)
4485 if (EXT_SHIFT_SIGNED (shift2))
4487 operands[2] = GEN_INT (shift2 + 1);
4488 gen_shifty_op (ASHIFT, operands);
4489 operands[2] = const1_rtx;
4490 gen_shifty_op (ASHIFTRT, operands);
4491 break;
4493 operands[2] = GEN_INT (shift2);
4494 gen_shifty_hi_op (ASHIFT, operands);
4496 else if (shift2)
4498 operands[2] = GEN_INT (-shift2);
4499 gen_shifty_hi_op (LSHIFTRT, operands);
4501 emit_insn (size <= 8
4502 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4503 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4505 break;
4507 case 5:
4509 int i = 16 - size;
4510 if (! currently_expanding_to_rtl
4511 && ! reload_in_progress && ! reload_completed)
4512 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4513 else
4515 operands[0] = dest;
4516 operands[2] = GEN_INT (16 - insize);
4517 gen_shifty_hi_op (ASHIFT, operands);
4518 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4520 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4521 while (--i >= 0)
4522 gen_ashift (ASHIFTRT, 1, dest);
4523 break;
4525 case 6:
4526 case 7:
4527 /* Don't expand fine-grained when combining, because that will
4528 make the pattern fail. */
4529 if (! currently_expanding_to_rtl
4530 && ! reload_in_progress && ! reload_completed)
4532 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4533 emit_insn (gen_movsi (dest, source));
4534 break;
4536 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4537 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4538 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4539 operands[0] = dest;
4540 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4541 gen_shifty_op (ASHIFT, operands);
4542 if (kind == 7)
4543 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4544 break;
4545 default:
4546 return true;
4548 return false;
4551 /* Prefix a symbol_ref name with "datalabel". */
4553 gen_datalabel_ref (rtx sym)
4555 const char *str;
4557 if (GET_CODE (sym) == LABEL_REF)
4558 return gen_rtx_CONST (GET_MODE (sym),
4559 gen_rtx_UNSPEC (GET_MODE (sym),
4560 gen_rtvec (1, sym),
4561 UNSPEC_DATALABEL));
4563 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4565 str = XSTR (sym, 0);
4566 /* Share all SYMBOL_REF strings with the same value - that is important
4567 for cse. */
4568 str = IDENTIFIER_POINTER (get_identifier (str));
4569 XSTR (sym, 0) = str;
4571 return sym;
4575 static alloc_pool label_ref_list_pool;
4577 typedef struct label_ref_list_d
4579 rtx_code_label *label;
4580 struct label_ref_list_d *next;
4581 } *label_ref_list_t;
4583 /* The SH cannot load a large constant into a register; constants have to
4584 come from a pc relative load. The reference of a pc relative load
4585 instruction must be less than 1k in front of the instruction. This
4586 means that we often have to dump a constant inside a function, and
4587 generate code to branch around it.
4589 It is important to minimize this, since the branches will slow things
4590 down and make things bigger.
4592 Worst case code looks like:
4594 mov.l L1,rn
4595 bra L2
4597 align
4598 L1: .long value
4602 mov.l L3,rn
4603 bra L4
4605 align
4606 L3: .long value
4610 We fix this by performing a scan before scheduling, which notices which
4611 instructions need to have their operands fetched from the constant table
4612 and builds the table.
4614 The algorithm is:
4616 scan, find an instruction which needs a pcrel move. Look forward, find the
4617 last barrier which is within MAX_COUNT bytes of the requirement.
4618 If there isn't one, make one. Process all the instructions between
4619 the found insn and the barrier.
4621 In the above example, we can tell that L3 is within 1k of L1, so
4622 the first move can be shrunk from the 3 insn+constant sequence into
4623 just 1 insn, and the constant moved to L3 to make:
4625 mov.l L1,rn
4627 mov.l L3,rn
4628 bra L4
4630 align
4631 L3:.long value
4632 L4:.long value
4634 Then the second move becomes the target for the shortening process. */
4636 typedef struct
4638 rtx value; /* Value in table. */
4639 rtx_code_label *label; /* Label of value. */
4640 label_ref_list_t wend; /* End of window. */
4641 machine_mode mode; /* Mode of value. */
4643 /* True if this constant is accessed as part of a post-increment
4644 sequence. Note that HImode constants are never accessed in this way. */
4645 bool part_of_sequence_p;
4646 } pool_node;
4648 /* The maximum number of constants that can fit into one pool, since
4649 constants in the range 0..510 are at least 2 bytes long, and in the
4650 range from there to 1018 at least 4 bytes. */
4652 #define MAX_POOL_SIZE 372
4653 static pool_node pool_vector[MAX_POOL_SIZE];
4654 static int pool_size;
4655 static rtx_code_label *pool_window_label;
4656 static int pool_window_last;
4658 static int max_labelno_before_reorg;
4660 /* ??? If we need a constant in HImode which is the truncated value of a
4661 constant we need in SImode, we could combine the two entries thus saving
4662 two bytes. Is this common enough to be worth the effort of implementing
4663 it? */
4665 /* ??? This stuff should be done at the same time that we shorten branches.
4666 As it is now, we must assume that all branches are the maximum size, and
4667 this causes us to almost always output constant pools sooner than
4668 necessary. */
4670 /* Add a constant to the pool and return its label. */
4671 static rtx_code_label *
4672 add_constant (rtx x, machine_mode mode, rtx last_value)
4674 int i;
4675 rtx_code_label *lab, *new_rtx;
4676 label_ref_list_t ref, newref;
4678 /* First see if we've already got it. */
4679 for (i = 0; i < pool_size; i++)
4681 if (x->code == pool_vector[i].value->code
4682 && mode == pool_vector[i].mode)
4684 if (x->code == CODE_LABEL)
4686 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4687 continue;
4689 if (rtx_equal_p (x, pool_vector[i].value))
4691 lab = new_rtx = 0;
4692 if (! last_value
4693 || ! i
4694 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4696 new_rtx = gen_label_rtx ();
4697 LABEL_REFS (new_rtx) = pool_vector[i].label;
4698 pool_vector[i].label = lab = new_rtx;
4700 if (lab && pool_window_label)
4702 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4703 newref->label = pool_window_label;
4704 ref = pool_vector[pool_window_last].wend;
4705 newref->next = ref;
4706 pool_vector[pool_window_last].wend = newref;
4708 if (new_rtx)
4709 pool_window_label = new_rtx;
4710 pool_window_last = i;
4711 return lab;
4716 /* Need a new one. */
4717 pool_vector[pool_size].value = x;
4718 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4720 lab = 0;
4721 pool_vector[pool_size - 1].part_of_sequence_p = true;
4723 else
4724 lab = gen_label_rtx ();
4725 pool_vector[pool_size].mode = mode;
4726 pool_vector[pool_size].label = lab;
4727 pool_vector[pool_size].wend = NULL;
4728 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4729 if (lab && pool_window_label)
4731 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4732 newref->label = pool_window_label;
4733 ref = pool_vector[pool_window_last].wend;
4734 newref->next = ref;
4735 pool_vector[pool_window_last].wend = newref;
4737 if (lab)
4738 pool_window_label = lab;
4739 pool_window_last = pool_size;
4740 pool_size++;
4741 return lab;
4744 /* Output the literal table. START, if nonzero, is the first instruction
4745 this table is needed for, and also indicates that there is at least one
4746 casesi_worker_2 instruction; we have to emit the operand3 labels from
4747 these insns at a 4-byte aligned position. BARRIER is the barrier
4748 after which we are to place the table. */
4749 static void
4750 dump_table (rtx_insn *start, rtx_insn *barrier)
4752 rtx_insn *scan = barrier;
4753 int i;
4754 bool need_align = true;
4755 rtx lab;
4756 label_ref_list_t ref;
4757 bool have_df = false;
4759 /* Do two passes; on the first pass, dump out the HI sized constants. */
4761 for (i = 0; i < pool_size; i++)
4763 pool_node *p = &pool_vector[i];
4765 if (p->mode == HImode)
4767 if (need_align)
4769 scan = emit_insn_after (gen_align_2 (), scan);
4770 need_align = false;
4772 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4773 scan = emit_label_after (lab, scan);
4774 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4775 scan);
4776 for (ref = p->wend; ref; ref = ref->next)
4778 lab = ref->label;
4779 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4782 else if (p->mode == DFmode)
4783 have_df = true;
4786 need_align = true;
4788 if (start)
4790 scan = emit_insn_after (gen_align_4 (), scan);
4791 need_align = false;
4792 for (; start != barrier; start = NEXT_INSN (start))
4793 if (NONJUMP_INSN_P (start)
4794 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4796 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4797 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4799 scan = emit_label_after (lab, scan);
4802 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4804 rtx_insn *align_insn = NULL;
4806 scan = emit_label_after (gen_label_rtx (), scan);
4807 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4808 need_align = false;
4810 for (i = 0; i < pool_size; i++)
4812 pool_node *p = &pool_vector[i];
4814 switch (p->mode)
4816 case HImode:
4817 break;
4818 case SImode:
4819 case SFmode:
4820 if (align_insn && !p->part_of_sequence_p)
4822 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4823 emit_label_before (lab, align_insn);
4824 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4825 align_insn);
4826 for (ref = p->wend; ref; ref = ref->next)
4828 lab = ref->label;
4829 emit_insn_before (gen_consttable_window_end (lab),
4830 align_insn);
4832 delete_insn (align_insn);
4833 align_insn = NULL;
4834 continue;
4836 else
4838 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4839 scan = emit_label_after (lab, scan);
4840 scan = emit_insn_after (gen_consttable_4 (p->value,
4841 const0_rtx), scan);
4842 need_align = ! need_align;
4844 break;
4845 case DFmode:
4846 if (need_align)
4848 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4849 align_insn = scan;
4850 need_align = false;
4852 case DImode:
4853 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4854 scan = emit_label_after (lab, scan);
4855 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4856 scan);
4857 break;
4858 default:
4859 gcc_unreachable ();
4862 if (p->mode != HImode)
4864 for (ref = p->wend; ref; ref = ref->next)
4866 lab = ref->label;
4867 scan = emit_insn_after (gen_consttable_window_end (lab),
4868 scan);
4873 pool_size = 0;
4876 for (i = 0; i < pool_size; i++)
4878 pool_node *p = &pool_vector[i];
4880 switch (p->mode)
4882 case HImode:
4883 break;
4884 case SImode:
4885 case SFmode:
4886 if (need_align)
4888 need_align = false;
4889 scan = emit_label_after (gen_label_rtx (), scan);
4890 scan = emit_insn_after (gen_align_4 (), scan);
4892 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4893 scan = emit_label_after (lab, scan);
4894 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4895 scan);
4896 break;
4897 case DFmode:
4898 case DImode:
4899 if (need_align)
4901 need_align = false;
4902 scan = emit_label_after (gen_label_rtx (), scan);
4903 scan = emit_insn_after (gen_align_4 (), scan);
4905 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4906 scan = emit_label_after (lab, scan);
4907 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4908 scan);
4909 break;
4910 default:
4911 gcc_unreachable ();
4914 if (p->mode != HImode)
4916 for (ref = p->wend; ref; ref = ref->next)
4918 lab = ref->label;
4919 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4924 scan = emit_insn_after (gen_consttable_end (), scan);
4925 scan = emit_barrier_after (scan);
4926 pool_size = 0;
4927 pool_window_label = NULL;
4928 pool_window_last = 0;
4931 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4933 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4935 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4936 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4937 need to fix it if the input value is CONST_OK_FOR_I08. */
4938 static bool
4939 broken_move (rtx_insn *insn)
4941 if (NONJUMP_INSN_P (insn))
4943 rtx pat = PATTERN (insn);
4944 if (GET_CODE (pat) == PARALLEL)
4945 pat = XVECEXP (pat, 0, 0);
4946 if (GET_CODE (pat) == SET
4947 /* We can load any 8-bit value if we don't care what the high
4948 order bits end up as. */
4949 && GET_MODE (SET_DEST (pat)) != QImode
4950 && (CONSTANT_P (SET_SRC (pat))
4951 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4952 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4953 /* Match mova_const. */
4954 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4955 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4956 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4957 && ! (TARGET_SH2E
4958 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4959 && (fp_zero_operand (SET_SRC (pat))
4960 || fp_one_operand (SET_SRC (pat)))
4961 /* In general we don't know the current setting of fpscr, so
4962 disable fldi.
4963 There is an exception if this was a register-register move
4964 before reload - and hence it was ascertained that we have
4965 single precision setting - and in a post-reload optimization
4966 we changed this to do a constant load. In that case
4967 we don't have an r0 clobber, hence we must use fldi. */
4968 && (TARGET_FMOVD
4969 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4970 == SCRATCH))
4971 && REG_P (SET_DEST (pat))
4972 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4973 && ! (TARGET_SH2A
4974 && GET_MODE (SET_DEST (pat)) == SImode
4975 && (satisfies_constraint_I20 (SET_SRC (pat))
4976 || satisfies_constraint_I28 (SET_SRC (pat))))
4977 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4978 return true;
4981 return false;
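/* For example, (set (reg:SI 1) (const_int 0x4000)) cannot be done with a
   single mov #imm8 instruction because the value is outside the signed
   8 bit range, so broken_move returns true and the constant will later be
   emitted into a pool and loaded PC-relative.  On SH2A the same value
   still fits the movi20 form (constraint I20), so it is not considered
   broken there.  */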
4984 /* Return true if the specified insn is a mova insn. */
4985 static bool
4986 mova_p (rtx_insn *insn)
4988 return (NONJUMP_INSN_P (insn)
4989 && GET_CODE (PATTERN (insn)) == SET
4990 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4991 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4992 /* Don't match mova_const. */
4993 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4996 /* Fix up a mova from a switch that went out of range. */
4997 static void
4998 fixup_mova (rtx_insn *mova)
5000 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5001 if (! flag_pic)
5003 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5004 INSN_CODE (mova) = -1;
5006 else
5008 rtx_insn *worker = mova;
5009 rtx_code_label *lab = gen_label_rtx ();
5010 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5014 worker = NEXT_INSN (worker);
5015 gcc_assert (worker
5016 && !LABEL_P (worker)
5017 && !JUMP_P (worker));
5018 } while (NOTE_P (worker)
5019 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5020 wpat = PATTERN (worker);
5021 wpat0 = XVECEXP (wpat, 0, 0);
5022 wpat1 = XVECEXP (wpat, 0, 1);
5023 wsrc = SET_SRC (wpat0);
5024 PATTERN (worker) = (gen_casesi_worker_2
5025 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5026 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5027 XEXP (wpat1, 0)));
5028 INSN_CODE (worker) = -1;
5029 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5030 base = gen_rtx_LABEL_REF (Pmode, lab);
5031 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5032 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5033 INSN_CODE (mova) = -1;
5037 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5038 *num_mova, and check if the new mova is not nested within the first one.
5039 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5040 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5041 static int
5042 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5044 int n_addr = 0; /* Initialization to shut up spurious warning. */
5045 int f_target, n_target = 0; /* Likewise. */
5047 if (optimize)
5049 /* If NEW_MOVA has no address yet, it will be handled later. */
5050 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5051 return -1;
5053 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5054 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5055 if (n_addr > n_target || n_addr + 1022 < n_target)
5057 /* Change the mova into a load.
5058 broken_move will then return true for it. */
5059 fixup_mova (new_mova);
5060 return 1;
5063 if (!(*num_mova)++)
5065 *first_mova = new_mova;
5066 return 2;
5068 if (!optimize
5069 || ((f_target
5070 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5071 >= n_target))
5072 return -1;
5074 (*num_mova)--;
5075 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5076 > n_target - n_addr)
5078 fixup_mova (*first_mova);
5079 return 0;
5081 else
5083 fixup_mova (new_mova);
5084 return 1;
5088 /* Find the last barrier from insn FROM which is close enough to hold the
5089 constant pool. If we can't find one, then create one near the end of
5090 the range. */
5091 static rtx_insn *
5092 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5094 int count_si = 0;
5095 int count_hi = 0;
5096 int found_hi = 0;
5097 int found_si = 0;
5098 int found_di = 0;
5099 int hi_align = 2;
5100 int si_align = 2;
5101 int leading_mova = num_mova;
5102 rtx_insn *barrier_before_mova = NULL;
5103 rtx_insn *found_barrier = NULL;
5104 rtx_insn *good_barrier = NULL;
5105 int si_limit;
5106 int hi_limit;
5107 rtx_insn *orig = from;
5108 rtx_insn *last_got = NULL;
5109 rtx_insn *last_symoff = NULL;
5111 /* For HImode: range is 510, add 4 because pc counts from address of
5112 second instruction after this one, subtract 2 for the jump instruction
5113 that we may need to emit before the table, subtract 2 for the instruction
5114 that fills the jump delay slot (in very rare cases, reorg will take an
5115 instruction from after the constant pool or will leave the delay slot
5116 empty). This gives 510.
5117 For SImode: range is 1020, add 4 because pc counts from address of
5118 second instruction after this one, subtract 2 in case pc is 2 byte
5119 aligned, subtract 2 for the jump instruction that we may need to emit
5120 before the table, subtract 2 for the instruction that fills the jump
5121 delay slot. This gives 1018. */
5123 /* The branch will always be shortened now that the reference address for
5124 forward branches is the successor address, thus we need no longer make
5125 adjustments to the [sh]i_limit for -O0. */
5127 si_limit = 1018;
5128 hi_limit = 510;
5130 while (from && count_si < si_limit && count_hi < hi_limit)
5132 int inc = get_attr_length (from);
5133 int new_align = 1;
5135 /* If this is a label that existed at the time of the compute_alignments
5136 call, determine the alignment. N.B. When find_barrier recurses for
5137 an out-of-reach mova, we might see labels at the start of previously
5138 inserted constant tables. */
5139 if (LABEL_P (from)
5140 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5142 if (optimize)
5143 new_align = 1 << label_to_alignment (from);
5144 else if (BARRIER_P (prev_nonnote_insn (from)))
5145 new_align = 1 << barrier_align (from);
5146 else
5147 new_align = 1;
5148 inc = 0;
5150 /* In case we are scanning a constant table because of recursion, check
5151 for explicit alignments. If the table is long, we might be forced
5152 to emit the new table in front of it; the length of the alignment
5153 might be the last straw. */
5154 else if (NONJUMP_INSN_P (from)
5155 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5156 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5157 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5158 /* When we find the end of a constant table, paste the new constant
5159 at the end. That is better than putting it in front because
5160 this way, we don't need extra alignment for adding a 4-byte-aligned
5161 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5162 else if (NONJUMP_INSN_P (from)
5163 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5164 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5165 return from;
5167 if (BARRIER_P (from))
5169 rtx_insn *next;
5171 found_barrier = from;
5173 /* If we are at the end of the function, or in front of an alignment
5174 instruction, we need not insert an extra alignment. We prefer
5175 this kind of barrier. */
5176 if (barrier_align (from) > 2)
5177 good_barrier = from;
5179 /* If we are at the end of a hot/cold block, dump the constants
5180 here. */
5181 next = NEXT_INSN (from);
5182 if (next
5183 && NOTE_P (next)
5184 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5185 break;
5188 if (broken_move (from))
5190 rtx pat, src, dst;
5191 machine_mode mode;
5193 pat = PATTERN (from);
5194 if (GET_CODE (pat) == PARALLEL)
5195 pat = XVECEXP (pat, 0, 0);
5196 src = SET_SRC (pat);
5197 dst = SET_DEST (pat);
5198 mode = GET_MODE (dst);
5200 /* GOT pc-relative setting comes in a pair of
5201 mova .L8,r0
5202 mov.l .L8,r12
5203 instructions. (plus add r0,r12).
5204 Remember if we see one without the other. */
5205 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5206 last_got = last_got ? NULL : from;
5207 else if (PIC_ADDR_P (src))
5208 last_got = last_got ? NULL : from;
5210 /* We must explicitly check the mode, because sometimes the
5211 front end will generate code to load unsigned constants into
5212 HImode targets without properly sign extending them. */
5213 if (mode == HImode
5214 || (mode == SImode && satisfies_constraint_I16 (src)
5215 && REGNO (dst) != FPUL_REG))
5217 found_hi += 2;
5218 /* We put the short constants before the long constants, so
5219 we must count the length of short constants in the range
5220 for the long constants. */
5221 /* ??? This isn't optimal, but is easy to do. */
5222 si_limit -= 2;
5224 else
5226 /* We dump DF/DI constants before SF/SI ones, because
5227 the limit is the same, but the alignment requirements
5228 are higher. We may waste up to 4 additional bytes
5229 for alignment, and the DF/DI constant may have
5230 another SF/SI constant placed before it. */
5231 if (TARGET_SHCOMPACT
5232 && ! found_di
5233 && (mode == DFmode || mode == DImode))
5235 found_di = 1;
5236 si_limit -= 8;
5238 while (si_align > 2 && found_si + si_align - 2 > count_si)
5239 si_align >>= 1;
5240 if (found_si > count_si)
5241 count_si = found_si;
5242 found_si += GET_MODE_SIZE (mode);
5243 if (num_mova)
5244 si_limit -= GET_MODE_SIZE (mode);
5248 if (mova_p (from))
5250 switch (untangle_mova (&num_mova, &mova, from))
5252 case 1:
5253 if (flag_pic)
5255 rtx src = SET_SRC (PATTERN (from));
5256 if (GET_CODE (src) == CONST
5257 && GET_CODE (XEXP (src, 0)) == UNSPEC
5258 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5259 last_symoff = from;
5261 break;
5262 case 0: return find_barrier (0, 0, mova);
5263 case 2:
5265 leading_mova = 0;
5266 barrier_before_mova
5267 = good_barrier ? good_barrier : found_barrier;
5269 default: break;
5271 if (found_si > count_si)
5272 count_si = found_si;
5274 else if (JUMP_TABLE_DATA_P (from)
5275 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5277 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5278 || (num_mova
5279 && (prev_nonnote_insn (from)
5280 == XEXP (MOVA_LABELREF (mova), 0))))
5281 num_mova--;
5282 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5284 /* We have just passed the barrier in front of the
5285 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5286 the ADDR_DIFF_VEC is accessed as data, just like our pool
5287 constants, this is a good opportunity to accommodate what
5288 we have gathered so far.
5289 If we waited any longer, we could end up at a barrier in
5290 front of code, which gives worse cache usage for separated
5291 instruction / data caches. */
5292 good_barrier = found_barrier;
5293 break;
5295 else
5297 rtx body = PATTERN (from);
5298 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5301 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5302 else if (JUMP_P (from)
5303 && ! TARGET_SH2
5304 && ! optimize_size)
5305 new_align = 4;
5307 /* There is a possibility that a bf is transformed into a bf/s by the
5308 delay slot scheduler. */
5309 if (JUMP_P (from)
5310 && get_attr_type (from) == TYPE_CBRANCH
5311 && ! sequence_insn_p (from))
5312 inc += 2;
5314 if (found_si)
5316 count_si += inc;
5317 if (new_align > si_align)
5319 si_limit -= (count_si - 1) & (new_align - si_align);
5320 si_align = new_align;
5322 count_si = (count_si + new_align - 1) & -new_align;
5324 if (found_hi)
5326 count_hi += inc;
5327 if (new_align > hi_align)
5329 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5330 hi_align = new_align;
5332 count_hi = (count_hi + new_align - 1) & -new_align;
5334 from = NEXT_INSN (from);
5337 if (num_mova)
5339 if (leading_mova)
5341 /* Try as we might, the leading mova is out of range. Change
5342 it into a load (which will become a pcload) and retry. */
5343 fixup_mova (mova);
5344 return find_barrier (0, 0, mova);
5346 else
5348 /* Insert the constant pool table before the mova instruction,
5349 to prevent the mova label reference from going out of range. */
5350 from = mova;
5351 good_barrier = found_barrier = barrier_before_mova;
5355 if (found_barrier)
5357 if (good_barrier && next_real_insn (found_barrier))
5358 found_barrier = good_barrier;
5360 else
5362 /* We didn't find a barrier in time to dump our stuff,
5363 so we'll make one. */
5364 rtx_code_label *label = gen_label_rtx ();
5366 /* Don't emit a constant table in the middle of insns for
5367 casesi_worker_2. This is a bit of overkill, but it is enough
5368 because casesi_worker_2 doesn't appear very frequently. */
5369 if (last_symoff)
5370 from = last_symoff;
5372 /* If we exceeded the range, then we must back up over the last
5373 instruction we looked at. Otherwise, we just need to undo the
5374 NEXT_INSN at the end of the loop. */
5375 if (PREV_INSN (from) != orig
5376 && (count_hi > hi_limit || count_si > si_limit))
5377 from = PREV_INSN (PREV_INSN (from));
5378 else
5379 from = PREV_INSN (from);
5381 /* Don't emit a constant table in the middle of global pointer setting,
5382 since that would move the addressing base GOT into another table.
5383 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5384 in the pool anyway, so just move up the whole constant pool.
5386 However, avoid doing so when the last single GOT mov is the starting
5387 insn itself. Going back past the start insn would create a negative
5388 offset, causing errors. */
5389 if (last_got && last_got != orig)
5390 from = PREV_INSN (last_got);
5392 /* Don't insert the constant pool table at the position which
5393 may be the landing pad. */
5394 if (flag_exceptions
5395 && CALL_P (from)
5396 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5397 from = PREV_INSN (from);
5399 /* Walk back to be just before any jump or label.
5400 Putting it before a label reduces the number of times the branch
5401 around the constant pool table will be hit. Putting it before
5402 a jump makes it more likely that the bra delay slot will be
5403 filled. */
5404 while (NOTE_P (from) || JUMP_P (from)
5405 || LABEL_P (from))
5406 from = PREV_INSN (from);
5408 /* Make sure we do not split between a call and its corresponding
5409 CALL_ARG_LOCATION note. */
5410 if (CALL_P (from))
5412 rtx_insn *next = NEXT_INSN (from);
5413 if (next && NOTE_P (next)
5414 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5415 from = next;
5418 from = emit_jump_insn_after (gen_jump (label), from);
5419 JUMP_LABEL (from) = label;
5420 LABEL_NUSES (label) = 1;
5421 found_barrier = emit_barrier_after (from);
5422 emit_label_after (label, found_barrier);
5425 return found_barrier;
5428 /* If the instruction INSN is implemented by a special function, and we can
5429 positively find the register that is used to call the sfunc, and this
5430 register is not used anywhere else in this instruction - except as the
5431 destination of a set, return this register; else, return 0. */
5433 sfunc_uses_reg (rtx_insn *insn)
5435 int i;
5436 rtx pattern, part, reg_part, reg;
5438 if (!NONJUMP_INSN_P (insn))
5439 return NULL_RTX;
5440 pattern = PATTERN (insn);
5441 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5442 return NULL_RTX;
5444 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5446 part = XVECEXP (pattern, 0, i);
5447 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5448 reg_part = part;
5450 if (! reg_part)
5451 return NULL_RTX;
5452 reg = XEXP (reg_part, 0);
5453 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5455 part = XVECEXP (pattern, 0, i);
5456 if (part == reg_part || GET_CODE (part) == CLOBBER)
5457 continue;
5458 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5459 && REG_P (SET_DEST (part)))
5460 ? SET_SRC (part) : part)))
5461 return NULL_RTX;
5463 return reg;
5466 /* See if the only way in which INSN uses REG is by calling it, or by
5467 setting it while calling it. Set *SET to a SET rtx if the register
5468 is set by INSN. */
5469 static bool
5470 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5472 rtx pattern, reg2;
5474 *set = NULL_RTX;
5476 reg2 = sfunc_uses_reg (insn);
5477 if (reg2 && REGNO (reg2) == REGNO (reg))
5479 pattern = single_set (insn);
5480 if (pattern
5481 && REG_P (SET_DEST (pattern))
5482 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5483 *set = pattern;
5484 return false;
5486 if (!CALL_P (insn))
5488 /* We don't use rtx_equal_p because we don't care if the mode is
5489 different. */
5490 pattern = single_set (insn);
5491 if (pattern
5492 && REG_P (SET_DEST (pattern))
5493 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5495 rtx par, part;
5496 int i;
5498 *set = pattern;
5499 par = PATTERN (insn);
5500 if (GET_CODE (par) == PARALLEL)
5501 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5503 part = XVECEXP (par, 0, i);
5504 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5505 return true;
5507 return reg_mentioned_p (reg, SET_SRC (pattern));
5510 return true;
5513 pattern = PATTERN (insn);
5515 if (GET_CODE (pattern) == PARALLEL)
5517 int i;
5519 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5520 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5521 return true;
5522 pattern = XVECEXP (pattern, 0, 0);
5525 if (GET_CODE (pattern) == SET)
5527 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5529 /* We don't use rtx_equal_p, because we don't care if the
5530 mode is different. */
5531 if (!REG_P (SET_DEST (pattern))
5532 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5533 return true;
5535 *set = pattern;
5538 pattern = SET_SRC (pattern);
5541 if (GET_CODE (pattern) != CALL
5542 || !MEM_P (XEXP (pattern, 0))
5543 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5544 return true;
5546 return false;
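/* For example, for an indirect call whose pattern contains
     (call (mem:SI (reg:SI 1)) ...)
   and where r1 appears nowhere else, this returns false: the only use of
   r1 is as the call address.  If r1 also shows up as an argument or inside
   another part of the pattern, true is returned instead.  */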
5549 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5550 general registers. Bits 0..15 mean that the respective registers
5551 are used as inputs in the instruction. Bits 16..31 mean that the
5552 registers 0..15, respectively, are used as outputs, or are clobbered.
5553 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5555 regs_used (rtx x, int is_dest)
5557 enum rtx_code code;
5558 const char *fmt;
5559 int i, used = 0;
5561 if (! x)
5562 return used;
5563 code = GET_CODE (x);
5564 switch (code)
5566 case REG:
5567 if (REGNO (x) < 16)
5568 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5569 << (REGNO (x) + is_dest));
5570 return 0;
5571 case SUBREG:
5573 rtx y = SUBREG_REG (x);
5575 if (!REG_P (y))
5576 break;
5577 if (REGNO (y) < 16)
5578 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5579 << (REGNO (y) +
5580 subreg_regno_offset (REGNO (y),
5581 GET_MODE (y),
5582 SUBREG_BYTE (x),
5583 GET_MODE (x)) + is_dest));
5584 return 0;
5586 case SET:
5587 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5588 case RETURN:
5589 /* If there was a return value, it must have been indicated with USE. */
5590 return 0x00ffff00;
5591 case CLOBBER:
5592 is_dest = 1;
5593 break;
5594 case MEM:
5595 is_dest = 0;
5596 break;
5597 case CALL:
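      /* Decoded with the mask layout above, 0x00ff00f0 records r4..r7 as
	 read and r0..r7 as set or clobbered - a conservative summary of a
	 call under the usual SH calling convention.  */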
5598 used |= 0x00ff00f0;
5599 break;
5600 default:
5601 break;
5604 fmt = GET_RTX_FORMAT (code);
5606 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5608 if (fmt[i] == 'E')
5610 int j;
5611 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5612 used |= regs_used (XVECEXP (x, i, j), is_dest);
5614 else if (fmt[i] == 'e')
5615 used |= regs_used (XEXP (x, i), is_dest);
5617 return used;
5620 /* Create an instruction that prevents redirection of a conditional branch
5621 to the destination of the JUMP with address ADDR.
5622 If the branch needs to be implemented as an indirect jump, try to find
5623 a scratch register for it.
5624 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5625 If any preceding insn that doesn't fit into a delay slot is good enough,
5626 pass 1. Pass 2 if a definite blocking insn is needed.
5627 -1 is used internally to avoid deep recursion.
5628 If a blocking instruction is made or recognized, return it. */
5629 static rtx_insn *
5630 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5632 int dead = 0;
5633 rtx_insn *prev = prev_nonnote_insn (jump);
5634 rtx dest;
5636 /* First, check if we already have an instruction that satisfies our need. */
5637 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5639 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5640 return prev;
5641 if (GET_CODE (PATTERN (prev)) == USE
5642 || GET_CODE (PATTERN (prev)) == CLOBBER
5643 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5644 prev = jump;
5645 else if ((need_block &= ~1) < 0)
5646 return prev;
5647 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5648 need_block = 0;
5650 if (GET_CODE (PATTERN (jump)) == RETURN)
5652 if (! need_block)
5653 return prev;
5654 /* Reorg even does nasty things with return insns that cause branches
5655 to go out of range - see find_end_label and callers. */
5656 return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
5658 /* We can't use JUMP_LABEL here because it might be undefined
5659 when not optimizing. */
5660 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5661 /* If the branch is out of range, try to find a scratch register for it. */
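  /* The unsigned comparison below folds the two-sided test
     "displacement < -4092 || displacement > 4098" into a single compare:
     adding 4092 maps the allowed window onto 0..8190, and anything outside
     that window wraps around to a large unsigned value.  */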
5662 if (optimize
5663 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5664 > 4092 + 4098))
5666 rtx_insn *scan;
5667 /* Don't look for the stack pointer as a scratch register;
5668 it would cause trouble if an interrupt occurred. */
5669 unsigned attempt = 0x7fff, used;
5670 int jump_left = flag_expensive_optimizations + 1;
5672 /* It is likely that the most recent eligible instruction is wanted for
5673 the delay slot. Therefore, find out which registers it uses, and
5674 try to avoid using them. */
5676 for (scan = jump; (scan = PREV_INSN (scan)); )
5678 enum rtx_code code;
5680 if (scan->deleted ())
5681 continue;
5682 code = GET_CODE (scan);
5683 if (code == CODE_LABEL || code == JUMP_INSN)
5684 break;
5685 if (code == INSN
5686 && GET_CODE (PATTERN (scan)) != USE
5687 && GET_CODE (PATTERN (scan)) != CLOBBER
5688 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5690 attempt &= ~regs_used (PATTERN (scan), 0);
5691 break;
5694 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5695 (scan = NEXT_INSN (scan)); )
5697 enum rtx_code code;
5699 if (scan->deleted ())
5700 continue;
5701 code = GET_CODE (scan);
5702 if (INSN_P (scan))
5704 used |= regs_used (PATTERN (scan), 0);
5705 if (code == CALL_INSN)
5706 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5707 dead |= (used >> 16) & ~used;
5708 if (dead & attempt)
5710 dead &= attempt;
5711 break;
5713 if (code == JUMP_INSN)
5715 if (jump_left-- && simplejump_p (scan))
5716 scan = JUMP_LABEL_AS_INSN (scan);
5717 else
5718 break;
5722 /* Mask out the stack pointer again, in case it was
5723 the only 'free' register we have found. */
5724 dead &= 0x7fff;
5726 /* If the immediate destination is still in range, check for possible
5727 threading with a jump beyond the delay slot insn.
5728 Don't check if we are called recursively; in that case the jump has
5729 been or will be checked in a different invocation. */
5731 else if (optimize && need_block >= 0)
5733 rtx_insn *next = next_active_insn (next_active_insn (dest));
5734 if (next && JUMP_P (next)
5735 && GET_CODE (PATTERN (next)) == SET
5736 && recog_memoized (next) == CODE_FOR_jump_compact)
5738 dest = JUMP_LABEL (next);
5739 if (dest
5740 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5741 > 4092 + 4098))
5742 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5746 if (dead)
5748 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5750 /* It would be nice if we could convert the jump into an indirect
5751 jump / far branch right now, thus exposing all constituent
5752 instructions to further optimization. However, reorg uses
5753 simplejump_p to determine if there is an unconditional jump where
5754 it should try to schedule instructions from the target of the
5755 branch; simplejump_p fails for indirect jumps even if they have
5756 a JUMP_LABEL. */
5757 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5758 (reg, GEN_INT (unspec_bbr_uid++)),
5759 jump);
5760 /* ??? We would like this to have the scope of the jump, but that
5761 scope will change when a delay slot insn of an inner scope is added.
5762 Hence, after delay slot scheduling, we'll have to expect
5763 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5764 the jump. */
5766 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5767 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5768 return insn;
5770 else if (need_block)
5771 /* We can't use JUMP_LABEL here because it might be undefined
5772 when not optimizing. */
5773 return emit_insn_before (gen_block_branch_redirect
5774 (GEN_INT (unspec_bbr_uid++)),
5775 jump);
5776 return prev;
5779 #define CONDJUMP_MIN -252
5780 #define CONDJUMP_MAX 262
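/* CONDJUMP_MIN / CONDJUMP_MAX bound the byte displacement that a conditional
   branch is assumed to be able to cover.  split_branches below uses them to
   decide whether an existing near label or insert place is still reachable
   from a new user of the same far destination, or whether a new near label
   (and possibly a far branch) has to be created.  */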
5781 struct far_branch
5783 /* A label (to be placed) in front of the jump
5784 that jumps to our ultimate destination. */
5785 rtx_insn *near_label;
5786 /* Where we are going to insert it if we cannot move the jump any farther,
5787 or the jump itself if we have picked up an existing jump. */
5788 rtx_insn *insert_place;
5789 /* The ultimate destination. */
5790 rtx_insn *far_label;
5791 struct far_branch *prev;
5792 /* If the branch has already been created, its address;
5793 else the address of its first prospective user. */
5794 int address;
5797 static void gen_far_branch (struct far_branch *);
5798 enum mdep_reorg_phase_e mdep_reorg_phase;
5799 static void
5800 gen_far_branch (struct far_branch *bp)
5802 rtx_insn *insn = bp->insert_place;
5803 rtx_insn *jump;
5804 rtx_code_label *label = gen_label_rtx ();
5805 int ok;
5807 emit_label_after (label, insn);
5808 if (bp->far_label)
5810 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5811 LABEL_NUSES (bp->far_label)++;
5813 else
5814 jump = emit_jump_insn_after (gen_return (), insn);
5816 /* Emit a barrier so that reorg knows that any following instructions
5817 are not reachable via a fall-through path.
5818 But don't do this when not optimizing, since we wouldn't suppress the
5819 alignment for the barrier then, and could end up with out-of-range
5820 pc-relative loads. */
5821 if (optimize)
5822 emit_barrier_after (jump);
5823 emit_label_after (bp->near_label, insn);
5825 if (bp->far_label)
5826 JUMP_LABEL (jump) = bp->far_label;
5827 else
5829 rtx pat = PATTERN (jump);
5830 gcc_assert (ANY_RETURN_P (pat));
5831 JUMP_LABEL (jump) = pat;
5834 ok = invert_jump (insn, label, 1);
5835 gcc_assert (ok);
5837 /* If we are branching around a jump (rather than a return), prevent
5838 reorg from using an insn from the jump target as the delay slot insn -
5839 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5840 and it could cause branches to go out of range. */
5841 if (bp->far_label)
5842 (emit_insn_after
5843 (gen_stuff_delay_slot
5844 (GEN_INT (unspec_bbr_uid++),
5845 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5846 insn));
5847 /* Prevent reorg from undoing our splits. */
5848 gen_block_redirect (jump, bp->address += 2, 2);
5851 /* Fix up ADDR_DIFF_VECs. */
5852 void
5853 fixup_addr_diff_vecs (rtx_insn *first)
5855 rtx_insn *insn;
5857 for (insn = first; insn; insn = NEXT_INSN (insn))
5859 rtx vec_lab, pat, prevpat, x, braf_label;
5860 rtx_insn *prev;
5862 if (! JUMP_TABLE_DATA_P (insn)
5863 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5864 continue;
5865 pat = PATTERN (insn);
5866 vec_lab = XEXP (XEXP (pat, 0), 0);
5868 /* Search the matching casesi_jump_2. */
5869 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5871 if (!JUMP_P (prev))
5872 continue;
5873 prevpat = PATTERN (prev);
5874 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5875 continue;
5876 x = XVECEXP (prevpat, 0, 1);
5877 if (GET_CODE (x) != USE)
5878 continue;
5879 x = XEXP (x, 0);
5880 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5881 break;
5883 /* FIXME: This is a bug in the optimizer, but it seems harmless
5884 to just avoid panicking. */
5885 if (!prev)
5886 continue;
5888 /* Emit the reference label of the braf where it belongs, right after
5889 the casesi_jump_2 (i.e. braf). */
5890 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5891 emit_label_after (braf_label, prev);
5893 /* Fix up the ADDR_DIFF_VEC to be relative
5894 to the reference address of the braf. */
5895 XEXP (XEXP (pat, 0), 0) = braf_label;
5899 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5900 a barrier. Return the base 2 logarithm of the desired alignment. */
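/* The value is a log2, so for instance a return value of 2 requests
   1 << 2 = 4 byte alignment, and 0 requests no particular alignment.  */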
5902 barrier_align (rtx_insn *barrier_or_label)
5904 rtx next, pat;
5906 if (! barrier_or_label)
5907 return 0;
5909 if (LABEL_P (barrier_or_label)
5910 && NEXT_INSN (barrier_or_label)
5911 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5912 return 2;
5914 if (BARRIER_P (barrier_or_label)
5915 && PREV_INSN (barrier_or_label)
5916 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5918 pat = PATTERN (PREV_INSN (barrier_or_label));
5919 /* If this is a very small table, we want to keep the alignment after
5920 the table to the minimum for proper code alignment. */
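      /* As an illustration, with a 32-byte cache line (CACHE_LOG == 5) the
	 threshold below would be 1 << 3 = 8 bytes, i.e. four HImode or two
	 SImode table entries; the actual value depends on the target's
	 CACHE_LOG.  */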
5921 return ((optimize_size
5922 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5923 <= (unsigned) 1 << (CACHE_LOG - 2)))
5924 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5927 next = next_active_insn (barrier_or_label);
5929 if (! next)
5930 return 0;
5932 pat = PATTERN (next);
5934 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5935 /* This is a barrier in front of a constant table. */
5936 return 0;
5938 if (optimize_size)
5939 return 0;
5941 if (! TARGET_SH2 || ! optimize)
5942 return align_jumps_log;
5944 /* When fixing up pcloads, a constant table might be inserted just before
5945 the basic block that ends with the barrier. Thus, we can't trust the
5946 instruction lengths before that. */
5947 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5949 /* Check if there is an immediately preceding branch to the insn beyond
5950 the barrier. We must weigh the cost of discarding useful information
5951 from the current cache line when executing this branch and there is
5952 an alignment, against that of fetching unneeded insns in front of the
5953 branch target when there is no alignment. */
5955 /* There are two delay_slot cases to consider. One is the simple case
5956 where the preceding branch is to the insn beyond the barrier (simple
5957 delay slot filling), and the other is where the preceding branch has
5958 a delay slot that is a duplicate of the insn after the barrier
5959 (fill_eager_delay_slots) and the branch is to the insn after the insn
5960 after the barrier. */
5962 int slot, credit;
5963 bool jump_to_next = false;
5965 /* Skip to the insn before the JUMP_INSN before the barrier under
5966 investigation. */
5967 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5969 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5970 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5971 prev = prev_real_insn (prev))
5973 jump_to_next = false;
5974 if (GET_CODE (PATTERN (prev)) == USE
5975 || GET_CODE (PATTERN (prev)) == CLOBBER)
5976 continue;
5977 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5979 prev = prev_seq->insn (1);
5980 if (INSN_UID (prev) == INSN_UID (next))
5982 /* Delay slot was filled with insn at jump target. */
5983 jump_to_next = true;
5984 continue;
5988 if (slot &&
5989 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5990 slot = 0;
5991 credit -= get_attr_length (prev);
5993 if (prev && jump_to_label_p (prev))
5995 rtx_insn *x;
5996 if (jump_to_next
5997 || next_real_insn (JUMP_LABEL (prev)) == next
5998 /* If relax_delay_slots() decides NEXT was redundant
5999 with some previous instruction, it will have
6000 redirected PREV's jump to the following insn. */
6001 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6002 /* There is no upper bound on redundant instructions
6003 that might have been skipped, but we must not put an
6004 alignment where none had been before. */
6005 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6006 (INSN_P (x)
6007 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6008 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6009 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6011 rtx pat = PATTERN (prev);
6012 if (GET_CODE (pat) == PARALLEL)
6013 pat = XVECEXP (pat, 0, 0);
6014 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6015 return 0;
6020 return align_jumps_log;
6023 /* If we are inside a phony loop, almost any kind of label can turn up as the
6024 first one in the loop. Aligning a braf label causes incorrect switch
6025 destination addresses; we can detect braf labels because they are
6026 followed by a BARRIER.
6027 Applying loop alignment to small constant or switch tables is a waste
6028 of space, so we suppress this too. */
6030 sh_loop_align (rtx_insn *label)
6032 rtx_insn *next = label;
6034 if (! optimize || optimize_size)
6035 return 0;
6038 next = next_nonnote_insn (next);
6039 while (next && LABEL_P (next));
6041 if (! next
6042 || ! INSN_P (next)
6043 || recog_memoized (next) == CODE_FOR_consttable_2)
6044 return 0;
6046 return align_loops_log;
6049 /* Do a final pass over the function, just before delayed branch
6050 scheduling. */
6051 static void
6052 sh_reorg (void)
6054 rtx_insn *first, *insn, *mova = NULL;
6055 int num_mova;
6056 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6057 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6059 first = get_insns ();
6060 max_labelno_before_reorg = max_label_num ();
6062 /* We must split call insns before introducing `mova's. If we're
6063 optimizing, they'll have already been split. Otherwise, make
6064 sure we don't split them too late. */
6065 if (! optimize)
6066 split_all_insns_noflow ();
6068 if (TARGET_SHMEDIA)
6069 return;
6071 /* If relaxing, generate pseudo-ops to associate function calls with
6072 the symbols they call. It does no harm to not generate these
6073 pseudo-ops. However, when we can generate them, it enables the
6074 linker to potentially relax the jsr to a bsr, and eliminate the
6075 register load and, possibly, the constant pool entry. */
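/* Concretely, final_prescan_insn further below emits an internal label at
   the insn that sets the call-address register and a matching ".uses"
   directive at each call insn that uses it; those are the pseudo-ops
   referred to here.  */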
6077 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6078 if (TARGET_RELAX)
6080 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6081 own purposes. This works because none of the remaining passes
6082 need to look at them.
6084 ??? But it may break in the future. We should use a machine
6085 dependent REG_NOTE, or some other approach entirely. */
6086 for (insn = first; insn; insn = NEXT_INSN (insn))
6088 if (INSN_P (insn))
6090 rtx note;
6092 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6093 NULL_RTX)) != 0)
6094 remove_note (insn, note);
6098 for (insn = first; insn; insn = NEXT_INSN (insn))
6100 rtx pattern, reg, set, dies;
6101 rtx_code_label *label;
6102 rtx_insn *link, *scan;
6103 int rescan = 0, foundinsn = 0;
6105 if (CALL_P (insn))
6107 pattern = PATTERN (insn);
6109 if (GET_CODE (pattern) == PARALLEL)
6110 pattern = XVECEXP (pattern, 0, 0);
6111 if (GET_CODE (pattern) == SET)
6112 pattern = SET_SRC (pattern);
6114 if (GET_CODE (pattern) != CALL
6115 || !MEM_P (XEXP (pattern, 0)))
6116 continue;
6118 reg = XEXP (XEXP (pattern, 0), 0);
6120 else
6122 reg = sfunc_uses_reg (insn);
6123 if (! reg)
6124 continue;
6127 if (!REG_P (reg))
6128 continue;
6130 /* Try scanning backward to find where the register is set. */
6131 link = NULL;
6132 for (scan = PREV_INSN (insn);
6133 scan && !LABEL_P (scan);
6134 scan = PREV_INSN (scan))
6136 if (! INSN_P (scan))
6137 continue;
6139 if (! reg_mentioned_p (reg, scan))
6140 continue;
6142 if (noncall_uses_reg (reg, scan, &set))
6143 break;
6145 if (set)
6147 link = scan;
6148 break;
6152 if (! link)
6153 continue;
6155 /* The register is set at LINK. */
6157 /* We can only optimize the function call if the register is
6158 being set to a symbol. In theory, we could sometimes
6159 optimize calls to a constant location, but the assembler
6160 and linker do not support that at present. */
6161 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6162 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6163 continue;
6165 /* Scan forward from LINK to the place where REG dies, and
6166 make sure that the only insns which use REG are
6167 themselves function calls. */
6169 /* ??? This doesn't work for call targets that were allocated
6170 by reload, since there may not be a REG_DEAD note for the
6171 register. */
6173 dies = NULL_RTX;
6174 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6176 rtx scanset;
6178 /* Don't try to trace forward past a CODE_LABEL if we haven't
6179 seen INSN yet. Ordinarily, we will only find the setting insn
6180 if it is in the same basic block. However,
6181 cross-jumping can insert code labels in between the load and
6182 the call, and can result in situations where a single call
6183 insn may have two targets depending on where we came from. */
6185 if (LABEL_P (scan) && ! foundinsn)
6186 break;
6188 if (! INSN_P (scan))
6189 continue;
6191 /* Don't try to trace forward past a JUMP. To optimize
6192 safely, we would have to check that all the
6193 instructions at the jump destination did not use REG. */
6195 if (JUMP_P (scan))
6196 break;
6198 if (! reg_mentioned_p (reg, scan))
6199 continue;
6201 if (noncall_uses_reg (reg, scan, &scanset))
6202 break;
6204 if (scan == insn)
6205 foundinsn = 1;
6207 if (scan != insn
6208 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6210 /* There is a function call to this register other
6211 than the one we are checking. If we optimize
6212 this call, we need to rescan again below. */
6213 rescan = 1;
6216 /* ??? We shouldn't have to worry about SCANSET here.
6217 We should just be able to check for a REG_DEAD note
6218 on a function call. However, the REG_DEAD notes are
6219 apparently not dependable around libcalls; c-torture
6220 execute/920501-2 is a test case. If SCANSET is set,
6221 then this insn sets the register, so it must have
6222 died earlier. Unfortunately, this will only handle
6223 the cases in which the register is, in fact, set in a
6224 later insn. */
6226 /* ??? We shouldn't have to use FOUNDINSN here.
6227 This dates back to when we used LOG_LINKS to find
6228 the most recent insn which sets the register. */
6230 if (foundinsn
6231 && (scanset
6232 || find_reg_note (scan, REG_DEAD, reg)))
6234 dies = scan;
6235 break;
6239 if (! dies)
6241 /* Either there was a branch, or some insn used REG
6242 other than as a function call address. */
6243 continue;
6246 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6247 on the insn which sets the register, and on each call insn
6248 which uses the register. In final_prescan_insn we look for
6249 the REG_LABEL_OPERAND notes, and output the appropriate label
6250 or pseudo-op. */
6252 label = gen_label_rtx ();
6253 add_reg_note (link, REG_LABEL_OPERAND, label);
6254 add_reg_note (insn, REG_LABEL_OPERAND, label);
6255 if (rescan)
6257 scan = link;
6260 rtx reg2;
6262 scan = NEXT_INSN (scan);
6263 if (scan != insn
6264 && ((CALL_P (scan)
6265 && reg_mentioned_p (reg, scan))
6266 || ((reg2 = sfunc_uses_reg (scan))
6267 && REGNO (reg2) == REGNO (reg))))
6268 add_reg_note (scan, REG_LABEL_OPERAND, label);
6270 while (scan != dies);
6275 if (TARGET_SH2)
6276 fixup_addr_diff_vecs (first);
6278 if (optimize)
6280 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6281 shorten_branches (first);
6284 /* Scan the function looking for move instructions which have to be
6285 changed to pc-relative loads and insert the literal tables. */
6286 label_ref_list_pool = create_alloc_pool ("label references list",
6287 sizeof (struct label_ref_list_d),
6288 30);
6289 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6290 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6292 if (mova_p (insn))
6294 /* ??? basic block reordering can move a switch table dispatch
6295 below the switch table. Check if that has happened.
6296 We only have the addresses available when optimizing; but then,
6297 this check shouldn't be needed when not optimizing. */
6298 if (!untangle_mova (&num_mova, &mova, insn))
6300 insn = mova;
6301 num_mova = 0;
6304 else if (JUMP_TABLE_DATA_P (insn)
6305 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6306 && num_mova
6307 /* ??? loop invariant motion can also move a mova out of a
6308 loop. Since loop does this code motion anyway, maybe we
6309 should wrap UNSPEC_MOVA into a CONST, so that reload can
6310 move it back. */
6311 && ((num_mova > 1
6312 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6313 || (prev_nonnote_insn (insn)
6314 == XEXP (MOVA_LABELREF (mova), 0))))
6316 rtx_insn *scan;
6317 int total;
6319 num_mova--;
6321 /* Some code might have been inserted between the mova and
6322 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6323 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6324 total += get_attr_length (scan);
6326 /* The range of mova is 1020; add 4 because the pc counts from the
6327 address of the second instruction after this one, and subtract 2 in
6328 case the pc is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6329 cancels out with alignment effects of the mova itself. */
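	  /* That is, 1020 + 4 - 2 = 1022, the limit tested below.  */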
6330 if (total > 1022)
6332 /* Change the mova into a load, and restart scanning
6333 there. broken_move will then return true for mova. */
6334 fixup_mova (mova);
6335 insn = mova;
6338 if (broken_move (insn)
6339 || (NONJUMP_INSN_P (insn)
6340 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6342 rtx_insn *scan;
6343 /* Scan ahead looking for a barrier to stick the constant table
6344 behind. */
6345 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6346 rtx_insn *last_float_move = NULL;
6347 rtx last_float = 0, *last_float_addr = NULL;
6348 int need_aligned_label = 0;
6350 if (num_mova && ! mova_p (mova))
6352 /* find_barrier had to change the first mova into a
6353 pcload; thus, we have to start with this new pcload. */
6354 insn = mova;
6355 num_mova = 0;
6357 /* Now find all the moves between the points and modify them. */
6358 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6360 if (LABEL_P (scan))
6361 last_float = 0;
6362 if (NONJUMP_INSN_P (scan)
6363 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6364 need_aligned_label = 1;
6365 if (broken_move (scan))
6367 rtx *patp = &PATTERN (scan), pat = *patp;
6368 rtx src, dst;
6369 rtx lab;
6370 rtx newsrc;
6371 machine_mode mode;
6373 if (GET_CODE (pat) == PARALLEL)
6374 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6375 src = SET_SRC (pat);
6376 dst = SET_DEST (pat);
6377 mode = GET_MODE (dst);
6379 if (mode == SImode && satisfies_constraint_I16 (src)
6380 && REGNO (dst) != FPUL_REG)
6382 int offset = 0;
6384 mode = HImode;
6385 while (GET_CODE (dst) == SUBREG)
6387 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6388 GET_MODE (SUBREG_REG (dst)),
6389 SUBREG_BYTE (dst),
6390 GET_MODE (dst));
6391 dst = SUBREG_REG (dst);
6393 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6395 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6397 /* This must be an insn that clobbers r0. */
6398 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6399 XVECLEN (PATTERN (scan), 0)
6400 - 1);
6401 rtx clobber = *clobberp;
6403 gcc_assert (GET_CODE (clobber) == CLOBBER
6404 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6406 if (last_float
6407 && reg_set_between_p (r0_rtx, last_float_move, scan))
6408 last_float = 0;
6409 if (last_float
6410 && TARGET_SHCOMPACT
6411 && GET_MODE_SIZE (mode) != 4
6412 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6413 last_float = 0;
6414 lab = add_constant (src, mode, last_float);
6415 if (lab)
6416 emit_insn_before (gen_mova (lab), scan);
6417 else
6419 /* There will be a REG_UNUSED note for r0 on
6420 LAST_FLOAT_MOVE; we have to change it to REG_INC;
6421 otherwise reorg's mark_target_live_regs will not
6422 consider r0 to be used, and we could end up with a
6423 delay slot insn in front of SCAN that clobbers r0. */
6424 rtx note
6425 = find_regno_note (last_float_move, REG_UNUSED, 0);
6427 /* If we are not optimizing, then there may not be
6428 a note. */
6429 if (note)
6430 PUT_REG_NOTE_KIND (note, REG_INC);
6432 *last_float_addr = r0_inc_rtx;
6434 last_float_move = scan;
6435 last_float = src;
6436 newsrc = gen_const_mem (mode,
6437 (((TARGET_SH4 && ! TARGET_FMOVD)
6438 || REGNO (dst) == FPUL_REG)
6439 ? r0_inc_rtx
6440 : r0_rtx));
6441 last_float_addr = &XEXP (newsrc, 0);
6443 /* Remove the clobber of r0. */
6444 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6445 gen_rtx_SCRATCH (Pmode));
6447 /* This is a mova needing a label. Create it. */
6448 else if (GET_CODE (src) == UNSPEC
6449 && XINT (src, 1) == UNSPEC_MOVA
6450 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6452 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6453 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6454 newsrc = gen_rtx_UNSPEC (SImode,
6455 gen_rtvec (1, newsrc),
6456 UNSPEC_MOVA);
6458 else if (GET_CODE (src) == UNSPEC_VOLATILE
6459 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6461 newsrc = XVECEXP (src, 0, 0);
6462 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6463 INSN_CODE (scan) = -1;
6464 continue;
6466 else
6468 lab = add_constant (src, mode, 0);
6469 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6470 newsrc = gen_const_mem (mode, newsrc);
6472 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6473 INSN_CODE (scan) = -1;
6476 dump_table (need_aligned_label ? insn : 0, barrier);
6477 insn = barrier;
6480 free_alloc_pool (label_ref_list_pool);
6481 for (insn = first; insn; insn = NEXT_INSN (insn))
6482 PUT_MODE (insn, VOIDmode);
6484 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6485 INSN_ADDRESSES_FREE ();
6486 split_branches (first);
6488 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6489 also has an effect on the register that holds the address of the sfunc.
6490 Insert an extra dummy insn in front of each sfunc that pretends to
6491 use this register. */
6492 if (flag_delayed_branch)
6494 for (insn = first; insn; insn = NEXT_INSN (insn))
6496 rtx reg = sfunc_uses_reg (insn);
6498 if (! reg)
6499 continue;
6500 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6503 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6506 /* Return the UID of the insn that follows the specified label. */
6508 get_dest_uid (rtx label, int max_uid)
6510 rtx_insn *dest = next_real_insn (label);
6511 int dest_uid;
6512 if (! dest)
6513 /* This can happen for an undefined label. */
6514 return 0;
6515 dest_uid = INSN_UID (dest);
6516 /* If this is a newly created branch redirection blocking instruction,
6517 we cannot index the branch_uid or insn_addresses arrays with its
6518 uid. But then, we won't need to, because the actual destination is
6519 the following branch. */
6520 while (dest_uid >= max_uid)
6522 dest = NEXT_INSN (dest);
6523 dest_uid = INSN_UID (dest);
6525 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6526 return 0;
6527 return dest_uid;
6530 /* Split condbranches that are out of range. Also add clobbers for
6531 scratch registers that are needed in far jumps.
6532 We do this before delay slot scheduling, so that it can take our
6533 newly created instructions into account. It also allows us to
6534 find branches with common targets more easily. */
6535 static void
6536 split_branches (rtx_insn *first)
6538 rtx_insn *insn;
6539 struct far_branch **uid_branch, *far_branch_list = 0;
6540 int max_uid = get_max_uid ();
6541 int ok;
6543 /* Find out which branches are out of range. */
6544 shorten_branches (first);
6546 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6547 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6549 for (insn = first; insn; insn = NEXT_INSN (insn))
6550 if (! INSN_P (insn))
6551 continue;
6552 else if (insn->deleted ())
6554 /* Shorten_branches would split this instruction again,
6555 so transform it into a note. */
6556 SET_INSN_DELETED (insn);
6558 else if (JUMP_P (insn))
6560 enum attr_type type = get_attr_type (insn);
6561 if (type == TYPE_CBRANCH)
6563 rtx_insn *next, *beyond;
6565 if (get_attr_length (insn) > 4)
6567 rtx src = SET_SRC (PATTERN (insn));
6568 rtx olabel = XEXP (XEXP (src, 1), 0);
6569 int addr = INSN_ADDRESSES (INSN_UID (insn));
6570 rtx_insn *label = 0;
6571 int dest_uid = get_dest_uid (olabel, max_uid);
6572 struct far_branch *bp = uid_branch[dest_uid];
6574 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6575 the label if the LABEL_NUSES count drops to zero. There is
6576 always a jump_optimize pass that sets these values, but it
6577 proceeds to delete unreferenced code, and then if not
6578 optimizing, to un-delete the deleted instructions, thus
6579 leaving labels with use counts that are too low. */
6580 if (! optimize)
6582 JUMP_LABEL (insn) = olabel;
6583 LABEL_NUSES (olabel)++;
6585 if (! bp)
6587 bp = (struct far_branch *) alloca (sizeof *bp);
6588 uid_branch[dest_uid] = bp;
6589 bp->prev = far_branch_list;
6590 far_branch_list = bp;
6591 bp->far_label = as_a <rtx_insn *> (
6592 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6593 0));
6594 LABEL_NUSES (bp->far_label)++;
6596 else
6598 label = bp->near_label;
6599 if (! label && bp->address - addr >= CONDJUMP_MIN)
6601 rtx_insn *block = bp->insert_place;
6603 if (GET_CODE (PATTERN (block)) == RETURN)
6604 block = PREV_INSN (block);
6605 else
6606 block = gen_block_redirect (block,
6607 bp->address, 2);
6608 label = emit_label_after (gen_label_rtx (),
6609 PREV_INSN (block));
6610 bp->near_label = label;
6612 else if (label && ! NEXT_INSN (label))
6614 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6615 bp->insert_place = insn;
6616 else
6617 gen_far_branch (bp);
6620 if (! label
6621 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6623 bp->near_label = label = gen_label_rtx ();
6624 bp->insert_place = insn;
6625 bp->address = addr;
6627 ok = redirect_jump (insn, label, 0);
6628 gcc_assert (ok);
6630 else
6632 /* get_attr_length (insn) == 2 */
6633 /* Check if we have a pattern where reorg wants to redirect
6634 the branch to a label from an unconditional branch that
6635 is too far away. */
6636 /* We can't use JUMP_LABEL here because it might be undefined
6637 when not optimizing. */
6638 /* A syntax error might cause beyond to be NULL_RTX. */
6639 beyond
6640 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6641 0));
6643 if (beyond
6644 && (JUMP_P (beyond)
6645 || ((beyond = next_active_insn (beyond))
6646 && JUMP_P (beyond)))
6647 && GET_CODE (PATTERN (beyond)) == SET
6648 && recog_memoized (beyond) == CODE_FOR_jump_compact
6649 && ((INSN_ADDRESSES
6650 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6651 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6652 > 252 + 258 + 2))
6653 gen_block_redirect (beyond,
6654 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6657 next = next_active_insn (insn);
6659 if (next
6660 && (JUMP_P (next)
6661 || ((next = next_active_insn (next))
6662 && JUMP_P (next)))
6663 && GET_CODE (PATTERN (next)) == SET
6664 && recog_memoized (next) == CODE_FOR_jump_compact
6665 && ((INSN_ADDRESSES
6666 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6667 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6668 > 252 + 258 + 2))
6669 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6671 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6673 int addr = INSN_ADDRESSES (INSN_UID (insn));
6674 rtx_insn *far_label = 0;
6675 int dest_uid = 0;
6676 struct far_branch *bp;
6678 if (type == TYPE_JUMP)
6680 far_label = as_a <rtx_insn *> (
6681 XEXP (SET_SRC (PATTERN (insn)), 0));
6682 dest_uid = get_dest_uid (far_label, max_uid);
6683 if (! dest_uid)
6685 /* Parse errors can lead to labels outside
6686 the insn stream. */
6687 if (! NEXT_INSN (far_label))
6688 continue;
6690 if (! optimize)
6692 JUMP_LABEL (insn) = far_label;
6693 LABEL_NUSES (far_label)++;
6695 redirect_jump (insn, ret_rtx, 1);
6696 far_label = 0;
6699 bp = uid_branch[dest_uid];
6700 if (! bp)
6702 bp = (struct far_branch *) alloca (sizeof *bp);
6703 uid_branch[dest_uid] = bp;
6704 bp->prev = far_branch_list;
6705 far_branch_list = bp;
6706 bp->near_label = 0;
6707 bp->far_label = far_label;
6708 if (far_label)
6709 LABEL_NUSES (far_label)++;
6711 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6712 if (addr - bp->address <= CONDJUMP_MAX)
6713 emit_label_after (bp->near_label, PREV_INSN (insn));
6714 else
6716 gen_far_branch (bp);
6717 bp->near_label = 0;
6719 else
6720 bp->near_label = 0;
6721 bp->address = addr;
6722 bp->insert_place = insn;
6723 if (! far_label)
6724 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6725 else
6726 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6729 /* Generate all pending far branches,
6730 and free our references to the far labels. */
6731 while (far_branch_list)
6733 if (far_branch_list->near_label
6734 && ! NEXT_INSN (far_branch_list->near_label))
6735 gen_far_branch (far_branch_list);
6736 if (optimize
6737 && far_branch_list->far_label
6738 && ! --LABEL_NUSES (far_branch_list->far_label))
6739 delete_insn (far_branch_list->far_label);
6740 far_branch_list = far_branch_list->prev;
6743 /* Instruction length information is no longer valid due to the new
6744 instructions that have been generated. */
6745 init_insn_lengths ();
6748 /* Dump out instruction addresses, which is useful for debugging the
6749 constant pool table stuff.
6751 If relaxing, output the label and pseudo-ops used to link together
6752 calls and the instruction which set the registers.
6754 ??? The addresses printed by this routine for insns are nonsense for
6755 insns which are inside of a sequence where none of the inner insns have
6756 variable length. This is because the second pass of shorten_branches
6757 does not bother to update them. */
6758 void
6759 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6760 int noperands ATTRIBUTE_UNUSED)
6762 if (TARGET_DUMPISIZE)
6763 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6765 if (TARGET_RELAX)
6767 rtx note;
6769 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6770 if (note)
6772 rtx pattern;
6774 pattern = PATTERN (insn);
6775 if (GET_CODE (pattern) == PARALLEL)
6776 pattern = XVECEXP (pattern, 0, 0);
6777 switch (GET_CODE (pattern))
6779 case SET:
6780 if (GET_CODE (SET_SRC (pattern)) != CALL
6781 && get_attr_type (insn) != TYPE_SFUNC)
6783 targetm.asm_out.internal_label
6784 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6785 break;
6787 /* else FALLTHROUGH */
6788 case CALL:
6789 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6790 CODE_LABEL_NUMBER (XEXP (note, 0)));
6791 break;
6793 default:
6794 gcc_unreachable ();
6800 /* Dump out any constants accumulated in the final pass. These will
6801 only be labels. */
6802 const char *
6803 output_jump_label_table (void)
6805 int i;
6807 if (pool_size)
6809 fprintf (asm_out_file, "\t.align 2\n");
6810 for (i = 0; i < pool_size; i++)
6812 pool_node *p = &pool_vector[i];
6814 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6815 CODE_LABEL_NUMBER (p->label));
6816 output_asm_insn (".long %O0", &p->value);
6818 pool_size = 0;
6821 return "";
6824 /* A full frame looks like:
6826 arg-5
6827 arg-4
6828 [ if current_function_anonymous_args
6829 arg-3
6830 arg-2
6831 arg-1
6832 arg-0 ]
6833 saved-fp
6834 saved-r10
6835 saved-r11
6836 saved-r12
6837 saved-pr
6838 local-n
6840 local-1
6841 local-0 <- fp points here.
6843 Number of bytes pushed for anonymous args, used to pass information
6844 between expand_prologue and expand_epilogue.
6846 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6847 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6848 for an epilogue and a negative value means that it's for a sibcall
6849 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6850 all the registers that are about to be restored, and hence dead. */
6851 static void
6852 output_stack_adjust (int size, rtx reg, int epilogue_p,
6853 HARD_REG_SET *live_regs_mask, bool frame_p)
6855 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6856 if (size)
6858 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6860 /* This test is bogus, as output_stack_adjust is used to re-align the
6861 stack. */
6862 #if 0
6863 gcc_assert (!(size % align));
6864 #endif
6866 if (CONST_OK_FOR_ADD (size))
6867 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6868 /* Try to do it with two partial adjustments; however, we must make
6869 sure that the stack is properly aligned at all times, in case
6870 an interrupt occurs between the two partial adjustments. */
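      /* For example, with size == 200 and a 4-byte stack boundary this emits
	 two adds of 100 each (200 / 2 & -4 == 100 and 200 - 100 == 100),
	 assuming both halves satisfy CONST_OK_FOR_ADD; the first half is
	 rounded down to a multiple of the alignment, so the stack pointer
	 stays aligned between the two adjustments.  */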
6871 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6872 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6874 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6875 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6877 else
6879 rtx const_reg;
6880 rtx insn;
6881 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6882 int i;
6884 /* If TEMP is invalid, we could temporarily save a general
6885 register to MACL. However, there is currently no need
6886 to handle this case, so just die when we see it. */
6887 if (epilogue_p < 0
6888 || current_function_interrupt
6889 || ! call_really_used_regs[temp] || fixed_regs[temp])
6890 temp = -1;
6891 if (temp < 0 && ! current_function_interrupt
6892 && (TARGET_SHMEDIA || epilogue_p >= 0))
6894 HARD_REG_SET temps;
6895 COPY_HARD_REG_SET (temps, call_used_reg_set);
6896 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6897 if (epilogue_p > 0)
6899 int nreg = 0;
6900 if (crtl->return_rtx)
6902 machine_mode mode;
6903 mode = GET_MODE (crtl->return_rtx);
6904 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6905 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6907 for (i = 0; i < nreg; i++)
6908 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6909 if (crtl->calls_eh_return)
6911 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6912 for (i = 0; i <= 3; i++)
6913 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6916 if (TARGET_SHMEDIA && epilogue_p < 0)
6917 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6918 CLEAR_HARD_REG_BIT (temps, i);
6919 if (epilogue_p <= 0)
6921 for (i = FIRST_PARM_REG;
6922 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6923 CLEAR_HARD_REG_BIT (temps, i);
6924 if (cfun->static_chain_decl != NULL)
6925 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6927 temp = scavenge_reg (&temps);
6929 if (temp < 0 && live_regs_mask)
6931 HARD_REG_SET temps;
6933 COPY_HARD_REG_SET (temps, *live_regs_mask);
6934 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6935 temp = scavenge_reg (&temps);
6937 if (temp < 0)
6939 rtx adj_reg, tmp_reg, mem;
6941 /* If we reached here, the most likely case is the (sibcall)
6942 epilogue for non-SHmedia. Emit a special push/pop sequence
6943 for such a case as a last resort. This looks lengthy, but
6944 it should not be a problem because it seems to be very
6945 rare. */
6947 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6950 /* ??? There is still the slight possibility that r4 or
6951 r5 have been reserved as fixed registers or assigned
6952 as global registers, and they change during an
6953 interrupt. There are possible ways to handle this:
6955 - If we are adjusting the frame pointer (r14), we can do
6956 with a single temp register and an ordinary push / pop
6957 on the stack.
6958 - Grab any call-used or call-saved registers (i.e. not
6959 fixed or globals) for the temps we need. We might
6960 also grab r14 if we are adjusting the stack pointer.
6961 If we can't find enough available registers, issue
6962 a diagnostic and die - the user must have reserved
6963 way too many registers.
6964 But since all this is rather unlikely to happen and
6965 would require extra testing, we just die if r4 / r5
6966 are not available. */
6967 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6968 && !global_regs[4] && !global_regs[5]);
6970 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6971 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6972 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6973 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6974 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6975 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6976 emit_move_insn (mem, tmp_reg);
6977 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6978 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6979 emit_move_insn (mem, tmp_reg);
6980 emit_move_insn (reg, adj_reg);
6981 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6982 emit_move_insn (adj_reg, mem);
6983 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6984 emit_move_insn (tmp_reg, mem);
6985 /* Tell flow the insns that pop r4/r5 aren't dead. */
6986 emit_use (tmp_reg);
6987 emit_use (adj_reg);
6988 return;
6990 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6992 /* If SIZE is negative, subtract the positive value.
6993 This sometimes allows a constant pool entry to be shared
6994 between prologue and epilogue code. */
6995 if (size < 0)
6997 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6998 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7000 else
7002 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7003 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7005 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7006 gen_rtx_SET (VOIDmode, reg,
7007 gen_rtx_PLUS (SImode, reg,
7008 GEN_INT (size))));
7013 /* Emit the specified insn and mark it as frame related.
7014 FIXME: Rename this to emit_frame_insn. */
7015 static rtx_insn *
7016 frame_insn (rtx x)
7018 rtx_insn *insn = emit_insn (x);
7019 RTX_FRAME_RELATED_P (insn) = 1;
7020 return insn;
7023 /* Output RTL to push register RN onto the stack. */
7024 static rtx
7025 push (int rn)
7027 rtx x;
7028 if (rn == FPUL_REG)
7029 x = gen_push_fpul ();
7030 else if (rn == FPSCR_REG)
7031 x = gen_push_fpscr ();
7032 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7033 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7035 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7036 return NULL_RTX;
7037 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7039 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7040 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7041 else
7042 x = gen_push (gen_rtx_REG (SImode, rn));
7044 x = frame_insn (x);
7045 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7046 return x;
7049 /* Output RTL to pop register RN from the stack. */
7050 static void
7051 pop (int rn)
7053 rtx x, sp_reg, reg;
7054 if (rn == FPUL_REG)
7055 x = gen_pop_fpul ();
7056 else if (rn == FPSCR_REG)
7057 x = gen_pop_fpscr ();
7058 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7059 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7061 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7062 return;
7063 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7065 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7066 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7067 else
7068 x = gen_pop (gen_rtx_REG (SImode, rn));
7070 x = emit_insn (x);
7072 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7073 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7074 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7075 : SET_DEST (PATTERN (x)));
7076 add_reg_note (x, REG_CFA_RESTORE, reg);
7077 add_reg_note (x, REG_CFA_ADJUST_CFA,
7078 gen_rtx_SET (SImode, sp_reg,
7079 plus_constant (SImode, sp_reg,
7080 GET_MODE_SIZE (GET_MODE (reg)))));
7081 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7082 RTX_FRAME_RELATED_P (x) = 1;
7085 /* Generate code to push the regs specified in the mask. */
7086 static void
7087 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7089 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7090 int skip_fpscr = 0;
7092 /* Push PR last; this gives better latencies after the prologue, and
7093 provides candidates for the return delay slot when there are no general
7094 registers pushed. */
7095 for (; i < FIRST_PSEUDO_REGISTER; i++)
7097 /* If this is an interrupt handler, and the SZ bit varies,
7098 and we have to push any floating point register, we need
7099 to switch to the correct precision first. */
7100 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7101 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7103 HARD_REG_SET unsaved;
7105 push (FPSCR_REG);
7106 COMPL_HARD_REG_SET (unsaved, *mask);
7107 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7108 skip_fpscr = 1;
7110 if (i != PR_REG
7111 && (i != FPSCR_REG || ! skip_fpscr)
7112 && TEST_HARD_REG_BIT (*mask, i))
7114 /* If the ISR has RESBANK attribute assigned, don't push any of
7115 the following registers - R0-R14, MACH, MACL and GBR. */
7116 if (! (sh_cfun_resbank_handler_p ()
7117 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7118 || i == MACH_REG
7119 || i == MACL_REG
7120 || i == GBR_REG)))
7121 push (i);
7125 /* Push banked registers last to improve delay slot opportunities. */
7126 if (interrupt_handler)
7128 bool use_movml = false;
7130 if (TARGET_SH2A)
7132 unsigned int count = 0;
7134 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7135 if (TEST_HARD_REG_BIT (*mask, i))
7136 count++;
7137 else
7138 break;
7140 /* Use movml when all banked registers are pushed. */
7141 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7142 use_movml = true;
7145 if (sh_cfun_resbank_handler_p ())
7146 ; /* Do nothing. */
7147 else if (use_movml)
7149 rtx x, mem, reg, set;
7150 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7152 /* We must avoid scheduling the multiple-store insn together with
7153 other insns. */
7154 emit_insn (gen_blockage ());
7155 x = gen_movml_push_banked (sp_reg);
7156 x = frame_insn (x);
7157 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7159 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7160 reg = gen_rtx_REG (SImode, i);
7161 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7164 set = gen_rtx_SET (SImode, sp_reg,
7165 plus_constant (Pmode, sp_reg, - 32));
7166 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7167 emit_insn (gen_blockage ());
7169 else
7170 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7171 if (TEST_HARD_REG_BIT (*mask, i))
7172 push (i);
7175 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7176 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7177 push (PR_REG);
7180 /* Calculate how much extra space is needed to save all callee-saved
7181 target registers.
7182 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7183 static int
7184 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7186 int reg;
7187 int stack_space = 0;
7188 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7190 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7191 if ((! call_really_used_regs[reg] || interrupt_handler)
7192 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7193 /* Leave space to save this target register on the stack,
7194 in case target register allocation wants to use it. */
7195 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7196 return stack_space;
7199 /* Decide whether we should reserve space for callee-save target registers,
7200 in case target register allocation wants to use them. REGS_SAVED is
7201 the space, in bytes, that is already required for register saves.
7202 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7203 static int
7204 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7205 HARD_REG_SET *live_regs_mask)
7207 if (optimize_size)
7208 return 0;
7209 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7212 /* Decide how much space to reserve for callee-save target registers
7213 in case target register allocation wants to use them.
7214 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7215 static int
7216 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7218 if (shmedia_space_reserved_for_target_registers)
7219 return shmedia_target_regs_stack_space (live_regs_mask);
7220 else
7221 return 0;
7224 /* Work out the registers which need to be saved, both as a mask and a
7225 count of saved words. Return the count.
7227 If doing a pragma interrupt function, then push all regs used by the
7228 function, and if we call another function (we can tell by looking at PR),
7229 make sure that all the regs it clobbers are safe too. */
7230 static int
7231 calc_live_regs (HARD_REG_SET *live_regs_mask)
7233 unsigned int reg;
7234 int count;
7235 tree attrs;
7236 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7237 bool nosave_low_regs;
7238 int pr_live, has_call;
7240 attrs = DECL_ATTRIBUTES (current_function_decl);
7241 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7242 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7243 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7244 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7246 CLEAR_HARD_REG_SET (*live_regs_mask);
7247 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7248 && df_regs_ever_live_p (FPSCR_REG))
7249 target_flags &= ~MASK_FPU_SINGLE;
7250 /* If switching to double mode avoids a lot of saves, do that. */
7251 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7252 && TARGET_FPU_SINGLE)
7253 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7254 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7255 && (! call_really_used_regs[reg]
7256 || interrupt_handler)
7257 && ++count > 2)
7259 target_flags &= ~MASK_FPU_SINGLE;
7260 break;
7262 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7263 knows how to use it. That means the pseudo originally allocated for
7264 the initial value can become the PR_MEDIA_REG hard register, as seen for
7265 execute/20010122-1.c:test9. */
7266 if (TARGET_SHMEDIA)
7267 /* ??? this function is called from initial_elimination_offset, hence we
7268 can't use the result of sh_media_register_for_return here. */
7269 pr_live = sh_pr_n_sets ();
7270 else
7272 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7273 pr_live = (pr_initial
7274 ? (!REG_P (pr_initial)
7275 || REGNO (pr_initial) != (PR_REG))
7276 : df_regs_ever_live_p (PR_REG));
7277 /* For SHcompact, if not optimizing, we end up with a memory reference
7278 using the return address pointer for __builtin_return_address even
7279 though there is no actual need to put the PR register on the stack. */
7280 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7282 /* Force PR to be live if the prologue has to call the SHmedia
7283 argument decoder or register saver. */
7284 if (TARGET_SHCOMPACT
7285 && ((crtl->args.info.call_cookie
7286 & ~ CALL_COOKIE_RET_TRAMP (1))
7287 || crtl->saves_all_registers))
7288 pr_live = 1;
7289 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7290 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7292 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7293 ? pr_live
7294 : interrupt_handler
7295 ? (/* Need to save all the regs ever live. */
7296 (df_regs_ever_live_p (reg)
7297 || (call_really_used_regs[reg]
7298 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7299 || reg == PIC_OFFSET_TABLE_REGNUM)
7300 && has_call)
7301 || (TARGET_SHMEDIA && has_call
7302 && REGISTER_NATURAL_MODE (reg) == SImode
7303 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7304 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7305 && reg != RETURN_ADDRESS_POINTER_REGNUM
7306 && reg != T_REG && reg != GBR_REG
7307 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7308 /* Push fpscr only on targets which have an FPU. */
7309 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7310 : (/* Only push those regs which are used and need to be saved. */
7311 (TARGET_SHCOMPACT
7312 && flag_pic
7313 && crtl->args.info.call_cookie
7314 && reg == PIC_OFFSET_TABLE_REGNUM)
7315 || (df_regs_ever_live_p (reg)
7316 && ((!call_really_used_regs[reg]
7317 && !(reg != PIC_OFFSET_TABLE_REGNUM
7318 && fixed_regs[reg] && call_used_regs[reg]))
7319 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7320 || (crtl->calls_eh_return
7321 && (reg == EH_RETURN_DATA_REGNO (0)
7322 || reg == EH_RETURN_DATA_REGNO (1)
7323 || reg == EH_RETURN_DATA_REGNO (2)
7324 || reg == EH_RETURN_DATA_REGNO (3)))
7325 || ((reg == MACL_REG || reg == MACH_REG)
7326 && df_regs_ever_live_p (reg)
7327 && sh_cfun_attr_renesas_p ())
7330 SET_HARD_REG_BIT (*live_regs_mask, reg);
7331 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7333 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7334 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7336 if (FP_REGISTER_P (reg))
7338 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7340 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7341 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7344 else if (XD_REGISTER_P (reg))
7346 /* Must switch to double mode to access these registers. */
7347 target_flags &= ~MASK_FPU_SINGLE;
7351 if (nosave_low_regs && reg == R8_REG)
7352 break;
7354 /* If we have a target register optimization pass after prologue / epilogue
7355 threading, we need to assume all target registers will be live even if
7356 they aren't now. */
7357 if (flag_branch_target_load_optimize2
7358 && TARGET_SAVE_ALL_TARGET_REGS
7359 && shmedia_space_reserved_for_target_registers)
7360 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7361 if ((! call_really_used_regs[reg] || interrupt_handler)
7362 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7364 SET_HARD_REG_BIT (*live_regs_mask, reg);
7365 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7367 /* If this is an interrupt handler, we don't have any call-clobbered
7368 registers we can conveniently use for target register save/restore.
7369 Make sure we save at least one general purpose register when we need
7370 to save target registers. */
7371 if (interrupt_handler
7372 && hard_reg_set_intersect_p (*live_regs_mask,
7373 reg_class_contents[TARGET_REGS])
7374 && ! hard_reg_set_intersect_p (*live_regs_mask,
7375 reg_class_contents[GENERAL_REGS]))
7377 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7378 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7381 return count;
7384 /* Code to generate prologue and epilogue sequences */
7386 /* PUSHED is the number of bytes that are being pushed on the
7387 stack for register saves. Return the frame size, padded
7388 appropriately so that the stack stays properly aligned. */
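/* For example, with 10 bytes of locals, 12 bytes already pushed and an
   8-byte alignment this returns ((10 + 12 + 7) & -8) - 12 = 12, so that the
   pushed bytes plus the frame size add up to a multiple of the alignment.  */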
7389 static HOST_WIDE_INT
7390 rounded_frame_size (int pushed)
7392 HOST_WIDE_INT size = get_frame_size ();
7393 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7395 if (ACCUMULATE_OUTGOING_ARGS)
7396 size += crtl->outgoing_args_size;
7398 return ((size + pushed + align - 1) & -align) - pushed;
7401 /* Choose a call-clobbered target-branch register that remains
7402 unchanged along the whole function. We set it up as the return
7403 value in the prologue. */
7405 sh_media_register_for_return (void)
7407 int regno;
7408 int tr0_used;
7410 if (! crtl->is_leaf)
7411 return -1;
7412 if (lookup_attribute ("interrupt_handler",
7413 DECL_ATTRIBUTES (current_function_decl)))
7414 return -1;
7415 if (sh_cfun_interrupt_handler_p ())
7416 return -1;
7418 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7420 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7421 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7422 return regno;
7424 return -1;
7427 /* The maximum registers we need to save are:
7428 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7429 - 32 floating point registers (for each pair, we save none,
7430 one single precision value, or a double precision value).
7431 - 8 target registers
7432 - add 1 entry for a delimiter. */
7433 #define MAX_SAVED_REGS (62+32+8)
7435 typedef struct save_entry_s
7437 unsigned char reg;
7438 unsigned char mode;
7439 short offset;
7440 } save_entry;
7442 #define MAX_TEMPS 4
7444 /* There will be a delimiter entry with VOIDmode both at the start and the
7445 end of a filled in schedule. The end delimiter has the offset of the
7446 save with the smallest (i.e. most negative) offset. */
7447 typedef struct save_schedule_s
7449 save_entry entries[MAX_SAVED_REGS + 2];
7450 int temps[MAX_TEMPS+1];
7451 } save_schedule;
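/* Illustration (comment added; the layout follows from sh5_schedule_saves
   below): a filled-in schedule looks roughly like

     entries[0]     reg == -1, mode == VOIDmode, offset == offset_base
     entries[1..n]  the registers to save, offsets decreasing from offset_base
     entries[n+1]   reg == -1, mode == VOIDmode, offset == most negative offset

   and temps[] holds up to MAX_TEMPS scratch register numbers, terminated
   by -1.  */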
7453 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7454 use reverse order. Returns the last entry written to (not counting
7455 the delimiter). OFFSET_BASE is a number to be added to all offset
7456 entries. */
7457 static save_entry *
7458 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7459 int offset_base)
7461 int align, i;
7462 save_entry *entry = schedule->entries;
7463 int tmpx = 0;
7464 int offset;
7466 if (! current_function_interrupt)
7467 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7468 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7469 && ! FUNCTION_ARG_REGNO_P (i)
7470 && i != FIRST_RET_REG
7471 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7472 && ! (crtl->calls_eh_return
7473 && (i == EH_RETURN_STACKADJ_REGNO
7474 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7475 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7476 schedule->temps[tmpx++] = i;
7477 entry->reg = -1;
7478 entry->mode = VOIDmode;
7479 entry->offset = offset_base;
7480 entry++;
7481 /* We loop twice: first, we save 8-byte aligned registers in the
7482 higher addresses, that are known to be aligned. Then, we
7483 proceed to saving 32-bit registers that don't need 8-byte
7484 alignment.
7485 If this is an interrupt function, all registers that need saving
7486 need to be saved in full.  Moreover, we need to postpone saving
7487 target registers till we have saved some general purpose registers
7488 we can then use as scratch registers. */
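/* Hypothetical example of the two passes (comment added for clarity):
   with offset_base == 0, one DImode register and one SFmode register to
   save, the aligned pass (align == 1) places the DImode save at offset -8
   and the second pass (align == 0) places the SFmode save at offset -12,
   so 8-byte saves always land on 8-byte boundaries and 4-byte saves fill
   the space below them.  */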
7489 offset = offset_base;
7490 for (align = 1; align >= 0; align--)
7492 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7493 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7495 machine_mode mode = REGISTER_NATURAL_MODE (i);
7496 int reg = i;
7498 if (current_function_interrupt)
7500 if (TARGET_REGISTER_P (i))
7501 continue;
7502 if (GENERAL_REGISTER_P (i))
7503 mode = DImode;
7505 if (mode == SFmode && (i % 2) == 1
7506 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7507 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7509 mode = DFmode;
7510 i--;
7511 reg--;
7514 /* If we're doing the aligned pass and this is not aligned,
7515 or we're doing the unaligned pass and this is aligned,
7516 skip it. */
7517 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7518 != align)
7519 continue;
7521 if (current_function_interrupt
7522 && GENERAL_REGISTER_P (i)
7523 && tmpx < MAX_TEMPS)
7524 schedule->temps[tmpx++] = i;
7526 offset -= GET_MODE_SIZE (mode);
7527 entry->reg = i;
7528 entry->mode = mode;
7529 entry->offset = offset;
7530 entry++;
7532 if (align && current_function_interrupt)
7533 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7534 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7536 offset -= GET_MODE_SIZE (DImode);
7537 entry->reg = i;
7538 entry->mode = DImode;
7539 entry->offset = offset;
7540 entry++;
7543 entry->reg = -1;
7544 entry->mode = VOIDmode;
7545 entry->offset = offset;
7546 schedule->temps[tmpx] = -1;
7547 return entry - 1;
7550 /* Expand code for the function prologue. */
7551 void
7552 sh_expand_prologue (void)
7554 HARD_REG_SET live_regs_mask;
7555 int d, i;
7556 int d_rounding = 0;
7557 int save_flags = target_flags;
7558 int pretend_args;
7559 int stack_usage;
7560 tree sp_switch_attr
7561 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7563 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7565 /* We have pretend args if we had an object sent partially in registers
7566 and partially on the stack, e.g. a large structure. */
7567 pretend_args = crtl->args.pretend_args_size;
7568 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7569 && (NPARM_REGS(SImode)
7570 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7571 pretend_args = 0;
7573 output_stack_adjust (-pretend_args
7574 - crtl->args.info.stack_regs * 8,
7575 stack_pointer_rtx, 0, NULL, true);
7576 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7578 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7579 /* We're going to use the PIC register to load the address of the
7580 incoming-argument decoder and/or of the return trampoline from
7581 the GOT, so make sure the PIC register is preserved and
7582 initialized. */
7583 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7585 if (TARGET_SHCOMPACT
7586 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7588 int reg;
7590 /* First, make all registers with incoming arguments that will
7591 be pushed onto the stack live, so that register renaming
7592 doesn't overwrite them. */
7593 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7594 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7595 >= NPARM_REGS (SImode) - reg)
7596 for (; reg < NPARM_REGS (SImode); reg++)
7597 emit_insn (gen_shcompact_preserve_incoming_args
7598 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7599 else if (CALL_COOKIE_INT_REG_GET
7600 (crtl->args.info.call_cookie, reg) == 1)
7601 emit_insn (gen_shcompact_preserve_incoming_args
7602 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7604 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7605 stack_pointer_rtx);
7606 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7607 GEN_INT (crtl->args.info.call_cookie));
7608 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7609 gen_rtx_REG (SImode, R0_REG));
7611 else if (TARGET_SHMEDIA)
7613 int tr = sh_media_register_for_return ();
7615 if (tr >= 0)
7616 emit_move_insn (gen_rtx_REG (DImode, tr),
7617 gen_rtx_REG (DImode, PR_MEDIA_REG));
7620 /* Emit the code for SETUP_VARARGS. */
7621 if (cfun->stdarg)
7623 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7625 /* Push arg regs as if they'd been provided by the caller on the stack. */
7626 for (i = 0; i < NPARM_REGS(SImode); i++)
7628 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7630 if (i >= (NPARM_REGS(SImode)
7631 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7633 break;
7634 push (rn);
7635 stack_usage += GET_MODE_SIZE (SImode);
7640 /* If we're supposed to switch stacks at function entry, do so now. */
7641 if (sp_switch_attr)
7643 rtx lab, newsrc;
7644 /* The argument specifies a variable holding the address of the
7645 stack the interrupt function should switch to/from at entry/exit. */
7646 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7647 const char *s
7648 = ggc_strdup (TREE_STRING_POINTER (arg));
7649 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7651 lab = add_constant (sp_switch, SImode, 0);
7652 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7654 emit_insn (gen_sp_switch_1 (newsrc));
7657 d = calc_live_regs (&live_regs_mask);
7658 /* ??? Maybe we could save some switching if we can move a mode switch
7659 that already happens to be at the function start into the prologue. */
7660 if (target_flags != save_flags && ! current_function_interrupt)
7661 emit_insn (gen_toggle_sz ());
7663 if (TARGET_SH5)
7665 int offset_base, offset;
7666 rtx r0 = NULL_RTX;
7667 int offset_in_r0 = -1;
7668 int sp_in_r0 = 0;
7669 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7670 int total_size, save_size;
7671 save_schedule schedule;
7672 save_entry *entry;
7673 int *tmp_pnt;
7675 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7676 && ! current_function_interrupt)
7677 r0 = gen_rtx_REG (Pmode, R0_REG);
7679 /* D is the actual number of bytes that we need for saving registers,
7680 however, in initial_elimination_offset we have committed to using
7681 an additional TREGS_SPACE amount of bytes - in order to keep both
7682 addresses to arguments supplied by the caller and local variables
7683 valid, we must keep this gap. Place it between the incoming
7684 arguments and the actually saved registers in a bid to optimize
7685 locality of reference. */
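/* Rough SH5 stack layout after the prologue (sketch added for
   illustration; exact sizes vary per function):

     higher addresses: incoming arguments / caller frame
                       TREGS_SPACE gap reserved by initial_elimination_offset
                       saved registers (d bytes plus d_rounding)
                       local frame (rounded_frame_size)
     lower addresses:  stack pointer

   which keeps the gap between the incoming arguments and the register
   saves, as described above.  */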
7686 total_size = d + tregs_space;
7687 total_size += rounded_frame_size (total_size);
7688 save_size = total_size - rounded_frame_size (d);
7689 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7690 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7691 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7693 /* If adjusting the stack in a single step costs nothing extra, do so.
7694 I.e. either if a single addi is enough, or we need a movi anyway,
7695 and we don't exceed the maximum offset range (the test for the
7696 latter is conservative for simplicity). */
7697 if (TARGET_SHMEDIA
7698 && (CONST_OK_FOR_I10 (-total_size)
7699 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7700 && total_size <= 2044)))
7701 d_rounding = total_size - save_size;
7703 offset_base = d + d_rounding;
7705 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7706 0, NULL, true);
7707 stack_usage += save_size + d_rounding;
7709 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7710 tmp_pnt = schedule.temps;
7711 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7713 machine_mode mode = (machine_mode) entry->mode;
7714 unsigned int reg = entry->reg;
7715 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7716 rtx orig_reg_rtx;
7718 offset = entry->offset;
7720 reg_rtx = gen_rtx_REG (mode, reg);
7722 mem_rtx = gen_frame_mem (mode,
7723 gen_rtx_PLUS (Pmode,
7724 stack_pointer_rtx,
7725 GEN_INT (offset)));
7727 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7729 gcc_assert (r0);
7730 mem_rtx = NULL_RTX;
7733 if (HAVE_PRE_DECREMENT
7734 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7735 || mem_rtx == NULL_RTX
7736 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7738 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7740 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7741 pre_dec = NULL_RTX;
7742 else
7744 mem_rtx = NULL_RTX;
7745 offset += GET_MODE_SIZE (mode);
7749 if (mem_rtx != NULL_RTX)
7750 goto addr_ok;
7752 if (offset_in_r0 == -1)
7754 emit_move_insn (r0, GEN_INT (offset));
7755 offset_in_r0 = offset;
7757 else if (offset != offset_in_r0)
7759 emit_move_insn (r0,
7760 gen_rtx_PLUS
7761 (Pmode, r0,
7762 GEN_INT (offset - offset_in_r0)));
7763 offset_in_r0 += offset - offset_in_r0;
7766 if (pre_dec != NULL_RTX)
7768 if (! sp_in_r0)
7770 emit_move_insn (r0,
7771 gen_rtx_PLUS
7772 (Pmode, r0, stack_pointer_rtx));
7773 sp_in_r0 = 1;
7776 offset -= GET_MODE_SIZE (mode);
7777 offset_in_r0 -= GET_MODE_SIZE (mode);
7779 mem_rtx = pre_dec;
7781 else if (sp_in_r0)
7782 mem_rtx = gen_frame_mem (mode, r0);
7783 else
7784 mem_rtx = gen_frame_mem (mode,
7785 gen_rtx_PLUS (Pmode,
7786 stack_pointer_rtx,
7787 r0));
7789 /* We must not use an r0-based address for target-branch
7790 registers or for special registers without pre-dec
7791 memory addresses, since we store their values in r0
7792 first. */
7793 gcc_assert (!TARGET_REGISTER_P (reg)
7794 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7795 || mem_rtx == pre_dec));
7797 addr_ok:
7798 orig_reg_rtx = reg_rtx;
7799 if (TARGET_REGISTER_P (reg)
7800 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7801 && mem_rtx != pre_dec))
7803 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7805 emit_move_insn (tmp_reg, reg_rtx);
7807 if (REGNO (tmp_reg) == R0_REG)
7809 offset_in_r0 = -1;
7810 sp_in_r0 = 0;
7811 gcc_assert (!refers_to_regno_p
7812 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7815 if (*++tmp_pnt <= 0)
7816 tmp_pnt = schedule.temps;
7818 reg_rtx = tmp_reg;
7821 rtx insn;
7823 /* Mark as interesting for dwarf cfi generator */
7824 insn = emit_move_insn (mem_rtx, reg_rtx);
7825 RTX_FRAME_RELATED_P (insn) = 1;
7826 /* If we use an intermediate register for the save, we can't
7827 describe this exactly in cfi as a copy of the to-be-saved
7828 register into the temporary register and then the temporary
7829 register on the stack, because the temporary register can
7830 have a different natural size than the to-be-saved register.
7831 Thus, we gloss over the intermediate copy and pretend we do
7832 a direct save from the to-be-saved register. */
7833 if (REGNO (reg_rtx) != reg)
7835 rtx set;
7837 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7838 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7841 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7843 rtx reg_rtx = gen_rtx_REG (mode, reg);
7844 rtx set;
7845 rtx mem_rtx = gen_frame_mem (mode,
7846 gen_rtx_PLUS (Pmode,
7847 stack_pointer_rtx,
7848 GEN_INT (offset)));
7850 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7851 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7856 gcc_assert (entry->offset == d_rounding);
7858 else
7860 push_regs (&live_regs_mask, current_function_interrupt);
7861 stack_usage += d;
7864 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7865 emit_insn (gen_GOTaddr2picreg ());
7867 if (SHMEDIA_REGS_STACK_ADJUST ())
7869 /* This must NOT go through the PLT, otherwise mach and macl
7870 may be clobbered. */
7871 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7872 (TARGET_FPU_ANY
7873 ? "__GCC_push_shmedia_regs"
7874 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7875 emit_insn (gen_shmedia_save_restore_regs_compact
7876 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7879 if (target_flags != save_flags && ! current_function_interrupt)
7880 emit_insn (gen_toggle_sz ());
7882 target_flags = save_flags;
7884 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7885 stack_pointer_rtx, 0, NULL, true);
7886 stack_usage += rounded_frame_size (d) - d_rounding;
7888 if (frame_pointer_needed)
7889 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7891 if (TARGET_SHCOMPACT
7892 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7894 /* This must NOT go through the PLT, otherwise mach and macl
7895 may be clobbered. */
7896 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7897 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7898 emit_insn (gen_shcompact_incoming_args ());
7901 /* If we are profiling, make sure no instructions are scheduled before
7902 the call to mcount. Similarly if some call instructions are swapped
7903 before frame related insns, it'll confuse the unwinder because
7904 currently SH has no unwind info for function epilogues. */
7905 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7906 emit_insn (gen_blockage ());
7908 if (flag_stack_usage_info)
7909 current_function_static_stack_size = stack_usage;
7912 /* Expand code for the function epilogue. */
7913 void
7914 sh_expand_epilogue (bool sibcall_p)
7916 HARD_REG_SET live_regs_mask;
7917 int d, i;
7918 int d_rounding = 0;
7920 int save_flags = target_flags;
7921 int frame_size, save_size;
7922 int fpscr_deferred = 0;
7923 int e = sibcall_p ? -1 : 1;
7925 d = calc_live_regs (&live_regs_mask);
7927 save_size = d;
7928 frame_size = rounded_frame_size (d);
7930 if (TARGET_SH5)
7932 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7933 int total_size;
7934 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7935 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7936 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7938 total_size = d + tregs_space;
7939 total_size += rounded_frame_size (total_size);
7940 save_size = total_size - frame_size;
7942 /* If adjusting the stack in a single step costs nothing extra, do so.
7943 I.e. either if a single addi is enough, or we need a movi anyway,
7944 and we don't exceed the maximum offset range (the test for the
7945 latter is conservative for simplicity). */
7946 if (TARGET_SHMEDIA
7947 && ! frame_pointer_needed
7948 && (CONST_OK_FOR_I10 (total_size)
7949 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7950 && total_size <= 2044)))
7951 d_rounding = frame_size;
7953 frame_size -= d_rounding;
7956 if (frame_pointer_needed)
7958 /* We must avoid scheduling the epilogue with previous basic blocks.
7959 See PR/18032 and PR/40313. */
7960 emit_insn (gen_blockage ());
7961 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7962 &live_regs_mask, true);
7964 /* We must avoid moving the stack pointer adjustment past code
7965 which reads from the local frame, else an interrupt could
7966 occur after the SP adjustment and clobber data in the local
7967 frame. */
7968 emit_insn (gen_blockage ());
7969 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7971 else if (frame_size)
7973 /* We must avoid moving the stack pointer adjustment past code
7974 which reads from the local frame, else an interrupt could
7975 occur after the SP adjustment and clobber data in the local
7976 frame. */
7977 emit_insn (gen_blockage ());
7978 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7979 &live_regs_mask, true);
7982 if (SHMEDIA_REGS_STACK_ADJUST ())
7984 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7985 (TARGET_FPU_ANY
7986 ? "__GCC_pop_shmedia_regs"
7987 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7988 /* This must NOT go through the PLT, otherwise mach and macl
7989 may be clobbered. */
7990 emit_insn (gen_shmedia_save_restore_regs_compact
7991 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7994 /* Pop all the registers. */
7996 if (target_flags != save_flags && ! current_function_interrupt)
7997 emit_insn (gen_toggle_sz ());
7998 if (TARGET_SH5)
8000 int offset_base, offset;
8001 int offset_in_r0 = -1;
8002 int sp_in_r0 = 0;
8003 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8004 save_schedule schedule;
8005 save_entry *entry;
8006 int *tmp_pnt;
8008 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8009 offset_base = -entry[1].offset + d_rounding;
8010 tmp_pnt = schedule.temps;
8011 for (; entry->mode != VOIDmode; entry--)
8013 machine_mode mode = (machine_mode) entry->mode;
8014 int reg = entry->reg;
8015 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8017 offset = offset_base + entry->offset;
8018 reg_rtx = gen_rtx_REG (mode, reg);
8020 mem_rtx = gen_frame_mem (mode,
8021 gen_rtx_PLUS (Pmode,
8022 stack_pointer_rtx,
8023 GEN_INT (offset)));
8025 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8026 mem_rtx = NULL_RTX;
8028 if (HAVE_POST_INCREMENT
8029 && (offset == offset_in_r0
8030 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8031 && mem_rtx == NULL_RTX)
8032 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8034 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8036 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8037 post_inc = NULL_RTX;
8038 else
8039 mem_rtx = NULL_RTX;
8042 if (mem_rtx != NULL_RTX)
8043 goto addr_ok;
8045 if (offset_in_r0 == -1)
8047 emit_move_insn (r0, GEN_INT (offset));
8048 offset_in_r0 = offset;
8050 else if (offset != offset_in_r0)
8052 emit_move_insn (r0,
8053 gen_rtx_PLUS
8054 (Pmode, r0,
8055 GEN_INT (offset - offset_in_r0)));
8056 offset_in_r0 += offset - offset_in_r0;
8059 if (post_inc != NULL_RTX)
8061 if (! sp_in_r0)
8063 emit_move_insn (r0,
8064 gen_rtx_PLUS
8065 (Pmode, r0, stack_pointer_rtx));
8066 sp_in_r0 = 1;
8069 mem_rtx = post_inc;
8071 offset_in_r0 += GET_MODE_SIZE (mode);
8073 else if (sp_in_r0)
8074 mem_rtx = gen_frame_mem (mode, r0);
8075 else
8076 mem_rtx = gen_frame_mem (mode,
8077 gen_rtx_PLUS (Pmode,
8078 stack_pointer_rtx,
8079 r0));
8081 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8082 || mem_rtx == post_inc);
8084 addr_ok:
8085 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8086 && mem_rtx != post_inc)
8088 emit_move_insn (r0, mem_rtx);
8089 mem_rtx = r0;
8091 else if (TARGET_REGISTER_P (reg))
8093 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8095 /* Give the scheduler a bit of freedom by using up to
8096 MAX_TEMPS registers in a round-robin fashion. */
8097 emit_move_insn (tmp_reg, mem_rtx);
8098 mem_rtx = tmp_reg;
8099 if (*++tmp_pnt < 0)
8100 tmp_pnt = schedule.temps;
8103 emit_move_insn (reg_rtx, mem_rtx);
8106 gcc_assert (entry->offset + offset_base == d + d_rounding);
8108 else /* ! TARGET_SH5 */
8110 int last_reg;
8112 save_size = 0;
8113 /* For an ISR with RESBANK attribute assigned, don't pop the PR
8114 register. */
8115 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8116 && !sh_cfun_resbank_handler_p ())
8118 if (!frame_pointer_needed)
8119 emit_insn (gen_blockage ());
8120 pop (PR_REG);
8123 /* Banked registers are popped first to avoid being scheduled in the
8124 delay slot. RTE switches banks before the ds instruction. */
8125 if (current_function_interrupt)
8127 bool use_movml = false;
8129 if (TARGET_SH2A)
8131 unsigned int count = 0;
8133 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8134 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8135 count++;
8136 else
8137 break;
8139 /* Use movml when all banked registers are popped. */
8140 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8141 use_movml = true;
8144 if (sh_cfun_resbank_handler_p ())
8145 ; /* Do nothing. */
8146 else if (use_movml)
8148 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8150 /* We must avoid scheduling the multiple-register load insn together
8151 with other insns. */
8152 emit_insn (gen_blockage ());
8153 emit_insn (gen_movml_pop_banked (sp_reg));
8154 emit_insn (gen_blockage ());
8156 else
8157 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8158 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8159 pop (i);
8161 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8163 else
8164 last_reg = FIRST_PSEUDO_REGISTER;
8166 for (i = 0; i < last_reg; i++)
8168 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8170 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8171 && hard_reg_set_intersect_p (live_regs_mask,
8172 reg_class_contents[DF_REGS]))
8173 fpscr_deferred = 1;
8174 /* For an ISR with RESBANK attribute assigned, don't pop
8175 the following registers: R0-R14, MACH, MACL and GBR. */
8176 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8177 && ! (sh_cfun_resbank_handler_p ()
8178 && ((j >= FIRST_GENERAL_REG
8179 && j < LAST_GENERAL_REG)
8180 || j == MACH_REG
8181 || j == MACL_REG
8182 || j == GBR_REG)))
8183 pop (j);
8185 if (j == FIRST_FP_REG && fpscr_deferred)
8186 pop (FPSCR_REG);
8189 if (target_flags != save_flags && ! current_function_interrupt)
8190 emit_insn (gen_toggle_sz ());
8191 target_flags = save_flags;
8193 output_stack_adjust (crtl->args.pretend_args_size
8194 + save_size + d_rounding
8195 + crtl->args.info.stack_regs * 8,
8196 stack_pointer_rtx, e, NULL, true);
8198 if (crtl->calls_eh_return)
8199 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8200 EH_RETURN_STACKADJ_RTX));
8202 /* Switch back to the normal stack if necessary. */
8203 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8204 emit_insn (gen_sp_switch_2 ());
8206 /* Tell flow the insn that pops PR isn't dead. */
8207 /* PR_REG will never be live in SHmedia mode, and we don't need to
8208 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8209 by the return pattern. */
8210 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8211 emit_use (gen_rtx_REG (SImode, PR_REG));
8214 /* Emit code to change the current function's return address to RA.
8215 TEMP is available as a scratch register, if needed. */
8216 void
8217 sh_set_return_address (rtx ra, rtx tmp)
8219 HARD_REG_SET live_regs_mask;
8220 int d;
8221 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8222 int pr_offset;
8224 d = calc_live_regs (&live_regs_mask);
8226 /* If pr_reg isn't live, we can set it (or the register given in
8227 sh_media_register_for_return) directly. */
8228 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8230 rtx rr;
8232 if (TARGET_SHMEDIA)
8234 int rr_regno = sh_media_register_for_return ();
8236 if (rr_regno < 0)
8237 rr_regno = pr_reg;
8239 rr = gen_rtx_REG (DImode, rr_regno);
8241 else
8242 rr = gen_rtx_REG (SImode, pr_reg);
8244 emit_insn (GEN_MOV (rr, ra));
8245 /* Tell flow the register for return isn't dead. */
8246 emit_use (rr);
8247 return;
8250 if (TARGET_SH5)
8252 int offset;
8253 save_schedule schedule;
8254 save_entry *entry;
8256 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8257 offset = entry[1].offset;
8258 for (; entry->mode != VOIDmode; entry--)
8259 if (entry->reg == pr_reg)
8260 goto found;
8262 /* We can't find the PR register. */
8263 gcc_unreachable ();
8265 found:
8266 offset = entry->offset - offset;
8267 pr_offset = (rounded_frame_size (d) + offset
8268 + SHMEDIA_REGS_STACK_ADJUST ());
8270 else
8271 pr_offset = rounded_frame_size (d);
8273 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8275 if (frame_pointer_needed)
8276 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8277 else
8278 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8280 tmp = gen_frame_mem (Pmode, tmp);
8281 emit_insn (GEN_MOV (tmp, ra));
8283 /* Tell flow this store isn't dead. */
8283 emit_use (tmp);
8286 /* Clear variables at function end. */
8287 static void
8288 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8289 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8293 static rtx
8294 sh_builtin_saveregs (void)
8296 /* First unnamed integer register. */
8297 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8298 /* Number of integer registers we need to save. */
8299 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8300 /* First unnamed SFmode float reg */
8301 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8302 /* Number of SFmode float regs to save. */
8303 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8304 rtx regbuf, fpregs;
8305 int bufsize, regno;
8306 alias_set_type alias_set;
8308 if (TARGET_SH5)
8310 if (n_intregs)
8312 int pushregs = n_intregs;
8314 while (pushregs < NPARM_REGS (SImode) - 1
8315 && (CALL_COOKIE_INT_REG_GET
8316 (crtl->args.info.call_cookie,
8317 NPARM_REGS (SImode) - pushregs)
8318 == 1))
8320 crtl->args.info.call_cookie
8321 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8322 - pushregs, 1);
8323 pushregs++;
8326 if (pushregs == NPARM_REGS (SImode))
8327 crtl->args.info.call_cookie
8328 |= (CALL_COOKIE_INT_REG (0, 1)
8329 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8330 else
8331 crtl->args.info.call_cookie
8332 |= CALL_COOKIE_STACKSEQ (pushregs);
8334 crtl->args.pretend_args_size += 8 * n_intregs;
8336 if (TARGET_SHCOMPACT)
8337 return const0_rtx;
8340 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8342 error ("__builtin_saveregs not supported by this subtarget");
8343 return const0_rtx;
8346 if (TARGET_SHMEDIA)
8347 n_floatregs = 0;
8349 /* Allocate block of memory for the regs. */
8350 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8351 Or can assign_stack_local accept a 0 SIZE argument? */
8352 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8354 if (TARGET_SHMEDIA)
8355 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8356 else if (n_floatregs & 1)
8358 rtx addr;
8360 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8361 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8362 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8363 regbuf = change_address (regbuf, BLKmode, addr);
8365 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8367 rtx addr, mask;
8369 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8370 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8371 XEXP (regbuf, 0), 4));
8372 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8373 emit_insn (gen_andsi3 (addr, addr, mask));
8374 regbuf = change_address (regbuf, BLKmode, addr);
8376 else
8377 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8378 alias_set = get_varargs_alias_set ();
8379 set_mem_alias_set (regbuf, alias_set);
8381 /* Save int args.
8382 This is optimized to only save the regs that are necessary. Explicitly
8383 named args need not be saved. */
8384 if (n_intregs > 0)
8385 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8386 adjust_address (regbuf, BLKmode,
8387 n_floatregs * UNITS_PER_WORD),
8388 n_intregs);
8390 if (TARGET_SHMEDIA)
8391 /* Return the address of the regbuf. */
8392 return XEXP (regbuf, 0);
8394 /* Save float args.
8395 This is optimized to only save the regs that are necessary. Explicitly
8396 named args need not be saved.
8397 We explicitly build a pointer to the buffer because it halves the insn
8398 count when not optimizing (otherwise the pointer is built for each reg
8399 saved).
8400 We emit the moves in reverse order so that we can use predecrement. */
8402 fpregs = copy_to_mode_reg (Pmode,
8403 plus_constant (Pmode, XEXP (regbuf, 0),
8404 n_floatregs * UNITS_PER_WORD));
8405 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8407 rtx mem;
8408 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8410 emit_insn (gen_addsi3 (fpregs, fpregs,
8411 GEN_INT (-2 * UNITS_PER_WORD)));
8412 mem = change_address (regbuf, DFmode, fpregs);
8413 emit_move_insn (mem,
8414 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8416 regno = first_floatreg;
8417 if (regno & 1)
8419 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8420 mem = change_address (regbuf, SFmode, fpregs);
8421 emit_move_insn (mem,
8422 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8423 + regno - SH_REG_MSW_OFFSET));
8426 else
8427 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8429 rtx mem;
8431 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8432 mem = change_address (regbuf, SFmode, fpregs);
8433 emit_move_insn (mem,
8434 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8437 /* Return the address of the regbuf. */
8438 return XEXP (regbuf, 0);
8441 /* Define the `__builtin_va_list' type for the ABI. */
8442 static tree
8443 sh_build_builtin_va_list (void)
8445 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8446 tree record, type_decl;
8448 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8449 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8450 return ptr_type_node;
8452 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8453 type_decl = build_decl (BUILTINS_LOCATION,
8454 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8456 f_next_o = build_decl (BUILTINS_LOCATION,
8457 FIELD_DECL, get_identifier ("__va_next_o"),
8458 ptr_type_node);
8459 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8460 FIELD_DECL,
8461 get_identifier ("__va_next_o_limit"),
8462 ptr_type_node);
8463 f_next_fp = build_decl (BUILTINS_LOCATION,
8464 FIELD_DECL, get_identifier ("__va_next_fp"),
8465 ptr_type_node);
8466 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8467 FIELD_DECL,
8468 get_identifier ("__va_next_fp_limit"),
8469 ptr_type_node);
8470 f_next_stack = build_decl (BUILTINS_LOCATION,
8471 FIELD_DECL, get_identifier ("__va_next_stack"),
8472 ptr_type_node);
8474 DECL_FIELD_CONTEXT (f_next_o) = record;
8475 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8476 DECL_FIELD_CONTEXT (f_next_fp) = record;
8477 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8478 DECL_FIELD_CONTEXT (f_next_stack) = record;
8480 TYPE_STUB_DECL (record) = type_decl;
8481 TYPE_NAME (record) = type_decl;
8482 TYPE_FIELDS (record) = f_next_o;
8483 DECL_CHAIN (f_next_o) = f_next_o_limit;
8484 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8485 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8486 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8488 layout_type (record);
8490 return record;
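/* The record built above corresponds roughly to the following C
   declaration (added for illustration; the field roles follow from
   sh_va_start and sh_gimplify_va_arg_expr below):

     struct __va_list_tag
     {
       void *__va_next_o;        -- next integer argument save slot
       void *__va_next_o_limit;  -- end of the integer argument save area
       void *__va_next_fp;       -- next floating-point argument save slot
       void *__va_next_fp_limit; -- end of the floating-point argument save area
       void *__va_next_stack;    -- next argument passed on the stack
     };  */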
8493 /* Implement `va_start' for varargs and stdarg. */
8494 static void
8495 sh_va_start (tree valist, rtx nextarg)
8497 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8498 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8499 tree t, u;
8500 int nfp, nint;
8502 if (TARGET_SH5)
8504 expand_builtin_saveregs ();
8505 std_expand_builtin_va_start (valist, nextarg);
8506 return;
8509 if ((! TARGET_SH2E && ! TARGET_SH4)
8510 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8512 std_expand_builtin_va_start (valist, nextarg);
8513 return;
8516 f_next_o = TYPE_FIELDS (va_list_type_node);
8517 f_next_o_limit = DECL_CHAIN (f_next_o);
8518 f_next_fp = DECL_CHAIN (f_next_o_limit);
8519 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8520 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8522 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8523 NULL_TREE);
8524 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8525 valist, f_next_o_limit, NULL_TREE);
8526 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8527 NULL_TREE);
8528 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8529 valist, f_next_fp_limit, NULL_TREE);
8530 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8531 valist, f_next_stack, NULL_TREE);
8533 /* Call __builtin_saveregs. */
8534 u = make_tree (sizetype, expand_builtin_saveregs ());
8535 u = fold_convert (ptr_type_node, u);
8536 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8537 TREE_SIDE_EFFECTS (t) = 1;
8538 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8540 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8541 if (nfp < 8)
8542 nfp = 8 - nfp;
8543 else
8544 nfp = 0;
8545 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8546 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8547 TREE_SIDE_EFFECTS (t) = 1;
8548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8550 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8551 TREE_SIDE_EFFECTS (t) = 1;
8552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8554 nint = crtl->args.info.arg_count[SH_ARG_INT];
8555 if (nint < 4)
8556 nint = 4 - nint;
8557 else
8558 nint = 0;
8559 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8560 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8561 TREE_SIDE_EFFECTS (t) = 1;
8562 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8564 u = make_tree (ptr_type_node, nextarg);
8565 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8566 TREE_SIDE_EFFECTS (t) = 1;
8567 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8570 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8571 member, return it. */
8572 static tree
8573 find_sole_member (tree type)
8575 tree field, member = NULL_TREE;
8577 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8579 if (TREE_CODE (field) != FIELD_DECL)
8580 continue;
8581 if (!DECL_SIZE (field))
8582 return NULL_TREE;
8583 if (integer_zerop (DECL_SIZE (field)))
8584 continue;
8585 if (member)
8586 return NULL_TREE;
8587 member = field;
8589 return member;
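/* Example (added for illustration, using a GNU C zero-length array as the
   zero-sized member):

     struct wrapper { char pad[0]; double d; };

   has exactly one member of nonzero size, so find_sole_member returns the
   FIELD_DECL for d; with two nonzero-sized members, or a member of unknown
   size, it returns NULL_TREE.  */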
8592 /* Implement `va_arg'. */
8593 static tree
8594 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8595 gimple_seq *post_p ATTRIBUTE_UNUSED)
8597 HOST_WIDE_INT size, rsize;
8598 tree tmp, pptr_type_node;
8599 tree addr, lab_over = NULL, result = NULL;
8600 bool pass_by_ref;
8601 tree eff_type;
8603 if (!VOID_TYPE_P (type))
8604 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8605 else
8606 pass_by_ref = false;
8608 if (pass_by_ref)
8609 type = build_pointer_type (type);
8611 size = int_size_in_bytes (type);
8612 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8613 pptr_type_node = build_pointer_type (ptr_type_node);
8615 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8616 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8618 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8619 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8620 int pass_as_float;
8621 tree lab_false;
8622 tree member;
8624 f_next_o = TYPE_FIELDS (va_list_type_node);
8625 f_next_o_limit = DECL_CHAIN (f_next_o);
8626 f_next_fp = DECL_CHAIN (f_next_o_limit);
8627 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8628 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8630 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8631 NULL_TREE);
8632 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8633 valist, f_next_o_limit, NULL_TREE);
8634 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8635 valist, f_next_fp, NULL_TREE);
8636 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8637 valist, f_next_fp_limit, NULL_TREE);
8638 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8639 valist, f_next_stack, NULL_TREE);
8641 /* Structures with a single member with a distinct mode are passed
8642 like their member. This is relevant if the latter has a REAL_TYPE
8643 or COMPLEX_TYPE type. */
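/* Hypothetical example (added for clarity): for

     struct s { double d; };

   the struct has the same mode as its only member (DFmode), so eff_type
   descends to the double and the argument is fetched the same way a plain
   double would be.  */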
8644 eff_type = type;
8645 while (TREE_CODE (eff_type) == RECORD_TYPE
8646 && (member = find_sole_member (eff_type))
8647 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8648 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8649 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8651 tree field_type = TREE_TYPE (member);
8653 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8654 eff_type = field_type;
8655 else
8657 gcc_assert ((TYPE_ALIGN (eff_type)
8658 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8659 || (TYPE_ALIGN (eff_type)
8660 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8661 break;
8665 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8667 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8668 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8669 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8670 && size <= 16));
8672 else
8674 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8677 addr = create_tmp_var (pptr_type_node);
8678 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8679 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8681 valist = build_simple_mem_ref (addr);
8683 if (pass_as_float)
8685 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8686 tree cmp;
8687 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8689 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8690 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8692 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8693 tmp = next_fp_limit;
8694 if (size > 4 && !is_double)
8695 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8696 tmp = build2 (GE_EXPR, boolean_type_node,
8697 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8698 cmp = build3 (COND_EXPR, void_type_node, tmp,
8699 build1 (GOTO_EXPR, void_type_node,
8700 unshare_expr (lab_false)), NULL_TREE);
8701 if (!is_double)
8702 gimplify_and_add (cmp, pre_p);
8704 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8705 || (is_double || size == 16))
8707 tmp = fold_convert (sizetype, next_fp_tmp);
8708 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8709 size_int (UNITS_PER_WORD));
8710 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8711 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8713 if (is_double)
8714 gimplify_and_add (cmp, pre_p);
8716 #ifdef FUNCTION_ARG_SCmode_WART
8717 if (TYPE_MODE (eff_type) == SCmode
8718 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8720 tree subtype = TREE_TYPE (eff_type);
8721 tree real, imag;
8723 imag
8724 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8725 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8727 real
8728 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8729 real = get_initialized_tmp_var (real, pre_p, NULL);
8731 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8732 if (type != eff_type)
8733 result = build1 (VIEW_CONVERT_EXPR, type, result);
8734 result = get_initialized_tmp_var (result, pre_p, NULL);
8736 #endif /* FUNCTION_ARG_SCmode_WART */
8738 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8739 gimplify_and_add (tmp, pre_p);
8741 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8742 gimplify_and_add (tmp, pre_p);
8744 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8745 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8746 gimplify_assign (unshare_expr (next_fp_tmp),
8747 unshare_expr (valist), pre_p);
8749 gimplify_assign (unshare_expr (valist),
8750 unshare_expr (next_fp_tmp), post_p);
8751 valist = next_fp_tmp;
8753 else
8755 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8756 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8757 unshare_expr (next_o_limit));
8758 tmp = build3 (COND_EXPR, void_type_node, tmp,
8759 build1 (GOTO_EXPR, void_type_node,
8760 unshare_expr (lab_false)),
8761 NULL_TREE);
8762 gimplify_and_add (tmp, pre_p);
8764 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8765 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8767 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8768 gimplify_and_add (tmp, pre_p);
8770 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8771 gimplify_and_add (tmp, pre_p);
8773 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8774 gimplify_assign (unshare_expr (next_o),
8775 unshare_expr (next_o_limit), pre_p);
8777 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8778 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8781 if (!result)
8783 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8784 gimplify_and_add (tmp, pre_p);
8788 /* ??? In va-sh.h, there had been code to make values larger than
8789 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8791 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8792 if (result)
8794 gimplify_assign (result, tmp, pre_p);
8795 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8796 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8797 gimplify_and_add (tmp, pre_p);
8799 else
8800 result = tmp;
8802 if (pass_by_ref)
8803 result = build_va_arg_indirect_ref (result);
8805 return result;
8808 /* 64-bit floating point memory transfers are paired single precision loads
8809 or stores.  So DWARF information needs fixing in little endian (unless
8810 PR=SZ=1 in FPSCR). */
8811 rtx
8812 sh_dwarf_register_span (rtx reg)
8814 unsigned regno = REGNO (reg);
8816 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8817 return NULL_RTX;
8819 return
8820 gen_rtx_PARALLEL (VOIDmode,
8821 gen_rtvec (2,
8822 gen_rtx_REG (SFmode, regno + 1),
8823 gen_rtx_REG (SFmode, regno)));
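/* Example (added for illustration): if REG is a DFmode value whose first
   hard register is N, the PARALLEL returned above lists the two
   single-precision halves as (reg:SF N+1) (reg:SF N), i.e. in swapped
   order, which is what the DWARF consumer needs on little endian.  */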
8826 static machine_mode
8827 sh_promote_function_mode (const_tree type, machine_mode mode,
8828 int *punsignedp, const_tree funtype,
8829 int for_return)
8831 if (sh_promote_prototypes (funtype))
8832 return promote_mode (type, mode, punsignedp);
8833 else
8834 return default_promote_function_mode (type, mode, punsignedp, funtype,
8835 for_return);
8838 static bool
8839 sh_promote_prototypes (const_tree type)
8841 if (TARGET_HITACHI)
8842 return false;
8843 if (! type)
8844 return true;
8845 return ! sh_attr_renesas_p (type);
8848 /* Whether an argument must be passed by reference. On SHcompact, we
8849 pretend that arguments wider than 32 bits that would have been passed in
8850 registers are passed by reference, so that an SHmedia trampoline
8851 loads them into the full 64-bit registers. */
8852 static int
8853 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8854 const_tree type, bool named)
8856 unsigned HOST_WIDE_INT size;
8858 if (type)
8859 size = int_size_in_bytes (type);
8860 else
8861 size = GET_MODE_SIZE (mode);
8863 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8864 && (!named
8865 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8866 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8867 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8868 && size > 4
8869 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8870 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8871 return size;
8872 else
8873 return 0;
8876 static bool
8877 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8878 const_tree type, bool named)
8880 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8882 if (targetm.calls.must_pass_in_stack (mode, type))
8883 return true;
8885 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8886 wants to know about pass-by-reference semantics for incoming
8887 arguments. */
8888 if (! cum)
8889 return false;
8891 if (TARGET_SHCOMPACT)
8893 cum->byref = shcompact_byref (cum, mode, type, named);
8894 return cum->byref != 0;
8897 return false;
8900 static bool
8901 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8902 const_tree type, bool named ATTRIBUTE_UNUSED)
8904 /* ??? How can it possibly be correct to return true only on the
8905 caller side of the equation? Is there someplace else in the
8906 sh backend that's magically producing the copies? */
8907 return (get_cumulative_args (cum)->outgoing
8908 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8909 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8912 /* Round a register number up to a proper boundary for an arg of mode
8913 MODE.
8914 The SH doesn't care about double alignment, so we only
8915 round doubles to even regs when explicitly asked to. */
8916 static int
8917 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8919 /* FIXME: This used to be a macro and has been copy pasted into this
8920 function as is. Make this more readable. */
8921 return
8922 (((TARGET_ALIGN_DOUBLE
8923 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8924 && (mode == DFmode || mode == DCmode)
8925 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8926 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8927 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8928 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8929 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
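/* Worked example (added for illustration, assuming UNITS_PER_WORD == 4 as
   on SH): for a DFmode argument on TARGET_SH4 with one float argument
   register already in use, GET_MODE_UNIT_SIZE (DFmode) == 8 > 4, so the
   count 1 is rounded up to 2 and the double starts on an even register
   boundary within the FP argument registers; SImode and SFmode arguments
   get their count back unchanged.  */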
8932 /* Return true if an arg of the specified mode should be passed in a register
8933 or false otherwise. */
8934 static bool
8935 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8936 const_tree type)
8938 /* FIXME: This used to be a macro and has been copy pasted into this
8939 function as is. Make this more readable. */
8940 return
8941 ((type == 0
8942 || (! TREE_ADDRESSABLE (type)
8943 && (! (TARGET_HITACHI || cum.renesas_abi)
8944 || ! (AGGREGATE_TYPE_P (type)
8945 || (!TARGET_FPU_ANY
8946 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8947 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8948 && ! cum.force_mem
8949 && (TARGET_SH2E
8950 ? ((mode) == BLKmode
8951 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8952 + int_size_in_bytes (type))
8953 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8954 : ((sh_round_reg (cum, mode)
8955 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8956 <= NPARM_REGS (mode)))
8957 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8960 static int
8961 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
8962 tree type, bool named ATTRIBUTE_UNUSED)
8964 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8965 int words = 0;
8967 if (!TARGET_SH5
8968 && sh_pass_in_reg_p (*cum, mode, type)
8969 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8970 && (sh_round_reg (*cum, mode)
8971 + (mode != BLKmode
8972 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8973 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8974 > NPARM_REGS (mode)))
8975 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8977 else if (!TARGET_SHCOMPACT
8978 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8979 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8981 return words * UNITS_PER_WORD;
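/* Worked example (added for illustration, assuming NPARM_REGS (SImode) == 4
   and UNITS_PER_WORD == 4, for a target where the first if above applies):
   with three integer argument words already used and an 8-byte argument,
   sh_round_reg gives 3 and 3 + 2 > 4, so words == 1 and 4 bytes of the
   argument go in the last register while the rest goes on the stack.  */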
8985 /* Define where to put the arguments to a function.
8986 Value is zero to push the argument on the stack,
8987 or a hard register in which to store the argument.
8989 MODE is the argument's machine mode.
8990 TYPE is the data type of the argument (as a tree).
8991 This is null for libcalls where that information may
8992 not be available.
8993 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8994 the preceding args and about the function being called.
8995 NAMED is nonzero if this argument is a named parameter
8996 (otherwise it is an extra parameter matching an ellipsis).
8998 On SH the first args are normally in registers
8999 and the rest are pushed. Any arg that starts within the first
9000 NPARM_REGS words is at least partially passed in a register unless
9001 its data type forbids. */
9002 static rtx
9003 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9004 const_tree type, bool named)
9006 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9008 if (! TARGET_SH5 && mode == VOIDmode)
9009 return GEN_INT (ca->renesas_abi ? 1 : 0);
9011 if (! TARGET_SH5
9012 && sh_pass_in_reg_p (*ca, mode, type)
9013 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9015 int regno;
9017 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9018 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9020 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9021 gen_rtx_REG (SFmode,
9022 BASE_ARG_REG (mode)
9023 + (sh_round_reg (*ca, mode) ^ 1)),
9024 const0_rtx);
9025 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9026 gen_rtx_REG (SFmode,
9027 BASE_ARG_REG (mode)
9028 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9029 GEN_INT (4));
9030 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9033 /* If the alignment of a DF value causes an SF register to be
9034 skipped, we will use that skipped register for the next SF
9035 value. */
9036 if ((TARGET_HITACHI || ca->renesas_abi)
9037 && ca->free_single_fp_reg
9038 && mode == SFmode)
9039 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9041 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9042 ^ (mode == SFmode && TARGET_SH4
9043 && TARGET_LITTLE_ENDIAN
9044 && ! TARGET_HITACHI && ! ca->renesas_abi);
9045 return gen_rtx_REG (mode, regno);
9049 if (TARGET_SH5)
9051 if (mode == VOIDmode && TARGET_SHCOMPACT)
9052 return GEN_INT (ca->call_cookie);
9054 /* The following test assumes unnamed arguments are promoted to
9055 DFmode. */
9056 if (mode == SFmode && ca->free_single_fp_reg)
9057 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9059 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9060 && (named || ! ca->prototype_p)
9061 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9063 if (! ca->prototype_p && TARGET_SHMEDIA)
9064 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9066 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9067 FIRST_FP_PARM_REG
9068 + ca->arg_count[(int) SH_ARG_FLOAT]);
9071 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9072 && (! TARGET_SHCOMPACT
9073 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9074 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9075 type, named))))
9077 return gen_rtx_REG (mode, (FIRST_PARM_REG
9078 + ca->arg_count[(int) SH_ARG_INT]));
9081 return NULL_RTX;
9084 return NULL_RTX;
9087 /* Update the data in CUM to advance over an argument
9088 of mode MODE and data type TYPE.
9089 (TYPE is null for libcalls where that information may not be
9090 available.) */
9091 static void
9092 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9093 const_tree type, bool named)
9095 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9097 if (ca->force_mem)
9098 ca->force_mem = 0;
9099 else if (TARGET_SH5)
9101 const_tree type2 = (ca->byref && type
9102 ? TREE_TYPE (type)
9103 : type);
9104 machine_mode mode2 = (ca->byref && type
9105 ? TYPE_MODE (type2)
9106 : mode);
9107 int dwords = ((ca->byref
9108 ? ca->byref
9109 : mode2 == BLKmode
9110 ? int_size_in_bytes (type2)
9111 : GET_MODE_SIZE (mode2)) + 7) / 8;
9112 int numregs = MIN (dwords, NPARM_REGS (SImode)
9113 - ca->arg_count[(int) SH_ARG_INT]);
9115 if (numregs)
9117 ca->arg_count[(int) SH_ARG_INT] += numregs;
9118 if (TARGET_SHCOMPACT
9119 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9121 ca->call_cookie
9122 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9123 - numregs, 1);
9124 /* N.B. We want this also for outgoing. */
9125 ca->stack_regs += numregs;
9127 else if (ca->byref)
9129 if (! ca->outgoing)
9130 ca->stack_regs += numregs;
9131 ca->byref_regs += numregs;
9132 ca->byref = 0;
9134 ca->call_cookie
9135 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9136 - numregs, 2);
9137 while (--numregs);
9138 ca->call_cookie
9139 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9140 - 1, 1);
9142 else if (dwords > numregs)
9144 int pushregs = numregs;
9146 if (TARGET_SHCOMPACT)
9147 ca->stack_regs += numregs;
9148 while (pushregs < NPARM_REGS (SImode) - 1
9149 && (CALL_COOKIE_INT_REG_GET
9150 (ca->call_cookie,
9151 NPARM_REGS (SImode) - pushregs)
9152 == 1))
9154 ca->call_cookie
9155 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9156 - pushregs, 1);
9157 pushregs++;
9159 if (numregs == NPARM_REGS (SImode))
9160 ca->call_cookie
9161 |= CALL_COOKIE_INT_REG (0, 1)
9162 | CALL_COOKIE_STACKSEQ (numregs - 1);
9163 else
9164 ca->call_cookie
9165 |= CALL_COOKIE_STACKSEQ (numregs);
9168 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9169 && (named || ! ca->prototype_p))
9171 if (mode2 == SFmode && ca->free_single_fp_reg)
9172 ca->free_single_fp_reg = 0;
9173 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9174 < NPARM_REGS (SFmode))
9176 int numfpregs
9177 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9178 NPARM_REGS (SFmode)
9179 - ca->arg_count[(int) SH_ARG_FLOAT]);
9181 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9183 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9185 if (ca->outgoing && numregs > 0)
9188 ca->call_cookie
9189 |= (CALL_COOKIE_INT_REG
9190 (ca->arg_count[(int) SH_ARG_INT]
9191 - numregs + ((numfpregs - 2) / 2),
9192 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9193 - numfpregs) / 2));
9195 while (numfpregs -= 2);
9197 else if (mode2 == SFmode && (named)
9198 && (ca->arg_count[(int) SH_ARG_FLOAT]
9199 < NPARM_REGS (SFmode)))
9200 ca->free_single_fp_reg
9201 = FIRST_FP_PARM_REG - numfpregs
9202 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9205 return;
9208 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9210 /* Note that we've used the skipped register. */
9211 if (mode == SFmode && ca->free_single_fp_reg)
9213 ca->free_single_fp_reg = 0;
9214 return;
9216 /* When we have a DF after an SF, there's an SF register that gets
9217 skipped in order to align the DF value. We note this skipped
9218 register, because the next SF value will use it, and not the
9219 SF that follows the DF. */
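/* Hypothetical example (added for clarity): with arguments
   (float a, double b, float c) under the Renesas double-precision ABI,
   a takes the first FP argument register, b is rounded up to the next even
   register pair and thereby skips one single register, and that skipped
   register is recorded in free_single_fp_reg so that c can still be passed
   in it (see sh_function_arg above).  */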
9220 if (mode == DFmode
9221 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9223 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9224 + BASE_ARG_REG (mode));
9228 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9229 || sh_pass_in_reg_p (*ca, mode, type))
9230 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9231 = (sh_round_reg (*ca, mode)
9232 + (mode == BLKmode
9233 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9234 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9237 /* The Renesas calling convention doesn't quite fit into this scheme since
9238 the address is passed like an invisible argument, but one that is always
9239 passed in memory. */
9240 static rtx
9241 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9243 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9244 return NULL_RTX;
9245 return gen_rtx_REG (Pmode, 2);
9248 /* Worker function for TARGET_FUNCTION_VALUE.
9250 For the SH, this is like LIBCALL_VALUE, except that we must change the
9251 mode like PROMOTE_MODE does.
9252 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9253 tested here has to be kept in sync with the one in
9254 explow.c:promote_mode. */
9255 static rtx
9256 sh_function_value (const_tree valtype,
9257 const_tree fn_decl_or_type,
9258 bool outgoing ATTRIBUTE_UNUSED)
9260 if (fn_decl_or_type
9261 && !DECL_P (fn_decl_or_type))
9262 fn_decl_or_type = NULL;
9264 return gen_rtx_REG (
9265 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9266 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9267 && (TREE_CODE (valtype) == INTEGER_TYPE
9268 || TREE_CODE (valtype) == ENUMERAL_TYPE
9269 || TREE_CODE (valtype) == BOOLEAN_TYPE
9270 || TREE_CODE (valtype) == REAL_TYPE
9271 || TREE_CODE (valtype) == OFFSET_TYPE))
9272 && sh_promote_prototypes (fn_decl_or_type)
9273 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9274 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9277 /* Worker function for TARGET_LIBCALL_VALUE. */
9278 static rtx
9279 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9281 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9284 /* Return true if N is a possible register number of function value. */
9285 static bool
9286 sh_function_value_regno_p (const unsigned int regno)
9288 return ((regno) == FIRST_RET_REG
9289 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9290 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9293 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9294 static bool
9295 sh_return_in_memory (const_tree type, const_tree fndecl)
9297 if (TARGET_SH5)
9299 if (TYPE_MODE (type) == BLKmode)
9300 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9301 else
9302 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9304 else
9306 return (TYPE_MODE (type) == BLKmode
9307 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9308 && TREE_CODE (type) == RECORD_TYPE));
9312 /* We actually emit the code in sh_expand_prologue. We used to use
9313 a static variable to flag that we need to emit this code, but that
9314 doesn't work when inlining, when functions are deferred and then emitted
9315 later. Fortunately, we already have two flags that are part of struct
9316 function that tell if a function uses varargs or stdarg. */
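/* Illustrative example (assumed for exposition, not part of the original
   sources): for a non-SH5 stdarg function such as

     int f (int a, ...);

   the single named SImode argument occupies r4, leaving r5..r7 for the
   anonymous arguments, so anon_parm_regs would be 3 and *pretend_arg_size
   would be 12 bytes.  */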
9317 static void
9318 sh_setup_incoming_varargs (cumulative_args_t ca,
9319 machine_mode mode,
9320 tree type,
9321 int *pretend_arg_size,
9322 int second_time ATTRIBUTE_UNUSED)
9324 gcc_assert (cfun->stdarg);
9325 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9327 int named_parm_regs, anon_parm_regs;
9329 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9330 + (mode == BLKmode
9331 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9332 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9333 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9334 if (anon_parm_regs > 0)
9335 *pretend_arg_size = anon_parm_regs * 4;
9339 static bool
9340 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9342 return TARGET_SH5;
9345 static bool
9346 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9348 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9350 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9354 /* Define the offset between two registers, one to be eliminated, and
9355 the other its replacement, at the start of a routine. */
9357 initial_elimination_offset (int from, int to)
9359 int regs_saved;
9360 int regs_saved_rounding = 0;
9361 int total_saved_regs_space;
9362 int total_auto_space;
9363 int save_flags = target_flags;
9364 int copy_flags;
9365 HARD_REG_SET live_regs_mask;
9367 shmedia_space_reserved_for_target_registers = false;
9368 regs_saved = calc_live_regs (&live_regs_mask);
9369 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9371 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9373 shmedia_space_reserved_for_target_registers = true;
9374 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9377 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9378 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9379 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9381 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9382 copy_flags = target_flags;
9383 target_flags = save_flags;
9385 total_saved_regs_space = regs_saved + regs_saved_rounding;
9387 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9388 return total_saved_regs_space + total_auto_space
9389 + crtl->args.info.byref_regs * 8;
9391 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9392 return total_saved_regs_space + total_auto_space
9393 + crtl->args.info.byref_regs * 8;
9395 /* Initial gap between fp and sp is 0. */
9396 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9397 return 0;
9399 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9400 return rounded_frame_size (0);
9402 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9403 return rounded_frame_size (0);
9405 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9406 && (to == HARD_FRAME_POINTER_REGNUM
9407 || to == STACK_POINTER_REGNUM));
9408 if (TARGET_SH5)
9410 int n = total_saved_regs_space;
9411 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9412 save_schedule schedule;
9413 save_entry *entry;
9415 n += total_auto_space;
9417 /* If it wasn't saved, there's not much we can do. */
9418 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9419 return n;
9421 target_flags = copy_flags;
9423 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9424 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9425 if (entry->reg == pr_reg)
9427 target_flags = save_flags;
9428 return entry->offset;
9430 gcc_unreachable ();
9432 else
9433 return total_auto_space;
9436 /* Parse the -mfixed-range= option string. */
9437 void
9438 sh_fix_range (const char *const_str)
9440 int i, first, last;
9441 char *str, *dash, *comma;
9443 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9444 REG2 are either register names or register numbers. The effect
9445 of this option is to mark the registers in the range from REG1 to
9446 REG2 as ``fixed'' so they won't be used by the compiler. */
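/* For example (illustration only), -mfixed-range=r10-r13 marks r10..r13 as
   fixed; several ranges may be given separated by commas, e.g.
   -mfixed-range=r4-r5,r10-r13.  */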
9448 i = strlen (const_str);
9449 str = (char *) alloca (i + 1);
9450 memcpy (str, const_str, i + 1);
9452 while (1)
9454 dash = strchr (str, '-');
9455 if (!dash)
9457 warning (0, "value of -mfixed-range must have form REG1-REG2");
9458 return;
9460 *dash = '\0';
9461 comma = strchr (dash + 1, ',');
9462 if (comma)
9463 *comma = '\0';
9465 first = decode_reg_name (str);
9466 if (first < 0)
9468 warning (0, "unknown register name: %s", str);
9469 return;
9472 last = decode_reg_name (dash + 1);
9473 if (last < 0)
9475 warning (0, "unknown register name: %s", dash + 1);
9476 return;
9479 *dash = '-';
9481 if (first > last)
9483 warning (0, "%s-%s is an empty range", str, dash + 1);
9484 return;
9487 for (i = first; i <= last; ++i)
9488 fixed_regs[i] = call_used_regs[i] = 1;
9490 if (!comma)
9491 break;
9493 *comma = ',';
9494 str = comma + 1;
9498 /* Insert any deferred function attributes from earlier pragmas. */
9499 static void
9500 sh_insert_attributes (tree node, tree *attributes)
9502 tree attrs;
9504 if (TREE_CODE (node) != FUNCTION_DECL)
9505 return;
9507 /* We are only interested in fields. */
9508 if (!DECL_P (node))
9509 return;
9511 /* Append the attributes to the deferred attributes. */
9512 *sh_deferred_function_attributes_tail = *attributes;
9513 attrs = sh_deferred_function_attributes;
9514 if (!attrs)
9515 return;
9517 /* Some attributes imply or require the interrupt attribute. */
9518 if (!lookup_attribute ("interrupt_handler", attrs)
9519 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9521 /* If we have a trapa_handler, but no interrupt_handler attribute,
9522 insert an interrupt_handler attribute. */
9523 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9524 /* We can't use sh_pr_interrupt here because that's not in the
9525 Java frontend. */
9526 attrs
9527 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9528 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9529 if the interrupt attribute is missing, we ignore the attribute
9530 and warn. */
9531 else if (lookup_attribute ("sp_switch", attrs)
9532 || lookup_attribute ("trap_exit", attrs)
9533 || lookup_attribute ("nosave_low_regs", attrs)
9534 || lookup_attribute ("resbank", attrs))
9536 tree *tail;
9538 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9540 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9541 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9542 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9543 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9544 warning (OPT_Wattributes,
9545 "%qE attribute only applies to interrupt functions",
9546 TREE_PURPOSE (attrs));
9547 else
9549 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9550 NULL_TREE);
9551 tail = &TREE_CHAIN (*tail);
9554 attrs = *attributes;
9558 /* Install the processed list. */
9559 *attributes = attrs;
9561 /* Clear deferred attributes. */
9562 sh_deferred_function_attributes = NULL_TREE;
9563 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9565 return;
9568 /*------------------------------------------------------------------------------
9569 Target specific attributes
9570 Supported attributes are:
9572 * interrupt_handler
9573 Specifies this function is an interrupt handler.
9575 * trapa_handler
9576 Like interrupt_handler, but don't save all registers.
9578 * sp_switch
9579 Specifies an alternate stack for an interrupt handler to run on.
9581 * trap_exit
9582 Use a trapa to exit an interrupt function instead of rte.
9584 * nosave_low_regs
9585 Don't save r0..r7 in an interrupt handler function.
9586 This is useful on SH3* and SH4*, which have a separate set of low
9587 regs for user and privileged modes.
9588 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9589 those that run with interrupts disabled and thus can't be
9590 interrupted themselves).
9592 * renesas
9593 Use Renesas calling/layout conventions (functions and structures).
9595 * resbank
9596 In case of an interrupt handler function, use a register bank to
9597 save registers R0-R14, MACH, MACL, GBR and PR.
9598 This is available only on SH2A targets.
9600 * function_vector
9601 Declares a function to be called using the TBR relative addressing
9602 mode. Takes an argument that specifies the slot number in the table
9603 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
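/* Illustrative usage (a sketch, not taken from the original sources):

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (42)))
     irq_handler (void);

     void __attribute__ ((function_vector (12)))
     tbr_func (void);                               (SH2A only)

   The attribute handlers below define which spellings and argument types
   are actually accepted.  */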
9606 /* Handle a 'resbank' attribute. */
9607 static tree
9608 sh_handle_resbank_handler_attribute (tree * node, tree name,
9609 tree args ATTRIBUTE_UNUSED,
9610 int flags ATTRIBUTE_UNUSED,
9611 bool * no_add_attrs)
9613 if (!TARGET_SH2A)
9615 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9616 name);
9617 *no_add_attrs = true;
9619 if (TREE_CODE (*node) != FUNCTION_DECL)
9621 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9622 name);
9623 *no_add_attrs = true;
9626 return NULL_TREE;
9629 /* Handle an "interrupt_handler" attribute; arguments as in
9630 struct attribute_spec.handler. */
9631 static tree
9632 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9633 tree args ATTRIBUTE_UNUSED,
9634 int flags ATTRIBUTE_UNUSED,
9635 bool *no_add_attrs)
9637 if (TREE_CODE (*node) != FUNCTION_DECL)
9639 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9640 name);
9641 *no_add_attrs = true;
9643 else if (TARGET_SHCOMPACT)
9645 error ("attribute interrupt_handler is not compatible with -m5-compact");
9646 *no_add_attrs = true;
9649 return NULL_TREE;
9652 /* Handle a 'function_vector' attribute; arguments as in
9653 struct attribute_spec.handler. */
9654 static tree
9655 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9656 tree args ATTRIBUTE_UNUSED,
9657 int flags ATTRIBUTE_UNUSED,
9658 bool * no_add_attrs)
9660 if (!TARGET_SH2A)
9662 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9663 name);
9664 *no_add_attrs = true;
9666 else if (TREE_CODE (*node) != FUNCTION_DECL)
9668 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9669 name);
9670 *no_add_attrs = true;
9672 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9674 /* The argument must be a constant integer. */
9675 warning (OPT_Wattributes,
9676 "%qE attribute argument not an integer constant",
9677 name);
9678 *no_add_attrs = true;
9680 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9682 /* The argument value must be between 0 and 255. */
9683 warning (OPT_Wattributes,
9684 "%qE attribute argument should be between 0 and 255",
9685 name);
9686 *no_add_attrs = true;
9688 return NULL_TREE;
9691 /* Returns true if X is a SYMBOL_REF for a function that has been assigned
9692 the attribute 'function_vector'. */
9693 bool
9694 sh2a_is_function_vector_call (rtx x)
9696 if (GET_CODE (x) == SYMBOL_REF
9697 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9699 tree tr = SYMBOL_REF_DECL (x);
9701 if (sh2a_function_vector_p (tr))
9702 return true;
9705 return false;
9708 /* Returns the function vector number, if the attribute
9709 'function_vector' is assigned, otherwise returns zero. */
9711 sh2a_get_function_vector_number (rtx x)
9713 int num;
9714 tree list, t;
9716 if ((GET_CODE (x) == SYMBOL_REF)
9717 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9719 t = SYMBOL_REF_DECL (x);
9721 if (TREE_CODE (t) != FUNCTION_DECL)
9722 return 0;
9724 list = SH_ATTRIBUTES (t);
9725 while (list)
9727 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9729 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9730 return num;
9733 list = TREE_CHAIN (list);
9736 return 0;
9738 else
9739 return 0;
9742 /* Handle an "sp_switch" attribute; arguments as in
9743 struct attribute_spec.handler. */
9744 static tree
9745 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9746 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9748 if (TREE_CODE (*node) != FUNCTION_DECL)
9750 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9751 name);
9752 *no_add_attrs = true;
9754 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9756 /* The argument must be a constant string. */
9757 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9758 name);
9759 *no_add_attrs = true;
9762 return NULL_TREE;
9765 /* Handle a "trap_exit" attribute; arguments as in
9766 struct attribute_spec.handler. */
9767 static tree
9768 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9769 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9771 if (TREE_CODE (*node) != FUNCTION_DECL)
9773 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9774 name);
9775 *no_add_attrs = true;
9777 /* The argument specifies a trap number to be used in a trapa instruction
9778 at function exit (instead of an rte instruction). */
9779 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9781 /* The argument must be a constant integer. */
9782 warning (OPT_Wattributes, "%qE attribute argument not an "
9783 "integer constant", name);
9784 *no_add_attrs = true;
9787 return NULL_TREE;
9790 static tree
9791 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9792 tree name ATTRIBUTE_UNUSED,
9793 tree args ATTRIBUTE_UNUSED,
9794 int flags ATTRIBUTE_UNUSED,
9795 bool *no_add_attrs ATTRIBUTE_UNUSED)
9797 return NULL_TREE;
9800 /* True if __attribute__((renesas)) or -mrenesas. */
9801 bool
9802 sh_attr_renesas_p (const_tree td)
9804 if (TARGET_HITACHI)
9805 return true;
9806 if (td == NULL_TREE)
9807 return false;
9808 if (DECL_P (td))
9809 td = TREE_TYPE (td);
9810 if (td == error_mark_node)
9811 return false;
9812 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9813 != NULL_TREE);
9816 /* True if __attribute__((renesas)) or -mrenesas, for the current
9817 function. */
9818 bool
9819 sh_cfun_attr_renesas_p (void)
9821 return sh_attr_renesas_p (current_function_decl);
9824 /* Returns true if the current function has the "interrupt_handler"
9825 attribute set. */
9826 bool
9827 sh_cfun_interrupt_handler_p (void)
9829 return (lookup_attribute ("interrupt_handler",
9830 DECL_ATTRIBUTES (current_function_decl))
9831 != NULL_TREE);
9834 /* Returns true if FUNC has been assigned the attribute
9835 "function_vector". */
9836 bool
9837 sh2a_function_vector_p (tree func)
9839 tree list;
9840 if (TREE_CODE (func) != FUNCTION_DECL)
9841 return false;
9843 list = SH_ATTRIBUTES (func);
9844 while (list)
9846 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9847 return true;
9849 list = TREE_CHAIN (list);
9851 return false;
9854 /* Returns true if the current function has the "resbank" attribute set. */
9855 bool
9856 sh_cfun_resbank_handler_p (void)
9858 return ((lookup_attribute ("resbank",
9859 DECL_ATTRIBUTES (current_function_decl))
9860 != NULL_TREE)
9861 && (lookup_attribute ("interrupt_handler",
9862 DECL_ATTRIBUTES (current_function_decl))
9863 != NULL_TREE) && TARGET_SH2A);
9866 /* Returns true if the current function has a "trap_exit" attribute set. */
9867 bool
9868 sh_cfun_trap_exit_p (void)
9870 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9871 != NULL_TREE;
9874 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9875 static const char *
9876 sh_check_pch_target_flags (int old_flags)
9878 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9879 | MASK_SH_E | MASK_HARD_SH4
9880 | MASK_FPU_SINGLE | MASK_SH4))
9881 return _("created and used with different architectures / ABIs");
9882 if ((old_flags ^ target_flags) & MASK_HITACHI)
9883 return _("created and used with different ABIs");
9884 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9885 return _("created and used with different endianness");
9886 return NULL;
9889 /* Predicates used by the templates. */
9891 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9892 Used only in general_movsrc_operand. */
9893 bool
9894 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9896 switch (REGNO (op))
9898 case PR_REG:
9899 case MACL_REG:
9900 case MACH_REG:
9901 return true;
9903 return false;
9906 /* Returns true if OP is a floating point value with value 0.0. */
9907 bool
9908 fp_zero_operand (rtx op)
9910 REAL_VALUE_TYPE r;
9912 if (GET_MODE (op) != SFmode)
9913 return false;
9915 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9916 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9919 /* Returns true if OP is a floating point value with value 1.0. */
9920 bool
9921 fp_one_operand (rtx op)
9923 REAL_VALUE_TYPE r;
9925 if (GET_MODE (op) != SFmode)
9926 return false;
9928 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9929 return REAL_VALUES_EQUAL (r, dconst1);
9932 /* Return the TLS type for TLS symbols. */
9933 enum tls_model
9934 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9936 if (GET_CODE (op) != SYMBOL_REF)
9937 return TLS_MODEL_NONE;
9938 return SYMBOL_REF_TLS_MODEL (op);
9941 /* Return the destination address of a branch. */
9942 static int
9943 branch_dest (rtx branch)
9945 rtx dest = SET_SRC (PATTERN (branch));
9946 int dest_uid;
9948 if (GET_CODE (dest) == IF_THEN_ELSE)
9949 dest = XEXP (dest, 1);
9950 dest = XEXP (dest, 0);
9951 dest_uid = INSN_UID (dest);
9952 return INSN_ADDRESSES (dest_uid);
9955 /* Return nonzero if REG is not used after INSN.
9956 We assume REG is a reload reg, and therefore does
9957 not live past labels. It may live past calls or jumps though. */
9958 bool
9959 reg_unused_after (rtx reg, rtx_insn *insn)
9961 enum rtx_code code;
9962 rtx set;
9964 /* If the reg is set by this instruction, then it is safe for our
9965 case. Disregard the case where this is a store to memory, since
9966 we are checking a register used in the store address. */
9967 set = single_set (insn);
9968 if (set && !MEM_P (SET_DEST (set))
9969 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9970 return true;
9972 while ((insn = NEXT_INSN (insn)))
9974 rtx set;
9975 if (!INSN_P (insn))
9976 continue;
9978 code = GET_CODE (insn);
9980 #if 0
9981 /* If this is a label that existed before reload, then the register
9982 is dead here. However, if this is a label added by reorg, then
9983 the register may still be live here. We can't tell the difference,
9984 so we just ignore labels completely. */
9985 if (code == CODE_LABEL)
9986 return 1;
9987 /* else */
9988 #endif
9990 if (code == JUMP_INSN)
9991 return false;
9993 /* If this is a sequence, we must handle them all at once.
9994 We could have for instance a call that sets the target register,
9995 and an insn in a delay slot that uses the register. In this case,
9996 we must return 0. */
9997 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9999 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10000 int i;
10001 int retval = 0;
10003 for (i = 0; i < seq->len (); i++)
10005 rtx_insn *this_insn = seq->insn (i);
10006 rtx set = single_set (this_insn);
10008 if (CALL_P (this_insn))
10009 code = CALL_INSN;
10010 else if (JUMP_P (this_insn))
10012 if (INSN_ANNULLED_BRANCH_P (this_insn))
10013 return false;
10014 code = JUMP_INSN;
10017 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10018 return false;
10019 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10021 if (!MEM_P (SET_DEST (set)))
10022 retval = true;
10023 else
10024 return false;
10026 if (set == NULL_RTX
10027 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10028 return false;
10030 if (retval == 1)
10031 return true;
10032 else if (code == JUMP_INSN)
10033 return false;
10036 set = single_set (insn);
10037 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10038 return false;
10039 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10040 return !MEM_P (SET_DEST (set));
10041 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10042 return false;
10044 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10045 return true;
10047 return true;
10050 #include "ggc.h"
10052 static GTY(()) rtx t_reg_rtx;
10054 get_t_reg_rtx (void)
10056 if (! t_reg_rtx)
10057 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10058 return t_reg_rtx;
10061 static GTY(()) tree fpscr_values;
10063 static void
10064 emit_fpu_switch (rtx scratch, int index)
10066 rtx src;
10068 if (fpscr_values == NULL)
10070 tree t;
10072 t = build_index_type (integer_one_node);
10073 t = build_array_type (integer_type_node, t);
10074 t = build_decl (BUILTINS_LOCATION,
10075 VAR_DECL, get_identifier ("__fpscr_values"), t);
10076 DECL_ARTIFICIAL (t) = 1;
10077 DECL_IGNORED_P (t) = 1;
10078 DECL_EXTERNAL (t) = 1;
10079 TREE_STATIC (t) = 1;
10080 TREE_PUBLIC (t) = 1;
10081 TREE_USED (t) = 1;
10083 fpscr_values = t;
10086 src = DECL_RTL (fpscr_values);
10087 if (!can_create_pseudo_p ())
10089 emit_move_insn (scratch, XEXP (src, 0));
10090 if (index != 0)
10091 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10092 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10094 else
10095 src = adjust_address (src, SImode, index * 4);
10097 emit_insn (gen_lds_fpscr (src));
10100 static rtx get_free_reg (HARD_REG_SET);
10102 /* This function returns a register to use for loading the address from
10103 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
10104 able to use pseudo registers after combine, or have a better mechanism
10105 for choosing a register, it should be done here. */
10106 /* REGS_LIVE is the liveness information for the point for which we
10107 need this allocation. In some bare-bones exit blocks, r1 is live at the
10108 start. We can even have all of r0..r3 being live:
10109 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10110 The INSN before which new insns are placed will clobber the register
10111 we return. If a basic block consists only of setting the return value
10112 register to a pseudo and using that register, the return value is not
10113 live before or after this block, yet we'll insert our insns right in
10114 the middle. */
10115 static rtx
10116 get_free_reg (HARD_REG_SET regs_live)
10118 if (! TEST_HARD_REG_BIT (regs_live, 1))
10119 return gen_rtx_REG (Pmode, 1);
10121 /* Hard reg 1 is live; since this is a small register classes target,
10122 there shouldn't be anything but a jump before the function end. */
10123 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10124 return gen_rtx_REG (Pmode, 7);
10127 /* This function will set the fpscr from memory.
10128 MODE is the mode we are setting it to. */
10129 void
10130 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10132 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10133 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10134 rtx addr_reg;
10136 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10137 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10140 /* Is the given character a logical line separator for the assembler? */
10141 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10142 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10143 #endif
10145 static bool
10146 sequence_insn_p (rtx_insn *insn)
10148 rtx_insn *prev, *next;
10150 prev = PREV_INSN (insn);
10151 if (prev == NULL)
10152 return false;
10154 next = NEXT_INSN (prev);
10155 if (next == NULL)
10156 return false;
10158 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10162 sh_insn_length_adjustment (rtx_insn *insn)
10164 /* Instructions with unfilled delay slots take up an extra two bytes for
10165 the nop in the delay slot. */
10166 if (((NONJUMP_INSN_P (insn)
10167 && GET_CODE (PATTERN (insn)) != USE
10168 && GET_CODE (PATTERN (insn)) != CLOBBER)
10169 || CALL_P (insn) || JUMP_P (insn))
10170 && ! sequence_insn_p (insn)
10171 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10172 return 2;
10174 /* SH2e has a bug that prevents the use of annulled branches, so if
10175 the delay slot is not filled, we'll have to put a NOP in it. */
10176 if (sh_cpu_attr == CPU_SH2E
10177 && JUMP_P (insn)
10178 && get_attr_type (insn) == TYPE_CBRANCH
10179 && ! sequence_insn_p (insn))
10180 return 2;
10182 /* sh-dsp parallel processing insn take four bytes instead of two. */
10184 if (NONJUMP_INSN_P (insn))
10186 int sum = 0;
10187 rtx body = PATTERN (insn);
10188 const char *templ;
10189 char c;
10190 bool maybe_label = true;
10192 if (GET_CODE (body) == ASM_INPUT)
10193 templ = XSTR (body, 0);
10194 else if (asm_noperands (body) >= 0)
10195 templ
10196 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10197 else
10198 return 0;
10201 int ppi_adjust = 0;
10204 c = *templ++;
10205 while (c == ' ' || c == '\t');
10206 /* all sh-dsp parallel-processing insns start with p.
10207 The only non-ppi sh insn starting with p is pref.
10208 The only ppi starting with pr is prnd. */
10209 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10210 ppi_adjust = 2;
10211 /* The repeat pseudo-insn expands to three insns, a total of
10212 six bytes in size. */
10213 else if ((c == 'r' || c == 'R')
10214 && ! strncasecmp ("epeat", templ, 5))
10215 ppi_adjust = 4;
10216 while (c && c != '\n'
10217 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10219 /* If this is a label, it is obviously not a ppi insn. */
10220 if (c == ':' && maybe_label)
10222 ppi_adjust = 0;
10223 break;
10225 else if (c == '\'' || c == '"')
10226 maybe_label = false;
10227 c = *templ++;
10229 sum += ppi_adjust;
10230 maybe_label = c != ':';
10232 while (c);
10233 return sum;
10235 return 0;
10238 /* Return TRUE for a valid displacement for the REG+disp addressing
10239 with MODE. */
10240 bool
10241 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10242 bool allow_zero)
10244 if (! CONST_INT_P (op))
10245 return false;
10247 if (TARGET_SHMEDIA)
10249 int size;
10251 /* Check if this is the address of an unaligned load / store. */
10252 if (mode == VOIDmode)
10253 return satisfies_constraint_I06 (op);
10255 size = GET_MODE_SIZE (mode);
10256 return (!(INTVAL (op) & (size - 1))
10257 && INTVAL (op) >= -512 * size
10258 && INTVAL (op) < 512 * size);
10260 else
10262 const HOST_WIDE_INT offset = INTVAL (op);
10263 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10264 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10266 /* If the mode does not support any displacement always return false.
10267 Even though an index of '0' is actually always valid, it will cause
10268 troubles when e.g. a DFmode move is split into two SFmode moves,
10269 where one SFmode move will have index '0' and the other move will
10270 have index '4'. */
10271 if (!allow_zero && max_disp < 1)
10272 return false;
10274 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10278 /* Recognize an RTL expression that is a valid memory address for
10279 an instruction.
10280 The MODE argument is the machine mode for the MEM expression
10281 that wants to use this address.
10282 Allow REG
10283 REG+disp
10284 REG+r0
10285 REG++
10286 --REG
10288 GBR+disp */
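/* For illustration (assumed examples, not from the original sources), the
   accepted forms correspond to RTL such as
     (reg Rn)
     (plus (reg Rn) (const_int 8))
     (plus (reg Rn) (reg R0))
     (post_inc (reg Rn)) / (pre_dec (reg Rn))
     (plus (reg GBR) (const_int disp))
   subject to the mode-dependent displacement checks below.  */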
10289 static bool
10290 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10292 if (! ALLOW_INDEXED_ADDRESS
10293 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10294 return false;
10296 if (REG_P (x) && REGNO (x) == GBR_REG)
10297 return true;
10299 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10300 return true;
10301 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10302 && ! TARGET_SHMEDIA
10303 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10304 return true;
10305 else if (GET_CODE (x) == PLUS)
10307 rtx xop0 = XEXP (x, 0);
10308 rtx xop1 = XEXP (x, 1);
10310 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10311 return gbr_displacement (xop1, mode);
10313 if (GET_MODE_SIZE (mode) <= 8
10314 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10315 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10316 return true;
10318 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10319 || ((xop0 == stack_pointer_rtx
10320 || xop0 == hard_frame_pointer_rtx)
10321 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10322 || ((xop1 == stack_pointer_rtx
10323 || xop1 == hard_frame_pointer_rtx)
10324 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10325 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10326 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10327 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10328 && TARGET_FMOVD && mode == DFmode)))
10330 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10331 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10332 return true;
10333 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10334 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10335 return true;
10339 return false;
10342 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10343 isn't protected by a PIC unspec. */
10344 bool
10345 nonpic_symbol_mentioned_p (rtx x)
10347 const char *fmt;
10348 int i;
10350 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10351 || GET_CODE (x) == PC)
10352 return true;
10354 /* We don't want to look into the possible MEM location of a
10355 CONST_DOUBLE, since we're not going to use it, in general. */
10356 if (GET_CODE (x) == CONST_DOUBLE)
10357 return false;
10359 if (GET_CODE (x) == UNSPEC
10360 && (XINT (x, 1) == UNSPEC_PIC
10361 || XINT (x, 1) == UNSPEC_GOT
10362 || XINT (x, 1) == UNSPEC_GOTOFF
10363 || XINT (x, 1) == UNSPEC_GOTPLT
10364 || XINT (x, 1) == UNSPEC_GOTTPOFF
10365 || XINT (x, 1) == UNSPEC_DTPOFF
10366 || XINT (x, 1) == UNSPEC_TPOFF
10367 || XINT (x, 1) == UNSPEC_PLT
10368 || XINT (x, 1) == UNSPEC_SYMOFF
10369 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10370 return false;
10372 fmt = GET_RTX_FORMAT (GET_CODE (x));
10373 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10375 if (fmt[i] == 'E')
10377 int j;
10378 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10379 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10380 return true;
10382 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10383 return true;
10386 return false;
10389 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10390 @GOTOFF in `reg'. */
10392 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10393 rtx reg)
10395 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10396 return orig;
10398 if (GET_CODE (orig) == LABEL_REF
10399 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10401 if (reg == NULL_RTX)
10402 reg = gen_reg_rtx (Pmode);
10404 emit_insn (gen_symGOTOFF2reg (reg, orig));
10405 return reg;
10407 else if (GET_CODE (orig) == SYMBOL_REF)
10409 if (reg == NULL_RTX)
10410 reg = gen_reg_rtx (Pmode);
10412 emit_insn (gen_symGOT2reg (reg, orig));
10413 return reg;
10415 return orig;
10418 /* Given a (logical) mode size and an offset in bytes, try to find the
10419 appropriate displacement value for a mov insn. On SH the displacements
10420 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10421 15 bytes in QImode. To compensate for this we create a new base address by
10422 adding an adjustment value to it.
10424 If the originally requested offset is greater than 127 we prefer using
10425 values 124..127 over 128..131 to increase opportunities to use the
10426 add #imm, Rn insn.
10428 In some cases it is possible that a requested offset might seem unaligned
10429 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10430 This is compensated by adjusting the base address so that the effective
10431 address of the displacement move insn will be aligned.
10433 This is not the best possible way of rebasing the base address, as it
10434 does not look at other displacement addressings that may be present around it.
10435 In some cases this can create more base address adjustments than would
10436 actually be necessary. */
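/* Worked example (for illustration; the numbers follow from the code below):
   an SImode access at offset 68 exceeds the 0..60 displacement range of
   mov.l @(disp,Rn).  sh_find_mov_disp_adjust then yields
   offset_adjust = 64 and mov_disp = 4, i.e. roughly

     add    #64,Rn
     mov.l  @(4,Rn),Rm

   so the remaining displacement fits the insn again.  */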
10437 struct disp_adjust
10439 rtx offset_adjust;
10440 rtx mov_disp;
10443 static struct disp_adjust
10444 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10446 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10448 /* Do not try to use SH2A's large displacements here, because this would
10449 effectively disable the small displacement insns. */
10450 const int mode_sz = GET_MODE_SIZE (mode);
10451 const int mov_insn_sz = mov_insn_size (mode, false);
10452 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10453 const int max_disp_next = max_disp + mov_insn_sz;
10454 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10455 HOST_WIDE_INT offset_adjust;
10457 /* In some cases this actually does happen and we must check for it. */
10458 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10459 return res;
10461 /* Keeps the previous behavior for QImode displacement addressing.
10462 This just decides how the offset is re-based. Removing this special
10463 case will result in slightly bigger code on average, but it's not that
10464 bad actually. */
10465 if (mov_insn_sz == 1)
10466 align_modifier = 0;
10468 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10470 if (mode_sz + offset - offset_adjust <= max_disp_next)
10472 res.offset_adjust = GEN_INT (offset_adjust);
10473 res.mov_disp = GEN_INT (offset - offset_adjust);
10476 return res;
10479 /* Try to modify an illegitimate address and make it legitimate.
10480 If we find one, return the new, valid address.
10481 Otherwise, return the original address. */
10482 static rtx
10483 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10485 if (flag_pic)
10486 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10488 if (TARGET_SHMEDIA)
10489 return x;
10491 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10492 || (TARGET_SH2E && mode == SFmode))
10493 return x;
10495 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10496 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10498 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10499 INTVAL (XEXP (x, 1)));
10501 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10503 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10504 adj.offset_adjust, NULL_RTX, 0,
10505 OPTAB_LIB_WIDEN);
10506 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10509 return x;
10512 /* Attempt to replace *p, which is an address that needs reloading, with
10513 a valid memory address for an operand of mode MODE.
10514 Like for sh_legitimize_address, for the SH we try to get a normal form
10515 of the address. That will allow inheritance of the address reloads. */
10516 bool
10517 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10518 int itype)
10520 enum reload_type type = (enum reload_type) itype;
10521 const int mode_sz = GET_MODE_SIZE (mode);
10523 if (sh_lra_p ())
10524 return false;
10526 if (! ALLOW_INDEXED_ADDRESS
10527 && GET_CODE (*p) == PLUS
10528 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10530 *p = copy_rtx (*p);
10531 push_reload (*p, NULL_RTX, p, NULL,
10532 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10533 return true;
10536 if (! ALLOW_INDEXED_ADDRESS
10537 && GET_CODE (*p) == PLUS
10538 && GET_CODE (XEXP (*p, 0)) == PLUS)
10540 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10541 XEXP (XEXP (*p, 0), 1));
10542 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10543 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10544 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10545 return true;
10548 if (TARGET_SHMEDIA)
10549 return false;
10551 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10552 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10553 && (ALLOW_INDEXED_ADDRESS
10554 || XEXP (*p, 0) == stack_pointer_rtx
10555 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10557 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10558 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10560 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10562 push_reload (*p, NULL_RTX, p, NULL,
10563 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10564 return true;
10567 if (TARGET_SH2E && mode == SFmode)
10569 *p = copy_rtx (*p);
10570 push_reload (*p, NULL_RTX, p, NULL,
10571 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10572 return true;
10575 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10576 moves because then reload has a problem figuring out the constraint
10577 that the move insn target/source reg must be R0.
10578 Or maybe some handling is wrong in sh_secondary_reload for this
10579 to work properly? */
10580 if ((mode_sz == 4 || mode_sz == 8)
10581 && ! (TARGET_SH4 && mode == DFmode)
10582 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10584 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10585 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10586 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10587 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10588 return true;
10592 /* We must re-recognize what we created before. */
10593 if (GET_CODE (*p) == PLUS
10594 && (mode_sz == 4 || mode_sz == 8)
10595 && GET_CODE (XEXP (*p, 0)) == PLUS
10596 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10597 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10598 && CONST_INT_P (XEXP (*p, 1))
10599 && ! (TARGET_SH2E && mode == SFmode))
10601 /* Because this address is so complex, we know it must have
10602 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10603 it is already unshared, and needs no further unsharing. */
10604 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10605 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10606 return true;
10609 return false;
10612 /* In the name of slightly smaller debug output, and to cater to
10613 general assembler lossage, recognize various UNSPEC sequences
10614 and turn them back into a direct symbol reference. */
10615 static rtx
10616 sh_delegitimize_address (rtx orig_x)
10618 rtx x, y;
10620 orig_x = delegitimize_mem_from_attrs (orig_x);
10622 x = orig_x;
10623 if (MEM_P (x))
10624 x = XEXP (x, 0);
10625 if (GET_CODE (x) == CONST)
10627 y = XEXP (x, 0);
10628 if (GET_CODE (y) == UNSPEC)
10630 if (XINT (y, 1) == UNSPEC_GOT
10631 || XINT (y, 1) == UNSPEC_GOTOFF
10632 || XINT (y, 1) == UNSPEC_SYMOFF)
10633 return XVECEXP (y, 0, 0);
10634 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10636 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10638 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10640 if (GET_CODE (symplt) == UNSPEC
10641 && XINT (symplt, 1) == UNSPEC_PLT)
10642 return XVECEXP (symplt, 0, 0);
10645 else if (TARGET_SHMEDIA
10646 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10647 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10649 rtx offset = XVECEXP (y, 0, 1);
10651 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10652 if (MEM_P (orig_x))
10653 x = replace_equiv_address_nv (orig_x, x);
10654 return x;
10659 return orig_x;
10662 /* Mark the use of a constant in the literal table. If the constant
10663 has multiple labels, make it unique. */
10664 static rtx
10665 mark_constant_pool_use (rtx x)
10667 rtx_insn *insn, *lab;
10668 rtx pattern;
10670 if (x == NULL_RTX)
10671 return x;
10673 switch (GET_CODE (x))
10675 case LABEL_REF:
10676 x = XEXP (x, 0);
10677 case CODE_LABEL:
10678 break;
10679 default:
10680 return x;
10683 /* Get the first label in the list of labels for the same constant
10684 and delete the other labels in the list. */
10685 lab = as_a <rtx_insn *> (x);
10686 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10688 if (!LABEL_P (insn)
10689 || LABEL_REFS (insn) != NEXT_INSN (insn))
10690 break;
10691 lab = insn;
10694 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10695 as_a<rtx_insn *> (insn)->set_deleted ();
10697 /* Mark constants in a window. */
10698 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10700 if (!NONJUMP_INSN_P (insn))
10701 continue;
10703 pattern = PATTERN (insn);
10704 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10705 continue;
10707 switch (XINT (pattern, 1))
10709 case UNSPECV_CONST2:
10710 case UNSPECV_CONST4:
10711 case UNSPECV_CONST8:
10712 XVECEXP (pattern, 0, 1) = const1_rtx;
10713 break;
10714 case UNSPECV_WINDOW_END:
10715 if (XVECEXP (pattern, 0, 0) == x)
10716 return lab;
10717 break;
10718 case UNSPECV_CONST_END:
10719 return lab;
10720 default:
10721 break;
10725 return lab;
10728 /* Return true if it's possible to redirect BRANCH1 to the destination
10729 of an unconditional jump BRANCH2. We only want to do this if the
10730 resulting branch will have a short displacement. */
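/* Note (an approximation for exposition): SH conditional branches reach only
   a few hundred bytes, so the loops below give up once the scanned distance
   exceeds 256 bytes in either direction.  */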
10731 bool
10732 sh_can_redirect_branch (rtx_insn *branch1, rtx_insn *branch2)
10734 if (flag_expensive_optimizations && simplejump_p (branch2))
10736 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10737 rtx_insn *insn;
10738 int distance;
10740 for (distance = 0, insn = NEXT_INSN (branch1);
10741 insn && distance < 256;
10742 insn = PREV_INSN (insn))
10744 if (insn == dest)
10745 return true;
10746 else
10747 distance += get_attr_length (insn);
10749 for (distance = 0, insn = NEXT_INSN (branch1);
10750 insn && distance < 256;
10751 insn = NEXT_INSN (insn))
10753 if (insn == dest)
10754 return true;
10755 else
10756 distance += get_attr_length (insn);
10759 return false;
10762 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10763 bool
10764 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10765 unsigned int new_reg)
10767 /* Interrupt functions can only use registers that have already been
10768 saved by the prologue, even if they would normally be
10769 call-clobbered. */
10770 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10771 return false;
10773 return true;
10776 /* Function to update the integer COST
10777 based on the relationship between INSN that is dependent on
10778 DEP_INSN through the dependence LINK. The default is to make no
10779 adjustment to COST. This can be used for example to specify to
10780 the scheduler that an output- or anti-dependence does not incur
10781 the same cost as a data-dependence. The return value should be
10782 the new value for COST. */
10783 static int
10784 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10785 rtx_insn *dep_insn, int cost)
10787 rtx reg, use_pat;
10789 if (TARGET_SHMEDIA)
10791 /* On SHmedia, if the dependence is an anti-dependence or
10792 output-dependence, there is no cost. */
10793 if (REG_NOTE_KIND (link) != 0)
10795 /* However, dependencies between target register loads and
10796 uses of the register in a subsequent block that are separated
10797 by a conditional branch are not modelled - we have to make do with
10798 the anti-dependency between the target register load and the
10799 conditional branch that ends the current block. */
10800 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10801 && GET_CODE (PATTERN (dep_insn)) == SET
10802 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10803 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10804 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10806 int orig_cost = cost;
10807 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10808 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10809 ? insn : JUMP_LABEL (insn));
10810 /* On the likely path, the branch costs 1, on the unlikely path,
10811 it costs 3. */
10812 cost--;
10814 target = next_active_insn (target);
10815 while (target && ! flow_dependent_p (target, dep_insn)
10816 && --cost > 0);
10817 /* If two branches are executed in immediate succession, with the
10818 first branch properly predicted, this causes a stall at the
10819 second branch, hence we won't need the target for the
10820 second branch for two cycles after the launch of the first
10821 branch. */
10822 if (cost > orig_cost - 2)
10823 cost = orig_cost - 2;
10825 else
10826 cost = 0;
10829 else if (get_attr_is_mac_media (insn)
10830 && get_attr_is_mac_media (dep_insn))
10831 cost = 1;
10833 else if (! reload_completed
10834 && GET_CODE (PATTERN (insn)) == SET
10835 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10836 && GET_CODE (PATTERN (dep_insn)) == SET
10837 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10838 && cost < 4)
10839 cost = 4;
10840 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10841 that is needed at the target. */
10842 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10843 && ! flow_dependent_p (insn, dep_insn))
10844 cost--;
10846 else if (REG_NOTE_KIND (link) == 0)
10848 enum attr_type type;
10849 rtx dep_set;
10851 if (recog_memoized (insn) < 0
10852 || recog_memoized (dep_insn) < 0)
10853 return cost;
10855 dep_set = single_set (dep_insn);
10857 /* The latency that we specify in the scheduling description refers
10858 to the actual output, not to an auto-increment register; for that,
10859 the latency is one. */
10860 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10862 rtx set = single_set (insn);
10864 if (set
10865 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10866 && (!MEM_P (SET_DEST (set))
10867 || !reg_mentioned_p (SET_DEST (dep_set),
10868 XEXP (SET_DEST (set), 0))))
10869 cost = 1;
10871 /* The only input for a call that is timing-critical is the
10872 function's address. */
10873 if (CALL_P (insn))
10875 rtx call = get_call_rtx_from (insn);
10876 if (call
10877 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10878 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10879 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10880 cost -= TARGET_SH4_300 ? 3 : 6;
10882 /* Likewise, the most timing critical input for an sfuncs call
10883 is the function address. However, sfuncs typically start
10884 using their arguments pretty quickly.
10885 Assume a four cycle delay for SH4 before they are needed.
10886 Cached ST40-300 calls are quicker, so assume only a one
10887 cycle delay there.
10888 ??? Maybe we should encode the delays till input registers
10889 are needed by sfuncs into the sfunc call insn. */
10890 /* All sfunc calls are parallels with at least four components.
10891 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10892 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10893 && XVECLEN (PATTERN (insn), 0) >= 4
10894 && (reg = sfunc_uses_reg (insn)))
10896 if (! reg_set_p (reg, dep_insn))
10897 cost -= TARGET_SH4_300 ? 1 : 4;
10899 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10901 enum attr_type dep_type = get_attr_type (dep_insn);
10903 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10904 cost--;
10905 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10906 && (type = get_attr_type (insn)) != TYPE_CALL
10907 && type != TYPE_SFUNC)
10908 cost--;
10909 /* When the preceding instruction loads the shift amount of
10910 the following SHAD/SHLD, the latency of the load is increased
10911 by 1 cycle. */
10912 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10913 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10914 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10915 XEXP (SET_SRC (single_set (insn)),
10916 1)))
10917 cost++;
10918 /* When an LS group instruction with a latency of less than
10919 3 cycles is followed by a double-precision floating-point
10920 instruction, FIPR, or FTRV, the latency of the first
10921 instruction is increased to 3 cycles. */
10922 else if (cost < 3
10923 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10924 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10925 cost = 3;
10926 /* The lsw register of a double-precision computation is ready one
10927 cycle earlier. */
10928 else if (reload_completed
10929 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10930 && (use_pat = single_set (insn))
10931 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10932 SET_SRC (use_pat)))
10933 cost -= 1;
10935 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10936 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10937 cost -= 1;
10939 else if (TARGET_SH4_300)
10941 /* Stores need their input register two cycles later. */
10942 if (dep_set && cost >= 1
10943 && ((type = get_attr_type (insn)) == TYPE_STORE
10944 || type == TYPE_PSTORE
10945 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10947 rtx set = single_set (insn);
10949 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10950 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10952 cost -= 2;
10953 /* But don't reduce the cost below 1 if the address depends
10954 on a side effect of dep_insn. */
10955 if (cost < 1
10956 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10957 cost = 1;
10962 /* An anti-dependence penalty of two applies if the first insn is a double
10963 precision fadd / fsub / fmul. */
10964 else if (!TARGET_SH4_300
10965 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10966 && recog_memoized (dep_insn) >= 0
10967 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10968 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10969 /* A lot of alleged anti-flow dependences are fake,
10970 so check this one is real. */
10971 && flow_dependent_p (dep_insn, insn))
10972 cost = 2;
10974 return cost;
10977 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10978 if DEP_INSN is anti-flow dependent on INSN. */
10979 static bool
10980 flow_dependent_p (rtx insn, rtx dep_insn)
10982 rtx tmp = PATTERN (insn);
10984 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10985 return tmp == NULL_RTX;
10988 /* A helper function for flow_dependent_p called through note_stores. */
10989 static void
10990 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10992 rtx * pinsn = (rtx *) data;
10994 if (*pinsn && reg_referenced_p (x, *pinsn))
10995 *pinsn = NULL_RTX;
10998 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10999 'special function' patterns (type sfunc) that clobber pr, but that
11000 do not look like function calls to leaf_function_p. Hence we must
11001 do this extra check. */
11002 static int
11003 sh_pr_n_sets (void)
11005 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11008 /* Return where to allocate pseudo for a given hard register initial
11009 value. */
11010 static rtx
11011 sh_allocate_initial_value (rtx hard_reg)
11013 rtx x;
11015 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11017 if (crtl->is_leaf
11018 && ! sh_pr_n_sets ()
11019 && ! (TARGET_SHCOMPACT
11020 && ((crtl->args.info.call_cookie
11021 & ~ CALL_COOKIE_RET_TRAMP (1))
11022 || crtl->saves_all_registers)))
11023 x = hard_reg;
11024 else
11025 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11027 else
11028 x = NULL_RTX;
11030 return x;
11033 /* This function returns "2" to indicate dual issue for the SH4
11034 processor. To be used by the DFA pipeline description. */
11035 static int
11036 sh_issue_rate (void)
11038 if (TARGET_SUPERSCALAR)
11039 return 2;
11040 else
11041 return 1;
11044 /* Functions for ready queue reordering for sched1. */
11046 /* Get weight for mode for a set x. */
11047 static short
11048 find_set_regmode_weight (rtx x, machine_mode mode)
11050 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11051 return 1;
11052 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11054 if (REG_P (SET_DEST (x)))
11056 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11057 return 1;
11058 else
11059 return 0;
11061 return 1;
11063 return 0;
11066 /* Get regmode weight for insn. */
11067 static short
11068 find_insn_regmode_weight (rtx insn, machine_mode mode)
11070 short reg_weight = 0;
11071 rtx x;
11073 /* Increment weight for each register born here. */
11074 x = PATTERN (insn);
11075 reg_weight += find_set_regmode_weight (x, mode);
11076 if (GET_CODE (x) == PARALLEL)
11078 int j;
11079 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11081 x = XVECEXP (PATTERN (insn), 0, j);
11082 reg_weight += find_set_regmode_weight (x, mode);
11085 /* Decrement weight for each register that dies here. */
11086 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11088 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11090 rtx note = XEXP (x, 0);
11091 if (REG_P (note) && GET_MODE (note) == mode)
11092 reg_weight--;
11095 return reg_weight;
11098 /* Calculate regmode weights for all insns of a basic block. */
11099 static void
11100 find_regmode_weight (basic_block b, machine_mode mode)
11102 rtx_insn *insn, *next_tail, *head, *tail;
11104 get_ebb_head_tail (b, b, &head, &tail);
11105 next_tail = NEXT_INSN (tail);
11107 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11109 /* Handle register life information. */
11110 if (!INSN_P (insn))
11111 continue;
11113 if (mode == SFmode)
11114 INSN_REGMODE_WEIGHT (insn, mode) =
11115 find_insn_regmode_weight (insn, mode)
11116 + 2 * find_insn_regmode_weight (insn, DFmode);
11117 else if (mode == SImode)
11118 INSN_REGMODE_WEIGHT (insn, mode) =
11119 find_insn_regmode_weight (insn, mode)
11120 + 2 * find_insn_regmode_weight (insn, DImode);
11124 /* Comparison function for ready queue sorting. */
11125 static int
11126 rank_for_reorder (const void *x, const void *y)
11128 rtx_insn *tmp = *(rtx_insn * const *) y;
11129 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11131 /* The insn in a schedule group should be issued the first. */
11132 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11133 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11135 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11136 minimizes instruction movement, thus minimizing sched's effect on
11137 register pressure. */
11138 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11141 /* Resort the array A in which only element at index N may be out of order. */
11142 static void
11143 swap_reorder (rtx_insn **a, int n)
11145 rtx_insn *insn = a[n - 1];
11146 int i = n - 2;
11148 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11150 a[i + 1] = a[i];
11151 i -= 1;
11153 a[i + 1] = insn;
11156 /* Sort the ready list by ascending priority. */
11157 static void
11158 ready_reorder (rtx_insn **ready, int nready)
11160 if (nready == 2)
11161 swap_reorder (ready, nready);
11162 else if (nready > 2)
11163 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11166 /* Count life regions of r0 for a block. */
11167 static int
11168 find_r0_life_regions (basic_block b)
11170 rtx_insn *end, *insn;
11171 rtx pset;
11172 rtx r0_reg;
11173 int live;
11174 int set;
11175 int death = 0;
11177 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11179 set = 1;
11180 live = 1;
11182 else
11184 set = 0;
11185 live = 0;
11188 insn = BB_HEAD (b);
11189 end = BB_END (b);
11190 r0_reg = gen_rtx_REG (SImode, R0_REG);
11191 while (1)
11193 if (INSN_P (insn))
11195 if (find_regno_note (insn, REG_DEAD, R0_REG))
11197 death++;
11198 live = 0;
11200 if (!live
11201 && (pset = single_set (insn))
11202 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11203 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11205 set++;
11206 live = 1;
11209 if (insn == end)
11210 break;
11211 insn = NEXT_INSN (insn);
11213 return set - death;
11216 /* Calculate regmode weights for all insns of all basic blocks. */
11217 static void
11218 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11219 int verbose ATTRIBUTE_UNUSED,
11220 int old_max_uid)
11222 basic_block b;
11224 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11225 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11226 r0_life_regions = 0;
11228 FOR_EACH_BB_REVERSE_FN (b, cfun)
11230 find_regmode_weight (b, SImode);
11231 find_regmode_weight (b, SFmode);
11232 if (!reload_completed)
11233 r0_life_regions += find_r0_life_regions (b);
11236 CURR_REGMODE_PRESSURE (SImode) = 0;
11237 CURR_REGMODE_PRESSURE (SFmode) = 0;
11240 /* Cleanup. */
11241 static void
11242 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11243 int verbose ATTRIBUTE_UNUSED)
11245 if (regmode_weight[0])
11247 free (regmode_weight[0]);
11248 regmode_weight[0] = NULL;
11250 if (regmode_weight[1])
11252 free (regmode_weight[1]);
11253 regmode_weight[1] = NULL;
11257 /* The set of supported scalar modes differs from the default in that TImode
11258 is not supported for 32-bit SHMEDIA. */
11259 static bool
11260 sh_scalar_mode_supported_p (machine_mode mode)
11262 if (TARGET_SHMEDIA32 && mode == TImode)
11263 return false;
11265 return default_scalar_mode_supported_p (mode);
11268 /* Cache can_issue_more so that we can return it from reorder2. Also,
11269 keep count of register pressure for SImode and SFmode. */
11270 static int
11271 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11272 int sched_verbose ATTRIBUTE_UNUSED,
11273 rtx_insn *insn,
11274 int can_issue_more)
11276 if (GET_CODE (PATTERN (insn)) != USE
11277 && GET_CODE (PATTERN (insn)) != CLOBBER)
11278 cached_can_issue_more = can_issue_more - 1;
11279 else
11280 cached_can_issue_more = can_issue_more;
11282 if (reload_completed)
11283 return cached_can_issue_more;
11285 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11286 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11288 return cached_can_issue_more;
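/* Scheduler initialization hook: reset the SImode / SFmode register
   pressure counters. */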
11291 static void
11292 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11293 int verbose ATTRIBUTE_UNUSED,
11294 int veclen ATTRIBUTE_UNUSED)
11296 CURR_REGMODE_PRESSURE (SImode) = 0;
11297 CURR_REGMODE_PRESSURE (SFmode) = 0;
11300 /* Some magic numbers. */
11301 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11302 functions that already have high pressure on r0. */
11303 #define R0_MAX_LIFE_REGIONS 2
11304 /* Register Pressure thresholds for SImode and SFmode registers. */
11305 #define SIMODE_MAX_WEIGHT 5
11306 #define SFMODE_MAX_WEIGHT 10
11308 /* Return true if the pressure is high for MODE. */
11309 static bool
11310 high_pressure (machine_mode mode)
11312 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11313 functions that already have high pressure on r0. */
11314 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11315 return true;
11317 if (mode == SFmode)
11318 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11319 else
11320 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11323 /* Reorder ready queue if register pressure is high. */
11324 static int
11325 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11326 int sched_verbose ATTRIBUTE_UNUSED,
11327 rtx_insn **ready,
11328 int *n_readyp,
11329 int clock_var ATTRIBUTE_UNUSED)
11331 if (reload_completed)
11332 return sh_issue_rate ();
11334 if (high_pressure (SFmode) || high_pressure (SImode))
11336 ready_reorder (ready, *n_readyp);
11339 return sh_issue_rate ();
11342 /* Skip cycles if the current register pressure is high. */
11343 static int
11344 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11345 int sched_verbose ATTRIBUTE_UNUSED,
11346 rtx_insn **ready ATTRIBUTE_UNUSED,
11347 int *n_readyp ATTRIBUTE_UNUSED,
11348 int clock_var ATTRIBUTE_UNUSED)
11350 if (reload_completed)
11351 return cached_can_issue_more;
11353 if (high_pressure(SFmode) || high_pressure (SImode))
11354 skip_cycles = 1;
11356 return cached_can_issue_more;
11359 /* Skip cycles without sorting the ready queue. This will move insns from
11360 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11361 queue by sh_reorder. */
11363 /* Generally, skipping this many cycles is sufficient for all insns to move
11364 from Q -> R. */
11365 #define MAX_SKIPS 8
11367 static int
11368 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11369 int sched_verbose ATTRIBUTE_UNUSED,
11370 rtx_insn *insn ATTRIBUTE_UNUSED,
11371 int last_clock_var,
11372 int clock_var,
11373 int *sort_p)
11375 if (reload_completed)
11376 return 0;
11378 if (skip_cycles)
11380 if ((clock_var - last_clock_var) < MAX_SKIPS)
11382 *sort_p = 0;
11383 return 1;
11385 /* If this is the last cycle we are skipping, allow reordering of R. */
11386 if ((clock_var - last_clock_var) == MAX_SKIPS)
11388 *sort_p = 1;
11389 return 1;
11393 skip_cycles = 0;
11395 return 0;
11398 /* SHmedia requires registers for branches, so we can't generate new
11399 branches past reload. */
11400 static bool
11401 sh_cannot_modify_jumps_p (void)
11403 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
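/* Return the register class to be used for branch target registers:
   TARGET_REGS on SHmedia, NO_REGS otherwise. */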
11406 static reg_class_t
11407 sh_target_reg_class (void)
11409 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
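/* Return nonzero if the branch target register optimization may use
   callee-saved target registers; this requires that space has been reserved
   for them, that TARGET_SAVE_ALL_TARGET_REGS is set when running after
   prologue / epilogue generation, and that the function saves enough
   registers to make it worthwhile. */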
11412 static bool
11413 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11415 if (! shmedia_space_reserved_for_target_registers)
11416 return 0;
11417 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11418 return 0;
11420 HARD_REG_SET dummy;
11421 if (calc_live_regs (&dummy) >= 6 * 8)
11422 return 1;
11423 return 0;
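/* Return true if RECORD_TYPE should use the Microsoft-style bit-field
   layout; this is the case for SH5 and for the Renesas / Hitachi ABIs. */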
11426 static bool
11427 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11429 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11433 On the SH1..SH4, the trampoline looks like
11434 2 0002 D202 mov.l l2,r2
11435 1 0000 D301 mov.l l1,r3
11436 3 0004 422B jmp @r2
11437 4 0006 0009 nop
11438 5 0008 00000000 l1: .long area
11439 6 000c 00000000 l2: .long function
11441 SH5 (compact) uses r1 instead of r3 for the static chain. */
11444 /* Emit RTL insns to initialize the variable parts of a trampoline.
11445 FNADDR is an RTX for the address of the function's pure code.
11446 CXT is an RTX for the static chain value for the function. */
11447 static void
11448 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11450 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11451 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11453 if (TARGET_SHMEDIA64)
11455 rtx tramp_templ;
11456 int fixed_len;
11458 rtx movi1 = GEN_INT (0xcc000010);
11459 rtx shori1 = GEN_INT (0xc8000010);
11460 rtx src, dst;
11462 /* The following trampoline works within a +- 128 KB range for cxt:
11463 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11464 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11465 gettr tr1,r1; blink tr0,r63 */
11466 /* Address rounding makes it hard to compute the exact bounds of the
11467 offset for this trampoline, but we have a rather generous offset
11468 range, so frame_offset should do fine as an upper bound. */
11469 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11471 /* ??? could optimize this trampoline initialization
11472 by writing DImode words with two insns each. */
11473 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11474 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11475 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11476 insn = gen_rtx_AND (DImode, insn, mask);
11477 /* OR in the ptb/u .,tr1 opcode pattern. */
11478 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11479 insn = force_operand (insn, NULL_RTX);
11480 insn = gen_lowpart (SImode, insn);
11481 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11482 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11483 insn = gen_rtx_AND (DImode, insn, mask);
11484 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11485 insn = gen_lowpart (SImode, insn);
11486 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11487 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11488 insn = gen_rtx_AND (DImode, insn, mask);
11489 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11490 insn = gen_lowpart (SImode, insn);
11491 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11492 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11493 insn = gen_rtx_AND (DImode, insn, mask);
11494 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11495 insn = gen_lowpart (SImode, insn);
11496 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11497 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11498 insn = gen_rtx_AND (DImode, insn, mask);
11499 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11500 insn = gen_lowpart (SImode, insn);
11501 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11502 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11503 GEN_INT (0x6bf10600));
11504 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11505 GEN_INT (0x4415fc10));
11506 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11507 GEN_INT (0x4401fff0));
11508 emit_insn (gen_ic_invalidate_line (tramp));
11509 return;
11511 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11512 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11514 tramp_templ = gen_datalabel_ref (tramp_templ);
11515 dst = tramp_mem;
11516 src = gen_const_mem (BLKmode, tramp_templ);
11517 set_mem_align (dst, 256);
11518 set_mem_align (src, 64);
11519 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11521 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11522 emit_move_insn (adjust_address (tramp_mem, Pmode,
11523 fixed_len + GET_MODE_SIZE (Pmode)),
11524 cxt);
11525 emit_insn (gen_ic_invalidate_line (tramp));
11526 return;
11528 else if (TARGET_SHMEDIA)
11530 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11531 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11532 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11533 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11534 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11535 rotated right by 10, with the higher 16 bits of every 32 selected. */
11536 rtx movishori
11537 = force_reg (V2HImode, (simplify_gen_subreg
11538 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11539 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11540 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11542 fnaddr = force_reg (SImode, fnaddr);
11543 cxt = force_reg (SImode, cxt);
11544 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11545 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11546 movishori));
11547 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11548 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11549 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11550 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11551 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11552 gen_rtx_SUBREG (V2HImode, cxt, 0),
11553 movishori));
11554 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11555 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11556 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11557 if (TARGET_LITTLE_ENDIAN)
11559 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11560 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11562 else
11564 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11565 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11567 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11568 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11569 emit_insn (gen_ic_invalidate_line (tramp));
11570 return;
11572 else if (TARGET_SHCOMPACT)
11574 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11575 return;
11577 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11578 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11579 SImode));
11580 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11581 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11582 SImode));
11583 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11584 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11585 if (TARGET_HARD_SH4 || TARGET_SH5)
11587 if (!TARGET_INLINE_IC_INVALIDATE
11588 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11589 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11590 FUNCTION_ORDINARY),
11591 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11592 else
11593 emit_insn (gen_ic_invalidate_line (tramp));
11597 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11598 static rtx
11599 sh_trampoline_adjust_address (rtx tramp)
11601 if (TARGET_SHMEDIA)
11602 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11603 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11604 return tramp;
11607 /* FIXME: This is overly conservative. A SHcompact function that
11608 receives arguments ``by reference'' will have them stored in its
11609 own stack frame, so it must not pass pointers or references to
11610 these arguments to other functions by means of sibling calls. */
11611 /* If PIC, we cannot make sibling calls to global functions
11612 because the PLT requires r12 to be live. */
11613 static bool
11614 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11616 return (1
11617 && (! TARGET_SHCOMPACT
11618 || crtl->args.info.stack_regs == 0)
11619 && ! sh_cfun_interrupt_handler_p ()
11620 && (! flag_pic
11621 || (decl && ! TREE_PUBLIC (decl))
11622 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11625 /* Machine specific built-in functions. */
11627 struct builtin_description
11629 bool (* const is_enabled) (void);
11630 const enum insn_code icode;
11631 const char *const name;
11632 int signature;
11633 tree fndecl;
11636 static bool
11637 shmedia_builtin_p (void)
11639 return TARGET_SHMEDIA;
11642 /* This function can be used if there are any built-ins that are not for
11643 SHmedia; it is used below for the SH1 fpscr built-ins. */
11644 static bool
11645 sh1_builtin_p (void)
11647 return TARGET_SH1;
11650 /* Describe the number and signedness of arguments; arg[0] == result
11651 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11652 /* 9: 64-bit pointer, 10: 32-bit pointer. */
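/* For example, SH_BLTIN_SH_HI == { 4, 4, 1 } describes a builtin whose result
   and first argument have don't-care signedness and whose second argument
   (e.g. a shift count or control value) is unsigned, while
   SH_BLTIN_PV == { 0, 8 } describes a builtin that returns void and takes a
   single pointer argument. */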
11653 static const char signature_args[][4] =
11655 #define SH_BLTIN_V2SI2 0
11656 { 4, 4 },
11657 #define SH_BLTIN_V4HI2 1
11658 { 4, 4 },
11659 #define SH_BLTIN_V2SI3 2
11660 { 4, 4, 4 },
11661 #define SH_BLTIN_V4HI3 3
11662 { 4, 4, 4 },
11663 #define SH_BLTIN_V8QI3 4
11664 { 4, 4, 4 },
11665 #define SH_BLTIN_MAC_HISI 5
11666 { 1, 4, 4, 1 },
11667 #define SH_BLTIN_SH_HI 6
11668 { 4, 4, 1 },
11669 #define SH_BLTIN_SH_SI 7
11670 { 4, 4, 1 },
11671 #define SH_BLTIN_V4HI2V2SI 8
11672 { 4, 4, 4 },
11673 #define SH_BLTIN_V4HI2V8QI 9
11674 { 4, 4, 4 },
11675 #define SH_BLTIN_SISF 10
11676 { 4, 2 },
11677 #define SH_BLTIN_LDUA_L 11
11678 { 2, 10 },
11679 #define SH_BLTIN_LDUA_Q 12
11680 { 1, 10 },
11681 #define SH_BLTIN_STUA_L 13
11682 { 0, 10, 2 },
11683 #define SH_BLTIN_STUA_Q 14
11684 { 0, 10, 1 },
11685 #define SH_BLTIN_LDUA_L64 15
11686 { 2, 9 },
11687 #define SH_BLTIN_LDUA_Q64 16
11688 { 1, 9 },
11689 #define SH_BLTIN_STUA_L64 17
11690 { 0, 9, 2 },
11691 #define SH_BLTIN_STUA_Q64 18
11692 { 0, 9, 1 },
11693 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11694 #define SH_BLTIN_2 19
11695 #define SH_BLTIN_SU 19
11696 { 1, 2 },
11697 #define SH_BLTIN_3 20
11698 #define SH_BLTIN_SUS 20
11699 { 2, 2, 1 },
11700 #define SH_BLTIN_PSSV 21
11701 { 0, 8, 2, 2 },
11702 #define SH_BLTIN_XXUU 22
11703 #define SH_BLTIN_UUUU 22
11704 { 1, 1, 1, 1 },
11705 #define SH_BLTIN_PV 23
11706 { 0, 8 },
11707 #define SH_BLTIN_VP 24
11708 { 8, 0 },
11709 #define SH_BLTIN_UV 25
11710 { 1, 0 },
11711 #define SH_BLTIN_VU 26
11712 { 0, 1 },
11714 /* mcmv: operands considered unsigned. */
11715 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11716 /* mperm: control value considered unsigned int. */
11717 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11718 /* mshards_q: returns signed short. */
11719 /* nsb: takes long long arg, returns unsigned char. */
11720 static struct builtin_description bdesc[] =
11722 { shmedia_builtin_p,
11723 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11724 { shmedia_builtin_p,
11725 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11726 { shmedia_builtin_p,
11727 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11728 { shmedia_builtin_p,
11729 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11730 { shmedia_builtin_p,
11731 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11732 { shmedia_builtin_p,
11733 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11734 { shmedia_builtin_p,
11735 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11736 { shmedia_builtin_p,
11737 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11738 { shmedia_builtin_p,
11739 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11740 { shmedia_builtin_p,
11741 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11742 { shmedia_builtin_p,
11743 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11744 { shmedia_builtin_p,
11745 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11746 { shmedia_builtin_p,
11747 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11748 { shmedia_builtin_p,
11749 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11750 { shmedia_builtin_p,
11751 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11752 { shmedia_builtin_p,
11753 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11754 { shmedia_builtin_p,
11755 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11756 { shmedia_builtin_p,
11757 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11758 { shmedia_builtin_p,
11759 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11760 { shmedia_builtin_p,
11761 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11762 { shmedia_builtin_p,
11763 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11764 { shmedia_builtin_p,
11765 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11766 { shmedia_builtin_p,
11767 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11768 { shmedia_builtin_p,
11769 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11770 { shmedia_builtin_p,
11771 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11772 { shmedia_builtin_p,
11773 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11774 { shmedia_builtin_p,
11775 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11776 { shmedia_builtin_p,
11777 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11778 { shmedia_builtin_p,
11779 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11780 { shmedia_builtin_p,
11781 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11782 { shmedia_builtin_p,
11783 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11784 { shmedia_builtin_p,
11785 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11786 { shmedia_builtin_p,
11787 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11788 { shmedia_builtin_p,
11789 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11790 { shmedia_builtin_p,
11791 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11792 { shmedia_builtin_p,
11793 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11794 { shmedia_builtin_p,
11795 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11796 { shmedia_builtin_p,
11797 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11798 { shmedia_builtin_p,
11799 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11800 { shmedia_builtin_p,
11801 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11802 { shmedia_builtin_p,
11803 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11804 { shmedia_builtin_p,
11805 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11806 { shmedia_builtin_p,
11807 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11808 { shmedia_builtin_p,
11809 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11810 { shmedia_builtin_p,
11811 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11812 { shmedia_builtin_p,
11813 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11814 { shmedia_builtin_p,
11815 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11816 { shmedia_builtin_p,
11817 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11818 { shmedia_builtin_p,
11819 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11820 { shmedia_builtin_p,
11821 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11822 { shmedia_builtin_p,
11823 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11824 { shmedia_builtin_p,
11825 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11880 { shmedia_builtin_p,
11881 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11882 { shmedia_builtin_p,
11883 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11884 { shmedia_builtin_p,
11885 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11886 { shmedia_builtin_p,
11887 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11889 { sh1_builtin_p,
11890 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11891 { sh1_builtin_p,
11892 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11895 static void
11896 sh_init_builtins (void)
11898 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11899 memset (shared, 0, sizeof shared);
11901 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11903 builtin_description* d = &bdesc[di];
11905 if (!d->is_enabled ())
11906 continue;
11908 tree type, arg_type = NULL_TREE;
11909 int signature = d->signature;
11911 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11912 type = shared[signature];
11913 else
11915 int has_result = signature_args[signature][0] != 0;
11916 tree args[3];
11918 if ((signature_args[signature][1] & 8)
11919 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11920 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11921 continue;
11922 if (! TARGET_FPU_ANY
11923 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11924 continue;
11925 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11926 args[i] = NULL_TREE;
11927 for (int i = 3; ; i--)
11929 int arg = signature_args[signature][i];
11930 int opno = i - 1 + has_result;
11932 if (arg & 8)
11933 arg_type = ptr_type_node;
11934 else if (arg)
11935 arg_type = (*lang_hooks.types.type_for_mode)
11936 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11937 else if (i)
11938 continue;
11939 else
11940 arg_type = void_type_node;
11941 if (i == 0)
11942 break;
11943 args[i-1] = arg_type;
11945 type = build_function_type_list (arg_type, args[0], args[1],
11946 args[2], NULL_TREE);
11947 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11948 shared[signature] = type;
11950 d->fndecl =
11951 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11952 NULL, NULL_TREE);
11956 /* Implements target hook vector_mode_supported_p. */
11957 bool
11958 sh_vector_mode_supported_p (machine_mode mode)
11960 if (TARGET_FPU_ANY
11961 && ((mode == V2SFmode)
11962 || (mode == V4SFmode)
11963 || (mode == V16SFmode)))
11964 return true;
11966 else if (TARGET_SHMEDIA
11967 && ((mode == V8QImode)
11968 || (mode == V2HImode)
11969 || (mode == V4HImode)
11970 || (mode == V2SImode)))
11971 return true;
11973 return false;
11976 bool
11977 sh_frame_pointer_required (void)
11979 /* If needed override this in other tm.h files to cope with various OS
11980 lossage requiring a frame pointer. */
11981 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11982 return true;
11984 if (crtl->profile)
11985 return true;
11987 return false;
11990 /* Implements target hook dwarf_calling_convention. Return an enum
11991 dwarf_calling_convention value. */
11993 sh_dwarf_calling_convention (const_tree func)
11995 if (sh_attr_renesas_p (func))
11996 return DW_CC_GNU_renesas_sh;
11998 return DW_CC_normal;
12001 /* Returns the sh builtin decl for CODE. */
12002 static tree
12003 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12005 if (code >= ARRAY_SIZE (bdesc))
12006 return error_mark_node;
12008 if (!bdesc[code].is_enabled ())
12009 return error_mark_node;
12011 return bdesc[code].fndecl;
12014 /* Expand an expression EXP that calls a built-in function,
12015 with result going to TARGET if that's convenient
12016 (and in mode MODE if that's convenient).
12017 SUBTARGET may be used as the target for computing one of EXP's operands.
12018 IGNORE is nonzero if the value is to be ignored. */
12019 static rtx
12020 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12021 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12023 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12024 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12025 const struct builtin_description *d = &bdesc[fcode];
12026 enum insn_code icode = d->icode;
12027 int signature = d->signature;
12028 int nop = 0;
12029 rtx op[4];
12031 if (signature_args[signature][0])
12033 if (ignore)
12034 return NULL_RTX;
12036 machine_mode tmode = insn_data[icode].operand[0].mode;
12037 if (! target || GET_MODE (target) != tmode
12038 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12039 target = gen_reg_rtx (tmode);
12040 op[nop++] = target;
12042 else
12043 target = NULL_RTX;
12045 for (int i = 1; i <= 3; i++, nop++)
12047 tree arg;
12048 machine_mode opmode, argmode;
12049 tree optype;
12051 if (! signature_args[signature][i])
12052 break;
12053 arg = CALL_EXPR_ARG (exp, i - 1);
12054 if (arg == error_mark_node)
12055 return const0_rtx;
12056 if (signature_args[signature][i] & 8)
12058 opmode = ptr_mode;
12059 optype = ptr_type_node;
12061 else
12063 opmode = insn_data[icode].operand[nop].mode;
12064 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12066 argmode = TYPE_MODE (TREE_TYPE (arg));
12067 if (argmode != opmode)
12068 arg = build1 (NOP_EXPR, optype, arg);
12069 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12070 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12071 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12074 rtx pat = NULL_RTX;
12076 switch (nop)
12078 case 1:
12079 pat = (*insn_data[d->icode].genfun) (op[0]);
12080 break;
12081 case 2:
12082 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12083 break;
12084 case 3:
12085 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12086 break;
12087 case 4:
12088 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12089 break;
12090 default:
12091 gcc_unreachable ();
12093 if (! pat)
12094 return NULL_RTX;
12095 emit_insn (pat);
12096 return target;
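/* Expand a unary operation CODE on V2SF values by applying the SFmode
   operation to each of the two vector elements in turn. */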
12099 void
12100 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12102 rtx sel0 = const0_rtx;
12103 rtx sel1 = const1_rtx;
12104 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12105 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12107 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12108 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
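/* Likewise, expand a binary operation CODE on V2SF values element-wise. */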
12111 void
12112 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12114 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12116 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12117 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12120 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12121 We can allow any mode in any general register. The special registers
12122 only allow SImode. Don't allow any mode in the PR.
12124 We cannot hold DCmode values in the XD registers because alter_reg
12125 handles subregs of them incorrectly. We could work around this by
12126 spacing the XD registers like the DR registers, but this would require
12127 additional memory in every compilation to hold larger register vectors.
12128 We could hold SFmode / SCmode values in XD registers, but that
12129 would require a tertiary reload when reloading from / to memory,
12130 and a secondary reload to reload from / to general regs; that
12131 seems to be a losing proposition.
12133 We want to allow TImode in FP regs so that when V4SFmode is loaded as TImode,
12134 it won't be ferried through GP registers first. */
12135 bool
12136 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12138 if (SPECIAL_REGISTER_P (regno))
12139 return mode == SImode;
12141 if (regno == FPUL_REG)
12142 return (mode == SImode || mode == SFmode);
12144 if (FP_REGISTER_P (regno) && mode == SFmode)
12145 return true;
12147 if (mode == V2SFmode)
12149 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12150 || GENERAL_REGISTER_P (regno)))
12151 return true;
12152 else
12153 return false;
12156 if (mode == V4SFmode)
12158 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12159 || GENERAL_REGISTER_P (regno))
12160 return true;
12161 else
12162 return false;
12165 if (mode == V16SFmode)
12167 if (TARGET_SHMEDIA)
12169 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12170 return true;
12171 else
12172 return false;
12174 else
12175 return regno == FIRST_XD_REG;
12178 if (FP_REGISTER_P (regno))
12180 if (mode == SFmode
12181 || mode == SImode
12182 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12183 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12184 || mode == DCmode
12185 || (TARGET_SHMEDIA
12186 && (mode == DFmode || mode == DImode
12187 || mode == V2SFmode || mode == TImode)))
12188 && ((regno - FIRST_FP_REG) & 1) == 0)
12189 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12190 && ((regno - FIRST_FP_REG) & 3) == 0))
12191 return true;
12192 else
12193 return false;
12196 if (XD_REGISTER_P (regno))
12197 return mode == DFmode;
12199 if (TARGET_REGISTER_P (regno))
12200 return (mode == DImode || mode == SImode || mode == PDImode);
12202 if (regno == PR_REG)
12203 return mode == SImode;
12205 if (regno == FPSCR_REG)
12206 return mode == SImode;
12208 /* FIXME. This works around PR target/37633 for -O0. */
12209 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12211 unsigned int n = GET_MODE_SIZE (mode) / 8;
12213 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12214 && regno <= FIRST_GENERAL_REG + 14)
12215 return false;
12218 return true;
12221 /* Specify the modes required to caller save a given hard regno.
12222 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12223 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12224 permits integer modes on them. That makes LRA's split process
12225 unhappy. See PR55212.
12227 machine_mode
12228 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12229 machine_mode mode)
12231 if (FP_REGISTER_P (regno)
12232 && (mode == SFmode
12233 || mode == SCmode
12234 || ((mode == DFmode || mode == DCmode)
12235 && ((regno - FIRST_FP_REG) & 1) == 0)))
12236 return mode;
12238 return choose_hard_reg_mode (regno, nregs, false);
12241 /* Return true if a mode change from FROM to TO is invalid for registers
12242 in class RCLASS. */
12243 bool
12244 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12245 enum reg_class rclass)
12247 /* We want to enable the use of SUBREGs as a means to
12248 VEC_SELECT a single element of a vector. */
12250 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12251 This can be problematic when SFmode vector subregs need to be accessed
12252 on the stack with displacement addressing, as it happens with -O0.
12253 Thus we disallow the mode change for -O0. */
12254 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12255 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12257 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12259 if (TARGET_LITTLE_ENDIAN)
12261 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12262 return reg_classes_intersect_p (DF_REGS, rclass);
12264 else
12266 if (GET_MODE_SIZE (from) < 8)
12267 return reg_classes_intersect_p (DF_REGS, rclass);
12270 return false;
12273 /* Return true if registers in machine mode MODE will likely be
12274 allocated to registers in small register classes. */
12275 bool
12276 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12278 return (! TARGET_SHMEDIA);
12281 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12282 that label is used. */
12283 void
12284 sh_mark_label (rtx address, int nuses)
12286 if (GOTOFF_P (address))
12288 /* Extract the label or symbol. */
12289 address = XEXP (address, 0);
12290 if (GET_CODE (address) == PLUS)
12291 address = XEXP (address, 0);
12292 address = XVECEXP (address, 0, 0);
12294 if (GET_CODE (address) == LABEL_REF
12295 && LABEL_P (XEXP (address, 0)))
12296 LABEL_NUSES (XEXP (address, 0)) += nuses;
12299 /* Compute extra cost of moving data between one register class
12300 and another.
12302 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12303 uses this information. Hence, the general register <-> floating point
12304 register information here is not used for SFmode. */
12305 static int
12306 sh_register_move_cost (machine_mode mode,
12307 reg_class_t srcclass, reg_class_t dstclass)
12309 if (dstclass == T_REGS || dstclass == PR_REGS)
12310 return 10;
12312 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12313 return 4;
12315 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12316 && REGCLASS_HAS_FP_REG (srcclass)
12317 && REGCLASS_HAS_FP_REG (dstclass))
12318 return 4;
12320 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12321 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12323 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12324 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12325 return 9;
12327 if ((REGCLASS_HAS_FP_REG (dstclass)
12328 && REGCLASS_HAS_GENERAL_REG (srcclass))
12329 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12330 && REGCLASS_HAS_FP_REG (srcclass)))
12332 /* Discourage trying to use fp regs for a pointer. This also
12333 discourages fp regs with SImode because Pmode is an alias
12334 of SImode on this target. See PR target/48596. */
12335 int addend = (mode == Pmode) ? 40 : 0;
12337 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12338 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12341 if ((dstclass == FPUL_REGS
12342 && REGCLASS_HAS_GENERAL_REG (srcclass))
12343 || (srcclass == FPUL_REGS
12344 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12345 return 5;
12347 if ((dstclass == FPUL_REGS
12348 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12349 || (srcclass == FPUL_REGS
12350 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12351 return 7;
12353 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12354 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12355 return 20;
12357 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12358 if (TARGET_SHMEDIA
12359 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12361 if (sh_gettrcost >= 0)
12362 return sh_gettrcost;
12363 else if (!TARGET_PT_FIXED)
12364 return 100;
12367 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12368 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12369 return 4;
12371 if (TARGET_SHMEDIA
12372 || (TARGET_FMOVD
12373 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12374 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12375 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12377 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
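/* Emit an insn that loads the pointer value at ADDR into REG,
   sign-extending it from ptr_mode to Pmode when the two differ. */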
12380 static rtx
12381 emit_load_ptr (rtx reg, rtx addr)
12383 rtx mem = gen_const_mem (ptr_mode, addr);
12385 if (Pmode != ptr_mode)
12386 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12387 return emit_move_insn (reg, mem);
12390 static void
12391 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12392 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12393 tree function)
12395 CUMULATIVE_ARGS cum;
12396 int structure_value_byref = 0;
12397 rtx this_rtx, this_value, sibcall, funexp;
12398 rtx_insn *insns;
12399 tree funtype = TREE_TYPE (function);
12400 int simple_add = CONST_OK_FOR_ADD (delta);
12401 int did_load = 0;
12402 rtx scratch0, scratch1, scratch2;
12403 unsigned i;
12405 reload_completed = 1;
12406 epilogue_completed = 1;
12407 crtl->uses_only_leaf_regs = 1;
12409 emit_note (NOTE_INSN_PROLOGUE_END);
12411 /* Find the "this" pointer. We have such a wide range of ABIs for the
12412 SH that it's best to do this completely machine independently.
12413 "this" is passed as first argument, unless a structure return pointer
12414 comes first, in which case "this" comes second. */
12415 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12416 #ifndef PCC_STATIC_STRUCT_RETURN
12417 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12418 structure_value_byref = 1;
12419 #endif /* not PCC_STATIC_STRUCT_RETURN */
12420 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12422 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12424 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12426 this_rtx
12427 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12429 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12430 static chain pointer (even if you can't have nested virtual functions
12431 right now, someone might implement them sometime), and the rest of the
12432 registers are used for argument passing, are callee-saved, or reserved. */
12433 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12434 -ffixed-reg has been used. */
12435 if (! call_used_regs[0] || fixed_regs[0])
12436 error ("r0 needs to be available as a call-clobbered register");
12437 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12438 if (! TARGET_SH5)
12440 if (call_used_regs[1] && ! fixed_regs[1])
12441 scratch1 = gen_rtx_REG (ptr_mode, 1);
12442 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12443 to the location where struct return values are to be stored. */
12444 if (call_used_regs[3] && ! fixed_regs[3])
12445 scratch2 = gen_rtx_REG (Pmode, 3);
12447 else if (TARGET_SHMEDIA)
12449 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12450 if (i != REGNO (scratch0) &&
12451 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12453 scratch1 = gen_rtx_REG (ptr_mode, i);
12454 break;
12456 if (scratch1 == scratch0)
12457 error ("need a second call-clobbered general purpose register");
12458 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12459 if (call_used_regs[i] && ! fixed_regs[i])
12461 scratch2 = gen_rtx_REG (Pmode, i);
12462 break;
12464 if (scratch2 == scratch0)
12465 error ("need a call-clobbered target register");
12468 this_value = plus_constant (Pmode, this_rtx, delta);
12469 if (vcall_offset
12470 && (simple_add || scratch0 != scratch1)
12471 && strict_memory_address_p (ptr_mode, this_value))
12473 emit_load_ptr (scratch0, this_value);
12474 did_load = 1;
12477 if (!delta)
12478 ; /* Do nothing. */
12479 else if (simple_add)
12480 emit_move_insn (this_rtx, this_value);
12481 else
12483 emit_move_insn (scratch1, GEN_INT (delta));
12484 emit_insn (gen_add2_insn (this_rtx, scratch1));
12487 if (vcall_offset)
12489 rtx offset_addr;
12491 if (!did_load)
12492 emit_load_ptr (scratch0, this_rtx);
12494 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12495 if (strict_memory_address_p (ptr_mode, offset_addr))
12496 ; /* Do nothing. */
12497 else if (! TARGET_SH5 && scratch0 != scratch1)
12499 /* scratch0 != scratch1, and we have indexed loads. Get better
12500 schedule by loading the offset into r1 and using an indexed
12501 load - then the load of r1 can issue before the load from
12502 (this_rtx + delta) finishes. */
12503 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12504 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12506 else if (CONST_OK_FOR_ADD (vcall_offset))
12508 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12509 offset_addr = scratch0;
12511 else if (scratch0 != scratch1)
12513 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12514 emit_insn (gen_add2_insn (scratch0, scratch1));
12515 offset_addr = scratch0;
12517 else
12518 gcc_unreachable (); /* FIXME */
12519 emit_load_ptr (scratch0, offset_addr);
12521 if (Pmode != ptr_mode)
12522 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12523 emit_insn (gen_add2_insn (this_rtx, scratch0));
12526 /* Generate a tail call to the target function. */
12527 if (! TREE_USED (function))
12529 assemble_external (function);
12530 TREE_USED (function) = 1;
12532 funexp = XEXP (DECL_RTL (function), 0);
12533 /* If the function is overridden, so is the thunk, hence we don't
12534 need GOT addressing even if this is a public symbol. */
12535 #if 0
12536 if (TARGET_SH1 && ! flag_weak)
12537 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12538 else
12539 #endif
12540 if (TARGET_SH2 && flag_pic)
12542 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12543 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12545 else
12547 if (TARGET_SHMEDIA && flag_pic)
12549 funexp = gen_sym2PIC (funexp);
12550 PUT_MODE (funexp, Pmode);
12552 emit_move_insn (scratch2, funexp);
12553 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12554 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12556 sibcall = emit_call_insn (sibcall);
12557 SIBLING_CALL_P (sibcall) = 1;
12558 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12559 emit_barrier ();
12561 /* Run just enough of rest_of_compilation to do scheduling and get
12562 the insns emitted. Note that use_thunk calls
12563 assemble_start_function and assemble_end_function. */
12565 insns = get_insns ();
12567 if (optimize > 0)
12569 if (! cfun->cfg)
12570 init_flow (cfun);
12571 split_all_insns_noflow ();
12574 sh_reorg ();
12575 shorten_branches (insns);
12576 final_start_function (insns, file, 1);
12577 final (insns, file, 1);
12578 final_end_function ();
12580 reload_completed = 0;
12581 epilogue_completed = 0;
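/* Return an rtx for the address of the function NAME of the given KIND.
   When generating PIC code, special function symbols are loaded through the
   GOT (SFUNC_GOT) or as GOTOFF references (SFUNC_STATIC); if TARGET is
   given, the address ends up in TARGET. */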
12585 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12587 rtx sym;
12589 /* If this is not an ordinary function, the name usually comes from a
12590 string literal or an sprintf buffer. Make sure we use the same
12591 string consistently, so that cse will be able to unify address loads. */
12592 if (kind != FUNCTION_ORDINARY)
12593 name = IDENTIFIER_POINTER (get_identifier (name));
12594 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12595 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12596 if (flag_pic)
12597 switch (kind)
12599 case FUNCTION_ORDINARY:
12600 break;
12601 case SFUNC_GOT:
12603 rtx reg = target ? target : gen_reg_rtx (Pmode);
12605 emit_insn (gen_symGOT2reg (reg, sym));
12606 sym = reg;
12607 break;
12609 case SFUNC_STATIC:
12611 /* ??? To allow cse to work, we use GOTOFF relocations.
12612 We could add combiner patterns to transform this into
12613 straight pc-relative calls with sym2PIC / bsrf when
12614 label load and function call are still 1:1 and in the
12615 same basic block during combine. */
12616 rtx reg = target ? target : gen_reg_rtx (Pmode);
12618 emit_insn (gen_symGOTOFF2reg (reg, sym));
12619 sym = reg;
12620 break;
12623 if (target && sym != target)
12625 emit_move_insn (target, sym);
12626 return target;
12628 return sym;
12631 /* Find the number of a general purpose register in S. */
12632 static int
12633 scavenge_reg (HARD_REG_SET *s)
12635 int r;
12636 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12637 if (TEST_HARD_REG_BIT (*s, r))
12638 return r;
12639 return -1;
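/* Return an rtx representing the value that the PR (return address)
   register held on entry to the current function. */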
12643 sh_get_pr_initial_val (void)
12645 rtx val;
12647 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12648 PR register on SHcompact, because it might be clobbered by the prologue.
12649 We check first if that is known to be the case. */
12650 if (TARGET_SHCOMPACT
12651 && ((crtl->args.info.call_cookie
12652 & ~ CALL_COOKIE_RET_TRAMP (1))
12653 || crtl->saves_all_registers))
12654 return gen_frame_mem (SImode, return_address_pointer_rtx);
12656 /* If we haven't finished rtl generation, there might be a nonlocal label
12657 that we haven't seen yet.
12658 ??? get_hard_reg_initial_val fails if it is called after register
12659 allocation has started, unless it has been called before for the
12660 same register. And even then, we end up in trouble if we didn't use
12661 the register in the same basic block before. So call
12662 get_hard_reg_initial_val now and wrap it in an unspec if we might
12663 need to replace it. */
12664 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12665 combine can put the pseudo returned by get_hard_reg_initial_val into
12666 instructions that need general purpose registers, which will fail to
12667 be recognized when the pseudo becomes allocated to PR. */
12669 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12670 if (TARGET_SH1)
12671 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12672 return val;
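/* Try to expand a store-flag operation: operands[1] is the comparison code,
   operands[2] and operands[3] are the compared values, and operands[0] is
   the destination. Only EQ / NE comparisons of the T register with a
   constant are handled; return true on success. */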
12675 bool
12676 sh_expand_t_scc (rtx operands[])
12678 enum rtx_code code = GET_CODE (operands[1]);
12679 rtx target = operands[0];
12680 rtx op0 = operands[2];
12681 rtx op1 = operands[3];
12682 rtx result = target;
12683 HOST_WIDE_INT val;
12685 if (!REG_P (op0) || REGNO (op0) != T_REG
12686 || !CONST_INT_P (op1))
12687 return false;
12688 if (!REG_P (result))
12689 result = gen_reg_rtx (SImode);
12690 val = INTVAL (op1);
12691 if ((code == EQ && val == 1) || (code == NE && val == 0))
12692 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12693 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12694 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12695 else if (code == EQ || code == NE)
12696 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12697 else
12698 return false;
12699 if (result != target)
12700 emit_move_insn (target, result);
12701 return true;
12704 /* INSN is an sfunc; return the rtx that describes the address used. */
12705 static rtx
12706 extract_sfunc_addr (rtx insn)
12708 rtx pattern, part = NULL_RTX;
12709 int len, i;
12711 pattern = PATTERN (insn);
12712 len = XVECLEN (pattern, 0);
12713 for (i = 0; i < len; i++)
12715 part = XVECEXP (pattern, 0, i);
12716 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12717 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12718 return XEXP (part, 0);
12720 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12721 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12724 /* Verify that the register in use_sfunc_addr still agrees with the address
12725 used in the sfunc. This prevents fill_slots_from_thread from changing
12726 use_sfunc_addr.
12727 INSN is the use_sfunc_addr instruction, and REG is the register it
12728 guards. */
12729 bool
12730 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12732 /* Search for the sfunc. It should really come right after INSN. */
12733 while ((insn = NEXT_INSN (insn)))
12735 if (LABEL_P (insn) || JUMP_P (insn))
12736 break;
12737 if (! INSN_P (insn))
12738 continue;
12740 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12741 insn = seq->insn (0);
12742 if (GET_CODE (PATTERN (insn)) != PARALLEL
12743 || get_attr_type (insn) != TYPE_SFUNC)
12744 continue;
12745 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12747 gcc_unreachable ();
12750 /* This function returns a constant rtx that represents 2**15 / pi in
12751 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12752 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12753 static GTY(()) rtx sh_fsca_sf2int_rtx;
12756 sh_fsca_sf2int (void)
12758 if (! sh_fsca_sf2int_rtx)
12760 REAL_VALUE_TYPE rv;
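/* 2**15 / pi == 32768 / 3.14159... ~= 10430.378350470453. */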
12762 real_from_string (&rv, "10430.378350470453");
12763 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12766 return sh_fsca_sf2int_rtx;
12769 /* This function returns a constant rtx that represents pi / 2**15 in
12770 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction of
12771 a full circle back to an SFmode value in radians, i.e. 0x10000 maps
12772 to 2*pi. */
12773 static GTY(()) rtx sh_fsca_int2sf_rtx;
12776 sh_fsca_int2sf (void)
12778 if (! sh_fsca_int2sf_rtx)
12780 REAL_VALUE_TYPE rv;
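/* pi / 2**15 == 3.14159... / 32768 ~= 9.587379924285257e-5. */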
12782 real_from_string (&rv, "9.587379924285257e-5");
12783 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12786 return sh_fsca_int2sf_rtx;
12789 /* Initialize the CUMULATIVE_ARGS structure. */
12790 void
12791 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12792 tree fntype,
12793 rtx libname ATTRIBUTE_UNUSED,
12794 tree fndecl,
12795 signed int n_named_args,
12796 machine_mode mode)
12798 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12799 pcum->free_single_fp_reg = 0;
12800 pcum->stack_regs = 0;
12801 pcum->byref_regs = 0;
12802 pcum->byref = 0;
12803 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12805 /* XXX - Should we check TARGET_HITACHI here ??? */
12806 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12808 if (fntype)
12810 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12811 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12812 pcum->prototype_p = prototype_p (fntype);
12813 pcum->arg_count [(int) SH_ARG_INT]
12814 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12816 pcum->call_cookie
12817 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12818 && pcum->arg_count [(int) SH_ARG_INT] == 0
12819 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12820 ? int_size_in_bytes (TREE_TYPE (fntype))
12821 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12822 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12823 == FIRST_RET_REG));
12825 else
12827 pcum->arg_count [(int) SH_ARG_INT] = 0;
12828 pcum->prototype_p = FALSE;
12829 if (mode != VOIDmode)
12831 pcum->call_cookie =
12832 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12833 && GET_MODE_SIZE (mode) > 4
12834 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12836 /* If the default ABI is the Renesas ABI then all library
12837 calls must assume that the library will be using the
12838 Renesas ABI. So if the function would return its result
12839 in memory then we must force the address of this memory
12840 block onto the stack. Ideally we would like to call
12841 targetm.calls.return_in_memory() here but we do not have
12842 the TYPE or the FNDECL available so we synthesize the
12843 contents of that function as best we can. */
12844 pcum->force_mem =
12845 (TARGET_DEFAULT & MASK_HITACHI)
12846 && (mode == BLKmode
12847 || (GET_MODE_SIZE (mode) > 4
12848 && !(mode == DFmode
12849 && TARGET_FPU_DOUBLE)));
12851 else
12853 pcum->call_cookie = 0;
12854 pcum->force_mem = FALSE;
12859 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12860 not descend into CONST_DOUBLE expressions when doing the replacement.
12862 Note that copying is not done so X must not be shared unless all copies
12863 are to be modified.
12865 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12866 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12867 replacements[n*2+1] - and that we take mode changes into account.
12869 If a replacement is ambiguous, return NULL_RTX.
12871 If MODIFY is zero, don't modify any rtl in place,
12872 just return zero or nonzero for failure / success. */
12874 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12876 int i, j;
12877 const char *fmt;
12879 /* The following prevents infinite loops when we change a MEM inside a
12880 CONST_DOUBLE into the same CONST_DOUBLE. */
12881 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12882 return x;
12884 for (i = n_replacements - 1; i >= 0 ; i--)
12885 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12886 return replacements[i*2+1];
12888 /* Allow this function to make replacements in EXPR_LISTs. */
12889 if (x == NULL_RTX)
12890 return NULL_RTX;
12892 if (GET_CODE (x) == SUBREG)
12894 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12895 n_replacements, modify);
12897 if (CONST_INT_P (new_rtx))
12899 x = simplify_subreg (GET_MODE (x), new_rtx,
12900 GET_MODE (SUBREG_REG (x)),
12901 SUBREG_BYTE (x));
12902 if (! x)
12903 abort ();
12905 else if (modify)
12906 SUBREG_REG (x) = new_rtx;
12908 return x;
12910 else if (REG_P (x))
12912 unsigned regno = REGNO (x);
12913 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12914 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12915 rtx result = NULL_RTX;
12917 for (i = n_replacements - 1; i >= 0; i--)
12919 rtx from = replacements[i*2];
12920 rtx to = replacements[i*2+1];
12921 unsigned from_regno, from_nregs, to_regno, new_regno;
12923 if (!REG_P (from))
12924 continue;
12925 from_regno = REGNO (from);
12926 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12927 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12928 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12930 if (regno < from_regno
12931 || regno + nregs > from_regno + nregs
12932 || !REG_P (to)
12933 || result)
12934 return NULL_RTX;
12935 to_regno = REGNO (to);
12936 if (to_regno < FIRST_PSEUDO_REGISTER)
12938 new_regno = regno + to_regno - from_regno;
12939 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12940 != nregs)
12941 return NULL_RTX;
12942 result = gen_rtx_REG (GET_MODE (x), new_regno);
12944 else if (GET_MODE (x) <= GET_MODE (to))
12945 result = gen_lowpart_common (GET_MODE (x), to);
12946 else
12947 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12950 return result ? result : x;
12952 else if (GET_CODE (x) == ZERO_EXTEND)
12954 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12955 n_replacements, modify);
12957 if (CONST_INT_P (new_rtx))
12959 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12960 new_rtx, GET_MODE (XEXP (x, 0)));
12961 if (! x)
12962 abort ();
12964 else if (modify)
12965 XEXP (x, 0) = new_rtx;
12967 return x;
12970 fmt = GET_RTX_FORMAT (GET_CODE (x));
12971 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12973 rtx new_rtx;
12975 if (fmt[i] == 'e')
12977 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12978 n_replacements, modify);
12979 if (!new_rtx)
12980 return NULL_RTX;
12981 if (modify)
12982 XEXP (x, i) = new_rtx;
12984 else if (fmt[i] == 'E')
12985 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12987 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12988 n_replacements, modify);
12989 if (!new_rtx)
12990 return NULL_RTX;
12991 if (modify)
12992 XVECEXP (x, i, j) = new_rtx;
12996 return x;
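/* A usage sketch for replace_n_hard_rtx above, not taken from an actual
   caller in this file (the register numbers and variable names are purely
   illustrative): renaming hard regs r4/r5 to r6/r7 throughout a pattern
   could look like

     rtx repl[4] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 6),
		     gen_rtx_REG (SImode, 5), gen_rtx_REG (SImode, 7) };
     rtx new_pat = replace_n_hard_rtx (pat, repl, 2, 1);

   With MODIFY nonzero the pattern is changed in place; a NULL_RTX result
   signals an ambiguous replacement.  */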
13000 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13002 enum rtx_code code = TRUNCATE;
13004 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13006 rtx inner = XEXP (x, 0);
13007 machine_mode inner_mode = GET_MODE (inner);
13009 if (inner_mode == mode)
13010 return inner;
13011 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13012 x = inner;
13013 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13014 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13016 code = GET_CODE (x);
13017 x = inner;
13020 return gen_rtx_fmt_e (code, mode, x);
13023 /* Look through X cleaning up truncates of registers that span multiple
13024 actual hard registers. Return the number of changes made. */
13025 int
13026 shmedia_cleanup_truncate (rtx x)
13028 int n_changes = 0;
13029 subrtx_var_iterator::array_type array;
13030 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13032 rtx x = *iter;
13033 if (GET_CODE (x) == TRUNCATE)
13035 rtx reg = XEXP (x, 0);
13036 machine_mode reg_mode = GET_MODE (reg);
13037 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13039 int offset = subreg_lowpart_offset (DImode, reg_mode);
13040 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13041 n_changes += 1;
13042 iter.skip_subrtxes ();
13046 return n_changes;
13049 /* Load and store depend on the highpart of the address. However,
13050 set_attr_alternative does not give well-defined results before reload,
13051 so we must look at the rtl ourselves to see if any of the feeding
13052 registers is used in a memref.
13054 Return true iff INSN contains a MEM. */
13055 bool
13056 sh_contains_memref_p (rtx insn)
13058 subrtx_iterator::array_type array;
13059 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13060 if (MEM_P (*iter))
13061 return true;
13062 return false;
13065 /* Return true iff INSN loads a banked register. */
13066 bool
13067 sh_loads_bankedreg_p (rtx insn)
13069 if (GET_CODE (PATTERN (insn)) == SET)
13071 rtx op = SET_DEST (PATTERN(insn));
13072 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13073 return true;
13076 return false;
13079 /* FNADDR is the MEM expression from a call expander. Return an address
13080 to use in an SHmedia insn pattern. */
13081 rtx
13082 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13084 int is_sym;
13086 fnaddr = XEXP (fnaddr, 0);
13087 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13088 if (flag_pic && is_sym)
13090 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13092 rtx reg = gen_reg_rtx (Pmode);
13094 /* We must not use GOTPLT for sibcalls, because PIC_REG
13095 must be restored before the PLT code gets to run. */
13096 if (is_sibcall)
13097 emit_insn (gen_symGOT2reg (reg, fnaddr));
13098 else
13099 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13100 fnaddr = reg;
13102 else
13104 fnaddr = gen_sym2PIC (fnaddr);
13105 PUT_MODE (fnaddr, Pmode);
13108 /* If ptabs might trap, make this visible to the rest of the compiler.
13109 We generally assume that symbols pertain to valid locations, but
13110 it is possible to generate invalid symbols with asm or linker tricks.
13111 In a list of functions where each returns its successor, an invalid
13112 symbol might denote an empty list. */
13113 if (!TARGET_PT_FIXED
13114 && (!is_sym || TARGET_INVALID_SYMBOLS)
13115 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13117 rtx tr = gen_reg_rtx (PDImode);
13119 emit_insn (gen_ptabs (tr, fnaddr));
13120 fnaddr = tr;
13122 else if (! target_reg_operand (fnaddr, Pmode))
13123 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13124 return fnaddr;
13127 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13128 static reg_class_t
13129 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13131 if (rclass == NO_REGS
13132 && TARGET_SHMEDIA
13133 && (CONST_DOUBLE_P (x)
13134 || GET_CODE (x) == SYMBOL_REF
13135 || PIC_ADDR_P (x)))
13136 return GENERAL_REGS;
13138 return rclass;
13141 /* Implement TARGET_SECONDARY_RELOAD. */
13142 static reg_class_t
13143 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13144 machine_mode mode, secondary_reload_info *sri)
13146 enum reg_class rclass = (enum reg_class) rclass_i;
13148 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13149 && REG_P (XEXP (XEXP (x, 0), 0))
13150 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13151 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13153 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13154 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13156 if (REG_P (x) && REGNO (x) == GBR_REG)
13157 return NO_REGS;
13159 if (in_p)
13161 if (REGCLASS_HAS_FP_REG (rclass)
13162 && ! TARGET_SHMEDIA
13163 && immediate_operand ((x), mode)
13164 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13165 switch (mode)
13167 case SFmode:
13168 sri->icode = CODE_FOR_reload_insf__frn;
13169 return NO_REGS;
13170 case DFmode:
13171 sri->icode = CODE_FOR_reload_indf__frn;
13172 return NO_REGS;
13173 case SImode:
13174 /* ??? If we knew that we are in the appropriate mode -
13175 single precision - we could use a reload pattern directly. */
13176 return FPUL_REGS;
13177 default:
13178 abort ();
13180 if (rclass == FPUL_REGS
13181 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13182 || REGNO (x) == T_REG))
13183 || GET_CODE (x) == PLUS))
13184 return GENERAL_REGS;
13185 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13187 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13188 return GENERAL_REGS;
13189 else if (mode == SFmode)
13190 return FP_REGS;
13191 sri->icode = CODE_FOR_reload_insi__i_fpul;
13192 return NO_REGS;
13194 if (rclass == FPSCR_REGS
13195 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13196 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13197 return GENERAL_REGS;
13198 if (REGCLASS_HAS_FP_REG (rclass)
13199 && TARGET_SHMEDIA
13200 && immediate_operand (x, mode)
13201 && x != CONST0_RTX (GET_MODE (x))
13202 && GET_MODE (x) != V4SFmode)
13203 return GENERAL_REGS;
13204 if ((mode == QImode || mode == HImode)
13205 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13207 sri->icode = ((mode == QImode)
13208 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13209 return NO_REGS;
13211 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13212 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13213 return TARGET_REGS;
13214 } /* end of input-only processing. */
13216 if (((REGCLASS_HAS_FP_REG (rclass)
13217 && (REG_P (x)
13218 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13219 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13220 && TARGET_FMOVD))))
13221 || (REGCLASS_HAS_GENERAL_REG (rclass)
13222 && REG_P (x)
13223 && FP_REGISTER_P (REGNO (x))))
13224 && ! TARGET_SHMEDIA
13225 && (mode == SFmode || mode == SImode))
13226 return FPUL_REGS;
13227 if ((rclass == FPUL_REGS
13228 || (REGCLASS_HAS_FP_REG (rclass)
13229 && ! TARGET_SHMEDIA && mode == SImode))
13230 && (MEM_P (x)
13231 || (REG_P (x)
13232 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13233 || REGNO (x) == T_REG
13234 || system_reg_operand (x, VOIDmode)))))
13236 if (rclass == FPUL_REGS)
13237 return GENERAL_REGS;
13238 return NO_REGS; // LRA wants NO_REGS here; this used to be FPUL_REGS.
13240 if ((rclass == TARGET_REGS
13241 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13242 && !satisfies_constraint_Csy (x)
13243 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13244 return GENERAL_REGS;
13245 if ((rclass == MAC_REGS || rclass == PR_REGS)
13246 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13247 && rclass != REGNO_REG_CLASS (REGNO (x)))
13248 return GENERAL_REGS;
13249 if (rclass != GENERAL_REGS && REG_P (x)
13250 && TARGET_REGISTER_P (REGNO (x)))
13251 return GENERAL_REGS;
13253 /* If we get here, fall back to loading the FPUL register through general registers.
13254 This case can happen when movsi_ie insn is picked initially to
13255 load/store the FPUL register from/to another register, and then the
13256 other register is allocated on the stack. */
13257 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13258 return GENERAL_REGS;
13260 /* Force mov.b / mov.w displacement addressing insns to use R0 as
13261 the other operand.
13262 On SH2A we could also just leave it alone here, which would result in a
13263 4-byte move insn being generated instead. However, for this to work
13264 the insns must have the appropriate alternatives. */
13265 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13266 && satisfies_constraint_Sdd (x)
13267 && sh_disp_addr_displacement (x)
13268 <= sh_max_mov_insn_displacement (mode, false))
13269 return R0_REGS;
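/* The case above covers, for example (illustrative only, nothing is emitted
   here), the fact that on non-SH2A targets the only byte/word displacement
   load/store forms are
	mov.b	@(disp,Rn),R0
	mov.b	R0,@(disp,Rn)
   (likewise for mov.w), so the reloaded value has to end up in R0.  */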
13271 /* When reload is trying to address a QImode or HImode subreg on the stack,
13272 force any subreg byte into R0_REGS, as this is going to become a
13273 displacement address.
13274 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13275 is on the stack, the memref to it might already require a displacement
13276 and that has to be added to the final address. At this point we don't
13277 know the cumulative displacement so we assume the worst case. */
13278 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13279 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13280 return R0_REGS;
13282 return NO_REGS;
13285 /* Return true if SUBST can't safely replace its equivalent during RA. */
13286 static bool
13287 sh_cannot_substitute_mem_equiv_p (rtx)
13289 if (TARGET_SHMEDIA)
13290 return false;
13292 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13293 uses R0 and may cause spill failure when R0 is already used.
13294 We have to return true for that case at least.
13295 Moreover, SH puts heavy pressure on R0 and does not have enough
13296 hard registers to make the equiv substitution a win in size
13297 and speed on average working sets. The pseudos produced to
13298 hold the equiv values can't get good hard registers in the bad cases
13299 and end up as memory save/restore insns, which makes the code worse. */
13300 return true;
13303 /* Return true if DISP can be legitimized. */
13304 static bool
13305 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13306 machine_mode mode)
13308 if (TARGET_SHMEDIA)
13309 return false;
13311 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13312 || (TARGET_SH2E && mode == SFmode))
13313 return false;
13315 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13316 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13318 *disp = adj.mov_disp;
13319 *offs = adj.offset_adjust;
13320 return true;
13323 return false;
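/* An illustration of the interface above (the concrete numbers are
   hypothetical; the real split comes from sh_find_mov_disp_adjust): an SImode
   access at displacement 100 cannot be encoded directly, since
   mov.l @(disp,Rn) only reaches displacements 0..60.  The hook could then
   return
	*offs = GEN_INT (64);	adjustment added to the base register first
	*disp = GEN_INT (36);	remaining displacement, encodable in mov.l
   so that base+100 is addressed as (base+64)+36.  */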
13326 /* Return true if the movsf insn should be split using an additional
13327 register. */
13328 bool
13329 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13331 /* op0 == op1 */
13332 if (rtx_equal_p (op0, op1))
13333 return true;
13334 /* fy, FQ, reg */
13335 if (GET_CODE (op1) == CONST_DOUBLE
13336 && ! satisfies_constraint_G (op1)
13337 && ! satisfies_constraint_H (op1)
13338 && REG_P (op0)
13339 && REG_P (op2))
13340 return true;
13341 /* f, r, y */
13342 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13343 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13344 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13345 return true;
13346 /* r, f, y */
13347 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13348 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13349 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13350 return true;
13352 return false;
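/* For instance (a sketch, not code from this file): moving a value between a
   general register and an FP register has no direct form on SH; it goes
   through FPUL, e.g.
	lds	r1,fpul
	fsts	fpul,fr4
   which is why the "f, r, y" / "r, f, y" alternatives above require the
   FPUL scratch operand OP2.  */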
13355 static void
13356 sh_conditional_register_usage (void)
13358 int regno;
13359 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13360 if (! VALID_REGISTER_P (regno))
13361 fixed_regs[regno] = call_used_regs[regno] = 1;
13362 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13363 if (TARGET_SH5)
13365 call_used_regs[FIRST_GENERAL_REG + 8]
13366 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13367 call_really_used_regs[FIRST_GENERAL_REG + 8]
13368 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13370 if (TARGET_SHMEDIA)
13372 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13373 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13374 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13376 if (flag_pic)
13378 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13379 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13381 /* Under the Renesas ABI the MACH / MACL registers are saved and restored across calls. */
13382 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13384 call_really_used_regs[MACH_REG] = 0;
13385 call_really_used_regs[MACL_REG] = 0;
13388 if (TARGET_SHMEDIA)
13390 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13391 if (! fixed_regs[regno] && call_really_used_regs[regno])
13392 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13394 else
13395 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13396 if (! fixed_regs[regno] && call_really_used_regs[regno])
13397 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13399 call_really_used_regs[FPSCR_MODES_REG] = 0;
13400 call_really_used_regs[FPSCR_STAT_REG] = 0;
13403 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
13405 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13406 static bool
13407 sh_legitimate_constant_p (machine_mode mode, rtx x)
13409 return (TARGET_SHMEDIA
13410 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13411 || x == CONST0_RTX (mode)
13412 || !TARGET_SHMEDIA_FPU
13413 || TARGET_SHMEDIA64)
13414 : (GET_CODE (x) != CONST_DOUBLE
13415 || mode == DFmode || mode == SFmode
13416 || mode == DImode || GET_MODE (x) == VOIDmode));
13419 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13421 static void
13422 sh_init_sync_libfuncs (void)
13424 init_sync_libfuncs (UNITS_PER_WORD);
13427 /* Return true if it is appropriate to emit `ret' instructions in the
13428 body of a function. */
13429 bool
13430 sh_can_use_simple_return_p (void)
13432 HARD_REG_SET live_regs_mask;
13433 int d;
13435 /* Some targets require special return insns. */
13436 if (TARGET_SHMEDIA
13437 || (TARGET_SHCOMPACT
13438 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13439 return false;
13441 if (! reload_completed || frame_pointer_needed)
13442 return false;
13444 /* Moving the prologue around doesn't reduce the size. */
13445 if (optimize_function_for_size_p (cfun))
13446 return false;
13448 /* Finally, allow for the PR save. */
13449 d = calc_live_regs (&live_regs_mask);
13451 if (rounded_frame_size (d) > 4)
13452 return false;
13454 return true;
13457 /*------------------------------------------------------------------------------
13458 Address mode optimization support code
13461 typedef HOST_WIDE_INT disp_t;
13462 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13463 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13464 static const disp_t INVALID_DISP = MAX_DISP;
13466 /* A memory reference which is described by a base register and a
13467 displacement. */
13468 class base_reg_disp
13470 public:
13471 base_reg_disp (rtx br, disp_t d);
13473 bool is_reg (void) const;
13474 bool is_disp (void) const;
13475 rtx reg (void) const;
13476 disp_t disp (void) const;
13478 private:
13479 rtx reg_;
13480 disp_t disp_;
13483 inline
13484 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13485 : reg_ (br), disp_ (d)
13489 inline bool
13490 base_reg_disp::is_reg (void) const
13492 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13495 inline bool
13496 base_reg_disp::is_disp (void) const
13498 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13501 inline rtx
13502 base_reg_disp::reg (void) const
13504 return reg_;
13507 inline disp_t
13508 base_reg_disp::disp (void) const
13510 return disp_;
13513 /* Find the base register and calculate the displacement for a given
13514 address rtx 'x'. */
13515 static base_reg_disp
13516 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13517 rtx base_reg = NULL)
13519 if (REG_P (x))
13521 if (REGNO (x) == GBR_REG)
13522 return base_reg_disp (x, disp);
13524 /* We've reached a hard-reg. This is probably the point where
13525 function args are copied to pseudos. Do not go any further and
13526 stick to the pseudo. If the original mem addr was in a hard reg
13527 from the beginning, it will become the base reg. */
13528 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13529 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13531 /* Find the def of the reg and trace it. If there is more than one
13532 def and they are not all the same, assume it's not safe to proceed. */
13533 rtx_insn* last_i = NULL;
13534 rtx last_set = NULL;
13535 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13536 d = DF_REF_NEXT_REG (d))
13538 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13540 /* Accept multiple defs, as long as they are equal. */
13541 if (last_set == NULL || rtx_equal_p (last_set, set))
13543 last_i = DF_REF_INSN (d);
13544 last_set = set;
13546 else
13548 last_i = NULL;
13549 last_set = NULL;
13550 break;
13554 if (last_set != NULL && last_i != NULL)
13555 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13556 XEXP (last_set, 0));
13558 /* If we get here, no previous insn that sets the reg was found.
13559 The input reg is already the base reg. */
13560 return base_reg_disp (x, disp);
13563 else if (GET_CODE (x) == PLUS)
13565 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13566 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13568 /* Either left or right val must be a reg.
13569 We don't handle the case of 'reg + reg' here. */
13570 if (left_val.is_reg () && right_val.is_disp ())
13571 return base_reg_disp (left_val.reg (), left_val.disp ()
13572 + right_val.disp () + disp);
13573 else if (right_val.is_reg () && left_val.is_disp ())
13574 return base_reg_disp (right_val.reg (), right_val.disp ()
13575 + left_val.disp () + disp);
13576 else
13577 return base_reg_disp (base_reg, disp);
13580 else if (CONST_INT_P (x))
13581 return base_reg_disp (NULL, disp + INTVAL (x));
13583 /* Didn't find anything useful. */
13584 return base_reg_disp (base_reg, disp);
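/* Worked example for sh_find_base_reg_disp (pseudo RTL, hypothetical
   register numbers): given
	(set (reg 200) (plus (reg 201) (const_int 16)))
	... (mem (plus (reg 200) (const_int 4))) ...
   tracing the single def of reg 200 yields base register 201 with an
   accumulated displacement of 16 + 4 = 20.  */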
13587 /* Given an insn and a memory operand, try to find an equivalent GBR
13588 based memory address and return the corresponding new memory address.
13589 Return NULL_RTX if not found. */
13590 rtx
13591 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13593 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13594 return NULL_RTX;
13596 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13597 if (side_effects_p (XEXP (mem, 0)))
13598 return NULL_RTX;
13600 /* When not optimizing there might be no dataflow available. */
13601 if (df == NULL)
13602 return NULL_RTX;
13604 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13606 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13608 /* If GBR is marked as call clobbered we bail out if we see a call.
13609 FIXME: We should actually check whether this mem refers to the GBR value
13610 before or after the call. If there is a store_gbr preceding this
13611 mem, it's safe to use GBR for this mem.
13613 If GBR is not marked as call clobbered, but there is some other
13614 def than a call, it's probably a load_gbr, in which case we also
13615 bail out to be on the safe side.
13616 FIXME: We should check whether we have a use-after-def case, such as
13617 the call case above. */
13618 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13619 d = DF_REF_NEXT_REG (d))
13621 if (CALL_P (DF_REF_INSN (d)))
13623 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13624 return NULL_RTX;
13625 else
13626 continue;
13628 else
13629 return NULL_RTX;
13632 rtx disp = GEN_INT (gbr_disp.disp ());
13633 if (gbr_displacement (disp, GET_MODE (mem)))
13634 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13637 return NULL_RTX;
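/* As an illustration (the insns below are not emitted by this function
   itself): when the traced base register turns out to be GBR, the access can
   use the dedicated GBR displacement forms such as
	mov.l	@(disp,GBR),R0
	mov.b	R0,@(disp,GBR)
   and the returned (plus (reg GBR) (const_int disp)) address is meant to
   match those patterns, freeing up a general register as the base.  */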
13640 /*------------------------------------------------------------------------------
13641 Manual insn combine support code.
13644 /* Return true if the specified insn contains any UNSPECs or
13645 UNSPEC_VOLATILEs. */
13646 static bool
13647 sh_unspec_insn_p (rtx_insn* insn)
13649 bool result = false;
13651 struct note_uses_func
13653 static void
13654 func (rtx* x, void* data)
13656 if (GET_CODE (*x) == UNSPEC || GET_CODE (*x) == UNSPEC_VOLATILE)
13657 *(static_cast<bool*> (data)) = true;
13661 note_uses (&PATTERN (insn), note_uses_func::func, &result);
13662 return result;
13665 /* Return true if the register operands of the specified insn are modified
13666 between the specified from and to insns (exclusive of those two). */
13667 static bool
13668 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13669 const rtx_insn* from,
13670 const rtx_insn* to)
13672 /* FIXME: Return true for multiple sets for now. */
13673 rtx s = single_set (operands_insn);
13674 if (s == NULL_RTX)
13675 return true;
13677 subrtx_iterator::array_type array;
13678 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13679 if ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to))
13680 return true;
13682 return false;
13685 /* Given an op rtx and an insn, try to find out whether the result of the
13686 specified op consists only of logical operations on T bit stores. */
13687 bool
13688 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13690 if (!logical_operator (op, SImode))
13691 return false;
13693 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13694 int op_is_t_count = 0;
13696 for (int i = 0; i < 2; ++i)
13698 if (t_reg_operand (ops[i], VOIDmode)
13699 || negt_reg_operand (ops[i], VOIDmode))
13700 op_is_t_count++;
13702 else
13704 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13705 prev_nonnote_insn_bb);
13706 if (op_set.set_src == NULL_RTX)
13707 continue;
13709 if (t_reg_operand (op_set.set_src, VOIDmode)
13710 || negt_reg_operand (op_set.set_src, VOIDmode)
13711 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13712 op_is_t_count++;
13716 return op_is_t_count == 2;
13719 /* Given the operand that is extended in a sign/zero extend insn, and the
13720 insn, try to figure out whether the sign/zero extension can be replaced
13721 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13722 NULL_RTX otherwise. */
13723 rtx
13724 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13726 if (REG_P (extended_op))
13727 extended_op = extended_op;
13728 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13729 extended_op = SUBREG_REG (extended_op);
13730 else
13731 return NULL_RTX;
13733 /* Reg moves must be of the same mode. */
13734 if (GET_MODE (extended_op) != SImode)
13735 return NULL_RTX;
13737 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13738 if (s.set_src == NULL_RTX)
13739 return NULL_RTX;
13741 if (t_reg_operand (s.set_src, VOIDmode)
13742 || negt_reg_operand (s.set_src, VOIDmode))
13743 return extended_op;
13745 /* If the zero extended reg was formed by a logical operation, check the
13746 operands of the logical operation. If both originated from T bit
13747 stores the zero extension can be eliminated. */
13748 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13749 return extended_op;
13751 return NULL_RTX;
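/* A small illustration of the above (register numbers hypothetical):
	movt	r1		! r1 is 0 or 1
	extu.b	r1,r2		! zero extension of a T bit store
   Since movt only ever produces 0 or 1, the extension is redundant and can be
   replaced by a plain reg-reg copy of r1, which is what returning the
   extended operand's register achieves.  */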
13754 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13755 figure out whether it should be converted into a movt-xor sequence in
13756 the movrt_negc splitter.
13757 Returns true if insns have been modified and the splitter has succeeded. */
13758 bool
13759 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13761 /* In cases such as
13762 tst r4,r4
13763 mov #-1,r1
13764 negc r1,r1
13765 tst r4,r4
13766 we can replace the T bit clobbering negc with a movt-xor sequence and
13767 eliminate the redundant comparison.
13768 Because the xor insn depends on register allocation results, allow this
13769 only before reload. */
13770 if (!can_create_pseudo_p ())
13771 return false;
13773 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13774 prev_nonnote_insn_bb);
13775 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13776 next_nonnote_insn_bb);
13778 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13779 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13780 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13781 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13782 t_before_negc.insn,
13783 t_after_negc.insn)
13784 && !sh_unspec_insn_p (t_after_negc.insn)
13785 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13786 && !side_effects_p (PATTERN (t_after_negc.insn))
13787 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13789 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13790 set_insn_deleted (t_after_negc.insn);
13791 return true;
13793 else
13794 return false;
13797 static void
13798 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13799 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
13801 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13802 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13804 emit_insn (gen_toggle_pr ());
13805 if (TARGET_FMOVD)
13806 emit_insn (gen_toggle_sz ());
13808 else if (mode != FP_MODE_NONE)
13810 rtx tmp = gen_reg_rtx (SImode);
13811 emit_insn (gen_sts_fpscr (tmp));
13812 rtx i = NULL;
13814 const unsigned HOST_WIDE_INT fpbits =
13815 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
13817 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
13818 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
13819 else if (mode == FP_MODE_SINGLE)
13820 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
13821 else if (mode == FP_MODE_DOUBLE)
13822 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
13823 else
13824 gcc_unreachable ();
13826 emit_insn (i);
13827 emit_insn (gen_lds_fpscr (tmp));
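/* A sketch of the sequence emitted by the else branch above when switching
   between two known FP modes with TARGET_FMOVD (register names are
   placeholders):
	sts	fpscr,rX
	xor	rY,rX		! rY holds FPSCR_PR | FPSCR_SZ
	lds	rX,fpscr
   On SH4A-FP / SH4-300 the cheaper toggle_pr / toggle_sz patterns are used
   instead, as handled by the first branch.  */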
13831 static int
13832 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
13834 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
13837 static int
13838 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
13840 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
13841 get_attr_fp_set (insn) != FP_SET_NONE)
13842 return (int) get_attr_fp_set (insn);
13843 else
13844 return mode;
13847 static int
13848 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13850 return NORMAL_MODE (entity);
13853 static int
13854 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13856 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
13859 static int
13860 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13862 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
13865 /* Return true if we use LRA instead of the reload pass. */
13866 static bool
13867 sh_lra_p (void)
13869 return sh_lra_flag;
13872 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13874 static bool
13875 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13876 unsigned int align,
13877 enum by_pieces_operation op,
13878 bool speed_p)
13880 switch (op)
13882 case MOVE_BY_PIECES:
13883 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
13884 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
13885 case STORE_BY_PIECES:
13886 case SET_BY_PIECES:
13887 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
13888 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
13889 default:
13890 return default_use_by_pieces_infrastructure_p (size, align,
13891 op, speed_p);
13895 #include "gt-sh.h"