* [SH] Miscellaneous changes for LRA.
[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
34 #include "calls.h"
35 #include "varasm.h"
36 #include "flags.h"
37 #include "expr.h"
38 #include "insn-codes.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "hard-reg-set.h"
46 #include "input.h"
47 #include "function.h"
48 #include "regs.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "dwarf2.h"
54 #include "tm_p.h"
55 #include "target.h"
56 #include "target-def.h"
57 #include "langhooks.h"
58 #include "predict.h"
59 #include "dominance.h"
60 #include "cfg.h"
61 #include "cfgrtl.h"
62 #include "cfganal.h"
63 #include "lcm.h"
64 #include "cfgbuild.h"
65 #include "cfgcleanup.h"
66 #include "basic-block.h"
67 #include "df.h"
68 #include "intl.h"
69 #include "sched-int.h"
70 #include "params.h"
71 #include "ggc.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "is-a.h"
79 #include "gimple.h"
80 #include "gimplify.h"
81 #include "cfgloop.h"
82 #include "alloc-pool.h"
83 #include "tm-constrs.h"
84 #include "opts.h"
85 #include "tree-pass.h"
86 #include "pass_manager.h"
87 #include "context.h"
88 #include "builtins.h"
89 #include "rtl-iter.h"
91 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
93 /* These are some macros to abstract register modes. */
94 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
95 && ((HOST_WIDE_INT)(VALUE)) <= 511)
97 #define CONST_OK_FOR_ADD(size) \
98 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
99 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
100 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
101 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
103 /* Used to simplify the logic below. Find the attributes wherever
104 they may be. */
105 #define SH_ATTRIBUTES(decl) \
106 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
107 : DECL_ATTRIBUTES (decl) \
108 ? (DECL_ATTRIBUTES (decl)) \
109 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
111 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
112 int current_function_interrupt;
114 tree sh_deferred_function_attributes;
115 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
117 /* Global variables for machine-dependent things. */
119 /* The CPU we are scheduling for. */
120 enum processor_type sh_cpu;
122 /* Definitions used in ready queue reordering for first scheduling pass. */
124 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
125 static short *regmode_weight[2];
127 /* Total SFmode and SImode weights of scheduled insns. */
128 static int curr_regmode_pressure[2];
130 /* Number of r0 life regions. */
131 static int r0_life_regions;
133 /* If true, skip cycles for Q -> R movement. */
134 static int skip_cycles = 0;
136 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
137 and returned from sh_reorder2. */
138 static short cached_can_issue_more;
140 /* Unique number for UNSPEC_BBR pattern. */
141 static unsigned int unspec_bbr_uid = 1;
143 /* Provides the class number of the smallest class containing
144 a given register number. */
145 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
147 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
174 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
175 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
176 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
177 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
180 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
181 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
182 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
183 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
184 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
185 GENERAL_REGS, GENERAL_REGS,
188 char sh_register_names[FIRST_PSEUDO_REGISTER] \
189 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
191 char sh_additional_register_names[ADDREGNAMES_SIZE] \
192 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
193 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
195 int assembler_dialect;
197 static bool shmedia_space_reserved_for_target_registers;
199 static void split_branches (rtx_insn *);
200 static int branch_dest (rtx);
201 static void print_slot (rtx_sequence *);
202 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
203 static void dump_table (rtx_insn *, rtx_insn *);
204 static bool broken_move (rtx_insn *);
205 static bool mova_p (rtx_insn *);
206 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
207 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
208 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
209 static void sh_reorg (void);
210 static void sh_option_override (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
212 static rtx_insn *frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static HOST_WIDE_INT rounded_frame_size (int);
218 static bool sh_frame_pointer_required (void);
219 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
220 static int sh_mode_needed (int, rtx_insn *);
221 static int sh_mode_after (int, int, rtx_insn *);
222 static int sh_mode_entry (int);
223 static int sh_mode_exit (int);
224 static int sh_mode_priority (int entity, int n);
225 static bool sh_lra_p (void);
227 static rtx mark_constant_pool_use (rtx);
228 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
229 int, bool *);
230 static tree sh_handle_resbank_handler_attribute (tree *, tree,
231 tree, int, bool *);
232 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
233 tree, int, bool *);
234 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
235 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
236 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
237 static void sh_print_operand (FILE *, rtx, int);
238 static void sh_print_operand_address (FILE *, rtx);
239 static bool sh_print_operand_punct_valid_p (unsigned char code);
240 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
241 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
242 static void sh_insert_attributes (tree, tree *);
243 static const char *sh_check_pch_target_flags (int);
244 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
245 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
246 static int sh_issue_rate (void);
247 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
248 static short find_set_regmode_weight (rtx, machine_mode);
249 static short find_insn_regmode_weight (rtx, machine_mode);
250 static void find_regmode_weight (basic_block, machine_mode);
251 static int find_r0_life_regions (basic_block);
252 static void sh_md_init_global (FILE *, int, int);
253 static void sh_md_finish_global (FILE *, int);
254 static int rank_for_reorder (const void *, const void *);
255 static void swap_reorder (rtx_insn **, int);
256 static void ready_reorder (rtx_insn **, int);
257 static bool high_pressure (machine_mode);
258 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
259 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
260 static void sh_md_init (FILE *, int, int);
261 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
263 static bool sh_function_ok_for_sibcall (tree, tree);
265 static bool sh_cannot_modify_jumps_p (void);
266 static reg_class_t sh_target_reg_class (void);
267 static bool sh_optimize_target_register_callee_saved (bool);
268 static bool sh_ms_bitfield_layout_p (const_tree);
270 static void sh_init_builtins (void);
271 static tree sh_builtin_decl (unsigned, bool);
272 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
273 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
274 HOST_WIDE_INT, tree);
275 static void sh_file_start (void);
276 static bool flow_dependent_p (rtx, rtx);
277 static void flow_dependent_p_1 (rtx, const_rtx, void *);
278 static int shiftcosts (rtx);
279 static int and_xor_ior_costs (rtx, int);
280 static int addsubcosts (rtx);
281 static int multcosts (rtx);
282 static bool unspec_caller_rtx_p (rtx);
283 static bool sh_cannot_copy_insn_p (rtx_insn *);
284 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
285 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
286 static int sh_pr_n_sets (void);
287 static rtx sh_allocate_initial_value (rtx);
288 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
289 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
290 machine_mode,
291 struct secondary_reload_info *);
292 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
293 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
294 static rtx sh_delegitimize_address (rtx);
295 static bool sh_cannot_substitute_mem_equiv_p (rtx);
296 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
297 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
298 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
299 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
300 static int scavenge_reg (HARD_REG_SET *s);
301 struct save_schedule_s;
302 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
303 struct save_schedule_s *, int);
305 static rtx sh_struct_value_rtx (tree, int);
306 static rtx sh_function_value (const_tree, const_tree, bool);
307 static bool sh_function_value_regno_p (const unsigned int);
308 static rtx sh_libcall_value (machine_mode, const_rtx);
309 static bool sh_return_in_memory (const_tree, const_tree);
310 static rtx sh_builtin_saveregs (void);
311 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
312 tree, int *, int);
313 static bool sh_strict_argument_naming (cumulative_args_t);
314 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
315 static tree sh_build_builtin_va_list (void);
316 static void sh_va_start (tree, rtx);
317 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
318 static bool sh_promote_prototypes (const_tree);
319 static machine_mode sh_promote_function_mode (const_tree type,
320 machine_mode,
321 int *punsignedp,
322 const_tree funtype,
323 int for_return);
324 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
325 const_tree, bool);
326 static bool sh_callee_copies (cumulative_args_t, machine_mode,
327 const_tree, bool);
328 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
329 tree, bool);
330 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
331 const_tree, bool);
332 static rtx sh_function_arg (cumulative_args_t, machine_mode,
333 const_tree, bool);
334 static bool sh_scalar_mode_supported_p (machine_mode);
335 static int sh_dwarf_calling_convention (const_tree);
336 static void sh_encode_section_info (tree, rtx, int);
337 static bool sh2a_function_vector_p (tree);
338 static void sh_trampoline_init (rtx, tree, rtx);
339 static rtx sh_trampoline_adjust_address (rtx);
340 static void sh_conditional_register_usage (void);
341 static bool sh_legitimate_constant_p (machine_mode, rtx);
342 static int mov_insn_size (machine_mode, bool);
343 static int mov_insn_alignment_mask (machine_mode, bool);
344 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
345 unsigned int,
346 enum by_pieces_operation,
347 bool);
348 static bool sequence_insn_p (rtx_insn *);
349 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
350 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
351 machine_mode, bool);
352 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
354 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
356 static const struct attribute_spec sh_attribute_table[] =
358 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
359 affects_type_identity } */
360 { "interrupt_handler", 0, 0, true, false, false,
361 sh_handle_interrupt_handler_attribute, false },
362 { "sp_switch", 1, 1, true, false, false,
363 sh_handle_sp_switch_attribute, false },
364 { "trap_exit", 1, 1, true, false, false,
365 sh_handle_trap_exit_attribute, false },
366 { "renesas", 0, 0, false, true, false,
367 sh_handle_renesas_attribute, false },
368 { "trapa_handler", 0, 0, true, false, false,
369 sh_handle_interrupt_handler_attribute, false },
370 { "nosave_low_regs", 0, 0, true, false, false,
371 sh_handle_interrupt_handler_attribute, false },
372 { "resbank", 0, 0, true, false, false,
373 sh_handle_resbank_handler_attribute, false },
374 { "function_vector", 1, 1, true, false, false,
375 sh2a_handle_function_vector_handler_attribute, false },
376 { NULL, 0, 0, false, false, false, NULL, false }
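/* For illustration only (not from this file): source-level uses of the
   attributes in the table above would look roughly like the following,
   where the function names, the "alt_stack" variable and the trap number
   11 are made-up examples:

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));

   sp_switch takes a string naming a variable that holds the alternate
   stack pointer, and trap_exit takes the trap number used to return from
   the handler.  */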
379 /* Initialize the GCC target structure. */
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
383 /* The next two are used for debug info when compiling with -gdwarf. */
384 #undef TARGET_ASM_UNALIGNED_HI_OP
385 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
386 #undef TARGET_ASM_UNALIGNED_SI_OP
387 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
389 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
390 #undef TARGET_ASM_UNALIGNED_DI_OP
391 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
392 #undef TARGET_ASM_ALIGNED_DI_OP
393 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE sh_option_override
398 #undef TARGET_PRINT_OPERAND
399 #define TARGET_PRINT_OPERAND sh_print_operand
400 #undef TARGET_PRINT_OPERAND_ADDRESS
401 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
402 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
403 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
404 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
405 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
407 #undef TARGET_ASM_FUNCTION_EPILOGUE
408 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
410 #undef TARGET_ASM_OUTPUT_MI_THUNK
411 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
413 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
414 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
415 hook_bool_const_tree_hwi_hwi_const_tree_true
417 #undef TARGET_ASM_FILE_START
418 #define TARGET_ASM_FILE_START sh_file_start
419 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
420 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
422 #undef TARGET_REGISTER_MOVE_COST
423 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
425 #undef TARGET_INSERT_ATTRIBUTES
426 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
428 #undef TARGET_SCHED_ADJUST_COST
429 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
431 #undef TARGET_SCHED_ISSUE_RATE
432 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
434 /* The next 5 hooks have been implemented for re-enabling sched1. With the
435 help of these macros we limit the movement of insns in sched1 in order to
436 reduce register pressure. The overall idea is to keep count of the SImode
437 and SFmode regs required by already scheduled insns. When these counts
438 cross certain threshold values, we give priority to insns that free registers.
439 The insn that frees registers is most likely the insn with the lowest
440 LUID (original insn order), but such an insn might be in the stalled
441 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
442 up to a maximum of 8 cycles so that such insns may move from Q -> R.
444 The hooks are described below:
446 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
447 scheduler; it is called inside the sched_init function just after the
448 find_insn_reg_weights function call. It is used to calculate the SImode
449 and SFmode weights of the insns of basic blocks, much like what
450 find_insn_reg_weights does.
451 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
453 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
454 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
455 (Q)->(R).
457 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
458 high, reorder the ready queue so that the insn with the lowest LUID will be
459 issued next.
461 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
462 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
464 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
465 can be returned from TARGET_SCHED_REORDER2.
467 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
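/* A minimal sketch of the reorder logic described above, using the
   high_pressure, ready_reorder and sh_issue_rate helpers declared earlier
   in this file.  Illustrative only; this is not the actual hook body:

     static int
     sh_reorder_sketch (rtx_insn **ready, int *n_readyp)
     {
       if (high_pressure (SFmode) || high_pressure (SImode))
         ready_reorder (ready, *n_readyp);
       return sh_issue_rate ();
     }
 */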
469 #undef TARGET_SCHED_DFA_NEW_CYCLE
470 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
472 #undef TARGET_SCHED_INIT_GLOBAL
473 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
475 #undef TARGET_SCHED_FINISH_GLOBAL
476 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
478 #undef TARGET_SCHED_VARIABLE_ISSUE
479 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
481 #undef TARGET_SCHED_REORDER
482 #define TARGET_SCHED_REORDER sh_reorder
484 #undef TARGET_SCHED_REORDER2
485 #define TARGET_SCHED_REORDER2 sh_reorder2
487 #undef TARGET_SCHED_INIT
488 #define TARGET_SCHED_INIT sh_md_init
490 #undef TARGET_DELEGITIMIZE_ADDRESS
491 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
493 #undef TARGET_LEGITIMIZE_ADDRESS
494 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
496 #undef TARGET_CANNOT_MODIFY_JUMPS_P
497 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
498 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
499 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
500 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
501 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
502 sh_optimize_target_register_callee_saved
504 #undef TARGET_MS_BITFIELD_LAYOUT_P
505 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS sh_init_builtins
509 #undef TARGET_BUILTIN_DECL
510 #define TARGET_BUILTIN_DECL sh_builtin_decl
511 #undef TARGET_EXPAND_BUILTIN
512 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
514 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
515 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
517 #undef TARGET_CANNOT_COPY_INSN_P
518 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
519 #undef TARGET_RTX_COSTS
520 #define TARGET_RTX_COSTS sh_rtx_costs
521 #undef TARGET_ADDRESS_COST
522 #define TARGET_ADDRESS_COST sh_address_cost
523 #undef TARGET_ALLOCATE_INITIAL_VALUE
524 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
529 #undef TARGET_DWARF_REGISTER_SPAN
530 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
532 #ifdef HAVE_AS_TLS
533 #undef TARGET_HAVE_TLS
534 #define TARGET_HAVE_TLS true
535 #endif
537 #undef TARGET_PROMOTE_PROTOTYPES
538 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
542 #undef TARGET_FUNCTION_VALUE
543 #define TARGET_FUNCTION_VALUE sh_function_value
544 #undef TARGET_FUNCTION_VALUE_REGNO_P
545 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
546 #undef TARGET_LIBCALL_VALUE
547 #define TARGET_LIBCALL_VALUE sh_libcall_value
548 #undef TARGET_STRUCT_VALUE_RTX
549 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
550 #undef TARGET_RETURN_IN_MEMORY
551 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
553 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
554 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
555 #undef TARGET_SETUP_INCOMING_VARARGS
556 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
557 #undef TARGET_STRICT_ARGUMENT_NAMING
558 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
559 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
560 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
561 #undef TARGET_MUST_PASS_IN_STACK
562 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
565 #undef TARGET_CALLEE_COPIES
566 #define TARGET_CALLEE_COPIES sh_callee_copies
567 #undef TARGET_ARG_PARTIAL_BYTES
568 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
569 #undef TARGET_FUNCTION_ARG
570 #define TARGET_FUNCTION_ARG sh_function_arg
571 #undef TARGET_FUNCTION_ARG_ADVANCE
572 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
574 #undef TARGET_BUILD_BUILTIN_VA_LIST
575 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
576 #undef TARGET_EXPAND_BUILTIN_VA_START
577 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
578 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
579 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
581 #undef TARGET_SCALAR_MODE_SUPPORTED_P
582 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
583 #undef TARGET_VECTOR_MODE_SUPPORTED_P
584 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
586 #undef TARGET_CHECK_PCH_TARGET_FLAGS
587 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
589 #undef TARGET_DWARF_CALLING_CONVENTION
590 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
592 #undef TARGET_FRAME_POINTER_REQUIRED
593 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
595 #undef TARGET_MODE_EMIT
596 #define TARGET_MODE_EMIT sh_emit_mode_set
598 #undef TARGET_MODE_NEEDED
599 #define TARGET_MODE_NEEDED sh_mode_needed
601 #undef TARGET_MODE_AFTER
602 #define TARGET_MODE_AFTER sh_mode_after
604 #undef TARGET_MODE_ENTRY
605 #define TARGET_MODE_ENTRY sh_mode_entry
607 #undef TARGET_MODE_EXIT
608 #define TARGET_MODE_EXIT sh_mode_exit
610 #undef TARGET_MODE_PRIORITY
611 #define TARGET_MODE_PRIORITY sh_mode_priority
613 /* Return regmode weight for insn. */
614 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
615 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
617 /* Return current register pressure for regmode. */
618 #define CURR_REGMODE_PRESSURE(MODE)\
619 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
621 #undef TARGET_ENCODE_SECTION_INFO
622 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
624 #undef TARGET_LRA_P
625 #define TARGET_LRA_P sh_lra_p
627 #undef TARGET_SECONDARY_RELOAD
628 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
636 #undef TARGET_LEGITIMATE_ADDRESS_P
637 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
639 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
640 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
642 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
643 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
644 sh_legitimize_address_displacement
646 #undef TARGET_TRAMPOLINE_INIT
647 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
648 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
649 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
654 #undef TARGET_CANONICALIZE_COMPARISON
655 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
657 #undef TARGET_FIXED_CONDITION_CODE_REGS
658 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
660 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
661 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
662 sh_use_by_pieces_infrastructure_p
664 /* Machine-specific symbol_ref flags. */
665 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
667 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
668 is used by optabs.c atomic op expansion code as well as in sync.md. */
669 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
670 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
672 struct gcc_target targetm = TARGET_INITIALIZER;
675 /* Information on the currently selected atomic model.
676 This is initialized in sh_option_override. */
677 static sh_atomic_model selected_atomic_model_;
679 const sh_atomic_model&
680 selected_atomic_model (void)
682 return selected_atomic_model_;
685 static sh_atomic_model
686 parse_validate_atomic_model_option (const char* str)
688 const char* model_names[sh_atomic_model::num_models];
689 model_names[sh_atomic_model::none] = "none";
690 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
691 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
692 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
693 model_names[sh_atomic_model::soft_imask] = "soft-imask";
695 const char* model_cdef_names[sh_atomic_model::num_models];
696 model_cdef_names[sh_atomic_model::none] = "NONE";
697 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
698 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
699 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
700 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
702 sh_atomic_model ret;
703 ret.type = sh_atomic_model::none;
704 ret.name = model_names[sh_atomic_model::none];
705 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
706 ret.strict = false;
707 ret.tcb_gbr_offset = -1;
709 /* Handle empty string as 'none'. */
710 if (str == NULL || *str == '\0')
711 return ret;
713 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
715 std::vector<std::string> tokens;
716 for (std::stringstream ss (str); ss.good (); )
718 tokens.push_back (std::string ());
719 std::getline (ss, tokens.back (), ',');
722 if (tokens.empty ())
723 err_ret ("invalid atomic model option");
725 /* The first token must be the atomic model name. */
727 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
728 if (tokens.front () == model_names[i])
730 ret.type = (sh_atomic_model::enum_type)i;
731 ret.name = model_names[i];
732 ret.cdef_name = model_cdef_names[i];
733 goto got_mode_name;
736 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
737 got_mode_name:;
740 /* Go through the remaining tokens. */
741 for (size_t i = 1; i < tokens.size (); ++i)
743 if (tokens[i] == "strict")
744 ret.strict = true;
745 else if (tokens[i].find ("gbr-offset=") == 0)
747 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
748 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
749 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
750 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
751 "option", offset_str.c_str ());
753 else
754 err_ret ("unknown parameter \"%s\" in atomic model option",
755 tokens[i].c_str ());
758 /* Check that the selection makes sense. */
759 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
760 err_ret ("atomic operations are not supported on SHmedia");
762 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
763 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
764 ret.name);
766 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
767 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
769 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
770 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
772 if (ret.type == sh_atomic_model::soft_tcb
773 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
774 || (ret.tcb_gbr_offset & 3) != 0))
775 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
776 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
777 ret.name);
779 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
780 err_ret ("cannot use atomic model %s in user mode", ret.name);
782 return ret;
784 #undef err_ret
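/* Example option strings accepted by the parser above (illustrative values):
     -matomic-model=soft-gusa,strict
     -matomic-model=soft-tcb,gbr-offset=16
   The gbr-offset parameter is required for the soft-tcb model and must be
   a multiple of 4 in the range 0-1020.  */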
787 /* Register SH specific RTL passes. */
788 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
789 const char* name);
790 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
791 const char* name);
792 static void
793 register_sh_passes (void)
795 if (!TARGET_SH1)
796 return;
798 /* Running the sh_treg_combine pass after ce1 generates better code when
799 comparisons are combined and reg-reg moves are introduced, because
800 reg-reg moves will be eliminated afterwards. However, there are quite
801 a few cases where combine will be unable to fold comparison-related insns,
802 thus for now don't do it.
803 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
804 PASS_POS_INSERT_AFTER, "ce1", 1);
805 */
807 /* Run sh_treg_combine pass after combine but before register allocation. */
808 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
809 PASS_POS_INSERT_AFTER, "split1", 1);
811 /* Run sh_treg_combine pass after register allocation and basic block
812 reordering as this sometimes creates new opportunities. */
813 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
814 PASS_POS_INSERT_AFTER, "split4", 1);
816 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
817 is known after a conditional branch.
818 This must be done after basic blocks and branch conditions have
819 stabilized and won't be changed by further passes. */
820 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
821 PASS_POS_INSERT_BEFORE, "sched2", 1);
824 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
825 various options, and do some machine dependent initialization. */
826 static void
827 sh_option_override (void)
829 int regno;
831 SUBTARGET_OVERRIDE_OPTIONS;
832 if (optimize > 1 && !optimize_size)
833 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
835 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
836 TARGET_CBRANCHDI4 = 1;
837 TARGET_CMPEQDI_T = 0;
839 sh_cpu = PROCESSOR_SH1;
840 assembler_dialect = 0;
841 if (TARGET_SH2)
842 sh_cpu = PROCESSOR_SH2;
843 if (TARGET_SH2E)
844 sh_cpu = PROCESSOR_SH2E;
845 if (TARGET_SH2A)
846 sh_cpu = PROCESSOR_SH2A;
847 if (TARGET_SH3)
848 sh_cpu = PROCESSOR_SH3;
849 if (TARGET_SH3E)
850 sh_cpu = PROCESSOR_SH3E;
851 if (TARGET_SH4)
853 assembler_dialect = 1;
854 sh_cpu = PROCESSOR_SH4;
856 if (TARGET_SH4A)
858 assembler_dialect = 1;
859 sh_cpu = PROCESSOR_SH4A;
861 if (TARGET_SH5)
863 sh_cpu = PROCESSOR_SH5;
864 target_flags |= MASK_ALIGN_DOUBLE;
865 if (TARGET_SHMEDIA_FPU)
866 target_flags |= MASK_FMOVD;
867 if (TARGET_SHMEDIA)
869 /* There are no delay slots on SHmedia. */
870 flag_delayed_branch = 0;
871 /* Relaxation isn't yet supported for SHmedia. */
872 target_flags &= ~MASK_RELAX;
873 /* After reload, if-conversion does little good but can cause
874 ICEs:
875 - find_if_block doesn't do anything for SH because we don't
876 have conditional execution patterns. (We use conditional
877 move patterns, which are handled differently, and only
878 before reload).
879 - find_cond_trap doesn't do anything for the SH because we
880 don't have conditional traps.
881 - find_if_case_1 uses redirect_edge_and_branch_force in
882 the only path that does an optimization, and this causes
883 an ICE when branch targets are in registers.
884 - find_if_case_2 doesn't do anything for the SHmedia after
885 reload except when it can redirect a tablejump - and
886 that's rather rare. */
887 flag_if_conversion2 = 0;
888 if (! strcmp (sh_div_str, "call"))
889 sh_div_strategy = SH_DIV_CALL;
890 else if (! strcmp (sh_div_str, "call2"))
891 sh_div_strategy = SH_DIV_CALL2;
892 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
893 sh_div_strategy = SH_DIV_FP;
894 else if (! strcmp (sh_div_str, "inv"))
895 sh_div_strategy = SH_DIV_INV;
896 else if (! strcmp (sh_div_str, "inv:minlat"))
897 sh_div_strategy = SH_DIV_INV_MINLAT;
898 else if (! strcmp (sh_div_str, "inv20u"))
899 sh_div_strategy = SH_DIV_INV20U;
900 else if (! strcmp (sh_div_str, "inv20l"))
901 sh_div_strategy = SH_DIV_INV20L;
902 else if (! strcmp (sh_div_str, "inv:call2"))
903 sh_div_strategy = SH_DIV_INV_CALL2;
904 else if (! strcmp (sh_div_str, "inv:call"))
905 sh_div_strategy = SH_DIV_INV_CALL;
906 else if (! strcmp (sh_div_str, "inv:fp"))
908 if (TARGET_FPU_ANY)
909 sh_div_strategy = SH_DIV_INV_FP;
910 else
911 sh_div_strategy = SH_DIV_INV;
913 TARGET_CBRANCHDI4 = 0;
914 /* Assembler CFI isn't yet fully supported for SHmedia. */
915 flag_dwarf2_cfi_asm = 0;
918 else
920 /* Only the sh64-elf assembler supports .quad properly. */
921 targetm.asm_out.aligned_op.di = NULL;
922 targetm.asm_out.unaligned_op.di = NULL;
925 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
926 Disable it for everything else. */
927 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
928 TARGET_USERMODE = false;
930 if (TARGET_SH1)
932 if (! strcmp (sh_div_str, "call-div1"))
933 sh_div_strategy = SH_DIV_CALL_DIV1;
934 else if (! strcmp (sh_div_str, "call-fp")
935 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
936 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
937 sh_div_strategy = SH_DIV_CALL_FP;
938 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
939 sh_div_strategy = SH_DIV_CALL_TABLE;
940 else
941 /* Pick one that makes the most sense for the target in general.
942 It is not much good to use different functions depending
943 on -Os, since then we'll end up with two different functions
944 when some of the code is compiled for size, and some for
945 speed. */
947 /* SH4 tends to emphasize speed. */
948 if (TARGET_HARD_SH4)
949 sh_div_strategy = SH_DIV_CALL_TABLE;
950 /* These have their own way of doing things. */
951 else if (TARGET_SH2A)
952 sh_div_strategy = SH_DIV_INTRINSIC;
953 /* ??? Should we use the integer SHmedia function instead? */
954 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
955 sh_div_strategy = SH_DIV_CALL_FP;
956 /* SH1 .. SH3 cores often go into small-footprint systems, so
957 default to the smallest implementation available. */
958 else
959 sh_div_strategy = SH_DIV_CALL_DIV1;
961 if (!TARGET_SH1)
962 TARGET_PRETEND_CMOVE = 0;
963 if (sh_divsi3_libfunc[0])
964 ; /* User supplied - leave it alone. */
965 else if (TARGET_DIVIDE_CALL_FP)
966 sh_divsi3_libfunc = "__sdivsi3_i4";
967 else if (TARGET_DIVIDE_CALL_TABLE)
968 sh_divsi3_libfunc = "__sdivsi3_i4i";
969 else if (TARGET_SH5)
970 sh_divsi3_libfunc = "__sdivsi3_1";
971 else
972 sh_divsi3_libfunc = "__sdivsi3";
974 if (sh_branch_cost == -1)
976 /* The SH1 does not have delay slots, hence we get a pipeline stall
977 at every branch. The SH4 is superscalar, so the single delay slot
978 is not sufficient to keep both pipelines filled.
979 In any case, set the default branch cost to '2', as it results in
980 slightly smaller code overall and also enables some if-conversions
981 that are required for matching special T bit related insns. */
982 sh_branch_cost = 2;
985 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
986 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
987 TARGET_ZDCBRANCH = 1;
989 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
990 if (! VALID_REGISTER_P (regno))
991 sh_register_names[regno][0] = '\0';
993 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
994 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
995 sh_additional_register_names[regno][0] = '\0';
997 if ((flag_pic && ! TARGET_PREFERGOT)
998 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
999 flag_no_function_cse = 1;
1001 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1003 /* Never run scheduling before reload, since that can
1004 break global alloc, and generates slower code anyway due
1005 to the pressure on R0. */
1006 /* Enable sched1 for SH4 if the user explicitly requests it.
1007 When sched1 is enabled, the ready queue will be reordered by
1008 the target hooks if pressure is high. We cannot do this for
1009 PIC, SH3 and lower as they give spill failures for R0. */
1010 if (!TARGET_HARD_SH4 || flag_pic)
1011 flag_schedule_insns = 0;
1012 /* ??? Current exception handling places basic block boundaries
1013 after call_insns. This causes high pressure on R0 and gives
1014 spill failures for R0 in reload. See PR 22553 and the thread
1015 on gcc-patches
1016 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1017 else if (flag_exceptions)
1019 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1020 warning (0, "ignoring -fschedule-insns because of exception "
1021 "handling bug");
1022 flag_schedule_insns = 0;
1024 else if (flag_schedule_insns
1025 && !global_options_set.x_flag_schedule_insns)
1026 flag_schedule_insns = 0;
1029 /* Unwind info is not correct around the CFG unless either a frame
1030 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1031 unwind info generation to be aware of the CFG and propagating states
1032 around edges. */
1033 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1034 || flag_exceptions || flag_non_call_exceptions)
1035 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1037 warning (0, "unwind tables currently require either a frame pointer "
1038 "or -maccumulate-outgoing-args for correctness");
1039 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1042 /* Unwinding with -freorder-blocks-and-partition does not work on this
1043 architecture, because it requires far jumps to labels crossing between
1044 hot/cold sections, which are rejected on this architecture. */
1045 if (flag_reorder_blocks_and_partition)
1047 if (flag_exceptions)
1049 inform (input_location,
1050 "-freorder-blocks-and-partition does not work with "
1051 "exceptions on this architecture");
1052 flag_reorder_blocks_and_partition = 0;
1053 flag_reorder_blocks = 1;
1055 else if (flag_unwind_tables)
1057 inform (input_location,
1058 "-freorder-blocks-and-partition does not support unwind "
1059 "info on this architecture");
1060 flag_reorder_blocks_and_partition = 0;
1061 flag_reorder_blocks = 1;
1065 /* Adjust loop, jump and function alignment values (in bytes), if those
1066 were not specified by the user using -falign-loops, -falign-jumps
1067 and -falign-functions options.
1068 32 bit alignment is better for speed, because instructions can be
1069 fetched as a pair from a longword boundary. For size use 16 bit
1070 alignment to get more compact code.
1071 Aligning all jumps increases the code size, even if it might
1072 result in slightly faster code. Thus, it is set to the smallest
1073 alignment possible if not specified by the user. */
1074 if (align_loops == 0)
1076 if (TARGET_SH5)
1077 align_loops = 8;
1078 else
1079 align_loops = optimize_size ? 2 : 4;
1082 if (align_jumps == 0)
1084 if (TARGET_SHMEDIA)
1085 align_jumps = 1 << CACHE_LOG;
1086 else
1087 align_jumps = 2;
1089 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1090 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1092 if (align_functions == 0)
1094 if (TARGET_SHMEDIA)
1095 align_functions = optimize_size
1096 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1097 else
1098 align_functions = optimize_size ? 2 : 4;
1101 /* The linker relaxation code breaks when a function contains
1102 alignments that are larger than that at the start of a
1103 compilation unit. */
1104 if (TARGET_RELAX)
1106 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1108 /* Also take possible .long constants / mova tables into account. */
1109 if (min_align < 4)
1110 min_align = 4;
1111 if (align_functions < min_align)
1112 align_functions = min_align;
1115 if (flag_unsafe_math_optimizations)
1117 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1118 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1119 TARGET_FSCA = 1;
1121 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1122 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1123 TARGET_FSRRA = 1;
1126 /* Allow fsrra insn only if -funsafe-math-optimizations and
1127 -ffinite-math-only are enabled. */
1128 TARGET_FSRRA = TARGET_FSRRA
1129 && flag_unsafe_math_optimizations
1130 && flag_finite_math_only;
1132 /* If the -mieee option was not explicitly set by the user, turn it on
1133 unless -ffinite-math-only was specified. See also PR 33135. */
1134 if (! global_options_set.x_TARGET_IEEE)
1135 TARGET_IEEE = ! flag_finite_math_only;
1137 if (sh_fixed_range_str)
1138 sh_fix_range (sh_fixed_range_str);
1140 /* This target defaults to strict volatile bitfields. */
1141 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1142 flag_strict_volatile_bitfields = 1;
1144 /* Parse atomic model option and make sure it is valid for the current
1145 target CPU. */
1146 selected_atomic_model_
1147 = parse_validate_atomic_model_option (sh_atomic_model_str);
1149 register_sh_passes ();
1152 /* Print the operand address in x to the stream. */
1153 static void
1154 sh_print_operand_address (FILE *stream, rtx x)
1156 switch (GET_CODE (x))
1158 case REG:
1159 case SUBREG:
1160 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1161 break;
1163 case PLUS:
1165 rtx base = XEXP (x, 0);
1166 rtx index = XEXP (x, 1);
1168 switch (GET_CODE (index))
1170 case CONST_INT:
1171 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1172 reg_names[true_regnum (base)]);
1173 break;
1175 case REG:
1176 case SUBREG:
1178 int base_num = true_regnum (base);
1179 int index_num = true_regnum (index);
1181 fprintf (stream, "@(r0,%s)",
1182 reg_names[MAX (base_num, index_num)]);
1183 break;
1186 default:
1187 gcc_unreachable ();
1190 break;
1192 case PRE_DEC:
1193 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1194 break;
1196 case POST_INC:
1197 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1198 break;
1200 default:
1201 x = mark_constant_pool_use (x);
1202 output_addr_const (stream, x);
1203 break;
1207 /* Print operand x (an rtx) in assembler syntax to file stream
1208 according to modifier code.
1210 '.' print a .s if insn needs delay slot
1211 ',' print LOCAL_LABEL_PREFIX
1212 '@' print trap, rte or rts depending upon pragma interruptness
1213 '#' output a nop if there is nothing to put in the delay slot
1214 ''' print likelihood suffix (/u for unlikely).
1215 '>' print branch target if -fverbose-asm
1216 'O' print a constant without the #
1217 'R' print the LSW of a dp value - changes if in little endian
1218 'S' print the MSW of a dp value - changes if in little endian
1219 'T' print the next word of a dp value - same as 'R' in big endian mode.
1220 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1221 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1222 'N' print 'r63' if the operand is (const_int 0).
1223 'd' print a V2SF reg as dN instead of fpN.
1224 'm' print a pair `base,offset' or `base,index', for LD and ST.
1225 'U' Likewise for {LD,ST}{HI,LO}.
1226 'V' print the position of a single bit set.
1227 'W' print the position of a single bit cleared.
1228 't' print a memory address which is a register.
1229 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1230 'o' output an operator. */
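/* As a made-up illustration of the modifiers above (the template text is a
   hypothetical example, not taken from sh.md): an output template such as
   "mov.l %1,%0\n\tmov.l %T1,%T0" would move both words of a double-word
   value, and "%#" in a template emits a nop when nothing fills the delay
   slot.  */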
1231 static void
1232 sh_print_operand (FILE *stream, rtx x, int code)
1234 int regno;
1235 machine_mode mode;
1237 switch (code)
1239 tree trapa_attr;
1241 case '.':
1242 if (final_sequence
1243 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1244 && get_attr_length (final_sequence->insn (1)))
1245 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1246 break;
1247 case ',':
1248 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1249 break;
1250 case '@':
1251 trapa_attr = lookup_attribute ("trap_exit",
1252 DECL_ATTRIBUTES (current_function_decl));
1253 if (trapa_attr)
1254 fprintf (stream, "trapa #%ld",
1255 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1256 else if (sh_cfun_interrupt_handler_p ())
1258 if (sh_cfun_resbank_handler_p ())
1259 fprintf (stream, "resbank\n");
1260 fprintf (stream, "rte");
1262 else
1263 fprintf (stream, "rts");
1264 break;
1265 case '#':
1266 /* Output a nop if there's nothing in the delay slot. */
1267 if (dbr_sequence_length () == 0)
1268 fprintf (stream, "\n\tnop");
1269 break;
1270 case '\'':
1272 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1274 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1275 fputs ("/u", stream);
1276 break;
1278 case '>':
1279 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1281 fputs ("\t! target: ", stream);
1282 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1284 break;
1285 case 'O':
1286 x = mark_constant_pool_use (x);
1287 output_addr_const (stream, x);
1288 break;
1289 /* N.B.: %R / %S / %T adjust memory addresses by four.
1290 For SHMEDIA, that means they can be used to access the first and
1291 second 32 bit part of a 64 bit (or larger) value that
1292 might be held in floating point registers or memory.
1293 While they can be used to access 64 bit parts of a larger value
1294 held in general purpose registers, that won't work with memory -
1295 neither for fp registers, since the frxx names are used. */
1296 case 'R':
1297 if (REG_P (x) || GET_CODE (x) == SUBREG)
1299 regno = true_regnum (x);
1300 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1301 fputs (reg_names[regno], (stream));
1303 else if (MEM_P (x))
1305 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1306 sh_print_operand_address (stream, XEXP (x, 0));
1308 else
1310 rtx sub = NULL_RTX;
1312 mode = GET_MODE (x);
1313 if (mode == VOIDmode)
1314 mode = DImode;
1315 if (GET_MODE_SIZE (mode) >= 8)
1316 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1317 if (sub)
1318 sh_print_operand (stream, sub, 0);
1319 else
1320 output_operand_lossage ("invalid operand to %%R");
1322 break;
1323 case 'S':
1324 if (REG_P (x) || GET_CODE (x) == SUBREG)
1326 regno = true_regnum (x);
1327 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1328 fputs (reg_names[regno], (stream));
1330 else if (MEM_P (x))
1332 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1333 sh_print_operand_address (stream, XEXP (x, 0));
1335 else
1337 rtx sub = NULL_RTX;
1339 mode = GET_MODE (x);
1340 if (mode == VOIDmode)
1341 mode = DImode;
1342 if (GET_MODE_SIZE (mode) >= 8)
1343 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1344 if (sub)
1345 sh_print_operand (stream, sub, 0);
1346 else
1347 output_operand_lossage ("invalid operand to %%S");
1349 break;
1350 case 'T':
1351 /* Next word of a double. */
1352 switch (GET_CODE (x))
1354 case REG:
1355 fputs (reg_names[REGNO (x) + 1], (stream));
1356 break;
1357 case MEM:
1358 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1359 && GET_CODE (XEXP (x, 0)) != POST_INC)
1360 x = adjust_address (x, SImode, 4);
1361 sh_print_operand_address (stream, XEXP (x, 0));
1362 break;
1363 default:
1364 break;
1366 break;
1368 case 't':
1369 gcc_assert (MEM_P (x));
1370 x = XEXP (x, 0);
1371 switch (GET_CODE (x))
1373 case REG:
1374 case SUBREG:
1375 sh_print_operand (stream, x, 0);
1376 break;
1377 default:
1378 break;
1380 break;
1382 case 'o':
1383 switch (GET_CODE (x))
1385 case PLUS: fputs ("add", stream); break;
1386 case MINUS: fputs ("sub", stream); break;
1387 case MULT: fputs ("mul", stream); break;
1388 case DIV: fputs ("div", stream); break;
1389 case EQ: fputs ("eq", stream); break;
1390 case NE: fputs ("ne", stream); break;
1391 case GT: case LT: fputs ("gt", stream); break;
1392 case GE: case LE: fputs ("ge", stream); break;
1393 case GTU: case LTU: fputs ("gtu", stream); break;
1394 case GEU: case LEU: fputs ("geu", stream); break;
1395 default:
1396 break;
1398 break;
1399 case 'M':
1400 if (TARGET_SHMEDIA)
1402 if (MEM_P (x)
1403 && GET_CODE (XEXP (x, 0)) == PLUS
1404 && (REG_P (XEXP (XEXP (x, 0), 1))
1405 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1406 fputc ('x', stream);
1408 else
1410 if (MEM_P (x))
1412 switch (GET_MODE (x))
1414 case QImode: fputs (".b", stream); break;
1415 case HImode: fputs (".w", stream); break;
1416 case SImode: fputs (".l", stream); break;
1417 case SFmode: fputs (".s", stream); break;
1418 case DFmode: fputs (".d", stream); break;
1419 default: gcc_unreachable ();
1423 break;
1425 case 'm':
1426 gcc_assert (MEM_P (x));
1427 x = XEXP (x, 0);
1428 /* Fall through. */
1429 case 'U':
1430 switch (GET_CODE (x))
1432 case REG:
1433 case SUBREG:
1434 sh_print_operand (stream, x, 0);
1435 fputs (", 0", stream);
1436 break;
1438 case PLUS:
1439 sh_print_operand (stream, XEXP (x, 0), 0);
1440 fputs (", ", stream);
1441 sh_print_operand (stream, XEXP (x, 1), 0);
1442 break;
1444 default:
1445 gcc_unreachable ();
1447 break;
1449 case 'V':
1451 int num = exact_log2 (INTVAL (x));
1452 gcc_assert (num >= 0);
1453 fprintf (stream, "#%d", num);
1455 break;
1457 case 'W':
1459 int num = exact_log2 (~INTVAL (x));
1460 gcc_assert (num >= 0);
1461 fprintf (stream, "#%d", num);
1463 break;
1465 case 'd':
1466 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1468 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1469 break;
1471 case 'N':
1472 if (x == CONST0_RTX (GET_MODE (x)))
1474 fprintf ((stream), "r63");
1475 break;
1477 goto default_output;
1478 case 'u':
1479 if (CONST_INT_P (x))
1481 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1482 break;
1484 /* Fall through. */
1486 default_output:
1487 default:
1488 regno = 0;
1489 mode = GET_MODE (x);
1491 switch (GET_CODE (x))
1493 case TRUNCATE:
1495 rtx inner = XEXP (x, 0);
1496 int offset = 0;
1497 machine_mode inner_mode;
1499 /* We might see SUBREGs with vector mode registers inside. */
1500 if (GET_CODE (inner) == SUBREG
1501 && (GET_MODE_SIZE (GET_MODE (inner))
1502 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1503 && subreg_lowpart_p (inner))
1504 inner = SUBREG_REG (inner);
1505 if (CONST_INT_P (inner))
1507 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1508 goto default_output;
1510 inner_mode = GET_MODE (inner);
1511 if (GET_CODE (inner) == SUBREG
1512 && (GET_MODE_SIZE (GET_MODE (inner))
1513 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1514 && REG_P (SUBREG_REG (inner)))
1516 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1517 GET_MODE (SUBREG_REG (inner)),
1518 SUBREG_BYTE (inner),
1519 GET_MODE (inner));
1520 inner = SUBREG_REG (inner);
1522 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1523 abort ();
1524 /* Floating point register pairs are always big endian;
1525 general purpose registers are 64 bit wide. */
1526 regno = REGNO (inner);
1527 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1528 - HARD_REGNO_NREGS (regno, mode))
1529 + offset;
1530 x = inner;
1531 goto reg;
1533 case SIGN_EXTEND:
1534 x = XEXP (x, 0);
1535 goto reg;
1536 /* FIXME: We need this on SHmedia32 because reload generates
1537 some sign-extended HI or QI loads into DImode registers
1538 but, because Pmode is SImode, the address ends up with a
1539 subreg:SI of the DImode register. Maybe reload should be
1540 fixed so as to apply alter_subreg to such loads? */
1541 case IF_THEN_ELSE:
1542 gcc_assert (trapping_target_operand (x, VOIDmode));
1543 x = XEXP (XEXP (x, 2), 0);
1544 goto default_output;
1545 case SUBREG:
1546 gcc_assert (SUBREG_BYTE (x) == 0
1547 && REG_P (SUBREG_REG (x)));
1549 x = SUBREG_REG (x);
1550 /* Fall through. */
1552 reg:
1553 case REG:
1554 regno += REGNO (x);
1555 if (FP_REGISTER_P (regno)
1556 && mode == V16SFmode)
1557 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1558 else if (FP_REGISTER_P (REGNO (x))
1559 && mode == V4SFmode)
1560 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1561 else if (REG_P (x)
1562 && mode == V2SFmode)
1563 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1564 else if (FP_REGISTER_P (REGNO (x))
1565 && GET_MODE_SIZE (mode) > 4)
1566 fprintf ((stream), "d%s", reg_names[regno] + 1);
1567 else
1568 fputs (reg_names[regno], (stream));
1569 break;
1571 case MEM:
1572 output_address (XEXP (x, 0));
1573 break;
1575 default:
1576 if (TARGET_SH1)
1577 fputc ('#', stream);
1578 output_addr_const (stream, x);
1579 break;
1581 break;
1585 static bool
1586 sh_print_operand_punct_valid_p (unsigned char code)
1588 return (code == '.' || code == '#' || code == '@' || code == ','
1589 || code == '$' || code == '\'' || code == '>');
1592 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1593 static bool
1594 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1596 if (GET_CODE (x) == UNSPEC)
1598 switch (XINT (x, 1))
1600 case UNSPEC_DATALABEL:
1601 fputs ("datalabel ", file);
1602 output_addr_const (file, XVECEXP (x, 0, 0));
1603 break;
1604 case UNSPEC_PIC:
1605 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1606 output_addr_const (file, XVECEXP (x, 0, 0));
1607 break;
1608 case UNSPEC_GOT:
1609 output_addr_const (file, XVECEXP (x, 0, 0));
1610 fputs ("@GOT", file);
1611 break;
1612 case UNSPEC_GOTOFF:
1613 output_addr_const (file, XVECEXP (x, 0, 0));
1614 fputs ("@GOTOFF", file);
1615 break;
1616 case UNSPEC_PLT:
1617 output_addr_const (file, XVECEXP (x, 0, 0));
1618 fputs ("@PLT", file);
1619 break;
1620 case UNSPEC_GOTPLT:
1621 output_addr_const (file, XVECEXP (x, 0, 0));
1622 fputs ("@GOTPLT", file);
1623 break;
1624 case UNSPEC_DTPOFF:
1625 output_addr_const (file, XVECEXP (x, 0, 0));
1626 fputs ("@DTPOFF", file);
1627 break;
1628 case UNSPEC_GOTTPOFF:
1629 output_addr_const (file, XVECEXP (x, 0, 0));
1630 fputs ("@GOTTPOFF", file);
1631 break;
1632 case UNSPEC_TPOFF:
1633 output_addr_const (file, XVECEXP (x, 0, 0));
1634 fputs ("@TPOFF", file);
1635 break;
1636 case UNSPEC_CALLER:
1638 char name[32];
1639 /* LPCS stands for Label for PIC Call Site. */
1640 targetm.asm_out.generate_internal_label (name, "LPCS",
1641 INTVAL (XVECEXP (x, 0, 0)));
1642 assemble_name (file, name);
1644 break;
1645 case UNSPEC_EXTRACT_S16:
1646 case UNSPEC_EXTRACT_U16:
1648 rtx val, shift;
1650 val = XVECEXP (x, 0, 0);
1651 shift = XVECEXP (x, 0, 1);
1652 fputc ('(', file);
1653 if (shift != const0_rtx)
1654 fputc ('(', file);
1655 if (GET_CODE (val) == CONST
1656 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1658 fputc ('(', file);
1659 output_addr_const (file, val);
1660 fputc (')', file);
1662 else
1663 output_addr_const (file, val);
1664 if (shift != const0_rtx)
1666 fputs (" >> ", file);
1667 output_addr_const (file, shift);
1668 fputc (')', file);
1670 fputs (" & 65535)", file);
1672 break;
1673 case UNSPEC_SYMOFF:
1674 output_addr_const (file, XVECEXP (x, 0, 0));
1675 fputc ('-', file);
1676 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1678 fputc ('(', file);
1679 output_addr_const (file, XVECEXP (x, 0, 1));
1680 fputc (')', file);
1682 else
1683 output_addr_const (file, XVECEXP (x, 0, 1));
1684 break;
1685 case UNSPEC_PCREL_SYMOFF:
1686 output_addr_const (file, XVECEXP (x, 0, 0));
1687 fputs ("-(", file);
1688 output_addr_const (file, XVECEXP (x, 0, 1));
1689 fputs ("-.)", file);
1690 break;
1691 default:
1692 return false;
1694 return true;
1696 else
1697 return false;
1700 /* Encode symbol attributes of a SYMBOL_REF into its
1701 SYMBOL_REF_FLAGS. */
1702 static void
1703 sh_encode_section_info (tree decl, rtx rtl, int first)
1705 default_encode_section_info (decl, rtl, first);
1707 if (TREE_CODE (decl) == FUNCTION_DECL
1708 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1709 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1712 /* Prepare operands for a move define_expand; specifically, one of the
1713 operands must be in a register. */
1714 void
1715 prepare_move_operands (rtx operands[], machine_mode mode)
1717 if ((mode == SImode || mode == DImode)
1718 && flag_pic
1719 && ! ((mode == Pmode || mode == ptr_mode)
1720 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1722 rtx temp;
1723 if (SYMBOLIC_CONST_P (operands[1]))
1725 if (MEM_P (operands[0]))
1726 operands[1] = force_reg (Pmode, operands[1]);
1727 else if (TARGET_SHMEDIA
1728 && GET_CODE (operands[1]) == LABEL_REF
1729 && target_reg_operand (operands[0], mode))
1730 /* It's ok. */;
1731 else
1733 temp = (!can_create_pseudo_p ()
1734 ? operands[0]
1735 : gen_reg_rtx (Pmode));
1736 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1739 else if (GET_CODE (operands[1]) == CONST
1740 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1741 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1743 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1744 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1745 mode, temp);
1746 operands[1] = expand_binop (mode, add_optab, temp,
1747 XEXP (XEXP (operands[1], 0), 1),
1748 (!can_create_pseudo_p ()
1749 ? temp
1750 : gen_reg_rtx (Pmode)),
1751 0, OPTAB_LIB_WIDEN);
1755 if (! reload_in_progress && ! reload_completed)
1757 /* Copy the source to a register if neither operand is a register. */
1758 if (! register_operand (operands[0], mode)
1759 && ! sh_register_operand (operands[1], mode))
1760 operands[1] = copy_to_mode_reg (mode, operands[1]);
1762 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1764 /* This is like change_address_1 (operands[0], mode, 0, 1),
1765 except that we can't use that function because it is static. */
1766 rtx new_rtx = change_address (operands[0], mode, 0);
1767 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1768 operands[0] = new_rtx;
1771 /* This case can happen while generating code to move the result
1772 of a library call to the target. Reject `st r0,@(rX,rY)' because
1773 reload will fail to find a spill register for rX, since r0 is already
1774 being used for the source. */
1775 else if (TARGET_SH1
1776 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1777 && MEM_P (operands[0])
1778 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1779 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1780 operands[1] = copy_to_mode_reg (mode, operands[1]);
1783 if (mode == Pmode || mode == ptr_mode)
1785 rtx op0, op1, opc;
1786 enum tls_model tls_kind;
1788 op0 = operands[0];
1789 op1 = operands[1];
1790 if (GET_CODE (op1) == CONST
1791 && GET_CODE (XEXP (op1, 0)) == PLUS
1792 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1793 != TLS_MODEL_NONE))
1795 opc = XEXP (XEXP (op1, 0), 1);
1796 op1 = XEXP (XEXP (op1, 0), 0);
1798 else
1799 opc = NULL_RTX;
1801 if (! reload_in_progress && ! reload_completed
1802 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1804 rtx tga_op1, tga_ret, tmp, tmp2;
1806 if (! flag_pic
1807 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1808 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1809 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1811 /* Don't schedule insns for getting GOT address when
1812 the first scheduling is enabled, to avoid spill
1813 failures for R0. */
1814 if (flag_schedule_insns)
1815 emit_insn (gen_blockage ());
1816 emit_insn (gen_GOTaddr2picreg ());
1817 emit_use (gen_rtx_REG (SImode, PIC_REG));
1818 if (flag_schedule_insns)
1819 emit_insn (gen_blockage ());
1822 switch (tls_kind)
1824 case TLS_MODEL_GLOBAL_DYNAMIC:
1825 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1826 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1827 tmp = gen_reg_rtx (Pmode);
1828 emit_move_insn (tmp, tga_ret);
1829 op1 = tmp;
1830 break;
1832 case TLS_MODEL_LOCAL_DYNAMIC:
1833 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1834 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1836 tmp = gen_reg_rtx (Pmode);
1837 emit_move_insn (tmp, tga_ret);
1839 if (register_operand (op0, Pmode))
1840 tmp2 = op0;
1841 else
1842 tmp2 = gen_reg_rtx (Pmode);
1844 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1845 op1 = tmp2;
1846 break;
1848 case TLS_MODEL_INITIAL_EXEC:
1849 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1850 tmp = gen_sym2GOTTPOFF (op1);
1851 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1852 op1 = tga_op1;
1853 break;
1855 case TLS_MODEL_LOCAL_EXEC:
1856 tmp2 = gen_reg_rtx (Pmode);
1857 emit_insn (gen_store_gbr (tmp2));
1858 tmp = gen_reg_rtx (Pmode);
1859 emit_insn (gen_symTPOFF2reg (tmp, op1));
1861 if (register_operand (op0, Pmode))
1862 op1 = op0;
1863 else
1864 op1 = gen_reg_rtx (Pmode);
1866 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1867 break;
1869 default:
1870 gcc_unreachable ();
1872 if (opc)
1873 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1874 operands[1] = op1;
1879 /* Implement the canonicalize_comparison target hook for the combine
1880 pass. For the target hook this function is invoked via
1881 sh_canonicalize_comparison. This function is also re-used to
1882 canonicalize comparisons in cbranch pattern expanders. */
1883 static void
1884 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1885 machine_mode mode,
1886 bool op0_preserve_value)
1888 /* When invoked from within the combine pass the mode is not specified,
1889 so try to get it from one of the operands. */
1890 if (mode == VOIDmode)
1891 mode = GET_MODE (op0);
1892 if (mode == VOIDmode)
1893 mode = GET_MODE (op1);
1895 // We need to have a mode to do something useful here.
1896 if (mode == VOIDmode)
1897 return;
1899 // Currently, we don't deal with floats here.
1900 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1901 return;
1903 // Make sure that the constant operand is the second operand.
1904 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1906 if (op0_preserve_value)
1907 return;
1909 std::swap (op0, op1);
1910 cmp = swap_condition (cmp);
1913 if (CONST_INT_P (op1))
1915 /* Try to adjust the constant operand in such a way that available
1916 comparison insns can be utilized better and the constant can be
1917 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1918 constant pool. */
1919 const HOST_WIDE_INT val = INTVAL (op1);
1921 /* x > -1 --> x >= 0
1922 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1923 x <= -1 --> x < 0
1924 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1925 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1927 cmp = cmp == GT ? GE : LT;
1928 op1 = gen_int_mode (val + 1, mode);
1931 /* x >= 1 --> x > 0
1932 x >= 0x80 --> x > 0x7F
1933 x < 1 --> x <= 0
1934 x < 0x80 --> x <= 0x7F */
1935 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1937 cmp = cmp == GE ? GT : LE;
1938 op1 = gen_int_mode (val - 1, mode);
1941 /* unsigned x >= 1 --> x != 0
1942 unsigned x < 1 --> x == 0 */
1943 else if (val == 1 && (cmp == GEU || cmp == LTU))
1945 cmp = cmp == GEU ? NE : EQ;
1946 op1 = CONST0_RTX (mode);
1949 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1950 unsigned x < 0x80 --> unsigned x <= 0x7F */
1951 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1953 cmp = cmp == GEU ? GTU : LEU;
1954 op1 = gen_int_mode (val - 1, mode);
1957 /* unsigned x > 0 --> x != 0
1958 unsigned x <= 0 --> x == 0 */
1959 else if (val == 0 && (cmp == GTU || cmp == LEU))
1960 cmp = cmp == GTU ? NE : EQ;
1962 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1963 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1964 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1965 && val == 0x7FFFFFFF)
1967 cmp = cmp == GTU ? LT : GE;
1968 op1 = const0_rtx;
1971 /* unsigned x >= 0x80000000 --> signed x < 0
1972 unsigned x < 0x80000000 --> signed x >= 0 */
1973 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1974 && (unsigned HOST_WIDE_INT)val
1975 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1977 cmp = cmp == GEU ? LT : GE;
1978 op1 = const0_rtx;
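/* An illustration of the rewrites above: the SImode comparison
   (GEU x (const_int 1)) becomes (NE x (const_int 0)), and
   (GTU x (const_int 0x7FFFFFFF)) becomes (LT x (const_int 0)), so in
   both cases the constant no longer has to be loaded into a register.  */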
1983 /* This function implements the canonicalize_comparison target hook.
1984 This wrapper around the internally used sh_canonicalize_comparison
1985 function is needed to do the enum rtx_code <-> int conversion.
1986 Target hooks cannot use enum rtx_code in their definitions. */
1987 static void
1988 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1989 bool op0_preserve_value)
1991 enum rtx_code tmp_code = (enum rtx_code)*code;
1992 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1993 VOIDmode, op0_preserve_value);
1994 *code = (int)tmp_code;
1997 bool
1998 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2000 *p1 = T_REG;
2001 *p2 = INVALID_REGNUM;
2002 return true;
2005 enum rtx_code
2006 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2007 enum rtx_code comparison)
2009 /* The scratch reg is only available when this is invoked from within
2010 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2011 rtx scratch = NULL_RTX;
2013 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2014 comparison = GET_CODE (operands[0]);
2015 else
2016 scratch = operands[4];
2018 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2019 mode, false);
2021 /* Notice that this function is also invoked after reload by
2022 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2023 rtx op1 = operands[1];
2025 if (can_create_pseudo_p ())
2026 operands[1] = force_reg (mode, op1);
2027 /* When we are handling DImode comparisons, we want to keep constants so
2028 that we can optimize the component comparisons; however, memory loads
2029 are better issued as a whole so that they can be scheduled well.
2030 SImode equality comparisons allow I08 constants, but only when they
2031 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2032 into a register, that register might as well be r0, and we allow the
2033 constant. If it is already in a register, this is likely to be
2034 allocated to a different hard register, thus we load the constant into
2035 a register unless it is zero. */
2036 if (!REG_P (operands[2])
2037 && (!CONST_INT_P (operands[2])
2038 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2039 && ((comparison != EQ && comparison != NE)
2040 || (REG_P (op1) && REGNO (op1) != R0_REG)
2041 || !satisfies_constraint_I08 (operands[2])))))
2043 if (scratch && GET_MODE (scratch) == mode)
2045 emit_move_insn (scratch, operands[2]);
2046 operands[2] = scratch;
2048 else if (can_create_pseudo_p ())
2049 operands[2] = force_reg (mode, operands[2]);
2051 return comparison;
2054 void
2055 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2057 rtx (*branch_expander) (rtx) = gen_branch_true;
2058 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2059 switch (comparison)
2061 case NE: case LT: case LE: case LTU: case LEU:
2062 comparison = reverse_condition (comparison);
2063 branch_expander = gen_branch_false;
2064 default: ;
2066 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2067 gen_rtx_fmt_ee (comparison, SImode,
2068 operands[1], operands[2])));
2069 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2070 if (probability >= 0)
2071 add_int_reg_note (jump, REG_BR_PROB, probability);
2074 /* ??? How should we distribute probabilities when more than one branch
2075 is generated? So far we only have some ad-hoc observations:
2076 - If the operands are random, they are likely to differ in both parts.
2077 - If comparing items in a hash chain, the operands are random or equal;
2078 operation should be EQ or NE.
2079 - If items are searched in an ordered tree from the root, we can expect
2080 the highpart to be unequal about half of the time; operation should be
2081 an inequality comparison, operands non-constant, and overall probability
2082 about 50%. Likewise for quicksort.
2083 - Range checks will often be made against constants. Even if we assume for
2084 simplicity an even distribution of the non-constant operand over a
2085 sub-range here, the same probability could be generated with differently
2086 wide sub-ranges - as long as the ratio of the part of the subrange that
2087 is before the threshold to the part that comes after the threshold stays
2088 the same. Thus, we can't really tell anything here;
2089 assuming random distribution is at least simple.
2091 bool
2092 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2094 enum rtx_code msw_taken, msw_skip, lsw_taken;
2095 rtx_code_label *skip_label = NULL;
2096 rtx op1h, op1l, op2h, op2l;
2097 int num_branches;
2098 int prob, rev_prob;
2099 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2100 rtx scratch = operands[4];
2102 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2103 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2104 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2105 op1l = gen_lowpart (SImode, operands[1]);
2106 op2l = gen_lowpart (SImode, operands[2]);
2107 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2108 prob = split_branch_probability;
2109 rev_prob = REG_BR_PROB_BASE - prob;
2110 switch (comparison)
2112 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2113 That costs 1 cycle more when the first branch can be predicted taken,
2114 but saves us mispredicts because only one branch needs prediction.
2115 It also enables generating the cmpeqdi_t-1 pattern. */
2116 case EQ:
2117 if (TARGET_CMPEQDI_T)
2119 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2120 emit_jump_insn (gen_branch_true (operands[3]));
2121 return true;
2123 msw_skip = NE;
2124 lsw_taken = EQ;
2125 if (prob >= 0)
2127 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2128 msw_skip_prob = rev_prob;
2129 if (REG_BR_PROB_BASE <= 65535)
2130 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2131 else
2133 lsw_taken_prob
2134 = (prob
2135 ? (REG_BR_PROB_BASE
2136 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2137 / ((gcov_type) prob << 32)))
2138 : 0);
2141 break;
2142 case NE:
2143 if (TARGET_CMPEQDI_T)
2145 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2146 emit_jump_insn (gen_branch_false (operands[3]));
2147 return true;
2149 msw_taken = NE;
2150 msw_taken_prob = prob;
2151 lsw_taken = NE;
2152 lsw_taken_prob = 0;
2153 break;
2154 case GTU: case GT:
2155 msw_taken = comparison;
2156 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2157 break;
2158 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2159 msw_skip = swap_condition (msw_taken);
2160 lsw_taken = GTU;
2161 break;
2162 case GEU: case GE:
2163 if (op2l == CONST0_RTX (SImode))
2164 msw_taken = comparison;
2165 else
2167 msw_taken = comparison == GE ? GT : GTU;
2168 msw_skip = swap_condition (msw_taken);
2169 lsw_taken = GEU;
2171 break;
2172 case LTU: case LT:
2173 msw_taken = comparison;
2174 if (op2l == CONST0_RTX (SImode))
2175 break;
2176 msw_skip = swap_condition (msw_taken);
2177 lsw_taken = LTU;
2178 break;
2179 case LEU: case LE:
2180 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2181 msw_taken = comparison;
2182 else
2184 lsw_taken = LEU;
2185 if (comparison == LE)
2186 msw_taken = LT;
2187 else if (op2h != CONST0_RTX (SImode))
2188 msw_taken = LTU;
2189 else
2191 msw_skip = swap_condition (LTU);
2192 break;
2194 msw_skip = swap_condition (msw_taken);
2196 break;
2197 default: return false;
2199 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2200 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2201 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2202 if (comparison != EQ && comparison != NE && num_branches > 1)
2204 if (!CONSTANT_P (operands[2])
2205 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2206 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2208 msw_taken_prob = prob / 2U;
2209 msw_skip_prob
2210 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2211 lsw_taken_prob = prob;
2213 else
2215 msw_taken_prob = prob;
2216 msw_skip_prob = REG_BR_PROB_BASE;
2217 /* ??? If we have a constant op2h, should we use that when
2218 calculating lsw_taken_prob? */
2219 lsw_taken_prob = prob;
2222 operands[1] = op1h;
2223 operands[2] = op2h;
2224 operands[4] = NULL_RTX;
2225 if (reload_completed
2226 && ! arith_reg_or_0_operand (op2h, SImode)
2227 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2228 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2229 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2231 emit_move_insn (scratch, operands[2]);
2232 operands[2] = scratch;
2234 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2235 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2236 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2238 rtx taken_label = operands[3];
2240 /* Operands were possibly modified, but msw_skip doesn't expect this.
2241 Always use the original ones. */
2242 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2244 operands[1] = op1h;
2245 operands[2] = op2h;
2246 if (reload_completed
2247 && ! arith_reg_or_0_operand (op2h, SImode)
2248 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2250 emit_move_insn (scratch, operands[2]);
2251 operands[2] = scratch;
2255 operands[3] = skip_label = gen_label_rtx ();
2256 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2257 operands[3] = taken_label;
2259 operands[1] = op1l;
2260 operands[2] = op2l;
2261 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2263 if (reload_completed
2264 && ! arith_reg_or_0_operand (op2l, SImode)
2265 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2267 emit_move_insn (scratch, operands[2]);
2268 operands[2] = scratch;
2270 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2272 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2273 emit_label (skip_label);
2274 return true;
2277 /* Given an operand, return 1 if the evaluated operand plugged into an
2278 if_then_else will result in a branch_true, 0 if branch_false, or
2279 -1 if neither applies. The truth table goes like this:
2281 op | cmpval | code | result
2282 ---------+--------+---------+--------------------
2283 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2284 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2285 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2286 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2287 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2288 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2289 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2290 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2292 sh_eval_treg_value (rtx op)
2294 if (t_reg_operand (op, GET_MODE (op)))
2295 return 1;
2296 if (negt_reg_operand (op, GET_MODE (op)))
2297 return 0;
2299 rtx_code code = GET_CODE (op);
2300 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2301 return -1;
2303 int cmpop = code == EQ ? 1 : 0;
2304 int cmpval = INTVAL (XEXP (op, 1));
2305 if (cmpval != 0 && cmpval != 1)
2306 return -1;
2308 int t;
2309 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2310 t = 0;
2311 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2312 t = 1;
2313 else
2314 return -1;
2316 return t ^ (cmpval == cmpop);
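/* Worked example for the table above: for (eq (reg:SI T_REG) (const_int 0))
   we get t = 0, cmpval = 0 and cmpop = 1, so the result is
   0 ^ (0 == 1) = 0, i.e. the whole condition behaves like branch_false.  */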
2319 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2320 of floating-point comparisons. */
2321 static void
2322 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2324 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2325 && GET_CODE (insn) != PARALLEL)
2327 insn = gen_rtx_PARALLEL (VOIDmode,
2328 gen_rtvec (3, insn,
2329 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2330 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2332 emit_insn (insn);
2335 /* Prepare the operands for an scc instruction; make sure that the
2336 compare has been done and the result is in T_REG. */
2337 void
2338 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2340 rtx t_reg = get_t_reg_rtx ();
2341 enum rtx_code oldcode = code;
2342 machine_mode mode;
2344 /* First need a compare insn. */
2345 switch (code)
2347 case NE:
2348 /* It isn't possible to handle this case. */
2349 gcc_unreachable ();
2350 case LT:
2351 code = GT;
2352 break;
2353 case LE:
2354 code = GE;
2355 break;
2356 case LTU:
2357 code = GTU;
2358 break;
2359 case LEU:
2360 code = GEU;
2361 break;
2362 default:
2363 break;
2365 if (code != oldcode)
2366 std::swap (op0, op1);
2368 mode = GET_MODE (op0);
2369 if (mode == VOIDmode)
2370 mode = GET_MODE (op1);
2372 op0 = force_reg (mode, op0);
2373 if ((code != EQ && code != NE
2374 && (op1 != const0_rtx
2375 || code == GTU || code == GEU || code == LTU || code == LEU))
2376 || (mode == DImode && op1 != const0_rtx)
2377 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2378 op1 = force_reg (mode, op1);
2380 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2381 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2382 mode);
2386 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2387 rtx op0, rtx op1)
2389 rtx target = gen_reg_rtx (SImode);
2390 rtx tmp;
2392 gcc_assert (TARGET_SHMEDIA);
2393 switch (code)
2395 case EQ:
2396 case GT:
2397 case LT:
2398 case UNORDERED:
2399 case GTU:
2400 case LTU:
2401 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2402 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2403 code = NE;
2404 break;
2406 case NE:
2407 case GE:
2408 case LE:
2409 case ORDERED:
2410 case GEU:
2411 case LEU:
2412 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2413 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2414 code = EQ;
2415 break;
2417 case UNEQ:
2418 case UNGE:
2419 case UNGT:
2420 case UNLE:
2421 case UNLT:
2422 case LTGT:
2423 return NULL_RTX;
2425 default:
2426 gcc_unreachable ();
2429 if (mode == DImode)
2431 rtx t2 = gen_reg_rtx (DImode);
2432 emit_insn (gen_extendsidi2 (t2, target));
2433 target = t2;
2436 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2439 /* Called from the md file, set up the operands of a compare instruction. */
2440 void
2441 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2443 enum rtx_code code = GET_CODE (operands[0]);
2444 enum rtx_code branch_code;
2445 rtx op0 = operands[1];
2446 rtx op1 = operands[2];
2447 rtx insn;
2448 bool need_ccmpeq = false;
2450 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2452 op0 = force_reg (mode, op0);
2453 op1 = force_reg (mode, op1);
2455 else
2457 if (code != EQ || mode == DImode)
2459 /* Force args into regs, since we can't use constants here. */
2460 op0 = force_reg (mode, op0);
2461 if (op1 != const0_rtx || code == GTU || code == GEU)
2462 op1 = force_reg (mode, op1);
2466 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2468 if (code == LT
2469 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2470 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2472 std::swap (op0, op1);
2473 code = swap_condition (code);
2476 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2477 if (code == GE)
2479 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2480 need_ccmpeq = true;
2481 code = GT;
2484 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2485 to EQ/GT respectively. */
2486 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2489 switch (code)
2491 case EQ:
2492 case GT:
2493 case GE:
2494 case GTU:
2495 case GEU:
2496 branch_code = code;
2497 break;
2498 case NE:
2499 case LT:
2500 case LE:
2501 case LTU:
2502 case LEU:
2503 branch_code = reverse_condition (code);
2504 break;
2505 default:
2506 gcc_unreachable ();
2509 insn = gen_rtx_SET (VOIDmode,
2510 get_t_reg_rtx (),
2511 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2513 sh_emit_set_t_insn (insn, mode);
2514 if (need_ccmpeq)
2515 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2517 if (branch_code == code)
2518 emit_jump_insn (gen_branch_true (operands[3]));
2519 else
2520 emit_jump_insn (gen_branch_false (operands[3]));
2523 void
2524 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2526 enum rtx_code code = GET_CODE (operands[1]);
2527 rtx op0 = operands[2];
2528 rtx op1 = operands[3];
2529 rtx_code_label *lab = NULL;
2530 bool invert = false;
2532 op0 = force_reg (mode, op0);
2533 if ((code != EQ && code != NE
2534 && (op1 != const0_rtx
2535 || code == GTU || code == GEU || code == LTU || code == LEU))
2536 || (mode == DImode && op1 != const0_rtx)
2537 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2538 op1 = force_reg (mode, op1);
2540 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2542 if (code == LT || code == LE)
2544 std::swap (op0, op1);
2545 code = swap_condition (code);
2547 if (code == GE)
2549 if (TARGET_IEEE)
2551 lab = gen_label_rtx ();
2552 sh_emit_scc_to_t (EQ, op0, op1);
2553 emit_jump_insn (gen_branch_true (lab));
2554 code = GT;
2556 else
2558 code = LT;
2559 invert = true;
2564 if (code == NE)
2566 code = EQ;
2567 invert = true;
2570 sh_emit_scc_to_t (code, op0, op1);
2571 if (lab)
2572 emit_label (lab);
2573 if (invert)
2574 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2575 else
2576 emit_move_insn (operands[0], get_t_reg_rtx ());
2579 /* Functions to output assembly code. */
2581 /* Return a sequence of instructions to perform DI or DF move.
2583 Since the SH cannot move a DI or DF in one instruction, we have
2584 to take care when we see overlapping source and dest registers. */
2585 const char *
2586 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2587 machine_mode mode)
2589 rtx dst = operands[0];
2590 rtx src = operands[1];
2592 if (MEM_P (dst)
2593 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2594 return "mov.l %T1,%0" "\n"
2595 " mov.l %1,%0";
2597 if (register_operand (dst, mode)
2598 && register_operand (src, mode))
2600 if (REGNO (src) == MACH_REG)
2601 return "sts mach,%S0" "\n"
2602 " sts macl,%R0";
2604 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2605 when mov.d r1,r0 do r1->r0 then r2->r1. */
2606 if (REGNO (src) + 1 == REGNO (dst))
2607 return "mov %T1,%T0" "\n"
2608 " mov %1,%0";
2609 else
2610 return "mov %1,%0" "\n"
2611 " mov %T1,%T0";
2613 else if (CONST_INT_P (src))
2615 if (INTVAL (src) < 0)
2616 output_asm_insn ("mov #-1,%S0", operands);
2617 else
2618 output_asm_insn ("mov #0,%S0", operands);
2620 return "mov %1,%R0";
2622 else if (MEM_P (src))
2624 int ptrreg = -1;
2625 int dreg = REGNO (dst);
2626 rtx inside = XEXP (src, 0);
2628 switch (GET_CODE (inside))
2630 case REG:
2631 ptrreg = REGNO (inside);
2632 break;
2634 case SUBREG:
2635 ptrreg = subreg_regno (inside);
2636 break;
2638 case PLUS:
2639 ptrreg = REGNO (XEXP (inside, 0));
2640 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2641 an offsettable address. Unfortunately, offsettable addresses use
2642 QImode to check the offset, and a QImode offsettable address
2643 requires r0 for the other operand, which is not currently
2644 supported, so we can't use the 'o' constraint.
2645 Thus we must check for and handle r0+REG addresses here.
2646 We punt for now, since this is likely very rare. */
2647 gcc_assert (!REG_P (XEXP (inside, 1)));
2648 break;
2650 case LABEL_REF:
2651 return "mov.l %1,%0" "\n"
2652 " mov.l %1+4,%T0";
2653 case POST_INC:
2654 return "mov.l %1,%0" "\n"
2655 " mov.l %1,%T0";
2656 default:
2657 gcc_unreachable ();
2660 /* Work out the safe way to copy. Copy into the second half first. */
2661 if (dreg == ptrreg)
2662 return "mov.l %T1,%T0" "\n"
2663 " mov.l %1,%0";
2666 return "mov.l %1,%0" "\n"
2667 " mov.l %T1,%T0";
2670 /* Print an instruction which would have gone into a delay slot after
2671 another instruction, but couldn't because the other instruction expanded
2672 into a sequence where putting the slot insn at the end wouldn't work. */
2673 static void
2674 print_slot (rtx_sequence *seq)
2676 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2678 seq->insn (1)->set_deleted ();
2681 const char *
2682 output_far_jump (rtx_insn *insn, rtx op)
2684 struct { rtx lab, reg, op; } this_jmp;
2685 rtx_code_label *braf_base_lab = NULL;
2686 const char *jump;
2687 int far;
2688 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2689 rtx_insn *prev;
2691 this_jmp.lab = gen_label_rtx ();
2693 if (TARGET_SH2
2694 && offset >= -32764
2695 && offset - get_attr_length (insn) <= 32766)
2697 far = 0;
2698 jump = "mov.w %O0,%1" "\n"
2699 " braf %1";
2701 else
2703 far = 1;
2704 if (flag_pic)
2706 if (TARGET_SH2)
2707 jump = "mov.l %O0,%1" "\n"
2708 " braf %1";
2709 else
2710 jump = "mov.l r0,@-r15" "\n"
2711 " mova %O0,r0" "\n"
2712 " mov.l @r0,%1" "\n"
2713 " add r0,%1" "\n"
2714 " mov.l @r15+,r0" "\n"
2715 " jmp @%1";
2717 else
2718 jump = "mov.l %O0,%1" "\n"
2719 " jmp @%1";
2721 /* If we have a scratch register available, use it. */
2722 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2723 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2725 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2726 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2727 jump = "mov.l r1,@-r15" "\n"
2728 " mova %O0,r0" "\n"
2729 " mov.l @r0,r1" "\n"
2730 " add r1,r0" "\n"
2731 " mov.l @r15+,r1" "\n"
2732 " jmp @%1";
2733 output_asm_insn (jump, &this_jmp.lab);
2734 if (dbr_sequence_length ())
2735 print_slot (final_sequence);
2736 else
2737 output_asm_insn ("nop", 0);
2739 else
2741 /* Output the delay slot insn first if any. */
2742 if (dbr_sequence_length ())
2743 print_slot (final_sequence);
2745 this_jmp.reg = gen_rtx_REG (SImode, 13);
2746 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2747 Fortunately, MACL is fixed and call-clobbered, and we never
2748 need its value across jumps, so save r13 in it instead of in
2749 the stack. */
2750 if (TARGET_SH5)
2751 output_asm_insn ("lds r13,macl", 0);
2752 else
2753 output_asm_insn ("mov.l r13,@-r15", 0);
2754 output_asm_insn (jump, &this_jmp.lab);
2755 if (TARGET_SH5)
2756 output_asm_insn ("sts macl,r13", 0);
2757 else
2758 output_asm_insn ("mov.l @r15+,r13", 0);
2760 if (far && flag_pic && TARGET_SH2)
2762 braf_base_lab = gen_label_rtx ();
2763 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2764 CODE_LABEL_NUMBER (braf_base_lab));
2766 if (far)
2767 output_asm_insn (".align 2", 0);
2768 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2769 this_jmp.op = op;
2770 if (far && flag_pic)
2772 if (TARGET_SH2)
2773 this_jmp.lab = braf_base_lab;
2774 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2776 else
2777 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2778 return "";
2781 /* Local label counter, used for constants in the pool and inside
2782 pattern branches. */
2783 static int lf = 100;
2785 /* Output code for ordinary branches. */
2786 const char *
2787 output_branch (int logic, rtx_insn *insn, rtx *operands)
2789 switch (get_attr_length (insn))
2791 case 6:
2792 /* This can happen if filling the delay slot has caused a forward
2793 branch to exceed its range (we could reverse it, but only
2794 when we know we won't overextend other branches; this should
2795 best be handled by relaxation).
2796 It can also happen when other condbranches hoist delay slot insns
2797 from their destination, thus leading to code size increase.
2798 But the branch will still be in the range -4092..+4098 bytes. */
2799 if (! TARGET_RELAX)
2801 int label = lf++;
2802 /* The call to print_slot will clobber the operands. */
2803 rtx op0 = operands[0];
2805 /* If the instruction in the delay slot is annulled (true), then
2806 there is no delay slot where we can put it now. The only safe
2807 place for it is after the label. final will do that by default. */
2809 if (final_sequence
2810 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2811 && get_attr_length (final_sequence->insn (1)))
2813 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2814 ASSEMBLER_DIALECT ? "/" : ".", label);
2815 print_slot (final_sequence);
2817 else
2818 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2820 output_asm_insn ("bra\t%l0", &op0);
2821 fprintf (asm_out_file, "\tnop\n");
2822 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2824 return "";
2826 /* When relaxing, handle this like a short branch. The linker
2827 will fix it up if it still doesn't fit after relaxation. */
2828 case 2:
2829 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2831 /* These are for SH2e, in which we have to account for the
2832 extra nop because of the hardware bug in annulled branches. */
2833 case 8:
2834 if (! TARGET_RELAX)
2836 int label = lf++;
2838 gcc_assert (!final_sequence
2839 || !(INSN_ANNULLED_BRANCH_P
2840 (XVECEXP (final_sequence, 0, 0))));
2841 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2842 logic ? "f" : "t",
2843 ASSEMBLER_DIALECT ? "/" : ".", label);
2844 fprintf (asm_out_file, "\tnop\n");
2845 output_asm_insn ("bra\t%l0", operands);
2846 fprintf (asm_out_file, "\tnop\n");
2847 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2849 return "";
2851 /* When relaxing, fall through. */
2852 case 4:
2854 char buffer[10];
2856 sprintf (buffer, "b%s%ss\t%%l0",
2857 logic ? "t" : "f",
2858 ASSEMBLER_DIALECT ? "/" : ".");
2859 output_asm_insn (buffer, &operands[0]);
2860 return "nop";
2863 default:
2864 /* There should be no longer branches now - that would
2865 indicate that something has destroyed the branches set
2866 up in machine_dependent_reorg. */
2867 gcc_unreachable ();
2871 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2872 fill in operands[9] as a label to the successor insn.
2873 We try to use jump threading where possible.
2874 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2875 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2876 follow jmp and bt, if the address is in range. */
2877 const char *
2878 output_branchy_insn (enum rtx_code code, const char *templ,
2879 rtx_insn *insn, rtx *operands)
2881 rtx_insn *next_insn = NEXT_INSN (insn);
2883 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2885 rtx src = SET_SRC (PATTERN (next_insn));
2886 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2888 /* Following branch not taken */
2889 rtx_code_label *lab = gen_label_rtx ();
2890 emit_label_after (lab, next_insn);
2891 INSN_ADDRESSES_NEW (lab,
2892 INSN_ADDRESSES (INSN_UID (next_insn))
2893 + get_attr_length (next_insn));
2894 operands[9] = lab;
2895 return templ;
2897 else
2899 int offset = (branch_dest (next_insn)
2900 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2901 if (offset >= -252 && offset <= 258)
2903 if (GET_CODE (src) == IF_THEN_ELSE)
2904 /* branch_true */
2905 src = XEXP (src, 1);
2906 operands[9] = src;
2907 return templ;
2911 rtx_code_label *lab = gen_label_rtx ();
2912 emit_label_after (lab, insn);
2913 INSN_ADDRESSES_NEW (lab,
2914 INSN_ADDRESSES (INSN_UID (insn))
2915 + get_attr_length (insn));
2916 operands[9] = lab;
2917 return templ;
2920 const char *
2921 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2923 return output_branchy_insn (NE, "bt %l9" "\n"
2924 " fcmp/eq %1,%0",
2925 insn, operands);
2928 /* Output the start of the assembler file. */
2929 static void
2930 sh_file_start (void)
2932 default_file_start ();
2934 if (TARGET_ELF)
2935 /* We need to show the text section with the proper
2936 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2937 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2938 will complain. We can teach GAS specifically about the
2939 default attributes for our choice of text section, but
2940 then we would have to change GAS again if/when we change
2941 the text section name. */
2942 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2943 else
2944 /* Switch to the data section so that the coffsem symbol
2945 isn't in the text section. */
2946 switch_to_section (data_section);
2948 if (TARGET_LITTLE_ENDIAN)
2949 fputs ("\t.little\n", asm_out_file);
2951 if (!TARGET_ELF)
2953 if (TARGET_SHCOMPACT)
2954 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2955 else if (TARGET_SHMEDIA)
2956 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2957 TARGET_SHMEDIA64 ? 64 : 32);
2961 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2962 static bool
2963 unspec_caller_rtx_p (rtx pat)
2965 rtx base, offset;
2966 int i;
2968 split_const (pat, &base, &offset);
2969 if (GET_CODE (base) == UNSPEC)
2971 if (XINT (base, 1) == UNSPEC_CALLER)
2972 return true;
2973 for (i = 0; i < XVECLEN (base, 0); i++)
2974 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2975 return true;
2977 return false;
2980 /* Indicate that INSN cannot be duplicated. This is true for an insn
2981 that generates a unique label. */
2982 static bool
2983 sh_cannot_copy_insn_p (rtx_insn *insn)
2985 rtx pat;
2987 if (!reload_completed || !flag_pic)
2988 return false;
2990 if (!NONJUMP_INSN_P (insn))
2991 return false;
2992 if (asm_noperands (insn) >= 0)
2993 return false;
2995 pat = PATTERN (insn);
2996 if (GET_CODE (pat) != SET)
2997 return false;
2998 pat = SET_SRC (pat);
3000 if (unspec_caller_rtx_p (pat))
3001 return true;
3003 return false;
3006 /* Number of instructions used to make an arithmetic right shift by N. */
3007 static const char ashiftrt_insns[] =
3008 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3010 /* Description of a logical left or right shift, when expanded to a sequence
3011 of 1/2/8/16 shifts.
3012 Notice that one bit right shifts clobber the T bit. One bit left shifts
3013 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
3014 enum
3016 ASHL_CLOBBERS_T = 1 << 0,
3017 LSHR_CLOBBERS_T = 1 << 1
3020 struct ashl_lshr_sequence
3022 char insn_count;
3023 signed char amount[6];
3024 char clobbers_t;
3027 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3029 { 0, { 0 }, 0 }, // 0
3030 { 1, { 1 }, LSHR_CLOBBERS_T },
3031 { 1, { 2 }, 0 },
3032 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3033 { 2, { 2, 2 }, 0 }, // 4
3034 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3035 { 3, { 2, 2, 2 }, 0 },
3036 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3037 { 1, { 8 }, 0 }, // 8
3038 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3039 { 2, { 8, 2 }, 0 },
3040 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3041 { 3, { 8, 2, 2 }, 0 }, // 12
3042 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3043 { 3, { 8, -2, 8 }, 0 },
3044 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3045 { 1, { 16 }, 0 }, // 16
3046 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3047 { 2, { 16, 2 }, 0 },
3048 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3049 { 3, { 16, 2, 2 }, 0 }, // 20
3050 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3051 { 3, { 16, -2, 8 }, 0 },
3052 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3053 { 2, { 16, 8 }, 0 }, // 24
3054 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3055 { 3, { 16, 8, 2 }, 0 },
3056 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3057 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3058 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3059 { 3, { 16, -2, 16 }, 0 },
3061 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3062 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3063 However, the shift-and combiner code needs this entry here to be in
3064 terms of real shift insns. */
3065 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
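/* For example, the entry for a shift count of 10, { 2, { 8, 2 }, 0 },
   stands for the two insn sequence shll8 + shll2 (shlr8 + shlr2 for a
   logical right shift) and leaves the T bit untouched.  */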
3068 /* Individual shift sequences for shift amounts < 16, where up to the three
3069 highmost bits might be clobbered. This is typically used when combined with some
3070 kind of sign or zero extension. */
3071 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3073 { 0, { 0 }, 0 }, // 0
3074 { 1, { 1 }, LSHR_CLOBBERS_T },
3075 { 1, { 2 }, 0 },
3076 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3077 { 2, { 2, 2 }, 0 }, // 4
3078 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3079 { 2, { 8, -2 }, 0 },
3080 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3081 { 1, { 8 }, 0 }, // 8
3082 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3083 { 2, { 8, 2 }, 0 },
3084 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3085 { 3, { 8, 2, 2 }, 0 }, // 12
3086 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3087 { 2, { 16, -2 }, 0 },
3088 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3089 { 1, { 16 }, 0 }, // 16
3090 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3091 { 2, { 16, 2 }, 0 },
3092 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3093 { 3, { 16, 2, 2 }, 0 }, // 20
3094 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3095 { 3, { 16, -2, 8 }, 0 },
3096 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3097 { 2, { 16, 8 }, 0 }, // 24
3098 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3099 { 3, { 16, 8, 2 }, 0 },
3100 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3101 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3102 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3103 { 3, { 16, -2, 16 }, 0 },
3104 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3107 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3108 will clobber the T bit. */
3109 bool
3110 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3112 gcc_assert (CONST_INT_P (shift_amount));
3114 const int shift_amount_i = INTVAL (shift_amount) & 31;
3116 /* Special case for shift count of 31: use and-rotl sequence. */
3117 if (shift_amount_i == 31)
3118 return true;
3120 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3121 & ASHL_CLOBBERS_T) != 0;
3124 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3125 instructions will clobber the T bit. */
3126 bool
3127 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3129 gcc_assert (CONST_INT_P (shift_amount));
3131 const int shift_amount_i = INTVAL (shift_amount) & 31;
3133 /* Special case for shift count of 31: use shll-movt sequence. */
3134 if (shift_amount_i == 31)
3135 return true;
3137 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3138 & LSHR_CLOBBERS_T) != 0;
3141 /* Return true if it is potentially beneficial to use a dynamic shift
3142 instruction (shad / shar) instead of a combination of 1/2/8/16
3143 shift instructions for the specified shift count.
3144 If dynamic shifts are not available, always return false. */
3145 bool
3146 sh_dynamicalize_shift_p (rtx count)
3148 gcc_assert (CONST_INT_P (count));
3150 const int shift_amount_i = INTVAL (count) & 31;
3151 int insn_count;
3153 /* For left and right shifts, there are shorter 2 insn sequences for
3154 shift amounts of 31. */
3155 if (shift_amount_i == 31)
3156 insn_count = 2;
3157 else
3158 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3160 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
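/* For example, a shift by 21 needs 4 single shift insns according to
   ashl_lshr_seq, so a dynamic shift is preferred whenever dynamic shifts
   are available and SH_DYNAMIC_SHIFT_COST is less than 3.  */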
3163 /* Assuming we have a value that has been sign-extended by at least one bit,
3164 can we use the ext_shift_amounts with the last shift turned to an
3165 arithmetic shift to shift it by N without data loss, and quicker than by
3166 other means? */
3167 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
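/* Note that ((n) | 8) == 15 holds exactly for n == 7 and n == 15.  */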
3169 /* Return the cost of a shift. */
3170 static inline int
3171 shiftcosts (rtx x)
3173 int value;
3175 if (TARGET_SHMEDIA)
3176 return 1;
3178 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3180 if (GET_MODE (x) == DImode
3181 && CONST_INT_P (XEXP (x, 1))
3182 && INTVAL (XEXP (x, 1)) == 1)
3183 return 2;
3185 /* Everything else is invalid, because there is no pattern for it. */
3186 return -1;
3188 /* If shift by a non constant, then this will be expensive. */
3189 if (!CONST_INT_P (XEXP (x, 1)))
3190 return SH_DYNAMIC_SHIFT_COST;
3192 /* Otherwise, return the true cost in instructions. Cope with out of range
3193 shift counts more or less arbitrarily. */
3194 value = INTVAL (XEXP (x, 1)) & 31;
3196 if (GET_CODE (x) == ASHIFTRT)
3198 int cost = ashiftrt_insns[value];
3199 /* If dynamic shifts are available and profitable in this case, then we
3200 put the constant in a reg and use shad. */
3201 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3202 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3203 return cost;
3205 else
3206 return ashl_lshr_seq[value].insn_count;
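/* For instance, an arithmetic right shift by 6 is listed as 8 insns in
   ashiftrt_insns, so its cost is capped at 1 + SH_DYNAMIC_SHIFT_COST
   whenever that value is smaller.  */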
3209 /* Return the cost of an AND/XOR/IOR operation. */
3210 static inline int
3211 and_xor_ior_costs (rtx x, int code)
3213 /* On SH1-4 we have only max. SImode operations.
3214 Double the cost for modes > SImode. */
3215 const int cost_scale = !TARGET_SHMEDIA
3216 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3217 ? 2 : 1;
3219 /* A logical operation with two registers is a single cycle
3220 instruction. */
3221 if (!CONST_INT_P (XEXP (x, 1)))
3222 return 1 * cost_scale;
3224 int i = INTVAL (XEXP (x, 1));
3226 if (TARGET_SHMEDIA)
3228 if (satisfies_constraint_I10 (XEXP (x, 1))
3229 || satisfies_constraint_J16 (XEXP (x, 1)))
3230 return 1;
3231 else
3232 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3235 /* These constants are single cycle extu.[bw] instructions. */
3236 if ((i == 0xff || i == 0xffff) && code == AND)
3237 return 1 * cost_scale;
3238 /* Constants that can be used in an instruction as an immediate are
3239 a single cycle, but this requires r0, so make it a little more
3240 expensive. */
3241 if (CONST_OK_FOR_K08 (i))
3242 return 2 * cost_scale;
3243 /* Constants that can be loaded with a mov immediate need one more cycle.
3244 This case is probably unnecessary. */
3245 if (CONST_OK_FOR_I08 (i))
3246 return 2 * cost_scale;
3247 /* Any other constant requires an additional 2 cycle pc-relative load.
3248 This case is probably unnecessary. */
3249 return 3 * cost_scale;
3252 /* Return the cost of an addition or a subtraction. */
3253 static inline int
3254 addsubcosts (rtx x)
3256 if (GET_MODE (x) == SImode)
3258 /* The addc or subc patterns will eventually become one or two
3259 instructions. Below are some costs for some of the patterns
3260 which combine would reject because the costs of the individual
3261 insns in the patterns are lower.
3263 FIXME: It would be much easier if we had something like insn cost
3264 attributes and the cost calculation machinery used those attributes
3265 in the first place. This would eliminate redundant recog-like C
3266 code to calculate costs of complex patterns. */
3267 rtx op0 = XEXP (x, 0);
3268 rtx op1 = XEXP (x, 1);
3270 if (GET_CODE (x) == PLUS)
3272 if (GET_CODE (op0) == AND
3273 && XEXP (op0, 1) == const1_rtx
3274 && (GET_CODE (op1) == PLUS
3275 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3276 return 1;
3278 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3279 && GET_CODE (op1) == LSHIFTRT
3280 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3281 return 1;
3285 /* On SH1-4 we have only max. SImode operations.
3286 Double the cost for modes > SImode. */
3287 const int cost_scale = !TARGET_SHMEDIA
3288 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3289 ? 2 : 1;
3291 /* Adding a register is a single cycle insn. */
3292 if (REG_P (XEXP (x, 1))
3293 || GET_CODE (XEXP (x, 1)) == SUBREG)
3294 return 1 * cost_scale;
3296 /* Likewise for small constants. */
3297 if (CONST_INT_P (XEXP (x, 1))
3298 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3299 return 1 * cost_scale;
3301 if (TARGET_SHMEDIA)
3302 switch (GET_CODE (XEXP (x, 1)))
3304 case CONST:
3305 case LABEL_REF:
3306 case SYMBOL_REF:
3307 return TARGET_SHMEDIA64 ? 5 : 3;
3309 case CONST_INT:
3310 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3311 return 2;
3312 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3313 return 3;
3314 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3315 return 4;
3317 /* Fall through. */
3318 default:
3319 return 5;
3322 /* Any other constant requires a 2 cycle pc-relative load plus an
3323 addition. */
3324 return 3 * cost_scale;
3327 /* Return the cost of a multiply. */
3328 static inline int
3329 multcosts (rtx x ATTRIBUTE_UNUSED)
3331 if (sh_multcost >= 0)
3332 return sh_multcost;
3333 if (TARGET_SHMEDIA)
3334 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3335 accept constants. Ideally, we would use a cost of one or two and
3336 add the cost of the operand, but disregard the latter when inside loops
3337 and loop invariant code motion is still to follow.
3338 Using a multiply first and splitting it later if it's a loss
3339 doesn't work because of different sign / zero extension semantics
3340 of multiplies vs. shifts. */
3341 return optimize_size ? 2 : 3;
3343 if (TARGET_SH2)
3345 /* We have a mul insn, so we can never take more than the mul and the
3346 read of the mac reg, but count more because of the latency and extra
3347 reg usage. */
3348 if (optimize_size)
3349 return 2;
3350 return 3;
3353 /* If we're aiming at small code, then just count the number of
3354 insns in a multiply call sequence. */
3355 if (optimize_size)
3356 return 5;
3358 /* Otherwise count all the insns in the routine we'd be calling too. */
3359 return 20;
3362 /* Compute a (partial) cost for rtx X. Return true if the complete
3363 cost has been computed, and false if subexpressions should be
3364 scanned. In either case, *TOTAL contains the cost result. */
3365 static bool
3366 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3367 int *total, bool speed ATTRIBUTE_UNUSED)
3369 switch (code)
3371 /* The lower-subreg pass decides whether to split multi-word regs
3372 into individual regs by looking at the cost for a SET of certain
3373 modes with the following patterns:
3374 (set (reg) (reg))
3375 (set (reg) (const_int 0))
3376 On machines that support vector-move operations a multi-word move
3377 is the same cost as individual reg move. On SH there is no
3378 vector-move, so we have to provide the correct cost in the number
3379 of move insns to load/store the reg of the mode in question. */
3380 case SET:
3381 if (register_operand (SET_DEST (x), VOIDmode)
3382 && (register_operand (SET_SRC (x), VOIDmode)
3383 || satisfies_constraint_Z (SET_SRC (x))))
3385 const machine_mode mode = GET_MODE (SET_DEST (x));
3386 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3387 / mov_insn_size (mode, TARGET_SH2A));
3388 return true;
3390 return false;
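/* Illustration: a DImode reg-reg set is costed as 8 / 4 = 2 move insns,
   while a DFmode reg-reg set on an FMOVD target counts as a single
   move insn.  */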
3392 /* The cost of a mem access is mainly the cost of the address mode. */
3393 case MEM:
3394 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3395 true);
3396 return true;
3398 /* The cost of a sign or zero extend depends on whether the source is a
3399 reg or a mem. In case of a mem take the address into account.
3400 case SIGN_EXTEND:
3401 if (REG_P (XEXP (x, 0)))
3403 *total = COSTS_N_INSNS (1);
3404 return true;
3406 if (MEM_P (XEXP (x, 0)))
3408 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3409 GET_MODE (XEXP (x, 0)),
3410 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3411 return true;
3413 return false;
3415 case ZERO_EXTEND:
3416 if (REG_P (XEXP (x, 0)))
3418 *total = COSTS_N_INSNS (1);
3419 return true;
3421 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3422 && (GET_MODE (XEXP (x, 0)) == QImode
3423 || GET_MODE (XEXP (x, 0)) == HImode))
3425 /* Handle SH2A's movu.b and movu.w insn. */
3426 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3427 GET_MODE (XEXP (x, 0)),
3428 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3429 return true;
3431 return false;
3433 /* mems for SFmode and DFmode can be inside a parallel due to
3434 the way the fpscr is handled. */
3435 case PARALLEL:
3436 for (int i = 0; i < XVECLEN (x, 0); i++)
3438 rtx xx = XVECEXP (x, 0, i);
3439 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3441 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3442 GET_MODE (XEXP (xx, 0)),
3443 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3444 return true;
3446 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3448 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3449 GET_MODE (XEXP (xx, 1)),
3450 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3451 return true;
3455 if (sh_1el_vec (x, VOIDmode))
3456 *total = outer_code != SET;
3457 else if (sh_rep_vec (x, VOIDmode))
3458 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3459 + (outer_code != SET));
3460 else
3461 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3462 return true;
3464 case CONST_INT:
3465 if (TARGET_SHMEDIA)
3467 if (INTVAL (x) == 0)
3468 *total = 0;
3469 else if (outer_code == AND && and_operand ((x), DImode))
3470 *total = 0;
3471 else if ((outer_code == IOR || outer_code == XOR
3472 || outer_code == PLUS)
3473 && CONST_OK_FOR_I10 (INTVAL (x)))
3474 *total = 0;
3475 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3476 *total = COSTS_N_INSNS (outer_code != SET);
3477 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3478 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3479 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3480 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3481 else
3482 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3483 return true;
3485 if (CONST_OK_FOR_I08 (INTVAL (x)))
3486 *total = 0;
3487 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3488 && CONST_OK_FOR_K08 (INTVAL (x)))
3489 *total = 1;
3490 /* prepare_cmp_insn will force costly constants into registers before
3491 the cbranch[sd]i4 patterns can see them, so preserve potentially
3492 interesting ones not covered by I08 above. */
3493 else if (outer_code == COMPARE
3494 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3495 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3496 || INTVAL (x) == 0x7fffffff
3497 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3498 *total = 1;
3499 else
3500 *total = 8;
3501 return true;
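/* For instance, a constant like 15 satisfies I08 and is costed as free,
   whereas something like 0x12345 needs a constant pool load and gets
   the high cost of 8.  */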
3503 case EQ:
3504 /* An and with a constant compared against zero is
3505 most likely going to be a TST #imm, R0 instruction.
3506 Notice that this does not catch the zero_extract variants from
3507 the md file. */
3508 if (GET_CODE (XEXP (x, 0)) == AND
3509 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3511 *total = 1;
3512 return true;
3514 else
3515 return false;
3517 case SMIN:
3518 case SMAX:
3519 /* This is most likely a clips.b or clips.w insn that is being made up
3520 by combine. */
3521 if (TARGET_SH2A
3522 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3523 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3524 && REG_P (XEXP (XEXP (x, 0), 0))
3525 && CONST_INT_P (XEXP (x, 1)))
3527 *total = COSTS_N_INSNS (1);
3528 return true;
3530 else
3531 return false;
3533 case CONST:
3534 case LABEL_REF:
3535 case SYMBOL_REF:
3536 if (TARGET_SHMEDIA64)
3537 *total = COSTS_N_INSNS (4);
3538 else if (TARGET_SHMEDIA32)
3539 *total = COSTS_N_INSNS (2);
3540 else
3541 *total = 5;
3542 return true;
3544 case CONST_DOUBLE:
3545 if (TARGET_SHMEDIA)
3546 *total = COSTS_N_INSNS (4);
3547 /* prepare_cmp_insn will force costly constants into registers before
3548 the cbranchdi4 pattern can see them, so preserve potentially
3549 interesting ones. */
3550 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3551 *total = 1;
3552 else
3553 *total = 10;
3554 return true;
3556 case CONST_VECTOR:
3557 /* FIXME: This looks broken. Only the last statement has any effect.
3558 Probably this could be folded with the PARALLEL case? */
3559 if (x == CONST0_RTX (GET_MODE (x)))
3560 *total = 0;
3561 else if (sh_1el_vec (x, VOIDmode))
3562 *total = outer_code != SET;
3563 if (sh_rep_vec (x, VOIDmode))
3564 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3565 + (outer_code != SET));
3566 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3567 return true;
3569 case PLUS:
3570 case MINUS:
3571 *total = COSTS_N_INSNS (addsubcosts (x));
3572 return true;
3574 case AND:
3575 case XOR:
3576 case IOR:
3577 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3578 return true;
3580 case MULT:
3581 *total = COSTS_N_INSNS (multcosts (x));
3582 return true;
3584 case LT:
3585 case GE:
3586 /* div0s sign comparison. */
3587 if (GET_CODE (XEXP (x, 0)) == XOR
3588 && REG_P ((XEXP (XEXP (x, 0), 0)))
3589 && REG_P ((XEXP (XEXP (x, 0), 1)))
3590 && satisfies_constraint_Z (XEXP (x, 1)))
3592 *total = COSTS_N_INSNS (1);
3593 return true;
3595 else
3596 return false;
3598 case LSHIFTRT:
3599 /* div0s sign comparison. */
3600 if (GET_CODE (XEXP (x, 0)) == XOR
3601 && REG_P ((XEXP (XEXP (x, 0), 0)))
3602 && REG_P ((XEXP (XEXP (x, 0), 1)))
3603 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3605 *total = COSTS_N_INSNS (1);
3606 return true;
3608 /* Fall through to shiftcosts. */
3609 case ASHIFT:
3610 case ASHIFTRT:
3612 int cost = shiftcosts (x);
3613 if (cost < 0)
3614 return false;
3615 *total = COSTS_N_INSNS (cost);
3616 return true;
3619 case DIV:
3620 case UDIV:
3621 case MOD:
3622 case UMOD:
3623 *total = COSTS_N_INSNS (20);
3624 return true;
3626 case FLOAT:
3627 case FIX:
3628 *total = 100;
3629 return true;
3631 default:
3632 return false;
3636 /* Determine the size of the fundamental move insn that will be used
3637 for the specified mode. */
3638 static inline int
3639 mov_insn_size (machine_mode mode, bool consider_sh2a)
3641 const int mode_sz = GET_MODE_SIZE (mode);
3643 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3644 || (TARGET_FMOVD && mode == DFmode))
3645 return mode_sz;
3646 else
3648 /* The max. available mode for actual move insns is SImode.
3649 Larger accesses will be split into multiple loads/stores. */
3650 const int max_mov_sz = GET_MODE_SIZE (SImode);
3651 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3655 /* Determine the maximum possible displacement for a move insn for the
3656 specified mode. */
3658 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3660 /* The 4 byte displacement move insns are the same as the 2 byte
3661 versions but take a 12 bit displacement. All we need to do is to
3662 scale the max. displacement value accordingly. */
3663 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3665 /* SH2A supports FPU move insns with 12 bit displacements.
3666 Other variants do not support any kind of displacements for
3667 FPU move insns. */
3668 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3669 return 0;
3670 else
3672 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3673 const int mode_sz = GET_MODE_SIZE (mode);
3674 int r = 15 * mov_insn_sz * disp_scale;
3676 /* If the mov insn will be split into multiple loads/stores, the
3677 maximum possible displacement is a bit smaller. */
3678 if (mode_sz > mov_insn_sz)
3679 r -= mode_sz - mov_insn_sz;
3680 return r;
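/* Worked example (editor's illustration, derived from the code above):
   for an SImode move on a non-SH2A target mov_insn_size is 4, so the
   limit is 15 * 4 = 60 bytes; with the SH2A 12 bit forms the 4095 / 15
   scale factor of 273 raises this to 15 * 4 * 273 = 16380 bytes.  A
   DImode move that is split into two SImode accesses loses the size
   difference, i.e. 60 - (8 - 4) = 56 bytes.  */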
3684 /* Determine the alignment mask for a move insn of the
3685 specified mode. */
3686 static inline int
3687 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3689 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3690 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3693 /* Return the displacement value of a displacement address. */
3694 HOST_WIDE_INT
3695 sh_disp_addr_displacement (rtx x)
3697 gcc_assert (satisfies_constraint_Sdd (x));
3698 return INTVAL (XEXP (XEXP (x, 0), 1));
3701 /* Compute the cost of an address. */
3702 static int
3703 sh_address_cost (rtx x, machine_mode mode,
3704 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3706 /* 'GBR + 0'. Account one more because of R0 restriction. */
3707 if (REG_P (x) && REGNO (x) == GBR_REG)
3708 return 2;
3710 /* Simple reg, post-inc, pre-dec addressing. */
3711 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3712 return 1;
3714 /* 'reg + disp' addressing. */
3715 if (GET_CODE (x) == PLUS
3716 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3718 /* 'GBR + disp'. Account one more because of R0 restriction. */
3719 if (REGNO (XEXP (x, 0)) == GBR_REG
3720 && gbr_displacement (XEXP (x, 1), mode))
3721 return 2;
3723 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3725 if (offset == 0)
3726 return 1;
3728 /* The displacement would fit into a 2 byte move insn.
3729 HImode and QImode loads/stores with displacement put pressure on
3730 R0 which will most likely require another reg copy. Thus account
3731 a higher cost for that. */
3732 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3733 return (mode == HImode || mode == QImode) ? 2 : 1;
3735 /* The displacement would fit into a 4 byte move insn (SH2A). */
3736 if (TARGET_SH2A
3737 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3738 return 2;
3740 /* The displacement is probably out of range and will require extra
3741 calculations. */
3742 return 3;
3745 /* 'reg + reg' addressing. Account a slightly higher cost because of
3746 increased pressure on R0. */
3747 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3748 && ! TARGET_SHMEDIA)
3749 return 3;
3751 /* Not sure what it is - probably expensive. */
3752 return 10;
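/* Editor's illustration of the resulting costs (not part of the original
   sources): a plain @r1 or @r1+ address costs 1, a GBR based address
   costs 2 because of the R0 restriction, an in-range @(disp,r1) SImode
   access costs 1 while the same HImode/QImode access costs 2, and an
   @(r0,r1) style address costs 3 due to the extra pressure on R0.  */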
3755 /* Code to expand a shift. */
3756 static void
3757 gen_ashift (int type, int n, rtx reg)
3759 rtx n_rtx;
3761 /* Negative values here come from the shift_amounts array. */
3762 if (n < 0)
3764 if (type == ASHIFT)
3765 type = LSHIFTRT;
3766 else
3767 type = ASHIFT;
3768 n = -n;
3771 n_rtx = GEN_INT (n);
3772 gcc_assert (satisfies_constraint_P27 (n_rtx));
3774 switch (type)
3776 case ASHIFTRT:
3777 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3778 break;
3779 case LSHIFTRT:
3780 if (n == 1)
3781 emit_insn (gen_shlr (reg, reg));
3782 else
3783 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3784 break;
3785 case ASHIFT:
3786 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3787 break;
3788 default:
3789 gcc_unreachable ();
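/* Editor's note: e.g. gen_ashift (ASHIFT, 2, reg) emits a single
   ashlsi3_k insn (a shll2); the count is asserted to satisfy the P27
   constraint, i.e. one of the shift amounts the hardware can do in a
   single instruction (1, 2, 8 and 16).  */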
3793 /* Code to expand a HImode shift. */
3794 static void
3795 gen_ashift_hi (int type, int n, rtx reg)
3797 /* Negative values here come from the shift_amounts array. */
3798 if (n < 0)
3800 if (type == ASHIFT)
3801 type = LSHIFTRT;
3802 else
3803 type = ASHIFT;
3804 n = -n;
3807 switch (type)
3809 case ASHIFTRT:
3810 case LSHIFTRT:
3811 /* We don't have HImode right shift operations because using the
3812 ordinary 32 bit shift instructions for that doesn't generate proper
3813 zero/sign extension.
3814 gen_ashift_hi is only called in contexts where we know that the
3815 sign extension works out correctly. */
3817 int offset = 0;
3818 if (GET_CODE (reg) == SUBREG)
3820 offset = SUBREG_BYTE (reg);
3821 reg = SUBREG_REG (reg);
3823 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3824 break;
3826 case ASHIFT:
3827 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3828 break;
3832 /* Output RTL to split a constant shift into its component SH constant
3833 shift instructions. */
3834 void
3835 gen_shifty_op (int code, rtx *operands)
3837 int value = INTVAL (operands[2]);
3838 int max, i;
3840 /* Truncate the shift count in case it is out of bounds. */
3841 value = value & 31;
3843 if (value == 31)
3845 if (code == LSHIFTRT)
3847 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3848 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3849 return;
3851 else if (code == ASHIFT)
3853 /* There is a two instruction sequence for 31 bit left shifts,
3854 but it requires r0. */
3855 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3857 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3858 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3859 return;
3863 else if (value == 0)
3865 /* This can happen even when optimizing, if there were subregs before
3866 reload. Don't output a nop here, as this is never optimized away;
3867 use a no-op move instead. */
3868 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3869 return;
3872 max = ashl_lshr_seq[value].insn_count;
3873 for (i = 0; i < max; i++)
3874 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
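/* Example (editor's note): a logical right shift by 31 is emitted above
   as rotl followed by movt, i.e. the former sign bit ends up in T and is
   then copied into the destination, which is much shorter than 31
   single-bit shifts.  Shift counts other than 0 and 31 are expanded from
   the ashl_lshr_seq table, one gen_ashift call per entry.  */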
3877 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3878 don't matter. */
3879 void
3880 gen_shifty_hi_op (int code, rtx *operands)
3882 int value = INTVAL (operands[2]);
3883 int max, i;
3884 void (*gen_fun) (int, int, rtx);
3886 /* This operation is used by and_shl for SImode values with a few
3887 high bits known to be cleared. */
3888 value &= 31;
3889 if (value == 0)
3891 emit_insn (gen_nop ());
3892 return;
3895 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3896 if (code == ASHIFT)
3898 max = ext_ashl_lshr_seq[value].insn_count;
3899 for (i = 0; i < max; i++)
3900 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3902 else
3903 /* When shifting right, emit the shifts in reverse order, so that
3904 solitary negative values come first. */
3905 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3906 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3909 /* Output RTL for an arithmetic right shift.
3910 ??? Rewrite to use super-optimizer sequences. */
3911 bool
3912 expand_ashiftrt (rtx *operands)
3914 rtx wrk;
3915 char func[18];
3916 int value;
3918 if (TARGET_DYNSHIFT)
3920 if (!CONST_INT_P (operands[2]))
3922 rtx count = copy_to_mode_reg (SImode, operands[2]);
3923 emit_insn (gen_negsi2 (count, count));
3924 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3925 return true;
3927 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3928 > 1 + SH_DYNAMIC_SHIFT_COST)
3930 rtx count
3931 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3932 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3933 return true;
3936 if (!CONST_INT_P (operands[2]))
3937 return false;
3939 value = INTVAL (operands[2]) & 31;
3941 if (value == 31)
3943 /* If we are called from abs expansion, arrange things so that we
3944 can use a single MT instruction that doesn't clobber the source,
3945 if LICM can hoist out the load of the constant zero. */
3946 if (currently_expanding_to_rtl)
3948 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3949 operands[1]));
3950 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3951 return true;
3953 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3954 return true;
3956 else if (value >= 16 && value <= 19)
3958 wrk = gen_reg_rtx (SImode);
3959 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3960 value -= 16;
3961 while (value--)
3962 gen_ashift (ASHIFTRT, 1, wrk);
3963 emit_move_insn (operands[0], wrk);
3964 return true;
3966 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3967 else if (value <= 5)
3969 wrk = gen_reg_rtx (SImode);
3970 emit_move_insn (wrk, operands[1]);
3971 while (value--)
3972 gen_ashift (ASHIFTRT, 1, wrk);
3973 emit_move_insn (operands[0], wrk);
3974 return true;
3977 wrk = gen_reg_rtx (Pmode);
3979 /* Load the value into an arg reg and call a helper. */
3980 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3981 sprintf (func, "__ashiftrt_r4_%d", value);
3982 function_symbol (wrk, func, SFUNC_STATIC);
3983 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3984 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3985 return true;
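/* Editor's summary of the expansion above, for illustration only: with
   dynamic shifts the count is negated and fed to ashrsi3_d; otherwise a
   shift by 31 becomes a cmp/gt against zero plus a T-bit based negation,
   counts of 16..19 start from the ashrsi2_16 pattern and finish with
   single-bit shifts, counts up to 5 are done as single-bit shifts, and
   anything else loads the value into r4 and calls the
   __ashiftrt_r4_<n> helper.  */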
3988 /* Try to find a good way to implement the combiner pattern
3989 [(set (match_operand:SI 0 "register_operand" "r")
3990 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3991 (match_operand:SI 2 "const_int_operand" "n"))
3992 (match_operand:SI 3 "const_int_operand" "n"))) .
3993 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3994 return 0 for simple right / left or left/right shift combination.
3995 return 1 for a combination of shifts with zero_extend.
3996 return 2 for a combination of shifts with an AND that needs r0.
3997 return 3 for a combination of shifts with an AND that needs an extra
3998 scratch register, when the three highmost bits of the AND mask are clear.
3999 return 4 for a combination of shifts with an AND that needs an extra
4000 scratch register, when any of the three highmost bits of the AND mask
4001 is set.
4002 If ATTRP is set, store an initial right shift width in ATTRP[0],
4003 and the instruction length in ATTRP[1]. These values are not valid
4004 when returning 0.
4005 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4006 shift_amounts for the last shift value that is to be used before the
4007 sign extend. */
4009 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4011 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4012 int left = INTVAL (left_rtx), right;
4013 int best = 0;
4014 int cost, best_cost = 10000;
4015 int best_right = 0, best_len = 0;
4016 int i;
4017 int can_ext;
4019 if (left < 0 || left > 31)
4020 return 0;
4021 if (CONST_INT_P (mask_rtx))
4022 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4023 else
4024 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4025 /* Can this be expressed as a right shift / left shift pair? */
4026 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4027 right = exact_log2 (lsb);
4028 mask2 = ~(mask + lsb - 1);
4029 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4030 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4031 if (! mask2)
4032 best_cost = ashl_lshr_seq[right].insn_count
4033 + ashl_lshr_seq[right + left].insn_count;
4034 /* mask has no trailing zeroes <==> ! right */
4035 else if (! right && mask2 == ~(lsb2 - 1))
4037 int late_right = exact_log2 (lsb2);
4038 best_cost = ashl_lshr_seq[left + late_right].insn_count
4039 + ashl_lshr_seq[late_right].insn_count;
4041 /* Try to use zero extend. */
4042 if (mask2 == ~(lsb2 - 1))
4044 int width, first;
4046 for (width = 8; width <= 16; width += 8)
4048 /* Can we zero-extend right away? */
4049 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4051 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4052 + ext_ashl_lshr_seq[left + right].insn_count;
4053 if (cost < best_cost)
4055 best = 1;
4056 best_cost = cost;
4057 best_right = right;
4058 best_len = cost;
4059 if (attrp)
4060 attrp[2] = -1;
4062 continue;
4064 /* ??? Could try to put zero extend into initial right shift,
4065 or even shift a bit left before the right shift. */
4066 /* Determine value of first part of left shift, to get to the
4067 zero extend cut-off point. */
4068 first = width - exact_log2 (lsb2) + right;
4069 if (first >= 0 && right + left - first >= 0)
4071 cost = ext_ashl_lshr_seq[right].insn_count
4072 + ext_ashl_lshr_seq[first].insn_count + 1
4073 + ext_ashl_lshr_seq[right + left - first].insn_count;
4075 if (cost < best_cost)
4077 best = 1;
4078 best_cost = cost;
4079 best_right = right;
4080 best_len = cost;
4081 if (attrp)
4082 attrp[2] = first;
4087 /* Try to use r0 AND pattern */
4088 for (i = 0; i <= 2; i++)
4090 if (i > right)
4091 break;
4092 if (! CONST_OK_FOR_K08 (mask >> i))
4093 continue;
4094 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4095 if (cost < best_cost)
4097 best = 2;
4098 best_cost = cost;
4099 best_right = i;
4100 best_len = cost - 1;
4103 /* Try to use a scratch register to hold the AND operand. */
4104 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4105 for (i = 0; i <= 2; i++)
4107 if (i > right)
4108 break;
4109 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4110 + (can_ext
4111 ? ext_ashl_lshr_seq
4112 : ashl_lshr_seq)[left + i].insn_count;
4113 if (cost < best_cost)
4115 best = 4 - can_ext;
4116 best_cost = cost;
4117 best_right = i;
4118 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4122 if (attrp)
4124 attrp[0] = best_right;
4125 attrp[1] = best_len;
4127 return best;
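/* Editor's illustration (assuming the extending shift sequence for a
   count of 0 has an insn count of 0): with left = 0 and mask = 0xff the
   zero-extend check succeeds for width 8, so the function returns kind 1
   with attrp[2] = -1, and gen_shl_and below turns the whole
   (and (ashift x 0) 0xff) into a single QImode zero extension.  */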
4130 /* This is used in length attributes of the unnamed instructions
4131 corresponding to shl_and_kind return values of 1 and 2. */
4133 shl_and_length (rtx insn)
4135 rtx set_src, left_rtx, mask_rtx;
4136 int attributes[3];
4138 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4139 left_rtx = XEXP (XEXP (set_src, 0), 1);
4140 mask_rtx = XEXP (set_src, 1);
4141 shl_and_kind (left_rtx, mask_rtx, attributes);
4142 return attributes[1];
4145 /* This is used in length attribute of the and_shl_scratch instruction. */
4147 shl_and_scr_length (rtx insn)
4149 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4150 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4151 rtx op = XEXP (set_src, 0);
4152 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4153 op = XEXP (XEXP (op, 0), 0);
4154 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4157 /* Generate rtl for instructions for which shl_and_kind advised a particular
4158 method of generating them, i.e. returned zero. */
4159 bool
4160 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4162 int attributes[3];
4163 unsigned HOST_WIDE_INT mask;
4164 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4165 int right, total_shift;
4166 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4168 right = attributes[0];
4169 total_shift = INTVAL (left_rtx) + right;
4170 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4171 switch (kind)
4173 default:
4174 return true;
4175 case 1:
4177 int first = attributes[2];
4178 rtx operands[3];
4180 if (first < 0)
4182 emit_insn ((mask << right) <= 0xff
4183 ? gen_zero_extendqisi2 (dest,
4184 gen_lowpart (QImode, source))
4185 : gen_zero_extendhisi2 (dest,
4186 gen_lowpart (HImode, source)));
4187 source = dest;
4189 if (source != dest)
4190 emit_insn (gen_movsi (dest, source));
4191 operands[0] = dest;
4192 if (right)
4194 operands[2] = GEN_INT (right);
4195 gen_shifty_hi_op (LSHIFTRT, operands);
4197 if (first > 0)
4199 operands[2] = GEN_INT (first);
4200 gen_shifty_hi_op (ASHIFT, operands);
4201 total_shift -= first;
4202 mask <<= first;
4204 if (first >= 0)
4205 emit_insn (mask <= 0xff
4206 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4207 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4208 if (total_shift > 0)
4210 operands[2] = GEN_INT (total_shift);
4211 gen_shifty_hi_op (ASHIFT, operands);
4213 break;
4215 case 4:
4216 shift_gen_fun = gen_shifty_op;
4217 case 3:
4218 /* If the topmost bit that matters is set, set the topmost bits
4219 that don't matter. This way, we might be able to get a shorter
4220 signed constant. */
4221 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4222 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4223 case 2:
4224 /* Don't expand fine-grained when combining, because that will
4225 make the pattern fail. */
4226 if (currently_expanding_to_rtl
4227 || reload_in_progress || reload_completed)
4229 rtx operands[3];
4231 /* Cases 3 and 4 should be handled by this split
4232 only while combining. */
4233 gcc_assert (kind <= 2);
4234 if (right)
4236 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4237 source = dest;
4239 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4240 if (total_shift)
4242 operands[0] = dest;
4243 operands[1] = dest;
4244 operands[2] = GEN_INT (total_shift);
4245 shift_gen_fun (ASHIFT, operands);
4247 break;
4249 else
4251 int neg = 0;
4252 if (kind != 4 && total_shift < 16)
4254 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4255 if (neg > 0)
4256 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4257 else
4258 neg = 0;
4260 emit_insn (gen_and_shl_scratch (dest, source,
4261 GEN_INT (right),
4262 GEN_INT (mask),
4263 GEN_INT (total_shift + neg),
4264 GEN_INT (neg)));
4265 emit_insn (gen_movsi (dest, dest));
4266 break;
4269 return false;
4272 /* Try to find a good way to implement the combiner pattern
4273 [(set (match_operand:SI 0 "register_operand" "=r")
4274 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4275 (match_operand:SI 2 "const_int_operand" "n")
4276 (match_operand:SI 3 "const_int_operand" "n")
4277 (const_int 0)))
4278 (clobber (reg:SI T_REG))]
4279 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4280 return 0 for simple left / right shift combination.
4281 return 1 for left shift / 8 bit sign extend / left shift.
4282 return 2 for left shift / 16 bit sign extend / left shift.
4283 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4284 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4285 return 5 for left shift / 16 bit sign extend / right shift.
4286 return 6 for < 8 bit sign extend / left shift.
4287 return 7 for < 8 bit sign extend / left shift / single right shift.
4288 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4290 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4292 int left, size, insize, ext;
4293 int cost = 0, best_cost;
4294 int kind;
4296 left = INTVAL (left_rtx);
4297 size = INTVAL (size_rtx);
4298 insize = size - left;
4299 gcc_assert (insize > 0);
4300 /* Default to left / right shift. */
4301 kind = 0;
4302 best_cost = ashl_lshr_seq[32 - insize].insn_count
4303 + ashl_lshr_seq[32 - size].insn_count;
4304 if (size <= 16)
4306 /* 16 bit shift / sign extend / 16 bit shift */
4307 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4308 + ashl_lshr_seq[16 - size].insn_count;
4309 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4310 below, by alternative 3 or something even better. */
4311 if (cost < best_cost)
4313 kind = 5;
4314 best_cost = cost;
4317 /* Try a plain sign extend between two shifts. */
4318 for (ext = 16; ext >= insize; ext -= 8)
4320 if (ext <= size)
4322 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4323 + ashl_lshr_seq[size - ext].insn_count;
4324 if (cost < best_cost)
4326 kind = ext / (unsigned) 8;
4327 best_cost = cost;
4330 /* Check if we can do a sloppy shift with a final signed shift
4331 restoring the sign. */
4332 if (EXT_SHIFT_SIGNED (size - ext))
4333 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4334 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4335 /* If not, maybe it's still cheaper to do the second shift sloppy,
4336 and do a final sign extend? */
4337 else if (size <= 16)
4338 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4339 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4340 + 1;
4341 else
4342 continue;
4343 if (cost < best_cost)
4345 kind = ext / (unsigned) 8 + 2;
4346 best_cost = cost;
4349 /* Check if we can sign extend in r0 */
4350 if (insize < 8)
4352 cost = 3 + ashl_lshr_seq[left].insn_count;
4353 if (cost < best_cost)
4355 kind = 6;
4356 best_cost = cost;
4358 /* Try the same with a final signed shift. */
4359 if (left < 31)
4361 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4362 if (cost < best_cost)
4364 kind = 7;
4365 best_cost = cost;
4369 if (TARGET_DYNSHIFT)
4371 /* Try to use a dynamic shift. */
4372 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4373 if (cost < best_cost)
4375 kind = 0;
4376 best_cost = cost;
4379 if (costp)
4380 *costp = cost;
4381 return kind;
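/* Editor's note, for illustration: the function costs each strategy
   listed in the comment above via the ashl_lshr_seq / ext_ashl_lshr_seq
   tables and keeps the cheapest; e.g. for a sign-extended byte field
   (insize == 8) the plain left/right shift pair of kind 0 competes with
   the shift / exts.b / shift forms of kinds 1..4 and, on targets with
   dynamic shifts, with a shad/shld based variant.  */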
4384 /* Function to be used in the length attribute of the instructions
4385 implementing this pattern. */
4387 shl_sext_length (rtx insn)
4389 rtx set_src, left_rtx, size_rtx;
4390 int cost;
4392 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4393 left_rtx = XEXP (XEXP (set_src, 0), 1);
4394 size_rtx = XEXP (set_src, 1);
4395 shl_sext_kind (left_rtx, size_rtx, &cost);
4396 return cost;
4399 /* Generate rtl for this pattern */
4400 bool
4401 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4403 int kind;
4404 int left, size, insize, cost;
4405 rtx operands[3];
4407 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4408 left = INTVAL (left_rtx);
4409 size = INTVAL (size_rtx);
4410 insize = size - left;
4411 switch (kind)
4413 case 1:
4414 case 2:
4415 case 3:
4416 case 4:
4418 int ext = kind & 1 ? 8 : 16;
4419 int shift2 = size - ext;
4421 /* Don't expand fine-grained when combining, because that will
4422 make the pattern fail. */
4423 if (! currently_expanding_to_rtl
4424 && ! reload_in_progress && ! reload_completed)
4426 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4427 emit_insn (gen_movsi (dest, source));
4428 break;
4430 if (dest != source)
4431 emit_insn (gen_movsi (dest, source));
4432 operands[0] = dest;
4433 if (ext - insize)
4435 operands[2] = GEN_INT (ext - insize);
4436 gen_shifty_hi_op (ASHIFT, operands);
4438 emit_insn (kind & 1
4439 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4440 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4441 if (kind <= 2)
4443 if (shift2)
4445 operands[2] = GEN_INT (shift2);
4446 gen_shifty_op (ASHIFT, operands);
4449 else
4451 if (shift2 > 0)
4453 if (EXT_SHIFT_SIGNED (shift2))
4455 operands[2] = GEN_INT (shift2 + 1);
4456 gen_shifty_op (ASHIFT, operands);
4457 operands[2] = const1_rtx;
4458 gen_shifty_op (ASHIFTRT, operands);
4459 break;
4461 operands[2] = GEN_INT (shift2);
4462 gen_shifty_hi_op (ASHIFT, operands);
4464 else if (shift2)
4466 operands[2] = GEN_INT (-shift2);
4467 gen_shifty_hi_op (LSHIFTRT, operands);
4469 emit_insn (size <= 8
4470 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4471 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4473 break;
4475 case 5:
4477 int i = 16 - size;
4478 if (! currently_expanding_to_rtl
4479 && ! reload_in_progress && ! reload_completed)
4480 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4481 else
4483 operands[0] = dest;
4484 operands[2] = GEN_INT (16 - insize);
4485 gen_shifty_hi_op (ASHIFT, operands);
4486 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4488 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4489 while (--i >= 0)
4490 gen_ashift (ASHIFTRT, 1, dest);
4491 break;
4493 case 6:
4494 case 7:
4495 /* Don't expand fine-grained when combining, because that will
4496 make the pattern fail. */
4497 if (! currently_expanding_to_rtl
4498 && ! reload_in_progress && ! reload_completed)
4500 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4501 emit_insn (gen_movsi (dest, source));
4502 break;
4504 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4505 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4506 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4507 operands[0] = dest;
4508 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4509 gen_shifty_op (ASHIFT, operands);
4510 if (kind == 7)
4511 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4512 break;
4513 default:
4514 return true;
4516 return false;
4519 /* Prefix a symbol_ref name with "datalabel". */
4521 gen_datalabel_ref (rtx sym)
4523 const char *str;
4525 if (GET_CODE (sym) == LABEL_REF)
4526 return gen_rtx_CONST (GET_MODE (sym),
4527 gen_rtx_UNSPEC (GET_MODE (sym),
4528 gen_rtvec (1, sym),
4529 UNSPEC_DATALABEL));
4531 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4533 str = XSTR (sym, 0);
4534 /* Share all SYMBOL_REF strings with the same value - that is important
4535 for cse. */
4536 str = IDENTIFIER_POINTER (get_identifier (str));
4537 XSTR (sym, 0) = str;
4539 return sym;
4543 static alloc_pool label_ref_list_pool;
4545 typedef struct label_ref_list_d
4547 rtx_code_label *label;
4548 struct label_ref_list_d *next;
4549 } *label_ref_list_t;
4551 /* The SH cannot load a large constant into a register, constants have to
4552 come from a pc relative load. The reference of a pc relative load
4553 instruction must be less than 1k in front of the instruction. This
4554 means that we often have to dump a constant inside a function, and
4555 generate code to branch around it.
4557 It is important to minimize this, since the branches will slow things
4558 down and make things bigger.
4560 Worst case code looks like:
4562 mov.l L1,rn
4563 bra L2
4565 align
4566 L1: .long value
4570 mov.l L3,rn
4571 bra L4
4573 align
4574 L3: .long value
4578 We fix this by performing a scan before scheduling, which notices which
4579 instructions need to have their operands fetched from the constant table
4580 and builds the table.
4582 The algorithm is:
4584 scan, find an instruction which needs a pcrel move. Look forward, find the
4585 last barrier which is within MAX_COUNT bytes of the requirement.
4586 If there isn't one, make one. Process all the instructions between
4587 the find and the barrier.
4589 In the above example, we can tell that L3 is within 1k of L1, so
4590 the first move can be shrunk from the 3 insn+constant sequence into
4591 just 1 insn, and the constant moved to L3 to make:
4593 mov.l L1,rn
4595 mov.l L3,rn
4596 bra L4
4598 align
4599 L3:.long value
4600 L4:.long value
4602 Then the second move becomes the target for the shortening process. */
4604 typedef struct
4606 rtx value; /* Value in table. */
4607 rtx_code_label *label; /* Label of value. */
4608 label_ref_list_t wend; /* End of window. */
4609 machine_mode mode; /* Mode of value. */
4611 /* True if this constant is accessed as part of a post-increment
4612 sequence. Note that HImode constants are never accessed in this way. */
4613 bool part_of_sequence_p;
4614 } pool_node;
4616 /* The maximum number of constants that can fit into one pool, since
4617 constants in the range 0..510 are at least 2 bytes long, and in the
4618 range from there to 1018 at least 4 bytes. */
4620 #define MAX_POOL_SIZE 372
4621 static pool_node pool_vector[MAX_POOL_SIZE];
4622 static int pool_size;
4623 static rtx_code_label *pool_window_label;
4624 static int pool_window_last;
4626 static int max_labelno_before_reorg;
4628 /* ??? If we need a constant in HImode which is the truncated value of a
4629 constant we need in SImode, we could combine the two entries thus saving
4630 two bytes. Is this common enough to be worth the effort of implementing
4631 it? */
4633 /* ??? This stuff should be done at the same time that we shorten branches.
4634 As it is now, we must assume that all branches are the maximum size, and
4635 this causes us to almost always output constant pools sooner than
4636 necessary. */
4638 /* Add a constant to the pool and return its label. */
4639 static rtx_code_label *
4640 add_constant (rtx x, machine_mode mode, rtx last_value)
4642 int i;
4643 rtx_code_label *lab, *new_rtx;
4644 label_ref_list_t ref, newref;
4646 /* First see if we've already got it. */
4647 for (i = 0; i < pool_size; i++)
4649 if (x->code == pool_vector[i].value->code
4650 && mode == pool_vector[i].mode)
4652 if (x->code == CODE_LABEL)
4654 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4655 continue;
4657 if (rtx_equal_p (x, pool_vector[i].value))
4659 lab = new_rtx = 0;
4660 if (! last_value
4661 || ! i
4662 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4664 new_rtx = gen_label_rtx ();
4665 LABEL_REFS (new_rtx) = pool_vector[i].label;
4666 pool_vector[i].label = lab = new_rtx;
4668 if (lab && pool_window_label)
4670 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4671 newref->label = pool_window_label;
4672 ref = pool_vector[pool_window_last].wend;
4673 newref->next = ref;
4674 pool_vector[pool_window_last].wend = newref;
4676 if (new_rtx)
4677 pool_window_label = new_rtx;
4678 pool_window_last = i;
4679 return lab;
4684 /* Need a new one. */
4685 pool_vector[pool_size].value = x;
4686 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4688 lab = 0;
4689 pool_vector[pool_size - 1].part_of_sequence_p = true;
4691 else
4692 lab = gen_label_rtx ();
4693 pool_vector[pool_size].mode = mode;
4694 pool_vector[pool_size].label = lab;
4695 pool_vector[pool_size].wend = NULL;
4696 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4697 if (lab && pool_window_label)
4699 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4700 newref->label = pool_window_label;
4701 ref = pool_vector[pool_window_last].wend;
4702 newref->next = ref;
4703 pool_vector[pool_window_last].wend = newref;
4705 if (lab)
4706 pool_window_label = lab;
4707 pool_window_last = pool_size;
4708 pool_size++;
4709 return lab;
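/* Editor's note: e.g. requesting the same SImode constant twice returns
   a fresh label chained onto the existing pool entry via LABEL_REFS
   rather than creating a second pool_node, so dump_table emits the value
   only once, under all of its labels.  */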
4712 /* Output the literal table. START, if nonzero, is the first instruction
4713 this table is needed for, and also indicates that there is at least one
4714 casesi_worker_2 instruction; We have to emit the operand3 labels from
4715 these insns at a 4-byte aligned position. BARRIER is the barrier
4716 after which we are to place the table. */
4717 static void
4718 dump_table (rtx_insn *start, rtx_insn *barrier)
4720 rtx_insn *scan = barrier;
4721 int i;
4722 bool need_align = true;
4723 rtx lab;
4724 label_ref_list_t ref;
4725 bool have_df = false;
4727 /* Do two passes, first time dump out the HI sized constants. */
4729 for (i = 0; i < pool_size; i++)
4731 pool_node *p = &pool_vector[i];
4733 if (p->mode == HImode)
4735 if (need_align)
4737 scan = emit_insn_after (gen_align_2 (), scan);
4738 need_align = false;
4740 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4741 scan = emit_label_after (lab, scan);
4742 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4743 scan);
4744 for (ref = p->wend; ref; ref = ref->next)
4746 lab = ref->label;
4747 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4750 else if (p->mode == DFmode)
4751 have_df = true;
4754 need_align = true;
4756 if (start)
4758 scan = emit_insn_after (gen_align_4 (), scan);
4759 need_align = false;
4760 for (; start != barrier; start = NEXT_INSN (start))
4761 if (NONJUMP_INSN_P (start)
4762 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4764 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4765 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4767 scan = emit_label_after (lab, scan);
4770 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4772 rtx_insn *align_insn = NULL;
4774 scan = emit_label_after (gen_label_rtx (), scan);
4775 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4776 need_align = false;
4778 for (i = 0; i < pool_size; i++)
4780 pool_node *p = &pool_vector[i];
4782 switch (p->mode)
4784 case HImode:
4785 break;
4786 case SImode:
4787 case SFmode:
4788 if (align_insn && !p->part_of_sequence_p)
4790 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4791 emit_label_before (lab, align_insn);
4792 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4793 align_insn);
4794 for (ref = p->wend; ref; ref = ref->next)
4796 lab = ref->label;
4797 emit_insn_before (gen_consttable_window_end (lab),
4798 align_insn);
4800 delete_insn (align_insn);
4801 align_insn = NULL;
4802 continue;
4804 else
4806 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4807 scan = emit_label_after (lab, scan);
4808 scan = emit_insn_after (gen_consttable_4 (p->value,
4809 const0_rtx), scan);
4810 need_align = ! need_align;
4812 break;
4813 case DFmode:
4814 if (need_align)
4816 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4817 align_insn = scan;
4818 need_align = false;
4820 case DImode:
4821 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4822 scan = emit_label_after (lab, scan);
4823 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4824 scan);
4825 break;
4826 default:
4827 gcc_unreachable ();
4830 if (p->mode != HImode)
4832 for (ref = p->wend; ref; ref = ref->next)
4834 lab = ref->label;
4835 scan = emit_insn_after (gen_consttable_window_end (lab),
4836 scan);
4841 pool_size = 0;
4844 for (i = 0; i < pool_size; i++)
4846 pool_node *p = &pool_vector[i];
4848 switch (p->mode)
4850 case HImode:
4851 break;
4852 case SImode:
4853 case SFmode:
4854 if (need_align)
4856 need_align = false;
4857 scan = emit_label_after (gen_label_rtx (), scan);
4858 scan = emit_insn_after (gen_align_4 (), scan);
4860 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4861 scan = emit_label_after (lab, scan);
4862 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4863 scan);
4864 break;
4865 case DFmode:
4866 case DImode:
4867 if (need_align)
4869 need_align = false;
4870 scan = emit_label_after (gen_label_rtx (), scan);
4871 scan = emit_insn_after (gen_align_4 (), scan);
4873 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4874 scan = emit_label_after (lab, scan);
4875 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4876 scan);
4877 break;
4878 default:
4879 gcc_unreachable ();
4882 if (p->mode != HImode)
4884 for (ref = p->wend; ref; ref = ref->next)
4886 lab = ref->label;
4887 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4892 scan = emit_insn_after (gen_consttable_end (), scan);
4893 scan = emit_barrier_after (scan);
4894 pool_size = 0;
4895 pool_window_label = NULL;
4896 pool_window_last = 0;
4899 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4901 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4903 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4904 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4905 need to fix it if the input value is CONST_OK_FOR_I08. */
4906 static bool
4907 broken_move (rtx_insn *insn)
4909 if (NONJUMP_INSN_P (insn))
4911 rtx pat = PATTERN (insn);
4912 if (GET_CODE (pat) == PARALLEL)
4913 pat = XVECEXP (pat, 0, 0);
4914 if (GET_CODE (pat) == SET
4915 /* We can load any 8-bit value if we don't care what the high
4916 order bits end up as. */
4917 && GET_MODE (SET_DEST (pat)) != QImode
4918 && (CONSTANT_P (SET_SRC (pat))
4919 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4920 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4921 /* Match mova_const. */
4922 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4923 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4924 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4925 && ! (TARGET_SH2E
4926 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4927 && (fp_zero_operand (SET_SRC (pat))
4928 || fp_one_operand (SET_SRC (pat)))
4929 /* In general we don't know the current setting of fpscr, so
4930 disable fldi.
4931 There is an exception if this was a register-register move
4932 before reload - and hence it was ascertained that we have
4933 single precision setting - and in a post-reload optimization
4934 we changed this to do a constant load. In that case
4935 we don't have an r0 clobber, hence we must use fldi. */
4936 && (TARGET_FMOVD
4937 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4938 == SCRATCH))
4939 && REG_P (SET_DEST (pat))
4940 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4941 && ! (TARGET_SH2A
4942 && GET_MODE (SET_DEST (pat)) == SImode
4943 && (satisfies_constraint_I20 (SET_SRC (pat))
4944 || satisfies_constraint_I28 (SET_SRC (pat))))
4945 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4946 return true;
4949 return false;
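/* Editor's illustration of what the predicate above treats as "broken":
   an SImode load of a constant outside the I08 range (and, on SH2A,
   outside I20/I28 as well) has to come from the constant pool, while a
   QImode constant load or an SH2E fldi0/fldi1 capable float constant
   (under the fpscr conditions checked above) is left alone as an
   immediate move.  */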
4952 /* Return true if the specified insn is a mova insn. */
4953 static bool
4954 mova_p (rtx_insn *insn)
4956 return (NONJUMP_INSN_P (insn)
4957 && GET_CODE (PATTERN (insn)) == SET
4958 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4959 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4960 /* Don't match mova_const. */
4961 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4964 /* Fix up a mova from a switch that went out of range. */
4965 static void
4966 fixup_mova (rtx_insn *mova)
4968 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4969 if (! flag_pic)
4971 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4972 INSN_CODE (mova) = -1;
4974 else
4976 rtx_insn *worker = mova;
4977 rtx_code_label *lab = gen_label_rtx ();
4978 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4982 worker = NEXT_INSN (worker);
4983 gcc_assert (worker
4984 && !LABEL_P (worker)
4985 && !JUMP_P (worker));
4986 } while (NOTE_P (worker)
4987 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4988 wpat = PATTERN (worker);
4989 wpat0 = XVECEXP (wpat, 0, 0);
4990 wpat1 = XVECEXP (wpat, 0, 1);
4991 wsrc = SET_SRC (wpat0);
4992 PATTERN (worker) = (gen_casesi_worker_2
4993 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4994 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4995 XEXP (wpat1, 0)));
4996 INSN_CODE (worker) = -1;
4997 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4998 base = gen_rtx_LABEL_REF (Pmode, lab);
4999 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5000 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5001 INSN_CODE (mova) = -1;
5005 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5006 *num_mova, and check if the new mova is not nested within the first one.
5007 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5008 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5009 static int
5010 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5012 int n_addr = 0; /* Initialization to shut up spurious warning. */
5013 int f_target, n_target = 0; /* Likewise. */
5015 if (optimize)
5017 /* If NEW_MOVA has no address yet, it will be handled later. */
5018 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5019 return -1;
5021 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5022 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5023 if (n_addr > n_target || n_addr + 1022 < n_target)
5025 /* Change the mova into a load.
5026 broken_move will then return true for it. */
5027 fixup_mova (new_mova);
5028 return 1;
5031 if (!(*num_mova)++)
5033 *first_mova = new_mova;
5034 return 2;
5036 if (!optimize
5037 || ((f_target
5038 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5039 >= n_target))
5040 return -1;
5042 (*num_mova)--;
5043 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5044 > n_target - n_addr)
5046 fixup_mova (*first_mova);
5047 return 0;
5049 else
5051 fixup_mova (new_mova);
5052 return 1;
5056 /* Find the last barrier from insn FROM which is close enough to hold the
5057 constant pool. If we can't find one, then create one near the end of
5058 the range. */
5059 static rtx_insn *
5060 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5062 int count_si = 0;
5063 int count_hi = 0;
5064 int found_hi = 0;
5065 int found_si = 0;
5066 int found_di = 0;
5067 int hi_align = 2;
5068 int si_align = 2;
5069 int leading_mova = num_mova;
5070 rtx_insn *barrier_before_mova = NULL;
5071 rtx_insn *found_barrier = NULL;
5072 rtx_insn *good_barrier = NULL;
5073 int si_limit;
5074 int hi_limit;
5075 rtx_insn *orig = from;
5076 rtx_insn *last_got = NULL;
5077 rtx_insn *last_symoff = NULL;
5079 /* For HImode: range is 510, add 4 because pc counts from address of
5080 second instruction after this one, subtract 2 for the jump instruction
5081 that we may need to emit before the table, subtract 2 for the instruction
5082 that fills the jump delay slot (in very rare cases, reorg will take an
5083 instruction from after the constant pool or will leave the delay slot
5084 empty). This gives 510.
5085 For SImode: range is 1020, add 4 because pc counts from address of
5086 second instruction after this one, subtract 2 in case pc is 2 byte
5087 aligned, subtract 2 for the jump instruction that we may need to emit
5088 before the table, subtract 2 for the instruction that fills the jump
5089 delay slot. This gives 1018. */
5091 /* The branch will always be shortened now that the reference address for
5092 forward branches is the successor address, thus we need no longer make
5093 adjustments to the [sh]i_limit for -O0. */
5095 si_limit = 1018;
5096 hi_limit = 510;
5098 while (from && count_si < si_limit && count_hi < hi_limit)
5100 int inc = get_attr_length (from);
5101 int new_align = 1;
5103 /* If this is a label that existed at the time of the compute_alignments
5104 call, determine the alignment. N.B. When find_barrier recurses for
5105 an out-of-reach mova, we might see labels at the start of previously
5106 inserted constant tables. */
5107 if (LABEL_P (from)
5108 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5110 if (optimize)
5111 new_align = 1 << label_to_alignment (from);
5112 else if (BARRIER_P (prev_nonnote_insn (from)))
5113 new_align = 1 << barrier_align (from);
5114 else
5115 new_align = 1;
5116 inc = 0;
5118 /* In case we are scanning a constant table because of recursion, check
5119 for explicit alignments. If the table is long, we might be forced
5120 to emit the new table in front of it; the length of the alignment
5121 might be the last straw. */
5122 else if (NONJUMP_INSN_P (from)
5123 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5124 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5125 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5126 /* When we find the end of a constant table, paste the new constant
5127 at the end. That is better than putting it in front because
5128 this way, we don't need extra alignment for adding a 4-byte-aligned
5129 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5130 else if (NONJUMP_INSN_P (from)
5131 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5132 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5133 return from;
5135 if (BARRIER_P (from))
5137 rtx_insn *next;
5139 found_barrier = from;
5141 /* If we are at the end of the function, or in front of an alignment
5142 instruction, we need not insert an extra alignment. We prefer
5143 this kind of barrier. */
5144 if (barrier_align (from) > 2)
5145 good_barrier = from;
5147 /* If we are at the end of a hot/cold block, dump the constants
5148 here. */
5149 next = NEXT_INSN (from);
5150 if (next
5151 && NOTE_P (next)
5152 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5153 break;
5156 if (broken_move (from))
5158 rtx pat, src, dst;
5159 machine_mode mode;
5161 pat = PATTERN (from);
5162 if (GET_CODE (pat) == PARALLEL)
5163 pat = XVECEXP (pat, 0, 0);
5164 src = SET_SRC (pat);
5165 dst = SET_DEST (pat);
5166 mode = GET_MODE (dst);
5168 /* GOT pc relative setting comes in a pair of
5169 mova .L8,r0
5170 mov.l .L8,r12
5171 instructions. (plus add r0,r12).
5172 Remember if we see one without the other. */
5173 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5174 last_got = last_got ? NULL : from;
5175 else if (PIC_ADDR_P (src))
5176 last_got = last_got ? NULL : from;
5178 /* We must explicitly check the mode, because sometimes the
5179 front end will generate code to load unsigned constants into
5180 HImode targets without properly sign extending them. */
5181 if (mode == HImode
5182 || (mode == SImode && satisfies_constraint_I16 (src)
5183 && REGNO (dst) != FPUL_REG))
5185 found_hi += 2;
5186 /* We put the short constants before the long constants, so
5187 we must count the length of short constants in the range
5188 for the long constants. */
5189 /* ??? This isn't optimal, but is easy to do. */
5190 si_limit -= 2;
5192 else
5194 /* We dump DF/DI constants before SF/SI ones, because
5195 the limit is the same, but the alignment requirements
5196 are higher. We may waste up to 4 additional bytes
5197 for alignment, and the DF/DI constant may have
5198 another SF/SI constant placed before it. */
5199 if (TARGET_SHCOMPACT
5200 && ! found_di
5201 && (mode == DFmode || mode == DImode))
5203 found_di = 1;
5204 si_limit -= 8;
5206 while (si_align > 2 && found_si + si_align - 2 > count_si)
5207 si_align >>= 1;
5208 if (found_si > count_si)
5209 count_si = found_si;
5210 found_si += GET_MODE_SIZE (mode);
5211 if (num_mova)
5212 si_limit -= GET_MODE_SIZE (mode);
5216 if (mova_p (from))
5218 switch (untangle_mova (&num_mova, &mova, from))
5220 case 1:
5221 if (flag_pic)
5223 rtx src = SET_SRC (PATTERN (from));
5224 if (GET_CODE (src) == CONST
5225 && GET_CODE (XEXP (src, 0)) == UNSPEC
5226 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5227 last_symoff = from;
5229 break;
5230 case 0: return find_barrier (0, 0, mova);
5231 case 2:
5233 leading_mova = 0;
5234 barrier_before_mova
5235 = good_barrier ? good_barrier : found_barrier;
5237 default: break;
5239 if (found_si > count_si)
5240 count_si = found_si;
5242 else if (JUMP_TABLE_DATA_P (from)
5243 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5245 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5246 || (num_mova
5247 && (prev_nonnote_insn (from)
5248 == XEXP (MOVA_LABELREF (mova), 0))))
5249 num_mova--;
5250 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5252 /* We have just passed the barrier in front of the
5253 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5254 the ADDR_DIFF_VEC is accessed as data, just like our pool
5255 constants, this is a good opportunity to accommodate what
5256 we have gathered so far.
5257 If we waited any longer, we could end up at a barrier in
5258 front of code, which gives worse cache usage for separated
5259 instruction / data caches. */
5260 good_barrier = found_barrier;
5261 break;
5263 else
5265 rtx body = PATTERN (from);
5266 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5269 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5270 else if (JUMP_P (from)
5271 && ! TARGET_SH2
5272 && ! optimize_size)
5273 new_align = 4;
5275 /* There is a possibility that a bf is transformed into a bf/s by the
5276 delay slot scheduler. */
5277 if (JUMP_P (from)
5278 && get_attr_type (from) == TYPE_CBRANCH
5279 && ! sequence_insn_p (from))
5280 inc += 2;
5282 if (found_si)
5284 count_si += inc;
5285 if (new_align > si_align)
5287 si_limit -= (count_si - 1) & (new_align - si_align);
5288 si_align = new_align;
5290 count_si = (count_si + new_align - 1) & -new_align;
5292 if (found_hi)
5294 count_hi += inc;
5295 if (new_align > hi_align)
5297 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5298 hi_align = new_align;
5300 count_hi = (count_hi + new_align - 1) & -new_align;
5302 from = NEXT_INSN (from);
5305 if (num_mova)
5307 if (leading_mova)
5309 /* Try as we might, the leading mova is out of range. Change
5310 it into a load (which will become a pcload) and retry. */
5311 fixup_mova (mova);
5312 return find_barrier (0, 0, mova);
5314 else
5316 /* Insert the constant pool table before the mova instruction,
5317 to prevent the mova label reference from going out of range. */
5318 from = mova;
5319 good_barrier = found_barrier = barrier_before_mova;
5323 if (found_barrier)
5325 if (good_barrier && next_real_insn (found_barrier))
5326 found_barrier = good_barrier;
5328 else
5330 /* We didn't find a barrier in time to dump our stuff,
5331 so we'll make one. */
5332 rtx_code_label *label = gen_label_rtx ();
5334 /* Don't emit a constant table in the middle of insns for
5335 casesi_worker_2. This is a bit of overkill, but is enough
5336 because casesi_worker_2 doesn't appear very frequently. */
5337 if (last_symoff)
5338 from = last_symoff;
5340 /* If we exceeded the range, then we must back up over the last
5341 instruction we looked at. Otherwise, we just need to undo the
5342 NEXT_INSN at the end of the loop. */
5343 if (PREV_INSN (from) != orig
5344 && (count_hi > hi_limit || count_si > si_limit))
5345 from = PREV_INSN (PREV_INSN (from));
5346 else
5347 from = PREV_INSN (from);
5349 /* Don't emit a constant table in the middle of global pointer setting,
5350 since that would move the addressing base GOT into another table.
5351 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5352 in the pool anyway, so just move up the whole constant pool.
5354 However, avoid doing so when the last single GOT mov is the starting
5355 insn itself. Going above the start insn would create a negative
5356 offset, causing errors. */
5357 if (last_got && last_got != orig)
5358 from = PREV_INSN (last_got);
5360 /* Don't insert the constant pool table at the position which
5361 may be the landing pad. */
5362 if (flag_exceptions
5363 && CALL_P (from)
5364 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5365 from = PREV_INSN (from);
5367 /* Walk back to be just before any jump or label.
5368 Putting it before a label reduces the number of times the branch
5369 around the constant pool table will be hit. Putting it before
5370 a jump makes it more likely that the bra delay slot will be
5371 filled. */
5372 while (NOTE_P (from) || JUMP_P (from)
5373 || LABEL_P (from))
5374 from = PREV_INSN (from);
5376 /* Make sure we do not split between a call and its corresponding
5377 CALL_ARG_LOCATION note. */
5378 if (CALL_P (from))
5380 rtx_insn *next = NEXT_INSN (from);
5381 if (next && NOTE_P (next)
5382 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5383 from = next;
5386 from = emit_jump_insn_after (gen_jump (label), from);
5387 JUMP_LABEL (from) = label;
5388 LABEL_NUSES (label) = 1;
5389 found_barrier = emit_barrier_after (from);
5390 emit_label_after (label, found_barrier);
5393 return found_barrier;
5396 /* If the instruction INSN is implemented by a special function, and we can
5397 positively find the register that is used to call the sfunc, and this
5398 register is not used anywhere else in this instruction - except as the
5399 destination of a set, return this register; else, return 0. */
5401 sfunc_uses_reg (rtx_insn *insn)
5403 int i;
5404 rtx pattern, part, reg_part, reg;
5406 if (!NONJUMP_INSN_P (insn))
5407 return NULL_RTX;
5408 pattern = PATTERN (insn);
5409 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5410 return NULL_RTX;
5412 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5414 part = XVECEXP (pattern, 0, i);
5415 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5416 reg_part = part;
5418 if (! reg_part)
5419 return NULL_RTX;
5420 reg = XEXP (reg_part, 0);
5421 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5423 part = XVECEXP (pattern, 0, i);
5424 if (part == reg_part || GET_CODE (part) == CLOBBER)
5425 continue;
5426 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5427 && REG_P (SET_DEST (part)))
5428 ? SET_SRC (part) : part)))
5429 return NULL_RTX;
5431 return reg;
5434 /* See if the only way in which INSN uses REG is by calling it, or by
5435 setting it while calling it. Set *SET to a SET rtx if the register
5436 is set by INSN. */
5437 static bool
5438 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5440 rtx pattern, reg2;
5442 *set = NULL_RTX;
5444 reg2 = sfunc_uses_reg (insn);
5445 if (reg2 && REGNO (reg2) == REGNO (reg))
5447 pattern = single_set (insn);
5448 if (pattern
5449 && REG_P (SET_DEST (pattern))
5450 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5451 *set = pattern;
5452 return false;
5454 if (!CALL_P (insn))
5456 /* We don't use rtx_equal_p because we don't care if the mode is
5457 different. */
5458 pattern = single_set (insn);
5459 if (pattern
5460 && REG_P (SET_DEST (pattern))
5461 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5463 rtx par, part;
5464 int i;
5466 *set = pattern;
5467 par = PATTERN (insn);
5468 if (GET_CODE (par) == PARALLEL)
5469 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5471 part = XVECEXP (par, 0, i);
5472 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5473 return true;
5475 return reg_mentioned_p (reg, SET_SRC (pattern));
5478 return true;
5481 pattern = PATTERN (insn);
5483 if (GET_CODE (pattern) == PARALLEL)
5485 int i;
5487 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5488 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5489 return true;
5490 pattern = XVECEXP (pattern, 0, 0);
5493 if (GET_CODE (pattern) == SET)
5495 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5497 /* We don't use rtx_equal_p, because we don't care if the
5498 mode is different. */
5499 if (!REG_P (SET_DEST (pattern))
5500 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5501 return true;
5503 *set = pattern;
5506 pattern = SET_SRC (pattern);
5509 if (GET_CODE (pattern) != CALL
5510 || !MEM_P (XEXP (pattern, 0))
5511 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5512 return true;
5514 return false;
5517 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5518 general registers. Bits 0..15 mean that the respective registers
5519 are used as inputs in the instruction. Bits 16..31 mean that the
5520 registers 0..15, respectively, are used as outputs, or are clobbered.
5521 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5523 regs_used (rtx x, int is_dest)
5525 enum rtx_code code;
5526 const char *fmt;
5527 int i, used = 0;
5529 if (! x)
5530 return used;
5531 code = GET_CODE (x);
5532 switch (code)
5534 case REG:
5535 if (REGNO (x) < 16)
5536 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5537 << (REGNO (x) + is_dest));
5538 return 0;
5539 case SUBREG:
5541 rtx y = SUBREG_REG (x);
5543 if (!REG_P (y))
5544 break;
5545 if (REGNO (y) < 16)
5546 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5547 << (REGNO (y) +
5548 subreg_regno_offset (REGNO (y),
5549 GET_MODE (y),
5550 SUBREG_BYTE (x),
5551 GET_MODE (x)) + is_dest));
5552 return 0;
5554 case SET:
5555 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5556 case RETURN:
5557 /* If there was a return value, it must have been indicated with USE. */
5558 return 0x00ffff00;
5559 case CLOBBER:
5560 is_dest = 1;
5561 break;
5562 case MEM:
5563 is_dest = 0;
5564 break;
5565 case CALL:
5566 used |= 0x00ff00f0;
5567 break;
5568 default:
5569 break;
5572 fmt = GET_RTX_FORMAT (code);
5574 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5576 if (fmt[i] == 'E')
5578 int j;
5579 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5580 used |= regs_used (XVECEXP (x, i, j), is_dest);
5582 else if (fmt[i] == 'e')
5583 used |= regs_used (XEXP (x, i), is_dest);
5585 return used;
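/* Editor's example of the encoding described above (assuming SImode
   occupies a single hard register): for
   (set (reg:SI 3) (plus:SI (reg:SI 4) (reg:SI 5)))
   the inputs contribute (1 << 4) | (1 << 5) and the output contributes
   1 << (16 + 3), giving 0x80030.  */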
5588 /* Create an instruction that prevents redirection of a conditional branch
5589 to the destination of the JUMP with address ADDR.
5590 If the branch needs to be implemented as an indirect jump, try to find
5591 a scratch register for it.
5592 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5593 If any preceding insn that doesn't fit into a delay slot is good enough,
5594 pass 1. Pass 2 if a definite blocking insn is needed.
5595 -1 is used internally to avoid deep recursion.
5596 If a blocking instruction is made or recognized, return it. */
5597 static rtx_insn *
5598 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5600 int dead = 0;
5601 rtx_insn *prev = prev_nonnote_insn (jump);
5602 rtx dest;
5604 /* First, check if we already have an instruction that satisfies our need. */
5605 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5607 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5608 return prev;
5609 if (GET_CODE (PATTERN (prev)) == USE
5610 || GET_CODE (PATTERN (prev)) == CLOBBER
5611 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5612 prev = jump;
5613 else if ((need_block &= ~1) < 0)
5614 return prev;
5615 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5616 need_block = 0;
5618 if (GET_CODE (PATTERN (jump)) == RETURN)
5620 if (! need_block)
5621 return prev;
5622 /* Reorg even does nasty things with return insns that cause branches
5623 to go out of range - see find_end_label and callers. */
5624 return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
5626 /* We can't use JUMP_LABEL here because it might be undefined
5627 when not optimizing. */
5628 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5629 /* If the branch is out of range, try to find a scratch register for it. */
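/* With d = INSN_ADDRESSES (INSN_UID (dest)) - addr, the unsigned
   comparison below is true iff d < -4092 or d > 4098, i.e. iff the
   destination lies outside the window a pc-relative branch can reach
   from here.  */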
5630 if (optimize
5631 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5632 > 4092 + 4098))
5634 rtx_insn *scan;
5635 /* Don't look for the stack pointer as a scratch register,
5636 it would cause trouble if an interrupt occurred. */
5637 unsigned attempt = 0x7fff, used;
5638 int jump_left = flag_expensive_optimizations + 1;
5640 /* It is likely that the most recent eligible instruction is wanted for
5641 the delay slot. Therefore, find out which registers it uses, and
5642 try to avoid using them. */
5644 for (scan = jump; (scan = PREV_INSN (scan)); )
5646 enum rtx_code code;
5648 if (scan->deleted ())
5649 continue;
5650 code = GET_CODE (scan);
5651 if (code == CODE_LABEL || code == JUMP_INSN)
5652 break;
5653 if (code == INSN
5654 && GET_CODE (PATTERN (scan)) != USE
5655 && GET_CODE (PATTERN (scan)) != CLOBBER
5656 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5658 attempt &= ~regs_used (PATTERN (scan), 0);
5659 break;
5662 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5663 (scan = NEXT_INSN (scan)); )
5665 enum rtx_code code;
5667 if (scan->deleted ())
5668 continue;
5669 code = GET_CODE (scan);
5670 if (INSN_P (scan))
5672 used |= regs_used (PATTERN (scan), 0);
5673 if (code == CALL_INSN)
5674 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5675 dead |= (used >> 16) & ~used;
5676 if (dead & attempt)
5678 dead &= attempt;
5679 break;
5681 if (code == JUMP_INSN)
5683 if (jump_left-- && simplejump_p (scan))
5684 scan = JUMP_LABEL_AS_INSN (scan);
5685 else
5686 break;
5690 /* Mask out the stack pointer again, in case it was
5691 the only 'free' register we have found. */
5692 dead &= 0x7fff;
5694 /* If the immediate destination is still in range, check for possible
5695 threading with a jump beyond the delay slot insn.
5696 Don't check if we are called recursively; the jump has been or will be
5697 checked in a separate invocation. */
5699 else if (optimize && need_block >= 0)
5701 rtx_insn *next = next_active_insn (next_active_insn (dest));
5702 if (next && JUMP_P (next)
5703 && GET_CODE (PATTERN (next)) == SET
5704 && recog_memoized (next) == CODE_FOR_jump_compact)
5706 dest = JUMP_LABEL (next);
5707 if (dest
5708 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5709 > 4092 + 4098))
5710 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5714 if (dead)
5716 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5718 /* It would be nice if we could convert the jump into an indirect
5719 jump / far branch right now, and thus expose all constituent
5720 instructions to further optimization. However, reorg uses
5721 simplejump_p to determine if there is an unconditional jump where
5722 it should try to schedule instructions from the target of the
5723 branch; simplejump_p fails for indirect jumps even if they have
5724 a JUMP_LABEL. */
5725 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5726 (reg, GEN_INT (unspec_bbr_uid++)),
5727 jump);
5728 /* ??? We would like this to have the scope of the jump, but that
5729 scope will change when a delay slot insn of an inner scope is added.
5730 Hence, after delay slot scheduling, we'll have to expect
5731 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5732 the jump. */
5734 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5735 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5736 return insn;
5738 else if (need_block)
5739 /* We can't use JUMP_LABEL here because it might be undefined
5740 when not optimizing. */
5741 return emit_insn_before (gen_block_branch_redirect
5742 (GEN_INT (unspec_bbr_uid++)),
5743 jump);
5744 return prev;
5747 #define CONDJUMP_MIN -252
5748 #define CONDJUMP_MAX 262
5749 struct far_branch
5751 /* A label (to be placed) in front of the jump
5752 that jumps to our ultimate destination. */
5753 rtx_insn *near_label;
5754 /* Where we are going to insert it if we cannot move the jump any farther,
5755 or the jump itself if we have picked up an existing jump. */
5756 rtx_insn *insert_place;
5757 /* The ultimate destination. */
5758 rtx_insn *far_label;
5759 struct far_branch *prev;
5760 /* If the branch has already been created, its address;
5761 else the address of its first prospective user. */
5762 int address;
5765 static void gen_far_branch (struct far_branch *);
5766 enum mdep_reorg_phase_e mdep_reorg_phase;
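/* Roughly speaking (an illustrative sketch, not verbatim output), an
   out-of-range conditional branch at BP->insert_place such as
	bt	.Lfar
   ends up as
	bf	.Lskip
   .Lnear:
	bra	.Lfar
   .Lskip:
   where .Lnear (BP->near_label) is also the label that other too-distant
   conditional branches get redirected to.  */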
5767 static void
5768 gen_far_branch (struct far_branch *bp)
5770 rtx_insn *insn = bp->insert_place;
5771 rtx_insn *jump;
5772 rtx_code_label *label = gen_label_rtx ();
5773 int ok;
5775 emit_label_after (label, insn);
5776 if (bp->far_label)
5778 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5779 LABEL_NUSES (bp->far_label)++;
5781 else
5782 jump = emit_jump_insn_after (gen_return (), insn);
5784 /* Emit a barrier so that reorg knows that any following instructions
5785 are not reachable via a fall-through path.
5786 But don't do this when not optimizing, since we wouldn't suppress the
5787 alignment for the barrier then, and could end up with out-of-range
5788 pc-relative loads. */
5789 if (optimize)
5790 emit_barrier_after (jump);
5791 emit_label_after (bp->near_label, insn);
5793 if (bp->far_label)
5794 JUMP_LABEL (jump) = bp->far_label;
5795 else
5797 rtx pat = PATTERN (jump);
5798 gcc_assert (ANY_RETURN_P (pat));
5799 JUMP_LABEL (jump) = pat;
5802 ok = invert_jump (insn, label, 1);
5803 gcc_assert (ok);
5805 /* If we are branching around a jump (rather than a return), prevent
5806 reorg from using an insn from the jump target as the delay slot insn -
5807 when reorg did this, it pessimized code (we would rather hide the delay slot)
5808 and it could cause branches to go out of range. */
5809 if (bp->far_label)
5810 (emit_insn_after
5811 (gen_stuff_delay_slot
5812 (GEN_INT (unspec_bbr_uid++),
5813 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5814 insn));
5815 /* Prevent reorg from undoing our splits. */
5816 gen_block_redirect (jump, bp->address += 2, 2);
5819 /* Fix up ADDR_DIFF_VECs. */
5820 void
5821 fixup_addr_diff_vecs (rtx_insn *first)
5823 rtx_insn *insn;
5825 for (insn = first; insn; insn = NEXT_INSN (insn))
5827 rtx vec_lab, pat, prevpat, x, braf_label;
5828 rtx_insn *prev;
5830 if (! JUMP_TABLE_DATA_P (insn)
5831 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5832 continue;
5833 pat = PATTERN (insn);
5834 vec_lab = XEXP (XEXP (pat, 0), 0);
5836 /* Search the matching casesi_jump_2. */
5837 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5839 if (!JUMP_P (prev))
5840 continue;
5841 prevpat = PATTERN (prev);
5842 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5843 continue;
5844 x = XVECEXP (prevpat, 0, 1);
5845 if (GET_CODE (x) != USE)
5846 continue;
5847 x = XEXP (x, 0);
5848 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5849 break;
5851 /* FIXME: This is a bug in the optimizer, but it seems harmless
5852 to just avoid panicking. */
5853 if (!prev)
5854 continue;
5856 /* Emit the reference label of the braf where it belongs, right after
5857 the casesi_jump_2 (i.e. braf). */
5858 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5859 emit_label_after (braf_label, prev);
5861 /* Fix up the ADDR_DIFF_VEC to be relative
5862 to the reference address of the braf. */
5863 XEXP (XEXP (pat, 0), 0) = braf_label;
5867 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5868 a barrier. Return the base 2 logarithm of the desired alignment. */
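/* E.g. a return value of 2 requests 2^2 = 4 byte alignment.  */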
5870 barrier_align (rtx_insn *barrier_or_label)
5872 rtx next, pat;
5874 if (! barrier_or_label)
5875 return 0;
5877 if (LABEL_P (barrier_or_label)
5878 && NEXT_INSN (barrier_or_label)
5879 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5880 return 2;
5882 if (BARRIER_P (barrier_or_label)
5883 && PREV_INSN (barrier_or_label)
5884 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5886 pat = PATTERN (PREV_INSN (barrier_or_label));
5887 /* If this is a very small table, we want to keep the alignment after
5888 the table to the minimum for proper code alignment. */
5889 return ((optimize_size
5890 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5891 <= (unsigned) 1 << (CACHE_LOG - 2)))
5892 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5895 next = next_active_insn (barrier_or_label);
5897 if (! next)
5898 return 0;
5900 pat = PATTERN (next);
5902 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5903 /* This is a barrier in front of a constant table. */
5904 return 0;
5906 if (optimize_size)
5907 return 0;
5909 if (! TARGET_SH2 || ! optimize)
5910 return align_jumps_log;
5912 /* When fixing up pcloads, a constant table might be inserted just before
5913 the basic block that ends with the barrier. Thus, we can't trust the
5914 instruction lengths before that. */
5915 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5917 /* Check if there is an immediately preceding branch to the insn beyond
5918 the barrier. We must weigh the cost of discarding useful information
5919 from the current cache line when executing this branch and there is
5920 an alignment, against that of fetching unneeded insns in front of the
5921 branch target when there is no alignment. */
5923 /* There are two delay_slot cases to consider. One is the simple case
5924 where the preceding branch is to the insn beyond the barrier (simple
5925 delay slot filling), and the other is where the preceding branch has
5926 a delay slot that is a duplicate of the insn after the barrier
5927 (fill_eager_delay_slots) and the branch is to the insn after the insn
5928 after the barrier. */
5930 int slot, credit;
5931 bool jump_to_next = false;
5933 /* Skip to the insn before the JUMP_INSN before the barrier under
5934 investigation. */
5935 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
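/* CREDIT starts at a quarter of a cache line plus 2 bytes (e.g. 8 + 2
   = 10 bytes for a 32-byte cache line) and is decremented by the length
   of each insn scanned backwards; once it turns negative, the test
   further below fails and the alignment is kept rather than suppressed.  */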
5937 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5938 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5939 prev = prev_real_insn (prev))
5941 jump_to_next = false;
5942 if (GET_CODE (PATTERN (prev)) == USE
5943 || GET_CODE (PATTERN (prev)) == CLOBBER)
5944 continue;
5945 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5947 prev = prev_seq->insn (1);
5948 if (INSN_UID (prev) == INSN_UID (next))
5950 /* Delay slot was filled with insn at jump target. */
5951 jump_to_next = true;
5952 continue;
5956 if (slot &&
5957 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5958 slot = 0;
5959 credit -= get_attr_length (prev);
5961 if (prev && jump_to_label_p (prev))
5963 rtx_insn *x;
5964 if (jump_to_next
5965 || next_real_insn (JUMP_LABEL (prev)) == next
5966 /* If relax_delay_slots() decides NEXT was redundant
5967 with some previous instruction, it will have
5968 redirected PREV's jump to the following insn. */
5969 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5970 /* There is no upper bound on redundant instructions
5971 that might have been skipped, but we must not put an
5972 alignment where none had been before. */
5973 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5974 (INSN_P (x)
5975 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5976 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5977 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5979 rtx pat = PATTERN (prev);
5980 if (GET_CODE (pat) == PARALLEL)
5981 pat = XVECEXP (pat, 0, 0);
5982 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5983 return 0;
5988 return align_jumps_log;
5991 /* If we are inside a phony loop, almost any kind of label can turn up as the
5992 first one in the loop. Aligning a braf label causes incorrect switch
5993 destination addresses; we can detect braf labels because they are
5994 followed by a BARRIER.
5995 Applying loop alignment to small constant or switch tables is a waste
5996 of space, so we suppress this too. */
5998 sh_loop_align (rtx_insn *label)
6000 rtx_insn *next = label;
6002 if (! optimize || optimize_size)
6003 return 0;
6006 next = next_nonnote_insn (next);
6007 while (next && LABEL_P (next));
6009 if (! next
6010 || ! INSN_P (next)
6011 || recog_memoized (next) == CODE_FOR_consttable_2)
6012 return 0;
6014 return align_loops_log;
6017 /* Do a final pass over the function, just before delayed branch
6018 scheduling. */
6019 static void
6020 sh_reorg (void)
6022 rtx_insn *first, *insn, *mova = NULL;
6023 int num_mova;
6024 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6025 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6027 first = get_insns ();
6028 max_labelno_before_reorg = max_label_num ();
6030 /* We must split call insns before introducing `mova's. If we're
6031 optimizing, they'll have already been split. Otherwise, make
6032 sure we don't split them too late. */
6033 if (! optimize)
6034 split_all_insns_noflow ();
6036 if (TARGET_SHMEDIA)
6037 return;
6039 /* If relaxing, generate pseudo-ops to associate function calls with
6040 the symbols they call. It does no harm to not generate these
6041 pseudo-ops. However, when we can generate them, it enables the
6042 linker to potentially relax the jsr to a bsr, and eliminate the
6043 register load and, possibly, the constant pool entry. */
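/* Illustrative sketch (assuming a call to an ordinary function foo):
   a call sequence of the form
	mov.l	.Lcp,r1		! .Lcp holds the address of foo
	...
	jsr	@r1
   gets a ".uses" pseudo-op referring to the load, so the linker may
   relax it to
	bsr	foo
   and drop the register load and, possibly, the constant pool entry.  */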
6045 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6046 if (TARGET_RELAX)
6048 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6049 own purposes. This works because none of the remaining passes
6050 need to look at them.
6052 ??? But it may break in the future. We should use a machine
6053 dependent REG_NOTE, or some other approach entirely. */
6054 for (insn = first; insn; insn = NEXT_INSN (insn))
6056 if (INSN_P (insn))
6058 rtx note;
6060 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6061 NULL_RTX)) != 0)
6062 remove_note (insn, note);
6066 for (insn = first; insn; insn = NEXT_INSN (insn))
6068 rtx pattern, reg, set, dies;
6069 rtx_code_label *label;
6070 rtx_insn *link, *scan;
6071 int rescan = 0, foundinsn = 0;
6073 if (CALL_P (insn))
6075 pattern = PATTERN (insn);
6077 if (GET_CODE (pattern) == PARALLEL)
6078 pattern = XVECEXP (pattern, 0, 0);
6079 if (GET_CODE (pattern) == SET)
6080 pattern = SET_SRC (pattern);
6082 if (GET_CODE (pattern) != CALL
6083 || !MEM_P (XEXP (pattern, 0)))
6084 continue;
6086 reg = XEXP (XEXP (pattern, 0), 0);
6088 else
6090 reg = sfunc_uses_reg (insn);
6091 if (! reg)
6092 continue;
6095 if (!REG_P (reg))
6096 continue;
6098 /* Try scanning backward to find where the register is set. */
6099 link = NULL;
6100 for (scan = PREV_INSN (insn);
6101 scan && !LABEL_P (scan);
6102 scan = PREV_INSN (scan))
6104 if (! INSN_P (scan))
6105 continue;
6107 if (! reg_mentioned_p (reg, scan))
6108 continue;
6110 if (noncall_uses_reg (reg, scan, &set))
6111 break;
6113 if (set)
6115 link = scan;
6116 break;
6120 if (! link)
6121 continue;
6123 /* The register is set at LINK. */
6125 /* We can only optimize the function call if the register is
6126 being set to a symbol. In theory, we could sometimes
6127 optimize calls to a constant location, but the assembler
6128 and linker do not support that at present. */
6129 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6130 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6131 continue;
6133 /* Scan forward from LINK to the place where REG dies, and
6134 make sure that the only insns which use REG are
6135 themselves function calls. */
6137 /* ??? This doesn't work for call targets that were allocated
6138 by reload, since there may not be a REG_DEAD note for the
6139 register. */
6141 dies = NULL_RTX;
6142 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6144 rtx scanset;
6146 /* Don't try to trace forward past a CODE_LABEL if we haven't
6147 seen INSN yet. Ordinarily, we will only find the setting insn
6148 if it is in the same basic block. However,
6149 cross-jumping can insert code labels in between the load and
6150 the call, and can result in situations where a single call
6151 insn may have two targets depending on where we came from. */
6153 if (LABEL_P (scan) && ! foundinsn)
6154 break;
6156 if (! INSN_P (scan))
6157 continue;
6159 /* Don't try to trace forward past a JUMP. To optimize
6160 safely, we would have to check that all the
6161 instructions at the jump destination did not use REG. */
6163 if (JUMP_P (scan))
6164 break;
6166 if (! reg_mentioned_p (reg, scan))
6167 continue;
6169 if (noncall_uses_reg (reg, scan, &scanset))
6170 break;
6172 if (scan == insn)
6173 foundinsn = 1;
6175 if (scan != insn
6176 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6178 /* There is a function call to this register other
6179 than the one we are checking. If we optimize
6180 this call, we need to rescan again below. */
6181 rescan = 1;
6184 /* ??? We shouldn't have to worry about SCANSET here.
6185 We should just be able to check for a REG_DEAD note
6186 on a function call. However, the REG_DEAD notes are
6187 apparently not dependable around libcalls; c-torture
6188 execute/920501-2 is a test case. If SCANSET is set,
6189 then this insn sets the register, so it must have
6190 died earlier. Unfortunately, this will only handle
6191 the cases in which the register is, in fact, set in a
6192 later insn. */
6194 /* ??? We shouldn't have to use FOUNDINSN here.
6195 This dates back to when we used LOG_LINKS to find
6196 the most recent insn which sets the register. */
6198 if (foundinsn
6199 && (scanset
6200 || find_reg_note (scan, REG_DEAD, reg)))
6202 dies = scan;
6203 break;
6207 if (! dies)
6209 /* Either there was a branch, or some insn used REG
6210 other than as a function call address. */
6211 continue;
6214 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6215 on the insn which sets the register, and on each call insn
6216 which uses the register. In final_prescan_insn we look for
6217 the REG_LABEL_OPERAND notes, and output the appropriate label
6218 or pseudo-op. */
6220 label = gen_label_rtx ();
6221 add_reg_note (link, REG_LABEL_OPERAND, label);
6222 add_reg_note (insn, REG_LABEL_OPERAND, label);
6223 if (rescan)
6225 scan = link;
6228 rtx reg2;
6230 scan = NEXT_INSN (scan);
6231 if (scan != insn
6232 && ((CALL_P (scan)
6233 && reg_mentioned_p (reg, scan))
6234 || ((reg2 = sfunc_uses_reg (scan))
6235 && REGNO (reg2) == REGNO (reg))))
6236 add_reg_note (scan, REG_LABEL_OPERAND, label);
6238 while (scan != dies);
6243 if (TARGET_SH2)
6244 fixup_addr_diff_vecs (first);
6246 if (optimize)
6248 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6249 shorten_branches (first);
6252 /* Scan the function looking for move instructions which have to be
6253 changed to pc-relative loads and insert the literal tables. */
6254 label_ref_list_pool = create_alloc_pool ("label references list",
6255 sizeof (struct label_ref_list_d),
6256 30);
6257 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6258 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6260 if (mova_p (insn))
6262 /* ??? basic block reordering can move a switch table dispatch
6263 below the switch table. Check if that has happened.
6264 We only have the addresses available when optimizing; but then,
6265 this check shouldn't be needed when not optimizing. */
6266 if (!untangle_mova (&num_mova, &mova, insn))
6268 insn = mova;
6269 num_mova = 0;
6272 else if (JUMP_TABLE_DATA_P (insn)
6273 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6274 && num_mova
6275 /* ??? loop invariant motion can also move a mova out of a
6276 loop. Since loop does this code motion anyway, maybe we
6277 should wrap UNSPEC_MOVA into a CONST, so that reload can
6278 move it back. */
6279 && ((num_mova > 1
6280 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6281 || (prev_nonnote_insn (insn)
6282 == XEXP (MOVA_LABELREF (mova), 0))))
6284 rtx_insn *scan;
6285 int total;
6287 num_mova--;
6289 /* Some code might have been inserted between the mova and
6290 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6291 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6292 total += get_attr_length (scan);
6294 /* The range of mova is 1020; add 4 because the pc counts from the address
6295 of the second instruction after this one, and subtract 2 in case the pc is
6296 2-byte aligned (1020 + 4 - 2 = 1022, hence the check below). Alignment
6297 needed for the ADDR_DIFF_VEC cancels out with that of the mova itself. */
6298 if (total > 1022)
6300 /* Change the mova into a load, and restart scanning
6301 there. broken_move will then return true for mova. */
6302 fixup_mova (mova);
6303 insn = mova;
6306 if (broken_move (insn)
6307 || (NONJUMP_INSN_P (insn)
6308 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6310 rtx_insn *scan;
6311 /* Scan ahead looking for a barrier to stick the constant table
6312 behind. */
6313 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6314 rtx_insn *last_float_move = NULL;
6315 rtx last_float = 0, *last_float_addr = NULL;
6316 int need_aligned_label = 0;
6318 if (num_mova && ! mova_p (mova))
6320 /* find_barrier had to change the first mova into a
6321 pcload; thus, we have to start with this new pcload. */
6322 insn = mova;
6323 num_mova = 0;
6325 /* Now find all the moves between the points and modify them. */
6326 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6328 if (LABEL_P (scan))
6329 last_float = 0;
6330 if (NONJUMP_INSN_P (scan)
6331 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6332 need_aligned_label = 1;
6333 if (broken_move (scan))
6335 rtx *patp = &PATTERN (scan), pat = *patp;
6336 rtx src, dst;
6337 rtx lab;
6338 rtx newsrc;
6339 machine_mode mode;
6341 if (GET_CODE (pat) == PARALLEL)
6342 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6343 src = SET_SRC (pat);
6344 dst = SET_DEST (pat);
6345 mode = GET_MODE (dst);
6347 if (mode == SImode && satisfies_constraint_I16 (src)
6348 && REGNO (dst) != FPUL_REG)
6350 int offset = 0;
6352 mode = HImode;
6353 while (GET_CODE (dst) == SUBREG)
6355 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6356 GET_MODE (SUBREG_REG (dst)),
6357 SUBREG_BYTE (dst),
6358 GET_MODE (dst));
6359 dst = SUBREG_REG (dst);
6361 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6363 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6365 /* This must be an insn that clobbers r0. */
6366 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6367 XVECLEN (PATTERN (scan), 0)
6368 - 1);
6369 rtx clobber = *clobberp;
6371 gcc_assert (GET_CODE (clobber) == CLOBBER
6372 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6374 if (last_float
6375 && reg_set_between_p (r0_rtx, last_float_move, scan))
6376 last_float = 0;
6377 if (last_float
6378 && TARGET_SHCOMPACT
6379 && GET_MODE_SIZE (mode) != 4
6380 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6381 last_float = 0;
6382 lab = add_constant (src, mode, last_float);
6383 if (lab)
6384 emit_insn_before (gen_mova (lab), scan);
6385 else
6387 /* There will be a REG_UNUSED note for r0 on
6388 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6389 otherwise reorg:mark_target_live_regs will not
6390 consider r0 to be used, and we could end up with a delay
6391 slot insn in front of SCAN that clobbers r0. */
6392 rtx note
6393 = find_regno_note (last_float_move, REG_UNUSED, 0);
6395 /* If we are not optimizing, then there may not be
6396 a note. */
6397 if (note)
6398 PUT_REG_NOTE_KIND (note, REG_INC);
6400 *last_float_addr = r0_inc_rtx;
6402 last_float_move = scan;
6403 last_float = src;
6404 newsrc = gen_const_mem (mode,
6405 (((TARGET_SH4 && ! TARGET_FMOVD)
6406 || REGNO (dst) == FPUL_REG)
6407 ? r0_inc_rtx
6408 : r0_rtx));
6409 last_float_addr = &XEXP (newsrc, 0);
6411 /* Remove the clobber of r0. */
6412 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6413 gen_rtx_SCRATCH (Pmode));
6415 /* This is a mova needing a label. Create it. */
6416 else if (GET_CODE (src) == UNSPEC
6417 && XINT (src, 1) == UNSPEC_MOVA
6418 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6420 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6421 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6422 newsrc = gen_rtx_UNSPEC (SImode,
6423 gen_rtvec (1, newsrc),
6424 UNSPEC_MOVA);
6426 else if (GET_CODE (src) == UNSPEC_VOLATILE
6427 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6429 newsrc = XVECEXP (src, 0, 0);
6430 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6431 INSN_CODE (scan) = -1;
6432 continue;
6434 else
6436 lab = add_constant (src, mode, 0);
6437 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6438 newsrc = gen_const_mem (mode, newsrc);
6440 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6441 INSN_CODE (scan) = -1;
6444 dump_table (need_aligned_label ? insn : 0, barrier);
6445 insn = barrier;
6448 free_alloc_pool (label_ref_list_pool);
6449 for (insn = first; insn; insn = NEXT_INSN (insn))
6450 PUT_MODE (insn, VOIDmode);
6452 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6453 INSN_ADDRESSES_FREE ();
6454 split_branches (first);
6456 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6457 also has an effect on the register that holds the address of the sfunc.
6458 Insert an extra dummy insn in front of each sfunc that pretends to
6459 use this register. */
6460 if (flag_delayed_branch)
6462 for (insn = first; insn; insn = NEXT_INSN (insn))
6464 rtx reg = sfunc_uses_reg (insn);
6466 if (! reg)
6467 continue;
6468 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6471 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6474 /* Return the UID of the insn that follows the specified label. */
6476 get_dest_uid (rtx label, int max_uid)
6478 rtx_insn *dest = next_real_insn (label);
6479 int dest_uid;
6480 if (! dest)
6481 /* This can happen for an undefined label. */
6482 return 0;
6483 dest_uid = INSN_UID (dest);
6484 /* If this is a newly created branch redirection blocking instruction,
6485 we cannot index the branch_uid or insn_addresses arrays with its
6486 uid. But then, we won't need to, because the actual destination is
6487 the following branch. */
6488 while (dest_uid >= max_uid)
6490 dest = NEXT_INSN (dest);
6491 dest_uid = INSN_UID (dest);
6493 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6494 return 0;
6495 return dest_uid;
6498 /* Split condbranches that are out of range. Also add clobbers for
6499 scratch registers that are needed in far jumps.
6500 We do this before delay slot scheduling, so that it can take our
6501 newly created instructions into account. It also allows us to
6502 find branches with common targets more easily. */
6503 static void
6504 split_branches (rtx_insn *first)
6506 rtx_insn *insn;
6507 struct far_branch **uid_branch, *far_branch_list = 0;
6508 int max_uid = get_max_uid ();
6509 int ok;
6511 /* Find out which branches are out of range. */
6512 shorten_branches (first);
6514 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6515 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6517 for (insn = first; insn; insn = NEXT_INSN (insn))
6518 if (! INSN_P (insn))
6519 continue;
6520 else if (insn->deleted ())
6522 /* Shorten_branches would split this instruction again,
6523 so transform it into a note. */
6524 SET_INSN_DELETED (insn);
6526 else if (JUMP_P (insn))
6528 enum attr_type type = get_attr_type (insn);
6529 if (type == TYPE_CBRANCH)
6531 rtx_insn *next, *beyond;
6533 if (get_attr_length (insn) > 4)
6535 rtx src = SET_SRC (PATTERN (insn));
6536 rtx olabel = XEXP (XEXP (src, 1), 0);
6537 int addr = INSN_ADDRESSES (INSN_UID (insn));
6538 rtx_insn *label = 0;
6539 int dest_uid = get_dest_uid (olabel, max_uid);
6540 struct far_branch *bp = uid_branch[dest_uid];
6542 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6543 the label if the LABEL_NUSES count drops to zero. There is
6544 always a jump_optimize pass that sets these values, but it
6545 proceeds to delete unreferenced code, and then if not
6546 optimizing, to un-delete the deleted instructions, thus
6547 leaving labels with use counts that are too low. */
6548 if (! optimize)
6550 JUMP_LABEL (insn) = olabel;
6551 LABEL_NUSES (olabel)++;
6553 if (! bp)
6555 bp = (struct far_branch *) alloca (sizeof *bp);
6556 uid_branch[dest_uid] = bp;
6557 bp->prev = far_branch_list;
6558 far_branch_list = bp;
6559 bp->far_label = as_a <rtx_insn *> (
6560 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6561 0));
6562 LABEL_NUSES (bp->far_label)++;
6564 else
6566 label = bp->near_label;
6567 if (! label && bp->address - addr >= CONDJUMP_MIN)
6569 rtx_insn *block = bp->insert_place;
6571 if (GET_CODE (PATTERN (block)) == RETURN)
6572 block = PREV_INSN (block);
6573 else
6574 block = gen_block_redirect (block,
6575 bp->address, 2);
6576 label = emit_label_after (gen_label_rtx (),
6577 PREV_INSN (block));
6578 bp->near_label = label;
6580 else if (label && ! NEXT_INSN (label))
6582 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6583 bp->insert_place = insn;
6584 else
6585 gen_far_branch (bp);
6588 if (! label
6589 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6591 bp->near_label = label = gen_label_rtx ();
6592 bp->insert_place = insn;
6593 bp->address = addr;
6595 ok = redirect_jump (insn, label, 0);
6596 gcc_assert (ok);
6598 else
6600 /* get_attr_length (insn) == 2 */
6601 /* Check if we have a pattern where reorg wants to redirect
6602 the branch to a label from an unconditional branch that
6603 is too far away. */
6604 /* We can't use JUMP_LABEL here because it might be undefined
6605 when not optimizing. */
6606 /* A syntax error might cause beyond to be NULL_RTX. */
6607 beyond
6608 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6609 0));
6611 if (beyond
6612 && (JUMP_P (beyond)
6613 || ((beyond = next_active_insn (beyond))
6614 && JUMP_P (beyond)))
6615 && GET_CODE (PATTERN (beyond)) == SET
6616 && recog_memoized (beyond) == CODE_FOR_jump_compact
6617 && ((INSN_ADDRESSES
6618 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6619 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6620 > 252 + 258 + 2))
6621 gen_block_redirect (beyond,
6622 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6625 next = next_active_insn (insn);
6627 if (next
6628 && (JUMP_P (next)
6629 || ((next = next_active_insn (next))
6630 && JUMP_P (next)))
6631 && GET_CODE (PATTERN (next)) == SET
6632 && recog_memoized (next) == CODE_FOR_jump_compact
6633 && ((INSN_ADDRESSES
6634 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6635 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6636 > 252 + 258 + 2))
6637 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6639 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6641 int addr = INSN_ADDRESSES (INSN_UID (insn));
6642 rtx_insn *far_label = 0;
6643 int dest_uid = 0;
6644 struct far_branch *bp;
6646 if (type == TYPE_JUMP)
6648 far_label = as_a <rtx_insn *> (
6649 XEXP (SET_SRC (PATTERN (insn)), 0));
6650 dest_uid = get_dest_uid (far_label, max_uid);
6651 if (! dest_uid)
6653 /* Parse errors can lead to labels outside
6654 the insn stream. */
6655 if (! NEXT_INSN (far_label))
6656 continue;
6658 if (! optimize)
6660 JUMP_LABEL (insn) = far_label;
6661 LABEL_NUSES (far_label)++;
6663 redirect_jump (insn, ret_rtx, 1);
6664 far_label = 0;
6667 bp = uid_branch[dest_uid];
6668 if (! bp)
6670 bp = (struct far_branch *) alloca (sizeof *bp);
6671 uid_branch[dest_uid] = bp;
6672 bp->prev = far_branch_list;
6673 far_branch_list = bp;
6674 bp->near_label = 0;
6675 bp->far_label = far_label;
6676 if (far_label)
6677 LABEL_NUSES (far_label)++;
6679 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6680 if (addr - bp->address <= CONDJUMP_MAX)
6681 emit_label_after (bp->near_label, PREV_INSN (insn));
6682 else
6684 gen_far_branch (bp);
6685 bp->near_label = 0;
6687 else
6688 bp->near_label = 0;
6689 bp->address = addr;
6690 bp->insert_place = insn;
6691 if (! far_label)
6692 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6693 else
6694 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6697 /* Generate all pending far branches,
6698 and free our references to the far labels. */
6699 while (far_branch_list)
6701 if (far_branch_list->near_label
6702 && ! NEXT_INSN (far_branch_list->near_label))
6703 gen_far_branch (far_branch_list);
6704 if (optimize
6705 && far_branch_list->far_label
6706 && ! --LABEL_NUSES (far_branch_list->far_label))
6707 delete_insn (far_branch_list->far_label);
6708 far_branch_list = far_branch_list->prev;
6711 /* Instruction length information is no longer valid due to the new
6712 instructions that have been generated. */
6713 init_insn_lengths ();
6716 /* Dump out instruction addresses, which is useful for debugging the
6717 constant pool table stuff.
6719 If relaxing, output the label and pseudo-ops used to link together
6720 calls and the instruction which set the registers.
6722 ??? The addresses printed by this routine for insns are nonsense for
6723 insns which are inside of a sequence where none of the inner insns have
6724 variable length. This is because the second pass of shorten_branches
6725 does not bother to update them. */
6726 void
6727 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6728 int noperands ATTRIBUTE_UNUSED)
6730 if (TARGET_DUMPISIZE)
6731 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6733 if (TARGET_RELAX)
6735 rtx note;
6737 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6738 if (note)
6740 rtx pattern;
6742 pattern = PATTERN (insn);
6743 if (GET_CODE (pattern) == PARALLEL)
6744 pattern = XVECEXP (pattern, 0, 0);
6745 switch (GET_CODE (pattern))
6747 case SET:
6748 if (GET_CODE (SET_SRC (pattern)) != CALL
6749 && get_attr_type (insn) != TYPE_SFUNC)
6751 targetm.asm_out.internal_label
6752 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6753 break;
6755 /* else FALLTHROUGH */
6756 case CALL:
6757 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6758 CODE_LABEL_NUMBER (XEXP (note, 0)));
6759 break;
6761 default:
6762 gcc_unreachable ();
6768 /* Dump out any constants accumulated in the final pass. These will
6769 only be labels. */
6770 const char *
6771 output_jump_label_table (void)
6773 int i;
6775 if (pool_size)
6777 fprintf (asm_out_file, "\t.align 2\n");
6778 for (i = 0; i < pool_size; i++)
6780 pool_node *p = &pool_vector[i];
6782 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6783 CODE_LABEL_NUMBER (p->label));
6784 output_asm_insn (".long %O0", &p->value);
6786 pool_size = 0;
6789 return "";
6792 /* A full frame looks like:
6794 arg-5
6795 arg-4
6796 [ if current_function_anonymous_args
6797 arg-3
6798 arg-2
6799 arg-1
6800 arg-0 ]
6801 saved-fp
6802 saved-r10
6803 saved-r11
6804 saved-r12
6805 saved-pr
6806 local-n
6808 local-1
6809 local-0 <- fp points here.
6811 Number of bytes pushed for anonymous args, used to pass information
6812 between expand_prologue and expand_epilogue.
6814 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6815 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6816 for an epilogue and a negative value means that it's for a sibcall
6817 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6818 all the registers that are about to be restored, and hence dead. */
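/* For illustration (a hypothetical call):
   output_stack_adjust (-16, stack_pointer_rtx, 0, NULL, true) in a
   prologue would emit a single "add #-16,r15", since -16 satisfies
   CONST_OK_FOR_ADD.  */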
6819 static void
6820 output_stack_adjust (int size, rtx reg, int epilogue_p,
6821 HARD_REG_SET *live_regs_mask, bool frame_p)
6823 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6824 if (size)
6826 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6828 /* This test is bogus, as output_stack_adjust is used to re-align the
6829 stack. */
6830 #if 0
6831 gcc_assert (!(size % align));
6832 #endif
6834 if (CONST_OK_FOR_ADD (size))
6835 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6836 /* Try to do it with two partial adjustments; however, we must make
6837 sure that the stack is properly aligned at all times, in case
6838 an interrupt occurs between the two partial adjustments. */
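/* Worked example: with ALIGN == 4 and SIZE == 200, SIZE / 2 & -ALIGN
   is 100, so we emit two adds of 100 each; assuming SIZE itself is a
   multiple of ALIGN, both intermediate stack values stay aligned.  */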
6839 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6840 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6842 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6843 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6845 else
6847 rtx const_reg;
6848 rtx insn;
6849 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6850 int i;
6852 /* If TEMP is invalid, we could temporarily save a general
6853 register to MACL. However, there is currently no need
6854 to handle this case, so just die when we see it. */
6855 if (epilogue_p < 0
6856 || current_function_interrupt
6857 || ! call_really_used_regs[temp] || fixed_regs[temp])
6858 temp = -1;
6859 if (temp < 0 && ! current_function_interrupt
6860 && (TARGET_SHMEDIA || epilogue_p >= 0))
6862 HARD_REG_SET temps;
6863 COPY_HARD_REG_SET (temps, call_used_reg_set);
6864 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6865 if (epilogue_p > 0)
6867 int nreg = 0;
6868 if (crtl->return_rtx)
6870 machine_mode mode;
6871 mode = GET_MODE (crtl->return_rtx);
6872 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6873 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6875 for (i = 0; i < nreg; i++)
6876 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6877 if (crtl->calls_eh_return)
6879 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6880 for (i = 0; i <= 3; i++)
6881 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6884 if (TARGET_SHMEDIA && epilogue_p < 0)
6885 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6886 CLEAR_HARD_REG_BIT (temps, i);
6887 if (epilogue_p <= 0)
6889 for (i = FIRST_PARM_REG;
6890 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6891 CLEAR_HARD_REG_BIT (temps, i);
6892 if (cfun->static_chain_decl != NULL)
6893 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6895 temp = scavenge_reg (&temps);
6897 if (temp < 0 && live_regs_mask)
6899 HARD_REG_SET temps;
6901 COPY_HARD_REG_SET (temps, *live_regs_mask);
6902 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6903 temp = scavenge_reg (&temps);
6905 if (temp < 0)
6907 rtx adj_reg, tmp_reg, mem;
6909 /* If we reached here, the most likely case is the (sibcall)
6910 epilogue for non-SHmedia. Put a special push/pop sequence
6911 for such a case as a last resort. This looks lengthy, but
6912 it should not be a problem because it seems to be very
6913 rare. */
6915 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6918 /* ??? There is still the slight possibility that r4 or
6919 r5 have been reserved as fixed registers or assigned
6920 as global registers, and they change during an
6921 interrupt. There are possible ways to handle this:
6923 - If we are adjusting the frame pointer (r14), we can do
6924 with a single temp register and an ordinary push / pop
6925 on the stack.
6926 - Grab any call-used or call-saved registers (i.e. not
6927 fixed or globals) for the temps we need. We might
6928 also grab r14 if we are adjusting the stack pointer.
6929 If we can't find enough available registers, issue
6930 a diagnostic and die - the user must have reserved
6931 way too many registers.
6932 But since all this is rather unlikely to happen and
6933 would require extra testing, we just die if r4 / r5
6934 are not available. */
6935 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6936 && !global_regs[4] && !global_regs[5]);
6938 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6939 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6940 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6941 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6942 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6943 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6944 emit_move_insn (mem, tmp_reg);
6945 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6946 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6947 emit_move_insn (mem, tmp_reg);
6948 emit_move_insn (reg, adj_reg);
6949 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6950 emit_move_insn (adj_reg, mem);
6951 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6952 emit_move_insn (tmp_reg, mem);
6953 /* Tell flow the insns that pop r4/r5 aren't dead. */
6954 emit_use (tmp_reg);
6955 emit_use (adj_reg);
6956 return;
6958 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6960 /* If SIZE is negative, subtract the positive value.
6961 This sometimes allows a constant pool entry to be shared
6962 between prologue and epilogue code. */
6963 if (size < 0)
6965 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6966 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6968 else
6970 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6971 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6973 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6974 gen_rtx_SET (VOIDmode, reg,
6975 gen_rtx_PLUS (SImode, reg,
6976 GEN_INT (size))));
6981 /* Emit the specified insn and mark it as frame related.
6982 FIXME: Rename this to emit_frame_insn. */
6983 static rtx_insn *
6984 frame_insn (rtx x)
6986 rtx_insn *insn = emit_insn (x);
6987 RTX_FRAME_RELATED_P (insn) = 1;
6988 return insn;
6991 /* Output RTL to push register RN onto the stack. */
6992 static rtx
6993 push (int rn)
6995 rtx x;
6996 if (rn == FPUL_REG)
6997 x = gen_push_fpul ();
6998 else if (rn == FPSCR_REG)
6999 x = gen_push_fpscr ();
7000 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7001 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7003 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7004 return NULL_RTX;
7005 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7007 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7008 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7009 else
7010 x = gen_push (gen_rtx_REG (SImode, rn));
7012 x = frame_insn (x);
7013 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7014 return x;
7017 /* Output RTL to pop register RN from the stack. */
7018 static void
7019 pop (int rn)
7021 rtx x, sp_reg, reg;
7022 if (rn == FPUL_REG)
7023 x = gen_pop_fpul ();
7024 else if (rn == FPSCR_REG)
7025 x = gen_pop_fpscr ();
7026 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7027 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7029 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7030 return;
7031 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7033 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7034 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7035 else
7036 x = gen_pop (gen_rtx_REG (SImode, rn));
7038 x = emit_insn (x);
7040 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7041 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7042 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7043 : SET_DEST (PATTERN (x)));
7044 add_reg_note (x, REG_CFA_RESTORE, reg);
7045 add_reg_note (x, REG_CFA_ADJUST_CFA,
7046 gen_rtx_SET (SImode, sp_reg,
7047 plus_constant (SImode, sp_reg,
7048 GET_MODE_SIZE (GET_MODE (reg)))));
7049 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7050 RTX_FRAME_RELATED_P (x) = 1;
7053 /* Generate code to push the regs specified in the mask. */
7054 static void
7055 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7057 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7058 int skip_fpscr = 0;
7060 /* Push PR last; this gives better latencies after the prologue, and
7061 candidates for the return delay slot when there are no general
7062 registers pushed. */
7063 for (; i < FIRST_PSEUDO_REGISTER; i++)
7065 /* If this is an interrupt handler, and the SZ bit varies,
7066 and we have to push any floating point register, we need
7067 to switch to the correct precision first. */
7068 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7069 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7071 HARD_REG_SET unsaved;
7073 push (FPSCR_REG);
7074 COMPL_HARD_REG_SET (unsaved, *mask);
7075 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7076 skip_fpscr = 1;
7078 if (i != PR_REG
7079 && (i != FPSCR_REG || ! skip_fpscr)
7080 && TEST_HARD_REG_BIT (*mask, i))
7082 /* If the ISR has RESBANK attribute assigned, don't push any of
7083 the following registers - R0-R14, MACH, MACL and GBR. */
7084 if (! (sh_cfun_resbank_handler_p ()
7085 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7086 || i == MACH_REG
7087 || i == MACL_REG
7088 || i == GBR_REG)))
7089 push (i);
7093 /* Push banked registers last to improve delay slot opportunities. */
7094 if (interrupt_handler)
7096 bool use_movml = false;
7098 if (TARGET_SH2A)
7100 unsigned int count = 0;
7102 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7103 if (TEST_HARD_REG_BIT (*mask, i))
7104 count++;
7105 else
7106 break;
7108 /* Use movml when all banked registers are pushed. */
7109 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7110 use_movml = true;
7113 if (sh_cfun_resbank_handler_p ())
7114 ; /* Do nothing. */
7115 else if (use_movml)
7117 rtx x, mem, reg, set;
7118 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7120 /* We must avoid scheduling the multiple-store insn together with
7121 other insns. */
7122 emit_insn (gen_blockage ());
7123 x = gen_movml_push_banked (sp_reg);
7124 x = frame_insn (x);
7125 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7127 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7128 reg = gen_rtx_REG (SImode, i);
7129 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7132 set = gen_rtx_SET (SImode, sp_reg,
7133 plus_constant (Pmode, sp_reg, - 32));
7134 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7135 emit_insn (gen_blockage ());
7137 else
7138 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7139 if (TEST_HARD_REG_BIT (*mask, i))
7140 push (i);
7143 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7144 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7145 push (PR_REG);
7148 /* Calculate how much extra space is needed to save all callee-saved
7149 target registers.
7150 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7151 static int
7152 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7154 int reg;
7155 int stack_space = 0;
7156 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7158 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7159 if ((! call_really_used_regs[reg] || interrupt_handler)
7160 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7161 /* Leave space to save this target register on the stack,
7162 in case target register allocation wants to use it. */
7163 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7164 return stack_space;
7167 /* Decide whether we should reserve space for callee-save target registers,
7168 in case target register allocation wants to use them. REGS_SAVED is
7169 the space, in bytes, that is already required for register saves.
7170 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7171 static int
7172 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7173 HARD_REG_SET *live_regs_mask)
7175 if (optimize_size)
7176 return 0;
7177 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7180 /* Decide how much space to reserve for callee-save target registers
7181 in case target register allocation wants to use them.
7182 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7183 static int
7184 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7186 if (shmedia_space_reserved_for_target_registers)
7187 return shmedia_target_regs_stack_space (live_regs_mask);
7188 else
7189 return 0;
7192 /* Work out the registers which need to be saved, both as a mask and a
7193 count of saved words. Return the count.
7195 If doing a pragma interrupt function, then push all regs used by the
7196 function, and if we call another function (we can tell by looking at PR),
7197 make sure that all the regs it clobbers are safe too. */
7198 static int
7199 calc_live_regs (HARD_REG_SET *live_regs_mask)
7201 unsigned int reg;
7202 int count;
7203 tree attrs;
7204 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7205 bool nosave_low_regs;
7206 int pr_live, has_call;
7208 attrs = DECL_ATTRIBUTES (current_function_decl);
7209 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7210 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7211 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7212 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7214 CLEAR_HARD_REG_SET (*live_regs_mask);
7215 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7216 && df_regs_ever_live_p (FPSCR_REG))
7217 target_flags &= ~MASK_FPU_SINGLE;
7218 /* If we can save a lot of saves by switching to double mode, do that. */
7219 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7220 && TARGET_FPU_SINGLE)
7221 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7222 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7223 && (! call_really_used_regs[reg]
7224 || interrupt_handler)
7225 && ++count > 2)
7227 target_flags &= ~MASK_FPU_SINGLE;
7228 break;
7230 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7231 knows how to use it. That means the pseudo originally allocated for
7232 the initial value can become the PR_MEDIA_REG hard register, as seen for
7233 execute/20010122-1.c:test9. */
7234 if (TARGET_SHMEDIA)
7235 /* ??? this function is called from initial_elimination_offset, hence we
7236 can't use the result of sh_media_register_for_return here. */
7237 pr_live = sh_pr_n_sets ();
7238 else
7240 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7241 pr_live = (pr_initial
7242 ? (!REG_P (pr_initial)
7243 || REGNO (pr_initial) != (PR_REG))
7244 : df_regs_ever_live_p (PR_REG));
7245 /* For Shcompact, if not optimizing, we end up with a memory reference
7246 using the return address pointer for __builtin_return_address even
7247 though there is no actual need to put the PR register on the stack. */
7248 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7250 /* Force PR to be live if the prologue has to call the SHmedia
7251 argument decoder or register saver. */
7252 if (TARGET_SHCOMPACT
7253 && ((crtl->args.info.call_cookie
7254 & ~ CALL_COOKIE_RET_TRAMP (1))
7255 || crtl->saves_all_registers))
7256 pr_live = 1;
7257 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7258 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7260 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7261 ? pr_live
7262 : interrupt_handler
7263 ? (/* Need to save all the regs ever live. */
7264 (df_regs_ever_live_p (reg)
7265 || (call_really_used_regs[reg]
7266 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7267 || reg == PIC_OFFSET_TABLE_REGNUM)
7268 && has_call)
7269 || (TARGET_SHMEDIA && has_call
7270 && REGISTER_NATURAL_MODE (reg) == SImode
7271 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7272 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7273 && reg != RETURN_ADDRESS_POINTER_REGNUM
7274 && reg != T_REG && reg != GBR_REG
7275 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7276 /* Push fpscr only on targets which have an FPU. */
7277 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7278 : (/* Only push those regs which are used and need to be saved. */
7279 (TARGET_SHCOMPACT
7280 && flag_pic
7281 && crtl->args.info.call_cookie
7282 && reg == PIC_OFFSET_TABLE_REGNUM)
7283 || (df_regs_ever_live_p (reg)
7284 && ((!call_really_used_regs[reg]
7285 && !(reg != PIC_OFFSET_TABLE_REGNUM
7286 && fixed_regs[reg] && call_used_regs[reg]))
7287 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7288 || (crtl->calls_eh_return
7289 && (reg == EH_RETURN_DATA_REGNO (0)
7290 || reg == EH_RETURN_DATA_REGNO (1)
7291 || reg == EH_RETURN_DATA_REGNO (2)
7292 || reg == EH_RETURN_DATA_REGNO (3)))
7293 || ((reg == MACL_REG || reg == MACH_REG)
7294 && df_regs_ever_live_p (reg)
7295 && sh_cfun_attr_renesas_p ())
7298 SET_HARD_REG_BIT (*live_regs_mask, reg);
7299 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7301 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7302 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7304 if (FP_REGISTER_P (reg))
7306 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7308 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7309 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7312 else if (XD_REGISTER_P (reg))
7314 /* Must switch to double mode to access these registers. */
7315 target_flags &= ~MASK_FPU_SINGLE;
7319 if (nosave_low_regs && reg == R8_REG)
7320 break;
7322 /* If we have a target register optimization pass after prologue / epilogue
7323 threading, we need to assume all target registers will be live even if
7324 they aren't now. */
7325 if (flag_branch_target_load_optimize2
7326 && TARGET_SAVE_ALL_TARGET_REGS
7327 && shmedia_space_reserved_for_target_registers)
7328 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7329 if ((! call_really_used_regs[reg] || interrupt_handler)
7330 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7332 SET_HARD_REG_BIT (*live_regs_mask, reg);
7333 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7335 /* If this is an interrupt handler, we don't have any call-clobbered
7336 registers we can conveniently use for target register save/restore.
7337 Make sure we save at least one general purpose register when we need
7338 to save target registers. */
7339 if (interrupt_handler
7340 && hard_reg_set_intersect_p (*live_regs_mask,
7341 reg_class_contents[TARGET_REGS])
7342 && ! hard_reg_set_intersect_p (*live_regs_mask,
7343 reg_class_contents[GENERAL_REGS]))
7345 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7346 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7349 return count;
7352 /* Code to generate prologue and epilogue sequences */
7354 /* PUSHED is the number of bytes that are being pushed on the
7355 stack for register saves. Return the frame size, padded
7356 appropriately so that the stack stays properly aligned. */
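/* Worked example (ignoring outgoing args): with get_frame_size () == 10,
   PUSHED == 8 and an alignment of 4 bytes, the result is
   ((10 + 8 + 3) & -4) - 8 = 20 - 8 = 12, so pushed registers plus frame
   (8 + 12 = 20 bytes) remain a multiple of the alignment.  */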
7357 static HOST_WIDE_INT
7358 rounded_frame_size (int pushed)
7360 HOST_WIDE_INT size = get_frame_size ();
7361 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7363 if (ACCUMULATE_OUTGOING_ARGS)
7364 size += crtl->outgoing_args_size;
7366 return ((size + pushed + align - 1) & -align) - pushed;
7369 /* Choose a call-clobbered target-branch register that remains
7370 unchanged along the whole function. We set it up as the return
7371 value in the prologue. */
7373 sh_media_register_for_return (void)
7375 int regno;
7376 int tr0_used;
7378 if (! crtl->is_leaf)
7379 return -1;
7380 if (lookup_attribute ("interrupt_handler",
7381 DECL_ATTRIBUTES (current_function_decl)))
7382 return -1;
7383 if (sh_cfun_interrupt_handler_p ())
7384 return -1;
7386 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7388 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7389 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7390 return regno;
7392 return -1;
7395 /* The maximum registers we need to save are:
7396 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7397 - 32 floating point registers (for each pair, we save none,
7398 one single precision value, or a double precision value).
7399 - 8 target registers
7400 - add 1 entry for a delimiter. */
7401 #define MAX_SAVED_REGS (62+32+8)
7403 typedef struct save_entry_s
7405 unsigned char reg;
7406 unsigned char mode;
7407 short offset;
7408 } save_entry;
7410 #define MAX_TEMPS 4
7412 /* There will be a delimiter entry with VOIDmode both at the start and the
7413 end of a filled in schedule. The end delimiter has the offset of the
7414 save with the smallest (i.e. most negative) offset. */
7415 typedef struct save_schedule_s
7417 save_entry entries[MAX_SAVED_REGS + 2];
7418 int temps[MAX_TEMPS+1];
7419 } save_schedule;
7421 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7422 use reverse order. Returns the last entry written to (not counting
7423 the delimiter). OFFSET_BASE is a number to be added to all offset
7424 entries. */
7425 static save_entry *
7426 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7427 int offset_base)
7429 int align, i;
7430 save_entry *entry = schedule->entries;
7431 int tmpx = 0;
7432 int offset;
7434 if (! current_function_interrupt)
7435 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7436 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7437 && ! FUNCTION_ARG_REGNO_P (i)
7438 && i != FIRST_RET_REG
7439 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7440 && ! (crtl->calls_eh_return
7441 && (i == EH_RETURN_STACKADJ_REGNO
7442 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7443 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7444 schedule->temps[tmpx++] = i;
7445 entry->reg = -1;
7446 entry->mode = VOIDmode;
7447 entry->offset = offset_base;
7448 entry++;
7449 /* We loop twice: first, we save 8-byte aligned registers in the
7450 higher addresses, which are known to be aligned. Then, we
7451 proceed to saving 32-bit registers that don't need 8-byte
7452 alignment.
7453 If this is an interrupt function, all registers that need saving
7454 need to be saved in full. Moreover, we need to postpone saving
7455 target registers till we have saved some general purpose registers
7456 we can then use as scratch registers. */
7457 offset = offset_base;
7458 for (align = 1; align >= 0; align--)
7460 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7461 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7463 machine_mode mode = REGISTER_NATURAL_MODE (i);
7464 int reg = i;
7466 if (current_function_interrupt)
7468 if (TARGET_REGISTER_P (i))
7469 continue;
7470 if (GENERAL_REGISTER_P (i))
7471 mode = DImode;
7473 if (mode == SFmode && (i % 2) == 1
7474 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7475 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7477 mode = DFmode;
7478 i--;
7479 reg--;
7482 /* If we're doing the aligned pass and this is not aligned,
7483 or we're doing the unaligned pass and this is aligned,
7484 skip it. */
7485 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7486 != align)
7487 continue;
7489 if (current_function_interrupt
7490 && GENERAL_REGISTER_P (i)
7491 && tmpx < MAX_TEMPS)
7492 schedule->temps[tmpx++] = i;
7494 offset -= GET_MODE_SIZE (mode);
7495 entry->reg = i;
7496 entry->mode = mode;
7497 entry->offset = offset;
7498 entry++;
7500 if (align && current_function_interrupt)
7501 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7502 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7504 offset -= GET_MODE_SIZE (DImode);
7505 entry->reg = i;
7506 entry->mode = DImode;
7507 entry->offset = offset;
7508 entry++;
7511 entry->reg = -1;
7512 entry->mode = VOIDmode;
7513 entry->offset = offset;
7514 schedule->temps[tmpx] = -1;
7515 return entry - 1;
7518 /* Expand code for the function prologue. */
7519 void
7520 sh_expand_prologue (void)
7522 HARD_REG_SET live_regs_mask;
7523 int d, i;
7524 int d_rounding = 0;
7525 int save_flags = target_flags;
7526 int pretend_args;
7527 int stack_usage;
7528 tree sp_switch_attr
7529 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7531 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7533 /* We have pretend args if we had an object sent partially in registers
7534 and partially on the stack, e.g. a large structure. */
7535 pretend_args = crtl->args.pretend_args_size;
7536 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7537 && (NPARM_REGS(SImode)
7538 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7539 pretend_args = 0;
7541 output_stack_adjust (-pretend_args
7542 - crtl->args.info.stack_regs * 8,
7543 stack_pointer_rtx, 0, NULL, true);
7544 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7546 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7547 /* We're going to use the PIC register to load the address of the
7548 incoming-argument decoder and/or of the return trampoline from
7549 the GOT, so make sure the PIC register is preserved and
7550 initialized. */
7551 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7553 if (TARGET_SHCOMPACT
7554 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7556 int reg;
7558 /* First, make all registers with incoming arguments that will
7559 be pushed onto the stack live, so that register renaming
7560 doesn't overwrite them. */
7561 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7562 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7563 >= NPARM_REGS (SImode) - reg)
7564 for (; reg < NPARM_REGS (SImode); reg++)
7565 emit_insn (gen_shcompact_preserve_incoming_args
7566 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7567 else if (CALL_COOKIE_INT_REG_GET
7568 (crtl->args.info.call_cookie, reg) == 1)
7569 emit_insn (gen_shcompact_preserve_incoming_args
7570 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7572 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7573 stack_pointer_rtx);
7574 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7575 GEN_INT (crtl->args.info.call_cookie));
7576 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7577 gen_rtx_REG (SImode, R0_REG));
7579 else if (TARGET_SHMEDIA)
7581 int tr = sh_media_register_for_return ();
7583 if (tr >= 0)
7584 emit_move_insn (gen_rtx_REG (DImode, tr),
7585 gen_rtx_REG (DImode, PR_MEDIA_REG));
7588 /* Emit the code for SETUP_VARARGS. */
7589 if (cfun->stdarg)
7591 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7593 /* Push arg regs as if they'd been provided by the caller on the stack. */
7594 for (i = 0; i < NPARM_REGS(SImode); i++)
7596 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7598 if (i >= (NPARM_REGS(SImode)
7599 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7601 break;
7602 push (rn);
7603 stack_usage += GET_MODE_SIZE (SImode);
7608 /* If we're supposed to switch stacks at function entry, do so now. */
7609 if (sp_switch_attr)
7611 rtx lab, newsrc;
7612 /* The argument specifies a variable holding the address of the
7613 stack the interrupt function should switch to/from at entry/exit. */
7614 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7615 const char *s
7616 = ggc_strdup (TREE_STRING_POINTER (arg));
7617 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7619 lab = add_constant (sp_switch, SImode, 0);
7620 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7622 emit_insn (gen_sp_switch_1 (newsrc));
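	/* Illustrative usage sketch (an assumption, not from this file): the
	   attribute handled here is written on the interrupt handler itself and
	   names a variable that holds the alternate stack pointer, e.g.

	       extern void *alt_stack;
	       void handler (void)
	         __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));

	   gen_sp_switch_1 / gen_sp_switch_2 then emit the switch to that stack
	   at entry and the switch back at exit.  */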
7625 d = calc_live_regs (&live_regs_mask);
7626 /* ??? Maybe we could save some switching if we can move a mode switch
7627 that already happens to be at the function start into the prologue. */
7628 if (target_flags != save_flags && ! current_function_interrupt)
7629 emit_insn (gen_toggle_sz ());
7631 if (TARGET_SH5)
7633 int offset_base, offset;
7634 rtx r0 = NULL_RTX;
7635 int offset_in_r0 = -1;
7636 int sp_in_r0 = 0;
7637 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7638 int total_size, save_size;
7639 save_schedule schedule;
7640 save_entry *entry;
7641 int *tmp_pnt;
7643 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7644 && ! current_function_interrupt)
7645 r0 = gen_rtx_REG (Pmode, R0_REG);
7647 /* D is the actual number of bytes that we need for saving registers;
7648 however, in initial_elimination_offset we have committed to using
7649 an additional TREGS_SPACE amount of bytes - in order to keep both
7650 addresses to arguments supplied by the caller and local variables
7651 valid, we must keep this gap. Place it between the incoming
7652 arguments and the actually saved registers in a bid to optimize
7653 locality of reference. */
7654 total_size = d + tregs_space;
7655 total_size += rounded_frame_size (total_size);
7656 save_size = total_size - rounded_frame_size (d);
7657 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7658 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7659 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7661 /* If adjusting the stack in a single step costs nothing extra, do so.
7662 I.e. either if a single addi is enough, or we need a movi anyway,
7663 and we don't exceed the maximum offset range (the test for the
7664 latter is conservative for simplicity). */
7665 if (TARGET_SHMEDIA
7666 && (CONST_OK_FOR_I10 (-total_size)
7667 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7668 && total_size <= 2044)))
7669 d_rounding = total_size - save_size;
7671 offset_base = d + d_rounding;
7673 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7674 0, NULL, true);
7675 stack_usage += save_size + d_rounding;
7677 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7678 tmp_pnt = schedule.temps;
7679 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7681 machine_mode mode = (machine_mode) entry->mode;
7682 unsigned int reg = entry->reg;
7683 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7684 rtx orig_reg_rtx;
7686 offset = entry->offset;
7688 reg_rtx = gen_rtx_REG (mode, reg);
7690 mem_rtx = gen_frame_mem (mode,
7691 gen_rtx_PLUS (Pmode,
7692 stack_pointer_rtx,
7693 GEN_INT (offset)));
7695 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7697 gcc_assert (r0);
7698 mem_rtx = NULL_RTX;
7701 if (HAVE_PRE_DECREMENT
7702 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7703 || mem_rtx == NULL_RTX
7704 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7706 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7708 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7709 pre_dec = NULL_RTX;
7710 else
7712 mem_rtx = NULL_RTX;
7713 offset += GET_MODE_SIZE (mode);
7717 if (mem_rtx != NULL_RTX)
7718 goto addr_ok;
7720 if (offset_in_r0 == -1)
7722 emit_move_insn (r0, GEN_INT (offset));
7723 offset_in_r0 = offset;
7725 else if (offset != offset_in_r0)
7727 emit_move_insn (r0,
7728 gen_rtx_PLUS
7729 (Pmode, r0,
7730 GEN_INT (offset - offset_in_r0)));
7731 offset_in_r0 += offset - offset_in_r0;
7734 if (pre_dec != NULL_RTX)
7736 if (! sp_in_r0)
7738 emit_move_insn (r0,
7739 gen_rtx_PLUS
7740 (Pmode, r0, stack_pointer_rtx));
7741 sp_in_r0 = 1;
7744 offset -= GET_MODE_SIZE (mode);
7745 offset_in_r0 -= GET_MODE_SIZE (mode);
7747 mem_rtx = pre_dec;
7749 else if (sp_in_r0)
7750 mem_rtx = gen_frame_mem (mode, r0);
7751 else
7752 mem_rtx = gen_frame_mem (mode,
7753 gen_rtx_PLUS (Pmode,
7754 stack_pointer_rtx,
7755 r0));
7757 /* We must not use an r0-based address for target-branch
7758 registers or for special registers without pre-dec
7759 memory addresses, since we store their values in r0
7760 first. */
7761 gcc_assert (!TARGET_REGISTER_P (reg)
7762 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7763 || mem_rtx == pre_dec));
7765 addr_ok:
7766 orig_reg_rtx = reg_rtx;
7767 if (TARGET_REGISTER_P (reg)
7768 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7769 && mem_rtx != pre_dec))
7771 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7773 emit_move_insn (tmp_reg, reg_rtx);
7775 if (REGNO (tmp_reg) == R0_REG)
7777 offset_in_r0 = -1;
7778 sp_in_r0 = 0;
7779 gcc_assert (!refers_to_regno_p
7780 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7783 if (*++tmp_pnt <= 0)
7784 tmp_pnt = schedule.temps;
7786 reg_rtx = tmp_reg;
7789 rtx insn;
7791 /* Mark as interesting for dwarf cfi generator */
7792 insn = emit_move_insn (mem_rtx, reg_rtx);
7793 RTX_FRAME_RELATED_P (insn) = 1;
7794 /* If we use an intermediate register for the save, we can't
7795 describe this exactly in cfi as a copy of the to-be-saved
7796 register into the temporary register and then the temporary
7797 register on the stack, because the temporary register can
7798 have a different natural size than the to-be-saved register.
7799 Thus, we gloss over the intermediate copy and pretend we do
7800 a direct save from the to-be-saved register. */
7801 if (REGNO (reg_rtx) != reg)
7803 rtx set;
7805 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7809 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7811 rtx reg_rtx = gen_rtx_REG (mode, reg);
7812 rtx set;
7813 rtx mem_rtx = gen_frame_mem (mode,
7814 gen_rtx_PLUS (Pmode,
7815 stack_pointer_rtx,
7816 GEN_INT (offset)));
7818 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7819 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7824 gcc_assert (entry->offset == d_rounding);
7826 else
7828 push_regs (&live_regs_mask, current_function_interrupt);
7829 stack_usage += d;
7832 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7833 emit_insn (gen_GOTaddr2picreg ());
7835 if (SHMEDIA_REGS_STACK_ADJUST ())
7837 /* This must NOT go through the PLT, otherwise mach and macl
7838 may be clobbered. */
7839 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7840 (TARGET_FPU_ANY
7841 ? "__GCC_push_shmedia_regs"
7842 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7843 emit_insn (gen_shmedia_save_restore_regs_compact
7844 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7847 if (target_flags != save_flags && ! current_function_interrupt)
7848 emit_insn (gen_toggle_sz ());
7850 target_flags = save_flags;
7852 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7853 stack_pointer_rtx, 0, NULL, true);
7854 stack_usage += rounded_frame_size (d) - d_rounding;
7856 if (frame_pointer_needed)
7857 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7859 if (TARGET_SHCOMPACT
7860 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7862 /* This must NOT go through the PLT, otherwise mach and macl
7863 may be clobbered. */
7864 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7865 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7866 emit_insn (gen_shcompact_incoming_args ());
7869 /* If we are profiling, make sure no instructions are scheduled before
7870 the call to mcount. Similarly if some call instructions are swapped
7871 before frame related insns, it'll confuse the unwinder because
7872 currently SH has no unwind info for function epilogues. */
7873 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7874 emit_insn (gen_blockage ());
7876 if (flag_stack_usage_info)
7877 current_function_static_stack_size = stack_usage;
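#if 0
/* Rough sketch (not authoritative) of the stack layout the prologue above
   sets up for a non-SH5 function, from higher to lower addresses:

     incoming stack arguments
     varargs / pretend argument area       <- first output_stack_adjust + pushes
     callee-saved register pushes          <- d bytes (push_regs)
     local frame, padded for alignment     <- rounded_frame_size (d); includes
                                              outgoing args when
                                              ACCUMULATE_OUTGOING_ARGS
     sp (and fp, if frame_pointer_needed)

   sh_expand_epilogue below undoes these adjustments in reverse order.  */
#endif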
7880 /* Expand code for the function epilogue. */
7881 void
7882 sh_expand_epilogue (bool sibcall_p)
7884 HARD_REG_SET live_regs_mask;
7885 int d, i;
7886 int d_rounding = 0;
7888 int save_flags = target_flags;
7889 int frame_size, save_size;
7890 int fpscr_deferred = 0;
7891 int e = sibcall_p ? -1 : 1;
7893 d = calc_live_regs (&live_regs_mask);
7895 save_size = d;
7896 frame_size = rounded_frame_size (d);
7898 if (TARGET_SH5)
7900 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7901 int total_size;
7902 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7903 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7904 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7906 total_size = d + tregs_space;
7907 total_size += rounded_frame_size (total_size);
7908 save_size = total_size - frame_size;
7910 /* If adjusting the stack in a single step costs nothing extra, do so.
7911 I.e. either if a single addi is enough, or we need a movi anyway,
7912 and we don't exceed the maximum offset range (the test for the
7913 latter is conservative for simplicity). */
7914 if (TARGET_SHMEDIA
7915 && ! frame_pointer_needed
7916 && (CONST_OK_FOR_I10 (total_size)
7917 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7918 && total_size <= 2044)))
7919 d_rounding = frame_size;
7921 frame_size -= d_rounding;
7924 if (frame_pointer_needed)
7926 /* We must avoid scheduling the epilogue with previous basic blocks.
7927 See PR/18032 and PR/40313. */
7928 emit_insn (gen_blockage ());
7929 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7930 &live_regs_mask, true);
7932 /* We must avoid moving the stack pointer adjustment past code
7933 which reads from the local frame, else an interrupt could
7934 occur after the SP adjustment and clobber data in the local
7935 frame. */
7936 emit_insn (gen_blockage ());
7937 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7939 else if (frame_size)
7941 /* We must avoid moving the stack pointer adjustment past code
7942 which reads from the local frame, else an interrupt could
7943 occur after the SP adjustment and clobber data in the local
7944 frame. */
7945 emit_insn (gen_blockage ());
7946 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7947 &live_regs_mask, true);
7950 if (SHMEDIA_REGS_STACK_ADJUST ())
7952 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7953 (TARGET_FPU_ANY
7954 ? "__GCC_pop_shmedia_regs"
7955 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7956 /* This must NOT go through the PLT, otherwise mach and macl
7957 may be clobbered. */
7958 emit_insn (gen_shmedia_save_restore_regs_compact
7959 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7962 /* Pop all the registers. */
7964 if (target_flags != save_flags && ! current_function_interrupt)
7965 emit_insn (gen_toggle_sz ());
7966 if (TARGET_SH5)
7968 int offset_base, offset;
7969 int offset_in_r0 = -1;
7970 int sp_in_r0 = 0;
7971 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7972 save_schedule schedule;
7973 save_entry *entry;
7974 int *tmp_pnt;
7976 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7977 offset_base = -entry[1].offset + d_rounding;
7978 tmp_pnt = schedule.temps;
7979 for (; entry->mode != VOIDmode; entry--)
7981 machine_mode mode = (machine_mode) entry->mode;
7982 int reg = entry->reg;
7983 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7985 offset = offset_base + entry->offset;
7986 reg_rtx = gen_rtx_REG (mode, reg);
7988 mem_rtx = gen_frame_mem (mode,
7989 gen_rtx_PLUS (Pmode,
7990 stack_pointer_rtx,
7991 GEN_INT (offset)));
7993 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7994 mem_rtx = NULL_RTX;
7996 if (HAVE_POST_INCREMENT
7997 && (offset == offset_in_r0
7998 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7999 && mem_rtx == NULL_RTX)
8000 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8002 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8004 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8005 post_inc = NULL_RTX;
8006 else
8007 mem_rtx = NULL_RTX;
8010 if (mem_rtx != NULL_RTX)
8011 goto addr_ok;
8013 if (offset_in_r0 == -1)
8015 emit_move_insn (r0, GEN_INT (offset));
8016 offset_in_r0 = offset;
8018 else if (offset != offset_in_r0)
8020 emit_move_insn (r0,
8021 gen_rtx_PLUS
8022 (Pmode, r0,
8023 GEN_INT (offset - offset_in_r0)));
8024 offset_in_r0 += offset - offset_in_r0;
8027 if (post_inc != NULL_RTX)
8029 if (! sp_in_r0)
8031 emit_move_insn (r0,
8032 gen_rtx_PLUS
8033 (Pmode, r0, stack_pointer_rtx));
8034 sp_in_r0 = 1;
8037 mem_rtx = post_inc;
8039 offset_in_r0 += GET_MODE_SIZE (mode);
8041 else if (sp_in_r0)
8042 mem_rtx = gen_frame_mem (mode, r0);
8043 else
8044 mem_rtx = gen_frame_mem (mode,
8045 gen_rtx_PLUS (Pmode,
8046 stack_pointer_rtx,
8047 r0));
8049 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8050 || mem_rtx == post_inc);
8052 addr_ok:
8053 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8054 && mem_rtx != post_inc)
8056 emit_move_insn (r0, mem_rtx);
8057 mem_rtx = r0;
8059 else if (TARGET_REGISTER_P (reg))
8061 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8063 /* Give the scheduler a bit of freedom by using up to
8064 MAX_TEMPS registers in a round-robin fashion. */
8065 emit_move_insn (tmp_reg, mem_rtx);
8066 mem_rtx = tmp_reg;
8067 if (*++tmp_pnt < 0)
8068 tmp_pnt = schedule.temps;
8071 emit_move_insn (reg_rtx, mem_rtx);
8074 gcc_assert (entry->offset + offset_base == d + d_rounding);
8076 else /* ! TARGET_SH5 */
8078 int last_reg;
8080 save_size = 0;
8081 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8082 register. */
8083 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8084 && !sh_cfun_resbank_handler_p ())
8086 if (!frame_pointer_needed)
8087 emit_insn (gen_blockage ());
8088 pop (PR_REG);
8091 /* Banked registers are popped first to avoid being scheduled in the
8092 delay slot. RTE switches banks before the ds instruction. */
8093 if (current_function_interrupt)
8095 bool use_movml = false;
8097 if (TARGET_SH2A)
8099 unsigned int count = 0;
8101 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8102 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8103 count++;
8104 else
8105 break;
8107 /* Use movml when all banked registers are popped. */
8108 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8109 use_movml = true;
8112 if (sh_cfun_resbank_handler_p ())
8113 ; /* Do nothing. */
8114 else if (use_movml)
8116 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8118 /* We must avoid scheduling multiple load insns together with
8119 other insns. */
8120 emit_insn (gen_blockage ());
8121 emit_insn (gen_movml_pop_banked (sp_reg));
8122 emit_insn (gen_blockage ());
8124 else
8125 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8126 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8127 pop (i);
8129 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8131 else
8132 last_reg = FIRST_PSEUDO_REGISTER;
8134 for (i = 0; i < last_reg; i++)
8136 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8138 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8139 && hard_reg_set_intersect_p (live_regs_mask,
8140 reg_class_contents[DF_REGS]))
8141 fpscr_deferred = 1;
8142 /* For an ISR with the RESBANK attribute assigned, don't pop the
8143 following registers: R0-R14, MACH, MACL and GBR. */
8144 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8145 && ! (sh_cfun_resbank_handler_p ()
8146 && ((j >= FIRST_GENERAL_REG
8147 && j < LAST_GENERAL_REG)
8148 || j == MACH_REG
8149 || j == MACL_REG
8150 || j == GBR_REG)))
8151 pop (j);
8153 if (j == FIRST_FP_REG && fpscr_deferred)
8154 pop (FPSCR_REG);
8157 if (target_flags != save_flags && ! current_function_interrupt)
8158 emit_insn (gen_toggle_sz ());
8159 target_flags = save_flags;
8161 output_stack_adjust (crtl->args.pretend_args_size
8162 + save_size + d_rounding
8163 + crtl->args.info.stack_regs * 8,
8164 stack_pointer_rtx, e, NULL, true);
8166 if (crtl->calls_eh_return)
8167 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8168 EH_RETURN_STACKADJ_RTX));
8170 /* Switch back to the normal stack if necessary. */
8171 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8172 emit_insn (gen_sp_switch_2 ());
8174 /* Tell flow the insn that pops PR isn't dead. */
8175 /* PR_REG will never be live in SHmedia mode, and we don't need to
8176 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8177 by the return pattern. */
8178 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8179 emit_use (gen_rtx_REG (SImode, PR_REG));
8182 /* Emit code to change the current function's return address to RA.
8183 TEMP is available as a scratch register, if needed. */
8184 void
8185 sh_set_return_address (rtx ra, rtx tmp)
8187 HARD_REG_SET live_regs_mask;
8188 int d;
8189 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8190 int pr_offset;
8192 d = calc_live_regs (&live_regs_mask);
8194 /* If pr_reg isn't live, we can set it (or the register given in
8195 sh_media_register_for_return) directly. */
8196 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8198 rtx rr;
8200 if (TARGET_SHMEDIA)
8202 int rr_regno = sh_media_register_for_return ();
8204 if (rr_regno < 0)
8205 rr_regno = pr_reg;
8207 rr = gen_rtx_REG (DImode, rr_regno);
8209 else
8210 rr = gen_rtx_REG (SImode, pr_reg);
8212 emit_insn (GEN_MOV (rr, ra));
8213 /* Tell flow the register for return isn't dead. */
8214 emit_use (rr);
8215 return;
8218 if (TARGET_SH5)
8220 int offset;
8221 save_schedule schedule;
8222 save_entry *entry;
8224 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8225 offset = entry[1].offset;
8226 for (; entry->mode != VOIDmode; entry--)
8227 if (entry->reg == pr_reg)
8228 goto found;
8230 /* We can't find the PR register. */
8231 gcc_unreachable ();
8233 found:
8234 offset = entry->offset - offset;
8235 pr_offset = (rounded_frame_size (d) + offset
8236 + SHMEDIA_REGS_STACK_ADJUST ());
8238 else
8239 pr_offset = rounded_frame_size (d);
8241 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8243 if (frame_pointer_needed)
8244 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8245 else
8246 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8248 tmp = gen_frame_mem (Pmode, tmp);
8249 emit_insn (GEN_MOV (tmp, ra));
8250 /* Tell flow this store isn't dead. */
8251 emit_use (tmp);
8254 /* Clear variables at function end. */
8255 static void
8256 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8257 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8261 static rtx
8262 sh_builtin_saveregs (void)
8264 /* First unnamed integer register. */
8265 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8266 /* Number of integer registers we need to save. */
8267 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8268 /* First unnamed SFmode float reg */
8269 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8270 /* Number of SFmode float regs to save. */
8271 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8272 rtx regbuf, fpregs;
8273 int bufsize, regno;
8274 alias_set_type alias_set;
8276 if (TARGET_SH5)
8278 if (n_intregs)
8280 int pushregs = n_intregs;
8282 while (pushregs < NPARM_REGS (SImode) - 1
8283 && (CALL_COOKIE_INT_REG_GET
8284 (crtl->args.info.call_cookie,
8285 NPARM_REGS (SImode) - pushregs)
8286 == 1))
8288 crtl->args.info.call_cookie
8289 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8290 - pushregs, 1);
8291 pushregs++;
8294 if (pushregs == NPARM_REGS (SImode))
8295 crtl->args.info.call_cookie
8296 |= (CALL_COOKIE_INT_REG (0, 1)
8297 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8298 else
8299 crtl->args.info.call_cookie
8300 |= CALL_COOKIE_STACKSEQ (pushregs);
8302 crtl->args.pretend_args_size += 8 * n_intregs;
8304 if (TARGET_SHCOMPACT)
8305 return const0_rtx;
8308 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8310 error ("__builtin_saveregs not supported by this subtarget");
8311 return const0_rtx;
8314 if (TARGET_SHMEDIA)
8315 n_floatregs = 0;
8317 /* Allocate block of memory for the regs. */
8318 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8319 Or can assign_stack_local accept a 0 SIZE argument? */
8320 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8322 if (TARGET_SHMEDIA)
8323 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8324 else if (n_floatregs & 1)
8326 rtx addr;
8328 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8329 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8330 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8331 regbuf = change_address (regbuf, BLKmode, addr);
8333 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8335 rtx addr, mask;
8337 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8338 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8339 XEXP (regbuf, 0), 4));
8340 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8341 emit_insn (gen_andsi3 (addr, addr, mask));
8342 regbuf = change_address (regbuf, BLKmode, addr);
8344 else
8345 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8346 alias_set = get_varargs_alias_set ();
8347 set_mem_alias_set (regbuf, alias_set);
8349 /* Save int args.
8350 This is optimized to only save the regs that are necessary. Explicitly
8351 named args need not be saved. */
8352 if (n_intregs > 0)
8353 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8354 adjust_address (regbuf, BLKmode,
8355 n_floatregs * UNITS_PER_WORD),
8356 n_intregs);
8358 if (TARGET_SHMEDIA)
8359 /* Return the address of the regbuf. */
8360 return XEXP (regbuf, 0);
8362 /* Save float args.
8363 This is optimized to only save the regs that are necessary. Explicitly
8364 named args need not be saved.
8365 We explicitly build a pointer to the buffer because it halves the insn
8366 count when not optimizing (otherwise the pointer is built for each reg
8367 saved).
8368 We emit the moves in reverse order so that we can use predecrement. */
8370 fpregs = copy_to_mode_reg (Pmode,
8371 plus_constant (Pmode, XEXP (regbuf, 0),
8372 n_floatregs * UNITS_PER_WORD));
8373 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8375 rtx mem;
8376 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8378 emit_insn (gen_addsi3 (fpregs, fpregs,
8379 GEN_INT (-2 * UNITS_PER_WORD)));
8380 mem = change_address (regbuf, DFmode, fpregs);
8381 emit_move_insn (mem,
8382 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8384 regno = first_floatreg;
8385 if (regno & 1)
8387 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8388 mem = change_address (regbuf, SFmode, fpregs);
8389 emit_move_insn (mem,
8390 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8391 + regno - SH_REG_MSW_OFFSET));
8394 else
8395 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8397 rtx mem;
8399 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8400 mem = change_address (regbuf, SFmode, fpregs);
8401 emit_move_insn (mem,
8402 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8405 /* Return the address of the regbuf. */
8406 return XEXP (regbuf, 0);
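#if 0
/* Illustrative picture (an assumption, not GCC code) of the varargs save
   area built above for a non-SHmedia SH4 target with 2 named int args and
   1 named float arg: the unnamed float registers are stored by predecrement
   at the start of the buffer and the unnamed int registers follow at offset
   n_floatregs * UNITS_PER_WORD.  The real buffer is a BLKmode stack slot,
   not a struct.  */
struct example_sh_saveregs_buf
{
  float fp_args[7];   /* unnamed fr regs, written back-to-front above     */
  int   int_args[2];  /* unnamed r6/r7, stored by move_block_from_reg     */
};
#endif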
8409 /* Define the `__builtin_va_list' type for the ABI. */
8410 static tree
8411 sh_build_builtin_va_list (void)
8413 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8414 tree record, type_decl;
8416 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8417 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8418 return ptr_type_node;
8420 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8421 type_decl = build_decl (BUILTINS_LOCATION,
8422 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8424 f_next_o = build_decl (BUILTINS_LOCATION,
8425 FIELD_DECL, get_identifier ("__va_next_o"),
8426 ptr_type_node);
8427 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8428 FIELD_DECL,
8429 get_identifier ("__va_next_o_limit"),
8430 ptr_type_node);
8431 f_next_fp = build_decl (BUILTINS_LOCATION,
8432 FIELD_DECL, get_identifier ("__va_next_fp"),
8433 ptr_type_node);
8434 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8435 FIELD_DECL,
8436 get_identifier ("__va_next_fp_limit"),
8437 ptr_type_node);
8438 f_next_stack = build_decl (BUILTINS_LOCATION,
8439 FIELD_DECL, get_identifier ("__va_next_stack"),
8440 ptr_type_node);
8442 DECL_FIELD_CONTEXT (f_next_o) = record;
8443 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8444 DECL_FIELD_CONTEXT (f_next_fp) = record;
8445 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8446 DECL_FIELD_CONTEXT (f_next_stack) = record;
8448 TYPE_STUB_DECL (record) = type_decl;
8449 TYPE_NAME (record) = type_decl;
8450 TYPE_FIELDS (record) = f_next_o;
8451 DECL_CHAIN (f_next_o) = f_next_o_limit;
8452 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8453 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8454 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8456 layout_type (record);
8458 return record;
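#if 0
/* Rough C equivalent of the record built above (a sketch; the real type is
   constructed from tree nodes and only exists for SH2E/SH4 with the default
   ABI):  */
typedef struct example_va_list_tag
{
  void *__va_next_o;        /* next unnamed arg in the int register area  */
  void *__va_next_o_limit;  /* end of the int register save area          */
  void *__va_next_fp;       /* next unnamed arg in the FP register area   */
  void *__va_next_fp_limit; /* end of the FP register save area           */
  void *__va_next_stack;    /* next unnamed arg passed on the stack       */
} example_va_list;
#endif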
8461 /* Implement `va_start' for varargs and stdarg. */
8462 static void
8463 sh_va_start (tree valist, rtx nextarg)
8465 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8466 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8467 tree t, u;
8468 int nfp, nint;
8470 if (TARGET_SH5)
8472 expand_builtin_saveregs ();
8473 std_expand_builtin_va_start (valist, nextarg);
8474 return;
8477 if ((! TARGET_SH2E && ! TARGET_SH4)
8478 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8480 std_expand_builtin_va_start (valist, nextarg);
8481 return;
8484 f_next_o = TYPE_FIELDS (va_list_type_node);
8485 f_next_o_limit = DECL_CHAIN (f_next_o);
8486 f_next_fp = DECL_CHAIN (f_next_o_limit);
8487 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8488 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8490 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8491 NULL_TREE);
8492 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8493 valist, f_next_o_limit, NULL_TREE);
8494 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8495 NULL_TREE);
8496 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8497 valist, f_next_fp_limit, NULL_TREE);
8498 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8499 valist, f_next_stack, NULL_TREE);
8501 /* Call __builtin_saveregs. */
8502 u = make_tree (sizetype, expand_builtin_saveregs ());
8503 u = fold_convert (ptr_type_node, u);
8504 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8505 TREE_SIDE_EFFECTS (t) = 1;
8506 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8508 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8509 if (nfp < 8)
8510 nfp = 8 - nfp;
8511 else
8512 nfp = 0;
8513 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8514 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8515 TREE_SIDE_EFFECTS (t) = 1;
8516 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8518 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8519 TREE_SIDE_EFFECTS (t) = 1;
8520 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8522 nint = crtl->args.info.arg_count[SH_ARG_INT];
8523 if (nint < 4)
8524 nint = 4 - nint;
8525 else
8526 nint = 0;
8527 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8528 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8529 TREE_SIDE_EFFECTS (t) = 1;
8530 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8532 u = make_tree (ptr_type_node, nextarg);
8533 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8534 TREE_SIDE_EFFECTS (t) = 1;
8535 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
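#if 0
/* Assumed worked example of the pointers set up above: for a callee declared
   as  int f (int a, int b, float c, ...)  on SH4, two int and one float
   argument register are named, so nfp == 8 - 1 == 7 and nint == 4 - 2 == 2.
   UNITS_PER_WORD == 4 is assumed; the function and variable names here are
   purely illustrative.  */
static void
example_sh_va_start_limits (char **next_fp, char **next_fp_limit,
                            char **next_o, char **next_o_limit,
                            char *regbuf /* result of __builtin_saveregs */)
{
  *next_fp       = regbuf;
  *next_fp_limit = regbuf + 7 * 4;   /* 7 unnamed float register slots  */
  *next_o        = *next_fp_limit;   /* int area starts right after     */
  *next_o_limit  = *next_o + 2 * 4;  /* 2 unnamed int register slots    */
}
#endif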
8538 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8539 member, return it. */
8540 static tree
8541 find_sole_member (tree type)
8543 tree field, member = NULL_TREE;
8545 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8547 if (TREE_CODE (field) != FIELD_DECL)
8548 continue;
8549 if (!DECL_SIZE (field))
8550 return NULL_TREE;
8551 if (integer_zerop (DECL_SIZE (field)))
8552 continue;
8553 if (member)
8554 return NULL_TREE;
8555 member = field;
8557 return member;
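#if 0
/* Assumed examples of the rule this helper supports in
   sh_gimplify_va_arg_expr below: a record whose only nonzero-sized member is
   a floating-point value is fetched the way the bare member would be, i.e.
   from the FP register save area.  */
struct example_wrapped_double { double d; };        /* treated like double  */
struct example_two_members    { float f; int i; };  /* no sole member       */
#endif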
8560 /* Implement `va_arg'. */
8561 static tree
8562 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8563 gimple_seq *post_p ATTRIBUTE_UNUSED)
8565 HOST_WIDE_INT size, rsize;
8566 tree tmp, pptr_type_node;
8567 tree addr, lab_over = NULL, result = NULL;
8568 bool pass_by_ref;
8569 tree eff_type;
8571 if (!VOID_TYPE_P (type))
8572 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8573 else
8574 pass_by_ref = false;
8576 if (pass_by_ref)
8577 type = build_pointer_type (type);
8579 size = int_size_in_bytes (type);
8580 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8581 pptr_type_node = build_pointer_type (ptr_type_node);
8583 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8584 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8586 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8587 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8588 int pass_as_float;
8589 tree lab_false;
8590 tree member;
8592 f_next_o = TYPE_FIELDS (va_list_type_node);
8593 f_next_o_limit = DECL_CHAIN (f_next_o);
8594 f_next_fp = DECL_CHAIN (f_next_o_limit);
8595 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8596 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8598 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8599 NULL_TREE);
8600 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8601 valist, f_next_o_limit, NULL_TREE);
8602 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8603 valist, f_next_fp, NULL_TREE);
8604 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8605 valist, f_next_fp_limit, NULL_TREE);
8606 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8607 valist, f_next_stack, NULL_TREE);
8609 /* Structures with a single member with a distinct mode are passed
8610 like their member. This is relevant if the latter has a REAL_TYPE
8611 or COMPLEX_TYPE type. */
8612 eff_type = type;
8613 while (TREE_CODE (eff_type) == RECORD_TYPE
8614 && (member = find_sole_member (eff_type))
8615 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8616 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8617 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8619 tree field_type = TREE_TYPE (member);
8621 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8622 eff_type = field_type;
8623 else
8625 gcc_assert ((TYPE_ALIGN (eff_type)
8626 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8627 || (TYPE_ALIGN (eff_type)
8628 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8629 break;
8633 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8635 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8636 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8637 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8638 && size <= 16));
8640 else
8642 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8645 addr = create_tmp_var (pptr_type_node);
8646 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8647 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8649 valist = build_simple_mem_ref (addr);
8651 if (pass_as_float)
8653 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8654 tree cmp;
8655 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8657 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8658 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8660 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8661 tmp = next_fp_limit;
8662 if (size > 4 && !is_double)
8663 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8664 tmp = build2 (GE_EXPR, boolean_type_node,
8665 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8666 cmp = build3 (COND_EXPR, void_type_node, tmp,
8667 build1 (GOTO_EXPR, void_type_node,
8668 unshare_expr (lab_false)), NULL_TREE);
8669 if (!is_double)
8670 gimplify_and_add (cmp, pre_p);
8672 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8673 || (is_double || size == 16))
8675 tmp = fold_convert (sizetype, next_fp_tmp);
8676 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8677 size_int (UNITS_PER_WORD));
8678 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8679 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8681 if (is_double)
8682 gimplify_and_add (cmp, pre_p);
8684 #ifdef FUNCTION_ARG_SCmode_WART
8685 if (TYPE_MODE (eff_type) == SCmode
8686 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8688 tree subtype = TREE_TYPE (eff_type);
8689 tree real, imag;
8691 imag
8692 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8693 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8695 real
8696 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8697 real = get_initialized_tmp_var (real, pre_p, NULL);
8699 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8700 if (type != eff_type)
8701 result = build1 (VIEW_CONVERT_EXPR, type, result);
8702 result = get_initialized_tmp_var (result, pre_p, NULL);
8704 #endif /* FUNCTION_ARG_SCmode_WART */
8706 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8707 gimplify_and_add (tmp, pre_p);
8709 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8710 gimplify_and_add (tmp, pre_p);
8712 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8713 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8714 gimplify_assign (unshare_expr (next_fp_tmp),
8715 unshare_expr (valist), pre_p);
8717 gimplify_assign (unshare_expr (valist),
8718 unshare_expr (next_fp_tmp), post_p);
8719 valist = next_fp_tmp;
8721 else
8723 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8724 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8725 unshare_expr (next_o_limit));
8726 tmp = build3 (COND_EXPR, void_type_node, tmp,
8727 build1 (GOTO_EXPR, void_type_node,
8728 unshare_expr (lab_false)),
8729 NULL_TREE);
8730 gimplify_and_add (tmp, pre_p);
8732 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8733 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8735 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8736 gimplify_and_add (tmp, pre_p);
8738 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8739 gimplify_and_add (tmp, pre_p);
8741 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8742 gimplify_assign (unshare_expr (next_o),
8743 unshare_expr (next_o_limit), pre_p);
8745 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8746 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8749 if (!result)
8751 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8752 gimplify_and_add (tmp, pre_p);
8756 /* ??? In va-sh.h, there had been code to make values larger than
8757 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8759 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8760 if (result)
8762 gimplify_assign (result, tmp, pre_p);
8763 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8764 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8765 gimplify_and_add (tmp, pre_p);
8767 else
8768 result = tmp;
8770 if (pass_by_ref)
8771 result = build_va_arg_indirect_ref (result);
8773 return result;
8776 /* 64-bit floating point memory transfers are paired single precision loads
8777 or stores, so DWARF information needs fixing in little endian mode (unless
8778 PR=SZ=1 in FPSCR). */
8780 sh_dwarf_register_span (rtx reg)
8782 unsigned regno = REGNO (reg);
8784 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8785 return NULL_RTX;
8787 return
8788 gen_rtx_PARALLEL (VOIDmode,
8789 gen_rtvec (2,
8790 gen_rtx_REG (SFmode, regno + 1),
8791 gen_rtx_REG (SFmode, regno)));
8794 static machine_mode
8795 sh_promote_function_mode (const_tree type, machine_mode mode,
8796 int *punsignedp, const_tree funtype,
8797 int for_return)
8799 if (sh_promote_prototypes (funtype))
8800 return promote_mode (type, mode, punsignedp);
8801 else
8802 return default_promote_function_mode (type, mode, punsignedp, funtype,
8803 for_return);
8806 static bool
8807 sh_promote_prototypes (const_tree type)
8809 if (TARGET_HITACHI)
8810 return false;
8811 if (! type)
8812 return true;
8813 return ! sh_attr_renesas_p (type);
8816 /* Whether an argument must be passed by reference. On SHcompact, we
8817 pretend arguments wider than 32 bits that would have been passed in
8818 registers are passed by reference, so that an SHmedia trampoline
8819 loads them into the full 64-bit registers. */
8820 static int
8821 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8822 const_tree type, bool named)
8824 unsigned HOST_WIDE_INT size;
8826 if (type)
8827 size = int_size_in_bytes (type);
8828 else
8829 size = GET_MODE_SIZE (mode);
8831 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8832 && (!named
8833 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8834 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8835 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8836 && size > 4
8837 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8838 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8839 return size;
8840 else
8841 return 0;
8844 static bool
8845 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8846 const_tree type, bool named)
8848 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8850 if (targetm.calls.must_pass_in_stack (mode, type))
8851 return true;
8853 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8854 wants to know about pass-by-reference semantics for incoming
8855 arguments. */
8856 if (! cum)
8857 return false;
8859 if (TARGET_SHCOMPACT)
8861 cum->byref = shcompact_byref (cum, mode, type, named);
8862 return cum->byref != 0;
8865 return false;
8868 static bool
8869 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8870 const_tree type, bool named ATTRIBUTE_UNUSED)
8872 /* ??? How can it possibly be correct to return true only on the
8873 caller side of the equation? Is there someplace else in the
8874 sh backend that's magically producing the copies? */
8875 return (get_cumulative_args (cum)->outgoing
8876 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8877 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8880 /* Round a register number up to a proper boundary for an arg of mode
8881 MODE.
8882 The SH doesn't care about double alignment, so we only
8883 round doubles to even regs when explicitly asked to. */
8884 static int
8885 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8887 /* FIXME: This used to be a macro and has been copy-pasted into this
8888 function as-is. Make this more readable. */
8889 return
8890 (((TARGET_ALIGN_DOUBLE
8891 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8892 && (mode == DFmode || mode == DCmode)
8893 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8894 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8895 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8896 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8897 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
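#if 0
/* Standalone sketch of the arithmetic above for the FP argument class on SH4
   (assumptions: a DFmode argument must start on an even slot, an SFmode
   argument does not; names are illustrative, not GCC code).  */
static int
example_round_fp_slot (int fp_slots_used, int is_double)
{
  return is_double ? fp_slots_used + (fp_slots_used & 1) : fp_slots_used;
}
/* example_round_fp_slot (3, 1) == 4 : a double after three floats skips
   slot 3;  example_round_fp_slot (3, 0) == 3 : a float does not.  */
#endif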
8900 /* Return true if an arg of the specified mode should be passed in a register
8901 or false otherwise. */
8902 static bool
8903 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8904 const_tree type)
8906 /* FIXME: This used to be a macro and has been copy-pasted into this
8907 function as-is. Make this more readable. */
8908 return
8909 ((type == 0
8910 || (! TREE_ADDRESSABLE (type)
8911 && (! (TARGET_HITACHI || cum.renesas_abi)
8912 || ! (AGGREGATE_TYPE_P (type)
8913 || (!TARGET_FPU_ANY
8914 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8915 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8916 && ! cum.force_mem
8917 && (TARGET_SH2E
8918 ? ((mode) == BLKmode
8919 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8920 + int_size_in_bytes (type))
8921 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8922 : ((sh_round_reg (cum, mode)
8923 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8924 <= NPARM_REGS (mode)))
8925 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8928 static int
8929 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
8930 tree type, bool named ATTRIBUTE_UNUSED)
8932 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8933 int words = 0;
8935 if (!TARGET_SH5
8936 && sh_pass_in_reg_p (*cum, mode, type)
8937 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8938 && (sh_round_reg (*cum, mode)
8939 + (mode != BLKmode
8940 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8941 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8942 > NPARM_REGS (mode)))
8943 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8945 else if (!TARGET_SHCOMPACT
8946 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8947 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8949 return words * UNITS_PER_WORD;
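#if 0
/* Standalone sketch of the partial-argument computation above, assuming a
   plain SH1/SH2 target (no SH2E FPU), UNITS_PER_WORD == 4 and
   NPARM_REGS (SImode) == 4.  Names are illustrative, not GCC code.  */
static int
example_partial_bytes (int first_free_reg, int size_in_bytes)
{
  int words = (size_in_bytes + 3) / 4;
  if (first_free_reg < 4 && first_free_reg + words > 4)
    return (4 - first_free_reg) * 4;   /* bytes that still fit in regs   */
  return 0;                            /* entirely in regs or on stack   */
}
/* example_partial_bytes (2, 12) == 8 : two words go in r6/r7, the last
   word of the argument goes on the stack.  */
#endif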
8953 /* Define where to put the arguments to a function.
8954 Value is zero to push the argument on the stack,
8955 or a hard register in which to store the argument.
8957 MODE is the argument's machine mode.
8958 TYPE is the data type of the argument (as a tree).
8959 This is null for libcalls where that information may
8960 not be available.
8961 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8962 the preceding args and about the function being called.
8963 NAMED is nonzero if this argument is a named parameter
8964 (otherwise it is an extra parameter matching an ellipsis).
8966 On SH the first args are normally in registers
8967 and the rest are pushed. Any arg that starts within the first
8968 NPARM_REGS words is at least partially passed in a register unless
8969 its data type forbids it. */
8970 static rtx
8971 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8972 const_tree type, bool named)
8974 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8976 if (! TARGET_SH5 && mode == VOIDmode)
8977 return GEN_INT (ca->renesas_abi ? 1 : 0);
8979 if (! TARGET_SH5
8980 && sh_pass_in_reg_p (*ca, mode, type)
8981 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8983 int regno;
8985 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8986 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8988 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8989 gen_rtx_REG (SFmode,
8990 BASE_ARG_REG (mode)
8991 + (sh_round_reg (*ca, mode) ^ 1)),
8992 const0_rtx);
8993 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8994 gen_rtx_REG (SFmode,
8995 BASE_ARG_REG (mode)
8996 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8997 GEN_INT (4));
8998 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9001 /* If the alignment of a DF value causes an SF register to be
9002 skipped, we will use that skipped register for the next SF
9003 value. */
9004 if ((TARGET_HITACHI || ca->renesas_abi)
9005 && ca->free_single_fp_reg
9006 && mode == SFmode)
9007 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9009 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9010 ^ (mode == SFmode && TARGET_SH4
9011 && TARGET_LITTLE_ENDIAN
9012 && ! TARGET_HITACHI && ! ca->renesas_abi);
9013 return gen_rtx_REG (mode, regno);
9017 if (TARGET_SH5)
9019 if (mode == VOIDmode && TARGET_SHCOMPACT)
9020 return GEN_INT (ca->call_cookie);
9022 /* The following test assumes unnamed arguments are promoted to
9023 DFmode. */
9024 if (mode == SFmode && ca->free_single_fp_reg)
9025 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9027 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9028 && (named || ! ca->prototype_p)
9029 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9031 if (! ca->prototype_p && TARGET_SHMEDIA)
9032 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9034 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9035 FIRST_FP_PARM_REG
9036 + ca->arg_count[(int) SH_ARG_FLOAT]);
9039 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9040 && (! TARGET_SHCOMPACT
9041 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9042 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9043 type, named))))
9045 return gen_rtx_REG (mode, (FIRST_PARM_REG
9046 + ca->arg_count[(int) SH_ARG_INT]));
9049 return NULL_RTX;
9052 return NULL_RTX;
9055 /* Update the data in CUM to advance over an argument
9056 of mode MODE and data type TYPE.
9057 (TYPE is null for libcalls where that information may not be
9058 available.) */
9059 static void
9060 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9061 const_tree type, bool named)
9063 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9065 if (ca->force_mem)
9066 ca->force_mem = 0;
9067 else if (TARGET_SH5)
9069 const_tree type2 = (ca->byref && type
9070 ? TREE_TYPE (type)
9071 : type);
9072 machine_mode mode2 = (ca->byref && type
9073 ? TYPE_MODE (type2)
9074 : mode);
9075 int dwords = ((ca->byref
9076 ? ca->byref
9077 : mode2 == BLKmode
9078 ? int_size_in_bytes (type2)
9079 : GET_MODE_SIZE (mode2)) + 7) / 8;
9080 int numregs = MIN (dwords, NPARM_REGS (SImode)
9081 - ca->arg_count[(int) SH_ARG_INT]);
9083 if (numregs)
9085 ca->arg_count[(int) SH_ARG_INT] += numregs;
9086 if (TARGET_SHCOMPACT
9087 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9089 ca->call_cookie
9090 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9091 - numregs, 1);
9092 /* N.B. We want this also for outgoing. */
9093 ca->stack_regs += numregs;
9095 else if (ca->byref)
9097 if (! ca->outgoing)
9098 ca->stack_regs += numregs;
9099 ca->byref_regs += numregs;
9100 ca->byref = 0;
9102 ca->call_cookie
9103 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9104 - numregs, 2);
9105 while (--numregs);
9106 ca->call_cookie
9107 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9108 - 1, 1);
9110 else if (dwords > numregs)
9112 int pushregs = numregs;
9114 if (TARGET_SHCOMPACT)
9115 ca->stack_regs += numregs;
9116 while (pushregs < NPARM_REGS (SImode) - 1
9117 && (CALL_COOKIE_INT_REG_GET
9118 (ca->call_cookie,
9119 NPARM_REGS (SImode) - pushregs)
9120 == 1))
9122 ca->call_cookie
9123 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9124 - pushregs, 1);
9125 pushregs++;
9127 if (numregs == NPARM_REGS (SImode))
9128 ca->call_cookie
9129 |= CALL_COOKIE_INT_REG (0, 1)
9130 | CALL_COOKIE_STACKSEQ (numregs - 1);
9131 else
9132 ca->call_cookie
9133 |= CALL_COOKIE_STACKSEQ (numregs);
9136 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9137 && (named || ! ca->prototype_p))
9139 if (mode2 == SFmode && ca->free_single_fp_reg)
9140 ca->free_single_fp_reg = 0;
9141 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9142 < NPARM_REGS (SFmode))
9144 int numfpregs
9145 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9146 NPARM_REGS (SFmode)
9147 - ca->arg_count[(int) SH_ARG_FLOAT]);
9149 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9151 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9153 if (ca->outgoing && numregs > 0)
9156 ca->call_cookie
9157 |= (CALL_COOKIE_INT_REG
9158 (ca->arg_count[(int) SH_ARG_INT]
9159 - numregs + ((numfpregs - 2) / 2),
9160 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9161 - numfpregs) / 2));
9163 while (numfpregs -= 2);
9165 else if (mode2 == SFmode && (named)
9166 && (ca->arg_count[(int) SH_ARG_FLOAT]
9167 < NPARM_REGS (SFmode)))
9168 ca->free_single_fp_reg
9169 = FIRST_FP_PARM_REG - numfpregs
9170 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9173 return;
9176 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9178 /* Note that we've used the skipped register. */
9179 if (mode == SFmode && ca->free_single_fp_reg)
9181 ca->free_single_fp_reg = 0;
9182 return;
9184 /* When we have a DF after an SF, there's an SF register that gets
9185 skipped in order to align the DF value. We note this skipped
9186 register, because the next SF value will use it, and not the
9187 SF that follows the DF. */
9188 if (mode == DFmode
9189 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9191 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9192 + BASE_ARG_REG (mode));
9196 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9197 || sh_pass_in_reg_p (*ca, mode, type))
9198 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9199 = (sh_round_reg (*ca, mode)
9200 + (mode == BLKmode
9201 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9202 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9205 /* The Renesas calling convention doesn't quite fit into this scheme since
9206 the address is passed like an invisible argument, but one that is always
9207 passed in memory. */
9208 static rtx
9209 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9211 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9212 return NULL_RTX;
9213 return gen_rtx_REG (Pmode, 2);
9216 /* Worker function for TARGET_FUNCTION_VALUE.
9218 For the SH, this is like LIBCALL_VALUE, except that we must change the
9219 mode like PROMOTE_MODE does.
9220 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9221 tested here has to be kept in sync with the one in
9222 explow.c:promote_mode. */
9223 static rtx
9224 sh_function_value (const_tree valtype,
9225 const_tree fn_decl_or_type,
9226 bool outgoing ATTRIBUTE_UNUSED)
9228 if (fn_decl_or_type
9229 && !DECL_P (fn_decl_or_type))
9230 fn_decl_or_type = NULL;
9232 return gen_rtx_REG (
9233 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9234 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9235 && (TREE_CODE (valtype) == INTEGER_TYPE
9236 || TREE_CODE (valtype) == ENUMERAL_TYPE
9237 || TREE_CODE (valtype) == BOOLEAN_TYPE
9238 || TREE_CODE (valtype) == REAL_TYPE
9239 || TREE_CODE (valtype) == OFFSET_TYPE))
9240 && sh_promote_prototypes (fn_decl_or_type)
9241 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9242 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9245 /* Worker function for TARGET_LIBCALL_VALUE. */
9246 static rtx
9247 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9249 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9252 /* Return true if N is a possible register number of function value. */
9253 static bool
9254 sh_function_value_regno_p (const unsigned int regno)
9256 return ((regno) == FIRST_RET_REG
9257 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9258 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9261 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9262 static bool
9263 sh_return_in_memory (const_tree type, const_tree fndecl)
9265 if (TARGET_SH5)
9267 if (TYPE_MODE (type) == BLKmode)
9268 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9269 else
9270 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9272 else
9274 return (TYPE_MODE (type) == BLKmode
9275 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9276 && TREE_CODE (type) == RECORD_TYPE));
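#if 0
/* Assumed examples of the rule above for the default (non-Renesas, non-SH5)
   ABI: aggregates whose TYPE_MODE is BLKmode are returned in memory, while
   scalar returns use FIRST_RET_REG / FIRST_FP_RET_REG.  With the Renesas ABI
   every RECORD_TYPE is returned in memory.  */
struct example_big { int a[4]; };   /* typically BLKmode -> returned in memory  */
#endif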
9280 /* We actually emit the code in sh_expand_prologue. We used to use
9281 a static variable to flag that we need to emit this code, but that
9282 doesn't work when inlining, when functions are deferred and then emitted
9283 later. Fortunately, we already have two flags that are part of struct
9284 function that tell if a function uses varargs or stdarg. */
9285 static void
9286 sh_setup_incoming_varargs (cumulative_args_t ca,
9287 machine_mode mode,
9288 tree type,
9289 int *pretend_arg_size,
9290 int second_time ATTRIBUTE_UNUSED)
9292 gcc_assert (cfun->stdarg);
9293 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9295 int named_parm_regs, anon_parm_regs;
9297 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9298 + (mode == BLKmode
9299 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9300 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9301 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9302 if (anon_parm_regs > 0)
9303 *pretend_arg_size = anon_parm_regs * 4;
9307 static bool
9308 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9310 return TARGET_SH5;
9313 static bool
9314 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9316 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9318 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9322 /* Define the offset between two registers, one to be eliminated, and
9323 the other its replacement, at the start of a routine. */
9325 initial_elimination_offset (int from, int to)
9327 int regs_saved;
9328 int regs_saved_rounding = 0;
9329 int total_saved_regs_space;
9330 int total_auto_space;
9331 int save_flags = target_flags;
9332 int copy_flags;
9333 HARD_REG_SET live_regs_mask;
9335 shmedia_space_reserved_for_target_registers = false;
9336 regs_saved = calc_live_regs (&live_regs_mask);
9337 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9339 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9341 shmedia_space_reserved_for_target_registers = true;
9342 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9345 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9346 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9347 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9349 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9350 copy_flags = target_flags;
9351 target_flags = save_flags;
9353 total_saved_regs_space = regs_saved + regs_saved_rounding;
9355 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9356 return total_saved_regs_space + total_auto_space
9357 + crtl->args.info.byref_regs * 8;
9359 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9360 return total_saved_regs_space + total_auto_space
9361 + crtl->args.info.byref_regs * 8;
9363 /* Initial gap between fp and sp is 0. */
9364 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9365 return 0;
9367 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9368 return rounded_frame_size (0);
9370 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9371 return rounded_frame_size (0);
9373 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9374 && (to == HARD_FRAME_POINTER_REGNUM
9375 || to == STACK_POINTER_REGNUM));
9376 if (TARGET_SH5)
9378 int n = total_saved_regs_space;
9379 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9380 save_schedule schedule;
9381 save_entry *entry;
9383 n += total_auto_space;
9385 /* If it wasn't saved, there's not much we can do. */
9386 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9387 return n;
9389 target_flags = copy_flags;
9391 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9392 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9393 if (entry->reg == pr_reg)
9395 target_flags = save_flags;
9396 return entry->offset;
9398 gcc_unreachable ();
9400 else
9401 return total_auto_space;
9404 /* Parse the -mfixed-range= option string. */
9405 void
9406 sh_fix_range (const char *const_str)
9408 int i, first, last;
9409 char *str, *dash, *comma;
9411 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9412 REG2 are either register names or register numbers. The effect
9413 of this option is to mark the registers in the range from REG1 to
9414 REG2 as ``fixed'' so they won't be used by the compiler. */
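/* For example (illustrative): -mfixed-range=r10-r13 marks r10..r13 as
   fixed and call-used, and -mfixed-range=r10-r11,r13-r13 marks r10, r11
   and r13; note that a single register must still be written as a range.  */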
9416 i = strlen (const_str);
9417 str = (char *) alloca (i + 1);
9418 memcpy (str, const_str, i + 1);
9420 while (1)
9422 dash = strchr (str, '-');
9423 if (!dash)
9425 warning (0, "value of -mfixed-range must have form REG1-REG2");
9426 return;
9428 *dash = '\0';
9429 comma = strchr (dash + 1, ',');
9430 if (comma)
9431 *comma = '\0';
9433 first = decode_reg_name (str);
9434 if (first < 0)
9436 warning (0, "unknown register name: %s", str);
9437 return;
9440 last = decode_reg_name (dash + 1);
9441 if (last < 0)
9443 warning (0, "unknown register name: %s", dash + 1);
9444 return;
9447 *dash = '-';
9449 if (first > last)
9451 warning (0, "%s-%s is an empty range", str, dash + 1);
9452 return;
9455 for (i = first; i <= last; ++i)
9456 fixed_regs[i] = call_used_regs[i] = 1;
9458 if (!comma)
9459 break;
9461 *comma = ',';
9462 str = comma + 1;
9466 /* Insert any deferred function attributes from earlier pragmas. */
9467 static void
9468 sh_insert_attributes (tree node, tree *attributes)
9470 tree attrs;
9472 if (TREE_CODE (node) != FUNCTION_DECL)
9473 return;
9475 /* We are only interested in fields. */
9476 if (!DECL_P (node))
9477 return;
9479 /* Append the attributes to the deferred attributes. */
9480 *sh_deferred_function_attributes_tail = *attributes;
9481 attrs = sh_deferred_function_attributes;
9482 if (!attrs)
9483 return;
9485 /* Some attributes imply or require the interrupt attribute. */
9486 if (!lookup_attribute ("interrupt_handler", attrs)
9487 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9489 /* If we have a trapa_handler, but no interrupt_handler attribute,
9490 insert an interrupt_handler attribute. */
9491 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9492 /* We can't use sh_pr_interrupt here because that's not in the
9493 java frontend. */
9494 attrs
9495 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9496 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9497 if the interrupt attribute is missing, we ignore the attribute
9498 and warn. */
9499 else if (lookup_attribute ("sp_switch", attrs)
9500 || lookup_attribute ("trap_exit", attrs)
9501 || lookup_attribute ("nosave_low_regs", attrs)
9502 || lookup_attribute ("resbank", attrs))
9504 tree *tail;
9506 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9508 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9509 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9510 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9511 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9512 warning (OPT_Wattributes,
9513 "%qE attribute only applies to interrupt functions",
9514 TREE_PURPOSE (attrs));
9515 else
9517 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9518 NULL_TREE);
9519 tail = &TREE_CHAIN (*tail);
9522 attrs = *attributes;
9526 /* Install the processed list. */
9527 *attributes = attrs;
9529 /* Clear deferred attributes. */
9530 sh_deferred_function_attributes = NULL_TREE;
9531 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9533 return;
9536 /*------------------------------------------------------------------------------
9537 Target specific attributes
9538 Supported attributes are:
9540 * interrupt_handler
9541 Specifies this function is an interrupt handler.
9543 * trapa_handler
9544 Like interrupt_handler, but don't save all registers.
9546 * sp_switch
9547 Specifies an alternate stack for an interrupt handler to run on.
9549 * trap_exit
9550 Use a trapa to exit an interrupt function instead of rte.
9552 * nosave_low_regs
9553 Don't save r0..r7 in an interrupt handler function.
9554 This is useful on SH3* and SH4*, which have a separate set of low
9555 regs for user and privileged modes.
9556 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9557 those that run with interrupts disabled and thus can't be
9558 interrupted themselves).
9560 * renesas
9561 Use Renesas calling/layout conventions (functions and structures).
9563 * resbank
9564 In case of an interrupt handler function, use a register bank to
9565 save registers R0-R14, MACH, MACL, GBR and PR.
9566 This is available only on SH2A targets.
9568 * function_vector
9569 Declares a function to be called using the TBR relative addressing
9570 mode. Takes an argument that specifies the slot number in the table
9571 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
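/* A minimal usage sketch of some of these attributes (illustrative only;
   the function and stack symbol names are made up):

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11)))
     my_isr (void);

     void __attribute__ ((function_vector (42)))
     my_fast_func (void);   // SH2A only; slot must be 0..255

   sp_switch takes a string constant, trap_exit and function_vector take
   integer constants, as enforced by the attribute handlers below.  */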
9574 /* Handle a 'resbank' attribute. */
9575 static tree
9576 sh_handle_resbank_handler_attribute (tree * node, tree name,
9577 tree args ATTRIBUTE_UNUSED,
9578 int flags ATTRIBUTE_UNUSED,
9579 bool * no_add_attrs)
9581 if (!TARGET_SH2A)
9583 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9584 name);
9585 *no_add_attrs = true;
9587 if (TREE_CODE (*node) != FUNCTION_DECL)
9589 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9590 name);
9591 *no_add_attrs = true;
9594 return NULL_TREE;
9597 /* Handle an "interrupt_handler" attribute; arguments as in
9598 struct attribute_spec.handler. */
9599 static tree
9600 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9601 tree args ATTRIBUTE_UNUSED,
9602 int flags ATTRIBUTE_UNUSED,
9603 bool *no_add_attrs)
9605 if (TREE_CODE (*node) != FUNCTION_DECL)
9607 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9608 name);
9609 *no_add_attrs = true;
9611 else if (TARGET_SHCOMPACT)
9613 error ("attribute interrupt_handler is not compatible with -m5-compact");
9614 *no_add_attrs = true;
9617 return NULL_TREE;
9620 /* Handle a 'function_vector' attribute; arguments as in
9621 struct attribute_spec.handler. */
9622 static tree
9623 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9624 tree args ATTRIBUTE_UNUSED,
9625 int flags ATTRIBUTE_UNUSED,
9626 bool * no_add_attrs)
9628 if (!TARGET_SH2A)
9630 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9631 name);
9632 *no_add_attrs = true;
9634 else if (TREE_CODE (*node) != FUNCTION_DECL)
9636 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9637 name);
9638 *no_add_attrs = true;
9640 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9642 /* The argument must be a constant integer. */
9643 warning (OPT_Wattributes,
9644 "%qE attribute argument not an integer constant",
9645 name);
9646 *no_add_attrs = true;
9648 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9650 /* The argument value must be between 0 and 255. */
9651 warning (OPT_Wattributes,
9652 "%qE attribute argument should be between 0 to 255",
9653 name);
9654 *no_add_attrs = true;
9656 return NULL_TREE;
9659 /* Returns true if X is a SYMBOL_REF for a function that has been
9660 assigned the 'function_vector' attribute. */
9661 bool
9662 sh2a_is_function_vector_call (rtx x)
9664 if (GET_CODE (x) == SYMBOL_REF
9665 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9667 tree tr = SYMBOL_REF_DECL (x);
9669 if (sh2a_function_vector_p (tr))
9670 return true;
9673 return false;
9676 /* Returns the function vector number, if the attribute
9677 'function_vector' is assigned, otherwise returns zero. */
9679 sh2a_get_function_vector_number (rtx x)
9681 int num;
9682 tree list, t;
9684 if ((GET_CODE (x) == SYMBOL_REF)
9685 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9687 t = SYMBOL_REF_DECL (x);
9689 if (TREE_CODE (t) != FUNCTION_DECL)
9690 return 0;
9692 list = SH_ATTRIBUTES (t);
9693 while (list)
9695 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9697 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9698 return num;
9701 list = TREE_CHAIN (list);
9704 return 0;
9706 else
9707 return 0;
9710 /* Handle an "sp_switch" attribute; arguments as in
9711 struct attribute_spec.handler. */
9712 static tree
9713 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9714 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9716 if (TREE_CODE (*node) != FUNCTION_DECL)
9718 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9719 name);
9720 *no_add_attrs = true;
9722 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9724 /* The argument must be a constant string. */
9725 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9726 name);
9727 *no_add_attrs = true;
9730 return NULL_TREE;
9733 /* Handle a "trap_exit" attribute; arguments as in
9734 struct attribute_spec.handler. */
9735 static tree
9736 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9737 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9739 if (TREE_CODE (*node) != FUNCTION_DECL)
9741 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9742 name);
9743 *no_add_attrs = true;
9745 /* The argument specifies a trap number to be used in a trapa instruction
9746 at function exit (instead of an rte instruction). */
9747 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9749 /* The argument must be a constant integer. */
9750 warning (OPT_Wattributes, "%qE attribute argument not an "
9751 "integer constant", name);
9752 *no_add_attrs = true;
9755 return NULL_TREE;
9758 static tree
9759 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9760 tree name ATTRIBUTE_UNUSED,
9761 tree args ATTRIBUTE_UNUSED,
9762 int flags ATTRIBUTE_UNUSED,
9763 bool *no_add_attrs ATTRIBUTE_UNUSED)
9765 return NULL_TREE;
9768 /* True if __attribute__((renesas)) or -mrenesas. */
9769 bool
9770 sh_attr_renesas_p (const_tree td)
9772 if (TARGET_HITACHI)
9773 return true;
9774 if (td == NULL_TREE)
9775 return false;
9776 if (DECL_P (td))
9777 td = TREE_TYPE (td);
9778 if (td == error_mark_node)
9779 return false;
9780 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9781 != NULL_TREE);
9784 /* True if __attribute__((renesas)) or -mrenesas, for the current
9785 function. */
9786 bool
9787 sh_cfun_attr_renesas_p (void)
9789 return sh_attr_renesas_p (current_function_decl);
9792 /* Returns true if the current function has the "interrupt_handler"
9793 attribute set. */
9794 bool
9795 sh_cfun_interrupt_handler_p (void)
9797 return (lookup_attribute ("interrupt_handler",
9798 DECL_ATTRIBUTES (current_function_decl))
9799 != NULL_TREE);
9802 /* Returns true if FUNC has been assigned the attribute
9803 "function_vector". */
9804 bool
9805 sh2a_function_vector_p (tree func)
9807 tree list;
9808 if (TREE_CODE (func) != FUNCTION_DECL)
9809 return false;
9811 list = SH_ATTRIBUTES (func);
9812 while (list)
9814 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9815 return true;
9817 list = TREE_CHAIN (list);
9819 return false;
9822 /* Returns true if the current function is an SH2A interrupt handler with the "resbank" attribute set. */
9823 bool
9824 sh_cfun_resbank_handler_p (void)
9826 return ((lookup_attribute ("resbank",
9827 DECL_ATTRIBUTES (current_function_decl))
9828 != NULL_TREE)
9829 && (lookup_attribute ("interrupt_handler",
9830 DECL_ATTRIBUTES (current_function_decl))
9831 != NULL_TREE) && TARGET_SH2A);
9834 /* Returns true if the current function has a "trap_exit" attribute set. */
9835 bool
9836 sh_cfun_trap_exit_p (void)
9838 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9839 != NULL_TREE;
9842 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9843 static const char *
9844 sh_check_pch_target_flags (int old_flags)
9846 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9847 | MASK_SH_E | MASK_HARD_SH4
9848 | MASK_FPU_SINGLE | MASK_SH4))
9849 return _("created and used with different architectures / ABIs");
9850 if ((old_flags ^ target_flags) & MASK_HITACHI)
9851 return _("created and used with different ABIs");
9852 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9853 return _("created and used with different endianness");
9854 return NULL;
9857 /* Predicates used by the templates. */
9859 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9860 Used only in general_movsrc_operand. */
9861 bool
9862 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9864 switch (REGNO (op))
9866 case PR_REG:
9867 case MACL_REG:
9868 case MACH_REG:
9869 return true;
9871 return false;
9874 /* Returns true if OP is a floating point value with value 0.0. */
9875 bool
9876 fp_zero_operand (rtx op)
9878 REAL_VALUE_TYPE r;
9880 if (GET_MODE (op) != SFmode)
9881 return false;
9883 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9884 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9887 /* Returns true if OP is a floating point value with value 1.0. */
9888 bool
9889 fp_one_operand (rtx op)
9891 REAL_VALUE_TYPE r;
9893 if (GET_MODE (op) != SFmode)
9894 return false;
9896 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9897 return REAL_VALUES_EQUAL (r, dconst1);
9900 /* Return the TLS type for TLS symbols. */
9901 enum tls_model
9902 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9904 if (GET_CODE (op) != SYMBOL_REF)
9905 return TLS_MODEL_NONE;
9906 return SYMBOL_REF_TLS_MODEL (op);
9909 /* Return the destination address of a branch. */
9910 static int
9911 branch_dest (rtx branch)
9913 rtx dest = SET_SRC (PATTERN (branch));
9914 int dest_uid;
9916 if (GET_CODE (dest) == IF_THEN_ELSE)
9917 dest = XEXP (dest, 1);
9918 dest = XEXP (dest, 0);
9919 dest_uid = INSN_UID (dest);
9920 return INSN_ADDRESSES (dest_uid);
9923 /* Return nonzero if REG is not used after INSN.
9924 We assume REG is a reload reg, and therefore does
9925 not live past labels. It may live past calls or jumps though. */
9926 bool
9927 reg_unused_after (rtx reg, rtx_insn *insn)
9929 enum rtx_code code;
9930 rtx set;
9932 /* If the reg is set by this instruction, then it is safe for our
9933 case. Disregard the case where this is a store to memory, since
9934 we are checking a register used in the store address. */
9935 set = single_set (insn);
9936 if (set && !MEM_P (SET_DEST (set))
9937 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9938 return true;
9940 while ((insn = NEXT_INSN (insn)))
9942 rtx set;
9943 if (!INSN_P (insn))
9944 continue;
9946 code = GET_CODE (insn);
9948 #if 0
9949 /* If this is a label that existed before reload, then the register
9950 is dead here. However, if this is a label added by reorg, then
9951 the register may still be live here. We can't tell the difference,
9952 so we just ignore labels completely. */
9953 if (code == CODE_LABEL)
9954 return 1;
9955 /* else */
9956 #endif
9958 if (code == JUMP_INSN)
9959 return false;
9961 /* If this is a sequence, we must handle them all at once.
9962 We could have for instance a call that sets the target register,
9963 and an insn in a delay slot that uses the register. In this case,
9964 we must return 0. */
9965 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9967 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
9968 int i;
9969 int retval = 0;
9971 for (i = 0; i < seq->len (); i++)
9973 rtx_insn *this_insn = seq->insn (i);
9974 rtx set = single_set (this_insn);
9976 if (CALL_P (this_insn))
9977 code = CALL_INSN;
9978 else if (JUMP_P (this_insn))
9980 if (INSN_ANNULLED_BRANCH_P (this_insn))
9981 return false;
9982 code = JUMP_INSN;
9985 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9986 return false;
9987 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9989 if (!MEM_P (SET_DEST (set)))
9990 retval = true;
9991 else
9992 return false;
9994 if (set == NULL_RTX
9995 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9996 return false;
9998 if (retval == 1)
9999 return true;
10000 else if (code == JUMP_INSN)
10001 return false;
10004 set = single_set (insn);
10005 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10006 return false;
10007 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10008 return !MEM_P (SET_DEST (set));
10009 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10010 return false;
10012 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10013 return true;
10015 return true;
10018 #include "ggc.h"
10020 static GTY(()) rtx t_reg_rtx;
10022 get_t_reg_rtx (void)
10024 if (! t_reg_rtx)
10025 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10026 return t_reg_rtx;
10029 static GTY(()) tree fpscr_values;
10031 static void
10032 emit_fpu_switch (rtx scratch, int index)
10034 rtx src;
10036 if (fpscr_values == NULL)
10038 tree t;
10040 t = build_index_type (integer_one_node);
10041 t = build_array_type (integer_type_node, t);
10042 t = build_decl (BUILTINS_LOCATION,
10043 VAR_DECL, get_identifier ("__fpscr_values"), t);
10044 DECL_ARTIFICIAL (t) = 1;
10045 DECL_IGNORED_P (t) = 1;
10046 DECL_EXTERNAL (t) = 1;
10047 TREE_STATIC (t) = 1;
10048 TREE_PUBLIC (t) = 1;
10049 TREE_USED (t) = 1;
10051 fpscr_values = t;
10054 src = DECL_RTL (fpscr_values);
10055 if (!can_create_pseudo_p ())
10057 emit_move_insn (scratch, XEXP (src, 0));
10058 if (index != 0)
10059 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10060 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10062 else
10063 src = adjust_address (src, SImode, index * 4);
10065 emit_insn (gen_lds_fpscr (src));
10068 static rtx get_free_reg (HARD_REG_SET);
10070 /* This function returns a register to use for loading the address from
10071 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
10072 able to use pseudo registers after combine, or have a better mechanism
10073 for choosing a register, it should be done here. */
10074 /* REGS_LIVE is the liveness information for the point for which we
10075 need this allocation. In some bare-bones exit blocks, r1 is live at the
10076 start. We can even have all of r0..r3 being live:
10077 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10078 The INSN before which new insns are placed will clobber the register
10079 we return. If a basic block consists only of setting the return value
10080 register to a pseudo and using that register, the return value is not
10081 live before or after this block, yet we'll insert our insns right in
10082 the middle. */
10083 static rtx
10084 get_free_reg (HARD_REG_SET regs_live)
10086 if (! TEST_HARD_REG_BIT (regs_live, 1))
10087 return gen_rtx_REG (Pmode, 1);
10089 /* Hard reg 1 is live; since this is a small register classes target,
10090 there shouldn't be anything but a jump before the function end. */
10091 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10092 return gen_rtx_REG (Pmode, 7);
10095 /* This function will set the fpscr from memory.
10096 MODE is the mode we are setting it to. */
10097 void
10098 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10100 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10101 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10102 rtx addr_reg;
10104 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10105 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10108 /* Is the given character a logical line separator for the assembler? */
10109 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10110 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10111 #endif
10113 static bool
10114 sequence_insn_p (rtx_insn *insn)
10116 rtx_insn *prev, *next;
10118 prev = PREV_INSN (insn);
10119 if (prev == NULL)
10120 return false;
10122 next = NEXT_INSN (prev);
10123 if (next == NULL)
10124 return false;
10126 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10130 sh_insn_length_adjustment (rtx_insn *insn)
10132 /* Instructions with unfilled delay slots take up an extra two bytes for
10133 the nop in the delay slot. */
10134 if (((NONJUMP_INSN_P (insn)
10135 && GET_CODE (PATTERN (insn)) != USE
10136 && GET_CODE (PATTERN (insn)) != CLOBBER)
10137 || CALL_P (insn) || JUMP_P (insn))
10138 && ! sequence_insn_p (insn)
10139 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10140 return 2;
10142 /* SH2e has a bug that prevents the use of annulled branches, so if
10143 the delay slot is not filled, we'll have to put a NOP in it. */
10144 if (sh_cpu_attr == CPU_SH2E
10145 && JUMP_P (insn)
10146 && get_attr_type (insn) == TYPE_CBRANCH
10147 && ! sequence_insn_p (insn))
10148 return 2;
10150 /* sh-dsp parallel processing insns take four bytes instead of two. */
10152 if (NONJUMP_INSN_P (insn))
10154 int sum = 0;
10155 rtx body = PATTERN (insn);
10156 const char *templ;
10157 char c;
10158 bool maybe_label = true;
10160 if (GET_CODE (body) == ASM_INPUT)
10161 templ = XSTR (body, 0);
10162 else if (asm_noperands (body) >= 0)
10163 templ
10164 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10165 else
10166 return 0;
10169 int ppi_adjust = 0;
10172 c = *templ++;
10173 while (c == ' ' || c == '\t');
10174 /* all sh-dsp parallel-processing insns start with p.
10175 The only non-ppi sh insn starting with p is pref.
10176 The only ppi starting with pr is prnd. */
10177 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10178 ppi_adjust = 2;
10179 /* The repeat pseudo-insn expands to three insns, a total of
10180 six bytes in size. */
10181 else if ((c == 'r' || c == 'R')
10182 && ! strncasecmp ("epeat", templ, 5))
10183 ppi_adjust = 4;
10184 while (c && c != '\n'
10185 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10187 /* If this is a label, it is obviously not a ppi insn. */
10188 if (c == ':' && maybe_label)
10190 ppi_adjust = 0;
10191 break;
10193 else if (c == '\'' || c == '"')
10194 maybe_label = false;
10195 c = *templ++;
10197 sum += ppi_adjust;
10198 maybe_label = c != ':';
10200 while (c);
10201 return sum;
10203 return 0;
10206 /* Return TRUE for a valid displacement for the REG+disp addressing
10207 with MODE. */
10208 bool
10209 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10210 bool allow_zero)
10212 if (! CONST_INT_P (op))
10213 return false;
10215 if (TARGET_SHMEDIA)
10217 int size;
10219 /* Check if this is the address of an unaligned load / store. */
10220 if (mode == VOIDmode)
10221 return satisfies_constraint_I06 (op);
10223 size = GET_MODE_SIZE (mode);
10224 return (!(INTVAL (op) & (size - 1))
10225 && INTVAL (op) >= -512 * size
10226 && INTVAL (op) < 512 * size);
10228 else
10230 const HOST_WIDE_INT offset = INTVAL (op);
10231 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10232 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10234 /* If the mode does not support any displacement always return false.
10235 Even though an index of '0' is actually always valid, it will cause
10236 troubles when e.g. a DFmode move is split into two SFmode moves,
10237 where one SFmode move will have index '0' and the other move will
10238 have index '4'. */
10239 if (!allow_zero && max_disp < 1)
10240 return false;
10242 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10246 /* Recognize an RTL expression that is a valid memory address for
10247 an instruction.
10248 The MODE argument is the machine mode for the MEM expression
10249 that wants to use this address.
10250 Allow REG
10251 REG+disp
10252 REG+r0
10253 REG++
10254 --REG
10256 GBR+disp */
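/* In SH assembler syntax these forms correspond roughly to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn and @(disp,GBR); which of them are accepted for a
   given MODE is decided by the checks below (a sketch, not a complete
   statement of the constraints).  */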
10257 static bool
10258 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10260 if (! ALLOW_INDEXED_ADDRESS
10261 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10262 return false;
10264 if (REG_P (x) && REGNO (x) == GBR_REG)
10265 return true;
10267 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10268 return true;
10269 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10270 && ! TARGET_SHMEDIA
10271 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10272 return true;
10273 else if (GET_CODE (x) == PLUS)
10275 rtx xop0 = XEXP (x, 0);
10276 rtx xop1 = XEXP (x, 1);
10278 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10279 return gbr_displacement (xop1, mode);
10281 if (GET_MODE_SIZE (mode) <= 8
10282 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10283 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10284 return true;
10286 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10287 || ((xop0 == stack_pointer_rtx
10288 || xop0 == hard_frame_pointer_rtx)
10289 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10290 || ((xop1 == stack_pointer_rtx
10291 || xop1 == hard_frame_pointer_rtx)
10292 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10293 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10294 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10295 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10296 && TARGET_FMOVD && mode == DFmode)))
10298 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10299 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10300 return true;
10301 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10302 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10303 return true;
10307 return false;
10310 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10311 isn't protected by a PIC unspec. */
10312 bool
10313 nonpic_symbol_mentioned_p (rtx x)
10315 const char *fmt;
10316 int i;
10318 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10319 || GET_CODE (x) == PC)
10320 return true;
10322 /* We don't want to look into the possible MEM location of a
10323 CONST_DOUBLE, since we're not going to use it, in general. */
10324 if (GET_CODE (x) == CONST_DOUBLE)
10325 return false;
10327 if (GET_CODE (x) == UNSPEC
10328 && (XINT (x, 1) == UNSPEC_PIC
10329 || XINT (x, 1) == UNSPEC_GOT
10330 || XINT (x, 1) == UNSPEC_GOTOFF
10331 || XINT (x, 1) == UNSPEC_GOTPLT
10332 || XINT (x, 1) == UNSPEC_GOTTPOFF
10333 || XINT (x, 1) == UNSPEC_DTPOFF
10334 || XINT (x, 1) == UNSPEC_TPOFF
10335 || XINT (x, 1) == UNSPEC_PLT
10336 || XINT (x, 1) == UNSPEC_SYMOFF
10337 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10338 return false;
10340 fmt = GET_RTX_FORMAT (GET_CODE (x));
10341 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10343 if (fmt[i] == 'E')
10345 int j;
10346 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10347 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10348 return true;
10350 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10351 return true;
10354 return false;
10357 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10358 @GOTOFF in `reg'. */
10360 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10361 rtx reg)
10363 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10364 return orig;
10366 if (GET_CODE (orig) == LABEL_REF
10367 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10369 if (reg == NULL_RTX)
10370 reg = gen_reg_rtx (Pmode);
10372 emit_insn (gen_symGOTOFF2reg (reg, orig));
10373 return reg;
10375 else if (GET_CODE (orig) == SYMBOL_REF)
10377 if (reg == NULL_RTX)
10378 reg = gen_reg_rtx (Pmode);
10380 emit_insn (gen_symGOT2reg (reg, orig));
10381 return reg;
10383 return orig;
10386 /* Given a (logical) mode size and an offset in bytes, try to find the
10387 appropriate displacement value for a mov insn. On SH the displacements
10388 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10389 15 bytes in QImode. To compensate for this we create a new base address by
10390 adding an adjustment value to it.
10392 If the originally requested offset is greater than 127 we prefer using
10393 values 124..127 over 128..131 to increase opportunities to use the
10394 add #imm, Rn insn.
10396 In some cases it is possible that a requested offset might seem unaligned
10397 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10398 This is compensated by adjusting the base address so that the effective
10399 address of the displacement move insn will be aligned.
10401 This is not the best possible way of rebasing the base address, as it
10402 does not look at other present displacement addressings around it.
10403 In some cases this can create more base address adjustments than would
10404 actually be necessary. */
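/* A worked example (a sketch using the SImode limits above): an SImode
   access at offset 68 gets an adjustment of +64 and a remaining displacement
   of 4, i.e. @(4,Rn) on the rebased address; at offset 128 the preference
   for 124..127 yields an adjustment of +124 (still usable with add #imm)
   and again a displacement of 4.  */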
10405 struct disp_adjust
10407 rtx offset_adjust;
10408 rtx mov_disp;
10411 static struct disp_adjust
10412 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10414 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10416 /* Do not try to use SH2A's large displacements here, because this would
10417 effectively disable the small displacement insns. */
10418 const int mode_sz = GET_MODE_SIZE (mode);
10419 const int mov_insn_sz = mov_insn_size (mode, false);
10420 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10421 const int max_disp_next = max_disp + mov_insn_sz;
10422 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10423 HOST_WIDE_INT offset_adjust;
10425 /* In some cases this actually does happen and we must check for it. */
10426 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10427 return res;
10429 /* Keeps the previous behavior for QImode displacement addressing.
10430 This just decides how the offset is re-based. Removing this special
10431 case will result in slightly bigger code on average, but it's not that
10432 bad actually. */
10433 if (mov_insn_sz == 1)
10434 align_modifier = 0;
10436 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10438 if (mode_sz + offset - offset_adjust <= max_disp_next)
10440 res.offset_adjust = GEN_INT (offset_adjust);
10441 res.mov_disp = GEN_INT (offset - offset_adjust);
10444 return res;
10447 /* Try to modify an illegitimate address and make it legitimate.
10448 If we find one, return the new, valid address.
10449 Otherwise, return the original address. */
10450 static rtx
10451 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10453 if (flag_pic)
10454 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10456 if (TARGET_SHMEDIA)
10457 return x;
10459 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10460 || (TARGET_SH2E && mode == SFmode))
10461 return x;
10463 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10464 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10466 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10467 INTVAL (XEXP (x, 1)));
10469 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10471 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10472 adj.offset_adjust, NULL_RTX, 0,
10473 OPTAB_LIB_WIDEN);
10474 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10477 return x;
10480 /* Attempt to replace *p, which is an address that needs reloading, with
10481 a valid memory address for an operand of mode MODE.
10482 Like for sh_legitimize_address, for the SH we try to get a normal form
10483 of the address. That will allow inheritance of the address reloads. */
10484 bool
10485 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10486 int itype)
10488 enum reload_type type = (enum reload_type) itype;
10489 const int mode_sz = GET_MODE_SIZE (mode);
10491 if (sh_lra_p ())
10492 return false;
10494 if (! ALLOW_INDEXED_ADDRESS
10495 && GET_CODE (*p) == PLUS
10496 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10498 *p = copy_rtx (*p);
10499 push_reload (*p, NULL_RTX, p, NULL,
10500 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10501 return true;
10504 if (! ALLOW_INDEXED_ADDRESS
10505 && GET_CODE (*p) == PLUS
10506 && GET_CODE (XEXP (*p, 0)) == PLUS)
10508 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10509 XEXP (XEXP (*p, 0), 1));
10510 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10511 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10512 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10513 return true;
10516 if (TARGET_SHMEDIA)
10517 return false;
10519 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10520 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10521 && (ALLOW_INDEXED_ADDRESS
10522 || XEXP (*p, 0) == stack_pointer_rtx
10523 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10525 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10526 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10528 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10530 push_reload (*p, NULL_RTX, p, NULL,
10531 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10532 return true;
10535 if (TARGET_SH2E && mode == SFmode)
10537 *p = copy_rtx (*p);
10538 push_reload (*p, NULL_RTX, p, NULL,
10539 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10540 return true;
10543 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10544 moves because then reload has a problem figuring the constraint
10545 that the move insn target/source reg must be R0.
10546 Or maybe some handling is wrong in sh_secondary_reload for this
10547 to work properly? */
10548 if ((mode_sz == 4 || mode_sz == 8)
10549 && ! (TARGET_SH4 && mode == DFmode)
10550 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10552 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10553 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10554 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10555 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10556 return true;
10560 /* We must re-recognize what we created before. */
10561 if (GET_CODE (*p) == PLUS
10562 && (mode_sz == 4 || mode_sz == 8)
10563 && GET_CODE (XEXP (*p, 0)) == PLUS
10564 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10565 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10566 && CONST_INT_P (XEXP (*p, 1))
10567 && ! (TARGET_SH2E && mode == SFmode))
10569 /* Because this address is so complex, we know it must have
10570 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10571 it is already unshared, and needs no further unsharing. */
10572 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10573 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10574 return true;
10577 return false;
10580 /* In the name of slightly smaller debug output, and to cater to
10581 general assembler lossage, recognize various UNSPEC sequences
10582 and turn them back into a direct symbol reference. */
10583 static rtx
10584 sh_delegitimize_address (rtx orig_x)
10586 rtx x, y;
10588 orig_x = delegitimize_mem_from_attrs (orig_x);
10590 x = orig_x;
10591 if (MEM_P (x))
10592 x = XEXP (x, 0);
10593 if (GET_CODE (x) == CONST)
10595 y = XEXP (x, 0);
10596 if (GET_CODE (y) == UNSPEC)
10598 if (XINT (y, 1) == UNSPEC_GOT
10599 || XINT (y, 1) == UNSPEC_GOTOFF
10600 || XINT (y, 1) == UNSPEC_SYMOFF)
10601 return XVECEXP (y, 0, 0);
10602 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10604 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10606 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10608 if (GET_CODE (symplt) == UNSPEC
10609 && XINT (symplt, 1) == UNSPEC_PLT)
10610 return XVECEXP (symplt, 0, 0);
10613 else if (TARGET_SHMEDIA
10614 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10615 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10617 rtx offset = XVECEXP (y, 0, 1);
10619 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10620 if (MEM_P (orig_x))
10621 x = replace_equiv_address_nv (orig_x, x);
10622 return x;
10627 return orig_x;
10630 /* Mark the use of a constant in the literal table. If the constant
10631 has multiple labels, make it unique. */
10632 static rtx
10633 mark_constant_pool_use (rtx x)
10635 rtx_insn *insn, *lab;
10636 rtx pattern;
10638 if (x == NULL_RTX)
10639 return x;
10641 switch (GET_CODE (x))
10643 case LABEL_REF:
10644 x = XEXP (x, 0);
10645 case CODE_LABEL:
10646 break;
10647 default:
10648 return x;
10651 /* Get the first label in the list of labels for the same constant
10652 and delete the other labels in the list. */
10653 lab = as_a <rtx_insn *> (x);
10654 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10656 if (!LABEL_P (insn)
10657 || LABEL_REFS (insn) != NEXT_INSN (insn))
10658 break;
10659 lab = insn;
10662 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10663 as_a<rtx_insn *> (insn)->set_deleted ();
10665 /* Mark constants in a window. */
10666 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10668 if (!NONJUMP_INSN_P (insn))
10669 continue;
10671 pattern = PATTERN (insn);
10672 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10673 continue;
10675 switch (XINT (pattern, 1))
10677 case UNSPECV_CONST2:
10678 case UNSPECV_CONST4:
10679 case UNSPECV_CONST8:
10680 XVECEXP (pattern, 0, 1) = const1_rtx;
10681 break;
10682 case UNSPECV_WINDOW_END:
10683 if (XVECEXP (pattern, 0, 0) == x)
10684 return lab;
10685 break;
10686 case UNSPECV_CONST_END:
10687 return lab;
10688 default:
10689 break;
10693 return lab;
10696 /* Return true if it's possible to redirect BRANCH1 to the destination
10697 of an unconditional jump BRANCH2. We only want to do this if the
10698 resulting branch will have a short displacement. */
10699 bool
10700 sh_can_redirect_branch (rtx_insn *branch1, rtx_insn *branch2)
10702 if (flag_expensive_optimizations && simplejump_p (branch2))
10704 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10705 rtx_insn *insn;
10706 int distance;
10708 for (distance = 0, insn = NEXT_INSN (branch1);
10709 insn && distance < 256;
10710 insn = PREV_INSN (insn))
10712 if (insn == dest)
10713 return true;
10714 else
10715 distance += get_attr_length (insn);
10717 for (distance = 0, insn = NEXT_INSN (branch1);
10718 insn && distance < 256;
10719 insn = NEXT_INSN (insn))
10721 if (insn == dest)
10722 return true;
10723 else
10724 distance += get_attr_length (insn);
10727 return false;
10730 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10731 bool
10732 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10733 unsigned int new_reg)
10735 /* Interrupt functions can only use registers that have already been
10736 saved by the prologue, even if they would normally be
10737 call-clobbered. */
10738 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10739 return false;
10741 return true;
10744 /* Function to update the integer COST
10745 based on the relationship between INSN that is dependent on
10746 DEP_INSN through the dependence LINK. The default is to make no
10747 adjustment to COST. This can be used for example to specify to
10748 the scheduler that an output- or anti-dependence does not incur
10749 the same cost as a data-dependence. The return value should be
10750 the new value for COST. */
10751 static int
10752 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10753 rtx_insn *dep_insn, int cost)
10755 rtx reg, use_pat;
10757 if (TARGET_SHMEDIA)
10759 /* On SHmedia, if the dependence is an anti-dependence or
10760 output-dependence, there is no cost. */
10761 if (REG_NOTE_KIND (link) != 0)
10763 /* However, dependencies between target register loads and
10764 uses of the register in a subsequent block that are separated
10765 by a conditional branch are not modelled - we have to make do with
10766 the anti-dependency between the target register load and the
10767 conditional branch that ends the current block. */
10768 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10769 && GET_CODE (PATTERN (dep_insn)) == SET
10770 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10771 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10772 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10774 int orig_cost = cost;
10775 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10776 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10777 ? insn : JUMP_LABEL (insn));
10778 /* On the likely path, the branch costs 1, on the unlikely path,
10779 it costs 3. */
10780 cost--;
10782 target = next_active_insn (target);
10783 while (target && ! flow_dependent_p (target, dep_insn)
10784 && --cost > 0);
10785 /* If two branches are executed in immediate succession, with the
10786 first branch properly predicted, this causes a stall at the
10787 second branch, hence we won't need the target for the
10788 second branch for two cycles after the launch of the first
10789 branch. */
10790 if (cost > orig_cost - 2)
10791 cost = orig_cost - 2;
10793 else
10794 cost = 0;
10797 else if (get_attr_is_mac_media (insn)
10798 && get_attr_is_mac_media (dep_insn))
10799 cost = 1;
10801 else if (! reload_completed
10802 && GET_CODE (PATTERN (insn)) == SET
10803 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10804 && GET_CODE (PATTERN (dep_insn)) == SET
10805 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10806 && cost < 4)
10807 cost = 4;
10808 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10809 that is needed at the target. */
10810 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10811 && ! flow_dependent_p (insn, dep_insn))
10812 cost--;
10814 else if (REG_NOTE_KIND (link) == 0)
10816 enum attr_type type;
10817 rtx dep_set;
10819 if (recog_memoized (insn) < 0
10820 || recog_memoized (dep_insn) < 0)
10821 return cost;
10823 dep_set = single_set (dep_insn);
10825 /* The latency that we specify in the scheduling description refers
10826 to the actual output, not to an auto-increment register; for that,
10827 the latency is one. */
10828 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10830 rtx set = single_set (insn);
10832 if (set
10833 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10834 && (!MEM_P (SET_DEST (set))
10835 || !reg_mentioned_p (SET_DEST (dep_set),
10836 XEXP (SET_DEST (set), 0))))
10837 cost = 1;
10839 /* The only input for a call that is timing-critical is the
10840 function's address. */
10841 if (CALL_P (insn))
10843 rtx call = get_call_rtx_from (insn);
10844 if (call
10845 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10846 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10847 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10848 cost -= TARGET_SH4_300 ? 3 : 6;
10850 /* Likewise, the most timing critical input for an sfuncs call
10851 is the function address. However, sfuncs typically start
10852 using their arguments pretty quickly.
10853 Assume a four cycle delay for SH4 before they are needed.
10854 Cached ST40-300 calls are quicker, so assume only a one
10855 cycle delay there.
10856 ??? Maybe we should encode the delays till input registers
10857 are needed by sfuncs into the sfunc call insn. */
10858 /* All sfunc calls are parallels with at least four components.
10859 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10860 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10861 && XVECLEN (PATTERN (insn), 0) >= 4
10862 && (reg = sfunc_uses_reg (insn)))
10864 if (! reg_set_p (reg, dep_insn))
10865 cost -= TARGET_SH4_300 ? 1 : 4;
10867 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10869 enum attr_type dep_type = get_attr_type (dep_insn);
10871 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10872 cost--;
10873 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10874 && (type = get_attr_type (insn)) != TYPE_CALL
10875 && type != TYPE_SFUNC)
10876 cost--;
10877 /* When the preceding instruction loads the shift amount of
10878 the following SHAD/SHLD, the latency of the load is increased
10879 by 1 cycle. */
10880 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10881 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10882 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10883 XEXP (SET_SRC (single_set (insn)),
10884 1)))
10885 cost++;
10886 /* When an LS group instruction with a latency of less than
10887 3 cycles is followed by a double-precision floating-point
10888 instruction, FIPR, or FTRV, the latency of the first
10889 instruction is increased to 3 cycles. */
10890 else if (cost < 3
10891 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10892 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10893 cost = 3;
10894 /* The lsw register of a double-precision computation is ready one
10895 cycle earlier. */
10896 else if (reload_completed
10897 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10898 && (use_pat = single_set (insn))
10899 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10900 SET_SRC (use_pat)))
10901 cost -= 1;
10903 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10904 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10905 cost -= 1;
10907 else if (TARGET_SH4_300)
10909 /* Stores need their input register two cycles later. */
10910 if (dep_set && cost >= 1
10911 && ((type = get_attr_type (insn)) == TYPE_STORE
10912 || type == TYPE_PSTORE
10913 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10915 rtx set = single_set (insn);
10917 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10918 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10920 cost -= 2;
10921 /* But don't reduce the cost below 1 if the address depends
10922 on a side effect of dep_insn. */
10923 if (cost < 1
10924 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10925 cost = 1;
10930 /* An anti-dependence penalty of two applies if the first insn is a double
10931 precision fadd / fsub / fmul. */
10932 else if (!TARGET_SH4_300
10933 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10934 && recog_memoized (dep_insn) >= 0
10935 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10936 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10937 /* A lot of alleged anti-flow dependences are fake,
10938 so check this one is real. */
10939 && flow_dependent_p (dep_insn, insn))
10940 cost = 2;
10942 return cost;
10945 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10946 if DEP_INSN is anti-flow dependent on INSN. */
10947 static bool
10948 flow_dependent_p (rtx insn, rtx dep_insn)
10950 rtx tmp = PATTERN (insn);
10952 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10953 return tmp == NULL_RTX;
10956 /* A helper function for flow_dependent_p called through note_stores. */
10957 static void
10958 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10960 rtx * pinsn = (rtx *) data;
10962 if (*pinsn && reg_referenced_p (x, *pinsn))
10963 *pinsn = NULL_RTX;
10966 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10967 'special function' patterns (type sfunc) that clobber pr, but that
10968 do not look like function calls to leaf_function_p. Hence we must
10969 do this extra check. */
10970 static int
10971 sh_pr_n_sets (void)
10973 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10976 /* Return where to allocate pseudo for a given hard register initial
10977 value. */
10978 static rtx
10979 sh_allocate_initial_value (rtx hard_reg)
10981 rtx x;
10983 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10985 if (crtl->is_leaf
10986 && ! sh_pr_n_sets ()
10987 && ! (TARGET_SHCOMPACT
10988 && ((crtl->args.info.call_cookie
10989 & ~ CALL_COOKIE_RET_TRAMP (1))
10990 || crtl->saves_all_registers)))
10991 x = hard_reg;
10992 else
10993 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10995 else
10996 x = NULL_RTX;
10998 return x;
11001 /* This function returns "2" to indicate dual issue for the SH4
11002 processor. To be used by the DFA pipeline description. */
11003 static int
11004 sh_issue_rate (void)
11006 if (TARGET_SUPERSCALAR)
11007 return 2;
11008 else
11009 return 1;
11012 /* Functions for ready queue reordering for sched1. */
11014 /* Get weight for mode for a set x. */
11015 static short
11016 find_set_regmode_weight (rtx x, machine_mode mode)
11018 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11019 return 1;
11020 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11022 if (REG_P (SET_DEST (x)))
11024 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11025 return 1;
11026 else
11027 return 0;
11029 return 1;
11031 return 0;
11034 /* Get regmode weight for insn. */
11035 static short
11036 find_insn_regmode_weight (rtx insn, machine_mode mode)
11038 short reg_weight = 0;
11039 rtx x;
11041 /* Increment weight for each register born here. */
11042 x = PATTERN (insn);
11043 reg_weight += find_set_regmode_weight (x, mode);
11044 if (GET_CODE (x) == PARALLEL)
11046 int j;
11047 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11049 x = XVECEXP (PATTERN (insn), 0, j);
11050 reg_weight += find_set_regmode_weight (x, mode);
11053 /* Decrement weight for each register that dies here. */
11054 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11056 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11058 rtx note = XEXP (x, 0);
11059 if (REG_P (note) && GET_MODE (note) == mode)
11060 reg_weight--;
11063 return reg_weight;
11066 /* Calculate regmode weights for all insns of a basic block. */
11067 static void
11068 find_regmode_weight (basic_block b, machine_mode mode)
11070 rtx_insn *insn, *next_tail, *head, *tail;
11072 get_ebb_head_tail (b, b, &head, &tail);
11073 next_tail = NEXT_INSN (tail);
11075 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11077 /* Handle register life information. */
11078 if (!INSN_P (insn))
11079 continue;
11081 if (mode == SFmode)
11082 INSN_REGMODE_WEIGHT (insn, mode) =
11083 find_insn_regmode_weight (insn, mode)
11084 + 2 * find_insn_regmode_weight (insn, DFmode);
11085 else if (mode == SImode)
11086 INSN_REGMODE_WEIGHT (insn, mode) =
11087 find_insn_regmode_weight (insn, mode)
11088 + 2 * find_insn_regmode_weight (insn, DImode);
11092 /* Comparison function for ready queue sorting. */
11093 static int
11094 rank_for_reorder (const void *x, const void *y)
11096 rtx_insn *tmp = *(rtx_insn * const *) y;
11097 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11099 /* The insn in a schedule group should be issued first. */
11100 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11101 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11103 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11104 minimizes instruction movement, thus minimizing sched's effect on
11105 register pressure. */
11106 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11109 /* Resort the array A, in which only the element at index N may be out of order. */
11110 static void
11111 swap_reorder (rtx_insn **a, int n)
11113 rtx_insn *insn = a[n - 1];
11114 int i = n - 2;
11116 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11118 a[i + 1] = a[i];
11119 i -= 1;
11121 a[i + 1] = insn;
11124 /* Sort the ready list by ascending priority. */
11125 static void
11126 ready_reorder (rtx_insn **ready, int nready)
11128 if (nready == 2)
11129 swap_reorder (ready, nready);
11130 else if (nready > 2)
11131 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11134 /* Count life regions of r0 for a block. */
11135 static int
11136 find_r0_life_regions (basic_block b)
11138 rtx_insn *end, *insn;
11139 rtx pset;
11140 rtx r0_reg;
11141 int live;
11142 int set;
11143 int death = 0;
11145 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11147 set = 1;
11148 live = 1;
11150 else
11152 set = 0;
11153 live = 0;
11156 insn = BB_HEAD (b);
11157 end = BB_END (b);
11158 r0_reg = gen_rtx_REG (SImode, R0_REG);
11159 while (1)
11161 if (INSN_P (insn))
11163 if (find_regno_note (insn, REG_DEAD, R0_REG))
11165 death++;
11166 live = 0;
11168 if (!live
11169 && (pset = single_set (insn))
11170 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11171 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11173 set++;
11174 live = 1;
11177 if (insn == end)
11178 break;
11179 insn = NEXT_INSN (insn);
11181 return set - death;
11184 /* Calculate regmode weights for all insns of all basic blocks. */
11185 static void
11186 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11187 int verbose ATTRIBUTE_UNUSED,
11188 int old_max_uid)
11190 basic_block b;
11192 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11193 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11194 r0_life_regions = 0;
11196 FOR_EACH_BB_REVERSE_FN (b, cfun)
11198 find_regmode_weight (b, SImode);
11199 find_regmode_weight (b, SFmode);
11200 if (!reload_completed)
11201 r0_life_regions += find_r0_life_regions (b);
11204 CURR_REGMODE_PRESSURE (SImode) = 0;
11205 CURR_REGMODE_PRESSURE (SFmode) = 0;
11208 /* Cleanup. */
11209 static void
11210 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11211 int verbose ATTRIBUTE_UNUSED)
11213 if (regmode_weight[0])
11215 free (regmode_weight[0]);
11216 regmode_weight[0] = NULL;
11218 if (regmode_weight[1])
11220 free (regmode_weight[1]);
11221 regmode_weight[1] = NULL;
11225 /* The set of scalar modes supported differs from the default version in TImode
11226 for 32-bit SHMEDIA. */
11227 static bool
11228 sh_scalar_mode_supported_p (machine_mode mode)
11230 if (TARGET_SHMEDIA32 && mode == TImode)
11231 return false;
11233 return default_scalar_mode_supported_p (mode);
11236 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11237 keep count of register pressures on SImode and SFmode. */
11238 static int
11239 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11240 int sched_verbose ATTRIBUTE_UNUSED,
11241 rtx_insn *insn,
11242 int can_issue_more)
11244 if (GET_CODE (PATTERN (insn)) != USE
11245 && GET_CODE (PATTERN (insn)) != CLOBBER)
11246 cached_can_issue_more = can_issue_more - 1;
11247 else
11248 cached_can_issue_more = can_issue_more;
11250 if (reload_completed)
11251 return cached_can_issue_more;
11253 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11254 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11256 return cached_can_issue_more;
11259 static void
11260 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11261 int verbose ATTRIBUTE_UNUSED,
11262 int veclen ATTRIBUTE_UNUSED)
11264 CURR_REGMODE_PRESSURE (SImode) = 0;
11265 CURR_REGMODE_PRESSURE (SFmode) = 0;
11268 /* Some magic numbers. */
11269 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11270 functions that already have high pressure on r0. */
11271 #define R0_MAX_LIFE_REGIONS 2
11272 /* Register Pressure thresholds for SImode and SFmode registers. */
11273 #define SIMODE_MAX_WEIGHT 5
11274 #define SFMODE_MAX_WEIGHT 10
11276 /* Return true if the pressure is high for MODE. */
11277 static bool
11278 high_pressure (machine_mode mode)
11280 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11281 functions that already have high pressure on r0. */
11282 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11283 return true;
11285 if (mode == SFmode)
11286 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11287 else
11288 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11291 /* Reorder ready queue if register pressure is high. */
11292 static int
11293 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11294 int sched_verbose ATTRIBUTE_UNUSED,
11295 rtx_insn **ready,
11296 int *n_readyp,
11297 int clock_var ATTRIBUTE_UNUSED)
11299 if (reload_completed)
11300 return sh_issue_rate ();
11302 if (high_pressure (SFmode) || high_pressure (SImode))
11304 ready_reorder (ready, *n_readyp);
11307 return sh_issue_rate ();
11310 /* Skip cycles if the current register pressure is high. */
11311 static int
11312 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11313 int sched_verbose ATTRIBUTE_UNUSED,
11314 rtx_insn **ready ATTRIBUTE_UNUSED,
11315 int *n_readyp ATTRIBUTE_UNUSED,
11316 int clock_var ATTRIBUTE_UNUSED)
11318 if (reload_completed)
11319 return cached_can_issue_more;
11321 if (high_pressure (SFmode) || high_pressure (SImode))
11322 skip_cycles = 1;
11324 return cached_can_issue_more;
11327 /* Skip cycles without sorting the ready queue. This will move insns from
11328 Q -> R. If this is the last cycle we are skipping, allow sorting of the
11329 ready queue by sh_reorder. */
11331 /* Generally, skipping this many cycles is sufficient for all insns to move
11332 from Q -> R. */
11333 #define MAX_SKIPS 8
11335 static int
11336 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11337 int sched_verbose ATTRIBUTE_UNUSED,
11338 rtx_insn *insn ATTRIBUTE_UNUSED,
11339 int last_clock_var,
11340 int clock_var,
11341 int *sort_p)
11343 if (reload_completed)
11344 return 0;
11346 if (skip_cycles)
11348 if ((clock_var - last_clock_var) < MAX_SKIPS)
11350 *sort_p = 0;
11351 return 1;
11353 /* If this is the last cycle we are skipping, allow reordering of R. */
11354 if ((clock_var - last_clock_var) == MAX_SKIPS)
11356 *sort_p = 1;
11357 return 1;
11361 skip_cycles = 0;
11363 return 0;
11366 /* SHmedia requires registers for branches, so we can't generate new
11367 branches past reload. */
11368 static bool
11369 sh_cannot_modify_jumps_p (void)
11371 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11374 static reg_class_t
11375 sh_target_reg_class (void)
11377 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11380 static bool
11381 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11383 if (! shmedia_space_reserved_for_target_registers)
11384 return 0;
11385 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11386 return 0;
11388 HARD_REG_SET dummy;
11389 if (calc_live_regs (&dummy) >= 6 * 8)
11390 return 1;
11391 return 0;
11394 static bool
11395 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11397 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11401 /* On the SH1..SH4, the trampoline looks like
11402 2 0002 D202 mov.l l2,r2
11403 1 0000 D301 mov.l l1,r3
11404 3 0004 422B jmp @r2
11405 4 0006 0009 nop
11406 5 0008 00000000 l1: .long area
11407 6 000c 00000000 l2: .long function
11409 SH5 (compact) uses r1 instead of r3 for the static chain. */
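/* Illustrative note (derived from the layout above and the stores in
   sh_trampoline_init below): the two SImode words at offsets 0 and 4 hold
   the four opcodes d202/d301/422b/0009 in an endian-dependent order, the
   word at offset 8 is l1 (the static chain, CXT) and the word at offset 12
   is l2 (the function address, FNADDR).  */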
11412 /* Emit RTL insns to initialize the variable parts of a trampoline.
11413 FNADDR is an RTX for the address of the function's pure code.
11414 CXT is an RTX for the static chain value for the function. */
11415 static void
11416 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11418 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11419 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11421 if (TARGET_SHMEDIA64)
11423 rtx tramp_templ;
11424 int fixed_len;
11426 rtx movi1 = GEN_INT (0xcc000010);
11427 rtx shori1 = GEN_INT (0xc8000010);
11428 rtx src, dst;
11430 /* The following trampoline works within a +- 128 KB range for cxt:
11431 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11432 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11433 gettr tr1,r1; blink tr0,r63 */
11434 /* Address rounding makes it hard to compute the exact bounds of the
11435 offset for this trampoline, but we have a rather generous offset
11436 range, so frame_offset should do fine as an upper bound. */
11437 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11439 /* ??? could optimize this trampoline initialization
11440 by writing DImode words with two insns each. */
11441 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11442 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11443 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11444 insn = gen_rtx_AND (DImode, insn, mask);
11445 /* Or in ptb/u .,tr1 pattern */
11446 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11447 insn = force_operand (insn, NULL_RTX);
11448 insn = gen_lowpart (SImode, insn);
11449 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11450 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11451 insn = gen_rtx_AND (DImode, insn, mask);
11452 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11453 insn = gen_lowpart (SImode, insn);
11454 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11455 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11456 insn = gen_rtx_AND (DImode, insn, mask);
11457 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11458 insn = gen_lowpart (SImode, insn);
11459 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11460 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11461 insn = gen_rtx_AND (DImode, insn, mask);
11462 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11463 insn = gen_lowpart (SImode, insn);
11464 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11465 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11466 insn = gen_rtx_AND (DImode, insn, mask);
11467 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11468 insn = gen_lowpart (SImode, insn);
11469 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11470 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11471 GEN_INT (0x6bf10600));
11472 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11473 GEN_INT (0x4415fc10));
11474 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11475 GEN_INT (0x4401fff0));
11476 emit_insn (gen_ic_invalidate_line (tramp));
11477 return;
11479 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11480 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11482 tramp_templ = gen_datalabel_ref (tramp_templ);
11483 dst = tramp_mem;
11484 src = gen_const_mem (BLKmode, tramp_templ);
11485 set_mem_align (dst, 256);
11486 set_mem_align (src, 64);
11487 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11489 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11490 emit_move_insn (adjust_address (tramp_mem, Pmode,
11491 fixed_len + GET_MODE_SIZE (Pmode)),
11492 cxt);
11493 emit_insn (gen_ic_invalidate_line (tramp));
11494 return;
11496 else if (TARGET_SHMEDIA)
11498 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11499 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11500 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11501 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11502 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11503 rotated 10 right, and higher 16 bit of every 32 selected. */
11504 rtx movishori
11505 = force_reg (V2HImode, (simplify_gen_subreg
11506 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11507 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11508 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11510 fnaddr = force_reg (SImode, fnaddr);
11511 cxt = force_reg (SImode, cxt);
11512 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11513 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11514 movishori));
11515 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11516 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11517 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11518 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11519 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11520 gen_rtx_SUBREG (V2HImode, cxt, 0),
11521 movishori));
11522 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11523 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11524 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11525 if (TARGET_LITTLE_ENDIAN)
11527 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11528 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11530 else
11532 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11533 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11535 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11536 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11537 emit_insn (gen_ic_invalidate_line (tramp));
11538 return;
11540 else if (TARGET_SHCOMPACT)
11542 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11543 return;
11545 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11546 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11547 SImode));
11548 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11549 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11550 SImode));
11551 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11552 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11553 if (TARGET_HARD_SH4 || TARGET_SH5)
11555 if (!TARGET_INLINE_IC_INVALIDATE
11556 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11557 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11558 FUNCTION_ORDINARY),
11559 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11560 else
11561 emit_insn (gen_ic_invalidate_line (tramp));
11565 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
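/* (On SH-5 the low bit of a branch target address selects the ISA, so the
   "+ 1" below marks the trampoline as SHmedia rather than SHcompact code.)  */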
11566 static rtx
11567 sh_trampoline_adjust_address (rtx tramp)
11569 if (TARGET_SHMEDIA)
11570 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11571 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11572 return tramp;
11575 /* FIXME: This is overly conservative. A SHcompact function that
11576 receives arguments ``by reference'' will have them stored in its
11577 own stack frame, so it must not pass pointers or references to
11578 these arguments to other functions by means of sibling calls. */
11579 /* If PIC, we cannot make sibling calls to global functions
11580 because the PLT requires r12 to be live. */
11581 static bool
11582 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11584 return (1
11585 && (! TARGET_SHCOMPACT
11586 || crtl->args.info.stack_regs == 0)
11587 && ! sh_cfun_interrupt_handler_p ()
11588 && (! flag_pic
11589 || (decl && ! TREE_PUBLIC (decl))
11590 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11593 /* Machine specific built-in functions. */
11595 struct builtin_description
11597 bool (* const is_enabled) (void);
11598 const enum insn_code icode;
11599 const char *const name;
11600 int signature;
11601 tree fndecl;
11604 static bool
11605 shmedia_builtin_p (void)
11607 return TARGET_SHMEDIA;
11610 /* This predicate is for built-ins that are not SHmedia specific; it is
11611 used by the fpscr built-ins below. */
11612 static bool
11613 sh1_builtin_p (void)
11615 return TARGET_SH1;
11618 /* Describe the number and signedness of arguments; arg[0] == result
11619 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11620 /* 9: 64-bit pointer, 10: 32-bit pointer. */
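/* For example (illustrative reading of the encoding above): SH_BLTIN_SH_HI,
   { 4, 4, 1 }, describes a builtin whose result and first argument use the
   insn operand modes with "don't care" signedness and whose second argument
   (a shift count or control value) is unsigned; SH_BLTIN_PV, { 0, 8 },
   describes a builtin that returns void and takes a single pointer argument.  */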
11621 static const char signature_args[][4] =
11623 #define SH_BLTIN_V2SI2 0
11624 { 4, 4 },
11625 #define SH_BLTIN_V4HI2 1
11626 { 4, 4 },
11627 #define SH_BLTIN_V2SI3 2
11628 { 4, 4, 4 },
11629 #define SH_BLTIN_V4HI3 3
11630 { 4, 4, 4 },
11631 #define SH_BLTIN_V8QI3 4
11632 { 4, 4, 4 },
11633 #define SH_BLTIN_MAC_HISI 5
11634 { 1, 4, 4, 1 },
11635 #define SH_BLTIN_SH_HI 6
11636 { 4, 4, 1 },
11637 #define SH_BLTIN_SH_SI 7
11638 { 4, 4, 1 },
11639 #define SH_BLTIN_V4HI2V2SI 8
11640 { 4, 4, 4 },
11641 #define SH_BLTIN_V4HI2V8QI 9
11642 { 4, 4, 4 },
11643 #define SH_BLTIN_SISF 10
11644 { 4, 2 },
11645 #define SH_BLTIN_LDUA_L 11
11646 { 2, 10 },
11647 #define SH_BLTIN_LDUA_Q 12
11648 { 1, 10 },
11649 #define SH_BLTIN_STUA_L 13
11650 { 0, 10, 2 },
11651 #define SH_BLTIN_STUA_Q 14
11652 { 0, 10, 1 },
11653 #define SH_BLTIN_LDUA_L64 15
11654 { 2, 9 },
11655 #define SH_BLTIN_LDUA_Q64 16
11656 { 1, 9 },
11657 #define SH_BLTIN_STUA_L64 17
11658 { 0, 9, 2 },
11659 #define SH_BLTIN_STUA_Q64 18
11660 { 0, 9, 1 },
11661 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11662 #define SH_BLTIN_2 19
11663 #define SH_BLTIN_SU 19
11664 { 1, 2 },
11665 #define SH_BLTIN_3 20
11666 #define SH_BLTIN_SUS 20
11667 { 2, 2, 1 },
11668 #define SH_BLTIN_PSSV 21
11669 { 0, 8, 2, 2 },
11670 #define SH_BLTIN_XXUU 22
11671 #define SH_BLTIN_UUUU 22
11672 { 1, 1, 1, 1 },
11673 #define SH_BLTIN_PV 23
11674 { 0, 8 },
11675 #define SH_BLTIN_VP 24
11676 { 8, 0 },
11677 #define SH_BLTIN_UV 25
11678 { 1, 0 },
11679 #define SH_BLTIN_VU 26
11680 { 0, 1 },
11682 /* mcmv: operands considered unsigned. */
11683 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11684 /* mperm: control value considered unsigned int. */
11685 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11686 /* mshards_q: returns signed short. */
11687 /* nsb: takes long long arg, returns unsigned char. */
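/* Usage sketch (illustrative, not part of the original source): from the
   notes above and the SH_BLTIN_SU signature, the NSB builtin ends up with a
   prototype roughly equivalent to
     unsigned char __builtin_sh_media_NSB (long long);
   the actual declarations are created at run time by sh_init_builtins from
   signature_args and the insn operand modes.  */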
11688 static struct builtin_description bdesc[] =
11690 { shmedia_builtin_p,
11691 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11692 { shmedia_builtin_p,
11693 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11694 { shmedia_builtin_p,
11695 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11696 { shmedia_builtin_p,
11697 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11698 { shmedia_builtin_p,
11699 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11700 { shmedia_builtin_p,
11701 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11702 { shmedia_builtin_p,
11703 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11704 { shmedia_builtin_p,
11705 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11706 { shmedia_builtin_p,
11707 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11708 { shmedia_builtin_p,
11709 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11710 { shmedia_builtin_p,
11711 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11712 { shmedia_builtin_p,
11713 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11714 { shmedia_builtin_p,
11715 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11716 { shmedia_builtin_p,
11717 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11718 { shmedia_builtin_p,
11719 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11720 { shmedia_builtin_p,
11721 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11722 { shmedia_builtin_p,
11723 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11724 { shmedia_builtin_p,
11725 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11726 { shmedia_builtin_p,
11727 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11728 { shmedia_builtin_p,
11729 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11730 { shmedia_builtin_p,
11731 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11732 { shmedia_builtin_p,
11733 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11734 { shmedia_builtin_p,
11735 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11736 { shmedia_builtin_p,
11737 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11738 { shmedia_builtin_p,
11739 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11740 { shmedia_builtin_p,
11741 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11742 { shmedia_builtin_p,
11743 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11744 { shmedia_builtin_p,
11745 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11746 { shmedia_builtin_p,
11747 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11748 { shmedia_builtin_p,
11749 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11750 { shmedia_builtin_p,
11751 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11752 { shmedia_builtin_p,
11753 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11754 { shmedia_builtin_p,
11755 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11756 { shmedia_builtin_p,
11757 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11758 { shmedia_builtin_p,
11759 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11760 { shmedia_builtin_p,
11761 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11762 { shmedia_builtin_p,
11763 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11764 { shmedia_builtin_p,
11765 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11766 { shmedia_builtin_p,
11767 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11768 { shmedia_builtin_p,
11769 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11770 { shmedia_builtin_p,
11771 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11772 { shmedia_builtin_p,
11773 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11774 { shmedia_builtin_p,
11775 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11776 { shmedia_builtin_p,
11777 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11778 { shmedia_builtin_p,
11779 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11780 { shmedia_builtin_p,
11781 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11782 { shmedia_builtin_p,
11783 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11784 { shmedia_builtin_p,
11785 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11786 { shmedia_builtin_p,
11787 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11788 { shmedia_builtin_p,
11789 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11790 { shmedia_builtin_p,
11791 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11792 { shmedia_builtin_p,
11793 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11794 { shmedia_builtin_p,
11795 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11796 { shmedia_builtin_p,
11797 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11798 { shmedia_builtin_p,
11799 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11800 { shmedia_builtin_p,
11801 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11802 { shmedia_builtin_p,
11803 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11804 { shmedia_builtin_p,
11805 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11806 { shmedia_builtin_p,
11807 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11808 { shmedia_builtin_p,
11809 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11810 { shmedia_builtin_p,
11811 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11812 { shmedia_builtin_p,
11813 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11814 { shmedia_builtin_p,
11815 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11816 { shmedia_builtin_p,
11817 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11818 { shmedia_builtin_p,
11819 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11820 { shmedia_builtin_p,
11821 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11822 { shmedia_builtin_p,
11823 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11824 { shmedia_builtin_p,
11825 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11857 { sh1_builtin_p,
11858 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11859 { sh1_builtin_p,
11860 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11863 static void
11864 sh_init_builtins (void)
11866 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11867 memset (shared, 0, sizeof shared);
11869 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11871 builtin_description* d = &bdesc[di];
11873 if (!d->is_enabled ())
11874 continue;
11876 tree type, arg_type = NULL_TREE;
11877 int signature = d->signature;
11879 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11880 type = shared[signature];
11881 else
11883 int has_result = signature_args[signature][0] != 0;
11884 tree args[3];
11886 if ((signature_args[signature][1] & 8)
11887 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11888 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11889 continue;
11890 if (! TARGET_FPU_ANY
11891 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11892 continue;
11893 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11894 args[i] = NULL_TREE;
11895 for (int i = 3; ; i--)
11897 int arg = signature_args[signature][i];
11898 int opno = i - 1 + has_result;
11900 if (arg & 8)
11901 arg_type = ptr_type_node;
11902 else if (arg)
11903 arg_type = (*lang_hooks.types.type_for_mode)
11904 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11905 else if (i)
11906 continue;
11907 else
11908 arg_type = void_type_node;
11909 if (i == 0)
11910 break;
11911 args[i-1] = arg_type;
11913 type = build_function_type_list (arg_type, args[0], args[1],
11914 args[2], NULL_TREE);
11915 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11916 shared[signature] = type;
11918 d->fndecl =
11919 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11920 NULL, NULL_TREE);
11924 /* Implements target hook vector_mode_supported_p. */
11925 bool
11926 sh_vector_mode_supported_p (machine_mode mode)
11928 if (TARGET_FPU_ANY
11929 && ((mode == V2SFmode)
11930 || (mode == V4SFmode)
11931 || (mode == V16SFmode)))
11932 return true;
11934 else if (TARGET_SHMEDIA
11935 && ((mode == V8QImode)
11936 || (mode == V2HImode)
11937 || (mode == V4HImode)
11938 || (mode == V2SImode)))
11939 return true;
11941 return false;
11944 bool
11945 sh_frame_pointer_required (void)
11947 /* If needed, override this in other tm.h files to cope with various OS
11948 lossage requiring a frame pointer. */
11949 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11950 return true;
11952 if (crtl->profile)
11953 return true;
11955 return false;
11958 /* Implements target hook dwarf_calling_convention. Return an enum
11959 of dwarf_calling_convention. */
11961 sh_dwarf_calling_convention (const_tree func)
11963 if (sh_attr_renesas_p (func))
11964 return DW_CC_GNU_renesas_sh;
11966 return DW_CC_normal;
11969 /* Returns the sh builtin decl for CODE. */
11970 static tree
11971 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11973 if (code >= ARRAY_SIZE (bdesc))
11974 return error_mark_node;
11976 if (!bdesc[code].is_enabled ())
11977 return error_mark_node;
11979 return bdesc[code].fndecl;
11982 /* Expand an expression EXP that calls a built-in function,
11983 with result going to TARGET if that's convenient
11984 (and in mode MODE if that's convenient).
11985 SUBTARGET may be used as the target for computing one of EXP's operands.
11986 IGNORE is nonzero if the value is to be ignored. */
11987 static rtx
11988 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11989 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11991 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11992 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11993 const struct builtin_description *d = &bdesc[fcode];
11994 enum insn_code icode = d->icode;
11995 int signature = d->signature;
11996 int nop = 0;
11997 rtx op[4];
11999 if (signature_args[signature][0])
12001 if (ignore)
12002 return NULL_RTX;
12004 machine_mode tmode = insn_data[icode].operand[0].mode;
12005 if (! target || GET_MODE (target) != tmode
12006 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12007 target = gen_reg_rtx (tmode);
12008 op[nop++] = target;
12010 else
12011 target = NULL_RTX;
12013 for (int i = 1; i <= 3; i++, nop++)
12015 tree arg;
12016 machine_mode opmode, argmode;
12017 tree optype;
12019 if (! signature_args[signature][i])
12020 break;
12021 arg = CALL_EXPR_ARG (exp, i - 1);
12022 if (arg == error_mark_node)
12023 return const0_rtx;
12024 if (signature_args[signature][i] & 8)
12026 opmode = ptr_mode;
12027 optype = ptr_type_node;
12029 else
12031 opmode = insn_data[icode].operand[nop].mode;
12032 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12034 argmode = TYPE_MODE (TREE_TYPE (arg));
12035 if (argmode != opmode)
12036 arg = build1 (NOP_EXPR, optype, arg);
12037 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12038 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12039 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12042 rtx pat = NULL_RTX;
12044 switch (nop)
12046 case 1:
12047 pat = (*insn_data[d->icode].genfun) (op[0]);
12048 break;
12049 case 2:
12050 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12051 break;
12052 case 3:
12053 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12054 break;
12055 case 4:
12056 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12057 break;
12058 default:
12059 gcc_unreachable ();
12061 if (! pat)
12062 return NULL_RTX;
12063 emit_insn (pat);
12064 return target;
12067 void
12068 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12070 rtx sel0 = const0_rtx;
12071 rtx sel1 = const1_rtx;
12072 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12073 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12075 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12076 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12079 void
12080 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12082 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12084 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12085 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12088 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12089 We can allow any mode in any general register. The special registers
12090 only allow SImode. Don't allow any mode in the PR.
12092 We cannot hold DCmode values in the XD registers because alter_reg
12093 handles subregs of them incorrectly. We could work around this by
12094 spacing the XD registers like the DR registers, but this would require
12095 additional memory in every compilation to hold larger register vectors.
12096 We could hold SFmode / SCmode values in XD registers, but that
12097 would require a tertiary reload when reloading from / to memory,
12098 and a secondary reload to reload from / to general regs; that
12099 seems to be a losing proposition.
12101 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12102 it won't be ferried through GP registers first. */
12103 bool
12104 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12106 if (SPECIAL_REGISTER_P (regno))
12107 return mode == SImode;
12109 if (regno == FPUL_REG)
12110 return (mode == SImode || mode == SFmode);
12112 if (FP_REGISTER_P (regno) && mode == SFmode)
12113 return true;
12115 if (mode == V2SFmode)
12117 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12118 || GENERAL_REGISTER_P (regno)))
12119 return true;
12120 else
12121 return false;
12124 if (mode == V4SFmode)
12126 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12127 || GENERAL_REGISTER_P (regno))
12128 return true;
12129 else
12130 return false;
12133 if (mode == V16SFmode)
12135 if (TARGET_SHMEDIA)
12137 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12138 return true;
12139 else
12140 return false;
12142 else
12143 return regno == FIRST_XD_REG;
12146 if (FP_REGISTER_P (regno))
12148 if (mode == SFmode
12149 || mode == SImode
12150 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12151 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12152 || mode == DCmode
12153 || (TARGET_SHMEDIA
12154 && (mode == DFmode || mode == DImode
12155 || mode == V2SFmode || mode == TImode)))
12156 && ((regno - FIRST_FP_REG) & 1) == 0)
12157 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12158 && ((regno - FIRST_FP_REG) & 3) == 0))
12159 return true;
12160 else
12161 return false;
12164 if (XD_REGISTER_P (regno))
12165 return mode == DFmode;
12167 if (TARGET_REGISTER_P (regno))
12168 return (mode == DImode || mode == SImode || mode == PDImode);
12170 if (regno == PR_REG)
12171 return mode == SImode;
12173 if (regno == FPSCR_REG)
12174 return mode == SImode;
12176 /* FIXME. This works around PR target/37633 for -O0. */
12177 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12179 unsigned int n = GET_MODE_SIZE (mode) / 8;
12181 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12182 && regno <= FIRST_GENERAL_REG + 14)
12183 return false;
12186 return true;
12189 /* Specify the modes required to caller save a given hard regno.
12190 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12191 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12192 permits integer modes on them. That makes LRA's split process
12193 unhappy. See PR55212. */
12195 machine_mode
12196 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12197 machine_mode mode)
12199 if (FP_REGISTER_P (regno)
12200 && (mode == SFmode
12201 || mode == SCmode
12202 || ((mode == DFmode || mode == DCmode)
12203 && ((regno - FIRST_FP_REG) & 1) == 0)))
12204 return mode;
12206 return choose_hard_reg_mode (regno, nregs, false);
12209 /* Return true if a mode change from FROM to TO is invalid for registers
12210 in class RCLASS. */
12211 bool
12212 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12213 enum reg_class rclass)
12215 /* We want to enable the use of SUBREGs as a means to
12216 VEC_SELECT a single element of a vector. */
12218 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12219 This can be problematic when SFmode vector subregs need to be accessed
12220 on the stack with displacement addressing, as it happens with -O0.
12221 Thus we disallow the mode change for -O0. */
12222 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12223 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12225 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12227 if (TARGET_LITTLE_ENDIAN)
12229 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12230 return reg_classes_intersect_p (DF_REGS, rclass);
12232 else
12234 if (GET_MODE_SIZE (from) < 8)
12235 return reg_classes_intersect_p (DF_REGS, rclass);
12238 return false;
12241 /* Return true if values in machine mode MODE will likely be
12242 allocated to registers in small register classes. */
12243 bool
12244 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12246 return (! TARGET_SHMEDIA);
12249 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12250 that label is used. */
12251 void
12252 sh_mark_label (rtx address, int nuses)
12254 if (GOTOFF_P (address))
12256 /* Extract the label or symbol. */
12257 address = XEXP (address, 0);
12258 if (GET_CODE (address) == PLUS)
12259 address = XEXP (address, 0);
12260 address = XVECEXP (address, 0, 0);
12262 if (GET_CODE (address) == LABEL_REF
12263 && LABEL_P (XEXP (address, 0)))
12264 LABEL_NUSES (XEXP (address, 0)) += nuses;
12267 /* Compute extra cost of moving data between one register class
12268 and another.
12270 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12271 uses this information. Hence, the general register <-> floating point
12272 register information here is not used for SFmode. */
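/* Worked example (illustrative): with !TARGET_SHMEDIA and !TARGET_FMOVD,
   moving a Pmode (SImode) value between a general register and a floating
   point register below costs (12 + 40) * ((4 + 7) / 8) = 52, reflecting the
   pointer penalty from PR target/48596 noted in the function body.  */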
12273 static int
12274 sh_register_move_cost (machine_mode mode,
12275 reg_class_t srcclass, reg_class_t dstclass)
12277 if (dstclass == T_REGS || dstclass == PR_REGS)
12278 return 10;
12280 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12281 return 4;
12283 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12284 && REGCLASS_HAS_FP_REG (srcclass)
12285 && REGCLASS_HAS_FP_REG (dstclass))
12286 return 4;
12288 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12289 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12291 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12292 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12293 return 9;
12295 if ((REGCLASS_HAS_FP_REG (dstclass)
12296 && REGCLASS_HAS_GENERAL_REG (srcclass))
12297 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12298 && REGCLASS_HAS_FP_REG (srcclass)))
12300 /* Discourage trying to use fp regs for a pointer. This also
12301 discourages fp regs with SImode because Pmode is an alias
12302 of SImode on this target. See PR target/48596. */
12303 int addend = (mode == Pmode) ? 40 : 0;
12305 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12306 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12309 if ((dstclass == FPUL_REGS
12310 && REGCLASS_HAS_GENERAL_REG (srcclass))
12311 || (srcclass == FPUL_REGS
12312 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12313 return 5;
12315 if ((dstclass == FPUL_REGS
12316 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12317 || (srcclass == FPUL_REGS
12318 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12319 return 7;
12321 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12322 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12323 return 20;
12325 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12326 if (TARGET_SHMEDIA
12327 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12329 if (sh_gettrcost >= 0)
12330 return sh_gettrcost;
12331 else if (!TARGET_PT_FIXED)
12332 return 100;
12335 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12336 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12337 return 4;
12339 if (TARGET_SHMEDIA
12340 || (TARGET_FMOVD
12341 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12342 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12343 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12345 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12348 static rtx
12349 emit_load_ptr (rtx reg, rtx addr)
12351 rtx mem = gen_const_mem (ptr_mode, addr);
12353 if (Pmode != ptr_mode)
12354 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12355 return emit_move_insn (reg, mem);
12358 static void
12359 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12360 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12361 tree function)
12363 CUMULATIVE_ARGS cum;
12364 int structure_value_byref = 0;
12365 rtx this_rtx, this_value, sibcall, funexp;
12366 rtx_insn *insns;
12367 tree funtype = TREE_TYPE (function);
12368 int simple_add = CONST_OK_FOR_ADD (delta);
12369 int did_load = 0;
12370 rtx scratch0, scratch1, scratch2;
12371 unsigned i;
12373 reload_completed = 1;
12374 epilogue_completed = 1;
12375 crtl->uses_only_leaf_regs = 1;
12377 emit_note (NOTE_INSN_PROLOGUE_END);
12379 /* Find the "this" pointer. We have such a wide range of ABIs for the
12380 SH that it's best to do this completely machine independently.
12381 "this" is passed as first argument, unless a structure return pointer
12382 comes first, in which case "this" comes second. */
12383 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12384 #ifndef PCC_STATIC_STRUCT_RETURN
12385 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12386 structure_value_byref = 1;
12387 #endif /* not PCC_STATIC_STRUCT_RETURN */
12388 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12390 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12392 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12394 this_rtx
12395 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12397 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12398 static chain pointer (even if you can't have nested virtual functions
12399 right now, someone might implement them sometime), and the rest of the
12400 registers are used for argument passing, are callee-saved, or reserved. */
12401 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12402 -ffixed-reg has been used. */
12403 if (! call_used_regs[0] || fixed_regs[0])
12404 error ("r0 needs to be available as a call-clobbered register");
12405 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12406 if (! TARGET_SH5)
12408 if (call_used_regs[1] && ! fixed_regs[1])
12409 scratch1 = gen_rtx_REG (ptr_mode, 1);
12410 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12411 to where struct values are to be returned. */
12412 if (call_used_regs[3] && ! fixed_regs[3])
12413 scratch2 = gen_rtx_REG (Pmode, 3);
12415 else if (TARGET_SHMEDIA)
12417 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12418 if (i != REGNO (scratch0) &&
12419 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12421 scratch1 = gen_rtx_REG (ptr_mode, i);
12422 break;
12424 if (scratch1 == scratch0)
12425 error ("need a second call-clobbered general purpose register");
12426 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12427 if (call_used_regs[i] && ! fixed_regs[i])
12429 scratch2 = gen_rtx_REG (Pmode, i);
12430 break;
12432 if (scratch2 == scratch0)
12433 error ("need a call-clobbered target register");
12436 this_value = plus_constant (Pmode, this_rtx, delta);
12437 if (vcall_offset
12438 && (simple_add || scratch0 != scratch1)
12439 && strict_memory_address_p (ptr_mode, this_value))
12441 emit_load_ptr (scratch0, this_value);
12442 did_load = 1;
12445 if (!delta)
12446 ; /* Do nothing. */
12447 else if (simple_add)
12448 emit_move_insn (this_rtx, this_value);
12449 else
12451 emit_move_insn (scratch1, GEN_INT (delta));
12452 emit_insn (gen_add2_insn (this_rtx, scratch1));
12455 if (vcall_offset)
12457 rtx offset_addr;
12459 if (!did_load)
12460 emit_load_ptr (scratch0, this_rtx);
12462 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12463 if (strict_memory_address_p (ptr_mode, offset_addr))
12464 ; /* Do nothing. */
12465 else if (! TARGET_SH5 && scratch0 != scratch1)
12467 /* scratch0 != scratch1, and we have indexed loads. Get better
12468 schedule by loading the offset into r1 and using an indexed
12469 load - then the load of r1 can issue before the load from
12470 (this_rtx + delta) finishes. */
12471 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12472 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12474 else if (CONST_OK_FOR_ADD (vcall_offset))
12476 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12477 offset_addr = scratch0;
12479 else if (scratch0 != scratch1)
12481 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12482 emit_insn (gen_add2_insn (scratch0, scratch1));
12483 offset_addr = scratch0;
12485 else
12486 gcc_unreachable (); /* FIXME */
12487 emit_load_ptr (scratch0, offset_addr);
12489 if (Pmode != ptr_mode)
12490 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12491 emit_insn (gen_add2_insn (this_rtx, scratch0));
12494 /* Generate a tail call to the target function. */
12495 if (! TREE_USED (function))
12497 assemble_external (function);
12498 TREE_USED (function) = 1;
12500 funexp = XEXP (DECL_RTL (function), 0);
12501 /* If the function is overridden, so is the thunk, hence we don't
12502 need GOT addressing even if this is a public symbol. */
12503 #if 0
12504 if (TARGET_SH1 && ! flag_weak)
12505 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12506 else
12507 #endif
12508 if (TARGET_SH2 && flag_pic)
12510 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12511 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12513 else
12515 if (TARGET_SHMEDIA && flag_pic)
12517 funexp = gen_sym2PIC (funexp);
12518 PUT_MODE (funexp, Pmode);
12520 emit_move_insn (scratch2, funexp);
12521 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12522 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12524 sibcall = emit_call_insn (sibcall);
12525 SIBLING_CALL_P (sibcall) = 1;
12526 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12527 emit_barrier ();
12529 /* Run just enough of rest_of_compilation to do scheduling and get
12530 the insns emitted. Note that use_thunk calls
12531 assemble_start_function and assemble_end_function. */
12533 insns = get_insns ();
12535 if (optimize > 0)
12537 if (! cfun->cfg)
12538 init_flow (cfun);
12539 split_all_insns_noflow ();
12542 sh_reorg ();
12543 shorten_branches (insns);
12544 final_start_function (insns, file, 1);
12545 final (insns, file, 1);
12546 final_end_function ();
12548 reload_completed = 0;
12549 epilogue_completed = 0;
12553 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12555 rtx sym;
12557 /* If this is not an ordinary function, the name usually comes from a
12558 string literal or an sprintf buffer. Make sure we use the same
12559 string consistently, so that cse will be able to unify address loads. */
12560 if (kind != FUNCTION_ORDINARY)
12561 name = IDENTIFIER_POINTER (get_identifier (name));
12562 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12563 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12564 if (flag_pic)
12565 switch (kind)
12567 case FUNCTION_ORDINARY:
12568 break;
12569 case SFUNC_GOT:
12571 rtx reg = target ? target : gen_reg_rtx (Pmode);
12573 emit_insn (gen_symGOT2reg (reg, sym));
12574 sym = reg;
12575 break;
12577 case SFUNC_STATIC:
12579 /* ??? To allow cse to work, we use GOTOFF relocations.
12580 We could add combiner patterns to transform this into
12581 straight pc-relative calls with sym2PIC / bsrf when
12582 label load and function call are still 1:1 and in the
12583 same basic block during combine. */
12584 rtx reg = target ? target : gen_reg_rtx (Pmode);
12586 emit_insn (gen_symGOTOFF2reg (reg, sym));
12587 sym = reg;
12588 break;
12591 if (target && sym != target)
12593 emit_move_insn (target, sym);
12594 return target;
12596 return sym;
12599 /* Find the number of a general purpose register in S. */
12600 static int
12601 scavenge_reg (HARD_REG_SET *s)
12603 int r;
12604 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12605 if (TEST_HARD_REG_BIT (*s, r))
12606 return r;
12607 return -1;
12611 sh_get_pr_initial_val (void)
12613 rtx val;
12615 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12616 PR register on SHcompact, because it might be clobbered by the prologue.
12617 We check first if that is known to be the case. */
12618 if (TARGET_SHCOMPACT
12619 && ((crtl->args.info.call_cookie
12620 & ~ CALL_COOKIE_RET_TRAMP (1))
12621 || crtl->saves_all_registers))
12622 return gen_frame_mem (SImode, return_address_pointer_rtx);
12624 /* If we haven't finished rtl generation, there might be a nonlocal label
12625 that we haven't seen yet.
12626 ??? get_hard_reg_initial_val fails if it is called after register
12627 allocation has started, unless it has been called before for the
12628 same register. And even then, we end up in trouble if we didn't use
12629 the register in the same basic block before. So call
12630 get_hard_reg_initial_val now and wrap it in an unspec if we might
12631 need to replace it. */
12632 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12633 combine can put the pseudo returned by get_hard_reg_initial_val into
12634 instructions that need a general purpose register, which will fail to
12635 be recognized when the pseudo becomes allocated to PR. */
12637 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12638 if (TARGET_SH1)
12639 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12640 return val;
12643 bool
12644 sh_expand_t_scc (rtx operands[])
12646 enum rtx_code code = GET_CODE (operands[1]);
12647 rtx target = operands[0];
12648 rtx op0 = operands[2];
12649 rtx op1 = operands[3];
12650 rtx result = target;
12651 HOST_WIDE_INT val;
12653 if (!REG_P (op0) || REGNO (op0) != T_REG
12654 || !CONST_INT_P (op1))
12655 return false;
12656 if (!REG_P (result))
12657 result = gen_reg_rtx (SImode);
12658 val = INTVAL (op1);
12659 if ((code == EQ && val == 1) || (code == NE && val == 0))
12660 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12661 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12662 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12663 else if (code == EQ || code == NE)
12664 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12665 else
12666 return false;
12667 if (result != target)
12668 emit_move_insn (target, result);
12669 return true;
12672 /* INSN is an sfunc; return the rtx that describes the address used. */
12673 static rtx
12674 extract_sfunc_addr (rtx insn)
12676 rtx pattern, part = NULL_RTX;
12677 int len, i;
12679 pattern = PATTERN (insn);
12680 len = XVECLEN (pattern, 0);
12681 for (i = 0; i < len; i++)
12683 part = XVECEXP (pattern, 0, i);
12684 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12685 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12686 return XEXP (part, 0);
12688 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12689 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12692 /* Verify that the register in use_sfunc_addr still agrees with the address
12693 used in the sfunc. This prevents fill_slots_from_thread from changing
12694 use_sfunc_addr.
12695 INSN is the use_sfunc_addr instruction, and REG is the register it
12696 guards. */
12697 bool
12698 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12700 /* Search for the sfunc. It should really come right after INSN. */
12701 while ((insn = NEXT_INSN (insn)))
12703 if (LABEL_P (insn) || JUMP_P (insn))
12704 break;
12705 if (! INSN_P (insn))
12706 continue;
12708 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12709 insn = seq->insn (0);
12710 if (GET_CODE (PATTERN (insn)) != PARALLEL
12711 || get_attr_type (insn) != TYPE_SFUNC)
12712 continue;
12713 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12715 gcc_unreachable ();
12718 /* This function returns a constant rtx that represents 2**15 / pi in
12719 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12720 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
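/* Quick arithmetic check (illustrative): 2**15 / pi = 32768 / 3.14159265...
   ~= 10430.3784, matching the string constant used below; likewise
   pi / 2**15 ~= 9.58738e-5 for sh_fsca_int2sf further down.  */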
12721 static GTY(()) rtx sh_fsca_sf2int_rtx;
12724 sh_fsca_sf2int (void)
12726 if (! sh_fsca_sf2int_rtx)
12728 REAL_VALUE_TYPE rv;
12730 real_from_string (&rv, "10430.378350470453");
12731 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12734 return sh_fsca_sf2int_rtx;
12737 /* This function returns a constant rtx that represents pi / 2**15 in
12738 SFmode. It's used to scale SFmode angles, in radians, to a
12739 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
12740 maps to 0x10000. */
12741 static GTY(()) rtx sh_fsca_int2sf_rtx;
12744 sh_fsca_int2sf (void)
12746 if (! sh_fsca_int2sf_rtx)
12748 REAL_VALUE_TYPE rv;
12750 real_from_string (&rv, "9.587379924285257e-5");
12751 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12754 return sh_fsca_int2sf_rtx;
12757 /* Initialize the CUMULATIVE_ARGS structure. */
12758 void
12759 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12760 tree fntype,
12761 rtx libname ATTRIBUTE_UNUSED,
12762 tree fndecl,
12763 signed int n_named_args,
12764 machine_mode mode)
12766 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12767 pcum->free_single_fp_reg = 0;
12768 pcum->stack_regs = 0;
12769 pcum->byref_regs = 0;
12770 pcum->byref = 0;
12771 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12773 /* XXX - Should we check TARGET_HITACHI here ??? */
12774 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12776 if (fntype)
12778 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12779 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12780 pcum->prototype_p = prototype_p (fntype);
12781 pcum->arg_count [(int) SH_ARG_INT]
12782 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12784 pcum->call_cookie
12785 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12786 && pcum->arg_count [(int) SH_ARG_INT] == 0
12787 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12788 ? int_size_in_bytes (TREE_TYPE (fntype))
12789 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12790 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12791 == FIRST_RET_REG));
12793 else
12795 pcum->arg_count [(int) SH_ARG_INT] = 0;
12796 pcum->prototype_p = FALSE;
12797 if (mode != VOIDmode)
12799 pcum->call_cookie =
12800 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12801 && GET_MODE_SIZE (mode) > 4
12802 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12804 /* If the default ABI is the Renesas ABI then all library
12805 calls must assume that the library will be using the
12806 Renesas ABI. So if the function would return its result
12807 in memory then we must force the address of this memory
12808 block onto the stack. Ideally we would like to call
12809 targetm.calls.return_in_memory() here but we do not have
12810 the TYPE or the FNDECL available so we synthesize the
12811 contents of that function as best we can. */
12812 pcum->force_mem =
12813 (TARGET_DEFAULT & MASK_HITACHI)
12814 && (mode == BLKmode
12815 || (GET_MODE_SIZE (mode) > 4
12816 && !(mode == DFmode
12817 && TARGET_FPU_DOUBLE)));
12819 else
12821 pcum->call_cookie = 0;
12822 pcum->force_mem = FALSE;
12827 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12828 not descend into CONST_DOUBLE when replacing.
12830 Note that copying is not done so X must not be shared unless all copies
12831 are to be modified.
12833 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12834 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12835 replacements[n*2+1] - and that we take mode changes into account.
12837 If a replacement is ambiguous, return NULL_RTX.
12839 If MODIFY is zero, don't modify any rtl in place,
12840 just return zero or nonzero for failure / success. */
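/* Layout sketch (illustrative): with N_REPLACEMENTS == 2 the REPLACEMENTS
   vector is
     replacements[0] = FROM(0), replacements[1] = TO(0),
     replacements[2] = FROM(1), replacements[3] = TO(1),
   and all pairs are substituted in a single walk over X.  */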
12842 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12844 int i, j;
12845 const char *fmt;
12847 /* The following prevents loops from occurring when we change a MEM in a
12848 CONST_DOUBLE into the same CONST_DOUBLE. */
12849 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12850 return x;
12852 for (i = n_replacements - 1; i >= 0 ; i--)
12853 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12854 return replacements[i*2+1];
12856 /* Allow this function to make replacements in EXPR_LISTs. */
12857 if (x == NULL_RTX)
12858 return NULL_RTX;
12860 if (GET_CODE (x) == SUBREG)
12862 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12863 n_replacements, modify);
12865 if (CONST_INT_P (new_rtx))
12867 x = simplify_subreg (GET_MODE (x), new_rtx,
12868 GET_MODE (SUBREG_REG (x)),
12869 SUBREG_BYTE (x));
12870 if (! x)
12871 abort ();
12873 else if (modify)
12874 SUBREG_REG (x) = new_rtx;
12876 return x;
12878 else if (REG_P (x))
12880 unsigned regno = REGNO (x);
12881 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12882 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12883 rtx result = NULL_RTX;
12885 for (i = n_replacements - 1; i >= 0; i--)
12887 rtx from = replacements[i*2];
12888 rtx to = replacements[i*2+1];
12889 unsigned from_regno, from_nregs, to_regno, new_regno;
12891 if (!REG_P (from))
12892 continue;
12893 from_regno = REGNO (from);
12894 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12895 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12896 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12898 if (regno < from_regno
12899 || regno + nregs > from_regno + nregs
12900 || !REG_P (to)
12901 || result)
12902 return NULL_RTX;
12903 to_regno = REGNO (to);
12904 if (to_regno < FIRST_PSEUDO_REGISTER)
12906 new_regno = regno + to_regno - from_regno;
12907 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12908 != nregs)
12909 return NULL_RTX;
12910 result = gen_rtx_REG (GET_MODE (x), new_regno);
12912 else if (GET_MODE (x) <= GET_MODE (to))
12913 result = gen_lowpart_common (GET_MODE (x), to);
12914 else
12915 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12918 return result ? result : x;
12920 else if (GET_CODE (x) == ZERO_EXTEND)
12922 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12923 n_replacements, modify);
12925 if (CONST_INT_P (new_rtx))
12927 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12928 new_rtx, GET_MODE (XEXP (x, 0)));
12929 if (! x)
12930 abort ();
12932 else if (modify)
12933 XEXP (x, 0) = new_rtx;
12935 return x;
12938 fmt = GET_RTX_FORMAT (GET_CODE (x));
12939 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12941 rtx new_rtx;
12943 if (fmt[i] == 'e')
12945 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12946 n_replacements, modify);
12947 if (!new_rtx)
12948 return NULL_RTX;
12949 if (modify)
12950 XEXP (x, i) = new_rtx;
12952 else if (fmt[i] == 'E')
12953 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12955 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12956 n_replacements, modify);
12957 if (!new_rtx)
12958 return NULL_RTX;
12959 if (modify)
12960 XVECEXP (x, i, j) = new_rtx;
12964 return x;
12968 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
12970 enum rtx_code code = TRUNCATE;
12972 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12974 rtx inner = XEXP (x, 0);
12975 machine_mode inner_mode = GET_MODE (inner);
12977 if (inner_mode == mode)
12978 return inner;
12979 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12980 x = inner;
12981 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12982 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12984 code = GET_CODE (x);
12985 x = inner;
12988 return gen_rtx_fmt_e (code, mode, x);
/* Look through X cleaning up truncates of registers that span multiple
   actual hard registers.  Return the number of changes made.  */
int
shmedia_cleanup_truncate (rtx x)
{
  int n_changes = 0;
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == TRUNCATE)
        {
          rtx reg = XEXP (x, 0);
          machine_mode reg_mode = GET_MODE (reg);
          if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
            {
              int offset = subreg_lowpart_offset (DImode, reg_mode);
              XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
              n_changes += 1;
              iter.skip_subrtxes ();
            }
        }
    }
  return n_changes;
}
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.

   Return true iff INSN contains a MEM.  */
bool
sh_contains_memref_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (MEM_P (*iter))
      return true;
  return false;
}

/* Return true iff INSN loads a banked register.  */
bool
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
        return true;
    }

  return false;
}
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
        {
          rtx reg = gen_reg_rtx (Pmode);

          /* We must not use GOTPLT for sibcalls, because PIC_REG
             must be restored before the PLT code gets to run.  */
          if (is_sibcall)
            emit_insn (gen_symGOT2reg (reg, fnaddr));
          else
            emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
          fnaddr = reg;
        }
      else
        {
          fnaddr = gen_sym2PIC (fnaddr);
          PUT_MODE (fnaddr, Pmode);
        }
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
static reg_class_t
sh_preferred_reload_class (rtx x, reg_class_t rclass)
{
  if (rclass == NO_REGS
      && TARGET_SHMEDIA
      && (CONST_DOUBLE_P (x)
          || GET_CODE (x) == SYMBOL_REF
          || PIC_ADDR_P (x)))
    return GENERAL_REGS;

  return rclass;
}
/* Implement TARGET_SECONDARY_RELOAD.  */
static reg_class_t
sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                     machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
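
  /* Added note (not in the original source): GBR-relative addresses
     (@(disp,gbr) and @gbr) can move data only to/from R0, so memory
     operands of this form need an R0 intermediate whenever the reload
     class is anything other than R0_REGS.  */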
  if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (REG_P (x) && REGNO (x) == GBR_REG)
    return NO_REGS;

  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (rclass)
          && ! TARGET_SHMEDIA
          && immediate_operand ((x), mode)
          && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
        switch (mode)
          {
          case SFmode:
            sri->icode = CODE_FOR_reload_insf__frn;
            return NO_REGS;
          case DFmode:
            sri->icode = CODE_FOR_reload_indf__frn;
            return NO_REGS;
          case SImode:
            /* ??? If we knew that we are in the appropriate mode -
               single precision - we could use a reload pattern directly.  */
            return FPUL_REGS;
          default:
            abort ();
          }
      if (rclass == FPUL_REGS
          && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
                             || REGNO (x) == T_REG))
              || GET_CODE (x) == PLUS))
        return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
        {
          if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
            return GENERAL_REGS;
          else if (mode == SFmode)
            return FP_REGS;
          sri->icode = CODE_FOR_reload_insi__i_fpul;
          return NO_REGS;
        }
      if (rclass == FPSCR_REGS
          && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
              || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
        return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (rclass)
          && TARGET_SHMEDIA
          && immediate_operand (x, mode)
          && x != CONST0_RTX (GET_MODE (x))
          && GET_MODE (x) != V4SFmode)
        return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
          && TARGET_SHMEDIA && inqhi_operand (x, mode))
        {
          sri->icode = ((mode == QImode)
                        ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
          return NO_REGS;
        }
      if (TARGET_SHMEDIA && rclass == GENERAL_REGS
          && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
        return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (rclass)
        && (REG_P (x)
            && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
                || (FP_REGISTER_P (REGNO (x)) && mode == SImode
                    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
           && REG_P (x)
           && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass)
           && ! TARGET_SHMEDIA && mode == SImode))
      && (MEM_P (x)
          || (REG_P (x)
              && (REGNO (x) >= FIRST_PSEUDO_REGISTER
                  || REGNO (x) == T_REG
                  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
        return GENERAL_REGS;
      /* LRA wants NO_REGS here; it used to be FPUL_REGS.  */
      return NO_REGS;
    }
  if ((rclass == TARGET_REGS
       || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (rclass != GENERAL_REGS && REG_P (x)
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;

  /* If we get here, fall back to loading the FPUL register through general
     registers.  This case can happen when the movsi_ie insn is picked
     initially to load/store the FPUL register from/to another register,
     and then the other register is allocated on the stack.  */
  if (rclass == FPUL_REGS && true_regnum (x) == -1)
    return GENERAL_REGS;

  /* Force mov.b / mov.w displacement addressing insns to use R0 as
     the other operand.
     On SH2A we could also just leave it alone here, which would result in a
     4 byte move insn being generated instead.  However, for this to work
     the insns must have the appropriate alternatives.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && satisfies_constraint_Sdd (x)
      && sh_disp_addr_displacement (x)
         <= sh_max_mov_insn_displacement (mode, false))
    return R0_REGS;

  /* When reload is trying to address a QImode or HImode subreg on the stack,
     force any subreg byte into R0_REGS, as this is going to become a
     displacement address.
     We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
     is on the stack, the memref to it might already require a displacement
     and that has to be added to the final address.  At this point we don't
     know the cumulative displacement so we assume the worst case.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
    return R0_REGS;

  return NO_REGS;
}
/* Return true if SUBST can't safely replace its equivalent during RA.  */
static bool
sh_cannot_substitute_mem_equiv_p (rtx)
{
  if (TARGET_SHMEDIA)
    return false;

  /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
     uses R0 and may cause a spill failure when R0 is already in use.
     We have to return true for that case at least.
     Moreover, SH relies heavily on R0 and does not have enough hard
     registers to make the equiv substitution win in size or speed on
     average working sets.  The pseudos produced to hold the equiv values
     can't get good hard registers in the bad cases and end up as memory
     save/restore insns, which makes the code worse.  */
  return true;
}
/* Return true if DISP can be legitimized, splitting it into a displacement
   that is usable directly in a move insn (returned in *DISP) and a base
   address adjustment (returned in *OFFS).  */
static bool
sh_legitimize_address_displacement (rtx *disp, rtx *offs,
                                    machine_mode mode)
{
  if (TARGET_SHMEDIA)
    return false;

  if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
      || (TARGET_SH2E && mode == SFmode))
    return false;

  struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
  if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
    {
      *disp = adj.mov_disp;
      *offs = adj.offset_adjust;
      return true;
    }

  return false;
}
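
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */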
static void
sh_conditional_register_usage (void)
{
  int regno;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
    if (! VALID_REGISTER_P (regno))
      fixed_regs[regno] = call_used_regs[regno] = 1;
  /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs.  */
  if (TARGET_SH5)
    {
      call_used_regs[FIRST_GENERAL_REG + 8]
        = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
      call_really_used_regs[FIRST_GENERAL_REG + 8]
        = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
    }
  if (TARGET_SHMEDIA)
    {
      regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
      CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
      regno_reg_class[FIRST_FP_REG] = FP_REGS;
    }
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* Renesas saves and restores mac registers on call.  */
  if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
    {
      call_really_used_regs[MACH_REG] = 0;
      call_really_used_regs[MACL_REG] = 0;
    }

  if (TARGET_SHMEDIA)
    {
      for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
        if (! fixed_regs[regno] && call_really_used_regs[regno])
          SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
    }
  else
    for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
      if (! fixed_regs[regno] && call_really_used_regs[regno])
        SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);

  call_really_used_regs[FPSCR_MODES_REG] = 0;
  call_really_used_regs[FPSCR_STAT_REG] = 0;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   can_store_by_pieces constructs VOIDmode CONST_DOUBLEs.  */
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (TARGET_SHMEDIA
          ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
             || x == CONST0_RTX (mode)
             || !TARGET_SHMEDIA_FPU
             || TARGET_SHMEDIA64)
          : (GET_CODE (x) != CONST_DOUBLE
             || mode == DFmode || mode == SFmode
             || mode == DImode || GET_MODE (x) == VOIDmode));
}
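
/* Added note (assumption, not in the original source): the division
   strategy currently in effect; initialized during option processing.  */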
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

static void
sh_init_sync_libfuncs (void)
{
  init_sync_libfuncs (UNITS_PER_WORD);
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  */
bool
sh_can_use_simple_return_p (void)
{
  HARD_REG_SET live_regs_mask;
  int d;

  /* Some targets require special return insns.  */
  if (TARGET_SHMEDIA
      || (TARGET_SHCOMPACT
          && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
    return false;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Moving the prologue around doesn't reduce the size.  */
  if (optimize_function_for_size_p (cfun))
    return false;

  /* Finally, allow for the PR save: anything larger than a 4 byte frame
     cannot use a simple return.  */
  d = calc_live_regs (&live_regs_mask);

  if (rounded_frame_size (d) > 4)
    return false;

  return true;
}
/*------------------------------------------------------------------------------
  Address mode optimization support code
*/

typedef HOST_WIDE_INT disp_t;
static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
static const disp_t INVALID_DISP = MAX_DISP;
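
/* Added note (derived from the accessors below): a base_reg_disp whose
   displacement is INVALID_DISP describes neither a base register nor a
   constant displacement.  */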
/* A memory reference which is described by a base register and a
   displacement.  */
class base_reg_disp
{
public:
  base_reg_disp (rtx br, disp_t d);

  bool is_reg (void) const;
  bool is_disp (void) const;
  rtx reg (void) const;
  disp_t disp (void) const;

private:
  rtx reg_;
  disp_t disp_;
};

inline
base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}

inline bool
base_reg_disp::is_reg (void) const
{
  return reg_ != NULL_RTX && disp_ != INVALID_DISP;
}

inline bool
base_reg_disp::is_disp (void) const
{
  return reg_ == NULL_RTX && disp_ != INVALID_DISP;
}

inline rtx
base_reg_disp::reg (void) const
{
  return reg_;
}

inline disp_t
base_reg_disp::disp (void) const
{
  return disp_;
}
/* Find the base register and calculate the displacement for a given
   address rtx 'x'.  */
static base_reg_disp
sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
                       rtx base_reg = NULL)
{
  if (REG_P (x))
    {
      if (REGNO (x) == GBR_REG)
        return base_reg_disp (x, disp);

      /* We've reached a hard-reg.  This is probably the point where
         function args are copied to pseudos.  Do not go any further and
         stick to the pseudo.  If the original mem addr was in a hard reg
         from the beginning, it will become the base reg.  */
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
        return base_reg_disp (base_reg != NULL ? base_reg : x, disp);

      /* Find the def of the reg and trace it.  If there is more than one
         def and the defs are not the same, assume it's not safe to
         proceed.  */
      rtx_insn* last_i = NULL;
      rtx last_set = NULL;
      for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
           d = DF_REF_NEXT_REG (d))
        {
          rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));

          /* Accept multiple defs, as long as they are equal.  */
          if (last_set == NULL || rtx_equal_p (last_set, set))
            {
              last_i = DF_REF_INSN (d);
              last_set = set;
            }
          else
            {
              last_i = NULL;
              last_set = NULL;
              break;
            }
        }

      if (last_set != NULL && last_i != NULL)
        return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
                                      XEXP (last_set, 0));

      /* When we get here, no previous insn was found that sets the reg.
         The input reg is already the base reg.  */
      return base_reg_disp (x, disp);
    }

  else if (GET_CODE (x) == PLUS)
    {
      base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
      base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));

      /* Either the left or the right val must be a reg.
         We don't handle the case of 'reg + reg' here.  */
      if (left_val.is_reg () && right_val.is_disp ())
        return base_reg_disp (left_val.reg (), left_val.disp ()
                              + right_val.disp () + disp);
      else if (right_val.is_reg () && left_val.is_disp ())
        return base_reg_disp (right_val.reg (), right_val.disp ()
                              + left_val.disp () + disp);
      else
        return base_reg_disp (base_reg, disp);
    }

  else if (CONST_INT_P (x))
    return base_reg_disp (NULL, disp + INTVAL (x));

  /* Didn't find anything useful.  */
  return base_reg_disp (base_reg, disp);
}
/* Given an insn and a memory operand, try to find an equivalent GBR
   based memory address and return the corresponding new memory address.
   Return NULL_RTX if not found.  */
rtx
sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
{
  if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
    return NULL_RTX;

  /* Leave post/pre inc/dec or any other side effect addresses alone.  */
  if (side_effects_p (XEXP (mem, 0)))
    return NULL_RTX;

  /* When not optimizing there might be no dataflow available.  */
  if (df == NULL)
    return NULL_RTX;

  base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));

  if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
    {
      /* If GBR is marked as call clobbered we bail out if we see a call.
         FIXME: Actually we should check whether this mem refers to the gbr
         value before or after the call.  If there is a store_gbr preceding
         this mem, it's safe to use GBR for this mem.

         If GBR is not marked as call clobbered, but there is some other
         def than a call, it's probably a load_gbr upon which we also
         bail out to be on the safe side.
         FIXME: Should check if we have a use-after-def case, such as
         the call case above.  */
      for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
           d = DF_REF_NEXT_REG (d))
        {
          if (CALL_P (DF_REF_INSN (d)))
            {
              if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
                return NULL_RTX;
              else
                continue;
            }
          else
            return NULL_RTX;
        }

      rtx disp = GEN_INT (gbr_disp.disp ());
      if (gbr_displacement (disp, GET_MODE (mem)))
        return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
    }

  return NULL_RTX;
}
/*------------------------------------------------------------------------------
  Manual insn combine support code.
*/

/* Return true if the specified insn contains any UNSPECs or
   UNSPEC_VOLATILEs.  */
static bool
sh_unspec_insn_p (rtx_insn* insn)
{
  bool result = false;
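
  /* Added note (not in the original source): note_uses takes a plain
     function pointer, so the check is wrapped in a local struct with a
     static member function usable as that callback.  */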
  struct note_uses_func
  {
    static void
    func (rtx* x, void* data)
    {
      if (GET_CODE (*x) == UNSPEC || GET_CODE (*x) == UNSPEC_VOLATILE)
        *(static_cast<bool*> (data)) = true;
    }
  };

  note_uses (&PATTERN (insn), note_uses_func::func, &result);
  return result;
}
/* Return true if the register operands of the specified insn are modified
   between the specified from and to insns (exclusive of those two).  */
static bool
sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
                                     const rtx_insn* from,
                                     const rtx_insn* to)
{
  /* FIXME: Return true for multiple sets for now.  */
  rtx s = single_set (operands_insn);
  if (s == NULL_RTX)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
    if ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to))
      return true;

  return false;
}
/* Given an op rtx and an insn, try to find out whether the result of the
   specified op consists only of logical operations on T bit stores.  */
bool
sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
{
  if (!logical_operator (op, SImode))
    return false;

  rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
  int op_is_t_count = 0;

  for (int i = 0; i < 2; ++i)
    {
      if (t_reg_operand (ops[i], VOIDmode)
          || negt_reg_operand (ops[i], VOIDmode))
        op_is_t_count++;

      else
        {
          set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
                                                  prev_nonnote_insn_bb);
          if (op_set.set_src == NULL_RTX)
            continue;

          if (t_reg_operand (op_set.set_src, VOIDmode)
              || negt_reg_operand (op_set.set_src, VOIDmode)
              || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
            op_is_t_count++;
        }
    }

  return op_is_t_count == 2;
}
/* Given the operand that is extended in a sign/zero extend insn, and the
   insn, try to figure out whether the sign/zero extension can be replaced
   by a simple reg-reg copy.  If so, the replacement reg rtx is returned,
   NULL_RTX otherwise.  */
rtx
sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
{
  if (REG_P (extended_op))
    extended_op = extended_op;
  else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
    extended_op = SUBREG_REG (extended_op);
  else
    return NULL_RTX;

  /* Reg moves must be of the same mode.  */
  if (GET_MODE (extended_op) != SImode)
    return NULL_RTX;

  set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
  if (s.set_src == NULL_RTX)
    return NULL_RTX;

  if (t_reg_operand (s.set_src, VOIDmode)
      || negt_reg_operand (s.set_src, VOIDmode))
    return extended_op;

  /* If the zero extended reg was formed by a logical operation, check the
     operands of the logical operation.  If both originated from T bit
     stores the zero extension can be eliminated.  */
  else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
    return extended_op;

  return NULL_RTX;
}
/* Given the current insn, which is assumed to be a movrt_negc insn, try to
   figure out whether it should be converted into a movt-xor sequence in
   the movrt_negc splitter.
   Returns true if insns have been modified and the splitter has succeeded.  */
bool
sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
{
  /* In cases such as
	tst	r4,r4
	mov	#-1,r1
	negc	r1,r1
	tst	r4,r4
     we can replace the T bit clobbering negc with a movt-xor sequence and
     eliminate the redundant comparison.
     Because the xor insn depends on register allocation results, allow this
     only before reload.  */
  if (!can_create_pseudo_p ())
    return false;

  set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
                                                 prev_nonnote_insn_bb);
  set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
                                                next_nonnote_insn_bb);

  if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
      && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
      && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_insn_operands_modified_between_p (t_before_negc.insn,
                                               t_before_negc.insn,
                                               t_after_negc.insn)
      && !sh_unspec_insn_p (t_after_negc.insn)
      && !volatile_insn_p (PATTERN (t_after_negc.insn))
      && !side_effects_p (PATTERN (t_after_negc.insn))
      && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
    {
      emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
      set_insn_deleted (t_after_negc.insn);
      return true;
    }
  else
    return false;
}
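
/* Added summary comment (derived from the code below): emit the insns that
   switch the FPSCR setting from PREV_MODE to MODE for the mode switching
   pass.  On SH4A-FP / SH4-300 a toggle insn is emitted directly; otherwise
   FPSCR is read into a temporary, the PR (and, when TARGET_FMOVD, SZ) bits
   are adjusted, and the value is written back.  */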
static void
sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
                  int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  if ((TARGET_SH4A_FP || TARGET_SH4_300)
      && prev_mode != FP_MODE_NONE && prev_mode != mode)
    {
      emit_insn (gen_toggle_pr ());
      if (TARGET_FMOVD)
        emit_insn (gen_toggle_sz ());
    }
  else if (mode != FP_MODE_NONE)
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_sts_fpscr (tmp));
      rtx i = NULL;

      const unsigned HOST_WIDE_INT fpbits =
        TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;

      if (prev_mode != FP_MODE_NONE && prev_mode != mode)
        i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else if (mode == FP_MODE_SINGLE)
        i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
      else if (mode == FP_MODE_DOUBLE)
        i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else
        gcc_unreachable ();

      emit_insn (i);
      emit_insn (gen_lds_fpscr (tmp));
    }
}
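
/* Added note (derived from the code below): return the FPSCR mode required
   by INSN, or FP_MODE_NONE if the insn cannot be recognized.  */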
static int
sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
}

static int
sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
{
  if (TARGET_HITACHI && recog_memoized (insn) >= 0
      && get_attr_fp_set (insn) != FP_SET_NONE)
    return (int) get_attr_fp_set (insn);
  else
    return mode;
}

static int
sh_mode_entry (int entity ATTRIBUTE_UNUSED)
{
  return NORMAL_MODE (entity);
}

static int
sh_mode_exit (int entity ATTRIBUTE_UNUSED)
{
  return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
}
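
/* Added note (derived from the code below): return the FPSCR mode of
   priority N for the mode switching pass; the target's default precision
   comes first.  */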
static int
sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
{
  return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
}
/* Return true if we use LRA instead of the reload pass.  */
static bool
sh_lra_p (void)
{
  return sh_lra_flag;
}

/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
static bool
sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                   unsigned int align,
                                   enum by_pieces_operation op,
                                   bool speed_p)
{
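  /* Added note (derived from the thresholds below): use the by-pieces
     infrastructure only when the operation would take at most one insn,
     except for data aligned to 32 bits or more when optimizing for speed,
     where up to 15 insns are allowed.  */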
  switch (op)
    {
    case MOVE_BY_PIECES:
      return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
             < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
    case STORE_BY_PIECES:
    case SET_BY_PIECES:
      return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
             < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
    default:
      return default_use_by_pieces_infrastructure_p (size, align,
                                                     op, speed_p);
    }
}
#include "gt-sh.h"