PR target/64761
[official-gcc.git] / gcc / config / sh / sh.c
blobf8434d1fbcc0e614acfad5dc2c97d9f86910c176
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "insn-config.h"
30 #include "rtl.h"
31 #include "hash-set.h"
32 #include "machmode.h"
33 #include "vec.h"
34 #include "double-int.h"
35 #include "input.h"
36 #include "alias.h"
37 #include "symtab.h"
38 #include "wide-int.h"
39 #include "inchash.h"
40 #include "tree.h"
41 #include "fold-const.h"
42 #include "stringpool.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "flags.h"
47 #include "hashtab.h"
48 #include "hard-reg-set.h"
49 #include "function.h"
50 #include "statistics.h"
51 #include "real.h"
52 #include "fixed-value.h"
53 #include "expmed.h"
54 #include "dojump.h"
55 #include "explow.h"
56 #include "emit-rtl.h"
57 #include "stmt.h"
58 #include "expr.h"
59 #include "insn-codes.h"
60 #include "optabs.h"
61 #include "reload.h"
62 #include "regs.h"
63 #include "output.h"
64 #include "insn-attr.h"
65 #include "diagnostic-core.h"
66 #include "recog.h"
67 #include "dwarf2.h"
68 #include "tm_p.h"
69 #include "target.h"
70 #include "target-def.h"
71 #include "langhooks.h"
72 #include "predict.h"
73 #include "dominance.h"
74 #include "cfg.h"
75 #include "cfgrtl.h"
76 #include "cfganal.h"
77 #include "lcm.h"
78 #include "cfgbuild.h"
79 #include "cfgcleanup.h"
80 #include "basic-block.h"
81 #include "df.h"
82 #include "intl.h"
83 #include "sched-int.h"
84 #include "params.h"
85 #include "ggc.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "is-a.h"
93 #include "gimple.h"
94 #include "gimplify.h"
95 #include "cfgloop.h"
96 #include "alloc-pool.h"
97 #include "tm-constrs.h"
98 #include "opts.h"
99 #include "tree-pass.h"
100 #include "pass_manager.h"
101 #include "context.h"
102 #include "builtins.h"
103 #include "rtl-iter.h"
105 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
107 /* These are some macros to abstract register modes. */
108 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
109 && ((HOST_WIDE_INT)(VALUE)) <= 511)
111 #define CONST_OK_FOR_ADD(size) \
112 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
113 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
114 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
115 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
117 /* Used to simplify the logic below. Find the attributes wherever
118 they may be. */
119 #define SH_ATTRIBUTES(decl) \
120 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
121 : DECL_ATTRIBUTES (decl) \
122 ? (DECL_ATTRIBUTES (decl)) \
123 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
125 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
126 int current_function_interrupt;
128 tree sh_deferred_function_attributes;
129 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
131 /* Global variables for machine-dependent things. */
133 /* Which cpu are we scheduling for. */
134 enum processor_type sh_cpu;
136 /* Definitions used in ready queue reordering for first scheduling pass. */
138 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
139 static short *regmode_weight[2];
141 /* Total SFmode and SImode weights of scheduled insns. */
142 static int curr_regmode_pressure[2];
144 /* Number of r0 life regions. */
145 static int r0_life_regions;
147 /* If true, skip cycles for Q -> R movement. */
148 static int skip_cycles = 0;
150 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
151 and returned from sh_reorder2. */
152 static short cached_can_issue_more;
154 /* Unique number for UNSPEC_BBR pattern. */
155 static unsigned int unspec_bbr_uid = 1;
157 /* Provides the class number of the smallest class containing
158 reg number. */
159 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
161 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
165 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
166 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
167 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
168 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
169 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
170 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
171 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
172 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
173 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
174 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
175 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
176 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
178 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
179 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
180 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
181 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
182 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
183 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
184 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
185 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
186 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
187 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
188 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
189 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
190 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
191 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
192 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
193 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
194 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
195 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
196 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
197 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
198 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
199 GENERAL_REGS, GENERAL_REGS,
202 char sh_register_names[FIRST_PSEUDO_REGISTER] \
203 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
205 char sh_additional_register_names[ADDREGNAMES_SIZE] \
206 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
207 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
209 int assembler_dialect;
211 static bool shmedia_space_reserved_for_target_registers;
213 static void split_branches (rtx_insn *);
214 static int branch_dest (rtx);
215 static void print_slot (rtx_sequence *);
216 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
217 static void dump_table (rtx_insn *, rtx_insn *);
218 static bool broken_move (rtx_insn *);
219 static bool mova_p (rtx_insn *);
220 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
221 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
222 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
223 static void sh_reorg (void);
224 static void sh_option_override (void);
225 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
226 static rtx_insn *frame_insn (rtx);
227 static rtx push (int);
228 static void pop (int);
229 static void push_regs (HARD_REG_SET *, int);
230 static int calc_live_regs (HARD_REG_SET *);
231 static HOST_WIDE_INT rounded_frame_size (int);
232 static bool sh_frame_pointer_required (void);
233 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
234 static int sh_mode_needed (int, rtx_insn *);
235 static int sh_mode_after (int, int, rtx_insn *);
236 static int sh_mode_entry (int);
237 static int sh_mode_exit (int);
238 static int sh_mode_priority (int entity, int n);
239 static bool sh_lra_p (void);
241 static rtx mark_constant_pool_use (rtx);
242 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
243 int, bool *);
244 static tree sh_handle_resbank_handler_attribute (tree *, tree,
245 tree, int, bool *);
246 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
247 tree, int, bool *);
248 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
249 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
250 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
251 static void sh_print_operand (FILE *, rtx, int);
252 static void sh_print_operand_address (FILE *, rtx);
253 static bool sh_print_operand_punct_valid_p (unsigned char code);
254 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
255 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
256 static void sh_insert_attributes (tree, tree *);
257 static const char *sh_check_pch_target_flags (int);
258 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
259 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
260 static int sh_issue_rate (void);
261 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
262 static short find_set_regmode_weight (rtx, machine_mode);
263 static short find_insn_regmode_weight (rtx, machine_mode);
264 static void find_regmode_weight (basic_block, machine_mode);
265 static int find_r0_life_regions (basic_block);
266 static void sh_md_init_global (FILE *, int, int);
267 static void sh_md_finish_global (FILE *, int);
268 static int rank_for_reorder (const void *, const void *);
269 static void swap_reorder (rtx_insn **, int);
270 static void ready_reorder (rtx_insn **, int);
271 static bool high_pressure (machine_mode);
272 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
273 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
274 static void sh_md_init (FILE *, int, int);
275 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
277 static bool sh_function_ok_for_sibcall (tree, tree);
279 static bool sh_cannot_modify_jumps_p (void);
280 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
281 static reg_class_t sh_target_reg_class (void);
282 static bool sh_optimize_target_register_callee_saved (bool);
283 static bool sh_ms_bitfield_layout_p (const_tree);
285 static void sh_init_builtins (void);
286 static tree sh_builtin_decl (unsigned, bool);
287 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
288 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
289 HOST_WIDE_INT, tree);
290 static void sh_file_start (void);
291 static bool flow_dependent_p (rtx, rtx);
292 static void flow_dependent_p_1 (rtx, const_rtx, void *);
293 static int shiftcosts (rtx);
294 static int and_xor_ior_costs (rtx, int);
295 static int addsubcosts (rtx);
296 static int multcosts (rtx);
297 static bool unspec_caller_rtx_p (rtx);
298 static bool sh_cannot_copy_insn_p (rtx_insn *);
299 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
300 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
301 static int sh_pr_n_sets (void);
302 static rtx sh_allocate_initial_value (rtx);
303 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
304 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
305 machine_mode,
306 struct secondary_reload_info *);
307 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
308 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
309 static rtx sh_delegitimize_address (rtx);
310 static bool sh_cannot_substitute_mem_equiv_p (rtx);
311 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
312 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
313 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
314 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
315 static int scavenge_reg (HARD_REG_SET *s);
316 struct save_schedule_s;
317 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
318 struct save_schedule_s *, int);
320 static rtx sh_struct_value_rtx (tree, int);
321 static rtx sh_function_value (const_tree, const_tree, bool);
322 static bool sh_function_value_regno_p (const unsigned int);
323 static rtx sh_libcall_value (machine_mode, const_rtx);
324 static bool sh_return_in_memory (const_tree, const_tree);
325 static rtx sh_builtin_saveregs (void);
326 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
327 tree, int *, int);
328 static bool sh_strict_argument_naming (cumulative_args_t);
329 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
330 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
331 static tree sh_build_builtin_va_list (void);
332 static void sh_va_start (tree, rtx);
333 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
334 static bool sh_promote_prototypes (const_tree);
335 static machine_mode sh_promote_function_mode (const_tree type,
336 machine_mode,
337 int *punsignedp,
338 const_tree funtype,
339 int for_return);
340 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
341 const_tree, bool);
342 static bool sh_callee_copies (cumulative_args_t, machine_mode,
343 const_tree, bool);
344 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
345 tree, bool);
346 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
347 const_tree, bool);
348 static rtx sh_function_arg (cumulative_args_t, machine_mode,
349 const_tree, bool);
350 static bool sh_scalar_mode_supported_p (machine_mode);
351 static int sh_dwarf_calling_convention (const_tree);
352 static void sh_encode_section_info (tree, rtx, int);
353 static bool sh2a_function_vector_p (tree);
354 static void sh_trampoline_init (rtx, tree, rtx);
355 static rtx sh_trampoline_adjust_address (rtx);
356 static void sh_conditional_register_usage (void);
357 static bool sh_legitimate_constant_p (machine_mode, rtx);
358 static int mov_insn_size (machine_mode, bool);
359 static int mov_insn_alignment_mask (machine_mode, bool);
360 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
361 unsigned int,
362 enum by_pieces_operation,
363 bool);
364 static bool sequence_insn_p (rtx_insn *);
365 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
366 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
367 machine_mode, bool);
368 static bool sh_legitimate_combined_insn (rtx_insn* insn);
370 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
372 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
374 static const struct attribute_spec sh_attribute_table[] =
376 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
377 affects_type_identity } */
378 { "interrupt_handler", 0, 0, true, false, false,
379 sh_handle_interrupt_handler_attribute, false },
380 { "sp_switch", 1, 1, true, false, false,
381 sh_handle_sp_switch_attribute, false },
382 { "trap_exit", 1, 1, true, false, false,
383 sh_handle_trap_exit_attribute, false },
384 { "renesas", 0, 0, false, true, false,
385 sh_handle_renesas_attribute, false },
386 { "trapa_handler", 0, 0, true, false, false,
387 sh_handle_interrupt_handler_attribute, false },
388 { "nosave_low_regs", 0, 0, true, false, false,
389 sh_handle_interrupt_handler_attribute, false },
390 { "resbank", 0, 0, true, false, false,
391 sh_handle_resbank_handler_attribute, false },
392 { "function_vector", 1, 1, true, false, false,
393 sh2a_handle_function_vector_handler_attribute, false },
394 { NULL, 0, 0, false, false, false, NULL, false }
397 /* Initialize the GCC target structure. */
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
401 /* The next two are used for debug info when compiling with -gdwarf. */
402 #undef TARGET_ASM_UNALIGNED_HI_OP
403 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
404 #undef TARGET_ASM_UNALIGNED_SI_OP
405 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
407 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
408 #undef TARGET_ASM_UNALIGNED_DI_OP
409 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
410 #undef TARGET_ASM_ALIGNED_DI_OP
411 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
413 #undef TARGET_OPTION_OVERRIDE
414 #define TARGET_OPTION_OVERRIDE sh_option_override
416 #undef TARGET_PRINT_OPERAND
417 #define TARGET_PRINT_OPERAND sh_print_operand
418 #undef TARGET_PRINT_OPERAND_ADDRESS
419 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
420 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
421 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
422 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
423 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
428 #undef TARGET_ASM_OUTPUT_MI_THUNK
429 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
431 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
432 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
433 hook_bool_const_tree_hwi_hwi_const_tree_true
435 #undef TARGET_ASM_FILE_START
436 #define TARGET_ASM_FILE_START sh_file_start
437 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
438 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
440 #undef TARGET_REGISTER_MOVE_COST
441 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
443 #undef TARGET_INSERT_ATTRIBUTES
444 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
446 #undef TARGET_SCHED_ADJUST_COST
447 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
449 #undef TARGET_SCHED_ISSUE_RATE
450 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
452 /* The next 5 hooks have been implemented for reenabling sched1. With the
453 help of these macros we are limiting the movement of insns in sched1 to
454 reduce the register pressure. The overall idea is to keep count of SImode
455 and SFmode regs required by already scheduled insns. When these counts
456 cross some threshold values; give priority to insns that free registers.
457 The insn that frees registers is most likely to be the insn with lowest
458 LUID (original insn order); but such an insn might be there in the stalled
459 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
460 up to a max of 8 cycles so that such insns may move from Q -> R.
462 The description of the hooks are as below:
464 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
465 scheduler; it is called inside the sched_init function just after
466 find_insn_reg_weights function call. It is used to calculate the SImode
467 and SFmode weights of insns of basic blocks; much similar to what
468 find_insn_reg_weights does.
469 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
471 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
472 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
473 (Q)->(R).
475 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
476 high; reorder the ready queue so that the insn with lowest LUID will be
477 issued next.
479 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
480 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
482 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
483 can be returned from TARGET_SCHED_REORDER2.
485 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
487 #undef TARGET_SCHED_DFA_NEW_CYCLE
488 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
490 #undef TARGET_SCHED_INIT_GLOBAL
491 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
493 #undef TARGET_SCHED_FINISH_GLOBAL
494 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
496 #undef TARGET_SCHED_VARIABLE_ISSUE
497 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
499 #undef TARGET_SCHED_REORDER
500 #define TARGET_SCHED_REORDER sh_reorder
502 #undef TARGET_SCHED_REORDER2
503 #define TARGET_SCHED_REORDER2 sh_reorder2
505 #undef TARGET_SCHED_INIT
506 #define TARGET_SCHED_INIT sh_md_init
508 #undef TARGET_DELEGITIMIZE_ADDRESS
509 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
511 #undef TARGET_LEGITIMIZE_ADDRESS
512 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
514 #undef TARGET_CANNOT_MODIFY_JUMPS_P
515 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
516 #undef TARGET_CAN_FOLLOW_JUMP
517 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
518 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
519 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
520 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
521 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
522 sh_optimize_target_register_callee_saved
524 #undef TARGET_MS_BITFIELD_LAYOUT_P
525 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
527 #undef TARGET_INIT_BUILTINS
528 #define TARGET_INIT_BUILTINS sh_init_builtins
529 #undef TARGET_BUILTIN_DECL
530 #define TARGET_BUILTIN_DECL sh_builtin_decl
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
534 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
535 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
537 #undef TARGET_CANNOT_COPY_INSN_P
538 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
539 #undef TARGET_RTX_COSTS
540 #define TARGET_RTX_COSTS sh_rtx_costs
541 #undef TARGET_ADDRESS_COST
542 #define TARGET_ADDRESS_COST sh_address_cost
543 #undef TARGET_ALLOCATE_INITIAL_VALUE
544 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
549 #undef TARGET_DWARF_REGISTER_SPAN
550 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
552 #ifdef HAVE_AS_TLS
553 #undef TARGET_HAVE_TLS
554 #define TARGET_HAVE_TLS true
555 #endif
557 #undef TARGET_PROMOTE_PROTOTYPES
558 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
562 #undef TARGET_FUNCTION_VALUE
563 #define TARGET_FUNCTION_VALUE sh_function_value
564 #undef TARGET_FUNCTION_VALUE_REGNO_P
565 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
566 #undef TARGET_LIBCALL_VALUE
567 #define TARGET_LIBCALL_VALUE sh_libcall_value
568 #undef TARGET_STRUCT_VALUE_RTX
569 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
570 #undef TARGET_RETURN_IN_MEMORY
571 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
573 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
574 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
575 #undef TARGET_SETUP_INCOMING_VARARGS
576 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
577 #undef TARGET_STRICT_ARGUMENT_NAMING
578 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
579 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
580 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
581 #undef TARGET_MUST_PASS_IN_STACK
582 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
583 #undef TARGET_PASS_BY_REFERENCE
584 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
585 #undef TARGET_CALLEE_COPIES
586 #define TARGET_CALLEE_COPIES sh_callee_copies
587 #undef TARGET_ARG_PARTIAL_BYTES
588 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
589 #undef TARGET_FUNCTION_ARG
590 #define TARGET_FUNCTION_ARG sh_function_arg
591 #undef TARGET_FUNCTION_ARG_ADVANCE
592 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
594 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
595 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
597 #undef TARGET_BUILD_BUILTIN_VA_LIST
598 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
599 #undef TARGET_EXPAND_BUILTIN_VA_START
600 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
601 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
602 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
604 #undef TARGET_SCALAR_MODE_SUPPORTED_P
605 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
606 #undef TARGET_VECTOR_MODE_SUPPORTED_P
607 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
609 #undef TARGET_CHECK_PCH_TARGET_FLAGS
610 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
612 #undef TARGET_DWARF_CALLING_CONVENTION
613 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
615 #undef TARGET_FRAME_POINTER_REQUIRED
616 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
618 #undef TARGET_MODE_EMIT
619 #define TARGET_MODE_EMIT sh_emit_mode_set
621 #undef TARGET_MODE_NEEDED
622 #define TARGET_MODE_NEEDED sh_mode_needed
624 #undef TARGET_MODE_AFTER
625 #define TARGET_MODE_AFTER sh_mode_after
627 #undef TARGET_MODE_ENTRY
628 #define TARGET_MODE_ENTRY sh_mode_entry
630 #undef TARGET_MODE_EXIT
631 #define TARGET_MODE_EXIT sh_mode_exit
633 #undef TARGET_MODE_PRIORITY
634 #define TARGET_MODE_PRIORITY sh_mode_priority
636 /* Return regmode weight for insn. */
637 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
638 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
640 /* Return current register pressure for regmode. */
641 #define CURR_REGMODE_PRESSURE(MODE)\
642 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
644 #undef TARGET_ENCODE_SECTION_INFO
645 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
647 #undef TARGET_LRA_P
648 #define TARGET_LRA_P sh_lra_p
650 #undef TARGET_SECONDARY_RELOAD
651 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
653 #undef TARGET_PREFERRED_RELOAD_CLASS
654 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
656 #undef TARGET_CONDITIONAL_REGISTER_USAGE
657 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
659 #undef TARGET_LEGITIMATE_ADDRESS_P
660 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
662 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
663 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
665 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
666 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
667 sh_legitimize_address_displacement
669 #undef TARGET_TRAMPOLINE_INIT
670 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
671 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
672 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
674 #undef TARGET_LEGITIMATE_CONSTANT_P
675 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
677 #undef TARGET_CANONICALIZE_COMPARISON
678 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
680 #undef TARGET_LEGITIMATE_COMBINED_INSN
681 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
683 #undef TARGET_FIXED_CONDITION_CODE_REGS
684 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
686 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
687 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
688 sh_use_by_pieces_infrastructure_p
690 /* Machine-specific symbol_ref flags. */
691 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
693 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
694 is used by optabs.c atomic op expansion code as well as in sync.md. */
695 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
696 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
698 struct gcc_target targetm = TARGET_INITIALIZER;
701 /* Information on the currently selected atomic model.
702 This is initialized in sh_option_override. */
703 static sh_atomic_model selected_atomic_model_;
705 const sh_atomic_model&
706 selected_atomic_model (void)
708 return selected_atomic_model_;
711 static sh_atomic_model
712 parse_validate_atomic_model_option (const char* str)
714 const char* model_names[sh_atomic_model::num_models];
715 model_names[sh_atomic_model::none] = "none";
716 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
717 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
718 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
719 model_names[sh_atomic_model::soft_imask] = "soft-imask";
721 const char* model_cdef_names[sh_atomic_model::num_models];
722 model_cdef_names[sh_atomic_model::none] = "NONE";
723 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
724 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
725 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
726 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
728 sh_atomic_model ret;
729 ret.type = sh_atomic_model::none;
730 ret.name = model_names[sh_atomic_model::none];
731 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
732 ret.strict = false;
733 ret.tcb_gbr_offset = -1;
735 /* Handle empty string as 'none'. */
736 if (str == NULL || *str == '\0')
737 return ret;
739 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
741 std::vector<std::string> tokens;
742 for (std::stringstream ss (str); ss.good (); )
744 tokens.push_back (std::string ());
745 std::getline (ss, tokens.back (), ',');
748 if (tokens.empty ())
749 err_ret ("invalid atomic model option");
751 /* The first token must be the atomic model name. */
753 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
754 if (tokens.front () == model_names[i])
756 ret.type = (sh_atomic_model::enum_type)i;
757 ret.name = model_names[i];
758 ret.cdef_name = model_cdef_names[i];
759 goto got_mode_name;
762 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
763 got_mode_name:;
766 /* Go through the remaining tokens. */
767 for (size_t i = 1; i < tokens.size (); ++i)
769 if (tokens[i] == "strict")
770 ret.strict = true;
771 else if (tokens[i].find ("gbr-offset=") == 0)
773 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
774 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
775 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
776 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
777 "option", offset_str.c_str ());
779 else
780 err_ret ("unknown parameter \"%s\" in atomic model option",
781 tokens[i].c_str ());
784 /* Check that the selection makes sense. */
785 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
786 err_ret ("atomic operations are not supported on SHmedia");
788 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
789 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
790 ret.name);
792 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
793 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
795 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
796 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
798 if (ret.type == sh_atomic_model::soft_tcb
799 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
800 || (ret.tcb_gbr_offset & 3) != 0))
801 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
802 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
803 ret.name);
805 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
806 err_ret ("cannot use atomic model %s in user mode", ret.name);
808 return ret;
810 #undef err_ret
813 /* Register SH specific RTL passes. */
814 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
815 const char* name);
816 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
817 const char* name);
818 static void
819 register_sh_passes (void)
821 if (!TARGET_SH1)
822 return;
824 /* Running the sh_treg_combine pass after ce1 generates better code when
825 comparisons are combined and reg-reg moves are introduced, because
826 reg-reg moves will be eliminated afterwards. However, there are quite
827 some cases where combine will be unable to fold comparison related insns,
828 thus for now don't do it.
829 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
830 PASS_POS_INSERT_AFTER, "ce1", 1);
833 /* Run sh_treg_combine pass after combine but before register allocation. */
834 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
835 PASS_POS_INSERT_AFTER, "split1", 1);
837 /* Run sh_treg_combine pass after register allocation and basic block
838 reordering as this sometimes creates new opportunities. */
839 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
840 PASS_POS_INSERT_AFTER, "split4", 1);
842 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
843 is known after a conditional branch.
844 This must be done after basic blocks and branch conditions have
845 stabilized and won't be changed by further passes. */
846 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
847 PASS_POS_INSERT_BEFORE, "sched2", 1);
850 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
851 various options, and do some machine dependent initialization. */
852 static void
853 sh_option_override (void)
855 int regno;
857 SUBTARGET_OVERRIDE_OPTIONS;
858 if (optimize > 1 && !optimize_size)
859 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
861 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
862 TARGET_CBRANCHDI4 = 1;
863 TARGET_CMPEQDI_T = 0;
865 sh_cpu = PROCESSOR_SH1;
866 assembler_dialect = 0;
867 if (TARGET_SH2)
868 sh_cpu = PROCESSOR_SH2;
869 if (TARGET_SH2E)
870 sh_cpu = PROCESSOR_SH2E;
871 if (TARGET_SH2A)
872 sh_cpu = PROCESSOR_SH2A;
873 if (TARGET_SH3)
874 sh_cpu = PROCESSOR_SH3;
875 if (TARGET_SH3E)
876 sh_cpu = PROCESSOR_SH3E;
877 if (TARGET_SH4)
879 assembler_dialect = 1;
880 sh_cpu = PROCESSOR_SH4;
882 if (TARGET_SH4A)
884 assembler_dialect = 1;
885 sh_cpu = PROCESSOR_SH4A;
887 if (TARGET_SH5)
889 sh_cpu = PROCESSOR_SH5;
890 target_flags |= MASK_ALIGN_DOUBLE;
891 if (TARGET_SHMEDIA_FPU)
892 target_flags |= MASK_FMOVD;
893 if (TARGET_SHMEDIA)
895 /* There are no delay slots on SHmedia. */
896 flag_delayed_branch = 0;
897 /* Relaxation isn't yet supported for SHmedia */
898 target_flags &= ~MASK_RELAX;
899 /* After reload, if conversion does little good but can cause
900 ICEs:
901 - find_if_block doesn't do anything for SH because we don't
902 have conditional execution patterns. (We use conditional
903 move patterns, which are handled differently, and only
904 before reload).
905 - find_cond_trap doesn't do anything for the SH because we
906 don't have conditional traps.
907 - find_if_case_1 uses redirect_edge_and_branch_force in
908 the only path that does an optimization, and this causes
909 an ICE when branch targets are in registers.
910 - find_if_case_2 doesn't do anything for the SHmedia after
911 reload except when it can redirect a tablejump - and
912 that's rather rare. */
913 flag_if_conversion2 = 0;
914 if (! strcmp (sh_div_str, "call"))
915 sh_div_strategy = SH_DIV_CALL;
916 else if (! strcmp (sh_div_str, "call2"))
917 sh_div_strategy = SH_DIV_CALL2;
918 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
919 sh_div_strategy = SH_DIV_FP;
920 else if (! strcmp (sh_div_str, "inv"))
921 sh_div_strategy = SH_DIV_INV;
922 else if (! strcmp (sh_div_str, "inv:minlat"))
923 sh_div_strategy = SH_DIV_INV_MINLAT;
924 else if (! strcmp (sh_div_str, "inv20u"))
925 sh_div_strategy = SH_DIV_INV20U;
926 else if (! strcmp (sh_div_str, "inv20l"))
927 sh_div_strategy = SH_DIV_INV20L;
928 else if (! strcmp (sh_div_str, "inv:call2"))
929 sh_div_strategy = SH_DIV_INV_CALL2;
930 else if (! strcmp (sh_div_str, "inv:call"))
931 sh_div_strategy = SH_DIV_INV_CALL;
932 else if (! strcmp (sh_div_str, "inv:fp"))
934 if (TARGET_FPU_ANY)
935 sh_div_strategy = SH_DIV_INV_FP;
936 else
937 sh_div_strategy = SH_DIV_INV;
939 TARGET_CBRANCHDI4 = 0;
940 /* Assembler CFI isn't yet fully supported for SHmedia. */
941 flag_dwarf2_cfi_asm = 0;
944 else
946 /* Only the sh64-elf assembler fully supports .quad properly. */
947 targetm.asm_out.aligned_op.di = NULL;
948 targetm.asm_out.unaligned_op.di = NULL;
951 /* User/priviledged mode is supported only on SH3*, SH4* and SH5*.
952 Disable it for everything else. */
953 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
954 TARGET_USERMODE = false;
956 if (TARGET_SH1)
958 if (! strcmp (sh_div_str, "call-div1"))
959 sh_div_strategy = SH_DIV_CALL_DIV1;
960 else if (! strcmp (sh_div_str, "call-fp")
961 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
962 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
963 sh_div_strategy = SH_DIV_CALL_FP;
964 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
965 sh_div_strategy = SH_DIV_CALL_TABLE;
966 else
967 /* Pick one that makes most sense for the target in general.
968 It is not much good to use different functions depending
969 on -Os, since then we'll end up with two different functions
970 when some of the code is compiled for size, and some for
971 speed. */
973 /* SH4 tends to emphasize speed. */
974 if (TARGET_HARD_SH4)
975 sh_div_strategy = SH_DIV_CALL_TABLE;
976 /* These have their own way of doing things. */
977 else if (TARGET_SH2A)
978 sh_div_strategy = SH_DIV_INTRINSIC;
979 /* ??? Should we use the integer SHmedia function instead? */
980 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
981 sh_div_strategy = SH_DIV_CALL_FP;
982 /* SH1 .. SH3 cores often go into small-footprint systems, so
983 default to the smallest implementation available. */
984 else
985 sh_div_strategy = SH_DIV_CALL_DIV1;
987 if (!TARGET_SH1)
988 TARGET_PRETEND_CMOVE = 0;
989 if (sh_divsi3_libfunc[0])
990 ; /* User supplied - leave it alone. */
991 else if (TARGET_DIVIDE_CALL_FP)
992 sh_divsi3_libfunc = "__sdivsi3_i4";
993 else if (TARGET_DIVIDE_CALL_TABLE)
994 sh_divsi3_libfunc = "__sdivsi3_i4i";
995 else if (TARGET_SH5)
996 sh_divsi3_libfunc = "__sdivsi3_1";
997 else
998 sh_divsi3_libfunc = "__sdivsi3";
1000 if (sh_branch_cost == -1)
1002 /* The SH1 does not have delay slots, hence we get a pipeline stall
1003 at every branch. The SH4 is superscalar, so the single delay slot
1004 is not sufficient to keep both pipelines filled.
1005 In any case, set the default branch cost to '2', as it results in
1006 slightly overall smaller code and also enables some if conversions
1007 that are required for matching special T bit related insns. */
1008 sh_branch_cost = 2;
1011 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
1012 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
1013 TARGET_ZDCBRANCH = 1;
1015 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1016 if (! VALID_REGISTER_P (regno))
1017 sh_register_names[regno][0] = '\0';
1019 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1020 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1021 sh_additional_register_names[regno][0] = '\0';
1023 if ((flag_pic && ! TARGET_PREFERGOT)
1024 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1025 flag_no_function_cse = 1;
1027 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1029 /* Never run scheduling before reload, since that can
1030 break global alloc, and generates slower code anyway due
1031 to the pressure on R0. */
1032 /* Enable sched1 for SH4 if the user explicitly requests.
1033 When sched1 is enabled, the ready queue will be reordered by
1034 the target hooks if pressure is high. We can not do this for
1035 PIC, SH3 and lower as they give spill failures for R0. */
1036 if (!TARGET_HARD_SH4 || flag_pic)
1037 flag_schedule_insns = 0;
1038 /* ??? Current exception handling places basic block boundaries
1039 after call_insns. It causes the high pressure on R0 and gives
1040 spill failures for R0 in reload. See PR 22553 and the thread
1041 on gcc-patches
1042 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1043 else if (flag_exceptions)
1045 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1046 warning (0, "ignoring -fschedule-insns because of exception "
1047 "handling bug");
1048 flag_schedule_insns = 0;
1050 else if (flag_schedule_insns
1051 && !global_options_set.x_flag_schedule_insns)
1052 flag_schedule_insns = 0;
1055 /* Unwind info is not correct around the CFG unless either a frame
1056 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1057 unwind info generation to be aware of the CFG and propagating states
1058 around edges. */
1059 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1060 || flag_exceptions || flag_non_call_exceptions)
1061 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1063 warning (0, "unwind tables currently require either a frame pointer "
1064 "or -maccumulate-outgoing-args for correctness");
1065 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1068 /* Unwinding with -freorder-blocks-and-partition does not work on this
1069 architecture, because it requires far jumps to label crossing between
1070 hot/cold sections which are rejected on this architecture. */
1071 if (flag_reorder_blocks_and_partition)
1073 if (flag_exceptions)
1075 inform (input_location,
1076 "-freorder-blocks-and-partition does not work with "
1077 "exceptions on this architecture");
1078 flag_reorder_blocks_and_partition = 0;
1079 flag_reorder_blocks = 1;
1081 else if (flag_unwind_tables)
1083 inform (input_location,
1084 "-freorder-blocks-and-partition does not support unwind "
1085 "info on this architecture");
1086 flag_reorder_blocks_and_partition = 0;
1087 flag_reorder_blocks = 1;
1091 /* Adjust loop, jump and function alignment values (in bytes), if those
1092 were not specified by the user using -falign-loops, -falign-jumps
1093 and -falign-functions options.
1094 32 bit alignment is better for speed, because instructions can be
1095 fetched as a pair from a longword boundary. For size use 16 bit
1096 alignment to get more compact code.
1097 Aligning all jumps increases the code size, even if it might
1098 result in slightly faster code. Thus, it is set to the smallest
1099 alignment possible if not specified by the user. */
1100 if (align_loops == 0)
1102 if (TARGET_SH5)
1103 align_loops = 8;
1104 else
1105 align_loops = optimize_size ? 2 : 4;
1108 if (align_jumps == 0)
1110 if (TARGET_SHMEDIA)
1111 align_jumps = 1 << CACHE_LOG;
1112 else
1113 align_jumps = 2;
1115 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1116 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1118 if (align_functions == 0)
1120 if (TARGET_SHMEDIA)
1121 align_functions = optimize_size
1122 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1123 else
1124 align_functions = optimize_size ? 2 : 4;
1127 /* The linker relaxation code breaks when a function contains
1128 alignments that are larger than that at the start of a
1129 compilation unit. */
1130 if (TARGET_RELAX)
1132 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1134 /* Also take possible .long constants / mova tables into account. */
1135 if (min_align < 4)
1136 min_align = 4;
1137 if (align_functions < min_align)
1138 align_functions = min_align;
1141 if (flag_unsafe_math_optimizations)
1143 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1144 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1145 TARGET_FSCA = 1;
1147 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1148 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1149 TARGET_FSRRA = 1;
1152 /* Allow fsrra insn only if -funsafe-math-optimizations and
1153 -ffinite-math-only is enabled. */
1154 TARGET_FSRRA = TARGET_FSRRA
1155 && flag_unsafe_math_optimizations
1156 && flag_finite_math_only;
1158 /* If the -mieee option was not explicitly set by the user, turn it on
1159 unless -ffinite-math-only was specified. See also PR 33135. */
1160 if (! global_options_set.x_TARGET_IEEE)
1161 TARGET_IEEE = ! flag_finite_math_only;
1163 if (sh_fixed_range_str)
1164 sh_fix_range (sh_fixed_range_str);
1166 /* This target defaults to strict volatile bitfields. */
1167 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1168 flag_strict_volatile_bitfields = 1;
1170 /* Parse atomic model option and make sure it is valid for the current
1171 target CPU. */
1172 selected_atomic_model_
1173 = parse_validate_atomic_model_option (sh_atomic_model_str);
1175 register_sh_passes ();
1178 /* Print the operand address in x to the stream. */
1179 static void
1180 sh_print_operand_address (FILE *stream, rtx x)
1182 switch (GET_CODE (x))
1184 case REG:
1185 case SUBREG:
1186 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1187 break;
1189 case PLUS:
1191 rtx base = XEXP (x, 0);
1192 rtx index = XEXP (x, 1);
1194 switch (GET_CODE (index))
1196 case CONST_INT:
1197 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1198 reg_names[true_regnum (base)]);
1199 break;
1201 case REG:
1202 case SUBREG:
1204 int base_num = true_regnum (base);
1205 int index_num = true_regnum (index);
1207 fprintf (stream, "@(r0,%s)",
1208 reg_names[MAX (base_num, index_num)]);
1209 break;
1212 default:
1213 gcc_unreachable ();
1216 break;
1218 case PRE_DEC:
1219 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1220 break;
1222 case POST_INC:
1223 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1224 break;
1226 default:
1227 x = mark_constant_pool_use (x);
1228 output_addr_const (stream, x);
1229 break;
1233 /* Print operand x (an rtx) in assembler syntax to file stream
1234 according to modifier code.
1236 '.' print a .s if insn needs delay slot
1237 ',' print LOCAL_LABEL_PREFIX
1238 '@' print trap, rte or rts depending upon pragma interruptness
1239 '#' output a nop if there is nothing to put in the delay slot
1240 ''' print likelihood suffix (/u for unlikely).
1241 '>' print branch target if -fverbose-asm
1242 'O' print a constant without the #
1243 'R' print the LSW of a dp value - changes if in little endian
1244 'S' print the MSW of a dp value - changes if in little endian
1245 'T' print the next word of a dp value - same as 'R' in big endian mode.
1246 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1247 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1248 'N' print 'r63' if the operand is (const_int 0).
1249 'd' print a V2SF reg as dN instead of fpN.
1250 'm' print a pair `base,offset' or `base,index', for LD and ST.
1251 'U' Likewise for {LD,ST}{HI,LO}.
1252 'V' print the position of a single bit set.
1253 'W' print the position of a single bit cleared.
1254 't' print a memory address which is a register.
1255 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1256 'o' output an operator. */
1257 static void
1258 sh_print_operand (FILE *stream, rtx x, int code)
1260 int regno;
1261 machine_mode mode;
1263 switch (code)
1265 tree trapa_attr;
1267 case '.':
1268 if (final_sequence
1269 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1270 && get_attr_length (final_sequence->insn (1)))
1271 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1272 break;
1273 case ',':
1274 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1275 break;
1276 case '@':
1277 trapa_attr = lookup_attribute ("trap_exit",
1278 DECL_ATTRIBUTES (current_function_decl));
1279 if (trapa_attr)
1280 fprintf (stream, "trapa #%ld",
1281 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1282 else if (sh_cfun_interrupt_handler_p ())
1284 if (sh_cfun_resbank_handler_p ())
1285 fprintf (stream, "resbank\n");
1286 fprintf (stream, "rte");
1288 else
1289 fprintf (stream, "rts");
1290 break;
1291 case '#':
1292 /* Output a nop if there's nothing in the delay slot. */
1293 if (dbr_sequence_length () == 0)
1294 fprintf (stream, "\n\tnop");
1295 break;
1296 case '\'':
1298 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1300 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1301 fputs ("/u", stream);
1302 break;
1304 case '>':
1305 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1307 fputs ("\t! target: ", stream);
1308 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1310 break;
1311 case 'O':
1312 x = mark_constant_pool_use (x);
1313 output_addr_const (stream, x);
1314 break;
1315 /* N.B.: %R / %S / %T adjust memory addresses by four.
1316 For SHMEDIA, that means they can be used to access the first and
1317 second 32 bit part of a 64 bit (or larger) value that
1318 might be held in floating point registers or memory.
1319 While they can be used to access 64 bit parts of a larger value
1320 held in general purpose registers, that won't work with memory -
1321 neither for fp registers, since the frxx names are used. */
1322 case 'R':
1323 if (REG_P (x) || GET_CODE (x) == SUBREG)
1325 regno = true_regnum (x);
1326 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1327 fputs (reg_names[regno], (stream));
1329 else if (MEM_P (x))
1331 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1332 sh_print_operand_address (stream, XEXP (x, 0));
1334 else
1336 rtx sub = NULL_RTX;
1338 mode = GET_MODE (x);
1339 if (mode == VOIDmode)
1340 mode = DImode;
1341 if (GET_MODE_SIZE (mode) >= 8)
1342 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1343 if (sub)
1344 sh_print_operand (stream, sub, 0);
1345 else
1346 output_operand_lossage ("invalid operand to %%R");
1348 break;
1349 case 'S':
1350 if (REG_P (x) || GET_CODE (x) == SUBREG)
1352 regno = true_regnum (x);
1353 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1354 fputs (reg_names[regno], (stream));
1356 else if (MEM_P (x))
1358 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1359 sh_print_operand_address (stream, XEXP (x, 0));
1361 else
1363 rtx sub = NULL_RTX;
1365 mode = GET_MODE (x);
1366 if (mode == VOIDmode)
1367 mode = DImode;
1368 if (GET_MODE_SIZE (mode) >= 8)
1369 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1370 if (sub)
1371 sh_print_operand (stream, sub, 0);
1372 else
1373 output_operand_lossage ("invalid operand to %%S");
1375 break;
1376 case 'T':
1377 /* Next word of a double. */
1378 switch (GET_CODE (x))
1380 case REG:
1381 fputs (reg_names[REGNO (x) + 1], (stream));
1382 break;
1383 case MEM:
1384 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1385 && GET_CODE (XEXP (x, 0)) != POST_INC)
1386 x = adjust_address (x, SImode, 4);
1387 sh_print_operand_address (stream, XEXP (x, 0));
1388 break;
1389 default:
1390 break;
1392 break;
1394 case 't':
1395 gcc_assert (MEM_P (x));
1396 x = XEXP (x, 0);
1397 switch (GET_CODE (x))
1399 case REG:
1400 case SUBREG:
1401 sh_print_operand (stream, x, 0);
1402 break;
1403 default:
1404 break;
1406 break;
1408 case 'o':
1409 switch (GET_CODE (x))
1411 case PLUS: fputs ("add", stream); break;
1412 case MINUS: fputs ("sub", stream); break;
1413 case MULT: fputs ("mul", stream); break;
1414 case DIV: fputs ("div", stream); break;
1415 case EQ: fputs ("eq", stream); break;
1416 case NE: fputs ("ne", stream); break;
1417 case GT: case LT: fputs ("gt", stream); break;
1418 case GE: case LE: fputs ("ge", stream); break;
1419 case GTU: case LTU: fputs ("gtu", stream); break;
1420 case GEU: case LEU: fputs ("geu", stream); break;
1421 default:
1422 break;
1424 break;
1425 case 'M':
1426 if (TARGET_SHMEDIA)
1428 if (MEM_P (x)
1429 && GET_CODE (XEXP (x, 0)) == PLUS
1430 && (REG_P (XEXP (XEXP (x, 0), 1))
1431 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1432 fputc ('x', stream);
1434 else
1436 if (MEM_P (x))
1438 switch (GET_MODE (x))
1440 case QImode: fputs (".b", stream); break;
1441 case HImode: fputs (".w", stream); break;
1442 case SImode: fputs (".l", stream); break;
1443 case SFmode: fputs (".s", stream); break;
1444 case DFmode: fputs (".d", stream); break;
1445 default: gcc_unreachable ();
1449 break;
1451 case 'm':
1452 gcc_assert (MEM_P (x));
1453 x = XEXP (x, 0);
1454 /* Fall through. */
1455 case 'U':
1456 switch (GET_CODE (x))
1458 case REG:
1459 case SUBREG:
1460 sh_print_operand (stream, x, 0);
1461 fputs (", 0", stream);
1462 break;
1464 case PLUS:
1465 sh_print_operand (stream, XEXP (x, 0), 0);
1466 fputs (", ", stream);
1467 sh_print_operand (stream, XEXP (x, 1), 0);
1468 break;
1470 default:
1471 gcc_unreachable ();
1473 break;
1475 case 'V':
1477 int num = exact_log2 (INTVAL (x));
1478 gcc_assert (num >= 0);
1479 fprintf (stream, "#%d", num);
1481 break;
1483 case 'W':
1485 int num = exact_log2 (~INTVAL (x));
1486 gcc_assert (num >= 0);
1487 fprintf (stream, "#%d", num);
1489 break;
1491 case 'd':
1492 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1494 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1495 break;
1497 case 'N':
1498 if (x == CONST0_RTX (GET_MODE (x)))
1500 fprintf ((stream), "r63");
1501 break;
1503 goto default_output;
1504 case 'u':
1505 if (CONST_INT_P (x))
1507 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1508 break;
1510 /* Fall through. */
1512 default_output:
1513 default:
1514 regno = 0;
1515 mode = GET_MODE (x);
1517 switch (GET_CODE (x))
1519 case TRUNCATE:
1521 rtx inner = XEXP (x, 0);
1522 int offset = 0;
1523 machine_mode inner_mode;
1525 /* We might see SUBREGs with vector mode registers inside. */
1526 if (GET_CODE (inner) == SUBREG
1527 && (GET_MODE_SIZE (GET_MODE (inner))
1528 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1529 && subreg_lowpart_p (inner))
1530 inner = SUBREG_REG (inner);
1531 if (CONST_INT_P (inner))
1533 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1534 goto default_output;
1536 inner_mode = GET_MODE (inner);
1537 if (GET_CODE (inner) == SUBREG
1538 && (GET_MODE_SIZE (GET_MODE (inner))
1539 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1540 && REG_P (SUBREG_REG (inner)))
1542 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1543 GET_MODE (SUBREG_REG (inner)),
1544 SUBREG_BYTE (inner),
1545 GET_MODE (inner));
1546 inner = SUBREG_REG (inner);
1548 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1549 abort ();
1550 /* Floating point register pairs are always big endian;
1551 general purpose registers are 64 bit wide. */
1552 regno = REGNO (inner);
1553 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1554 - HARD_REGNO_NREGS (regno, mode))
1555 + offset;
1556 x = inner;
1557 goto reg;
1559 case SIGN_EXTEND:
1560 x = XEXP (x, 0);
1561 goto reg;
1562 /* FIXME: We need this on SHmedia32 because reload generates
1563 some sign-extended HI or QI loads into DImode registers
1564 but, because Pmode is SImode, the address ends up with a
1565 subreg:SI of the DImode register. Maybe reload should be
1566 fixed so as to apply alter_subreg to such loads? */
1567 case IF_THEN_ELSE:
1568 gcc_assert (trapping_target_operand (x, VOIDmode));
1569 x = XEXP (XEXP (x, 2), 0);
1570 goto default_output;
1571 case SUBREG:
1572 gcc_assert (SUBREG_BYTE (x) == 0
1573 && REG_P (SUBREG_REG (x)));
1575 x = SUBREG_REG (x);
1576 /* Fall through. */
1578 reg:
1579 case REG:
1580 regno += REGNO (x);
1581 if (FP_REGISTER_P (regno)
1582 && mode == V16SFmode)
1583 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1584 else if (FP_REGISTER_P (REGNO (x))
1585 && mode == V4SFmode)
1586 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1587 else if (REG_P (x)
1588 && mode == V2SFmode)
1589 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1590 else if (FP_REGISTER_P (REGNO (x))
1591 && GET_MODE_SIZE (mode) > 4)
1592 fprintf ((stream), "d%s", reg_names[regno] + 1);
1593 else
1594 fputs (reg_names[regno], (stream));
1595 break;
1597 case MEM:
1598 output_address (XEXP (x, 0));
1599 break;
1601 default:
1602 if (TARGET_SH1)
1603 fputc ('#', stream);
1604 output_addr_const (stream, x);
1605 break;
1607 break;
1611 static bool
1612 sh_print_operand_punct_valid_p (unsigned char code)
1614 return (code == '.' || code == '#' || code == '@' || code == ','
1615 || code == '$' || code == '\'' || code == '>');
1618 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1619 static bool
1620 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1622 if (GET_CODE (x) == UNSPEC)
1624 switch (XINT (x, 1))
1626 case UNSPEC_DATALABEL:
1627 fputs ("datalabel ", file);
1628 output_addr_const (file, XVECEXP (x, 0, 0));
1629 break;
1630 case UNSPEC_PIC:
1631 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1632 output_addr_const (file, XVECEXP (x, 0, 0));
1633 break;
1634 case UNSPEC_GOT:
1635 output_addr_const (file, XVECEXP (x, 0, 0));
1636 fputs ("@GOT", file);
1637 break;
1638 case UNSPEC_GOTOFF:
1639 output_addr_const (file, XVECEXP (x, 0, 0));
1640 fputs ("@GOTOFF", file);
1641 break;
1642 case UNSPEC_PLT:
1643 output_addr_const (file, XVECEXP (x, 0, 0));
1644 fputs ("@PLT", file);
1645 break;
1646 case UNSPEC_GOTPLT:
1647 output_addr_const (file, XVECEXP (x, 0, 0));
1648 fputs ("@GOTPLT", file);
1649 break;
1650 case UNSPEC_DTPOFF:
1651 output_addr_const (file, XVECEXP (x, 0, 0));
1652 fputs ("@DTPOFF", file);
1653 break;
1654 case UNSPEC_GOTTPOFF:
1655 output_addr_const (file, XVECEXP (x, 0, 0));
1656 fputs ("@GOTTPOFF", file);
1657 break;
1658 case UNSPEC_TPOFF:
1659 output_addr_const (file, XVECEXP (x, 0, 0));
1660 fputs ("@TPOFF", file);
1661 break;
1662 case UNSPEC_CALLER:
1664 char name[32];
1665 /* LPCS stands for Label for PIC Call Site. */
1666 targetm.asm_out.generate_internal_label (name, "LPCS",
1667 INTVAL (XVECEXP (x, 0, 0)));
1668 assemble_name (file, name);
1670 break;
1671 case UNSPEC_EXTRACT_S16:
1672 case UNSPEC_EXTRACT_U16:
1674 rtx val, shift;
1676 val = XVECEXP (x, 0, 0);
1677 shift = XVECEXP (x, 0, 1);
1678 fputc ('(', file);
1679 if (shift != const0_rtx)
1680 fputc ('(', file);
1681 if (GET_CODE (val) == CONST
1682 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1684 fputc ('(', file);
1685 output_addr_const (file, val);
1686 fputc (')', file);
1688 else
1689 output_addr_const (file, val);
1690 if (shift != const0_rtx)
1692 fputs (" >> ", file);
1693 output_addr_const (file, shift);
1694 fputc (')', file);
1696 fputs (" & 65535)", file);
1698 break;
1699 case UNSPEC_SYMOFF:
1700 output_addr_const (file, XVECEXP (x, 0, 0));
1701 fputc ('-', file);
1702 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1704 fputc ('(', file);
1705 output_addr_const (file, XVECEXP (x, 0, 1));
1706 fputc (')', file);
1708 else
1709 output_addr_const (file, XVECEXP (x, 0, 1));
1710 break;
1711 case UNSPEC_PCREL_SYMOFF:
1712 output_addr_const (file, XVECEXP (x, 0, 0));
1713 fputs ("-(", file);
1714 output_addr_const (file, XVECEXP (x, 0, 1));
1715 fputs ("-.)", file);
1716 break;
1717 default:
1718 return false;
1720 return true;
1722 else
1723 return false;
1726 /* Encode symbol attributes of a SYMBOL_REF into its
1727 SYMBOL_REF_FLAGS. */
1728 static void
1729 sh_encode_section_info (tree decl, rtx rtl, int first)
1731 default_encode_section_info (decl, rtl, first);
1733 if (TREE_CODE (decl) == FUNCTION_DECL
1734 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1735 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1738 /* Prepare operands for a move define_expand; specifically, one of the
1739 operands must be in a register. */
1740 void
1741 prepare_move_operands (rtx operands[], machine_mode mode)
1743 if ((mode == SImode || mode == DImode)
1744 && flag_pic
1745 && ! ((mode == Pmode || mode == ptr_mode)
1746 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1748 rtx temp;
1749 if (SYMBOLIC_CONST_P (operands[1]))
1751 if (MEM_P (operands[0]))
1752 operands[1] = force_reg (Pmode, operands[1]);
1753 else if (TARGET_SHMEDIA
1754 && GET_CODE (operands[1]) == LABEL_REF
1755 && target_reg_operand (operands[0], mode))
1756 /* It's ok. */;
1757 else
1759 temp = (!can_create_pseudo_p ()
1760 ? operands[0]
1761 : gen_reg_rtx (Pmode));
1762 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1765 else if (GET_CODE (operands[1]) == CONST
1766 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1767 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1769 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1770 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1771 mode, temp);
1772 operands[1] = expand_binop (mode, add_optab, temp,
1773 XEXP (XEXP (operands[1], 0), 1),
1774 (!can_create_pseudo_p ()
1775 ? temp
1776 : gen_reg_rtx (Pmode)),
1777 0, OPTAB_LIB_WIDEN);
1781 if (! reload_in_progress && ! reload_completed)
1783 /* Copy the source to a register if neither operand is a register. */
1784 if (! register_operand (operands[0], mode)
1785 && ! sh_register_operand (operands[1], mode))
1786 operands[1] = copy_to_mode_reg (mode, operands[1]);
1788 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1790 /* This is like change_address_1 (operands[0], mode, 0, 1),
1791 except that we can't use that function because it is static. */
1792 rtx new_rtx = change_address (operands[0], mode, 0);
1793 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1794 operands[0] = new_rtx;
1797 /* This case can happen while generating code to move the result
1798 of a library call to the target. Reject `st r0,@(rX,rY)' because
1799 reload will fail to find a spill register for rX, since r0 is already
1800 being used for the source. */
1801 else if (TARGET_SH1
1802 && refers_to_regno_p (R0_REG, operands[1])
1803 && MEM_P (operands[0])
1804 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1805 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1806 operands[1] = copy_to_mode_reg (mode, operands[1]);
1808 /* When the displacement addressing is used, RA will assign r0 to
1809 the pseudo register operand for the QI/HImode load/store.
1810 This tends to make a long live range for R0 and might cause
1811 anomalous register spills in some cases with LRA. See PR
1812 target/55212.
1813 We split such a load/store into two move insns via r0 so as to
1814 shorten the R0 live range. It will make some code worse but will
1815 win on average for LRA. */
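/* For example, with this splitting a QImode load from a displacement
   address can end up as something like
     mov.b  @(2,r4),r0
     mov    r0,r1
   instead of requiring the allocator to keep the pseudo itself in r0 for
   its whole live range.  (Illustrative register choices only; the actual
   registers depend on allocation.)  */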
1816 else if (sh_lra_p ()
1817 && TARGET_SH1 && ! TARGET_SH2A
1818 && (mode == QImode || mode == HImode)
1819 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1820 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1822 bool load_p = REG_P (operands[0]);
1823 rtx reg = operands[load_p ? 0 : 1];
1824 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1826 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1827 && GET_CODE (adr) == PLUS
1828 && REG_P (XEXP (adr, 0))
1829 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1830 && CONST_INT_P (XEXP (adr, 1))
1831 && INTVAL (XEXP (adr, 1)) != 0
1832 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1834 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1835 emit_move_insn (r0_rtx, operands[1]);
1836 operands[1] = r0_rtx;
1841 if (mode == Pmode || mode == ptr_mode)
1843 rtx op0, op1, opc;
1844 enum tls_model tls_kind;
1846 op0 = operands[0];
1847 op1 = operands[1];
1848 if (GET_CODE (op1) == CONST
1849 && GET_CODE (XEXP (op1, 0)) == PLUS
1850 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1851 != TLS_MODEL_NONE))
1853 opc = XEXP (XEXP (op1, 0), 1);
1854 op1 = XEXP (XEXP (op1, 0), 0);
1856 else
1857 opc = NULL_RTX;
1859 if (! reload_in_progress && ! reload_completed
1860 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1862 rtx tga_op1, tga_ret, tmp, tmp2;
1864 if (! flag_pic
1865 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1866 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1867 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1869 /* Don't schedule insns for getting GOT address when
1870 the first scheduling pass is enabled, to avoid spill
1871 failures for R0. */
1872 if (flag_schedule_insns)
1873 emit_insn (gen_blockage ());
1874 emit_insn (gen_GOTaddr2picreg ());
1875 emit_use (gen_rtx_REG (SImode, PIC_REG));
1876 if (flag_schedule_insns)
1877 emit_insn (gen_blockage ());
1880 switch (tls_kind)
1882 case TLS_MODEL_GLOBAL_DYNAMIC:
1883 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1884 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1885 tmp = gen_reg_rtx (Pmode);
1886 emit_move_insn (tmp, tga_ret);
1887 op1 = tmp;
1888 break;
1890 case TLS_MODEL_LOCAL_DYNAMIC:
1891 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1892 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1894 tmp = gen_reg_rtx (Pmode);
1895 emit_move_insn (tmp, tga_ret);
1897 if (register_operand (op0, Pmode))
1898 tmp2 = op0;
1899 else
1900 tmp2 = gen_reg_rtx (Pmode);
1902 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1903 op1 = tmp2;
1904 break;
1906 case TLS_MODEL_INITIAL_EXEC:
1907 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1908 tmp = gen_sym2GOTTPOFF (op1);
1909 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1910 op1 = tga_op1;
1911 break;
1913 case TLS_MODEL_LOCAL_EXEC:
1914 tmp2 = gen_reg_rtx (Pmode);
1915 emit_insn (gen_store_gbr (tmp2));
1916 tmp = gen_reg_rtx (Pmode);
1917 emit_insn (gen_symTPOFF2reg (tmp, op1));
1919 if (register_operand (op0, Pmode))
1920 op1 = op0;
1921 else
1922 op1 = gen_reg_rtx (Pmode);
1924 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1925 break;
1927 default:
1928 gcc_unreachable ();
1930 if (opc)
1931 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1932 operands[1] = op1;
1937 /* Implement the canonicalize_comparison target hook for the combine
1938 pass. For the target hook this function is invoked via
1939 sh_canonicalize_comparison. This function is also re-used to
1940 canonicalize comparisons in cbranch pattern expanders. */
1941 static void
1942 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1943 machine_mode mode,
1944 bool op0_preserve_value)
1946 /* When invoked from within the combine pass the mode is not specified,
1947 so try to get it from one of the operands. */
1948 if (mode == VOIDmode)
1949 mode = GET_MODE (op0);
1950 if (mode == VOIDmode)
1951 mode = GET_MODE (op1);
1953 // We need to have a mode to do something useful here.
1954 if (mode == VOIDmode)
1955 return;
1957 // Currently, we don't deal with floats here.
1958 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1959 return;
1961 // Make sure that the constant operand is the second operand.
1962 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1964 if (op0_preserve_value)
1965 return;
1967 std::swap (op0, op1);
1968 cmp = swap_condition (cmp);
1971 if (CONST_INT_P (op1))
1973 /* Try to adjust the constant operand in such a way that available
1974 comparison insns can be utilized better and the constant can be
1975 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1976 constant pool. */
1977 const HOST_WIDE_INT val = INTVAL (op1);
1979 /* x > -1 --> x >= 0
1980 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1981 x <= -1 --> x < 0
1982 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1983 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1985 cmp = cmp == GT ? GE : LT;
1986 op1 = gen_int_mode (val + 1, mode);
1989 /* x >= 1 --> x > 0
1990 x >= 0x80 --> x > 0x7F
1991 x < 1 --> x <= 0
1992 x < 0x80 --> x <= 0x7F */
1993 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1995 cmp = cmp == GE ? GT : LE;
1996 op1 = gen_int_mode (val - 1, mode);
1999 /* unsigned x >= 1 --> x != 0
2000 unsigned x < 1 --> x == 0 */
2001 else if (val == 1 && (cmp == GEU || cmp == LTU))
2003 cmp = cmp == GEU ? NE : EQ;
2004 op1 = CONST0_RTX (mode);
2007 /* unsigned x >= 0x80 --> unsigned x > 0x7F
2008 unsigned x < 0x80 --> unsigned x < 0x7F */
2009 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
2011 cmp = cmp == GEU ? GTU : LEU;
2012 op1 = gen_int_mode (val - 1, mode);
2015 /* unsigned x > 0 --> x != 0
2016 unsigned x <= 0 --> x == 0 */
2017 else if (val == 0 && (cmp == GTU || cmp == LEU))
2018 cmp = cmp == GTU ? NE : EQ;
2020 /* unsigned x > 0x7FFFFFFF --> signed x < 0
2021 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
2022 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2023 && val == 0x7FFFFFFF)
2025 cmp = cmp == GTU ? LT : GE;
2026 op1 = const0_rtx;
2029 /* unsigned x >= 0x80000000 --> signed x < 0
2030 unsigned x < 0x80000000 --> signed x >= 0 */
2031 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2032 && (unsigned HOST_WIDE_INT)val
2033 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2035 cmp = cmp == GEU ? LT : GE;
2036 op1 = const0_rtx;
2041 /* This function implements the canonicalize_comparison target hook.
2042 This wrapper around the internally used sh_canonicalize_comparison
2043 function is needed to do the enum rtx_code <-> int conversion.
2044 Target hooks cannot use enum rtx_code in its definition. */
2045 static void
2046 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2047 bool op0_preserve_value)
2049 enum rtx_code tmp_code = (enum rtx_code)*code;
2050 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2051 VOIDmode, op0_preserve_value);
2052 *code = (int)tmp_code;
2055 /* This function implements the legitimate_combined_insn target hook,
2056 which the combine pass uses to reject combined insns early, before
2057 it tries to recog the insn and determine its cost. */
2058 static bool
2059 sh_legitimate_combined_insn (rtx_insn* insn)
2061 /* Reject combinations of memory loads and zero extensions, as these
2062 interfere with other combine patterns such as zero extracts and bit
2063 tests. The SH2A movu.{b|w} insns are formed later in the
2064 'sh_optimize_extu_exts' pass after combine/split1. */
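/* For example, a combined pattern of the form
     (set (reg:SI ...) (zero_extend:SI (mem:QI ...)))
   is rejected by the check below; HImode memory operands are treated the
   same way.  */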
2065 rtx p = PATTERN (insn);
2066 if (GET_CODE (p) == SET
2067 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2068 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2069 && MEM_P (XEXP (XEXP (p, 1), 0)))
2070 return false;
2072 return true;
2075 bool
2076 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2078 *p1 = T_REG;
2079 *p2 = INVALID_REGNUM;
2080 return true;
2083 enum rtx_code
2084 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2085 enum rtx_code comparison)
2087 /* The scratch reg is only available when this is invoked from within
2088 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2089 rtx scratch = NULL_RTX;
2091 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2092 comparison = GET_CODE (operands[0]);
2093 else
2094 scratch = operands[4];
2096 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2097 mode, false);
2099 /* Notice that this function is also invoked after reload by
2100 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2101 rtx op1 = operands[1];
2103 if (can_create_pseudo_p ())
2104 operands[1] = force_reg (mode, op1);
2105 /* When we are handling DImode comparisons, we want to keep constants so
2106 that we can optimize the component comparisons; however, memory loads
2107 are better issued as a whole so that they can be scheduled well.
2108 SImode equality comparisons allow I08 constants, but only when they
2109 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2110 into a register, that register might as well be r0, and we allow the
2111 constant. If it is already in a register, this is likely to be
2112 allocated to a different hard register, thus we load the constant into
2113 a register unless it is zero. */
2114 if (!REG_P (operands[2])
2115 && (!CONST_INT_P (operands[2])
2116 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2117 && ((comparison != EQ && comparison != NE)
2118 || (REG_P (op1) && REGNO (op1) != R0_REG)
2119 || !satisfies_constraint_I08 (operands[2])))))
2121 if (scratch && GET_MODE (scratch) == mode)
2123 emit_move_insn (scratch, operands[2]);
2124 operands[2] = scratch;
2126 else if (can_create_pseudo_p ())
2127 operands[2] = force_reg (mode, operands[2]);
2129 return comparison;
2132 void
2133 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2135 rtx (*branch_expander) (rtx) = gen_branch_true;
2136 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2137 switch (comparison)
2139 case NE: case LT: case LE: case LTU: case LEU:
2140 comparison = reverse_condition (comparison);
2141 branch_expander = gen_branch_false;
2142 default: ;
2144 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2145 gen_rtx_fmt_ee (comparison, SImode,
2146 operands[1], operands[2])));
2147 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2148 if (probability >= 0)
2149 add_int_reg_note (jump, REG_BR_PROB, probability);
2152 /* ??? How should we distribute probabilities when more than one branch
2153 is generated? So far we only have some ad-hoc observations:
2154 - If the operands are random, they are likely to differ in both parts.
2155 - If comparing items in a hash chain, the operands are random or equal;
2156 operation should be EQ or NE.
2157 - If items are searched in an ordered tree from the root, we can expect
2158 the highpart to be unequal about half of the time; operation should be
2159 an inequality comparison, operands non-constant, and overall probability
2160 about 50%. Likewise for quicksort.
2161 - Range checks will often be made against constants. Even if we assume for
2162 simplicity an even distribution of the non-constant operand over a
2163 sub-range here, the same probability could be generated with differently
2164 wide sub-ranges - as long as the ratio of the part of the subrange that
2165 is before the threshold to the part that comes after the threshold stays
2166 the same. Thus, we can't really tell anything here;
2167 assuming random distribution is at least simple. */
2169 bool
2170 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2172 enum rtx_code msw_taken, msw_skip, lsw_taken;
2173 rtx_code_label *skip_label = NULL;
2174 rtx op1h, op1l, op2h, op2l;
2175 int num_branches;
2176 int prob, rev_prob;
2177 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2178 rtx scratch = operands[4];
2180 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2181 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2182 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2183 op1l = gen_lowpart (SImode, operands[1]);
2184 op2l = gen_lowpart (SImode, operands[2]);
2185 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2186 prob = split_branch_probability;
2187 rev_prob = REG_BR_PROB_BASE - prob;
2188 switch (comparison)
2190 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2191 That costs 1 cycle more when the first branch can be predicted taken,
2192 but saves us mispredicts because only one branch needs prediction.
2193 It also enables generating the cmpeqdi_t-1 pattern. */
2194 case EQ:
2195 if (TARGET_CMPEQDI_T)
2197 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2198 emit_jump_insn (gen_branch_true (operands[3]));
2199 return true;
2201 msw_skip = NE;
2202 lsw_taken = EQ;
2203 if (prob >= 0)
2205 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2206 msw_skip_prob = rev_prob;
2207 if (REG_BR_PROB_BASE <= 65535)
2208 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2209 else
2211 lsw_taken_prob
2212 = (prob
2213 ? (REG_BR_PROB_BASE
2214 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2215 / ((gcov_type) prob << 32)))
2216 : 0);
2219 break;
2220 case NE:
2221 if (TARGET_CMPEQDI_T)
2223 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2224 emit_jump_insn (gen_branch_false (operands[3]));
2225 return true;
2227 msw_taken = NE;
2228 msw_taken_prob = prob;
2229 lsw_taken = NE;
2230 lsw_taken_prob = 0;
2231 break;
2232 case GTU: case GT:
2233 msw_taken = comparison;
2234 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2235 break;
2236 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2237 msw_skip = swap_condition (msw_taken);
2238 lsw_taken = GTU;
2239 break;
2240 case GEU: case GE:
2241 if (op2l == CONST0_RTX (SImode))
2242 msw_taken = comparison;
2243 else
2245 msw_taken = comparison == GE ? GT : GTU;
2246 msw_skip = swap_condition (msw_taken);
2247 lsw_taken = GEU;
2249 break;
2250 case LTU: case LT:
2251 msw_taken = comparison;
2252 if (op2l == CONST0_RTX (SImode))
2253 break;
2254 msw_skip = swap_condition (msw_taken);
2255 lsw_taken = LTU;
2256 break;
2257 case LEU: case LE:
2258 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2259 msw_taken = comparison;
2260 else
2262 lsw_taken = LEU;
2263 if (comparison == LE)
2264 msw_taken = LT;
2265 else if (op2h != CONST0_RTX (SImode))
2266 msw_taken = LTU;
2267 else
2269 msw_skip = swap_condition (LTU);
2270 break;
2272 msw_skip = swap_condition (msw_taken);
2274 break;
2275 default: return false;
2277 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2278 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2279 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2280 if (comparison != EQ && comparison != NE && num_branches > 1)
2282 if (!CONSTANT_P (operands[2])
2283 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2284 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2286 msw_taken_prob = prob / 2U;
2287 msw_skip_prob
2288 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2289 lsw_taken_prob = prob;
2291 else
2293 msw_taken_prob = prob;
2294 msw_skip_prob = REG_BR_PROB_BASE;
2295 /* ??? If we have a constant op2h, should we use that when
2296 calculating lsw_taken_prob? */
2297 lsw_taken_prob = prob;
2300 operands[1] = op1h;
2301 operands[2] = op2h;
2302 operands[4] = NULL_RTX;
2303 if (reload_completed
2304 && ! arith_reg_or_0_operand (op2h, SImode)
2305 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2306 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2307 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2309 emit_move_insn (scratch, operands[2]);
2310 operands[2] = scratch;
2312 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2313 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2314 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2316 rtx taken_label = operands[3];
2318 /* Operands were possibly modified, but msw_skip doesn't expect this.
2319 Always use the original ones. */
2320 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2322 operands[1] = op1h;
2323 operands[2] = op2h;
2324 if (reload_completed
2325 && ! arith_reg_or_0_operand (op2h, SImode)
2326 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2328 emit_move_insn (scratch, operands[2]);
2329 operands[2] = scratch;
2333 operands[3] = skip_label = gen_label_rtx ();
2334 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2335 operands[3] = taken_label;
2337 operands[1] = op1l;
2338 operands[2] = op2l;
2339 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2341 if (reload_completed
2342 && ! arith_reg_or_0_operand (op2l, SImode)
2343 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2345 emit_move_insn (scratch, operands[2]);
2346 operands[2] = scratch;
2348 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2350 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2351 emit_label (skip_label);
2352 return true;
2355 /* Given an operand, return 1 if the evaluated operand plugged into an
2356 if_then_else will result in a branch_true, 0 if branch_false, or
2357 -1 if neither applies. The truth table goes like this:
2359 op | cmpval | code | result
2360 ---------+--------+---------+--------------------
2361 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2362 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2363 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2364 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2365 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2366 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2367 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2368 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2370 sh_eval_treg_value (rtx op)
2372 if (t_reg_operand (op, GET_MODE (op)))
2373 return 1;
2374 if (negt_reg_operand (op, GET_MODE (op)))
2375 return 0;
2377 rtx_code code = GET_CODE (op);
2378 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2379 return -1;
2381 int cmpop = code == EQ ? 1 : 0;
2382 int cmpval = INTVAL (XEXP (op, 1));
2383 if (cmpval != 0 && cmpval != 1)
2384 return -1;
2386 int t;
2387 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2388 t = 0;
2389 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2390 t = 1;
2391 else
2392 return -1;
2394 return t ^ (cmpval == cmpop);
2397 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2398 of floating-point comparisons. */
2399 static void
2400 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2402 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2403 && GET_CODE (insn) != PARALLEL)
2405 insn = gen_rtx_PARALLEL (VOIDmode,
2406 gen_rtvec (3, insn,
2407 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2408 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2410 emit_insn (insn);
2413 /* Prepare the operands for an scc instruction; make sure that the
2414 compare has been done and the result is in T_REG. */
2415 void
2416 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2418 rtx t_reg = get_t_reg_rtx ();
2419 enum rtx_code oldcode = code;
2420 machine_mode mode;
2422 /* First need a compare insn. */
2423 switch (code)
2425 case NE:
2426 /* It isn't possible to handle this case. */
2427 gcc_unreachable ();
2428 case LT:
2429 code = GT;
2430 break;
2431 case LE:
2432 code = GE;
2433 break;
2434 case LTU:
2435 code = GTU;
2436 break;
2437 case LEU:
2438 code = GEU;
2439 break;
2440 default:
2441 break;
2443 if (code != oldcode)
2444 std::swap (op0, op1);
2446 mode = GET_MODE (op0);
2447 if (mode == VOIDmode)
2448 mode = GET_MODE (op1);
2450 op0 = force_reg (mode, op0);
2451 if ((code != EQ && code != NE
2452 && (op1 != const0_rtx
2453 || code == GTU || code == GEU || code == LTU || code == LEU))
2454 || (mode == DImode && op1 != const0_rtx)
2455 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2456 op1 = force_reg (mode, op1);
2458 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2459 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2460 mode);
2464 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2465 rtx op0, rtx op1)
2467 rtx target = gen_reg_rtx (SImode);
2468 rtx tmp;
2470 gcc_assert (TARGET_SHMEDIA);
2471 switch (code)
2473 case EQ:
2474 case GT:
2475 case LT:
2476 case UNORDERED:
2477 case GTU:
2478 case LTU:
2479 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2480 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2481 code = NE;
2482 break;
2484 case NE:
2485 case GE:
2486 case LE:
2487 case ORDERED:
2488 case GEU:
2489 case LEU:
2490 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2491 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2492 code = EQ;
2493 break;
2495 case UNEQ:
2496 case UNGE:
2497 case UNGT:
2498 case UNLE:
2499 case UNLT:
2500 case LTGT:
2501 return NULL_RTX;
2503 default:
2504 gcc_unreachable ();
2507 if (mode == DImode)
2509 rtx t2 = gen_reg_rtx (DImode);
2510 emit_insn (gen_extendsidi2 (t2, target));
2511 target = t2;
2514 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2517 /* Called from the md file, set up the operands of a compare instruction. */
2518 void
2519 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2521 enum rtx_code code = GET_CODE (operands[0]);
2522 enum rtx_code branch_code;
2523 rtx op0 = operands[1];
2524 rtx op1 = operands[2];
2525 rtx insn;
2526 bool need_ccmpeq = false;
2528 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2530 op0 = force_reg (mode, op0);
2531 op1 = force_reg (mode, op1);
2533 else
2535 if (code != EQ || mode == DImode)
2537 /* Force args into regs, since we can't use constants here. */
2538 op0 = force_reg (mode, op0);
2539 if (op1 != const0_rtx || code == GTU || code == GEU)
2540 op1 = force_reg (mode, op1);
2544 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2546 if (code == LT
2547 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2548 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2550 std::swap (op0, op1);
2551 code = swap_condition (code);
2554 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2555 if (code == GE)
2557 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2558 need_ccmpeq = true;
2559 code = GT;
2562 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2563 to EQ/GT respectively. */
2564 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2567 switch (code)
2569 case EQ:
2570 case GT:
2571 case GE:
2572 case GTU:
2573 case GEU:
2574 branch_code = code;
2575 break;
2576 case NE:
2577 case LT:
2578 case LE:
2579 case LTU:
2580 case LEU:
2581 branch_code = reverse_condition (code);
2582 break;
2583 default:
2584 gcc_unreachable ();
2587 insn = gen_rtx_SET (VOIDmode,
2588 get_t_reg_rtx (),
2589 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2591 sh_emit_set_t_insn (insn, mode);
2592 if (need_ccmpeq)
2593 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2595 if (branch_code == code)
2596 emit_jump_insn (gen_branch_true (operands[3]));
2597 else
2598 emit_jump_insn (gen_branch_false (operands[3]));
2601 void
2602 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2604 enum rtx_code code = GET_CODE (operands[1]);
2605 rtx op0 = operands[2];
2606 rtx op1 = operands[3];
2607 rtx_code_label *lab = NULL;
2608 bool invert = false;
2610 op0 = force_reg (mode, op0);
2611 if ((code != EQ && code != NE
2612 && (op1 != const0_rtx
2613 || code == GTU || code == GEU || code == LTU || code == LEU))
2614 || (mode == DImode && op1 != const0_rtx)
2615 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2616 op1 = force_reg (mode, op1);
2618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2620 if (code == LT || code == LE)
2622 std::swap (op0, op1);
2623 code = swap_condition (code);
2625 if (code == GE)
2627 if (TARGET_IEEE)
2629 lab = gen_label_rtx ();
2630 sh_emit_scc_to_t (EQ, op0, op1);
2631 emit_jump_insn (gen_branch_true (lab));
2632 code = GT;
2634 else
2636 code = LT;
2637 invert = true;
2642 if (code == NE)
2644 code = EQ;
2645 invert = true;
2648 sh_emit_scc_to_t (code, op0, op1);
2649 if (lab)
2650 emit_label (lab);
2651 if (invert)
2652 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2653 else
2654 emit_move_insn (operands[0], get_t_reg_rtx ());
2657 /* Functions to output assembly code. */
2659 /* Return a sequence of instructions to perform DI or DF move.
2661 Since the SH cannot move a DI or DF in one instruction, we have
2662 to take care when we see overlapping source and dest registers. */
2663 const char *
2664 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2665 machine_mode mode)
2667 rtx dst = operands[0];
2668 rtx src = operands[1];
2670 if (MEM_P (dst)
2671 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2672 return "mov.l %T1,%0" "\n"
2673 " mov.l %1,%0";
2675 if (register_operand (dst, mode)
2676 && register_operand (src, mode))
2678 if (REGNO (src) == MACH_REG)
2679 return "sts mach,%S0" "\n"
2680 " sts macl,%R0";
2682 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2683 when mov.d r1,r0 do r1->r0 then r2->r1. */
2684 if (REGNO (src) + 1 == REGNO (dst))
2685 return "mov %T1,%T0" "\n"
2686 " mov %1,%0";
2687 else
2688 return "mov %1,%0" "\n"
2689 " mov %T1,%T0";
2691 else if (CONST_INT_P (src))
2693 if (INTVAL (src) < 0)
2694 output_asm_insn ("mov #-1,%S0", operands);
2695 else
2696 output_asm_insn ("mov #0,%S0", operands);
2698 return "mov %1,%R0";
2700 else if (MEM_P (src))
2702 int ptrreg = -1;
2703 int dreg = REGNO (dst);
2704 rtx inside = XEXP (src, 0);
2706 switch (GET_CODE (inside))
2708 case REG:
2709 ptrreg = REGNO (inside);
2710 break;
2712 case SUBREG:
2713 ptrreg = subreg_regno (inside);
2714 break;
2716 case PLUS:
2717 ptrreg = REGNO (XEXP (inside, 0));
2718 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2719 an offsettable address. Unfortunately, offsettable addresses use
2720 QImode to check the offset, and a QImode offsettable address
2721 requires r0 for the other operand, which is not currently
2722 supported, so we can't use the 'o' constraint.
2723 Thus we must check for and handle r0+REG addresses here.
2724 We punt for now, since this is likely very rare. */
2725 gcc_assert (!REG_P (XEXP (inside, 1)));
2726 break;
2728 case LABEL_REF:
2729 return "mov.l %1,%0" "\n"
2730 " mov.l %1+4,%T0";
2731 case POST_INC:
2732 return "mov.l %1,%0" "\n"
2733 " mov.l %1,%T0";
2734 default:
2735 gcc_unreachable ();
2738 /* Work out the safe way to copy. Copy into the second half first. */
2739 if (dreg == ptrreg)
2740 return "mov.l %T1,%T0" "\n"
2741 " mov.l %1,%0";
2744 return "mov.l %1,%0" "\n"
2745 " mov.l %T1,%T0";
2748 /* Print an instruction which would have gone into a delay slot after
2749 another instruction, but couldn't because the other instruction expanded
2750 into a sequence where putting the slot insn at the end wouldn't work. */
2751 static void
2752 print_slot (rtx_sequence *seq)
2754 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2756 seq->insn (1)->set_deleted ();
2759 const char *
2760 output_far_jump (rtx_insn *insn, rtx op)
2762 struct { rtx lab, reg, op; } this_jmp;
2763 rtx_code_label *braf_base_lab = NULL;
2764 const char *jump;
2765 int far;
2766 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2767 rtx_insn *prev;
2769 this_jmp.lab = gen_label_rtx ();
2771 if (TARGET_SH2
2772 && offset >= -32764
2773 && offset - get_attr_length (insn) <= 32766)
2775 far = 0;
2776 jump = "mov.w %O0,%1" "\n"
2777 " braf %1";
2779 else
2781 far = 1;
2782 if (flag_pic)
2784 if (TARGET_SH2)
2785 jump = "mov.l %O0,%1" "\n"
2786 " braf %1";
2787 else
2788 jump = "mov.l r0,@-r15" "\n"
2789 " mova %O0,r0" "\n"
2790 " mov.l @r0,%1" "\n"
2791 " add r0,%1" "\n"
2792 " mov.l @r15+,r0" "\n"
2793 " jmp @%1";
2795 else
2796 jump = "mov.l %O0,%1" "\n"
2797 " jmp @%1";
2799 /* If we have a scratch register available, use it. */
2800 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2801 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2803 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2804 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2805 jump = "mov.l r1,@-r15" "\n"
2806 " mova %O0,r0" "\n"
2807 " mov.l @r0,r1" "\n"
2808 " add r1,r0" "\n"
2809 " mov.l @r15+,r1" "\n"
2810 " jmp @%1";
2811 output_asm_insn (jump, &this_jmp.lab);
2812 if (dbr_sequence_length ())
2813 print_slot (final_sequence);
2814 else
2815 output_asm_insn ("nop", 0);
2817 else
2819 /* Output the delay slot insn first if any. */
2820 if (dbr_sequence_length ())
2821 print_slot (final_sequence);
2823 this_jmp.reg = gen_rtx_REG (SImode, 13);
2824 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2825 Fortunately, MACL is fixed and call-clobbered, and we never
2826 need its value across jumps, so save r13 in it instead of on
2827 the stack. */
2828 if (TARGET_SH5)
2829 output_asm_insn ("lds r13,macl", 0);
2830 else
2831 output_asm_insn ("mov.l r13,@-r15", 0);
2832 output_asm_insn (jump, &this_jmp.lab);
2833 if (TARGET_SH5)
2834 output_asm_insn ("sts macl,r13", 0);
2835 else
2836 output_asm_insn ("mov.l @r15+,r13", 0);
2838 if (far && flag_pic && TARGET_SH2)
2840 braf_base_lab = gen_label_rtx ();
2841 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2842 CODE_LABEL_NUMBER (braf_base_lab));
2844 if (far)
2845 output_asm_insn (".align 2", 0);
2846 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2847 this_jmp.op = op;
2848 if (far && flag_pic)
2850 if (TARGET_SH2)
2851 this_jmp.lab = braf_base_lab;
2852 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2854 else
2855 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2856 return "";
2859 /* Local label counter, used for constants in the pool and inside
2860 pattern branches. */
2861 static int lf = 100;
2863 /* Output code for ordinary branches. */
2864 const char *
2865 output_branch (int logic, rtx_insn *insn, rtx *operands)
2867 switch (get_attr_length (insn))
2869 case 6:
2870 /* This can happen if filling the delay slot has caused a forward
2871 branch to exceed its range (we could reverse it, but only
2872 when we know we won't overextend other branches; this should
2873 best be handled by relaxation).
2874 It can also happen when other condbranches hoist a delay slot insn
2875 from their destination, thus leading to code size increase.
2876 But the branch will still be in the range -4092..+4098 bytes. */
2877 if (! TARGET_RELAX)
2879 int label = lf++;
2880 /* The call to print_slot will clobber the operands. */
2881 rtx op0 = operands[0];
2883 /* If the instruction in the delay slot is annulled (true), then
2884 there is no delay slot where we can put it now. The only safe
2885 place for it is after the label. final will do that by default. */
2887 if (final_sequence
2888 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2889 && get_attr_length (final_sequence->insn (1)))
2891 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2892 ASSEMBLER_DIALECT ? "/" : ".", label);
2893 print_slot (final_sequence);
2895 else
2896 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2898 output_asm_insn ("bra\t%l0", &op0);
2899 fprintf (asm_out_file, "\tnop\n");
2900 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2902 return "";
2904 /* When relaxing, handle this like a short branch. The linker
2905 will fix it up if it still doesn't fit after relaxation. */
2906 case 2:
2907 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2909 /* These are for SH2e, in which we have to account for the
2910 extra nop because of the hardware bug in annulled branches. */
2911 case 8:
2912 if (! TARGET_RELAX)
2914 int label = lf++;
2916 gcc_assert (!final_sequence
2917 || !(INSN_ANNULLED_BRANCH_P
2918 (XVECEXP (final_sequence, 0, 0))));
2919 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2920 logic ? "f" : "t",
2921 ASSEMBLER_DIALECT ? "/" : ".", label);
2922 fprintf (asm_out_file, "\tnop\n");
2923 output_asm_insn ("bra\t%l0", operands);
2924 fprintf (asm_out_file, "\tnop\n");
2925 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2927 return "";
2929 /* When relaxing, fall through. */
2930 case 4:
2932 char buffer[10];
2934 sprintf (buffer, "b%s%ss\t%%l0",
2935 logic ? "t" : "f",
2936 ASSEMBLER_DIALECT ? "/" : ".");
2937 output_asm_insn (buffer, &operands[0]);
2938 return "nop";
2941 default:
2942 /* There should be no longer branches now - that would
2943 indicate that something has destroyed the branches set
2944 up in machine_dependent_reorg. */
2945 gcc_unreachable ();
2949 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2950 fill in operands[9] as a label to the successor insn.
2951 We try to use jump threading where possible.
2952 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2953 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2954 follow jmp and bt, if the address is in range. */
2955 const char *
2956 output_branchy_insn (enum rtx_code code, const char *templ,
2957 rtx_insn *insn, rtx *operands)
2959 rtx_insn *next_insn = NEXT_INSN (insn);
2961 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2963 rtx src = SET_SRC (PATTERN (next_insn));
2964 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2966 /* Following branch not taken */
2967 rtx_code_label *lab = gen_label_rtx ();
2968 emit_label_after (lab, next_insn);
2969 INSN_ADDRESSES_NEW (lab,
2970 INSN_ADDRESSES (INSN_UID (next_insn))
2971 + get_attr_length (next_insn));
2972 operands[9] = lab;
2973 return templ;
2975 else
2977 int offset = (branch_dest (next_insn)
2978 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2979 if (offset >= -252 && offset <= 258)
2981 if (GET_CODE (src) == IF_THEN_ELSE)
2982 /* branch_true */
2983 src = XEXP (src, 1);
2984 operands[9] = src;
2985 return templ;
2989 rtx_code_label *lab = gen_label_rtx ();
2990 emit_label_after (lab, insn);
2991 INSN_ADDRESSES_NEW (lab,
2992 INSN_ADDRESSES (INSN_UID (insn))
2993 + get_attr_length (insn));
2994 operands[9] = lab;
2995 return templ;
2998 const char *
2999 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
3001 return output_branchy_insn (NE, "bt %l9" "\n"
3002 " fcmp/eq %1,%0",
3003 insn, operands);
3006 /* Output the start of the assembler file. */
3007 static void
3008 sh_file_start (void)
3010 default_file_start ();
3012 if (TARGET_ELF)
3013 /* We need to show the text section with the proper
3014 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
3015 emits it without attributes; otherwise GAS
3016 will complain. We can teach GAS specifically about the
3017 default attributes for our choice of text section, but
3018 then we would have to change GAS again if/when we change
3019 the text section name. */
3020 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
3021 else
3022 /* Switch to the data section so that the coffsem symbol
3023 isn't in the text section. */
3024 switch_to_section (data_section);
3026 if (TARGET_LITTLE_ENDIAN)
3027 fputs ("\t.little\n", asm_out_file);
3029 if (!TARGET_ELF)
3031 if (TARGET_SHCOMPACT)
3032 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3033 else if (TARGET_SHMEDIA)
3034 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3035 TARGET_SHMEDIA64 ? 64 : 32);
3039 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
3040 static bool
3041 unspec_caller_rtx_p (rtx pat)
3043 rtx base, offset;
3044 int i;
3046 split_const (pat, &base, &offset);
3047 if (GET_CODE (base) == UNSPEC)
3049 if (XINT (base, 1) == UNSPEC_CALLER)
3050 return true;
3051 for (i = 0; i < XVECLEN (base, 0); i++)
3052 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3053 return true;
3055 return false;
3058 /* Indicate that INSN cannot be duplicated. This is true for an insn
3059 that generates a unique label. */
3060 static bool
3061 sh_cannot_copy_insn_p (rtx_insn *insn)
3063 rtx pat;
3065 if (!reload_completed || !flag_pic)
3066 return false;
3068 if (!NONJUMP_INSN_P (insn))
3069 return false;
3070 if (asm_noperands (insn) >= 0)
3071 return false;
3073 pat = PATTERN (insn);
3074 if (GET_CODE (pat) != SET)
3075 return false;
3076 pat = SET_SRC (pat);
3078 if (unspec_caller_rtx_p (pat))
3079 return true;
3081 return false;
3084 /* Number of instructions used to make an arithmetic right shift by N. */
3085 static const char ashiftrt_insns[] =
3086 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
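/* Reading of the table: entry N is the insn count for an arithmetic right
   shift by N, e.g. 3 insns for a shift by 3 but only 2 for a shift by 16 or
   by 31; the entries of 8 mark the expensive cases.  */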
3088 /* Description of a logical left or right shift, when expanded to a sequence
3089 of 1/2/8/16 shifts.
3090 Notice that one bit right shifts clobber the T bit. One bit left shifts
3091 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
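/* Note on the tables below: a negative amount denotes a shift in the
   opposite direction.  For instance, the ashl_lshr_seq entry
   { 3, { 8, -2, 8 }, 0 } for a left shift by 14 expands to shift-left 8,
   logical-shift-right 2, shift-left 8 (and mirrored for a logical right
   shift by 14).  */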
3092 enum
3094 ASHL_CLOBBERS_T = 1 << 0,
3095 LSHR_CLOBBERS_T = 1 << 1
3098 struct ashl_lshr_sequence
3100 char insn_count;
3101 signed char amount[6];
3102 char clobbers_t;
3105 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3107 { 0, { 0 }, 0 }, // 0
3108 { 1, { 1 }, LSHR_CLOBBERS_T },
3109 { 1, { 2 }, 0 },
3110 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3111 { 2, { 2, 2 }, 0 }, // 4
3112 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3113 { 3, { 2, 2, 2 }, 0 },
3114 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3115 { 1, { 8 }, 0 }, // 8
3116 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3117 { 2, { 8, 2 }, 0 },
3118 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3119 { 3, { 8, 2, 2 }, 0 }, // 12
3120 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3121 { 3, { 8, -2, 8 }, 0 },
3122 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3123 { 1, { 16 }, 0 }, // 16
3124 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3125 { 2, { 16, 2 }, 0 },
3126 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3127 { 3, { 16, 2, 2 }, 0 }, // 20
3128 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3129 { 3, { 16, -2, 8 }, 0 },
3130 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3131 { 2, { 16, 8 }, 0 }, // 24
3132 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3133 { 3, { 16, 8, 2 }, 0 },
3134 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3135 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3136 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3137 { 3, { 16, -2, 16 }, 0 },
3139 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3140 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3141 However, the shift-and combiner code needs this entry here to be in
3142 terms of real shift insns. */
3143 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3146 /* Individual shift amounts for shift amounts < 16, where up to the three
3147 highmost bits might be clobbered. This is typically used when combined with some
3148 kind of sign or zero extension. */
3149 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3151 { 0, { 0 }, 0 }, // 0
3152 { 1, { 1 }, LSHR_CLOBBERS_T },
3153 { 1, { 2 }, 0 },
3154 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3155 { 2, { 2, 2 }, 0 }, // 4
3156 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3157 { 2, { 8, -2 }, 0 },
3158 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3159 { 1, { 8 }, 0 }, // 8
3160 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3161 { 2, { 8, 2 }, 0 },
3162 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3163 { 3, { 8, 2, 2 }, 0 }, // 12
3164 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3165 { 2, { 16, -2 }, 0 },
3166 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3167 { 1, { 16 }, 0 }, // 16
3168 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3169 { 2, { 16, 2 }, 0 },
3170 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3171 { 3, { 16, 2, 2 }, 0 }, // 20
3172 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3173 { 3, { 16, -2, 8 }, 0 },
3174 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3175 { 2, { 16, 8 }, 0 }, // 24
3176 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3177 { 3, { 16, 8, 2 }, 0 },
3178 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3179 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3180 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3181 { 3, { 16, -2, 16 }, 0 },
3182 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3185 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3186 will clobber the T bit. */
3187 bool
3188 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3190 gcc_assert (CONST_INT_P (shift_amount));
3192 const int shift_amount_i = INTVAL (shift_amount) & 31;
3194 /* Special case for shift count of 31: use and-rotl sequence. */
3195 if (shift_amount_i == 31)
3196 return true;
3198 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3199 & ASHL_CLOBBERS_T) != 0;
3202 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3203 instructions will clobber the T bit. */
3204 bool
3205 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3207 gcc_assert (CONST_INT_P (shift_amount));
3209 const int shift_amount_i = INTVAL (shift_amount) & 31;
3211 /* Special case for shift count of 31: use shll-movt sequence. */
3212 if (shift_amount_i == 31)
3213 return true;
3215 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3216 & LSHR_CLOBBERS_T) != 0;
3219 /* Return true if it is potentially beneficial to use a dynamic shift
3220 instruction (shad / shar) instead of a combination of 1/2/8/16
3221 shift instructions for the specified shift count.
3222 If dynamic shifts are not available, always return false. */
3223 bool
3224 sh_dynamicalize_shift_p (rtx count)
3226 gcc_assert (CONST_INT_P (count));
3228 const int shift_amount_i = INTVAL (count) & 31;
3229 int insn_count;
3231 /* For left and right shifts, there are shorter 2 insn sequences for
3232 shift amounts of 31. */
3233 if (shift_amount_i == 31)
3234 insn_count = 2;
3235 else
3236 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3238 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3241 /* Assuming we have a value that has been sign-extended by at least one bit,
3242 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3243 arithmetic shift to shift it by N without data loss, and quicker than by
3244 other means? */
3245 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
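/* With this definition EXT_SHIFT_SIGNED is true exactly for N == 7 and
   N == 15.  */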
3247 /* Return the cost of a shift. */
3248 static inline int
3249 shiftcosts (rtx x)
3251 int value;
3253 if (TARGET_SHMEDIA)
3254 return 1;
3256 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3258 if (GET_MODE (x) == DImode
3259 && CONST_INT_P (XEXP (x, 1))
3260 && INTVAL (XEXP (x, 1)) == 1)
3261 return 2;
3263 /* Everything else is invalid, because there is no pattern for it. */
3264 return -1;
3266 /* If shift by a non constant, then this will be expensive. */
3267 if (!CONST_INT_P (XEXP (x, 1)))
3268 return SH_DYNAMIC_SHIFT_COST;
3270 /* Otherwise, return the true cost in instructions. Cope with out of range
3271 shift counts more or less arbitrarily. */
3272 value = INTVAL (XEXP (x, 1)) & 31;
3274 if (GET_CODE (x) == ASHIFTRT)
3276 int cost = ashiftrt_insns[value];
3277 /* If dynamic shifts are available and profitable in this case, then we
3278 put the constant in a reg and use shad. */
3279 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3280 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3281 return cost;
3283 else
3284 return ashl_lshr_seq[value].insn_count;
3287 /* Return the cost of an AND/XOR/IOR operation. */
3288 static inline int
3289 and_xor_ior_costs (rtx x, int code)
3291 /* On SH1-4 we have only max. SImode operations.
3292 Double the cost for modes > SImode. */
3293 const int cost_scale = !TARGET_SHMEDIA
3294 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3295 ? 2 : 1;
3297 /* A logical operation with two registers is a single cycle
3298 instruction. */
3299 if (!CONST_INT_P (XEXP (x, 1)))
3300 return 1 * cost_scale;
3302 int i = INTVAL (XEXP (x, 1));
3304 if (TARGET_SHMEDIA)
3306 if (satisfies_constraint_I10 (XEXP (x, 1))
3307 || satisfies_constraint_J16 (XEXP (x, 1)))
3308 return 1;
3309 else
3310 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3313 /* These constants are single cycle extu.[bw] instructions. */
3314 if ((i == 0xff || i == 0xffff) && code == AND)
3315 return 1 * cost_scale;
3316 /* Constants that can be used in an instruction as an immediate are
3317 a single cycle, but this requires r0, so make it a little more
3318 expensive. */
3319 if (CONST_OK_FOR_K08 (i))
3320 return 2 * cost_scale;
3321 /* Constants that can be loaded with a mov immediate need one more cycle.
3322 This case is probably unnecessary. */
3323 if (CONST_OK_FOR_I08 (i))
3324 return 2 * cost_scale;
3325 /* Any other constant requires an additional 2 cycle pc-relative load.
3326 This case is probably unnecessary. */
3327 return 3 * cost_scale;
3330 /* Return the cost of an addition or a subtraction. */
3331 static inline int
3332 addsubcosts (rtx x)
3334 if (GET_MODE (x) == SImode)
3336 /* The addc or subc patterns will eventually become one or two
3337 instructions. Below are some costs for some of the patterns
3338 which combine would reject because the costs of the individual
3339 insns in the patterns are lower.
3341 FIXME: It would be much easier if we had something like insn cost
3342 attributes and the cost calculation machinery used those attributes
3343 in the first place. This would eliminate redundant recog-like C
3344 code to calculate costs of complex patterns. */
3345 rtx op0 = XEXP (x, 0);
3346 rtx op1 = XEXP (x, 1);
3348 if (GET_CODE (x) == PLUS)
3350 if (GET_CODE (op0) == AND
3351 && XEXP (op0, 1) == const1_rtx
3352 && (GET_CODE (op1) == PLUS
3353 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3354 return 1;
3356 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3357 && GET_CODE (op1) == LSHIFTRT
3358 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3359 return 1;
3361 /* Let's assume that adding the result of an insn that stores into
3362 the T bit is cheap. */
3363 if (treg_set_expr (op1, SImode))
3364 return 1;
3365 if (treg_set_expr (op0, SImode))
3366 return 1;
3369 /* On SH1-4 we have only max. SImode operations.
3370 Double the cost for modes > SImode. */
3371 const int cost_scale = !TARGET_SHMEDIA
3372 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3373 ? 2 : 1;
3375 /* Adding a register is a single cycle insn. */
3376 if (REG_P (XEXP (x, 1))
3377 || GET_CODE (XEXP (x, 1)) == SUBREG)
3378 return 1 * cost_scale;
3380 /* Likewise for small constants. */
3381 if (CONST_INT_P (XEXP (x, 1))
3382 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3383 return 1 * cost_scale;
3385 if (TARGET_SHMEDIA)
3386 switch (GET_CODE (XEXP (x, 1)))
3388 case CONST:
3389 case LABEL_REF:
3390 case SYMBOL_REF:
3391 return TARGET_SHMEDIA64 ? 5 : 3;
3393 case CONST_INT:
3394 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3395 return 2;
3396 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3397 return 3;
3398 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3399 return 4;
3401 /* Fall through. */
3402 default:
3403 return 5;
3406 /* Any other constant requires a 2 cycle pc-relative load plus an
3407 addition. */
3408 return 3 * cost_scale;
3411 /* Return the cost of a multiply. */
3412 static inline int
3413 multcosts (rtx x ATTRIBUTE_UNUSED)
3415 if (sh_multcost >= 0)
3416 return sh_multcost;
3417 if (TARGET_SHMEDIA)
3418 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3419 accept constants. Ideally, we would use a cost of one or two and
3420 add the cost of the operand, but disregard the latter when inside loops
3421 and loop invariant code motion is still to follow.
3422 Using a multiply first and splitting it later if it's a loss
3423 doesn't work because of different sign / zero extension semantics
3424 of multiplies vs. shifts. */
3425 return optimize_size ? 2 : 3;
3427 if (TARGET_SH2)
3429 /* We have a mul insn, so we can never take more than the mul and the
3430 read of the mac reg, but count more because of the latency and extra
3431 reg usage. */
3432 if (optimize_size)
3433 return 2;
3434 return 3;
3437 /* If we're aiming at small code, then just count the number of
3438 insns in a multiply call sequence. */
3439 if (optimize_size)
3440 return 5;
3442 /* Otherwise count all the insns in the routine we'd be calling too. */
3443 return 20;
3446 /* Compute a (partial) cost for rtx X. Return true if the complete
3447 cost has been computed, and false if subexpressions should be
3448 scanned. In either case, *TOTAL contains the cost result. */
3449 static bool
3450 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3451 int *total, bool speed ATTRIBUTE_UNUSED)
3453 switch (code)
3455 /* The lower-subreg pass decides whether to split multi-word regs
3456 into individual regs by looking at the cost for a SET of certain
3457 modes with the following patterns:
3458 (set (reg) (reg))
3459 (set (reg) (const_int 0))
3460 On machines that support vector-move operations a multi-word move
3461 is the same cost as an individual reg move. On SH there is no
3462 vector-move, so we have to provide the correct cost in the number
3463 of move insns to load/store the reg of the mode in question. */
3464 case SET:
3465 if (register_operand (SET_DEST (x), VOIDmode)
3466 && (register_operand (SET_SRC (x), VOIDmode)
3467 || satisfies_constraint_Z (SET_SRC (x))))
3469 const machine_mode mode = GET_MODE (SET_DEST (x));
3470 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3471 / mov_insn_size (mode, TARGET_SH2A));
3472 return true;
3474 return false;
3476 /* The cost of a mem access is mainly the cost of the address mode. */
3477 case MEM:
3478 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3479 true);
3480 return true;
3482 case IF_THEN_ELSE:
3483 /* This case is required for the if_then_else negc pattern. */
3484 if (treg_set_expr (XEXP (x, 0), SImode))
3486 *total = COSTS_N_INSNS (1);
3487 return true;
3489 else
3490 return false;
3492 /* Zero extracts of single bits are usually combine patterns for the
3493 tst insns. */
3494 case ZERO_EXTRACT:
3495 if (GET_CODE (XEXP (x, 0)) == XOR
3496 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3497 && XEXP (x, 1) == const1_rtx
3498 && CONST_INT_P (XEXP (x, 2))
3499 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3500 /* Check that the xor constant overlaps with the extracted bit. */
3501 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3503 *total = 1; //COSTS_N_INSNS (1);
3504 return true;
3506 return false;
3508 /* The cost of a sign or zero extend depends on whether the source is a
3509 reg or a mem. In case of a mem take the address into account. */
3510 case SIGN_EXTEND:
3511 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3513 *total = COSTS_N_INSNS (1);
3514 return true;
3516 if (MEM_P (XEXP (x, 0)))
3518 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3519 GET_MODE (XEXP (x, 0)),
3520 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3521 return true;
3523 return false;
3525 case ZERO_EXTEND:
3526 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3528 *total = COSTS_N_INSNS (1);
3529 return true;
3531 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3532 && (GET_MODE (XEXP (x, 0)) == QImode
3533 || GET_MODE (XEXP (x, 0)) == HImode))
3535 /* Handle SH2A's movu.b and movu.w insn. */
3536 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3537 GET_MODE (XEXP (x, 0)),
3538 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3539 return true;
3541 return false;
3543 /* mems for SFmode and DFmode can be inside a parallel due to
3544 the way the fpscr is handled. */
3545 case PARALLEL:
3546 for (int i = 0; i < XVECLEN (x, 0); i++)
3548 rtx xx = XVECEXP (x, 0, i);
3549 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3551 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3552 GET_MODE (XEXP (xx, 0)),
3553 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3554 return true;
3556 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3558 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3559 GET_MODE (XEXP (xx, 1)),
3560 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3561 return true;
3565 if (sh_1el_vec (x, VOIDmode))
3566 *total = outer_code != SET;
3567 else if (sh_rep_vec (x, VOIDmode))
3568 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3569 + (outer_code != SET));
3570 else
3571 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3572 return true;
3574 case CONST_INT:
3575 if (TARGET_SHMEDIA)
3577 if (INTVAL (x) == 0)
3578 *total = 0;
3579 else if (outer_code == AND && and_operand ((x), DImode))
3580 *total = 0;
3581 else if ((outer_code == IOR || outer_code == XOR
3582 || outer_code == PLUS)
3583 && CONST_OK_FOR_I10 (INTVAL (x)))
3584 *total = 0;
3585 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3586 *total = COSTS_N_INSNS (outer_code != SET);
3587 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3588 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3589 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3590 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3591 else
3592 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3593 return true;
3595 if (CONST_OK_FOR_I08 (INTVAL (x)))
3596 *total = 0;
3597 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3598 && CONST_OK_FOR_K08 (INTVAL (x)))
3599 *total = 1;
3600 /* prepare_cmp_insn will force costly constants into registers before
3601 the cbranch[sd]i4 patterns can see them, so preserve potentially
3602 interesting ones not covered by I08 above. */
3603 else if (outer_code == COMPARE
3604 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3605 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3606 || INTVAL (x) == 0x7fffffff
3607 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3608 *total = 1;
3609 else
3610 *total = 8;
3611 return true;
3613 case EQ:
3614 /* An and with a constant compared against zero is
3615 most likely going to be a TST #imm, R0 instruction.
3616 Notice that this does not catch the zero_extract variants from
3617 the md file. */
3618 if (XEXP (x, 1) == const0_rtx
3619 && (GET_CODE (XEXP (x, 0)) == AND
3620 || (SUBREG_P (XEXP (x, 0))
3621 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3623 *total = 1;
3624 return true;
3627 else if (XEXP (x, 1) == const0_rtx
3628 && GET_CODE (XEXP (x, 0)) == AND
3629 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3631 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3632 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3634 *total = 1;
3635 return true;
3637 else
3638 return false;
3640 case SMIN:
3641 case SMAX:
3642 /* This is most likely a clips.b or clips.w insn that is being made up
3643 by combine. */
3644 if (TARGET_SH2A
3645 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3646 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3647 && REG_P (XEXP (XEXP (x, 0), 0))
3648 && CONST_INT_P (XEXP (x, 1)))
3650 *total = COSTS_N_INSNS (1);
3651 return true;
3653 else
3654 return false;
3656 case CONST:
3657 case LABEL_REF:
3658 case SYMBOL_REF:
3659 if (TARGET_SHMEDIA64)
3660 *total = COSTS_N_INSNS (4);
3661 else if (TARGET_SHMEDIA32)
3662 *total = COSTS_N_INSNS (2);
3663 else
3664 *total = 5;
3665 return true;
3667 case CONST_DOUBLE:
3668 if (TARGET_SHMEDIA)
3669 *total = COSTS_N_INSNS (4);
3670 /* prepare_cmp_insn will force costly constants into registers before
3671 the cbranchdi4 pattern can see them, so preserve potentially
3672 interesting ones. */
3673 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3674 *total = 1;
3675 else
3676 *total = 10;
3677 return true;
3679 case CONST_VECTOR:
3680 /* FIXME: This looks broken. Only the last statement has any effect.
3681 Probably this could be folded with the PARALLEL case? */
3682 if (x == CONST0_RTX (GET_MODE (x)))
3683 *total = 0;
3684 else if (sh_1el_vec (x, VOIDmode))
3685 *total = outer_code != SET;
3686 if (sh_rep_vec (x, VOIDmode))
3687 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3688 + (outer_code != SET));
3689 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3690 return true;
3692 case PLUS:
3693 case MINUS:
3694 *total = COSTS_N_INSNS (addsubcosts (x));
3695 return true;
3697 case AND:
3698 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3699 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3701 *total = COSTS_N_INSNS (1);
3702 return true;
3704 /* Fall through. */
3706 case XOR:
3707 case IOR:
3708 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3709 return true;
3711 case MULT:
3712 *total = COSTS_N_INSNS (multcosts (x));
3713 return true;
3715 case LT:
3716 case GE:
3717 /* div0s sign comparison. */
3718 if (GET_CODE (XEXP (x, 0)) == XOR
3719 && REG_P ((XEXP (XEXP (x, 0), 0)))
3720 && REG_P ((XEXP (XEXP (x, 0), 1)))
3721 && satisfies_constraint_Z (XEXP (x, 1)))
3723 *total = COSTS_N_INSNS (1);
3724 return true;
3726 else
3727 return false;
3729 case LSHIFTRT:
3730 /* div0s sign comparison. */
3731 if (GET_CODE (XEXP (x, 0)) == XOR
3732 && REG_P ((XEXP (XEXP (x, 0), 0)))
3733 && REG_P ((XEXP (XEXP (x, 0), 1)))
3734 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3736 *total = COSTS_N_INSNS (1);
3737 return true;
3739 /* Fall through to shiftcosts. */
3740 case ASHIFT:
3741 case ASHIFTRT:
3743 int cost = shiftcosts (x);
3744 if (cost < 0)
3745 return false;
3746 *total = COSTS_N_INSNS (cost);
3747 return true;
3750 case DIV:
3751 case UDIV:
3752 case MOD:
3753 case UMOD:
3754 *total = COSTS_N_INSNS (20);
3755 return true;
3757 case FLOAT:
3758 case FIX:
3759 *total = 100;
3760 return true;
3762 default:
3763 return false;
3767 /* Determine the size of the fundamental move insn that will be used
3768 for the specified mode. */
3769 static inline int
3770 mov_insn_size (machine_mode mode, bool consider_sh2a)
3772 const int mode_sz = GET_MODE_SIZE (mode);
3774 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3775 || (TARGET_FMOVD && mode == DFmode))
3776 return mode_sz;
3777 else
3779 /* The max. available mode for actual move insns is SImode.
3780 Larger accesses will be split into multiple loads/stores. */
3781 const int max_mov_sz = GET_MODE_SIZE (SImode);
3782 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
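/* Illustrative values of the above (assuming the usual mode sizes):
   QImode -> 1, HImode -> 2, SImode -> 4; DImode -> 4, since it is split
   into two SImode moves; DFmode -> 8 only when double-precision fmov is
   available (SH2A double or FMOVD), otherwise 4.  */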
3786 /* Determine the maximum possible displacement for a move insn for the
3787 specified mode. */
3789 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3791 /* The 4 byte displacement move insns are the same as the 2 byte
3792 versions but take a 12 bit displacement. All we need to do is to
3793 scale the max. displacement value accordingly. */
3794 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3796 /* SH2A supports FPU move insns with 12 bit displacements.
3797 Other variants do not support any kind of displacements for
3798 FPU move insns. */
3799 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3800 return 0;
3801 else
3803 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3804 const int mode_sz = GET_MODE_SIZE (mode);
3805 int r = 15 * mov_insn_sz * disp_scale;
3807 /* If the mov insn will be split into multiple loads/stores, the
3808 maximum possible displacement is a bit smaller. */
3809 if (mode_sz > mov_insn_sz)
3810 r -= mode_sz - mov_insn_sz;
3811 return r;
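/* A few example values of the formula above (illustrative only):
   with consider_sh2a false: SImode 15 * 4 = 60, QImode 15 * 1 = 15,
   DImode 60 - (8 - 4) = 56, since the access is split into two moves;
   with consider_sh2a true (12 bit displacements): SImode
   15 * 4 * (4095 / 15) = 16380.  */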
3815 /* Determine the alignment mask for a move insn of the
3816 specified mode. */
3817 static inline int
3818 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3820 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3821 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3824 /* Return the displacement value of a displacement address. */
3825 HOST_WIDE_INT
3826 sh_disp_addr_displacement (rtx x)
3828 gcc_assert (satisfies_constraint_Sdd (x));
3829 return INTVAL (XEXP (XEXP (x, 0), 1));
3832 /* Compute the cost of an address. */
3833 static int
3834 sh_address_cost (rtx x, machine_mode mode,
3835 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3837 /* 'GBR + 0'. Account one more because of R0 restriction. */
3838 if (REG_P (x) && REGNO (x) == GBR_REG)
3839 return 2;
3841 /* Simple reg, post-inc, pre-dec addressing. */
3842 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3843 return 1;
3845 /* 'reg + disp' addressing. */
3846 if (GET_CODE (x) == PLUS
3847 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3849 /* 'GBR + disp'. Account one more because of R0 restriction. */
3850 if (REGNO (XEXP (x, 0)) == GBR_REG
3851 && gbr_displacement (XEXP (x, 1), mode))
3852 return 2;
3854 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3856 if (offset == 0)
3857 return 1;
3859 /* The displacement would fit into a 2 byte move insn.
3860 HImode and QImode loads/stores with displacement put pressure on
3861 R0 which will most likely require another reg copy. Thus account
3862 a higher cost for that. */
3863 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3864 return (mode == HImode || mode == QImode) ? 2 : 1;
3866 /* The displacement would fit into a 4 byte move insn (SH2A). */
3867 if (TARGET_SH2A
3868 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3869 return 2;
3871 /* The displacement is probably out of range and will require extra
3872 calculations. */
3873 return 3;
3876 /* 'reg + reg' addressing. Account a slightly higher cost because of
3877 increased pressure on R0. */
3878 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3879 && ! TARGET_SHMEDIA)
3880 return 3;
3882 /* Not sure what it is - probably expensive. */
3883 return 10;
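/* Rough summary of the cases above (illustrative):
     plain reg, post-inc, pre-dec            -> 1
     GBR + 0 or GBR + disp (R0 restriction)  -> 2
     reg + short disp                        -> 1 (2 for QImode/HImode)
     reg + long disp (SH2A)                  -> 2
     reg + out-of-range disp                 -> 3
     reg + reg (R0 pressure)                 -> 3
     anything else                           -> 10  */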
3886 /* Code to expand a shift. */
3887 static void
3888 gen_ashift (int type, int n, rtx reg)
3890 rtx n_rtx;
3892 /* Negative values here come from the shift_amounts array. */
3893 if (n < 0)
3895 if (type == ASHIFT)
3896 type = LSHIFTRT;
3897 else
3898 type = ASHIFT;
3899 n = -n;
3902 n_rtx = GEN_INT (n);
3903 gcc_assert (satisfies_constraint_P27 (n_rtx));
3905 switch (type)
3907 case ASHIFTRT:
3908 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3909 break;
3910 case LSHIFTRT:
3911 if (n == 1)
3912 emit_insn (gen_shlr (reg, reg));
3913 else
3914 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3915 break;
3916 case ASHIFT:
3917 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3918 break;
3919 default:
3920 gcc_unreachable ();
3924 /* Code to expand a HImode shift. */
3925 static void
3926 gen_ashift_hi (int type, int n, rtx reg)
3928 /* Negative values here come from the shift_amounts array. */
3929 if (n < 0)
3931 if (type == ASHIFT)
3932 type = LSHIFTRT;
3933 else
3934 type = ASHIFT;
3935 n = -n;
3938 switch (type)
3940 case ASHIFTRT:
3941 case LSHIFTRT:
3942 /* We don't have HImode right shift operations because using the
3943 ordinary 32 bit shift instructions for that doesn't generate proper
3944 zero/sign extension.
3945 gen_ashift_hi is only called in contexts where we know that the
3946 sign extension works out correctly. */
3948 int offset = 0;
3949 if (GET_CODE (reg) == SUBREG)
3951 offset = SUBREG_BYTE (reg);
3952 reg = SUBREG_REG (reg);
3954 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3955 break;
3957 case ASHIFT:
3958 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3959 break;
3963 /* Output RTL to split a constant shift into its component SH constant
3964 shift instructions. */
3965 void
3966 gen_shifty_op (int code, rtx *operands)
3968 int value = INTVAL (operands[2]);
3969 int max, i;
3971 /* Truncate the shift count in case it is out of bounds. */
3972 value = value & 31;
3974 if (value == 31)
3976 if (code == LSHIFTRT)
3978 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3979 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3980 return;
3982 else if (code == ASHIFT)
3984 /* There is a two instruction sequence for 31 bit left shifts,
3985 but it requires r0. */
3986 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3988 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3989 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3990 return;
3994 else if (value == 0)
3996 /* This can happen even when optimizing, if there were subregs before
3997 reload. Don't output a nop here, as this is never optimized away;
3998 use a no-op move instead. */
3999 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
4000 return;
4003 max = ashl_lshr_seq[value].insn_count;
4004 for (i = 0; i < max; i++)
4005 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
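/* For example, assuming the usual SH constant shift insns (by 1, 2, 8
   and 16), a shift by 3 would be emitted as shifts by 1 and 2.
   Negative amounts in ashl_lshr_seq flip the direction in gen_ashift,
   so an entry like { 8, -2 } would yield a shift left by 8 followed by
   a logical shift right by 2.  */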
4008 /* Same as gen_shifty_op, but optimized for values where the topmost bits
4009 don't matter. */
4010 void
4011 gen_shifty_hi_op (int code, rtx *operands)
4013 int value = INTVAL (operands[2]);
4014 int max, i;
4015 void (*gen_fun) (int, int, rtx);
4017 /* This operation is used by and_shl for SImode values with a few
4018 high bits known to be cleared. */
4019 value &= 31;
4020 if (value == 0)
4022 emit_insn (gen_nop ());
4023 return;
4026 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4027 if (code == ASHIFT)
4029 max = ext_ashl_lshr_seq[value].insn_count;
4030 for (i = 0; i < max; i++)
4031 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4033 else
4034 /* When shifting right, emit the shifts in reverse order, so that
4035 solitary negative values come first. */
4036 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4037 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4040 /* Output RTL for an arithmetic right shift.
4041 ??? Rewrite to use super-optimizer sequences. */
4042 bool
4043 expand_ashiftrt (rtx *operands)
4045 rtx wrk;
4046 char func[18];
4047 int value;
4049 if (TARGET_DYNSHIFT)
4051 if (!CONST_INT_P (operands[2]))
4053 rtx count = copy_to_mode_reg (SImode, operands[2]);
4054 emit_insn (gen_negsi2 (count, count));
4055 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4056 return true;
4058 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4059 > 1 + SH_DYNAMIC_SHIFT_COST)
4061 rtx count
4062 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4063 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4064 return true;
4067 if (!CONST_INT_P (operands[2]))
4068 return false;
4070 value = INTVAL (operands[2]) & 31;
4072 if (value == 31)
4074 /* If we are called from abs expansion, arrange things so that
4075 we can use a single MT instruction that doesn't clobber the source,
4076 if LICM can hoist out the load of the constant zero. */
4077 if (currently_expanding_to_rtl)
4079 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4080 operands[1]));
4081 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4082 return true;
4084 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4085 return true;
4087 else if (value >= 16 && value <= 19)
4089 wrk = gen_reg_rtx (SImode);
4090 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4091 value -= 16;
4092 while (value--)
4093 gen_ashift (ASHIFTRT, 1, wrk);
4094 emit_move_insn (operands[0], wrk);
4095 return true;
4097 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4098 else if (value <= 5)
4100 wrk = gen_reg_rtx (SImode);
4101 emit_move_insn (wrk, operands[1]);
4102 while (value--)
4103 gen_ashift (ASHIFTRT, 1, wrk);
4104 emit_move_insn (operands[0], wrk);
4105 return true;
4108 wrk = gen_reg_rtx (Pmode);
4110 /* Load the value into an arg reg and call a helper. */
4111 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4112 sprintf (func, "__ashiftrt_r4_%d", value);
4113 function_symbol (wrk, func, SFUNC_STATIC);
4114 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4115 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4116 return true;
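/* Illustrative walk-through of the above: without dynamic shifts, an
   arithmetic right shift by 17 takes the 16..19 path (one ashrsi2_16
   plus a single-bit shift), while a shift by e.g. 10 falls back to the
   __ashiftrt_r4_10 library helper.  */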
4119 /* Try to find a good way to implement the combiner pattern
4120 [(set (match_operand:SI 0 "register_operand" "r")
4121 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4122 (match_operand:SI 2 "const_int_operand" "n"))
4123 (match_operand:SI 3 "const_int_operand" "n"))) .
4124 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4125 return 0 for simple right / left or left/right shift combination.
4126 return 1 for a combination of shifts with zero_extend.
4127 return 2 for a combination of shifts with an AND that needs r0.
4128 return 3 for a combination of shifts with an AND that needs an extra
4129 scratch register, when the three highmost bits of the AND mask are clear.
4130 return 4 for a combination of shifts with an AND that needs an extra
4131 scratch register, when any of the three highmost bits of the AND mask
4132 is set.
4133 If ATTRP is set, store an initial right shift width in ATTRP[0],
4134 and the instruction length in ATTRP[1] . These values are not valid
4135 when returning 0.
4136 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4137 shift_amounts for the last shift value that is to be used before the
4138 sign extend. */
4140 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4142 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4143 int left = INTVAL (left_rtx), right;
4144 int best = 0;
4145 int cost, best_cost = 10000;
4146 int best_right = 0, best_len = 0;
4147 int i;
4148 int can_ext;
4150 if (left < 0 || left > 31)
4151 return 0;
4152 if (CONST_INT_P (mask_rtx))
4153 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4154 else
4155 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4156 /* Can this be expressed as a right shift / left shift pair? */
4157 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4158 right = exact_log2 (lsb);
4159 mask2 = ~(mask + lsb - 1);
4160 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4161 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
4162 if (! mask2)
4163 best_cost = ashl_lshr_seq[right].insn_count
4164 + ashl_lshr_seq[right + left].insn_count;
4165 /* mask has no trailing zeroes <==> ! right */
4166 else if (! right && mask2 == ~(lsb2 - 1))
4168 int late_right = exact_log2 (lsb2);
4169 best_cost = ashl_lshr_seq[left + late_right].insn_count
4170 + ashl_lshr_seq[late_right].insn_count;
4172 /* Try to use zero extend. */
4173 if (mask2 == ~(lsb2 - 1))
4175 int width, first;
4177 for (width = 8; width <= 16; width += 8)
4179 /* Can we zero-extend right away? */
4180 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4182 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4183 + ext_ashl_lshr_seq[left + right].insn_count;
4184 if (cost < best_cost)
4186 best = 1;
4187 best_cost = cost;
4188 best_right = right;
4189 best_len = cost;
4190 if (attrp)
4191 attrp[2] = -1;
4193 continue;
4195 /* ??? Could try to put zero extend into initial right shift,
4196 or even shift a bit left before the right shift. */
4197 /* Determine value of first part of left shift, to get to the
4198 zero extend cut-off point. */
4199 first = width - exact_log2 (lsb2) + right;
4200 if (first >= 0 && right + left - first >= 0)
4202 cost = ext_ashl_lshr_seq[right].insn_count
4203 + ext_ashl_lshr_seq[first].insn_count + 1
4204 + ext_ashl_lshr_seq[right + left - first].insn_count;
4206 if (cost < best_cost)
4208 best = 1;
4209 best_cost = cost;
4210 best_right = right;
4211 best_len = cost;
4212 if (attrp)
4213 attrp[2] = first;
4218 /* Try to use r0 AND pattern */
4219 for (i = 0; i <= 2; i++)
4221 if (i > right)
4222 break;
4223 if (! CONST_OK_FOR_K08 (mask >> i))
4224 continue;
4225 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4226 if (cost < best_cost)
4228 best = 2;
4229 best_cost = cost;
4230 best_right = i;
4231 best_len = cost - 1;
4234 /* Try to use a scratch register to hold the AND operand. */
4235 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4236 for (i = 0; i <= 2; i++)
4238 if (i > right)
4239 break;
4240 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4241 + (can_ext
4242 ? ext_ashl_lshr_seq
4243 : ashl_lshr_seq)[left + i].insn_count;
4244 if (cost < best_cost)
4246 best = 4 - can_ext;
4247 best_cost = cost;
4248 best_right = i;
4249 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4253 if (attrp)
4255 attrp[0] = best_right;
4256 attrp[1] = best_len;
4258 return best;
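/* Worked example (illustrative): for (and (ashift x 4) 0xff0) we get
   left = 4 and mask = 0xff after stripping the shift, so the zero
   extend path applies; the kind 1 result corresponds to a
   zero_extendqisi2 followed by a left shift by 4.  */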
4261 /* This is used in length attributes of the unnamed instructions
4262 corresponding to shl_and_kind return values of 1 and 2. */
4264 shl_and_length (rtx insn)
4266 rtx set_src, left_rtx, mask_rtx;
4267 int attributes[3];
4269 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4270 left_rtx = XEXP (XEXP (set_src, 0), 1);
4271 mask_rtx = XEXP (set_src, 1);
4272 shl_and_kind (left_rtx, mask_rtx, attributes);
4273 return attributes[1];
4276 /* This is used in length attribute of the and_shl_scratch instruction. */
4278 shl_and_scr_length (rtx insn)
4280 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4281 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4282 rtx op = XEXP (set_src, 0);
4283 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4284 op = XEXP (XEXP (op, 0), 0);
4285 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4288 /* Generate rtl for instructions for which shl_and_kind advised a particular
4289 method of generating them, i.e. returned zero. */
4290 bool
4291 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4293 int attributes[3];
4294 unsigned HOST_WIDE_INT mask;
4295 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4296 int right, total_shift;
4297 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4299 right = attributes[0];
4300 total_shift = INTVAL (left_rtx) + right;
4301 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4302 switch (kind)
4304 default:
4305 return true;
4306 case 1:
4308 int first = attributes[2];
4309 rtx operands[3];
4311 if (first < 0)
4313 emit_insn ((mask << right) <= 0xff
4314 ? gen_zero_extendqisi2 (dest,
4315 gen_lowpart (QImode, source))
4316 : gen_zero_extendhisi2 (dest,
4317 gen_lowpart (HImode, source)));
4318 source = dest;
4320 if (source != dest)
4321 emit_insn (gen_movsi (dest, source));
4322 operands[0] = dest;
4323 if (right)
4325 operands[2] = GEN_INT (right);
4326 gen_shifty_hi_op (LSHIFTRT, operands);
4328 if (first > 0)
4330 operands[2] = GEN_INT (first);
4331 gen_shifty_hi_op (ASHIFT, operands);
4332 total_shift -= first;
4333 mask <<= first;
4335 if (first >= 0)
4336 emit_insn (mask <= 0xff
4337 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4338 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4339 if (total_shift > 0)
4341 operands[2] = GEN_INT (total_shift);
4342 gen_shifty_hi_op (ASHIFT, operands);
4344 break;
4346 case 4:
4347 shift_gen_fun = gen_shifty_op;
4348 case 3:
4349 /* If the topmost bit that matters is set, set the topmost bits
4350 that don't matter. This way, we might be able to get a shorter
4351 signed constant. */
4352 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4353 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4354 case 2:
4355 /* Don't expand fine-grained when combining, because that will
4356 make the pattern fail. */
4357 if (currently_expanding_to_rtl
4358 || reload_in_progress || reload_completed)
4360 rtx operands[3];
4362 /* Cases 3 and 4 should be handled by this split
4363 only while combining */
4364 gcc_assert (kind <= 2);
4365 if (right)
4367 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4368 source = dest;
4370 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4371 if (total_shift)
4373 operands[0] = dest;
4374 operands[1] = dest;
4375 operands[2] = GEN_INT (total_shift);
4376 shift_gen_fun (ASHIFT, operands);
4378 break;
4380 else
4382 int neg = 0;
4383 if (kind != 4 && total_shift < 16)
4385 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4386 if (neg > 0)
4387 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4388 else
4389 neg = 0;
4391 emit_insn (gen_and_shl_scratch (dest, source,
4392 GEN_INT (right),
4393 GEN_INT (mask),
4394 GEN_INT (total_shift + neg),
4395 GEN_INT (neg)));
4396 emit_insn (gen_movsi (dest, dest));
4397 break;
4400 return false;
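/* In the expanded (non-combine) branch of case 2 above this amounts to
   dest = ((source >> right) & mask) << (left + right), where mask has
   already been shifted down by the total shift amount (rough sketch of
   the code above).  */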
4403 /* Try to find a good way to implement the combiner pattern
4404 [(set (match_operand:SI 0 "register_operand" "=r")
4405 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4406 (match_operand:SI 2 "const_int_operand" "n")
4407 (match_operand:SI 3 "const_int_operand" "n")
4408 (const_int 0)))
4409 (clobber (reg:SI T_REG))]
4410 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4411 return 0 for simple left / right shift combination.
4412 return 1 for left shift / 8 bit sign extend / left shift.
4413 return 2 for left shift / 16 bit sign extend / left shift.
4414 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4415 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4416 return 5 for left shift / 16 bit sign extend / right shift
4417 return 6 for < 8 bit sign extend / left shift.
4418 return 7 for < 8 bit sign extend / left shift / single right shift.
4419 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4421 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4423 int left, size, insize, ext;
4424 int cost = 0, best_cost;
4425 int kind;
4427 left = INTVAL (left_rtx);
4428 size = INTVAL (size_rtx);
4429 insize = size - left;
4430 gcc_assert (insize > 0);
4431 /* Default to left / right shift. */
4432 kind = 0;
4433 best_cost = ashl_lshr_seq[32 - insize].insn_count
4434 + ashl_lshr_seq[32 - size].insn_count;
4435 if (size <= 16)
4437 /* 16 bit shift / sign extend / 16 bit shift */
4438 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4439 + ashl_lshr_seq[16 - size].insn_count;
4440 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4441 below, by alternative 3 or something even better. */
4442 if (cost < best_cost)
4444 kind = 5;
4445 best_cost = cost;
4448 /* Try a plain sign extend between two shifts. */
4449 for (ext = 16; ext >= insize; ext -= 8)
4451 if (ext <= size)
4453 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4454 + ashl_lshr_seq[size - ext].insn_count;
4455 if (cost < best_cost)
4457 kind = ext / (unsigned) 8;
4458 best_cost = cost;
4461 /* Check if we can do a sloppy shift with a final signed shift
4462 restoring the sign. */
4463 if (EXT_SHIFT_SIGNED (size - ext))
4464 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4465 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4466 /* If not, maybe it's still cheaper to do the second shift sloppy,
4467 and do a final sign extend? */
4468 else if (size <= 16)
4469 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4470 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4471 + 1;
4472 else
4473 continue;
4474 if (cost < best_cost)
4476 kind = ext / (unsigned) 8 + 2;
4477 best_cost = cost;
4480 /* Check if we can sign extend in r0 */
4481 if (insize < 8)
4483 cost = 3 + ashl_lshr_seq[left].insn_count;
4484 if (cost < best_cost)
4486 kind = 6;
4487 best_cost = cost;
4489 /* Try the same with a final signed shift. */
4490 if (left < 31)
4492 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4493 if (cost < best_cost)
4495 kind = 7;
4496 best_cost = cost;
4500 if (TARGET_DYNSHIFT)
4502 /* Try to use a dynamic shift. */
4503 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4504 if (cost < best_cost)
4506 kind = 0;
4507 best_cost = cost;
4510 if (costp)
4511 *costp = cost;
4512 return kind;
4515 /* Function to be used in the length attribute of the instructions
4516 implementing this pattern. */
4518 shl_sext_length (rtx insn)
4520 rtx set_src, left_rtx, size_rtx;
4521 int cost;
4523 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4524 left_rtx = XEXP (XEXP (set_src, 0), 1);
4525 size_rtx = XEXP (set_src, 1);
4526 shl_sext_kind (left_rtx, size_rtx, &cost);
4527 return cost;
4530 /* Generate rtl for this pattern */
4531 bool
4532 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4534 int kind;
4535 int left, size, insize, cost;
4536 rtx operands[3];
4538 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4539 left = INTVAL (left_rtx);
4540 size = INTVAL (size_rtx);
4541 insize = size - left;
4542 switch (kind)
4544 case 1:
4545 case 2:
4546 case 3:
4547 case 4:
4549 int ext = kind & 1 ? 8 : 16;
4550 int shift2 = size - ext;
4552 /* Don't expand fine-grained when combining, because that will
4553 make the pattern fail. */
4554 if (! currently_expanding_to_rtl
4555 && ! reload_in_progress && ! reload_completed)
4557 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4558 emit_insn (gen_movsi (dest, source));
4559 break;
4561 if (dest != source)
4562 emit_insn (gen_movsi (dest, source));
4563 operands[0] = dest;
4564 if (ext - insize)
4566 operands[2] = GEN_INT (ext - insize);
4567 gen_shifty_hi_op (ASHIFT, operands);
4569 emit_insn (kind & 1
4570 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4571 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4572 if (kind <= 2)
4574 if (shift2)
4576 operands[2] = GEN_INT (shift2);
4577 gen_shifty_op (ASHIFT, operands);
4580 else
4582 if (shift2 > 0)
4584 if (EXT_SHIFT_SIGNED (shift2))
4586 operands[2] = GEN_INT (shift2 + 1);
4587 gen_shifty_op (ASHIFT, operands);
4588 operands[2] = const1_rtx;
4589 gen_shifty_op (ASHIFTRT, operands);
4590 break;
4592 operands[2] = GEN_INT (shift2);
4593 gen_shifty_hi_op (ASHIFT, operands);
4595 else if (shift2)
4597 operands[2] = GEN_INT (-shift2);
4598 gen_shifty_hi_op (LSHIFTRT, operands);
4600 emit_insn (size <= 8
4601 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4602 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4604 break;
4606 case 5:
4608 int i = 16 - size;
4609 if (! currently_expanding_to_rtl
4610 && ! reload_in_progress && ! reload_completed)
4611 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4612 else
4614 operands[0] = dest;
4615 operands[2] = GEN_INT (16 - insize);
4616 gen_shifty_hi_op (ASHIFT, operands);
4617 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4619 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4620 while (--i >= 0)
4621 gen_ashift (ASHIFTRT, 1, dest);
4622 break;
4624 case 6:
4625 case 7:
4626 /* Don't expand fine-grained when combining, because that will
4627 make the pattern fail. */
4628 if (! currently_expanding_to_rtl
4629 && ! reload_in_progress && ! reload_completed)
4631 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4632 emit_insn (gen_movsi (dest, source));
4633 break;
4635 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4636 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4637 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4638 operands[0] = dest;
4639 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4640 gen_shifty_op (ASHIFT, operands);
4641 if (kind == 7)
4642 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4643 break;
4644 default:
4645 return true;
4647 return false;
4650 /* Prefix a symbol_ref name with "datalabel". */
4652 gen_datalabel_ref (rtx sym)
4654 const char *str;
4656 if (GET_CODE (sym) == LABEL_REF)
4657 return gen_rtx_CONST (GET_MODE (sym),
4658 gen_rtx_UNSPEC (GET_MODE (sym),
4659 gen_rtvec (1, sym),
4660 UNSPEC_DATALABEL));
4662 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4664 str = XSTR (sym, 0);
4665 /* Share all SYMBOL_REF strings with the same value - that is important
4666 for cse. */
4667 str = IDENTIFIER_POINTER (get_identifier (str));
4668 XSTR (sym, 0) = str;
4670 return sym;
4674 static alloc_pool label_ref_list_pool;
4676 typedef struct label_ref_list_d
4678 rtx_code_label *label;
4679 struct label_ref_list_d *next;
4680 } *label_ref_list_t;
4682 /* The SH cannot load a large constant into a register, constants have to
4683 come from a pc relative load. The reference of a pc relative load
4684 instruction must be less than 1k in front of the instruction. This
4685 means that we often have to dump a constant inside a function, and
4686 generate code to branch around it.
4688 It is important to minimize this, since the branches will slow things
4689 down and make things bigger.
4691 Worst case code looks like:
4693 mov.l L1,rn
4694 bra L2
4696 align
4697 L1: .long value
4701 mov.l L3,rn
4702 bra L4
4704 align
4705 L3: .long value
4709 We fix this by performing a scan before scheduling, which notices which
4710 instructions need to have their operands fetched from the constant table
4711 and builds the table.
4713 The algorithm is:
4715 scan, find an instruction which needs a pcrel move. Look forward, find the
4716 last barrier which is within MAX_COUNT bytes of the requirement.
4717 If there isn't one, make one. Process all the instructions between
4718 the found instruction and the barrier.
4720 In the above example, we can tell that L3 is within 1k of L1, so
4721 the first move can be shrunk from the 3 insn+constant sequence into
4722 just 1 insn, and the constant moved to L3 to make:
4724 mov.l L1,rn
4726 mov.l L3,rn
4727 bra L4
4729 align
4730 L3:.long value
4731 L4:.long value
4733 Then the second move becomes the target for the shortening process. */
4735 typedef struct
4737 rtx value; /* Value in table. */
4738 rtx_code_label *label; /* Label of value. */
4739 label_ref_list_t wend; /* End of window. */
4740 machine_mode mode; /* Mode of value. */
4742 /* True if this constant is accessed as part of a post-increment
4743 sequence. Note that HImode constants are never accessed in this way. */
4744 bool part_of_sequence_p;
4745 } pool_node;
4747 /* The maximum number of constants that can fit into one pool, since
4748 constants in the range 0..510 are at least 2 bytes long, and in the
4749 range from there to 1018 at least 4 bytes. */
4751 #define MAX_POOL_SIZE 372
4752 static pool_node pool_vector[MAX_POOL_SIZE];
4753 static int pool_size;
4754 static rtx_code_label *pool_window_label;
4755 static int pool_window_last;
4757 static int max_labelno_before_reorg;
4759 /* ??? If we need a constant in HImode which is the truncated value of a
4760 constant we need in SImode, we could combine the two entries thus saving
4761 two bytes. Is this common enough to be worth the effort of implementing
4762 it? */
4764 /* ??? This stuff should be done at the same time that we shorten branches.
4765 As it is now, we must assume that all branches are the maximum size, and
4766 this causes us to almost always output constant pools sooner than
4767 necessary. */
4769 /* Add a constant to the pool and return its label. */
4770 static rtx_code_label *
4771 add_constant (rtx x, machine_mode mode, rtx last_value)
4773 int i;
4774 rtx_code_label *lab, *new_rtx;
4775 label_ref_list_t ref, newref;
4777 /* First see if we've already got it. */
4778 for (i = 0; i < pool_size; i++)
4780 if (x->code == pool_vector[i].value->code
4781 && mode == pool_vector[i].mode)
4783 if (x->code == CODE_LABEL)
4785 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4786 continue;
4788 if (rtx_equal_p (x, pool_vector[i].value))
4790 lab = new_rtx = 0;
4791 if (! last_value
4792 || ! i
4793 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4795 new_rtx = gen_label_rtx ();
4796 LABEL_REFS (new_rtx) = pool_vector[i].label;
4797 pool_vector[i].label = lab = new_rtx;
4799 if (lab && pool_window_label)
4801 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4802 newref->label = pool_window_label;
4803 ref = pool_vector[pool_window_last].wend;
4804 newref->next = ref;
4805 pool_vector[pool_window_last].wend = newref;
4807 if (new_rtx)
4808 pool_window_label = new_rtx;
4809 pool_window_last = i;
4810 return lab;
4815 /* Need a new one. */
4816 pool_vector[pool_size].value = x;
4817 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4819 lab = 0;
4820 pool_vector[pool_size - 1].part_of_sequence_p = true;
4822 else
4823 lab = gen_label_rtx ();
4824 pool_vector[pool_size].mode = mode;
4825 pool_vector[pool_size].label = lab;
4826 pool_vector[pool_size].wend = NULL;
4827 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4828 if (lab && pool_window_label)
4830 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4831 newref->label = pool_window_label;
4832 ref = pool_vector[pool_window_last].wend;
4833 newref->next = ref;
4834 pool_vector[pool_window_last].wend = newref;
4836 if (lab)
4837 pool_window_label = lab;
4838 pool_window_last = pool_size;
4839 pool_size++;
4840 return lab;
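/* Summary of the behaviour above: asking for a constant that is already
   in the pool does not create a new entry; a fresh label is chained onto
   the existing entry via LABEL_REFS instead (unless the constant merely
   continues the preceding value as part of a sequence), so each distinct
   constant is emitted only once per table.  */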
4843 /* Output the literal table. START, if nonzero, is the first instruction
4844 this table is needed for, and also indicates that there is at least one
4845 casesi_worker_2 instruction; we have to emit the operand3 labels from
4846 these insns at a 4-byte aligned position. BARRIER is the barrier
4847 after which we are to place the table. */
4848 static void
4849 dump_table (rtx_insn *start, rtx_insn *barrier)
4851 rtx_insn *scan = barrier;
4852 int i;
4853 bool need_align = true;
4854 rtx lab;
4855 label_ref_list_t ref;
4856 bool have_df = false;
4858 /* Do two passes, first time dump out the HI sized constants. */
4860 for (i = 0; i < pool_size; i++)
4862 pool_node *p = &pool_vector[i];
4864 if (p->mode == HImode)
4866 if (need_align)
4868 scan = emit_insn_after (gen_align_2 (), scan);
4869 need_align = false;
4871 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4872 scan = emit_label_after (lab, scan);
4873 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4874 scan);
4875 for (ref = p->wend; ref; ref = ref->next)
4877 lab = ref->label;
4878 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4881 else if (p->mode == DFmode)
4882 have_df = true;
4885 need_align = true;
4887 if (start)
4889 scan = emit_insn_after (gen_align_4 (), scan);
4890 need_align = false;
4891 for (; start != barrier; start = NEXT_INSN (start))
4892 if (NONJUMP_INSN_P (start)
4893 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4895 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4896 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4898 scan = emit_label_after (lab, scan);
4901 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4903 rtx_insn *align_insn = NULL;
4905 scan = emit_label_after (gen_label_rtx (), scan);
4906 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4907 need_align = false;
4909 for (i = 0; i < pool_size; i++)
4911 pool_node *p = &pool_vector[i];
4913 switch (p->mode)
4915 case HImode:
4916 break;
4917 case SImode:
4918 case SFmode:
4919 if (align_insn && !p->part_of_sequence_p)
4921 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4922 emit_label_before (lab, align_insn);
4923 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4924 align_insn);
4925 for (ref = p->wend; ref; ref = ref->next)
4927 lab = ref->label;
4928 emit_insn_before (gen_consttable_window_end (lab),
4929 align_insn);
4931 delete_insn (align_insn);
4932 align_insn = NULL;
4933 continue;
4935 else
4937 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4938 scan = emit_label_after (lab, scan);
4939 scan = emit_insn_after (gen_consttable_4 (p->value,
4940 const0_rtx), scan);
4941 need_align = ! need_align;
4943 break;
4944 case DFmode:
4945 if (need_align)
4947 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4948 align_insn = scan;
4949 need_align = false;
4951 case DImode:
4952 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4953 scan = emit_label_after (lab, scan);
4954 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4955 scan);
4956 break;
4957 default:
4958 gcc_unreachable ();
4961 if (p->mode != HImode)
4963 for (ref = p->wend; ref; ref = ref->next)
4965 lab = ref->label;
4966 scan = emit_insn_after (gen_consttable_window_end (lab),
4967 scan);
4972 pool_size = 0;
4975 for (i = 0; i < pool_size; i++)
4977 pool_node *p = &pool_vector[i];
4979 switch (p->mode)
4981 case HImode:
4982 break;
4983 case SImode:
4984 case SFmode:
4985 if (need_align)
4987 need_align = false;
4988 scan = emit_label_after (gen_label_rtx (), scan);
4989 scan = emit_insn_after (gen_align_4 (), scan);
4991 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4992 scan = emit_label_after (lab, scan);
4993 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4994 scan);
4995 break;
4996 case DFmode:
4997 case DImode:
4998 if (need_align)
5000 need_align = false;
5001 scan = emit_label_after (gen_label_rtx (), scan);
5002 scan = emit_insn_after (gen_align_4 (), scan);
5004 for (lab = p->label; lab; lab = LABEL_REFS (lab))
5005 scan = emit_label_after (lab, scan);
5006 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
5007 scan);
5008 break;
5009 default:
5010 gcc_unreachable ();
5013 if (p->mode != HImode)
5015 for (ref = p->wend; ref; ref = ref->next)
5017 lab = ref->label;
5018 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5023 scan = emit_insn_after (gen_consttable_end (), scan);
5024 scan = emit_barrier_after (scan);
5025 pool_size = 0;
5026 pool_window_label = NULL;
5027 pool_window_last = 0;
5030 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
5032 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5034 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5035 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
5036 need to fix it if the input value is CONST_OK_FOR_I08. */
5037 static bool
5038 broken_move (rtx_insn *insn)
5040 if (NONJUMP_INSN_P (insn))
5042 rtx pat = PATTERN (insn);
5043 if (GET_CODE (pat) == PARALLEL)
5044 pat = XVECEXP (pat, 0, 0);
5045 if (GET_CODE (pat) == SET
5046 /* We can load any 8-bit value if we don't care what the high
5047 order bits end up as. */
5048 && GET_MODE (SET_DEST (pat)) != QImode
5049 && (CONSTANT_P (SET_SRC (pat))
5050 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5051 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5052 /* Match mova_const. */
5053 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5054 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5055 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5056 && ! (TARGET_SH2E
5057 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5058 && (fp_zero_operand (SET_SRC (pat))
5059 || fp_one_operand (SET_SRC (pat)))
5060 /* In general we don't know the current setting of fpscr, so
5061 disable fldi.
5062 There is an exception if this was a register-register move
5063 before reload - and hence it was ascertained that we have
5064 single precision setting - and in a post-reload optimization
5065 we changed this to do a constant load. In that case
5066 we don't have an r0 clobber, hence we must use fldi. */
5067 && (TARGET_FMOVD
5068 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5069 == SCRATCH))
5070 && REG_P (SET_DEST (pat))
5071 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5072 && ! (TARGET_SH2A
5073 && GET_MODE (SET_DEST (pat)) == SImode
5074 && (satisfies_constraint_I20 (SET_SRC (pat))
5075 || satisfies_constraint_I28 (SET_SRC (pat))))
5076 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5077 return true;
5080 return false;
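/* For example (illustrative): (set (reg:SI r1) (const_int 10)) is not
   broken, because #10 satisfies the I08 constraint, whereas
   (set (reg:SI r1) (const_int 0x12345)) is broken and will be loaded
   with a pc-relative mov.l from the constant pool (unless the SH2A
   I20/I28 constraints apply).  */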
5083 /* Return true if the specified insn is a mova insn. */
5084 static bool
5085 mova_p (rtx_insn *insn)
5087 return (NONJUMP_INSN_P (insn)
5088 && GET_CODE (PATTERN (insn)) == SET
5089 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5090 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5091 /* Don't match mova_const. */
5092 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5095 /* Fix up a mova from a switch that went out of range. */
5096 static void
5097 fixup_mova (rtx_insn *mova)
5099 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5100 if (! flag_pic)
5102 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5103 INSN_CODE (mova) = -1;
5105 else
5107 rtx_insn *worker = mova;
5108 rtx_code_label *lab = gen_label_rtx ();
5109 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5113 worker = NEXT_INSN (worker);
5114 gcc_assert (worker
5115 && !LABEL_P (worker)
5116 && !JUMP_P (worker));
5117 } while (NOTE_P (worker)
5118 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5119 wpat = PATTERN (worker);
5120 wpat0 = XVECEXP (wpat, 0, 0);
5121 wpat1 = XVECEXP (wpat, 0, 1);
5122 wsrc = SET_SRC (wpat0);
5123 PATTERN (worker) = (gen_casesi_worker_2
5124 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5125 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5126 XEXP (wpat1, 0)));
5127 INSN_CODE (worker) = -1;
5128 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5129 base = gen_rtx_LABEL_REF (Pmode, lab);
5130 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5131 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5132 INSN_CODE (mova) = -1;
5136 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5137 *num_mova, and check if the new mova is not nested within the first one.
5138 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5139 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5140 static int
5141 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5143 int n_addr = 0; /* Initialization to shut up spurious warning. */
5144 int f_target, n_target = 0; /* Likewise. */
5146 if (optimize)
5148 /* If NEW_MOVA has no address yet, it will be handled later. */
5149 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5150 return -1;
5152 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5153 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5154 if (n_addr > n_target || n_addr + 1022 < n_target)
5156 /* Change the mova into a load.
5157 broken_move will then return true for it. */
5158 fixup_mova (new_mova);
5159 return 1;
5162 if (!(*num_mova)++)
5164 *first_mova = new_mova;
5165 return 2;
5167 if (!optimize
5168 || ((f_target
5169 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5170 >= n_target))
5171 return -1;
5173 (*num_mova)--;
5174 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5175 > n_target - n_addr)
5177 fixup_mova (*first_mova);
5178 return 0;
5180 else
5182 fixup_mova (new_mova);
5183 return 1;
5187 /* Find the last barrier from insn FROM which is close enough to hold the
5188 constant pool. If we can't find one, then create one near the end of
5189 the range. */
5190 static rtx_insn *
5191 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5193 int count_si = 0;
5194 int count_hi = 0;
5195 int found_hi = 0;
5196 int found_si = 0;
5197 int found_di = 0;
5198 int hi_align = 2;
5199 int si_align = 2;
5200 int leading_mova = num_mova;
5201 rtx_insn *barrier_before_mova = NULL;
5202 rtx_insn *found_barrier = NULL;
5203 rtx_insn *good_barrier = NULL;
5204 int si_limit;
5205 int hi_limit;
5206 rtx_insn *orig = from;
5207 rtx_insn *last_got = NULL;
5208 rtx_insn *last_symoff = NULL;
5210 /* For HImode: range is 510, add 4 because pc counts from address of
5211 second instruction after this one, subtract 2 for the jump instruction
5212 that we may need to emit before the table, subtract 2 for the instruction
5213 that fills the jump delay slot (in very rare cases, reorg will take an
5214 instruction from after the constant pool or will leave the delay slot
5215 empty). This gives 510.
5216 For SImode: range is 1020, add 4 because pc counts from address of
5217 second instruction after this one, subtract 2 in case pc is 2 byte
5218 aligned, subtract 2 for the jump instruction that we may need to emit
5219 before the table, subtract 2 for the instruction that fills the jump
5220 delay slot. This gives 1018. */
5222 /* The branch will always be shortened now that the reference address for
5223 forward branches is the successor address, thus we no longer need to make
5224 adjustments to the [sh]i_limit for -O0. */
5226 si_limit = 1018;
5227 hi_limit = 510;
5229 while (from && count_si < si_limit && count_hi < hi_limit)
5231 int inc = get_attr_length (from);
5232 int new_align = 1;
5234 /* If this is a label that existed at the time of the compute_alignments
5235 call, determine the alignment. N.B. When find_barrier recurses for
5236 an out-of-reach mova, we might see labels at the start of previously
5237 inserted constant tables. */
5238 if (LABEL_P (from)
5239 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5241 if (optimize)
5242 new_align = 1 << label_to_alignment (from);
5243 else if (BARRIER_P (prev_nonnote_insn (from)))
5244 new_align = 1 << barrier_align (from);
5245 else
5246 new_align = 1;
5247 inc = 0;
5249 /* In case we are scanning a constant table because of recursion, check
5250 for explicit alignments. If the table is long, we might be forced
5251 to emit the new table in front of it; the length of the alignment
5252 might be the last straw. */
5253 else if (NONJUMP_INSN_P (from)
5254 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5255 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5256 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5257 /* When we find the end of a constant table, paste the new constant
5258 at the end. That is better than putting it in front because
5259 this way, we don't need extra alignment for adding a 4-byte-aligned
5260 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5261 else if (NONJUMP_INSN_P (from)
5262 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5263 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5264 return from;
5266 if (BARRIER_P (from))
5268 rtx_insn *next;
5270 found_barrier = from;
5272 /* If we are at the end of the function, or in front of an alignment
5273 instruction, we need not insert an extra alignment. We prefer
5274 this kind of barrier. */
5275 if (barrier_align (from) > 2)
5276 good_barrier = from;
5278 /* If we are at the end of a hot/cold block, dump the constants
5279 here. */
5280 next = NEXT_INSN (from);
5281 if (next
5282 && NOTE_P (next)
5283 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5284 break;
5287 if (broken_move (from))
5289 rtx pat, src, dst;
5290 machine_mode mode;
5292 pat = PATTERN (from);
5293 if (GET_CODE (pat) == PARALLEL)
5294 pat = XVECEXP (pat, 0, 0);
5295 src = SET_SRC (pat);
5296 dst = SET_DEST (pat);
5297 mode = GET_MODE (dst);
5299 /* A GOT pc-relative setting comes in a pair of
5300 mova .L8,r0
5301 mov.l .L8,r12
5302 instructions. (plus add r0,r12).
5303 Remember if we see one without the other. */
5304 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5305 last_got = last_got ? NULL : from;
5306 else if (PIC_ADDR_P (src))
5307 last_got = last_got ? NULL : from;
5309 /* We must explicitly check the mode, because sometimes the
5310 front end will generate code to load unsigned constants into
5311 HImode targets without properly sign extending them. */
5312 if (mode == HImode
5313 || (mode == SImode && satisfies_constraint_I16 (src)
5314 && REGNO (dst) != FPUL_REG))
5316 found_hi += 2;
5317 /* We put the short constants before the long constants, so
5318 we must count the length of short constants in the range
5319 for the long constants. */
5320 /* ??? This isn't optimal, but is easy to do. */
5321 si_limit -= 2;
5323 else
5325 /* We dump DF/DI constants before SF/SI ones, because
5326 the limit is the same, but the alignment requirements
5327 are higher. We may waste up to 4 additional bytes
5328 for alignment, and the DF/DI constant may have
5329 another SF/SI constant placed before it. */
5330 if (TARGET_SHCOMPACT
5331 && ! found_di
5332 && (mode == DFmode || mode == DImode))
5334 found_di = 1;
5335 si_limit -= 8;
5337 while (si_align > 2 && found_si + si_align - 2 > count_si)
5338 si_align >>= 1;
5339 if (found_si > count_si)
5340 count_si = found_si;
5341 found_si += GET_MODE_SIZE (mode);
5342 if (num_mova)
5343 si_limit -= GET_MODE_SIZE (mode);
5347 if (mova_p (from))
5349 switch (untangle_mova (&num_mova, &mova, from))
5351 case 1:
5352 if (flag_pic)
5354 rtx src = SET_SRC (PATTERN (from));
5355 if (GET_CODE (src) == CONST
5356 && GET_CODE (XEXP (src, 0)) == UNSPEC
5357 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5358 last_symoff = from;
5360 break;
5361 case 0: return find_barrier (0, 0, mova);
5362 case 2:
5364 leading_mova = 0;
5365 barrier_before_mova
5366 = good_barrier ? good_barrier : found_barrier;
5368 default: break;
5370 if (found_si > count_si)
5371 count_si = found_si;
5373 else if (JUMP_TABLE_DATA_P (from)
5374 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5376 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5377 || (num_mova
5378 && (prev_nonnote_insn (from)
5379 == XEXP (MOVA_LABELREF (mova), 0))))
5380 num_mova--;
5381 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5383 /* We have just passed the barrier in front of the
5384 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5385 the ADDR_DIFF_VEC is accessed as data, just like our pool
5386 constants, this is a good opportunity to accommodate what
5387 we have gathered so far.
5388 If we waited any longer, we could end up at a barrier in
5389 front of code, which gives worse cache usage for separated
5390 instruction / data caches. */
5391 good_barrier = found_barrier;
5392 break;
5394 else
5396 rtx body = PATTERN (from);
5397 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5400 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5401 else if (JUMP_P (from)
5402 && ! TARGET_SH2
5403 && ! optimize_size)
5404 new_align = 4;
5406 /* There is a possibility that a bf is transformed into a bf/s by the
5407 delay slot scheduler. */
5408 if (JUMP_P (from)
5409 && get_attr_type (from) == TYPE_CBRANCH
5410 && ! sequence_insn_p (from))
5411 inc += 2;
5413 if (found_si)
5415 count_si += inc;
5416 if (new_align > si_align)
5418 si_limit -= (count_si - 1) & (new_align - si_align);
5419 si_align = new_align;
5421 count_si = (count_si + new_align - 1) & -new_align;
5423 if (found_hi)
5425 count_hi += inc;
5426 if (new_align > hi_align)
5428 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5429 hi_align = new_align;
5431 count_hi = (count_hi + new_align - 1) & -new_align;
5433 from = NEXT_INSN (from);
5436 if (num_mova)
5438 if (leading_mova)
5440 /* Try as we might, the leading mova is out of range. Change
5441 it into a load (which will become a pcload) and retry. */
5442 fixup_mova (mova);
5443 return find_barrier (0, 0, mova);
5445 else
5447 /* Insert the constant pool table before the mova instruction,
5448 to prevent the mova label reference from going out of range. */
5449 from = mova;
5450 good_barrier = found_barrier = barrier_before_mova;
5454 if (found_barrier)
5456 if (good_barrier && next_real_insn (found_barrier))
5457 found_barrier = good_barrier;
5459 else
5461 /* We didn't find a barrier in time to dump our stuff,
5462 so we'll make one. */
5463 rtx_code_label *label = gen_label_rtx ();
5465 /* Don't emit a constant table in the middle of insns for
5466 casesi_worker_2. This is a bit of overkill but is enough
5467 because casesi_worker_2 does not appear very frequently. */
5468 if (last_symoff)
5469 from = last_symoff;
5471 /* If we exceeded the range, then we must back up over the last
5472 instruction we looked at. Otherwise, we just need to undo the
5473 NEXT_INSN at the end of the loop. */
5474 if (PREV_INSN (from) != orig
5475 && (count_hi > hi_limit || count_si > si_limit))
5476 from = PREV_INSN (PREV_INSN (from));
5477 else
5478 from = PREV_INSN (from);
5480 /* Don't emit a constant table in the middle of global pointer setting,
5481 since that would move the addressing base GOT into another table.
5482 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5483 in the pool anyway, so just move up the whole constant pool.
5485 However, avoid doing so when the last single GOT mov is the starting
5486 insn itself. Going back past the start insn would create a negative
5487 offset, causing errors. */
5488 if (last_got && last_got != orig)
5489 from = PREV_INSN (last_got);
5491 /* Don't insert the constant pool table at the position which
5492 may be the landing pad. */
5493 if (flag_exceptions
5494 && CALL_P (from)
5495 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5496 from = PREV_INSN (from);
5498 /* Walk back to be just before any jump or label.
5499 Putting it before a label reduces the number of times the branch
5500 around the constant pool table will be hit. Putting it before
5501 a jump makes it more likely that the bra delay slot will be
5502 filled. */
5503 while (NOTE_P (from) || JUMP_P (from)
5504 || LABEL_P (from))
5505 from = PREV_INSN (from);
5507 /* Make sure we do not split between a call and its corresponding
5508 CALL_ARG_LOCATION note. */
5509 if (CALL_P (from))
5511 rtx_insn *next = NEXT_INSN (from);
5512 if (next && NOTE_P (next)
5513 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5514 from = next;
5517 from = emit_jump_insn_after (gen_jump (label), from);
5518 JUMP_LABEL (from) = label;
5519 LABEL_NUSES (label) = 1;
5520 found_barrier = emit_barrier_after (from);
5521 emit_label_after (label, found_barrier);
5524 return found_barrier;
5527 /* If the instruction INSN is implemented by a special function, and we can
5528 positively find the register that is used to call the sfunc, and this
5529 register is not used anywhere else in this instruction - except as the
5530 destination of a set, return this register; else, return 0. */
5532 sfunc_uses_reg (rtx_insn *insn)
5534 int i;
5535 rtx pattern, part, reg_part, reg;
5537 if (!NONJUMP_INSN_P (insn))
5538 return NULL_RTX;
5539 pattern = PATTERN (insn);
5540 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5541 return NULL_RTX;
5543 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5545 part = XVECEXP (pattern, 0, i);
5546 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5547 reg_part = part;
5549 if (! reg_part)
5550 return NULL_RTX;
5551 reg = XEXP (reg_part, 0);
5552 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5554 part = XVECEXP (pattern, 0, i);
5555 if (part == reg_part || GET_CODE (part) == CLOBBER)
5556 continue;
5557 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5558 && REG_P (SET_DEST (part)))
5559 ? SET_SRC (part) : part)))
5560 return NULL_RTX;
5562 return reg;
5565 /* See if the only way in which INSN uses REG is by calling it, or by
5566 setting it while calling it. Set *SET to a SET rtx if the register
5567 is set by INSN. */
5568 static bool
5569 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5571 rtx pattern, reg2;
5573 *set = NULL_RTX;
5575 reg2 = sfunc_uses_reg (insn);
5576 if (reg2 && REGNO (reg2) == REGNO (reg))
5578 pattern = single_set (insn);
5579 if (pattern
5580 && REG_P (SET_DEST (pattern))
5581 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5582 *set = pattern;
5583 return false;
5585 if (!CALL_P (insn))
5587 /* We don't use rtx_equal_p because we don't care if the mode is
5588 different. */
5589 pattern = single_set (insn);
5590 if (pattern
5591 && REG_P (SET_DEST (pattern))
5592 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5594 rtx par, part;
5595 int i;
5597 *set = pattern;
5598 par = PATTERN (insn);
5599 if (GET_CODE (par) == PARALLEL)
5600 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5602 part = XVECEXP (par, 0, i);
5603 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5604 return true;
5606 return reg_mentioned_p (reg, SET_SRC (pattern));
5609 return true;
5612 pattern = PATTERN (insn);
5614 if (GET_CODE (pattern) == PARALLEL)
5616 int i;
5618 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5619 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5620 return true;
5621 pattern = XVECEXP (pattern, 0, 0);
5624 if (GET_CODE (pattern) == SET)
5626 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5628 /* We don't use rtx_equal_p, because we don't care if the
5629 mode is different. */
5630 if (!REG_P (SET_DEST (pattern))
5631 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5632 return true;
5634 *set = pattern;
5637 pattern = SET_SRC (pattern);
5640 if (GET_CODE (pattern) != CALL
5641 || !MEM_P (XEXP (pattern, 0))
5642 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5643 return true;
5645 return false;
5648 /* Given X, a pattern of an insn or a part of it, return a mask of used
5649 general registers. Bits 0..15 mean that the respective registers
5650 are used as inputs in the instruction. Bits 16..31 mean that the
5651 registers 0..15, respectively, are used as outputs, or are clobbered.
5652 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
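 /* Worked example (illustrative only): for the SImode move
      (set (reg:SI 1) (reg:SI 2))
    the source contributes bit 2 (r2 read) and the destination, visited
    with IS_DEST == 16, contributes bit 17 (r1 written), giving a result
    of 0x00020004.  */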
5654 regs_used (rtx x, int is_dest)
5656 enum rtx_code code;
5657 const char *fmt;
5658 int i, used = 0;
5660 if (! x)
5661 return used;
5662 code = GET_CODE (x);
5663 switch (code)
5665 case REG:
5666 if (REGNO (x) < 16)
5667 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5668 << (REGNO (x) + is_dest));
5669 return 0;
5670 case SUBREG:
5672 rtx y = SUBREG_REG (x);
5674 if (!REG_P (y))
5675 break;
5676 if (REGNO (y) < 16)
5677 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5678 << (REGNO (y) +
5679 subreg_regno_offset (REGNO (y),
5680 GET_MODE (y),
5681 SUBREG_BYTE (x),
5682 GET_MODE (x)) + is_dest));
5683 return 0;
5685 case SET:
5686 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5687 case RETURN:
5688 /* If there was a return value, it must have been indicated with USE. */
5689 return 0x00ffff00;
5690 case CLOBBER:
5691 is_dest = 1;
5692 break;
5693 case MEM:
5694 is_dest = 0;
5695 break;
5696 case CALL:
5697 used |= 0x00ff00f0;
5698 break;
5699 default:
5700 break;
5703 fmt = GET_RTX_FORMAT (code);
5705 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5707 if (fmt[i] == 'E')
5709 int j;
5710 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5711 used |= regs_used (XVECEXP (x, i, j), is_dest);
5713 else if (fmt[i] == 'e')
5714 used |= regs_used (XEXP (x, i), is_dest);
5716 return used;
5719 /* Create an instruction that prevents redirection of a conditional branch
5720 to the destination of the JUMP with address ADDR.
5721 If the branch needs to be implemented as an indirect jump, try to find
5722 a scratch register for it.
5723 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5724 If any preceding insn that doesn't fit into a delay slot is good enough,
5725 pass 1. Pass 2 if a definite blocking insn is needed.
5726 -1 is used internally to avoid deep recursion.
5727 If a blocking instruction is made or recognized, return it. */
5728 static rtx_insn *
5729 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5731 int dead = 0;
5732 rtx_insn *prev = prev_nonnote_insn (jump);
5733 rtx dest;
5735 /* First, check if we already have an instruction that satisfies our need. */
5736 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5738 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5739 return prev;
5740 if (GET_CODE (PATTERN (prev)) == USE
5741 || GET_CODE (PATTERN (prev)) == CLOBBER
5742 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5743 prev = jump;
5744 else if ((need_block &= ~1) < 0)
5745 return prev;
5746 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5747 need_block = 0;
5749 if (GET_CODE (PATTERN (jump)) == RETURN)
5751 if (! need_block)
5752 return prev;
5753 /* Reorg even does nasty things with return insns that cause branches
5754 to go out of range - see find_end_label and callers. */
5755 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5757 /* We can't use JUMP_LABEL here because it might be undefined
5758 when not optimizing. */
5759 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5760 /* If the branch is out of range, try to find a scratch register for it. */
5761 if (optimize
5762 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5763 > 4092 + 4098))
5765 rtx_insn *scan;
5766 /* Don't look for the stack pointer as a scratch register;
5767 it would cause trouble if an interrupt occurred. */
5768 unsigned attempt = 0x7fff, used;
5769 int jump_left = flag_expensive_optimizations + 1;
5771 /* It is likely that the most recent eligible instruction is wanted for
5772 the delay slot. Therefore, find out which registers it uses, and
5773 try to avoid using them. */
5775 for (scan = jump; (scan = PREV_INSN (scan)); )
5777 enum rtx_code code;
5779 if (scan->deleted ())
5780 continue;
5781 code = GET_CODE (scan);
5782 if (code == CODE_LABEL || code == JUMP_INSN)
5783 break;
5784 if (code == INSN
5785 && GET_CODE (PATTERN (scan)) != USE
5786 && GET_CODE (PATTERN (scan)) != CLOBBER
5787 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5789 attempt &= ~regs_used (PATTERN (scan), 0);
5790 break;
5793 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5794 (scan = NEXT_INSN (scan)); )
5796 enum rtx_code code;
5798 if (scan->deleted ())
5799 continue;
5800 code = GET_CODE (scan);
5801 if (INSN_P (scan))
5803 used |= regs_used (PATTERN (scan), 0);
5804 if (code == CALL_INSN)
5805 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5806 dead |= (used >> 16) & ~used;
5807 if (dead & attempt)
5809 dead &= attempt;
5810 break;
5812 if (code == JUMP_INSN)
5814 if (jump_left-- && simplejump_p (scan))
5815 scan = JUMP_LABEL_AS_INSN (scan);
5816 else
5817 break;
5821 /* Mask out the stack pointer again, in case it was
5822 the only 'free' register we have found. */
5823 dead &= 0x7fff;
5825 /* If the immediate destination is still in range, check for possible
5826 threading with a jump beyond the delay slot insn.
5827 Don't check if we are called recursively; the jump has been or will be
5828 checked in a different invocation. */
5830 else if (optimize && need_block >= 0)
5832 rtx_insn *next = next_active_insn (next_active_insn (dest));
5833 if (next && JUMP_P (next)
5834 && GET_CODE (PATTERN (next)) == SET
5835 && recog_memoized (next) == CODE_FOR_jump_compact)
5837 dest = JUMP_LABEL (next);
5838 if (dest
5839 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5840 > 4092 + 4098))
5841 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5845 if (dead)
5847 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5849 /* It would be nice if we could convert the jump into an indirect
5850 jump / far branch right now, thus exposing all constituent
5851 instructions to further optimization. However, reorg uses
5852 simplejump_p to determine if there is an unconditional jump where
5853 it should try to schedule instructions from the target of the
5854 branch; simplejump_p fails for indirect jumps even if they have
5855 a JUMP_LABEL. */
5856 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5857 (reg, GEN_INT (unspec_bbr_uid++)),
5858 jump);
5859 /* ??? We would like this to have the scope of the jump, but that
5860 scope will change when a delay slot insn of an inner scope is added.
5861 Hence, after delay slot scheduling, we'll have to expect
5862 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5863 the jump. */
5865 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5866 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5867 return insn;
5869 else if (need_block)
5870 /* We can't use JUMP_LABEL here because it might be undefined
5871 when not optimizing. */
5872 return emit_insn_before (gen_block_branch_redirect
5873 (GEN_INT (unspec_bbr_uid++)),
5874 jump);
5875 return prev;
5878 #define CONDJUMP_MIN -252
5879 #define CONDJUMP_MAX 262
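 /* These bounds presumably reflect the 8-bit, 2-byte-scaled displacement
    of the SH conditional branches (roughly -252 .. +258 bytes from the
    branch insn, counting pc as the insn address + 4), plus a little slack
    on the positive side; treat the exact figures as empirical.  */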
5880 struct far_branch
5882 /* A label (to be placed) in front of the jump
5883 that jumps to our ultimate destination. */
5884 rtx_insn *near_label;
5885 /* Where we are going to insert it if we cannot move the jump any farther,
5886 or the jump itself if we have picked up an existing jump. */
5887 rtx_insn *insert_place;
5888 /* The ultimate destination. */
5889 rtx_insn *far_label;
5890 struct far_branch *prev;
5891 /* If the branch has already been created, its address;
5892 else the address of its first prospective user. */
5893 int address;
5896 static void gen_far_branch (struct far_branch *);
5897 enum mdep_reorg_phase_e mdep_reorg_phase;
5898 static void
5899 gen_far_branch (struct far_branch *bp)
5901 rtx_insn *insn = bp->insert_place;
5902 rtx_insn *jump;
5903 rtx_code_label *label = gen_label_rtx ();
5904 int ok;
5906 emit_label_after (label, insn);
5907 if (bp->far_label)
5909 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5910 LABEL_NUSES (bp->far_label)++;
5912 else
5913 jump = emit_jump_insn_after (gen_return (), insn);
5915 /* Emit a barrier so that reorg knows that any following instructions
5916 are not reachable via a fall-through path.
5917 But don't do this when not optimizing, since we wouldn't suppress the
5918 alignment for the barrier then, and could end up with out-of-range
5919 pc-relative loads. */
5920 if (optimize)
5921 emit_barrier_after (jump);
5922 emit_label_after (bp->near_label, insn);
5924 if (bp->far_label)
5925 JUMP_LABEL (jump) = bp->far_label;
5926 else
5928 rtx pat = PATTERN (jump);
5929 gcc_assert (ANY_RETURN_P (pat));
5930 JUMP_LABEL (jump) = pat;
5933 ok = invert_jump (insn, label, 1);
5934 gcc_assert (ok);
5936 /* If we are branching around a jump (rather than a return), prevent
5937 reorg from using an insn from the jump target as the delay slot insn -
5938 when reorg did this, it pessimized code (we would rather hide the delay slot)
5939 and it could cause branches to go out of range. */
5940 if (bp->far_label)
5941 (emit_insn_after
5942 (gen_stuff_delay_slot
5943 (GEN_INT (unspec_bbr_uid++),
5944 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5945 insn));
5946 /* Prevent reorg from undoing our splits. */
5947 gen_block_redirect (jump, bp->address += 2, 2);
5950 /* Fix up ADDR_DIFF_VECs. */
5951 void
5952 fixup_addr_diff_vecs (rtx_insn *first)
5954 rtx_insn *insn;
5956 for (insn = first; insn; insn = NEXT_INSN (insn))
5958 rtx vec_lab, pat, prevpat, x, braf_label;
5959 rtx_insn *prev;
5961 if (! JUMP_TABLE_DATA_P (insn)
5962 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5963 continue;
5964 pat = PATTERN (insn);
5965 vec_lab = XEXP (XEXP (pat, 0), 0);
5967 /* Search for the matching casesi_jump_2. */
5968 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5970 if (!JUMP_P (prev))
5971 continue;
5972 prevpat = PATTERN (prev);
5973 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5974 continue;
5975 x = XVECEXP (prevpat, 0, 1);
5976 if (GET_CODE (x) != USE)
5977 continue;
5978 x = XEXP (x, 0);
5979 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5980 break;
5982 /* FIXME: This is a bug in the optimizer, but it seems harmless
5983 to just avoid panicking. */
5984 if (!prev)
5985 continue;
5987 /* Emit the reference label of the braf where it belongs, right after
5988 the casesi_jump_2 (i.e. braf). */
5989 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5990 emit_label_after (braf_label, prev);
5992 /* Fix up the ADDR_DIFF_VEC to be relative
5993 to the reference address of the braf. */
5994 XEXP (XEXP (pat, 0), 0) = braf_label;
5998 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5999 a barrier. Return the base 2 logarithm of the desired alignment. */
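 /* The returned value N requests 2**N byte alignment; e.g. the value 2
    returned for jump tables below corresponds to 4 byte alignment.  */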
6001 barrier_align (rtx_insn *barrier_or_label)
6003 rtx next, pat;
6005 if (! barrier_or_label)
6006 return 0;
6008 if (LABEL_P (barrier_or_label)
6009 && NEXT_INSN (barrier_or_label)
6010 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
6011 return 2;
6013 if (BARRIER_P (barrier_or_label)
6014 && PREV_INSN (barrier_or_label)
6015 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
6017 pat = PATTERN (PREV_INSN (barrier_or_label));
6018 /* If this is a very small table, we want to keep the alignment after
6019 the table to the minimum for proper code alignment. */
6020 return ((optimize_size
6021 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6022 <= (unsigned) 1 << (CACHE_LOG - 2)))
6023 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6026 next = next_active_insn (barrier_or_label);
6028 if (! next)
6029 return 0;
6031 pat = PATTERN (next);
6033 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6034 /* This is a barrier in front of a constant table. */
6035 return 0;
6037 if (optimize_size)
6038 return 0;
6040 if (! TARGET_SH2 || ! optimize)
6041 return align_jumps_log;
6043 /* When fixing up pcloads, a constant table might be inserted just before
6044 the basic block that ends with the barrier. Thus, we can't trust the
6045 instruction lengths before that. */
6046 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6048 /* Check if there is an immediately preceding branch to the insn beyond
6049 the barrier. We must weigh the cost of discarding useful information
6050 from the current cache line when executing this branch and there is
6051 an alignment, against that of fetching unneeded insns in front of the
6052 branch target when there is no alignment. */
6054 /* There are two delay_slot cases to consider. One is the simple case
6055 where the preceding branch is to the insn beyond the barrier (simple
6056 delay slot filling), and the other is where the preceding branch has
6057 a delay slot that is a duplicate of the insn after the barrier
6058 (fill_eager_delay_slots) and the branch is to the insn after the insn
6059 after the barrier. */
6061 int slot, credit;
6062 bool jump_to_next = false;
6064 /* Skip to the insn before the JUMP_INSN before the barrier under
6065 investigation. */
6066 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6068 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6069 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6070 prev = prev_real_insn (prev))
6072 jump_to_next = false;
6073 if (GET_CODE (PATTERN (prev)) == USE
6074 || GET_CODE (PATTERN (prev)) == CLOBBER)
6075 continue;
6076 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6078 prev = prev_seq->insn (1);
6079 if (INSN_UID (prev) == INSN_UID (next))
6081 /* Delay slot was filled with insn at jump target. */
6082 jump_to_next = true;
6083 continue;
6087 if (slot &&
6088 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6089 slot = 0;
6090 credit -= get_attr_length (prev);
6092 if (prev && jump_to_label_p (prev))
6094 rtx_insn *x;
6095 if (jump_to_next
6096 || next_real_insn (JUMP_LABEL (prev)) == next
6097 /* If relax_delay_slots() decides NEXT was redundant
6098 with some previous instruction, it will have
6099 redirected PREV's jump to the following insn. */
6100 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6101 /* There is no upper bound on redundant instructions
6102 that might have been skipped, but we must not put an
6103 alignment where none had been before. */
6104 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6105 (INSN_P (x)
6106 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6107 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6108 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6110 rtx pat = PATTERN (prev);
6111 if (GET_CODE (pat) == PARALLEL)
6112 pat = XVECEXP (pat, 0, 0);
6113 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6114 return 0;
6119 return align_jumps_log;
6122 /* If we are inside a phony loop, almost any kind of label can turn up as the
6123 first one in the loop. Aligning a braf label causes incorrect switch
6124 destination addresses; we can detect braf labels because they are
6125 followed by a BARRIER.
6126 Applying loop alignment to small constant or switch tables is a waste
6127 of space, so we suppress this too. */
6129 sh_loop_align (rtx_insn *label)
6131 rtx_insn *next = label;
6133 if (! optimize || optimize_size)
6134 return 0;
6137 next = next_nonnote_insn (next);
6138 while (next && LABEL_P (next));
6140 if (! next
6141 || ! INSN_P (next)
6142 || recog_memoized (next) == CODE_FOR_consttable_2)
6143 return 0;
6145 return align_loops_log;
6148 /* Do a final pass over the function, just before delayed branch
6149 scheduling. */
6150 static void
6151 sh_reorg (void)
6153 rtx_insn *first, *insn, *mova = NULL;
6154 int num_mova;
6155 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6156 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6158 first = get_insns ();
6159 max_labelno_before_reorg = max_label_num ();
6161 /* We must split call insns before introducing `mova's. If we're
6162 optimizing, they'll have already been split. Otherwise, make
6163 sure we don't split them too late. */
6164 if (! optimize)
6165 split_all_insns_noflow ();
6167 if (TARGET_SHMEDIA)
6168 return;
6170 /* If relaxing, generate pseudo-ops to associate function calls with
6171 the symbols they call. It does no harm to not generate these
6172 pseudo-ops. However, when we can generate them, it enables the
6173 linker to potentially relax the jsr to a bsr, and eliminate the
6174 register load and, possibly, the constant pool entry. */
6176 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6177 if (TARGET_RELAX)
6179 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6180 own purposes. This works because none of the remaining passes
6181 need to look at them.
6183 ??? But it may break in the future. We should use a machine
6184 dependent REG_NOTE, or some other approach entirely. */
6185 for (insn = first; insn; insn = NEXT_INSN (insn))
6187 if (INSN_P (insn))
6189 rtx note;
6191 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6192 NULL_RTX)) != 0)
6193 remove_note (insn, note);
6197 for (insn = first; insn; insn = NEXT_INSN (insn))
6199 rtx pattern, reg, set, dies;
6200 rtx_code_label *label;
6201 rtx_insn *link, *scan;
6202 int rescan = 0, foundinsn = 0;
6204 if (CALL_P (insn))
6206 pattern = PATTERN (insn);
6208 if (GET_CODE (pattern) == PARALLEL)
6209 pattern = XVECEXP (pattern, 0, 0);
6210 if (GET_CODE (pattern) == SET)
6211 pattern = SET_SRC (pattern);
6213 if (GET_CODE (pattern) != CALL
6214 || !MEM_P (XEXP (pattern, 0)))
6215 continue;
6217 reg = XEXP (XEXP (pattern, 0), 0);
6219 else
6221 reg = sfunc_uses_reg (insn);
6222 if (! reg)
6223 continue;
6226 if (!REG_P (reg))
6227 continue;
6229 /* Try scanning backward to find where the register is set. */
6230 link = NULL;
6231 for (scan = PREV_INSN (insn);
6232 scan && !LABEL_P (scan);
6233 scan = PREV_INSN (scan))
6235 if (! INSN_P (scan))
6236 continue;
6238 if (! reg_mentioned_p (reg, scan))
6239 continue;
6241 if (noncall_uses_reg (reg, scan, &set))
6242 break;
6244 if (set)
6246 link = scan;
6247 break;
6251 if (! link)
6252 continue;
6254 /* The register is set at LINK. */
6256 /* We can only optimize the function call if the register is
6257 being set to a symbol. In theory, we could sometimes
6258 optimize calls to a constant location, but the assembler
6259 and linker do not support that at present. */
6260 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6261 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6262 continue;
6264 /* Scan forward from LINK to the place where REG dies, and
6265 make sure that the only insns which use REG are
6266 themselves function calls. */
6268 /* ??? This doesn't work for call targets that were allocated
6269 by reload, since there may not be a REG_DEAD note for the
6270 register. */
6272 dies = NULL_RTX;
6273 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6275 rtx scanset;
6277 /* Don't try to trace forward past a CODE_LABEL if we haven't
6278 seen INSN yet. Ordinarily, we will only find the setting insn
6279 if it is in the same basic block. However,
6280 cross-jumping can insert code labels in between the load and
6281 the call, and can result in situations where a single call
6282 insn may have two targets depending on where we came from. */
6284 if (LABEL_P (scan) && ! foundinsn)
6285 break;
6287 if (! INSN_P (scan))
6288 continue;
6290 /* Don't try to trace forward past a JUMP. To optimize
6291 safely, we would have to check that all the
6292 instructions at the jump destination did not use REG. */
6294 if (JUMP_P (scan))
6295 break;
6297 if (! reg_mentioned_p (reg, scan))
6298 continue;
6300 if (noncall_uses_reg (reg, scan, &scanset))
6301 break;
6303 if (scan == insn)
6304 foundinsn = 1;
6306 if (scan != insn
6307 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6309 /* There is a function call to this register other
6310 than the one we are checking. If we optimize
6311 this call, we need to rescan again below. */
6312 rescan = 1;
6315 /* ??? We shouldn't have to worry about SCANSET here.
6316 We should just be able to check for a REG_DEAD note
6317 on a function call. However, the REG_DEAD notes are
6318 apparently not dependable around libcalls; c-torture
6319 execute/920501-2 is a test case. If SCANSET is set,
6320 then this insn sets the register, so it must have
6321 died earlier. Unfortunately, this will only handle
6322 the cases in which the register is, in fact, set in a
6323 later insn. */
6325 /* ??? We shouldn't have to use FOUNDINSN here.
6326 This dates back to when we used LOG_LINKS to find
6327 the most recent insn which sets the register. */
6329 if (foundinsn
6330 && (scanset
6331 || find_reg_note (scan, REG_DEAD, reg)))
6333 dies = scan;
6334 break;
6338 if (! dies)
6340 /* Either there was a branch, or some insn used REG
6341 other than as a function call address. */
6342 continue;
6345 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6346 on the insn which sets the register, and on each call insn
6347 which uses the register. In final_prescan_insn we look for
6348 the REG_LABEL_OPERAND notes, and output the appropriate label
6349 or pseudo-op. */
6351 label = gen_label_rtx ();
6352 add_reg_note (link, REG_LABEL_OPERAND, label);
6353 add_reg_note (insn, REG_LABEL_OPERAND, label);
6354 if (rescan)
6356 scan = link;
6359 rtx reg2;
6361 scan = NEXT_INSN (scan);
6362 if (scan != insn
6363 && ((CALL_P (scan)
6364 && reg_mentioned_p (reg, scan))
6365 || ((reg2 = sfunc_uses_reg (scan))
6366 && REGNO (reg2) == REGNO (reg))))
6367 add_reg_note (scan, REG_LABEL_OPERAND, label);
6369 while (scan != dies);
6374 if (TARGET_SH2)
6375 fixup_addr_diff_vecs (first);
6377 if (optimize)
6379 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6380 shorten_branches (first);
6383 /* Scan the function looking for move instructions which have to be
6384 changed to pc-relative loads and insert the literal tables. */
6385 label_ref_list_pool = create_alloc_pool ("label references list",
6386 sizeof (struct label_ref_list_d),
6387 30);
6388 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6389 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6391 if (mova_p (insn))
6393 /* ??? basic block reordering can move a switch table dispatch
6394 below the switch table. Check if that has happened.
6395 We only have the addresses available when optimizing; but then,
6396 this check shouldn't be needed when not optimizing. */
6397 if (!untangle_mova (&num_mova, &mova, insn))
6399 insn = mova;
6400 num_mova = 0;
6403 else if (JUMP_TABLE_DATA_P (insn)
6404 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6405 && num_mova
6406 /* ??? loop invariant motion can also move a mova out of a
6407 loop. Since loop does this code motion anyway, maybe we
6408 should wrap UNSPEC_MOVA into a CONST, so that reload can
6409 move it back. */
6410 && ((num_mova > 1
6411 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6412 || (prev_nonnote_insn (insn)
6413 == XEXP (MOVA_LABELREF (mova), 0))))
6415 rtx_insn *scan;
6416 int total;
6418 num_mova--;
6420 /* Some code might have been inserted between the mova and
6421 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6422 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6423 total += get_attr_length (scan);
6425 /* range of mova is 1020, add 4 because pc counts from address of
6426 second instruction after this one, subtract 2 in case pc is 2
6427 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6428 cancels out with alignment effects of the mova itself. */
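 /* In other words, the 1022 tested below is 1020 + 4 - 2.  */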
6429 if (total > 1022)
6431 /* Change the mova into a load, and restart scanning
6432 there. broken_move will then return true for mova. */
6433 fixup_mova (mova);
6434 insn = mova;
6437 if (broken_move (insn)
6438 || (NONJUMP_INSN_P (insn)
6439 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6441 rtx_insn *scan;
6442 /* Scan ahead looking for a barrier to stick the constant table
6443 behind. */
6444 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6445 rtx_insn *last_float_move = NULL;
6446 rtx last_float = 0, *last_float_addr = NULL;
6447 int need_aligned_label = 0;
6449 if (num_mova && ! mova_p (mova))
6451 /* find_barrier had to change the first mova into a
6452 pcload; thus, we have to start with this new pcload. */
6453 insn = mova;
6454 num_mova = 0;
6456 /* Now find all the moves between the points and modify them. */
6457 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6459 if (LABEL_P (scan))
6460 last_float = 0;
6461 if (NONJUMP_INSN_P (scan)
6462 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6463 need_aligned_label = 1;
6464 if (broken_move (scan))
6466 rtx *patp = &PATTERN (scan), pat = *patp;
6467 rtx src, dst;
6468 rtx lab;
6469 rtx newsrc;
6470 machine_mode mode;
6472 if (GET_CODE (pat) == PARALLEL)
6473 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6474 src = SET_SRC (pat);
6475 dst = SET_DEST (pat);
6476 mode = GET_MODE (dst);
6478 if (mode == SImode && satisfies_constraint_I16 (src)
6479 && REGNO (dst) != FPUL_REG)
6481 int offset = 0;
6483 mode = HImode;
6484 while (GET_CODE (dst) == SUBREG)
6486 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6487 GET_MODE (SUBREG_REG (dst)),
6488 SUBREG_BYTE (dst),
6489 GET_MODE (dst));
6490 dst = SUBREG_REG (dst);
6492 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6494 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6496 /* This must be an insn that clobbers r0. */
6497 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6498 XVECLEN (PATTERN (scan), 0)
6499 - 1);
6500 rtx clobber = *clobberp;
6502 gcc_assert (GET_CODE (clobber) == CLOBBER
6503 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6505 if (last_float
6506 && reg_set_between_p (r0_rtx, last_float_move, scan))
6507 last_float = 0;
6508 if (last_float
6509 && TARGET_SHCOMPACT
6510 && GET_MODE_SIZE (mode) != 4
6511 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6512 last_float = 0;
6513 lab = add_constant (src, mode, last_float);
6514 if (lab)
6515 emit_insn_before (gen_mova (lab), scan);
6516 else
6518 /* There will be a REG_UNUSED note for r0 on
6519 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6520 since otherwise reorg:mark_target_live_regs will not
6521 consider r0 to be used, and we would end up with a delay
6522 slot insn in front of SCAN that clobbers r0. */
6523 rtx note
6524 = find_regno_note (last_float_move, REG_UNUSED, 0);
6526 /* If we are not optimizing, then there may not be
6527 a note. */
6528 if (note)
6529 PUT_REG_NOTE_KIND (note, REG_INC);
6531 *last_float_addr = r0_inc_rtx;
6533 last_float_move = scan;
6534 last_float = src;
6535 newsrc = gen_const_mem (mode,
6536 (((TARGET_SH4 && ! TARGET_FMOVD)
6537 || REGNO (dst) == FPUL_REG)
6538 ? r0_inc_rtx
6539 : r0_rtx));
6540 last_float_addr = &XEXP (newsrc, 0);
6542 /* Remove the clobber of r0. */
6543 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6544 gen_rtx_SCRATCH (Pmode));
6546 /* This is a mova needing a label. Create it. */
6547 else if (GET_CODE (src) == UNSPEC
6548 && XINT (src, 1) == UNSPEC_MOVA
6549 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6551 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6552 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6553 newsrc = gen_rtx_UNSPEC (SImode,
6554 gen_rtvec (1, newsrc),
6555 UNSPEC_MOVA);
6557 else if (GET_CODE (src) == UNSPEC_VOLATILE
6558 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6560 newsrc = XVECEXP (src, 0, 0);
6561 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6562 INSN_CODE (scan) = -1;
6563 continue;
6565 else
6567 lab = add_constant (src, mode, 0);
6568 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6569 newsrc = gen_const_mem (mode, newsrc);
6571 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6572 INSN_CODE (scan) = -1;
6575 dump_table (need_aligned_label ? insn : 0, barrier);
6576 insn = barrier;
6579 free_alloc_pool (label_ref_list_pool);
6580 for (insn = first; insn; insn = NEXT_INSN (insn))
6581 PUT_MODE (insn, VOIDmode);
6583 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6584 INSN_ADDRESSES_FREE ();
6585 split_branches (first);
6587 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6588 also has an effect on the register that holds the address of the sfunc.
6589 Insert an extra dummy insn in front of each sfunc that pretends to
6590 use this register. */
6591 if (flag_delayed_branch)
6593 for (insn = first; insn; insn = NEXT_INSN (insn))
6595 rtx reg = sfunc_uses_reg (insn);
6597 if (! reg)
6598 continue;
6599 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6602 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6605 /* Return the UID of the insn that follows the specified label. */
6607 get_dest_uid (rtx label, int max_uid)
6609 rtx_insn *dest = next_real_insn (label);
6610 int dest_uid;
6611 if (! dest)
6612 /* This can happen for an undefined label. */
6613 return 0;
6614 dest_uid = INSN_UID (dest);
6615 /* If this is a newly created branch redirection blocking instruction,
6616 we cannot index the branch_uid or insn_addresses arrays with its
6617 uid. But then, we won't need to, because the actual destination is
6618 the following branch. */
6619 while (dest_uid >= max_uid)
6621 dest = NEXT_INSN (dest);
6622 dest_uid = INSN_UID (dest);
6624 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6625 return 0;
6626 return dest_uid;
6629 /* Split condbranches that are out of range. Also add clobbers for
6630 scratch registers that are needed in far jumps.
6631 We do this before delay slot scheduling, so that it can take our
6632 newly created instructions into account. It also allows us to
6633 find branches with common targets more easily. */
6634 static void
6635 split_branches (rtx_insn *first)
6637 rtx_insn *insn;
6638 struct far_branch **uid_branch, *far_branch_list = 0;
6639 int max_uid = get_max_uid ();
6640 int ok;
6642 /* Find out which branches are out of range. */
6643 shorten_branches (first);
6645 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6646 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6648 for (insn = first; insn; insn = NEXT_INSN (insn))
6649 if (! INSN_P (insn))
6650 continue;
6651 else if (insn->deleted ())
6653 /* Shorten_branches would split this instruction again,
6654 so transform it into a note. */
6655 SET_INSN_DELETED (insn);
6657 else if (JUMP_P (insn))
6659 enum attr_type type = get_attr_type (insn);
6660 if (type == TYPE_CBRANCH)
6662 rtx_insn *next, *beyond;
6664 if (get_attr_length (insn) > 4)
6666 rtx src = SET_SRC (PATTERN (insn));
6667 rtx olabel = XEXP (XEXP (src, 1), 0);
6668 int addr = INSN_ADDRESSES (INSN_UID (insn));
6669 rtx_insn *label = 0;
6670 int dest_uid = get_dest_uid (olabel, max_uid);
6671 struct far_branch *bp = uid_branch[dest_uid];
6673 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6674 the label if the LABEL_NUSES count drops to zero. There is
6675 always a jump_optimize pass that sets these values, but it
6676 proceeds to delete unreferenced code, and then if not
6677 optimizing, to un-delete the deleted instructions, thus
6678 leaving labels with use counts that are too low. */
6679 if (! optimize)
6681 JUMP_LABEL (insn) = olabel;
6682 LABEL_NUSES (olabel)++;
6684 if (! bp)
6686 bp = (struct far_branch *) alloca (sizeof *bp);
6687 uid_branch[dest_uid] = bp;
6688 bp->prev = far_branch_list;
6689 far_branch_list = bp;
6690 bp->far_label = as_a <rtx_insn *> (
6691 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6692 0));
6693 LABEL_NUSES (bp->far_label)++;
6695 else
6697 label = bp->near_label;
6698 if (! label && bp->address - addr >= CONDJUMP_MIN)
6700 rtx_insn *block = bp->insert_place;
6702 if (GET_CODE (PATTERN (block)) == RETURN)
6703 block = PREV_INSN (block);
6704 else
6705 block = gen_block_redirect (block,
6706 bp->address, 2);
6707 label = emit_label_after (gen_label_rtx (),
6708 PREV_INSN (block));
6709 bp->near_label = label;
6711 else if (label && ! NEXT_INSN (label))
6713 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6714 bp->insert_place = insn;
6715 else
6716 gen_far_branch (bp);
6719 if (! label
6720 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6722 bp->near_label = label = gen_label_rtx ();
6723 bp->insert_place = insn;
6724 bp->address = addr;
6726 ok = redirect_jump (insn, label, 0);
6727 gcc_assert (ok);
6729 else
6731 /* get_attr_length (insn) == 2 */
6732 /* Check if we have a pattern where reorg wants to redirect
6733 the branch to a label from an unconditional branch that
6734 is too far away. */
6735 /* We can't use JUMP_LABEL here because it might be undefined
6736 when not optimizing. */
6737 /* A syntax error might cause beyond to be NULL_RTX. */
6738 beyond
6739 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6740 0));
6742 if (beyond
6743 && (JUMP_P (beyond)
6744 || ((beyond = next_active_insn (beyond))
6745 && JUMP_P (beyond)))
6746 && GET_CODE (PATTERN (beyond)) == SET
6747 && recog_memoized (beyond) == CODE_FOR_jump_compact
6748 && ((INSN_ADDRESSES
6749 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6750 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6751 > 252 + 258 + 2))
6752 gen_block_redirect (beyond,
6753 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6756 next = next_active_insn (insn);
6758 if (next
6759 && (JUMP_P (next)
6760 || ((next = next_active_insn (next))
6761 && JUMP_P (next)))
6762 && GET_CODE (PATTERN (next)) == SET
6763 && recog_memoized (next) == CODE_FOR_jump_compact
6764 && ((INSN_ADDRESSES
6765 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6766 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6767 > 252 + 258 + 2))
6768 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6770 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6772 int addr = INSN_ADDRESSES (INSN_UID (insn));
6773 rtx_insn *far_label = 0;
6774 int dest_uid = 0;
6775 struct far_branch *bp;
6777 if (type == TYPE_JUMP)
6779 far_label = as_a <rtx_insn *> (
6780 XEXP (SET_SRC (PATTERN (insn)), 0));
6781 dest_uid = get_dest_uid (far_label, max_uid);
6782 if (! dest_uid)
6784 /* Parse errors can lead to labels outside
6785 the insn stream. */
6786 if (! NEXT_INSN (far_label))
6787 continue;
6789 if (! optimize)
6791 JUMP_LABEL (insn) = far_label;
6792 LABEL_NUSES (far_label)++;
6794 redirect_jump (insn, ret_rtx, 1);
6795 far_label = 0;
6798 bp = uid_branch[dest_uid];
6799 if (! bp)
6801 bp = (struct far_branch *) alloca (sizeof *bp);
6802 uid_branch[dest_uid] = bp;
6803 bp->prev = far_branch_list;
6804 far_branch_list = bp;
6805 bp->near_label = 0;
6806 bp->far_label = far_label;
6807 if (far_label)
6808 LABEL_NUSES (far_label)++;
6810 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6811 if (addr - bp->address <= CONDJUMP_MAX)
6812 emit_label_after (bp->near_label, PREV_INSN (insn));
6813 else
6815 gen_far_branch (bp);
6816 bp->near_label = 0;
6818 else
6819 bp->near_label = 0;
6820 bp->address = addr;
6821 bp->insert_place = insn;
6822 if (! far_label)
6823 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6824 else
6825 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6828 /* Generate all pending far branches,
6829 and free our references to the far labels. */
6830 while (far_branch_list)
6832 if (far_branch_list->near_label
6833 && ! NEXT_INSN (far_branch_list->near_label))
6834 gen_far_branch (far_branch_list);
6835 if (optimize
6836 && far_branch_list->far_label
6837 && ! --LABEL_NUSES (far_branch_list->far_label))
6838 delete_insn (far_branch_list->far_label);
6839 far_branch_list = far_branch_list->prev;
6842 /* Instruction length information is no longer valid due to the new
6843 instructions that have been generated. */
6844 init_insn_lengths ();
6847 /* Dump out instruction addresses, which is useful for debugging the
6848 constant pool table stuff.
6850 If relaxing, output the label and pseudo-ops used to link together
6851 calls and the instruction which set the registers.
6853 ??? The addresses printed by this routine for insns are nonsense for
6854 insns which are inside of a sequence where none of the inner insns have
6855 variable length. This is because the second pass of shorten_branches
6856 does not bother to update them. */
6857 void
6858 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6859 int noperands ATTRIBUTE_UNUSED)
6861 if (TARGET_DUMPISIZE)
6862 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6864 if (TARGET_RELAX)
6866 rtx note;
6868 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6869 if (note)
6871 rtx pattern;
6873 pattern = PATTERN (insn);
6874 if (GET_CODE (pattern) == PARALLEL)
6875 pattern = XVECEXP (pattern, 0, 0);
6876 switch (GET_CODE (pattern))
6878 case SET:
6879 if (GET_CODE (SET_SRC (pattern)) != CALL
6880 && get_attr_type (insn) != TYPE_SFUNC)
6882 targetm.asm_out.internal_label
6883 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6884 break;
6886 /* else FALLTHROUGH */
6887 case CALL:
6888 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6889 CODE_LABEL_NUMBER (XEXP (note, 0)));
6890 break;
6892 default:
6893 gcc_unreachable ();
6899 /* Dump out any constants accumulated in the final pass. These will
6900 only be labels. */
6901 const char *
6902 output_jump_label_table (void)
6904 int i;
6906 if (pool_size)
6908 fprintf (asm_out_file, "\t.align 2\n");
6909 for (i = 0; i < pool_size; i++)
6911 pool_node *p = &pool_vector[i];
6913 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6914 CODE_LABEL_NUMBER (p->label));
6915 output_asm_insn (".long %O0", &p->value);
6917 pool_size = 0;
6920 return "";
6923 /* A full frame looks like:
6925 arg-5
6926 arg-4
6927 [ if current_function_anonymous_args
6928 arg-3
6929 arg-2
6930 arg-1
6931 arg-0 ]
6932 saved-fp
6933 saved-r10
6934 saved-r11
6935 saved-r12
6936 saved-pr
6937 local-n
6939 local-1
6940 local-0 <- fp points here.
6942 Number of bytes pushed for anonymous args, used to pass information
6943 between expand_prologue and expand_epilogue.
6945 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6946 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6947 for an epilogue and a negative value means that it's for a sibcall
6948 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6949 all the registers that are about to be restored, and hence dead. */
6950 static void
6951 output_stack_adjust (int size, rtx reg, int epilogue_p,
6952 HARD_REG_SET *live_regs_mask, bool frame_p)
6954 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6955 if (size)
6957 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6959 /* This test is bogus, as output_stack_adjust is used to re-align the
6960 stack. */
6961 #if 0
6962 gcc_assert (!(size % align));
6963 #endif
6965 if (CONST_OK_FOR_ADD (size))
6966 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6967 /* Try to do it with two partial adjustments; however, we must make
6968 sure that the stack is properly aligned at all times, in case
6969 an interrupt occurs between the two partial adjustments. */
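 /* E.g. (illustrative numbers only) for size == 200 with 4 byte alignment
    this emits two adds of 100 bytes each; both fit the add-immediate
    range and the intermediate stack pointer value stays aligned.  */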
6970 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6971 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6973 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6974 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6976 else
6978 rtx const_reg;
6979 rtx insn;
6980 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6981 int i;
6983 /* If TEMP is invalid, we could temporarily save a general
6984 register to MACL. However, there is currently no need
6985 to handle this case, so just die when we see it. */
6986 if (epilogue_p < 0
6987 || current_function_interrupt
6988 || ! call_really_used_regs[temp] || fixed_regs[temp])
6989 temp = -1;
6990 if (temp < 0 && ! current_function_interrupt
6991 && (TARGET_SHMEDIA || epilogue_p >= 0))
6993 HARD_REG_SET temps;
6994 COPY_HARD_REG_SET (temps, call_used_reg_set);
6995 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6996 if (epilogue_p > 0)
6998 int nreg = 0;
6999 if (crtl->return_rtx)
7001 machine_mode mode;
7002 mode = GET_MODE (crtl->return_rtx);
7003 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
7004 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
7006 for (i = 0; i < nreg; i++)
7007 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
7008 if (crtl->calls_eh_return)
7010 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
7011 for (i = 0; i <= 3; i++)
7012 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
7015 if (TARGET_SHMEDIA && epilogue_p < 0)
7016 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7017 CLEAR_HARD_REG_BIT (temps, i);
7018 if (epilogue_p <= 0)
7020 for (i = FIRST_PARM_REG;
7021 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7022 CLEAR_HARD_REG_BIT (temps, i);
7023 if (cfun->static_chain_decl != NULL)
7024 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7026 temp = scavenge_reg (&temps);
7028 if (temp < 0 && live_regs_mask)
7030 HARD_REG_SET temps;
7032 COPY_HARD_REG_SET (temps, *live_regs_mask);
7033 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7034 temp = scavenge_reg (&temps);
7036 if (temp < 0)
7038 rtx adj_reg, tmp_reg, mem;
7040 /* If we reached here, the most likely case is the (sibcall)
7041 epilogue for non-SHmedia. Put a special push/pop sequence
7042 for such a case as a last resort. This looks lengthy but
7043 would not be a problem because it seems to be very
7044 rare. */
7046 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7049 /* ??? There is still the slight possibility that r4 or
7050 r5 have been reserved as fixed registers or assigned
7051 as global registers, and they change during an
7052 interrupt. There are possible ways to handle this:
7054 - If we are adjusting the frame pointer (r14), we can do
7055 with a single temp register and an ordinary push / pop
7056 on the stack.
7057 - Grab any call-used or call-saved registers (i.e. not
7058 fixed or globals) for the temps we need. We might
7059 also grab r14 if we are adjusting the stack pointer.
7060 If we can't find enough available registers, issue
7061 a diagnostic and die - the user must have reserved
7062 way too many registers.
7063 But since all this is rather unlikely to happen and
7064 would require extra testing, we just die if r4 / r5
7065 are not available. */
7066 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7067 && !global_regs[4] && !global_regs[5]);
7069 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7070 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7071 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7072 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7073 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7074 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7075 emit_move_insn (mem, tmp_reg);
7076 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7077 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7078 emit_move_insn (mem, tmp_reg);
7079 emit_move_insn (reg, adj_reg);
7080 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7081 emit_move_insn (adj_reg, mem);
7082 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7083 emit_move_insn (tmp_reg, mem);
7084 /* Tell flow the insns that pop r4/r5 aren't dead. */
7085 emit_use (tmp_reg);
7086 emit_use (adj_reg);
7087 return;
7089 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7091 /* If SIZE is negative, subtract the positive value.
7092 This sometimes allows a constant pool entry to be shared
7093 between prologue and epilogue code. */
7094 if (size < 0)
7096 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7097 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7099 else
7101 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7102 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7104 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7105 gen_rtx_SET (VOIDmode, reg,
7106 gen_rtx_PLUS (SImode, reg,
7107 GEN_INT (size))));
7112 /* Emit the specified insn and mark it as frame related.
7113 FIXME: Rename this to emit_frame_insn. */
7114 static rtx_insn *
7115 frame_insn (rtx x)
7117 rtx_insn *insn = emit_insn (x);
7118 RTX_FRAME_RELATED_P (insn) = 1;
7119 return insn;
7122 /* Output RTL to push register RN onto the stack. */
7123 static rtx
7124 push (int rn)
7126 rtx x;
7127 if (rn == FPUL_REG)
7128 x = gen_push_fpul ();
7129 else if (rn == FPSCR_REG)
7130 x = gen_push_fpscr ();
7131 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7132 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7134 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7135 return NULL_RTX;
7136 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7138 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7139 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7140 else
7141 x = gen_push (gen_rtx_REG (SImode, rn));
7143 x = frame_insn (x);
7144 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7145 return x;
7148 /* Output RTL to pop register RN from the stack. */
7149 static void
7150 pop (int rn)
7152 rtx x, sp_reg, reg;
7153 if (rn == FPUL_REG)
7154 x = gen_pop_fpul ();
7155 else if (rn == FPSCR_REG)
7156 x = gen_pop_fpscr ();
7157 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7158 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7160 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7161 return;
7162 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7164 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7165 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7166 else
7167 x = gen_pop (gen_rtx_REG (SImode, rn));
7169 x = emit_insn (x);
7171 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7172 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7173 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7174 : SET_DEST (PATTERN (x)));
7175 add_reg_note (x, REG_CFA_RESTORE, reg);
7176 add_reg_note (x, REG_CFA_ADJUST_CFA,
7177 gen_rtx_SET (SImode, sp_reg,
7178 plus_constant (SImode, sp_reg,
7179 GET_MODE_SIZE (GET_MODE (reg)))));
7180 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7181 RTX_FRAME_RELATED_P (x) = 1;
7184 /* Generate code to push the regs specified in the mask. */
7185 static void
7186 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7188 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7189 int skip_fpscr = 0;
7191 /* Push PR last; this gives better latencies after the prologue, and
7192 candidates for the return delay slot when there are no general
7193 registers pushed. */
7194 for (; i < FIRST_PSEUDO_REGISTER; i++)
7196 /* If this is an interrupt handler, and the SZ bit varies,
7197 and we have to push any floating point register, we need
7198 to switch to the correct precision first. */
7199 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7200 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7202 HARD_REG_SET unsaved;
7204 push (FPSCR_REG);
7205 COMPL_HARD_REG_SET (unsaved, *mask);
7206 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7207 skip_fpscr = 1;
7209 if (i != PR_REG
7210 && (i != FPSCR_REG || ! skip_fpscr)
7211 && TEST_HARD_REG_BIT (*mask, i))
7213 /* If the ISR has RESBANK attribute assigned, don't push any of
7214 the following registers - R0-R14, MACH, MACL and GBR. */
7215 if (! (sh_cfun_resbank_handler_p ()
7216 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7217 || i == MACH_REG
7218 || i == MACL_REG
7219 || i == GBR_REG)))
7220 push (i);
7224 /* Push banked registers last to improve delay slot opportunities. */
7225 if (interrupt_handler)
7227 bool use_movml = false;
7229 if (TARGET_SH2A)
7231 unsigned int count = 0;
7233 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7234 if (TEST_HARD_REG_BIT (*mask, i))
7235 count++;
7236 else
7237 break;
7239 /* Use movml when all banked registers are pushed. */
7240 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7241 use_movml = true;
7244 if (sh_cfun_resbank_handler_p ())
7245 ; /* Do nothing. */
7246 else if (use_movml)
7248 rtx x, mem, reg, set;
7249 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7251 /* We must avoid scheduling multiple store insns together with
7252 other insns. */
7253 emit_insn (gen_blockage ());
7254 x = gen_movml_push_banked (sp_reg);
7255 x = frame_insn (x);
7256 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7258 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7259 reg = gen_rtx_REG (SImode, i);
7260 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7263 set = gen_rtx_SET (SImode, sp_reg,
7264 plus_constant (Pmode, sp_reg, - 32));
7265 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7266 emit_insn (gen_blockage ());
7268 else
7269 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7270 if (TEST_HARD_REG_BIT (*mask, i))
7271 push (i);
7274 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7275 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7276 push (PR_REG);
7279 /* Calculate how much extra space is needed to save all callee-saved
7280 target registers.
7281 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7282 static int
7283 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7285 int reg;
7286 int stack_space = 0;
7287 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7289 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7290 if ((! call_really_used_regs[reg] || interrupt_handler)
7291 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7292 /* Leave space to save this target register on the stack,
7293 in case target register allocation wants to use it. */
7294 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7295 return stack_space;
7298 /* Decide whether we should reserve space for callee-save target registers,
7299 in case target register allocation wants to use them. REGS_SAVED is
7300 the space, in bytes, that is already required for register saves.
7301 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7302 static int
7303 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7304 HARD_REG_SET *live_regs_mask)
7306 if (optimize_size)
7307 return 0;
7308 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7311 /* Decide how much space to reserve for callee-save target registers
7312 in case target register allocation wants to use them.
7313 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7314 static int
7315 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7317 if (shmedia_space_reserved_for_target_registers)
7318 return shmedia_target_regs_stack_space (live_regs_mask);
7319 else
7320 return 0;
7323 /* Work out the registers which need to be saved, both as a mask and a
7324 count of saved words. Return the count.
7326 If doing a pragma interrupt function, then push all regs used by the
7327 function, and if we call another function (we can tell by looking at PR),
7328 make sure that all the regs it clobbers are safe too. */
7329 static int
7330 calc_live_regs (HARD_REG_SET *live_regs_mask)
7332 unsigned int reg;
7333 int count;
7334 tree attrs;
7335 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7336 bool nosave_low_regs;
7337 int pr_live, has_call;
7339 attrs = DECL_ATTRIBUTES (current_function_decl);
7340 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7341 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7342 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7343 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7345 CLEAR_HARD_REG_SET (*live_regs_mask);
7346 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7347 && df_regs_ever_live_p (FPSCR_REG))
7348 target_flags &= ~MASK_FPU_SINGLE;
7349 /* If switching to double mode would avoid a lot of saves, do that. */
7350 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7351 && TARGET_FPU_SINGLE)
7352 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7353 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7354 && (! call_really_used_regs[reg]
7355 || interrupt_handler)
7356 && ++count > 2)
7358 target_flags &= ~MASK_FPU_SINGLE;
7359 break;
7361 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7362 knows how to use it. That means the pseudo originally allocated for
7363 the initial value can become the PR_MEDIA_REG hard register, as seen for
7364 execute/20010122-1.c:test9. */
7365 if (TARGET_SHMEDIA)
7366 /* ??? this function is called from initial_elimination_offset, hence we
7367 can't use the result of sh_media_register_for_return here. */
7368 pr_live = sh_pr_n_sets ();
7369 else
7371 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7372 pr_live = (pr_initial
7373 ? (!REG_P (pr_initial)
7374 || REGNO (pr_initial) != (PR_REG))
7375 : df_regs_ever_live_p (PR_REG));
7376 /* For SHcompact, if not optimizing, we end up with a memory reference
7377 using the return address pointer for __builtin_return_address even
7378 though there is no actual need to put the PR register on the stack. */
7379 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7381 /* Force PR to be live if the prologue has to call the SHmedia
7382 argument decoder or register saver. */
7383 if (TARGET_SHCOMPACT
7384 && ((crtl->args.info.call_cookie
7385 & ~ CALL_COOKIE_RET_TRAMP (1))
7386 || crtl->saves_all_registers))
7387 pr_live = 1;
7388 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7389 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7391 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7392 ? pr_live
7393 : interrupt_handler
7394 ? (/* Need to save all the regs ever live. */
7395 (df_regs_ever_live_p (reg)
7396 || (call_really_used_regs[reg]
7397 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7398 || reg == PIC_OFFSET_TABLE_REGNUM)
7399 && has_call)
7400 || (TARGET_SHMEDIA && has_call
7401 && REGISTER_NATURAL_MODE (reg) == SImode
7402 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7403 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7404 && reg != RETURN_ADDRESS_POINTER_REGNUM
7405 && reg != T_REG && reg != GBR_REG
7406 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7407 /* Push fpscr only on targets which have an FPU. */
7408 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7409 : (/* Only push those regs which are used and need to be saved. */
7410 (TARGET_SHCOMPACT
7411 && flag_pic
7412 && crtl->args.info.call_cookie
7413 && reg == PIC_OFFSET_TABLE_REGNUM)
7414 || (df_regs_ever_live_p (reg)
7415 && ((!call_really_used_regs[reg]
7416 && !(reg != PIC_OFFSET_TABLE_REGNUM
7417 && fixed_regs[reg] && call_used_regs[reg]))
7418 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7419 || (crtl->calls_eh_return
7420 && (reg == EH_RETURN_DATA_REGNO (0)
7421 || reg == EH_RETURN_DATA_REGNO (1)
7422 || reg == EH_RETURN_DATA_REGNO (2)
7423 || reg == EH_RETURN_DATA_REGNO (3)))
7424 || ((reg == MACL_REG || reg == MACH_REG)
7425 && df_regs_ever_live_p (reg)
7426 && sh_cfun_attr_renesas_p ())
7429 SET_HARD_REG_BIT (*live_regs_mask, reg);
7430 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7432 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7433 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7435 if (FP_REGISTER_P (reg))
7437 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7439 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7443 else if (XD_REGISTER_P (reg))
7445 /* Must switch to double mode to access these registers. */
7446 target_flags &= ~MASK_FPU_SINGLE;
7450 if (nosave_low_regs && reg == R8_REG)
7451 break;
7453 /* If we have a target register optimization pass after prologue / epilogue
7454 threading, we need to assume all target registers will be live even if
7455 they aren't now. */
7456 if (flag_branch_target_load_optimize2
7457 && TARGET_SAVE_ALL_TARGET_REGS
7458 && shmedia_space_reserved_for_target_registers)
7459 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7460 if ((! call_really_used_regs[reg] || interrupt_handler)
7461 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7463 SET_HARD_REG_BIT (*live_regs_mask, reg);
7464 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7466 /* If this is an interrupt handler, we don't have any call-clobbered
7467 registers we can conveniently use for target register save/restore.
7468 Make sure we save at least one general purpose register when we need
7469 to save target registers. */
7470 if (interrupt_handler
7471 && hard_reg_set_intersect_p (*live_regs_mask,
7472 reg_class_contents[TARGET_REGS])
7473 && ! hard_reg_set_intersect_p (*live_regs_mask,
7474 reg_class_contents[GENERAL_REGS]))
7476 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7477 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7480 return count;
7483 /* Code to generate prologue and epilogue sequences */
7485 /* PUSHED is the number of bytes that are being pushed on the
7486 stack for register saves. Return the frame size, padded
7487 appropriately so that the stack stays properly aligned. */
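/* Illustrative only: with get_frame_size () == 21, PUSHED == 8 and a 4 byte
   STACK_BOUNDARY, the computation below yields ((21 + 8 + 3) & -4) - 8 = 24,
   i.e. the 21 byte frame is padded to 24 bytes so that the stack pointer
   stays aligned after the register saves.  */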
7488 static HOST_WIDE_INT
7489 rounded_frame_size (int pushed)
7491 HOST_WIDE_INT size = get_frame_size ();
7492 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7494 if (ACCUMULATE_OUTGOING_ARGS)
7495 size += crtl->outgoing_args_size;
7497 return ((size + pushed + align - 1) & -align) - pushed;
7500 /* Choose a call-clobbered target-branch register that remains
7501 unchanged along the whole function. We set it up as the return
7502 value in the prologue. */
7504 sh_media_register_for_return (void)
7506 int regno;
7507 int tr0_used;
7509 if (! crtl->is_leaf)
7510 return -1;
7511 if (lookup_attribute ("interrupt_handler",
7512 DECL_ATTRIBUTES (current_function_decl)))
7513 return -1;
7514 if (sh_cfun_interrupt_handler_p ())
7515 return -1;
7517 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7519 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7520 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7521 return regno;
7523 return -1;
7526 /* The maximum registers we need to save are:
7527 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7528 - 32 floating point registers (for each pair, we save none,
7529 one single precision value, or a double precision value).
7530 - 8 target registers
7531 - add 1 entry for a delimiter. */
7532 #define MAX_SAVED_REGS (62+32+8)
7534 typedef struct save_entry_s
7536 unsigned char reg;
7537 unsigned char mode;
7538 short offset;
7539 } save_entry;
7541 #define MAX_TEMPS 4
7543 /* There will be a delimiter entry with VOIDmode both at the start and the
7544 end of a filled in schedule. The end delimiter has the offset of the
7545 save with the smallest (i.e. most negative) offset. */
7546 typedef struct save_schedule_s
7548 save_entry entries[MAX_SAVED_REGS + 2];
7549 int temps[MAX_TEMPS+1];
7550 } save_schedule;
7552 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7553 use reverse order. Returns the last entry written to (not counting
7554 the delimiter). OFFSET_BASE is a number to be added to all offset
7555 entries. */
7556 static save_entry *
7557 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7558 int offset_base)
7560 int align, i;
7561 save_entry *entry = schedule->entries;
7562 int tmpx = 0;
7563 int offset;
7565 if (! current_function_interrupt)
7566 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7567 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7568 && ! FUNCTION_ARG_REGNO_P (i)
7569 && i != FIRST_RET_REG
7570 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7571 && ! (crtl->calls_eh_return
7572 && (i == EH_RETURN_STACKADJ_REGNO
7573 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7574 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7575 schedule->temps[tmpx++] = i;
7576 entry->reg = -1;
7577 entry->mode = VOIDmode;
7578 entry->offset = offset_base;
7579 entry++;
7580 /* We loop twice: first, we save 8-byte aligned registers at the
7581 higher addresses, which are known to be aligned. Then, we
7582 proceed to saving 32-bit registers that don't need 8-byte
7583 alignment.
7584 If this is an interrupt function, all registers that need saving
7585 need to be saved in full. Moreover, we need to postpone saving
7586 target registers till we have saved some general purpose registers
7587 we can then use as scratch registers. */
7588 offset = offset_base;
7589 for (align = 1; align >= 0; align--)
7591 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7592 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7594 machine_mode mode = REGISTER_NATURAL_MODE (i);
7595 int reg = i;
7597 if (current_function_interrupt)
7599 if (TARGET_REGISTER_P (i))
7600 continue;
7601 if (GENERAL_REGISTER_P (i))
7602 mode = DImode;
7604 if (mode == SFmode && (i % 2) == 1
7605 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7606 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7608 mode = DFmode;
7609 i--;
7610 reg--;
7613 /* If we're doing the aligned pass and this is not aligned,
7614 or we're doing the unaligned pass and this is aligned,
7615 skip it. */
7616 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7617 != align)
7618 continue;
7620 if (current_function_interrupt
7621 && GENERAL_REGISTER_P (i)
7622 && tmpx < MAX_TEMPS)
7623 schedule->temps[tmpx++] = i;
7625 offset -= GET_MODE_SIZE (mode);
7626 entry->reg = i;
7627 entry->mode = mode;
7628 entry->offset = offset;
7629 entry++;
7631 if (align && current_function_interrupt)
7632 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7633 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7635 offset -= GET_MODE_SIZE (DImode);
7636 entry->reg = i;
7637 entry->mode = DImode;
7638 entry->offset = offset;
7639 entry++;
7642 entry->reg = -1;
7643 entry->mode = VOIDmode;
7644 entry->offset = offset;
7645 schedule->temps[tmpx] = -1;
7646 return entry - 1;
7649 /* Expand code for the function prologue. */
7650 void
7651 sh_expand_prologue (void)
7653 HARD_REG_SET live_regs_mask;
7654 int d, i;
7655 int d_rounding = 0;
7656 int save_flags = target_flags;
7657 int pretend_args;
7658 int stack_usage;
7659 tree sp_switch_attr
7660 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7662 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7664 /* We have pretend args if we had an object sent partially in registers
7665 and partially on the stack, e.g. a large structure. */
7666 pretend_args = crtl->args.pretend_args_size;
7667 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7668 && (NPARM_REGS(SImode)
7669 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7670 pretend_args = 0;
7672 output_stack_adjust (-pretend_args
7673 - crtl->args.info.stack_regs * 8,
7674 stack_pointer_rtx, 0, NULL, true);
7675 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7677 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7678 /* We're going to use the PIC register to load the address of the
7679 incoming-argument decoder and/or of the return trampoline from
7680 the GOT, so make sure the PIC register is preserved and
7681 initialized. */
7682 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7684 if (TARGET_SHCOMPACT
7685 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7687 int reg;
7689 /* First, make all registers with incoming arguments that will
7690 be pushed onto the stack live, so that register renaming
7691 doesn't overwrite them. */
7692 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7693 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7694 >= NPARM_REGS (SImode) - reg)
7695 for (; reg < NPARM_REGS (SImode); reg++)
7696 emit_insn (gen_shcompact_preserve_incoming_args
7697 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7698 else if (CALL_COOKIE_INT_REG_GET
7699 (crtl->args.info.call_cookie, reg) == 1)
7700 emit_insn (gen_shcompact_preserve_incoming_args
7701 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7703 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7704 stack_pointer_rtx);
7705 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7706 GEN_INT (crtl->args.info.call_cookie));
7707 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7708 gen_rtx_REG (SImode, R0_REG));
7710 else if (TARGET_SHMEDIA)
7712 int tr = sh_media_register_for_return ();
7714 if (tr >= 0)
7715 emit_move_insn (gen_rtx_REG (DImode, tr),
7716 gen_rtx_REG (DImode, PR_MEDIA_REG));
7719 /* Emit the code for SETUP_VARARGS. */
7720 if (cfun->stdarg)
7722 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7724 /* Push arg regs as if they'd been provided by the caller on the stack. */
7725 for (i = 0; i < NPARM_REGS(SImode); i++)
7727 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7729 if (i >= (NPARM_REGS(SImode)
7730 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7732 break;
7733 push (rn);
7734 stack_usage += GET_MODE_SIZE (SImode);
7739 /* If we're supposed to switch stacks at function entry, do so now. */
7740 if (sp_switch_attr)
7742 rtx lab, newsrc;
7743 /* The argument specifies a variable holding the address of the
7744 stack the interrupt function should switch to/from at entry/exit. */
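/* Illustrative only (hypothetical names): the attribute is used as
     void *alt_stack;
     void isr (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
   so the string constant extracted below would be "alt_stack".  */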
7745 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7746 const char *s
7747 = ggc_strdup (TREE_STRING_POINTER (arg));
7748 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7750 lab = add_constant (sp_switch, SImode, 0);
7751 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7753 emit_insn (gen_sp_switch_1 (newsrc));
7756 d = calc_live_regs (&live_regs_mask);
7757 /* ??? Maybe we could save some switching if we can move a mode switch
7758 that already happens to be at the function start into the prologue. */
7759 if (target_flags != save_flags && ! current_function_interrupt)
7760 emit_insn (gen_toggle_sz ());
7762 if (TARGET_SH5)
7764 int offset_base, offset;
7765 rtx r0 = NULL_RTX;
7766 int offset_in_r0 = -1;
7767 int sp_in_r0 = 0;
7768 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7769 int total_size, save_size;
7770 save_schedule schedule;
7771 save_entry *entry;
7772 int *tmp_pnt;
7774 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7775 && ! current_function_interrupt)
7776 r0 = gen_rtx_REG (Pmode, R0_REG);
7778 /* D is the actual number of bytes that we need for saving registers;
7779 however, in initial_elimination_offset we have committed to using
7780 an additional TREGS_SPACE amount of bytes - in order to keep both
7781 addresses to arguments supplied by the caller and local variables
7782 valid, we must keep this gap. Place it between the incoming
7783 arguments and the actually saved registers in a bid to optimize
7784 locality of reference. */
7785 total_size = d + tregs_space;
7786 total_size += rounded_frame_size (total_size);
7787 save_size = total_size - rounded_frame_size (d);
7788 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7789 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7790 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7792 /* If adjusting the stack in a single step costs nothing extra, do so.
7793 I.e. either if a single addi is enough, or we need a movi anyway,
7794 and we don't exceed the maximum offset range (the test for the
7795 latter is conservative for simplicity). */
7796 if (TARGET_SHMEDIA
7797 && (CONST_OK_FOR_I10 (-total_size)
7798 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7799 && total_size <= 2044)))
7800 d_rounding = total_size - save_size;
7802 offset_base = d + d_rounding;
7804 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7805 0, NULL, true);
7806 stack_usage += save_size + d_rounding;
7808 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7809 tmp_pnt = schedule.temps;
7810 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7812 machine_mode mode = (machine_mode) entry->mode;
7813 unsigned int reg = entry->reg;
7814 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7815 rtx orig_reg_rtx;
7817 offset = entry->offset;
7819 reg_rtx = gen_rtx_REG (mode, reg);
7821 mem_rtx = gen_frame_mem (mode,
7822 gen_rtx_PLUS (Pmode,
7823 stack_pointer_rtx,
7824 GEN_INT (offset)));
7826 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7828 gcc_assert (r0);
7829 mem_rtx = NULL_RTX;
7832 if (HAVE_PRE_DECREMENT
7833 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7834 || mem_rtx == NULL_RTX
7835 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7837 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7839 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7840 pre_dec = NULL_RTX;
7841 else
7843 mem_rtx = NULL_RTX;
7844 offset += GET_MODE_SIZE (mode);
7848 if (mem_rtx != NULL_RTX)
7849 goto addr_ok;
7851 if (offset_in_r0 == -1)
7853 emit_move_insn (r0, GEN_INT (offset));
7854 offset_in_r0 = offset;
7856 else if (offset != offset_in_r0)
7858 emit_move_insn (r0,
7859 gen_rtx_PLUS
7860 (Pmode, r0,
7861 GEN_INT (offset - offset_in_r0)));
7862 offset_in_r0 += offset - offset_in_r0;
7865 if (pre_dec != NULL_RTX)
7867 if (! sp_in_r0)
7869 emit_move_insn (r0,
7870 gen_rtx_PLUS
7871 (Pmode, r0, stack_pointer_rtx));
7872 sp_in_r0 = 1;
7875 offset -= GET_MODE_SIZE (mode);
7876 offset_in_r0 -= GET_MODE_SIZE (mode);
7878 mem_rtx = pre_dec;
7880 else if (sp_in_r0)
7881 mem_rtx = gen_frame_mem (mode, r0);
7882 else
7883 mem_rtx = gen_frame_mem (mode,
7884 gen_rtx_PLUS (Pmode,
7885 stack_pointer_rtx,
7886 r0));
7888 /* We must not use an r0-based address for target-branch
7889 registers or for special registers without pre-dec
7890 memory addresses, since we store their values in r0
7891 first. */
7892 gcc_assert (!TARGET_REGISTER_P (reg)
7893 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7894 || mem_rtx == pre_dec));
7896 addr_ok:
7897 orig_reg_rtx = reg_rtx;
7898 if (TARGET_REGISTER_P (reg)
7899 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7900 && mem_rtx != pre_dec))
7902 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7904 emit_move_insn (tmp_reg, reg_rtx);
7906 if (REGNO (tmp_reg) == R0_REG)
7908 offset_in_r0 = -1;
7909 sp_in_r0 = 0;
7910 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7913 if (*++tmp_pnt <= 0)
7914 tmp_pnt = schedule.temps;
7916 reg_rtx = tmp_reg;
7919 rtx insn;
7921 /* Mark as interesting for the DWARF CFI generator.  */
7922 insn = emit_move_insn (mem_rtx, reg_rtx);
7923 RTX_FRAME_RELATED_P (insn) = 1;
7924 /* If we use an intermediate register for the save, we can't
7925 describe this exactly in cfi as a copy of the to-be-saved
7926 register into the temporary register and then a store of the temporary
7927 register to the stack, because the temporary register can
7928 have a different natural size than the to-be-saved register.
7929 Thus, we gloss over the intermediate copy and pretend we do
7930 a direct save from the to-be-saved register. */
7931 if (REGNO (reg_rtx) != reg)
7933 rtx set;
7935 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7936 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7939 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7941 rtx reg_rtx = gen_rtx_REG (mode, reg);
7942 rtx set;
7943 rtx mem_rtx = gen_frame_mem (mode,
7944 gen_rtx_PLUS (Pmode,
7945 stack_pointer_rtx,
7946 GEN_INT (offset)));
7948 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7949 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7954 gcc_assert (entry->offset == d_rounding);
7956 else
7958 push_regs (&live_regs_mask, current_function_interrupt);
7959 stack_usage += d;
7962 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7963 emit_insn (gen_GOTaddr2picreg ());
7965 if (SHMEDIA_REGS_STACK_ADJUST ())
7967 /* This must NOT go through the PLT, otherwise mach and macl
7968 may be clobbered. */
7969 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7970 (TARGET_FPU_ANY
7971 ? "__GCC_push_shmedia_regs"
7972 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7973 emit_insn (gen_shmedia_save_restore_regs_compact
7974 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7977 if (target_flags != save_flags && ! current_function_interrupt)
7978 emit_insn (gen_toggle_sz ());
7980 target_flags = save_flags;
7982 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7983 stack_pointer_rtx, 0, NULL, true);
7984 stack_usage += rounded_frame_size (d) - d_rounding;
7986 if (frame_pointer_needed)
7987 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7989 if (TARGET_SHCOMPACT
7990 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7992 /* This must NOT go through the PLT, otherwise mach and macl
7993 may be clobbered. */
7994 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7995 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7996 emit_insn (gen_shcompact_incoming_args ());
7999 /* If we are profiling, make sure no instructions are scheduled before
8000 the call to mcount. Similarly if some call instructions are swapped
8001 before frame related insns, it'll confuse the unwinder because
8002 currently SH has no unwind info for function epilogues. */
8003 if (crtl->profile || flag_exceptions || flag_unwind_tables)
8004 emit_insn (gen_blockage ());
8006 if (flag_stack_usage_info)
8007 current_function_static_stack_size = stack_usage;
8010 /* Expand code for the function epilogue. */
8011 void
8012 sh_expand_epilogue (bool sibcall_p)
8014 HARD_REG_SET live_regs_mask;
8015 int d, i;
8016 int d_rounding = 0;
8018 int save_flags = target_flags;
8019 int frame_size, save_size;
8020 int fpscr_deferred = 0;
8021 int e = sibcall_p ? -1 : 1;
8023 d = calc_live_regs (&live_regs_mask);
8025 save_size = d;
8026 frame_size = rounded_frame_size (d);
8028 if (TARGET_SH5)
8030 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8031 int total_size;
8032 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8033 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8034 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8036 total_size = d + tregs_space;
8037 total_size += rounded_frame_size (total_size);
8038 save_size = total_size - frame_size;
8040 /* If adjusting the stack in a single step costs nothing extra, do so.
8041 I.e. either if a single addi is enough, or we need a movi anyway,
8042 and we don't exceed the maximum offset range (the test for the
8043 latter is conservative for simplicity). */
8044 if (TARGET_SHMEDIA
8045 && ! frame_pointer_needed
8046 && (CONST_OK_FOR_I10 (total_size)
8047 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8048 && total_size <= 2044)))
8049 d_rounding = frame_size;
8051 frame_size -= d_rounding;
8054 if (frame_pointer_needed)
8056 /* We must avoid scheduling the epilogue with previous basic blocks.
8057 See PR/18032 and PR/40313. */
8058 emit_insn (gen_blockage ());
8059 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8060 &live_regs_mask, true);
8062 /* We must avoid moving the stack pointer adjustment past code
8063 which reads from the local frame, else an interrupt could
8064 occur after the SP adjustment and clobber data in the local
8065 frame. */
8066 emit_insn (gen_blockage ());
8067 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8069 else if (frame_size)
8071 /* We must avoid moving the stack pointer adjustment past code
8072 which reads from the local frame, else an interrupt could
8073 occur after the SP adjustment and clobber data in the local
8074 frame. */
8075 emit_insn (gen_blockage ());
8076 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8077 &live_regs_mask, true);
8080 if (SHMEDIA_REGS_STACK_ADJUST ())
8082 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8083 (TARGET_FPU_ANY
8084 ? "__GCC_pop_shmedia_regs"
8085 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8086 /* This must NOT go through the PLT, otherwise mach and macl
8087 may be clobbered. */
8088 emit_insn (gen_shmedia_save_restore_regs_compact
8089 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8092 /* Pop all the registers. */
8094 if (target_flags != save_flags && ! current_function_interrupt)
8095 emit_insn (gen_toggle_sz ());
8096 if (TARGET_SH5)
8098 int offset_base, offset;
8099 int offset_in_r0 = -1;
8100 int sp_in_r0 = 0;
8101 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8102 save_schedule schedule;
8103 save_entry *entry;
8104 int *tmp_pnt;
8106 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8107 offset_base = -entry[1].offset + d_rounding;
8108 tmp_pnt = schedule.temps;
8109 for (; entry->mode != VOIDmode; entry--)
8111 machine_mode mode = (machine_mode) entry->mode;
8112 int reg = entry->reg;
8113 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8115 offset = offset_base + entry->offset;
8116 reg_rtx = gen_rtx_REG (mode, reg);
8118 mem_rtx = gen_frame_mem (mode,
8119 gen_rtx_PLUS (Pmode,
8120 stack_pointer_rtx,
8121 GEN_INT (offset)));
8123 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8124 mem_rtx = NULL_RTX;
8126 if (HAVE_POST_INCREMENT
8127 && (offset == offset_in_r0
8128 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8129 && mem_rtx == NULL_RTX)
8130 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8132 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8134 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8135 post_inc = NULL_RTX;
8136 else
8137 mem_rtx = NULL_RTX;
8140 if (mem_rtx != NULL_RTX)
8141 goto addr_ok;
8143 if (offset_in_r0 == -1)
8145 emit_move_insn (r0, GEN_INT (offset));
8146 offset_in_r0 = offset;
8148 else if (offset != offset_in_r0)
8150 emit_move_insn (r0,
8151 gen_rtx_PLUS
8152 (Pmode, r0,
8153 GEN_INT (offset - offset_in_r0)));
8154 offset_in_r0 += offset - offset_in_r0;
8157 if (post_inc != NULL_RTX)
8159 if (! sp_in_r0)
8161 emit_move_insn (r0,
8162 gen_rtx_PLUS
8163 (Pmode, r0, stack_pointer_rtx));
8164 sp_in_r0 = 1;
8167 mem_rtx = post_inc;
8169 offset_in_r0 += GET_MODE_SIZE (mode);
8171 else if (sp_in_r0)
8172 mem_rtx = gen_frame_mem (mode, r0);
8173 else
8174 mem_rtx = gen_frame_mem (mode,
8175 gen_rtx_PLUS (Pmode,
8176 stack_pointer_rtx,
8177 r0));
8179 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8180 || mem_rtx == post_inc);
8182 addr_ok:
8183 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8184 && mem_rtx != post_inc)
8186 emit_move_insn (r0, mem_rtx);
8187 mem_rtx = r0;
8189 else if (TARGET_REGISTER_P (reg))
8191 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8193 /* Give the scheduler a bit of freedom by using up to
8194 MAX_TEMPS registers in a round-robin fashion. */
8195 emit_move_insn (tmp_reg, mem_rtx);
8196 mem_rtx = tmp_reg;
8197 if (*++tmp_pnt < 0)
8198 tmp_pnt = schedule.temps;
8201 emit_move_insn (reg_rtx, mem_rtx);
8204 gcc_assert (entry->offset + offset_base == d + d_rounding);
8206 else /* ! TARGET_SH5 */
8208 int last_reg;
8210 save_size = 0;
8211 /* For an ISR with RESBANK attribute assigned, don't pop PR
8212 register. */
8213 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8214 && !sh_cfun_resbank_handler_p ())
8216 if (!frame_pointer_needed)
8217 emit_insn (gen_blockage ());
8218 pop (PR_REG);
8221 /* Banked registers are popped first to avoid being scheduled in the
8222 delay slot. RTE switches banks before the ds instruction. */
8223 if (current_function_interrupt)
8225 bool use_movml = false;
8227 if (TARGET_SH2A)
8229 unsigned int count = 0;
8231 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8232 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8233 count++;
8234 else
8235 break;
8237 /* Use movml when all banked registers are popped. */
8238 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8239 use_movml = true;
8242 if (sh_cfun_resbank_handler_p ())
8243 ; /* Do nothing. */
8244 else if (use_movml)
8246 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8248 /* We must avoid scheduling the multiple load insn with other
8249 insns. */
8250 emit_insn (gen_blockage ());
8251 emit_insn (gen_movml_pop_banked (sp_reg));
8252 emit_insn (gen_blockage ());
8254 else
8255 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8256 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8257 pop (i);
8259 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8261 else
8262 last_reg = FIRST_PSEUDO_REGISTER;
8264 for (i = 0; i < last_reg; i++)
8266 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8268 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8269 && hard_reg_set_intersect_p (live_regs_mask,
8270 reg_class_contents[DF_REGS]))
8271 fpscr_deferred = 1;
8272 /* For an ISR with RESBANK attribute assigned, don't pop the
8273 following registers: R0-R14, MACH, MACL and GBR. */
8274 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8275 && ! (sh_cfun_resbank_handler_p ()
8276 && ((j >= FIRST_GENERAL_REG
8277 && j < LAST_GENERAL_REG)
8278 || j == MACH_REG
8279 || j == MACL_REG
8280 || j == GBR_REG)))
8281 pop (j);
8283 if (j == FIRST_FP_REG && fpscr_deferred)
8284 pop (FPSCR_REG);
8287 if (target_flags != save_flags && ! current_function_interrupt)
8288 emit_insn (gen_toggle_sz ());
8289 target_flags = save_flags;
8291 output_stack_adjust (crtl->args.pretend_args_size
8292 + save_size + d_rounding
8293 + crtl->args.info.stack_regs * 8,
8294 stack_pointer_rtx, e, NULL, true);
8296 if (crtl->calls_eh_return)
8297 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8298 EH_RETURN_STACKADJ_RTX));
8300 /* Switch back to the normal stack if necessary. */
8301 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8302 emit_insn (gen_sp_switch_2 ());
8304 /* Tell flow the insn that pops PR isn't dead. */
8305 /* PR_REG will never be live in SHmedia mode, and we don't need to
8306 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8307 by the return pattern. */
8308 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8309 emit_use (gen_rtx_REG (SImode, PR_REG));
8312 /* Emit code to change the current function's return address to RA.
8313 TEMP is available as a scratch register, if needed. */
8314 void
8315 sh_set_return_address (rtx ra, rtx tmp)
8317 HARD_REG_SET live_regs_mask;
8318 int d;
8319 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8320 int pr_offset;
8322 d = calc_live_regs (&live_regs_mask);
8324 /* If pr_reg isn't live, we can set it (or the register given by
8325 sh_media_register_for_return) directly. */
8326 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8328 rtx rr;
8330 if (TARGET_SHMEDIA)
8332 int rr_regno = sh_media_register_for_return ();
8334 if (rr_regno < 0)
8335 rr_regno = pr_reg;
8337 rr = gen_rtx_REG (DImode, rr_regno);
8339 else
8340 rr = gen_rtx_REG (SImode, pr_reg);
8342 emit_insn (GEN_MOV (rr, ra));
8343 /* Tell flow the register for return isn't dead. */
8344 emit_use (rr);
8345 return;
8348 if (TARGET_SH5)
8350 int offset;
8351 save_schedule schedule;
8352 save_entry *entry;
8354 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8355 offset = entry[1].offset;
8356 for (; entry->mode != VOIDmode; entry--)
8357 if (entry->reg == pr_reg)
8358 goto found;
8360 /* We can't find the PR register. */
8361 gcc_unreachable ();
8363 found:
8364 offset = entry->offset - offset;
8365 pr_offset = (rounded_frame_size (d) + offset
8366 + SHMEDIA_REGS_STACK_ADJUST ());
8368 else
8369 pr_offset = rounded_frame_size (d);
8371 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8373 if (frame_pointer_needed)
8374 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8375 else
8376 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8378 tmp = gen_frame_mem (Pmode, tmp);
8379 emit_insn (GEN_MOV (tmp, ra));
8380 /* Tell flow this store isn't dead. */
8381 emit_use (tmp);
8384 /* Clear variables at function end. */
8385 static void
8386 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8387 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8391 static rtx
8392 sh_builtin_saveregs (void)
8394 /* First unnamed integer register. */
8395 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8396 /* Number of integer registers we need to save. */
8397 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8398 /* First unnamed SFmode float reg */
8399 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8400 /* Number of SFmode float regs to save. */
8401 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8402 rtx regbuf, fpregs;
8403 int bufsize, regno;
8404 alias_set_type alias_set;
8406 if (TARGET_SH5)
8408 if (n_intregs)
8410 int pushregs = n_intregs;
8412 while (pushregs < NPARM_REGS (SImode) - 1
8413 && (CALL_COOKIE_INT_REG_GET
8414 (crtl->args.info.call_cookie,
8415 NPARM_REGS (SImode) - pushregs)
8416 == 1))
8418 crtl->args.info.call_cookie
8419 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8420 - pushregs, 1);
8421 pushregs++;
8424 if (pushregs == NPARM_REGS (SImode))
8425 crtl->args.info.call_cookie
8426 |= (CALL_COOKIE_INT_REG (0, 1)
8427 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8428 else
8429 crtl->args.info.call_cookie
8430 |= CALL_COOKIE_STACKSEQ (pushregs);
8432 crtl->args.pretend_args_size += 8 * n_intregs;
8434 if (TARGET_SHCOMPACT)
8435 return const0_rtx;
8438 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8440 error ("__builtin_saveregs not supported by this subtarget");
8441 return const0_rtx;
8444 if (TARGET_SHMEDIA)
8445 n_floatregs = 0;
8447 /* Allocate block of memory for the regs. */
8448 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8449 Or can assign_stack_local accept a 0 SIZE argument? */
8450 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8452 if (TARGET_SHMEDIA)
8453 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8454 else if (n_floatregs & 1)
8456 rtx addr;
8458 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8459 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8460 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8461 regbuf = change_address (regbuf, BLKmode, addr);
8463 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8465 rtx addr, mask;
8467 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8468 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8469 XEXP (regbuf, 0), 4));
8470 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8471 emit_insn (gen_andsi3 (addr, addr, mask));
8472 regbuf = change_address (regbuf, BLKmode, addr);
8474 else
8475 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8476 alias_set = get_varargs_alias_set ();
8477 set_mem_alias_set (regbuf, alias_set);
8479 /* Save int args.
8480 This is optimized to only save the regs that are necessary. Explicitly
8481 named args need not be saved. */
8482 if (n_intregs > 0)
8483 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8484 adjust_address (regbuf, BLKmode,
8485 n_floatregs * UNITS_PER_WORD),
8486 n_intregs);
8488 if (TARGET_SHMEDIA)
8489 /* Return the address of the regbuf. */
8490 return XEXP (regbuf, 0);
8492 /* Save float args.
8493 This is optimized to only save the regs that are necessary. Explicitly
8494 named args need not be saved.
8495 We explicitly build a pointer to the buffer because it halves the insn
8496 count when not optimizing (otherwise the pointer is built for each reg
8497 saved).
8498 We emit the moves in reverse order so that we can use predecrement. */
8500 fpregs = copy_to_mode_reg (Pmode,
8501 plus_constant (Pmode, XEXP (regbuf, 0),
8502 n_floatregs * UNITS_PER_WORD));
8503 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8505 rtx mem;
8506 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8508 emit_insn (gen_addsi3 (fpregs, fpregs,
8509 GEN_INT (-2 * UNITS_PER_WORD)));
8510 mem = change_address (regbuf, DFmode, fpregs);
8511 emit_move_insn (mem,
8512 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8514 regno = first_floatreg;
8515 if (regno & 1)
8517 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8518 mem = change_address (regbuf, SFmode, fpregs);
8519 emit_move_insn (mem,
8520 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8521 + regno - SH_REG_MSW_OFFSET));
8524 else
8525 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8527 rtx mem;
8529 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8530 mem = change_address (regbuf, SFmode, fpregs);
8531 emit_move_insn (mem,
8532 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8535 /* Return the address of the regbuf. */
8536 return XEXP (regbuf, 0);
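/* Illustrative only: for the common non-SHmedia case, the register save
   buffer built above is laid out as
     [0, n_floatregs * UNITS_PER_WORD)         unnamed FP argument registers
     [n_floatregs * UNITS_PER_WORD, bufsize)   unnamed integer argument registers
   and the returned address points at the start of the FP area, which is
   what sh_va_start records as __va_next_fp.  */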
8539 /* Define the `__builtin_va_list' type for the ABI. */
8540 static tree
8541 sh_build_builtin_va_list (void)
8543 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8544 tree record, type_decl;
8546 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8547 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8548 return ptr_type_node;
8550 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8551 type_decl = build_decl (BUILTINS_LOCATION,
8552 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8554 f_next_o = build_decl (BUILTINS_LOCATION,
8555 FIELD_DECL, get_identifier ("__va_next_o"),
8556 ptr_type_node);
8557 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8558 FIELD_DECL,
8559 get_identifier ("__va_next_o_limit"),
8560 ptr_type_node);
8561 f_next_fp = build_decl (BUILTINS_LOCATION,
8562 FIELD_DECL, get_identifier ("__va_next_fp"),
8563 ptr_type_node);
8564 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8565 FIELD_DECL,
8566 get_identifier ("__va_next_fp_limit"),
8567 ptr_type_node);
8568 f_next_stack = build_decl (BUILTINS_LOCATION,
8569 FIELD_DECL, get_identifier ("__va_next_stack"),
8570 ptr_type_node);
8572 DECL_FIELD_CONTEXT (f_next_o) = record;
8573 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8574 DECL_FIELD_CONTEXT (f_next_fp) = record;
8575 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8576 DECL_FIELD_CONTEXT (f_next_stack) = record;
8578 TYPE_STUB_DECL (record) = type_decl;
8579 TYPE_NAME (record) = type_decl;
8580 TYPE_FIELDS (record) = f_next_o;
8581 DECL_CHAIN (f_next_o) = f_next_o_limit;
8582 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8583 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8584 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8586 layout_type (record);
8588 return record;
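/* Illustrative only: the record built above is roughly equivalent to
     struct __va_list_tag
     {
       void *__va_next_o;
       void *__va_next_o_limit;
       void *__va_next_fp;
       void *__va_next_fp_limit;
       void *__va_next_stack;
     };
   i.e. separate cursors and limits for integer arguments, FP arguments
   and the overflow (stack) area.  */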
8591 /* Implement `va_start' for varargs and stdarg. */
8592 static void
8593 sh_va_start (tree valist, rtx nextarg)
8595 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8596 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8597 tree t, u;
8598 int nfp, nint;
8600 if (TARGET_SH5)
8602 expand_builtin_saveregs ();
8603 std_expand_builtin_va_start (valist, nextarg);
8604 return;
8607 if ((! TARGET_SH2E && ! TARGET_SH4)
8608 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8610 std_expand_builtin_va_start (valist, nextarg);
8611 return;
8614 f_next_o = TYPE_FIELDS (va_list_type_node);
8615 f_next_o_limit = DECL_CHAIN (f_next_o);
8616 f_next_fp = DECL_CHAIN (f_next_o_limit);
8617 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8618 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8620 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8621 NULL_TREE);
8622 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8623 valist, f_next_o_limit, NULL_TREE);
8624 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8625 NULL_TREE);
8626 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8627 valist, f_next_fp_limit, NULL_TREE);
8628 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8629 valist, f_next_stack, NULL_TREE);
8631 /* Call __builtin_saveregs. */
8632 u = make_tree (sizetype, expand_builtin_saveregs ());
8633 u = fold_convert (ptr_type_node, u);
8634 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8635 TREE_SIDE_EFFECTS (t) = 1;
8636 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8638 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8639 if (nfp < 8)
8640 nfp = 8 - nfp;
8641 else
8642 nfp = 0;
8643 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8644 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8645 TREE_SIDE_EFFECTS (t) = 1;
8646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8648 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8649 TREE_SIDE_EFFECTS (t) = 1;
8650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8652 nint = crtl->args.info.arg_count[SH_ARG_INT];
8653 if (nint < 4)
8654 nint = 4 - nint;
8655 else
8656 nint = 0;
8657 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8658 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8659 TREE_SIDE_EFFECTS (t) = 1;
8660 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8662 u = make_tree (ptr_type_node, nextarg);
8663 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8664 TREE_SIDE_EFFECTS (t) = 1;
8665 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8668 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8669 member, return it. */
8670 static tree
8671 find_sole_member (tree type)
8673 tree field, member = NULL_TREE;
8675 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8677 if (TREE_CODE (field) != FIELD_DECL)
8678 continue;
8679 if (!DECL_SIZE (field))
8680 return NULL_TREE;
8681 if (integer_zerop (DECL_SIZE (field)))
8682 continue;
8683 if (member)
8684 return NULL_TREE;
8685 member = field;
8687 return member;
8690 /* Implement `va_arg'. */
8691 static tree
8692 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8693 gimple_seq *post_p ATTRIBUTE_UNUSED)
8695 HOST_WIDE_INT size, rsize;
8696 tree tmp, pptr_type_node;
8697 tree addr, lab_over = NULL, result = NULL;
8698 bool pass_by_ref;
8699 tree eff_type;
8701 if (!VOID_TYPE_P (type))
8702 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8703 else
8704 pass_by_ref = false;
8706 if (pass_by_ref)
8707 type = build_pointer_type (type);
8709 size = int_size_in_bytes (type);
8710 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8711 pptr_type_node = build_pointer_type (ptr_type_node);
8713 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8714 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8716 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8717 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8718 int pass_as_float;
8719 tree lab_false;
8720 tree member;
8722 f_next_o = TYPE_FIELDS (va_list_type_node);
8723 f_next_o_limit = DECL_CHAIN (f_next_o);
8724 f_next_fp = DECL_CHAIN (f_next_o_limit);
8725 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8726 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8728 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8729 NULL_TREE);
8730 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8731 valist, f_next_o_limit, NULL_TREE);
8732 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8733 valist, f_next_fp, NULL_TREE);
8734 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8735 valist, f_next_fp_limit, NULL_TREE);
8736 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8737 valist, f_next_stack, NULL_TREE);
8739 /* Structures with a single member with a distinct mode are passed
8740 like their member. This is relevant if the latter has a REAL_TYPE
8741 or COMPLEX_TYPE type. */
8742 eff_type = type;
8743 while (TREE_CODE (eff_type) == RECORD_TYPE
8744 && (member = find_sole_member (eff_type))
8745 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8746 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8747 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8749 tree field_type = TREE_TYPE (member);
8751 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8752 eff_type = field_type;
8753 else
8755 gcc_assert ((TYPE_ALIGN (eff_type)
8756 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8757 || (TYPE_ALIGN (eff_type)
8758 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8759 break;
8763 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8765 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8766 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8767 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8768 && size <= 16));
8770 else
8772 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8775 addr = create_tmp_var (pptr_type_node);
8776 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8777 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8779 valist = build_simple_mem_ref (addr);
8781 if (pass_as_float)
8783 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8784 tree cmp;
8785 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8787 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8788 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8790 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8791 tmp = next_fp_limit;
8792 if (size > 4 && !is_double)
8793 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8794 tmp = build2 (GE_EXPR, boolean_type_node,
8795 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8796 cmp = build3 (COND_EXPR, void_type_node, tmp,
8797 build1 (GOTO_EXPR, void_type_node,
8798 unshare_expr (lab_false)), NULL_TREE);
8799 if (!is_double)
8800 gimplify_and_add (cmp, pre_p);
8802 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8803 || (is_double || size == 16))
8805 tmp = fold_convert (sizetype, next_fp_tmp);
8806 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8807 size_int (UNITS_PER_WORD));
8808 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8809 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8811 if (is_double)
8812 gimplify_and_add (cmp, pre_p);
8814 #ifdef FUNCTION_ARG_SCmode_WART
8815 if (TYPE_MODE (eff_type) == SCmode
8816 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8818 tree subtype = TREE_TYPE (eff_type);
8819 tree real, imag;
8821 imag
8822 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8823 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8825 real
8826 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8827 real = get_initialized_tmp_var (real, pre_p, NULL);
8829 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8830 if (type != eff_type)
8831 result = build1 (VIEW_CONVERT_EXPR, type, result);
8832 result = get_initialized_tmp_var (result, pre_p, NULL);
8834 #endif /* FUNCTION_ARG_SCmode_WART */
8836 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8837 gimplify_and_add (tmp, pre_p);
8839 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8840 gimplify_and_add (tmp, pre_p);
8842 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8843 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8844 gimplify_assign (unshare_expr (next_fp_tmp),
8845 unshare_expr (valist), pre_p);
8847 gimplify_assign (unshare_expr (valist),
8848 unshare_expr (next_fp_tmp), post_p);
8849 valist = next_fp_tmp;
8851 else
8853 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8854 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8855 unshare_expr (next_o_limit));
8856 tmp = build3 (COND_EXPR, void_type_node, tmp,
8857 build1 (GOTO_EXPR, void_type_node,
8858 unshare_expr (lab_false)),
8859 NULL_TREE);
8860 gimplify_and_add (tmp, pre_p);
8862 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8863 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8865 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8866 gimplify_and_add (tmp, pre_p);
8868 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8869 gimplify_and_add (tmp, pre_p);
8871 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8872 gimplify_assign (unshare_expr (next_o),
8873 unshare_expr (next_o_limit), pre_p);
8875 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8876 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8879 if (!result)
8881 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8882 gimplify_and_add (tmp, pre_p);
8886 /* ??? In va-sh.h, there had been code to make values larger than
8887 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8889 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8890 if (result)
8892 gimplify_assign (result, tmp, pre_p);
8893 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8894 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8895 gimplify_and_add (tmp, pre_p);
8897 else
8898 result = tmp;
8900 if (pass_by_ref)
8901 result = build_va_arg_indirect_ref (result);
8903 return result;
8906 /* 64 bit floating point memory transfers are paired single precision loads
8907 or stores. So DWARF information needs fixing in little endian mode (unless
8908 PR=SZ=1 in FPSCR). */
8910 sh_dwarf_register_span (rtx reg)
8912 unsigned regno = REGNO (reg);
8914 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8915 return NULL_RTX;
8917 return
8918 gen_rtx_PARALLEL (VOIDmode,
8919 gen_rtvec (2,
8920 gen_rtx_REG (SFmode, regno + 1),
8921 gen_rtx_REG (SFmode, regno)));
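/* Illustrative only: on a little endian SH4, a DFmode value living in the
   register pair starting at fr0 is thus described to DWARF as the pair
   fr1, fr0 rather than fr0, fr1.  */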
8924 static machine_mode
8925 sh_promote_function_mode (const_tree type, machine_mode mode,
8926 int *punsignedp, const_tree funtype,
8927 int for_return)
8929 if (sh_promote_prototypes (funtype))
8930 return promote_mode (type, mode, punsignedp);
8931 else
8932 return default_promote_function_mode (type, mode, punsignedp, funtype,
8933 for_return);
8936 static bool
8937 sh_promote_prototypes (const_tree type)
8939 if (TARGET_HITACHI)
8940 return false;
8941 if (! type)
8942 return true;
8943 return ! sh_attr_renesas_p (type);
8946 /* Whether an argument must be passed by reference. On SHcompact, we
8947 pretend arguments wider than 32 bits that would have been passed in
8948 registers are passed by reference, so that an SHmedia trampoline
8949 loads them into the full 64-bit registers. */
8950 static int
8951 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8952 const_tree type, bool named)
8954 unsigned HOST_WIDE_INT size;
8956 if (type)
8957 size = int_size_in_bytes (type);
8958 else
8959 size = GET_MODE_SIZE (mode);
8961 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8962 && (!named
8963 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8964 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8965 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8966 && size > 4
8967 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8968 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8969 return size;
8970 else
8971 return 0;
8974 static bool
8975 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8976 const_tree type, bool named)
8978 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8980 if (targetm.calls.must_pass_in_stack (mode, type))
8981 return true;
8983 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8984 wants to know about pass-by-reference semantics for incoming
8985 arguments. */
8986 if (! cum)
8987 return false;
8989 if (TARGET_SHCOMPACT)
8991 cum->byref = shcompact_byref (cum, mode, type, named);
8992 return cum->byref != 0;
8995 return false;
8998 static bool
8999 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
9000 const_tree type, bool named ATTRIBUTE_UNUSED)
9002 /* ??? How can it possibly be correct to return true only on the
9003 caller side of the equation? Is there someplace else in the
9004 sh backend that's magically producing the copies? */
9005 return (get_cumulative_args (cum)->outgoing
9006 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
9007 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
9010 /* Round a register number up to a proper boundary for an arg of mode
9011 MODE.
9012 The SH doesn't care about double alignment, so we only
9013 round doubles to even regs when asked to explicitly. */
9014 static int
9015 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
9017 /* FIXME: This used to be a macro and has been copy pasted into this
9018 function as is. Make this more readable. */
9019 return
9020 (((TARGET_ALIGN_DOUBLE
9021 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9022 && (mode == DFmode || mode == DCmode)
9023 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9024 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9025 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9026 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9027 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
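/* Illustrative only: on SH4, if cum.arg_count[SH_ARG_FLOAT] is 1 when a
   DFmode argument is being placed, the count above is rounded up to 2 so
   that the double starts in an even numbered FP argument register, i.e.
   in an aligned register pair.  */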
9030 /* Return true if an arg of the specified mode should be passed in a register
9031 or false otherwise. */
9032 static bool
9033 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9034 const_tree type)
9036 /* FIXME: This used to be a macro and has been copy pasted into this
9037 function as is. Make this more readable. */
9038 return
9039 ((type == 0
9040 || (! TREE_ADDRESSABLE (type)
9041 && (! (TARGET_HITACHI || cum.renesas_abi)
9042 || ! (AGGREGATE_TYPE_P (type)
9043 || (!TARGET_FPU_ANY
9044 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9045 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9046 && ! cum.force_mem
9047 && (TARGET_SH2E
9048 ? ((mode) == BLKmode
9049 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9050 + int_size_in_bytes (type))
9051 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9052 : ((sh_round_reg (cum, mode)
9053 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9054 <= NPARM_REGS (mode)))
9055 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9058 static int
9059 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9060 tree type, bool named ATTRIBUTE_UNUSED)
9062 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9063 int words = 0;
9065 if (!TARGET_SH5
9066 && sh_pass_in_reg_p (*cum, mode, type)
9067 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9068 && (sh_round_reg (*cum, mode)
9069 + (mode != BLKmode
9070 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9071 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9072 > NPARM_REGS (mode)))
9073 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9075 else if (!TARGET_SHCOMPACT
9076 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9077 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9079 return words * UNITS_PER_WORD;
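/* Illustrative only: on SH2, a DImode argument arriving when sh_round_reg
   returns 3 (only r7 still free) gives words = 4 - 3 = 1, i.e. 4 bytes go
   in r7 and the remaining 4 bytes go on the stack.  */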
9083 /* Define where to put the arguments to a function.
9084 Value is zero to push the argument on the stack,
9085 or a hard register in which to store the argument.
9087 MODE is the argument's machine mode.
9088 TYPE is the data type of the argument (as a tree).
9089 This is null for libcalls where that information may
9090 not be available.
9091 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9092 the preceding args and about the function being called.
9093 NAMED is nonzero if this argument is a named parameter
9094 (otherwise it is an extra parameter matching an ellipsis).
9096 On SH the first args are normally in registers
9097 and the rest are pushed. Any arg that starts within the first
9098 NPARM_REGS words is at least partially passed in a register unless
9099 its data type forbids. */
9100 static rtx
9101 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9102 const_tree type, bool named)
9104 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9106 if (! TARGET_SH5 && mode == VOIDmode)
9107 return GEN_INT (ca->renesas_abi ? 1 : 0);
9109 if (! TARGET_SH5
9110 && sh_pass_in_reg_p (*ca, mode, type)
9111 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9113 int regno;
9115 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9116 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9118 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9119 gen_rtx_REG (SFmode,
9120 BASE_ARG_REG (mode)
9121 + (sh_round_reg (*ca, mode) ^ 1)),
9122 const0_rtx);
9123 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9124 gen_rtx_REG (SFmode,
9125 BASE_ARG_REG (mode)
9126 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9127 GEN_INT (4));
9128 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9131 /* If the alignment of a DF value causes an SF register to be
9132 skipped, we will use that skipped register for the next SF
9133 value. */
9134 if ((TARGET_HITACHI || ca->renesas_abi)
9135 && ca->free_single_fp_reg
9136 && mode == SFmode)
9137 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9139 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9140 ^ (mode == SFmode && TARGET_SH4
9141 && TARGET_LITTLE_ENDIAN
9142 && ! TARGET_HITACHI && ! ca->renesas_abi);
9143 return gen_rtx_REG (mode, regno);
9147 if (TARGET_SH5)
9149 if (mode == VOIDmode && TARGET_SHCOMPACT)
9150 return GEN_INT (ca->call_cookie);
9152 /* The following test assumes unnamed arguments are promoted to
9153 DFmode. */
9154 if (mode == SFmode && ca->free_single_fp_reg)
9155 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9157 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9158 && (named || ! ca->prototype_p)
9159 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9161 if (! ca->prototype_p && TARGET_SHMEDIA)
9162 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9164 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9165 FIRST_FP_PARM_REG
9166 + ca->arg_count[(int) SH_ARG_FLOAT]);
9169 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9170 && (! TARGET_SHCOMPACT
9171 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9172 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9173 type, named))))
9175 return gen_rtx_REG (mode, (FIRST_PARM_REG
9176 + ca->arg_count[(int) SH_ARG_INT]));
9179 return NULL_RTX;
9182 return NULL_RTX;
9185 /* Update the data in CUM to advance over an argument
9186 of mode MODE and data type TYPE.
9187 (TYPE is null for libcalls where that information may not be
9188 available.) */
9189 static void
9190 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9191 const_tree type, bool named)
9193 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9195 if (ca->force_mem)
9196 ca->force_mem = 0;
9197 else if (TARGET_SH5)
9199 const_tree type2 = (ca->byref && type
9200 ? TREE_TYPE (type)
9201 : type);
9202 machine_mode mode2 = (ca->byref && type
9203 ? TYPE_MODE (type2)
9204 : mode);
9205 int dwords = ((ca->byref
9206 ? ca->byref
9207 : mode2 == BLKmode
9208 ? int_size_in_bytes (type2)
9209 : GET_MODE_SIZE (mode2)) + 7) / 8;
9210 int numregs = MIN (dwords, NPARM_REGS (SImode)
9211 - ca->arg_count[(int) SH_ARG_INT]);
9213 if (numregs)
9215 ca->arg_count[(int) SH_ARG_INT] += numregs;
9216 if (TARGET_SHCOMPACT
9217 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9219 ca->call_cookie
9220 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9221 - numregs, 1);
9222 /* N.B. We want this also for outgoing. */
9223 ca->stack_regs += numregs;
9225 else if (ca->byref)
9227 if (! ca->outgoing)
9228 ca->stack_regs += numregs;
9229 ca->byref_regs += numregs;
9230 ca->byref = 0;
9232 ca->call_cookie
9233 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9234 - numregs, 2);
9235 while (--numregs);
9236 ca->call_cookie
9237 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9238 - 1, 1);
9240 else if (dwords > numregs)
9242 int pushregs = numregs;
9244 if (TARGET_SHCOMPACT)
9245 ca->stack_regs += numregs;
9246 while (pushregs < NPARM_REGS (SImode) - 1
9247 && (CALL_COOKIE_INT_REG_GET
9248 (ca->call_cookie,
9249 NPARM_REGS (SImode) - pushregs)
9250 == 1))
9252 ca->call_cookie
9253 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9254 - pushregs, 1);
9255 pushregs++;
9257 if (numregs == NPARM_REGS (SImode))
9258 ca->call_cookie
9259 |= CALL_COOKIE_INT_REG (0, 1)
9260 | CALL_COOKIE_STACKSEQ (numregs - 1);
9261 else
9262 ca->call_cookie
9263 |= CALL_COOKIE_STACKSEQ (numregs);
9266 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9267 && (named || ! ca->prototype_p))
9269 if (mode2 == SFmode && ca->free_single_fp_reg)
9270 ca->free_single_fp_reg = 0;
9271 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9272 < NPARM_REGS (SFmode))
9274 int numfpregs
9275 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9276 NPARM_REGS (SFmode)
9277 - ca->arg_count[(int) SH_ARG_FLOAT]);
9279 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9281 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9283 if (ca->outgoing && numregs > 0)
9286 ca->call_cookie
9287 |= (CALL_COOKIE_INT_REG
9288 (ca->arg_count[(int) SH_ARG_INT]
9289 - numregs + ((numfpregs - 2) / 2),
9290 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9291 - numfpregs) / 2));
9293 while (numfpregs -= 2);
9295 else if (mode2 == SFmode && (named)
9296 && (ca->arg_count[(int) SH_ARG_FLOAT]
9297 < NPARM_REGS (SFmode)))
9298 ca->free_single_fp_reg
9299 = FIRST_FP_PARM_REG - numfpregs
9300 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9303 return;
9306 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9308 /* Note that we've used the skipped register. */
9309 if (mode == SFmode && ca->free_single_fp_reg)
9311 ca->free_single_fp_reg = 0;
9312 return;
9314 /* When we have a DF after an SF, there's an SF register that gets
9315 skipped in order to align the DF value. We note this skipped
9316 register, because the next SF value will use it, and not the
9317 SF that follows the DF. */
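/* A minimal illustration (assuming the usual SH4 single-precision
   argument registers fr4..fr11):  for  void f (float a, double b, float c)
   A is passed in fr4, B must be double-register aligned and so goes in
   dr6 (fr6/fr7), skipping fr5, and C then picks up the skipped fr5
   instead of fr8.  */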
9318 if (mode == DFmode
9319 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9321 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9322 + BASE_ARG_REG (mode));
9326 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9327 || sh_pass_in_reg_p (*ca, mode, type))
9328 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9329 = (sh_round_reg (*ca, mode)
9330 + (mode == BLKmode
9331 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9332 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9335 /* The Renesas calling convention doesn't quite fit into this scheme since
9336 the address is passed like an invisible argument, but one that is always
9337 passed in memory. */
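/* For instance (informal sketch), for  struct S { int v[4]; } get_s (void);
   a caller following the default ABI passes the address of the return
   slot in r2, whereas with the renesas attribute or -mrenesas that
   address is treated as an invisible argument passed in memory, as
   described above.  */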
9338 static rtx
9339 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9341 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9342 return NULL_RTX;
9343 return gen_rtx_REG (Pmode, 2);
9346 /* Worker function for TARGET_FUNCTION_VALUE.
9348 For the SH, this is like LIBCALL_VALUE, except that we must change the
9349 mode like PROMOTE_MODE does.
9350 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9351 tested here has to be kept in sync with the one in
9352 explow.c:promote_mode. */
9353 static rtx
9354 sh_function_value (const_tree valtype,
9355 const_tree fn_decl_or_type,
9356 bool outgoing ATTRIBUTE_UNUSED)
9358 if (fn_decl_or_type
9359 && !DECL_P (fn_decl_or_type))
9360 fn_decl_or_type = NULL;
9362 return gen_rtx_REG (
9363 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9364 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9365 && (TREE_CODE (valtype) == INTEGER_TYPE
9366 || TREE_CODE (valtype) == ENUMERAL_TYPE
9367 || TREE_CODE (valtype) == BOOLEAN_TYPE
9368 || TREE_CODE (valtype) == REAL_TYPE
9369 || TREE_CODE (valtype) == OFFSET_TYPE))
9370 && sh_promote_prototypes (fn_decl_or_type)
9371 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9372 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9375 /* Worker function for TARGET_LIBCALL_VALUE. */
9376 static rtx
9377 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9379 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9382 /* Return true if N is a possible register number of function value. */
9383 static bool
9384 sh_function_value_regno_p (const unsigned int regno)
9386 return ((regno) == FIRST_RET_REG
9387 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9388 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9391 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9392 static bool
9393 sh_return_in_memory (const_tree type, const_tree fndecl)
9395 if (TARGET_SH5)
9397 if (TYPE_MODE (type) == BLKmode)
9398 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9399 else
9400 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9402 else
9404 return (TYPE_MODE (type) == BLKmode
9405 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9406 && TREE_CODE (type) == RECORD_TYPE));
9410 /* We actually emit the code in sh_expand_prologue. We used to use
9411 a static variable to flag that we need to emit this code, but that
9412 doesn't work when inlining, when functions are deferred and then emitted
9413 later. Fortunately, we already have two flags that are part of struct
9414 function that tell if a function uses varargs or stdarg. */
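/* A rough example (assuming the usual four integer argument registers
   r4..r7, i.e. NPARM_REGS (SImode) == 4):  for  int f (int a, ...)
   one register is taken by the named argument, anon_parm_regs comes out
   as 3, and *pretend_arg_size is set to 12 so that r5..r7 can be spilled
   next to the stack arguments.  */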
9415 static void
9416 sh_setup_incoming_varargs (cumulative_args_t ca,
9417 machine_mode mode,
9418 tree type,
9419 int *pretend_arg_size,
9420 int second_time ATTRIBUTE_UNUSED)
9422 gcc_assert (cfun->stdarg);
9423 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9425 int named_parm_regs, anon_parm_regs;
9427 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9428 + (mode == BLKmode
9429 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9430 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9431 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9432 if (anon_parm_regs > 0)
9433 *pretend_arg_size = anon_parm_regs * 4;
9437 static bool
9438 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9440 return TARGET_SH5;
9443 static bool
9444 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9446 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9448 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9452 /* Define the offset between two registers, one to be eliminated, and
9453 the other its replacement, at the start of a routine. */
9454 int
9455 initial_elimination_offset (int from, int to)
9457 int regs_saved;
9458 int regs_saved_rounding = 0;
9459 int total_saved_regs_space;
9460 int total_auto_space;
9461 int save_flags = target_flags;
9462 int copy_flags;
9463 HARD_REG_SET live_regs_mask;
9465 shmedia_space_reserved_for_target_registers = false;
9466 regs_saved = calc_live_regs (&live_regs_mask);
9467 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9469 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9471 shmedia_space_reserved_for_target_registers = true;
9472 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9475 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9476 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9477 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9479 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9480 copy_flags = target_flags;
9481 target_flags = save_flags;
9483 total_saved_regs_space = regs_saved + regs_saved_rounding;
9485 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9486 return total_saved_regs_space + total_auto_space
9487 + crtl->args.info.byref_regs * 8;
9489 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9490 return total_saved_regs_space + total_auto_space
9491 + crtl->args.info.byref_regs * 8;
9493 /* Initial gap between fp and sp is 0. */
9494 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9495 return 0;
9497 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9498 return rounded_frame_size (0);
9500 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9501 return rounded_frame_size (0);
9503 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9504 && (to == HARD_FRAME_POINTER_REGNUM
9505 || to == STACK_POINTER_REGNUM));
9506 if (TARGET_SH5)
9508 int n = total_saved_regs_space;
9509 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9510 save_schedule schedule;
9511 save_entry *entry;
9513 n += total_auto_space;
9515 /* If it wasn't saved, there's not much we can do. */
9516 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9517 return n;
9519 target_flags = copy_flags;
9521 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9522 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9523 if (entry->reg == pr_reg)
9525 target_flags = save_flags;
9526 return entry->offset;
9528 gcc_unreachable ();
9530 else
9531 return total_auto_space;
9534 /* Parse the -mfixed-range= option string. */
9535 void
9536 sh_fix_range (const char *const_str)
9538 int i, first, last;
9539 char *str, *dash, *comma;
9541 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9542 REG2 are either register names or register numbers. The effect
9543 of this option is to mark the registers in the range from REG1 to
9544 REG2 as ``fixed'' so they won't be used by the compiler. */
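/* For example (illustrative only), -mfixed-range=r8-r10,r12-r13 would
   mark r8, r9, r10, r12 and r13 as fixed and call-used for the whole
   translation unit.  */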
9546 i = strlen (const_str);
9547 str = (char *) alloca (i + 1);
9548 memcpy (str, const_str, i + 1);
9550 while (1)
9552 dash = strchr (str, '-');
9553 if (!dash)
9555 warning (0, "value of -mfixed-range must have form REG1-REG2");
9556 return;
9558 *dash = '\0';
9559 comma = strchr (dash + 1, ',');
9560 if (comma)
9561 *comma = '\0';
9563 first = decode_reg_name (str);
9564 if (first < 0)
9566 warning (0, "unknown register name: %s", str);
9567 return;
9570 last = decode_reg_name (dash + 1);
9571 if (last < 0)
9573 warning (0, "unknown register name: %s", dash + 1);
9574 return;
9577 *dash = '-';
9579 if (first > last)
9581 warning (0, "%s-%s is an empty range", str, dash + 1);
9582 return;
9585 for (i = first; i <= last; ++i)
9586 fixed_regs[i] = call_used_regs[i] = 1;
9588 if (!comma)
9589 break;
9591 *comma = ',';
9592 str = comma + 1;
9596 /* Insert any deferred function attributes from earlier pragmas. */
9597 static void
9598 sh_insert_attributes (tree node, tree *attributes)
9600 tree attrs;
9602 if (TREE_CODE (node) != FUNCTION_DECL)
9603 return;
9605 /* We are only interested in fields. */
9606 if (!DECL_P (node))
9607 return;
9609 /* Append the attributes to the deferred attributes. */
9610 *sh_deferred_function_attributes_tail = *attributes;
9611 attrs = sh_deferred_function_attributes;
9612 if (!attrs)
9613 return;
9615 /* Some attributes imply or require the interrupt attribute. */
9616 if (!lookup_attribute ("interrupt_handler", attrs)
9617 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9619 /* If we have a trapa_handler, but no interrupt_handler attribute,
9620 insert an interrupt_handler attribute. */
9621 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9622 /* We can't use sh_pr_interrupt here because that's not in the
9623 java frontend. */
9624 attrs
9625 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9626 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9627 if the interrupt attribute is missing, we ignore the attribute
9628 and warn. */
9629 else if (lookup_attribute ("sp_switch", attrs)
9630 || lookup_attribute ("trap_exit", attrs)
9631 || lookup_attribute ("nosave_low_regs", attrs)
9632 || lookup_attribute ("resbank", attrs))
9634 tree *tail;
9636 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9638 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9639 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9640 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9641 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9642 warning (OPT_Wattributes,
9643 "%qE attribute only applies to interrupt functions",
9644 TREE_PURPOSE (attrs));
9645 else
9647 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9648 NULL_TREE);
9649 tail = &TREE_CHAIN (*tail);
9652 attrs = *attributes;
9656 /* Install the processed list. */
9657 *attributes = attrs;
9659 /* Clear deferred attributes. */
9660 sh_deferred_function_attributes = NULL_TREE;
9661 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9663 return;
9666 /*------------------------------------------------------------------------------
9667 Target specific attributes
9668 Supported attributes are:
9670 * interrupt_handler
9671 Specifies this function is an interrupt handler.
9673 * trapa_handler
9674 Like interrupt_handler, but don't save all registers.
9676 * sp_switch
9677 Specifies an alternate stack for an interrupt handler to run on.
9679 * trap_exit
9680 Use a trapa to exit an interrupt function instead of rte.
9682 * nosave_low_regs
9683 Don't save r0..r7 in an interrupt handler function.
9684 This is useful on SH3* and SH4*, which have a separate set of low
9685 regs for user and privileged modes.
9686 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9687 those that run with interrupts disabled and thus can't be
9688 interrupted themselves).
9690 * renesas
9691 Use Renesas calling/layout conventions (functions and structures).
9693 * resbank
9694 In case of an interrupt handler function, use a register bank to
9695 save registers R0-R14, MACH, MACL, GBR and PR.
9696 This is available only on SH2A targets.
9698 * function_vector
9699 Declares a function to be called using the TBR relative addressing
9700 mode. Takes an argument that specifies the slot number in the table
9701 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
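/* Illustrative uses of these attributes (a sketch, not taken from any
   real test case):

     void isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11), nosave_low_regs));

     void fast_fn (void) __attribute__ ((function_vector (42)));   (SH2A only)

     int hitachi_fn (int x) __attribute__ ((renesas));  */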
9704 /* Handle a 'resbank' attribute. */
9705 static tree
9706 sh_handle_resbank_handler_attribute (tree * node, tree name,
9707 tree args ATTRIBUTE_UNUSED,
9708 int flags ATTRIBUTE_UNUSED,
9709 bool * no_add_attrs)
9711 if (!TARGET_SH2A)
9713 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9714 name);
9715 *no_add_attrs = true;
9717 if (TREE_CODE (*node) != FUNCTION_DECL)
9719 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9720 name);
9721 *no_add_attrs = true;
9724 return NULL_TREE;
9727 /* Handle an "interrupt_handler" attribute; arguments as in
9728 struct attribute_spec.handler. */
9729 static tree
9730 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9731 tree args ATTRIBUTE_UNUSED,
9732 int flags ATTRIBUTE_UNUSED,
9733 bool *no_add_attrs)
9735 if (TREE_CODE (*node) != FUNCTION_DECL)
9737 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9738 name);
9739 *no_add_attrs = true;
9741 else if (TARGET_SHCOMPACT)
9743 error ("attribute interrupt_handler is not compatible with -m5-compact");
9744 *no_add_attrs = true;
9747 return NULL_TREE;
9750 /* Handle a 'function_vector' attribute; arguments as in
9751 struct attribute_spec.handler. */
9752 static tree
9753 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9754 tree args ATTRIBUTE_UNUSED,
9755 int flags ATTRIBUTE_UNUSED,
9756 bool * no_add_attrs)
9758 if (!TARGET_SH2A)
9760 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9761 name);
9762 *no_add_attrs = true;
9764 else if (TREE_CODE (*node) != FUNCTION_DECL)
9766 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9767 name);
9768 *no_add_attrs = true;
9770 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9772 /* The argument must be a constant integer. */
9773 warning (OPT_Wattributes,
9774 "%qE attribute argument not an integer constant",
9775 name);
9776 *no_add_attrs = true;
9778 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9780 /* The argument value must be between 0 and 255. */
9781 warning (OPT_Wattributes,
9782 "%qE attribute argument should be between 0 to 255",
9783 name);
9784 *no_add_attrs = true;
9786 return NULL_TREE;
9789 /* Returns true if the given rtx X refers to a function that has been
9790 assigned the attribute 'function_vector'. */
9791 bool
9792 sh2a_is_function_vector_call (rtx x)
9794 if (GET_CODE (x) == SYMBOL_REF
9795 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9797 tree tr = SYMBOL_REF_DECL (x);
9799 if (sh2a_function_vector_p (tr))
9800 return true;
9803 return false;
9806 /* Returns the function vector number, if the attribute
9807 'function_vector' is assigned, otherwise returns zero. */
9808 int
9809 sh2a_get_function_vector_number (rtx x)
9811 int num;
9812 tree list, t;
9814 if ((GET_CODE (x) == SYMBOL_REF)
9815 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9817 t = SYMBOL_REF_DECL (x);
9819 if (TREE_CODE (t) != FUNCTION_DECL)
9820 return 0;
9822 list = SH_ATTRIBUTES (t);
9823 while (list)
9825 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9827 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9828 return num;
9831 list = TREE_CHAIN (list);
9834 return 0;
9836 else
9837 return 0;
9840 /* Handle an "sp_switch" attribute; arguments as in
9841 struct attribute_spec.handler. */
9842 static tree
9843 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9844 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9846 if (TREE_CODE (*node) != FUNCTION_DECL)
9848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9849 name);
9850 *no_add_attrs = true;
9852 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9854 /* The argument must be a constant string. */
9855 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9856 name);
9857 *no_add_attrs = true;
9860 return NULL_TREE;
9863 /* Handle a "trap_exit" attribute; arguments as in
9864 struct attribute_spec.handler. */
9865 static tree
9866 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9867 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9869 if (TREE_CODE (*node) != FUNCTION_DECL)
9871 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9872 name);
9873 *no_add_attrs = true;
9875 /* The argument specifies a trap number to be used in a trapa instruction
9876 at function exit (instead of an rte instruction). */
9877 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9879 /* The argument must be a constant integer. */
9880 warning (OPT_Wattributes, "%qE attribute argument not an "
9881 "integer constant", name);
9882 *no_add_attrs = true;
9885 return NULL_TREE;
9888 static tree
9889 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9890 tree name ATTRIBUTE_UNUSED,
9891 tree args ATTRIBUTE_UNUSED,
9892 int flags ATTRIBUTE_UNUSED,
9893 bool *no_add_attrs ATTRIBUTE_UNUSED)
9895 return NULL_TREE;
9898 /* True if __attribute__((renesas)) or -mrenesas. */
9899 bool
9900 sh_attr_renesas_p (const_tree td)
9902 if (TARGET_HITACHI)
9903 return true;
9904 if (td == NULL_TREE)
9905 return false;
9906 if (DECL_P (td))
9907 td = TREE_TYPE (td);
9908 if (td == error_mark_node)
9909 return false;
9910 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9911 != NULL_TREE);
9914 /* True if __attribute__((renesas)) or -mrenesas, for the current
9915 function. */
9916 bool
9917 sh_cfun_attr_renesas_p (void)
9919 return sh_attr_renesas_p (current_function_decl);
9922 /* Returns true if the current function has the "interrupt_handler"
9923 attribute set. */
9924 bool
9925 sh_cfun_interrupt_handler_p (void)
9927 return (lookup_attribute ("interrupt_handler",
9928 DECL_ATTRIBUTES (current_function_decl))
9929 != NULL_TREE);
9932 /* Returns true if FUNC has been assigned the attribute
9933 "function_vector". */
9934 bool
9935 sh2a_function_vector_p (tree func)
9937 tree list;
9938 if (TREE_CODE (func) != FUNCTION_DECL)
9939 return false;
9941 list = SH_ATTRIBUTES (func);
9942 while (list)
9944 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9945 return true;
9947 list = TREE_CHAIN (list);
9949 return false;
9952 /* Returns true if the current function has the "resbank" attribute set. */
9953 bool
9954 sh_cfun_resbank_handler_p (void)
9956 return ((lookup_attribute ("resbank",
9957 DECL_ATTRIBUTES (current_function_decl))
9958 != NULL_TREE)
9959 && (lookup_attribute ("interrupt_handler",
9960 DECL_ATTRIBUTES (current_function_decl))
9961 != NULL_TREE) && TARGET_SH2A);
9964 /* Returns true if the current function has a "trap_exit" attribute set. */
9965 bool
9966 sh_cfun_trap_exit_p (void)
9968 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9969 != NULL_TREE;
9972 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9973 static const char *
9974 sh_check_pch_target_flags (int old_flags)
9976 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9977 | MASK_SH_E | MASK_HARD_SH4
9978 | MASK_FPU_SINGLE | MASK_SH4))
9979 return _("created and used with different architectures / ABIs");
9980 if ((old_flags ^ target_flags) & MASK_HITACHI)
9981 return _("created and used with different ABIs");
9982 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9983 return _("created and used with different endianness");
9984 return NULL;
9987 /* Predicates used by the templates. */
9989 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9990 Used only in general_movsrc_operand. */
9991 bool
9992 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9994 switch (REGNO (op))
9996 case PR_REG:
9997 case MACL_REG:
9998 case MACH_REG:
9999 return true;
10001 return false;
10004 /* Returns true if OP is a floating point value with value 0.0. */
10005 bool
10006 fp_zero_operand (rtx op)
10008 REAL_VALUE_TYPE r;
10010 if (GET_MODE (op) != SFmode)
10011 return false;
10013 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10014 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
10017 /* Returns true if OP is a floating point value with value 1.0. */
10018 bool
10019 fp_one_operand (rtx op)
10021 REAL_VALUE_TYPE r;
10023 if (GET_MODE (op) != SFmode)
10024 return false;
10026 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10027 return REAL_VALUES_EQUAL (r, dconst1);
10030 /* Return the TLS type for TLS symbols. */
10031 enum tls_model
10032 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10034 if (GET_CODE (op) != SYMBOL_REF)
10035 return TLS_MODEL_NONE;
10036 return SYMBOL_REF_TLS_MODEL (op);
10039 /* Return the destination address of a branch. */
10040 static int
10041 branch_dest (rtx branch)
10043 rtx dest = SET_SRC (PATTERN (branch));
10044 int dest_uid;
10046 if (GET_CODE (dest) == IF_THEN_ELSE)
10047 dest = XEXP (dest, 1);
10048 dest = XEXP (dest, 0);
10049 dest_uid = INSN_UID (dest);
10050 return INSN_ADDRESSES (dest_uid);
10053 /* Return nonzero if REG is not used after INSN.
10054 We assume REG is a reload reg, and therefore does
10055 not live past labels. It may live past calls or jumps though. */
10056 bool
10057 reg_unused_after (rtx reg, rtx_insn *insn)
10059 enum rtx_code code;
10060 rtx set;
10062 /* If the reg is set by this instruction, then it is safe for our
10063 case. Disregard the case where this is a store to memory, since
10064 we are checking a register used in the store address. */
10065 set = single_set (insn);
10066 if (set && !MEM_P (SET_DEST (set))
10067 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10068 return true;
10070 while ((insn = NEXT_INSN (insn)))
10072 rtx set;
10073 if (!INSN_P (insn))
10074 continue;
10076 code = GET_CODE (insn);
10078 #if 0
10079 /* If this is a label that existed before reload, then the register
10080 is dead here. However, if this is a label added by reorg, then
10081 the register may still be live here. We can't tell the difference,
10082 so we just ignore labels completely. */
10083 if (code == CODE_LABEL)
10084 return 1;
10085 /* else */
10086 #endif
10088 if (code == JUMP_INSN)
10089 return false;
10091 /* If this is a sequence, we must handle them all at once.
10092 We could have for instance a call that sets the target register,
10093 and an insn in a delay slot that uses the register. In this case,
10094 we must return 0. */
10095 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10097 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10098 int i;
10099 int retval = 0;
10101 for (i = 0; i < seq->len (); i++)
10103 rtx_insn *this_insn = seq->insn (i);
10104 rtx set = single_set (this_insn);
10106 if (CALL_P (this_insn))
10107 code = CALL_INSN;
10108 else if (JUMP_P (this_insn))
10110 if (INSN_ANNULLED_BRANCH_P (this_insn))
10111 return false;
10112 code = JUMP_INSN;
10115 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10116 return false;
10117 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10119 if (!MEM_P (SET_DEST (set)))
10120 retval = true;
10121 else
10122 return false;
10124 if (set == NULL_RTX
10125 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10126 return false;
10128 if (retval == 1)
10129 return true;
10130 else if (code == JUMP_INSN)
10131 return false;
10134 set = single_set (insn);
10135 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10136 return false;
10137 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10138 return !MEM_P (SET_DEST (set));
10139 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10140 return false;
10142 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10143 return true;
10145 return true;
10149 static GTY(()) rtx t_reg_rtx;
10150 rtx
10151 get_t_reg_rtx (void)
10153 if (! t_reg_rtx)
10154 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10155 return t_reg_rtx;
10158 static GTY(()) tree fpscr_values;
10160 static void
10161 emit_fpu_switch (rtx scratch, int index)
10163 rtx src;
10165 if (fpscr_values == NULL)
10167 tree t;
10169 t = build_index_type (integer_one_node);
10170 t = build_array_type (integer_type_node, t);
10171 t = build_decl (BUILTINS_LOCATION,
10172 VAR_DECL, get_identifier ("__fpscr_values"), t);
10173 DECL_ARTIFICIAL (t) = 1;
10174 DECL_IGNORED_P (t) = 1;
10175 DECL_EXTERNAL (t) = 1;
10176 TREE_STATIC (t) = 1;
10177 TREE_PUBLIC (t) = 1;
10178 TREE_USED (t) = 1;
10180 fpscr_values = t;
10183 src = DECL_RTL (fpscr_values);
10184 if (!can_create_pseudo_p ())
10186 emit_move_insn (scratch, XEXP (src, 0));
10187 if (index != 0)
10188 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10189 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10191 else
10192 src = adjust_address (src, SImode, index * 4);
10194 emit_insn (gen_lds_fpscr (src));
10197 static rtx get_free_reg (HARD_REG_SET);
10199 /* This function returns a register to use for loading the address that
10200 the fpscr is loaded from. Currently it always returns r1 or r7, but when we are
10201 able to use pseudo registers after combine, or have a better mechanism
10202 for choosing a register, it should be done here. */
10203 /* REGS_LIVE is the liveness information for the point for which we
10204 need this allocation. In some bare-bones exit blocks, r1 is live at the
10205 start. We can even have all of r0..r3 being live:
10206 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10207 The INSN before which new insns are placed will clobber the register
10208 we return. If a basic block consists only of setting the return value
10209 register to a pseudo and using that register, the return value is not
10210 live before or after this block, yet we'll insert our insns right in
10211 the middle. */
10212 static rtx
10213 get_free_reg (HARD_REG_SET regs_live)
10215 if (! TEST_HARD_REG_BIT (regs_live, 1))
10216 return gen_rtx_REG (Pmode, 1);
10218 /* Hard reg 1 is live; since this is a small register classes target,
10219 there shouldn't be anything but a jump before the function end. */
10220 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10221 return gen_rtx_REG (Pmode, 7);
10224 /* This function will set the fpscr from memory.
10225 MODE is the mode we are setting it to. */
10226 void
10227 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10229 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10230 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10231 rtx addr_reg;
10233 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10234 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10237 /* Is the given character a logical line separator for the assembler? */
10238 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10239 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10240 #endif
10242 static bool
10243 sequence_insn_p (rtx_insn *insn)
10245 rtx_insn *prev, *next;
10247 prev = PREV_INSN (insn);
10248 if (prev == NULL)
10249 return false;
10251 next = NEXT_INSN (prev);
10252 if (next == NULL)
10253 return false;
10255 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10258 int
10259 sh_insn_length_adjustment (rtx_insn *insn)
10261 /* Instructions with unfilled delay slots take up an extra two bytes for
10262 the nop in the delay slot. */
10263 if (((NONJUMP_INSN_P (insn)
10264 && GET_CODE (PATTERN (insn)) != USE
10265 && GET_CODE (PATTERN (insn)) != CLOBBER)
10266 || CALL_P (insn) || JUMP_P (insn))
10267 && ! sequence_insn_p (insn)
10268 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10269 return 2;
10271 /* SH2e has a bug that prevents the use of annulled branches, so if
10272 the delay slot is not filled, we'll have to put a NOP in it. */
10273 if (sh_cpu_attr == CPU_SH2E
10274 && JUMP_P (insn)
10275 && get_attr_type (insn) == TYPE_CBRANCH
10276 && ! sequence_insn_p (insn))
10277 return 2;
10279 /* sh-dsp parallel processing insns take four bytes instead of two. */
10281 if (NONJUMP_INSN_P (insn))
10283 int sum = 0;
10284 rtx body = PATTERN (insn);
10285 const char *templ;
10286 char c;
10287 bool maybe_label = true;
10289 if (GET_CODE (body) == ASM_INPUT)
10290 templ = XSTR (body, 0);
10291 else if (asm_noperands (body) >= 0)
10292 templ
10293 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10294 else
10295 return 0;
10298 int ppi_adjust = 0;
10301 c = *templ++;
10302 while (c == ' ' || c == '\t');
10303 /* all sh-dsp parallel-processing insns start with p.
10304 The only non-ppi sh insn starting with p is pref.
10305 The only ppi starting with pr is prnd. */
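/* E.g. an inline asm template along the lines of "padd x0,y0,a0"
   (hypothetical operands) is treated as a 4-byte ppi insn by the scan
   below, a "pref" prefetch is not, and a "prnd" round is.  */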
10306 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10307 ppi_adjust = 2;
10308 /* The repeat pseudo-insn expands to three insns, a total of
10309 six bytes in size. */
10310 else if ((c == 'r' || c == 'R')
10311 && ! strncasecmp ("epeat", templ, 5))
10312 ppi_adjust = 4;
10313 while (c && c != '\n'
10314 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10316 /* If this is a label, it is obviously not a ppi insn. */
10317 if (c == ':' && maybe_label)
10319 ppi_adjust = 0;
10320 break;
10322 else if (c == '\'' || c == '"')
10323 maybe_label = false;
10324 c = *templ++;
10326 sum += ppi_adjust;
10327 maybe_label = c != ':';
10329 while (c);
10330 return sum;
10332 return 0;
10335 /* Return TRUE for a valid displacement for the REG+disp addressing
10336 with MODE. */
10337 bool
10338 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10339 bool allow_zero)
10341 if (! CONST_INT_P (op))
10342 return false;
10344 if (TARGET_SHMEDIA)
10346 int size;
10348 /* Check if this is the address of an unaligned load / store. */
10349 if (mode == VOIDmode)
10350 return satisfies_constraint_I06 (op);
10352 size = GET_MODE_SIZE (mode);
10353 return (!(INTVAL (op) & (size - 1))
10354 && INTVAL (op) >= -512 * size
10355 && INTVAL (op) < 512 * size);
10357 else
10359 const HOST_WIDE_INT offset = INTVAL (op);
10360 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10361 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10363 /* If the mode does not support any displacement always return false.
10364 Even though an index of '0' is actually always valid, it will cause
10365 troubles when e.g. a DFmode move is split into two SFmode moves,
10366 where one SFmode move will have index '0' and the other move will
10367 have index '4'. */
10368 if (!allow_zero && max_disp < 1)
10369 return false;
10371 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10375 /* Recognize an RTL expression that is a valid memory address for
10376 an instruction.
10377 The MODE argument is the machine mode for the MEM expression
10378 that wants to use this address.
10379 Allow REG
10380 REG+disp
10381 REG+r0
10382 REG++
10383 --REG
10385 GBR+disp */
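/* In SH assembler terms these correspond roughly to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn and @(disp,GBR) respectively.  */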
10386 static bool
10387 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10389 if (! ALLOW_INDEXED_ADDRESS
10390 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10391 return false;
10393 if (REG_P (x) && REGNO (x) == GBR_REG)
10394 return true;
10396 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10397 return true;
10398 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10399 && ! TARGET_SHMEDIA
10400 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10401 return true;
10402 else if (GET_CODE (x) == PLUS)
10404 rtx xop0 = XEXP (x, 0);
10405 rtx xop1 = XEXP (x, 1);
10407 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10408 return gbr_displacement (xop1, mode);
10410 if (GET_MODE_SIZE (mode) <= 8
10411 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10412 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10413 return true;
10415 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10416 || ((xop0 == stack_pointer_rtx
10417 || xop0 == hard_frame_pointer_rtx)
10418 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10419 || ((xop1 == stack_pointer_rtx
10420 || xop1 == hard_frame_pointer_rtx)
10421 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10422 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10423 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10424 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10425 && TARGET_FMOVD && mode == DFmode)))
10427 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10428 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10429 return true;
10430 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10431 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10432 return true;
10436 return false;
10439 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10440 isn't protected by a PIC unspec. */
10441 bool
10442 nonpic_symbol_mentioned_p (rtx x)
10444 const char *fmt;
10445 int i;
10447 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10448 || GET_CODE (x) == PC)
10449 return true;
10451 /* We don't want to look into the possible MEM location of a
10452 CONST_DOUBLE, since we're not going to use it, in general. */
10453 if (GET_CODE (x) == CONST_DOUBLE)
10454 return false;
10456 if (GET_CODE (x) == UNSPEC
10457 && (XINT (x, 1) == UNSPEC_PIC
10458 || XINT (x, 1) == UNSPEC_GOT
10459 || XINT (x, 1) == UNSPEC_GOTOFF
10460 || XINT (x, 1) == UNSPEC_GOTPLT
10461 || XINT (x, 1) == UNSPEC_GOTTPOFF
10462 || XINT (x, 1) == UNSPEC_DTPOFF
10463 || XINT (x, 1) == UNSPEC_TPOFF
10464 || XINT (x, 1) == UNSPEC_PLT
10465 || XINT (x, 1) == UNSPEC_SYMOFF
10466 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10467 return false;
10469 fmt = GET_RTX_FORMAT (GET_CODE (x));
10470 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10472 if (fmt[i] == 'E')
10474 int j;
10475 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10476 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10477 return true;
10479 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10480 return true;
10483 return false;
10486 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10487 @GOTOFF in `reg'. */
10488 rtx
10489 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10490 rtx reg)
10492 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10493 return orig;
10495 if (GET_CODE (orig) == LABEL_REF
10496 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10498 if (reg == NULL_RTX)
10499 reg = gen_reg_rtx (Pmode);
10501 emit_insn (gen_symGOTOFF2reg (reg, orig));
10502 return reg;
10504 else if (GET_CODE (orig) == SYMBOL_REF)
10506 if (reg == NULL_RTX)
10507 reg = gen_reg_rtx (Pmode);
10509 emit_insn (gen_symGOT2reg (reg, orig));
10510 return reg;
10512 return orig;
10515 /* Given a (logical) mode size and an offset in bytes, try to find the
10516 appropriate displacement value for a mov insn. On SH the displacements
10517 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10518 15 bytes in QImode. To compensate for this we create a new base address by
10519 adding an adjustment value to it.
10521 If the originally requested offset is greater than 127 we prefer using
10522 values 124..127 over 128..131 to increase opportunities to use the
10523 add #imm, Rn insn.
10525 In some cases it is possible that a requested offset might seem unaligned
10526 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10527 This is compensated by adjusting the base address so that the effective
10528 address of the displacement move insn will be aligned.
10530 This is not the best possible way of rebasing the base address, as it
10531 does not look at other present displacement addressings around it.
10532 In some cases this can create more base address adjustments than would
10533 actually be necessary. */
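/* A worked example with illustrative numbers:  for an SImode access at
   offset 68 the 60 byte mov.l displacement limit is exceeded, so
   offset_adjust becomes 64 and mov_disp becomes 4; the access is then
   rebased as something like  add #64,Rn  followed by  mov.l @(4,Rn),Rm.  */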
10534 struct disp_adjust
10536 rtx offset_adjust;
10537 rtx mov_disp;
10540 static struct disp_adjust
10541 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10543 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10545 /* Do not try to use SH2A's large displacements here, because this would
10546 effectively disable the small displacement insns. */
10547 const int mode_sz = GET_MODE_SIZE (mode);
10548 const int mov_insn_sz = mov_insn_size (mode, false);
10549 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10550 const int max_disp_next = max_disp + mov_insn_sz;
10551 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10552 HOST_WIDE_INT offset_adjust;
10554 /* In some cases this actually does happen and we must check for it. */
10555 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10556 return res;
10558 /* Keeps the previous behavior for QImode displacement addressing.
10559 This just decides how the offset is re-based. Removing this special
10560 case will result in slightly bigger code on average, but it's not that
10561 bad actually. */
10562 if (mov_insn_sz == 1)
10563 align_modifier = 0;
10565 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10567 if (mode_sz + offset - offset_adjust <= max_disp_next)
10569 res.offset_adjust = GEN_INT (offset_adjust);
10570 res.mov_disp = GEN_INT (offset - offset_adjust);
10573 return res;
10576 /* Try to modify an illegitimate address and make it legitimate.
10577 If we find one, return the new, valid address.
10578 Otherwise, return the original address. */
10579 static rtx
10580 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10582 if (flag_pic)
10583 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10585 if (TARGET_SHMEDIA)
10586 return x;
10588 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10589 || (TARGET_SH2E && mode == SFmode))
10590 return x;
10592 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10593 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10595 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10596 INTVAL (XEXP (x, 1)));
10598 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10600 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10601 adj.offset_adjust, NULL_RTX, 0,
10602 OPTAB_LIB_WIDEN);
10603 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10606 return x;
10609 /* Attempt to replace *p, which is an address that needs reloading, with
10610 a valid memory address for an operand of mode MODE.
10611 Like for sh_legitimize_address, for the SH we try to get a normal form
10612 of the address. That will allow inheritance of the address reloads. */
10613 bool
10614 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10615 int itype)
10617 enum reload_type type = (enum reload_type) itype;
10618 const int mode_sz = GET_MODE_SIZE (mode);
10620 if (sh_lra_p ())
10621 return false;
10623 if (! ALLOW_INDEXED_ADDRESS
10624 && GET_CODE (*p) == PLUS
10625 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10627 *p = copy_rtx (*p);
10628 push_reload (*p, NULL_RTX, p, NULL,
10629 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10630 return true;
10633 if (! ALLOW_INDEXED_ADDRESS
10634 && GET_CODE (*p) == PLUS
10635 && GET_CODE (XEXP (*p, 0)) == PLUS)
10637 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10638 XEXP (XEXP (*p, 0), 1));
10639 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10640 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10641 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10642 return true;
10645 if (TARGET_SHMEDIA)
10646 return false;
10648 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10649 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10650 && (ALLOW_INDEXED_ADDRESS
10651 || XEXP (*p, 0) == stack_pointer_rtx
10652 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10654 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10655 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10657 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10659 push_reload (*p, NULL_RTX, p, NULL,
10660 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10661 return true;
10664 if (TARGET_SH2E && mode == SFmode)
10666 *p = copy_rtx (*p);
10667 push_reload (*p, NULL_RTX, p, NULL,
10668 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10669 return true;
10672 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10673 moves because then reload has a problem figuring the constraint
10674 that the move insn target/source reg must be R0.
10675 Or maybe some handling is wrong in sh_secondary_reload for this
10676 to work properly? */
10677 if ((mode_sz == 4 || mode_sz == 8)
10678 && ! (TARGET_SH4 && mode == DFmode)
10679 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10681 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10682 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10683 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10684 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10685 return true;
10689 /* We must re-recognize what we created before. */
10690 if (GET_CODE (*p) == PLUS
10691 && (mode_sz == 4 || mode_sz == 8)
10692 && GET_CODE (XEXP (*p, 0)) == PLUS
10693 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10694 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10695 && CONST_INT_P (XEXP (*p, 1))
10696 && ! (TARGET_SH2E && mode == SFmode))
10698 /* Because this address is so complex, we know it must have
10699 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10700 it is already unshared, and needs no further unsharing. */
10701 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10702 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10703 return true;
10706 return false;
10709 /* In the name of slightly smaller debug output, and to cater to
10710 general assembler lossage, recognize various UNSPEC sequences
10711 and turn them back into a direct symbol reference. */
10712 static rtx
10713 sh_delegitimize_address (rtx orig_x)
10715 rtx x, y;
10717 orig_x = delegitimize_mem_from_attrs (orig_x);
10719 x = orig_x;
10720 if (MEM_P (x))
10721 x = XEXP (x, 0);
10722 if (GET_CODE (x) == CONST)
10724 y = XEXP (x, 0);
10725 if (GET_CODE (y) == UNSPEC)
10727 if (XINT (y, 1) == UNSPEC_GOT
10728 || XINT (y, 1) == UNSPEC_GOTOFF
10729 || XINT (y, 1) == UNSPEC_SYMOFF)
10730 return XVECEXP (y, 0, 0);
10731 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10733 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10735 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10737 if (GET_CODE (symplt) == UNSPEC
10738 && XINT (symplt, 1) == UNSPEC_PLT)
10739 return XVECEXP (symplt, 0, 0);
10742 else if (TARGET_SHMEDIA
10743 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10744 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10746 rtx offset = XVECEXP (y, 0, 1);
10748 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10749 if (MEM_P (orig_x))
10750 x = replace_equiv_address_nv (orig_x, x);
10751 return x;
10756 return orig_x;
10759 /* Mark the use of a constant in the literal table. If the constant
10760 has multiple labels, make it unique. */
10761 static rtx
10762 mark_constant_pool_use (rtx x)
10764 rtx_insn *insn, *lab;
10765 rtx pattern;
10767 if (x == NULL_RTX)
10768 return x;
10770 switch (GET_CODE (x))
10772 case LABEL_REF:
10773 x = XEXP (x, 0);
10774 case CODE_LABEL:
10775 break;
10776 default:
10777 return x;
10780 /* Get the first label in the list of labels for the same constant
10781 and delete the other labels in the list. */
10782 lab = as_a <rtx_insn *> (x);
10783 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10785 if (!LABEL_P (insn)
10786 || LABEL_REFS (insn) != NEXT_INSN (insn))
10787 break;
10788 lab = insn;
10791 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10792 as_a<rtx_insn *> (insn)->set_deleted ();
10794 /* Mark constants in a window. */
10795 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10797 if (!NONJUMP_INSN_P (insn))
10798 continue;
10800 pattern = PATTERN (insn);
10801 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10802 continue;
10804 switch (XINT (pattern, 1))
10806 case UNSPECV_CONST2:
10807 case UNSPECV_CONST4:
10808 case UNSPECV_CONST8:
10809 XVECEXP (pattern, 0, 1) = const1_rtx;
10810 break;
10811 case UNSPECV_WINDOW_END:
10812 if (XVECEXP (pattern, 0, 0) == x)
10813 return lab;
10814 break;
10815 case UNSPECV_CONST_END:
10816 return lab;
10817 default:
10818 break;
10822 return lab;
10825 /* Return true if it's possible to redirect BRANCH1 to the destination
10826 of an unconditional jump BRANCH2. We only want to do this if the
10827 resulting branch will have a short displacement. */
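/* (The 256 byte distance limit below roughly matches the reach of the
   SH conditional branch insns, whose 8-bit pc-relative displacement
   covers about -256..+254 bytes.)  */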
10828 static bool
10829 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10831 if (flag_expensive_optimizations && simplejump_p (branch2))
10833 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10834 rtx_insn *insn;
10835 int distance;
10837 for (distance = 0, insn = NEXT_INSN (branch1);
10838 insn && distance < 256;
10839 insn = PREV_INSN (insn))
10841 if (insn == dest)
10842 return true;
10843 else
10844 distance += get_attr_length (insn);
10846 for (distance = 0, insn = NEXT_INSN (branch1);
10847 insn && distance < 256;
10848 insn = NEXT_INSN (insn))
10850 if (insn == dest)
10851 return true;
10852 else
10853 distance += get_attr_length (insn);
10856 return false;
10859 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10860 bool
10861 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10862 unsigned int new_reg)
10864 /* Interrupt functions can only use registers that have already been
10865 saved by the prologue, even if they would normally be
10866 call-clobbered. */
10867 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10868 return false;
10870 return true;
10873 /* Function to update the integer COST
10874 based on the relationship between INSN that is dependent on
10875 DEP_INSN through the dependence LINK. The default is to make no
10876 adjustment to COST. This can be used for example to specify to
10877 the scheduler that an output- or anti-dependence does not incur
10878 the same cost as a data-dependence. The return value should be
10879 the new value for COST. */
10880 static int
10881 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10882 rtx_insn *dep_insn, int cost)
10884 rtx reg, use_pat;
10886 if (TARGET_SHMEDIA)
10888 /* On SHmedia, if the dependence is an anti-dependence or
10889 output-dependence, there is no cost. */
10890 if (REG_NOTE_KIND (link) != 0)
10892 /* However, dependencies between target register loads and
10893 uses of the register in a subsequent block that are separated
10895 by a conditional branch are not modelled - we have to make do with
10895 the anti-dependency between the target register load and the
10896 conditional branch that ends the current block. */
10897 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10898 && GET_CODE (PATTERN (dep_insn)) == SET
10899 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10900 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10901 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10903 int orig_cost = cost;
10904 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10905 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10906 ? insn : JUMP_LABEL (insn));
10907 /* On the likely path, the branch costs 1, on the unlikely path,
10908 it costs 3. */
10909 cost--;
10911 target = next_active_insn (target);
10912 while (target && ! flow_dependent_p (target, dep_insn)
10913 && --cost > 0);
10914 /* If two branches are executed in immediate succession, with the
10915 first branch properly predicted, this causes a stall at the
10916 second branch, hence we won't need the target for the
10917 second branch for two cycles after the launch of the first
10918 branch. */
10919 if (cost > orig_cost - 2)
10920 cost = orig_cost - 2;
10922 else
10923 cost = 0;
10926 else if (get_attr_is_mac_media (insn)
10927 && get_attr_is_mac_media (dep_insn))
10928 cost = 1;
10930 else if (! reload_completed
10931 && GET_CODE (PATTERN (insn)) == SET
10932 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10933 && GET_CODE (PATTERN (dep_insn)) == SET
10934 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10935 && cost < 4)
10936 cost = 4;
10937 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10938 that is needed at the target. */
10939 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10940 && ! flow_dependent_p (insn, dep_insn))
10941 cost--;
10943 else if (REG_NOTE_KIND (link) == 0)
10945 enum attr_type type;
10946 rtx dep_set;
10948 if (recog_memoized (insn) < 0
10949 || recog_memoized (dep_insn) < 0)
10950 return cost;
10952 dep_set = single_set (dep_insn);
10954 /* The latency that we specify in the scheduling description refers
10955 to the actual output, not to an auto-increment register; for that,
10956 the latency is one. */
10957 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10959 rtx set = single_set (insn);
10961 if (set
10962 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10963 && (!MEM_P (SET_DEST (set))
10964 || !reg_mentioned_p (SET_DEST (dep_set),
10965 XEXP (SET_DEST (set), 0))))
10966 cost = 1;
10968 /* The only input for a call that is timing-critical is the
10969 function's address. */
10970 if (CALL_P (insn))
10972 rtx call = get_call_rtx_from (insn);
10973 if (call
10974 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10975 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10976 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10977 cost -= TARGET_SH4_300 ? 3 : 6;
10979 /* Likewise, the most timing critical input for an sfuncs call
10980 is the function address. However, sfuncs typically start
10981 using their arguments pretty quickly.
10982 Assume a four cycle delay for SH4 before they are needed.
10983 Cached ST40-300 calls are quicker, so assume only a one
10984 cycle delay there.
10985 ??? Maybe we should encode the delays till input registers
10986 are needed by sfuncs into the sfunc call insn. */
10987 /* All sfunc calls are parallels with at least four components.
10988 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10989 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10990 && XVECLEN (PATTERN (insn), 0) >= 4
10991 && (reg = sfunc_uses_reg (insn)))
10993 if (! reg_set_p (reg, dep_insn))
10994 cost -= TARGET_SH4_300 ? 1 : 4;
10996 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10998 enum attr_type dep_type = get_attr_type (dep_insn);
11000 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
11001 cost--;
11002 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
11003 && (type = get_attr_type (insn)) != TYPE_CALL
11004 && type != TYPE_SFUNC)
11005 cost--;
11006 /* When the preceding instruction loads the shift amount of
11007 the following SHAD/SHLD, the latency of the load is increased
11008 by 1 cycle. */
11009 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11010 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11011 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11012 XEXP (SET_SRC (single_set (insn)),
11013 1)))
11014 cost++;
11015 /* When an LS group instruction with a latency of less than
11016 3 cycles is followed by a double-precision floating-point
11017 instruction, FIPR, or FTRV, the latency of the first
11018 instruction is increased to 3 cycles. */
11019 else if (cost < 3
11020 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11021 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11022 cost = 3;
11023 /* The lsw register of a double-precision computation is ready one
11024 cycle earlier. */
11025 else if (reload_completed
11026 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11027 && (use_pat = single_set (insn))
11028 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11029 SET_SRC (use_pat)))
11030 cost -= 1;
11032 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11033 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11034 cost -= 1;
11036 else if (TARGET_SH4_300)
11038 /* Stores need their input register two cycles later. */
11039 if (dep_set && cost >= 1
11040 && ((type = get_attr_type (insn)) == TYPE_STORE
11041 || type == TYPE_PSTORE
11042 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11044 rtx set = single_set (insn);
11046 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11047 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11049 cost -= 2;
11050 /* But don't reduce the cost below 1 if the address depends
11051 on a side effect of dep_insn. */
11052 if (cost < 1
11053 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11054 cost = 1;
11059 /* An anti-dependence penalty of two applies if the first insn is a double
11060 precision fadd / fsub / fmul. */
11061 else if (!TARGET_SH4_300
11062 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11063 && recog_memoized (dep_insn) >= 0
11064 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11065 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11066 /* A lot of alleged anti-flow dependences are fake,
11067 so check this one is real. */
11068 && flow_dependent_p (dep_insn, insn))
11069 cost = 2;
11071 return cost;
11074 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11075 if DEP_INSN is anti-flow dependent on INSN. */
11076 static bool
11077 flow_dependent_p (rtx insn, rtx dep_insn)
11079 rtx tmp = PATTERN (insn);
11081 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11082 return tmp == NULL_RTX;
11085 /* A helper function for flow_dependent_p called through note_stores. */
11086 static void
11087 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11089 rtx * pinsn = (rtx *) data;
11091 if (*pinsn && reg_referenced_p (x, *pinsn))
11092 *pinsn = NULL_RTX;
11095 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11096 'special function' patterns (type sfunc) that clobber pr, but that
11097 do not look like function calls to leaf_function_p. Hence we must
11098 do this extra check. */
11099 static int
11100 sh_pr_n_sets (void)
11102 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11105 /* Return where to allocate pseudo for a given hard register initial
11106 value. */
11107 static rtx
11108 sh_allocate_initial_value (rtx hard_reg)
11110 rtx x;
11112 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11114 if (crtl->is_leaf
11115 && ! sh_pr_n_sets ()
11116 && ! (TARGET_SHCOMPACT
11117 && ((crtl->args.info.call_cookie
11118 & ~ CALL_COOKIE_RET_TRAMP (1))
11119 || crtl->saves_all_registers)))
11120 x = hard_reg;
11121 else
11122 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11124 else
11125 x = NULL_RTX;
11127 return x;
11130 /* This function returns "2" to indicate dual issue for the SH4
11131 processor. To be used by the DFA pipeline description. */
11132 static int
11133 sh_issue_rate (void)
11135 if (TARGET_SUPERSCALAR)
11136 return 2;
11137 else
11138 return 1;
11141 /* Functions for ready queue reordering for sched1. */
11143 /* Get weight for mode for a set x. */
11144 static short
11145 find_set_regmode_weight (rtx x, machine_mode mode)
11147 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11148 return 1;
11149 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11151 if (REG_P (SET_DEST (x)))
11153 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11154 return 1;
11155 else
11156 return 0;
11158 return 1;
11160 return 0;
11163 /* Get regmode weight for insn. */
11164 static short
11165 find_insn_regmode_weight (rtx insn, machine_mode mode)
11167 short reg_weight = 0;
11168 rtx x;
11170 /* Increment weight for each register born here. */
11171 x = PATTERN (insn);
11172 reg_weight += find_set_regmode_weight (x, mode);
11173 if (GET_CODE (x) == PARALLEL)
11175 int j;
11176 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11178 x = XVECEXP (PATTERN (insn), 0, j);
11179 reg_weight += find_set_regmode_weight (x, mode);
11182 /* Decrement weight for each register that dies here. */
11183 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11185 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11187 rtx note = XEXP (x, 0);
11188 if (REG_P (note) && GET_MODE (note) == mode)
11189 reg_weight--;
11192 return reg_weight;
11195 /* Calculate regmode weights for all insns of a basic block. */
11196 static void
11197 find_regmode_weight (basic_block b, machine_mode mode)
11199 rtx_insn *insn, *next_tail, *head, *tail;
11201 get_ebb_head_tail (b, b, &head, &tail);
11202 next_tail = NEXT_INSN (tail);
11204 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11206 /* Handle register life information. */
11207 if (!INSN_P (insn))
11208 continue;
11210 if (mode == SFmode)
11211 INSN_REGMODE_WEIGHT (insn, mode) =
11212 find_insn_regmode_weight (insn, mode)
11213 + 2 * find_insn_regmode_weight (insn, DFmode);
11214 else if (mode == SImode)
11215 INSN_REGMODE_WEIGHT (insn, mode) =
11216 find_insn_regmode_weight (insn, mode)
11217 + 2 * find_insn_regmode_weight (insn, DImode);
11221 /* Comparison function for ready queue sorting. */
11222 static int
11223 rank_for_reorder (const void *x, const void *y)
11225 rtx_insn *tmp = *(rtx_insn * const *) y;
11226 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11228 /* The insn in a schedule group should be issued first. */
11229 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11230 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11232 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11233 minimizes instruction movement, thus minimizing sched's effect on
11234 register pressure. */
11235 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11238 /* Resort the N-element array A, in which only the last element may be out of order. */
11239 static void
11240 swap_reorder (rtx_insn **a, int n)
11242 rtx_insn *insn = a[n - 1];
11243 int i = n - 2;
11245 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11247 a[i + 1] = a[i];
11248 i -= 1;
11250 a[i + 1] = insn;
11253 /* Sort the ready list by ascending priority. */
11254 static void
11255 ready_reorder (rtx_insn **ready, int nready)
11257 if (nready == 2)
11258 swap_reorder (ready, nready);
11259 else if (nready > 2)
11260 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11263 /* Count life regions of r0 for a block. */
11264 static int
11265 find_r0_life_regions (basic_block b)
11267 rtx_insn *end, *insn;
11268 rtx pset;
11269 rtx r0_reg;
11270 int live;
11271 int set;
11272 int death = 0;
11274 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11276 set = 1;
11277 live = 1;
11279 else
11281 set = 0;
11282 live = 0;
11285 insn = BB_HEAD (b);
11286 end = BB_END (b);
11287 r0_reg = gen_rtx_REG (SImode, R0_REG);
11288 while (1)
11290 if (INSN_P (insn))
11292 if (find_regno_note (insn, REG_DEAD, R0_REG))
11294 death++;
11295 live = 0;
11297 if (!live
11298 && (pset = single_set (insn))
11299 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11300 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11302 set++;
11303 live = 1;
11306 if (insn == end)
11307 break;
11308 insn = NEXT_INSN (insn);
11310 return set - death;
11313 /* Calculate regmode weights for all insns of all basic blocks. */
11314 static void
11315 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11316 int verbose ATTRIBUTE_UNUSED,
11317 int old_max_uid)
11319 basic_block b;
11321 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11322 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11323 r0_life_regions = 0;
11325 FOR_EACH_BB_REVERSE_FN (b, cfun)
11327 find_regmode_weight (b, SImode);
11328 find_regmode_weight (b, SFmode);
11329 if (!reload_completed)
11330 r0_life_regions += find_r0_life_regions (b);
11333 CURR_REGMODE_PRESSURE (SImode) = 0;
11334 CURR_REGMODE_PRESSURE (SFmode) = 0;
11337 /* Cleanup. */
11338 static void
11339 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11340 int verbose ATTRIBUTE_UNUSED)
11342 if (regmode_weight[0])
11344 free (regmode_weight[0]);
11345 regmode_weight[0] = NULL;
11347 if (regmode_weight[1])
11349 free (regmode_weight[1]);
11350 regmode_weight[1] = NULL;
11354 /* The set of supported scalar modes differs from the default only in
11355 TImode for 32-bit SHMEDIA. */
11356 static bool
11357 sh_scalar_mode_supported_p (machine_mode mode)
11359 if (TARGET_SHMEDIA32 && mode == TImode)
11360 return false;
11362 return default_scalar_mode_supported_p (mode);
11365 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11366 keep count of register pressures on SImode and SFmode. */
11367 static int
11368 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11369 int sched_verbose ATTRIBUTE_UNUSED,
11370 rtx_insn *insn,
11371 int can_issue_more)
11373 if (GET_CODE (PATTERN (insn)) != USE
11374 && GET_CODE (PATTERN (insn)) != CLOBBER)
11375 cached_can_issue_more = can_issue_more - 1;
11376 else
11377 cached_can_issue_more = can_issue_more;
11379 if (reload_completed)
11380 return cached_can_issue_more;
11382 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11383 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11385 return cached_can_issue_more;
11388 static void
11389 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11390 int verbose ATTRIBUTE_UNUSED,
11391 int veclen ATTRIBUTE_UNUSED)
11393 CURR_REGMODE_PRESSURE (SImode) = 0;
11394 CURR_REGMODE_PRESSURE (SFmode) = 0;
11397 /* Some magic numbers. */
11398 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11399 functions that already have high pressure on r0. */
11400 #define R0_MAX_LIFE_REGIONS 2
11401 /* Register Pressure thresholds for SImode and SFmode registers. */
11402 #define SIMODE_MAX_WEIGHT 5
11403 #define SFMODE_MAX_WEIGHT 10
11405 /* Return true if the pressure is high for MODE. */
11406 static bool
11407 high_pressure (machine_mode mode)
11409 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11410 functions that already have high pressure on r0. */
11411 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11412 return true;
11414 if (mode == SFmode)
11415 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11416 else
11417 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11420 /* Reorder ready queue if register pressure is high. */
11421 static int
11422 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11423 int sched_verbose ATTRIBUTE_UNUSED,
11424 rtx_insn **ready,
11425 int *n_readyp,
11426 int clock_var ATTRIBUTE_UNUSED)
11428 if (reload_completed)
11429 return sh_issue_rate ();
11431 if (high_pressure (SFmode) || high_pressure (SImode))
11433 ready_reorder (ready, *n_readyp);
11436 return sh_issue_rate ();
11439 /* Skip cycles if the current register pressure is high. */
11440 static int
11441 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11442 int sched_verbose ATTRIBUTE_UNUSED,
11443 rtx_insn **ready ATTRIBUTE_UNUSED,
11444 int *n_readyp ATTRIBUTE_UNUSED,
11445 int clock_var ATTRIBUTE_UNUSED)
11447 if (reload_completed)
11448 return cached_can_issue_more;
11450 if (high_pressure(SFmode) || high_pressure (SImode))
11451 skip_cycles = 1;
11453 return cached_can_issue_more;
11456 /* Skip cycles without sorting the ready queue. This will move insns from
11457 Q -> R. If this is the last cycle we are skipping, allow sorting of the
11458 ready queue by sh_reorder. */
11460 /* Generally, skipping this many cycles is sufficient for all insns to move
11461 from Q -> R. */
11462 #define MAX_SKIPS 8
11464 static int
11465 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11466 int sched_verbose ATTRIBUTE_UNUSED,
11467 rtx_insn *insn ATTRIBUTE_UNUSED,
11468 int last_clock_var,
11469 int clock_var,
11470 int *sort_p)
11472 if (reload_completed)
11473 return 0;
11475 if (skip_cycles)
11477 if ((clock_var - last_clock_var) < MAX_SKIPS)
11479 *sort_p = 0;
11480 return 1;
11482 /* If this is the last cycle we are skipping, allow reordering of R. */
11483 if ((clock_var - last_clock_var) == MAX_SKIPS)
11485 *sort_p = 1;
11486 return 1;
11490 skip_cycles = 0;
11492 return 0;
11495 /* SHmedia requires registers for branches, so we can't generate new
11496 branches past reload. */
11497 static bool
11498 sh_cannot_modify_jumps_p (void)
11500 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11503 static reg_class_t
11504 sh_target_reg_class (void)
11506 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11509 static bool
11510 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11512 if (! shmedia_space_reserved_for_target_registers)
11513 return 0;
11514 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11515 return 0;
11517 HARD_REG_SET dummy;
11518 if (calc_live_regs (&dummy) >= 6 * 8)
11519 return 1;
11520 return 0;
11523 static bool
11524 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11526 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11530 /* On the SH1..SH4, the trampoline looks like
11531 2 0002 D202 mov.l l2,r2
11532 1 0000 D301 mov.l l1,r3
11533 3 0004 422B jmp @r2
11534 4 0006 0009 nop
11535 5 0008 00000000 l1: .long area
11536 6 000c 00000000 l2: .long function
11538 SH5 (compact) uses r1 instead of r3 for the static chain. */
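/* The SH1..SH4 layout above is what the fall-through path of
   sh_trampoline_init materializes: two SImode stores holding the four
   16-bit opcodes (0xd202/0xd301 and 0x422b/0x0009, byte-swapped as needed
   for the target endianness), then the static chain value at offset 8 and
   the function address at offset 12, followed, on SH4/SH5 class targets,
   by an instruction cache invalidation (inline or via __ic_invalidate).  */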
11541 /* Emit RTL insns to initialize the variable parts of a trampoline.
11542 FNADDR is an RTX for the address of the function's pure code.
11543 CXT is an RTX for the static chain value for the function. */
11544 static void
11545 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11547 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11548 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11550 if (TARGET_SHMEDIA64)
11552 rtx tramp_templ;
11553 int fixed_len;
11555 rtx movi1 = GEN_INT (0xcc000010);
11556 rtx shori1 = GEN_INT (0xc8000010);
11557 rtx src, dst;
11559 /* The following trampoline works within a +- 128 KB range for cxt:
11560 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11561 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11562 gettr tr1,r1; blink tr0,r63 */
11563 /* Address rounding makes it hard to compute the exact bounds of the
11564 offset for this trampoline, but we have a rather generous offset
11565 range, so frame_offset should do fine as an upper bound. */
11566 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11568 /* ??? could optimize this trampoline initialization
11569 by writing DImode words with two insns each. */
11570 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11571 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11572 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11573 insn = gen_rtx_AND (DImode, insn, mask);
11574 /* Or in ptb/u .,tr1 pattern */
11575 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11576 insn = force_operand (insn, NULL_RTX);
11577 insn = gen_lowpart (SImode, insn);
11578 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11579 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11580 insn = gen_rtx_AND (DImode, insn, mask);
11581 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11582 insn = gen_lowpart (SImode, insn);
11583 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11584 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11585 insn = gen_rtx_AND (DImode, insn, mask);
11586 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11587 insn = gen_lowpart (SImode, insn);
11588 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11589 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11590 insn = gen_rtx_AND (DImode, insn, mask);
11591 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11592 insn = gen_lowpart (SImode, insn);
11593 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11594 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11595 insn = gen_rtx_AND (DImode, insn, mask);
11596 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11597 insn = gen_lowpart (SImode, insn);
11598 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11599 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11600 GEN_INT (0x6bf10600));
11601 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11602 GEN_INT (0x4415fc10));
11603 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11604 GEN_INT (0x4401fff0));
11605 emit_insn (gen_ic_invalidate_line (tramp));
11606 return;
11608 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11609 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11611 tramp_templ = gen_datalabel_ref (tramp_templ);
11612 dst = tramp_mem;
11613 src = gen_const_mem (BLKmode, tramp_templ);
11614 set_mem_align (dst, 256);
11615 set_mem_align (src, 64);
11616 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11618 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11619 emit_move_insn (adjust_address (tramp_mem, Pmode,
11620 fixed_len + GET_MODE_SIZE (Pmode)),
11621 cxt);
11622 emit_insn (gen_ic_invalidate_line (tramp));
11623 return;
11625 else if (TARGET_SHMEDIA)
11627 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11628 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11629 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11630 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11631 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11632 rotated right by 10, with the high 16 bits of every 32 selected. */
11633 rtx movishori
11634 = force_reg (V2HImode, (simplify_gen_subreg
11635 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11636 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11637 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11639 fnaddr = force_reg (SImode, fnaddr);
11640 cxt = force_reg (SImode, cxt);
11641 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11642 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11643 movishori));
11644 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11645 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11646 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11647 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11648 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11649 gen_rtx_SUBREG (V2HImode, cxt, 0),
11650 movishori));
11651 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11652 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11653 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11654 if (TARGET_LITTLE_ENDIAN)
11656 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11657 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11659 else
11661 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11662 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11664 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11665 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11666 emit_insn (gen_ic_invalidate_line (tramp));
11667 return;
11669 else if (TARGET_SHCOMPACT)
11671 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11672 return;
11674 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11675 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11676 SImode));
11677 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11678 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11679 SImode));
11680 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11681 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11682 if (TARGET_HARD_SH4 || TARGET_SH5)
11684 if (!TARGET_INLINE_IC_INVALIDATE
11685 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11686 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11687 FUNCTION_ORDINARY),
11688 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11689 else
11690 emit_insn (gen_ic_invalidate_line (tramp));
11694 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11695 static rtx
11696 sh_trampoline_adjust_address (rtx tramp)
11698 if (TARGET_SHMEDIA)
11699 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11700 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11701 return tramp;
11704 /* FIXME: This is overly conservative. A SHcompact function that
11705 receives arguments ``by reference'' will have them stored in its
11706 own stack frame, so it must not pass pointers or references to
11707 these arguments to other functions by means of sibling calls. */
11708 /* If PIC, we cannot make sibling calls to global functions
11709 because the PLT requires r12 to be live. */
11710 static bool
11711 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11713 return (1
11714 && (! TARGET_SHCOMPACT
11715 || crtl->args.info.stack_regs == 0)
11716 && ! sh_cfun_interrupt_handler_p ()
11717 && (! flag_pic
11718 || (decl && ! TREE_PUBLIC (decl))
11719 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11722 /* Machine specific built-in functions. */
11724 struct builtin_description
11726 bool (* const is_enabled) (void);
11727 const enum insn_code icode;
11728 const char *const name;
11729 int signature;
11730 tree fndecl;
11733 static bool
11734 shmedia_builtin_p (void)
11736 return TARGET_SHMEDIA;
11739 /* This predicate is for built-ins that are not SHmedia specific, such as
11740 the FPSCR built-ins below. */
11741 static bool
11742 sh1_builtin_p (void)
11744 return TARGET_SH1;
11747 /* Describe number and signedness of arguments; arg[0] == result
11748 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11749 /* 9: 64-bit pointer, 10: 32-bit pointer */
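/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: the result and the
   first argument take the corresponding insn operand's mode (signedness
   immaterial), while the last argument is an unsigned int -- the shift
   count or control value of MPERM_W, MSHALDS_W and the V4HI shift
   built-ins.  SH_BLTIN_PV is { 0, 8 }: no result (void) and a single
   pointer argument, as used for ALLOCO.  */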
11750 static const char signature_args[][4] =
11752 #define SH_BLTIN_V2SI2 0
11753 { 4, 4 },
11754 #define SH_BLTIN_V4HI2 1
11755 { 4, 4 },
11756 #define SH_BLTIN_V2SI3 2
11757 { 4, 4, 4 },
11758 #define SH_BLTIN_V4HI3 3
11759 { 4, 4, 4 },
11760 #define SH_BLTIN_V8QI3 4
11761 { 4, 4, 4 },
11762 #define SH_BLTIN_MAC_HISI 5
11763 { 1, 4, 4, 1 },
11764 #define SH_BLTIN_SH_HI 6
11765 { 4, 4, 1 },
11766 #define SH_BLTIN_SH_SI 7
11767 { 4, 4, 1 },
11768 #define SH_BLTIN_V4HI2V2SI 8
11769 { 4, 4, 4 },
11770 #define SH_BLTIN_V4HI2V8QI 9
11771 { 4, 4, 4 },
11772 #define SH_BLTIN_SISF 10
11773 { 4, 2 },
11774 #define SH_BLTIN_LDUA_L 11
11775 { 2, 10 },
11776 #define SH_BLTIN_LDUA_Q 12
11777 { 1, 10 },
11778 #define SH_BLTIN_STUA_L 13
11779 { 0, 10, 2 },
11780 #define SH_BLTIN_STUA_Q 14
11781 { 0, 10, 1 },
11782 #define SH_BLTIN_LDUA_L64 15
11783 { 2, 9 },
11784 #define SH_BLTIN_LDUA_Q64 16
11785 { 1, 9 },
11786 #define SH_BLTIN_STUA_L64 17
11787 { 0, 9, 2 },
11788 #define SH_BLTIN_STUA_Q64 18
11789 { 0, 9, 1 },
11790 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11791 #define SH_BLTIN_2 19
11792 #define SH_BLTIN_SU 19
11793 { 1, 2 },
11794 #define SH_BLTIN_3 20
11795 #define SH_BLTIN_SUS 20
11796 { 2, 2, 1 },
11797 #define SH_BLTIN_PSSV 21
11798 { 0, 8, 2, 2 },
11799 #define SH_BLTIN_XXUU 22
11800 #define SH_BLTIN_UUUU 22
11801 { 1, 1, 1, 1 },
11802 #define SH_BLTIN_PV 23
11803 { 0, 8 },
11804 #define SH_BLTIN_VP 24
11805 { 8, 0 },
11806 #define SH_BLTIN_UV 25
11807 { 1, 0 },
11808 #define SH_BLTIN_VU 26
11809 { 0, 1 },
11811 /* mcmv: operands considered unsigned. */
11812 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11813 /* mperm: control value considered unsigned int. */
11814 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11815 /* mshards_q: returns signed short. */
11816 /* nsb: takes long long arg, returns unsigned char. */
11817 static struct builtin_description bdesc[] =
11819 { shmedia_builtin_p,
11820 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11821 { shmedia_builtin_p,
11822 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11823 { shmedia_builtin_p,
11824 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11825 { shmedia_builtin_p,
11826 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11887 { shmedia_builtin_p,
11888 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11889 { shmedia_builtin_p,
11890 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11891 { shmedia_builtin_p,
11892 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11893 { shmedia_builtin_p,
11894 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11895 { shmedia_builtin_p,
11896 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11897 { shmedia_builtin_p,
11898 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11899 { shmedia_builtin_p,
11900 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11901 { shmedia_builtin_p,
11902 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11903 { shmedia_builtin_p,
11904 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11905 { shmedia_builtin_p,
11906 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11907 { shmedia_builtin_p,
11908 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11909 { shmedia_builtin_p,
11910 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11911 { shmedia_builtin_p,
11912 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11913 { shmedia_builtin_p,
11914 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11915 { shmedia_builtin_p,
11916 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11917 { shmedia_builtin_p,
11918 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11919 { shmedia_builtin_p,
11920 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11921 { shmedia_builtin_p,
11922 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11923 { shmedia_builtin_p,
11924 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11925 { shmedia_builtin_p,
11926 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11927 { shmedia_builtin_p,
11928 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11929 { shmedia_builtin_p,
11930 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11931 { shmedia_builtin_p,
11932 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11933 { shmedia_builtin_p,
11934 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11935 { shmedia_builtin_p,
11936 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11937 { shmedia_builtin_p,
11938 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11939 { shmedia_builtin_p,
11940 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11941 { shmedia_builtin_p,
11942 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11943 { shmedia_builtin_p,
11944 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11945 { shmedia_builtin_p,
11946 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11947 { shmedia_builtin_p,
11948 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11949 { shmedia_builtin_p,
11950 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11951 { shmedia_builtin_p,
11952 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11953 { shmedia_builtin_p,
11954 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11955 { shmedia_builtin_p,
11956 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11957 { shmedia_builtin_p,
11958 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11959 { shmedia_builtin_p,
11960 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11961 { shmedia_builtin_p,
11962 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11963 { shmedia_builtin_p,
11964 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11965 { shmedia_builtin_p,
11966 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11967 { shmedia_builtin_p,
11968 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11969 { shmedia_builtin_p,
11970 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11971 { shmedia_builtin_p,
11972 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11973 { shmedia_builtin_p,
11974 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11975 { shmedia_builtin_p,
11976 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11977 { shmedia_builtin_p,
11978 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11979 { shmedia_builtin_p,
11980 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11981 { shmedia_builtin_p,
11982 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11983 { shmedia_builtin_p,
11984 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11986 { sh1_builtin_p,
11987 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11988 { sh1_builtin_p,
11989 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11992 static tree sh_builtin_get_fpscr;
11993 static tree sh_builtin_set_fpscr;
11995 static void
11996 sh_init_builtins (void)
11998 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11999 memset (shared, 0, sizeof shared);
12001 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12003 builtin_description* d = &bdesc[di];
12005 if (!d->is_enabled ())
12006 continue;
12008 tree type, arg_type = NULL_TREE;
12009 int signature = d->signature;
12011 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12012 type = shared[signature];
12013 else
12015 int has_result = signature_args[signature][0] != 0;
12016 tree args[3];
12018 if ((signature_args[signature][1] & 8)
12019 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12020 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12021 continue;
12022 if (! TARGET_FPU_ANY
12023 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12024 continue;
12025 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12026 args[i] = NULL_TREE;
12027 for (int i = 3; ; i--)
12029 int arg = signature_args[signature][i];
12030 int opno = i - 1 + has_result;
12032 if (arg & 8)
12033 arg_type = ptr_type_node;
12034 else if (arg)
12035 arg_type = (*lang_hooks.types.type_for_mode)
12036 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12037 else if (i)
12038 continue;
12039 else
12040 arg_type = void_type_node;
12041 if (i == 0)
12042 break;
12043 args[i-1] = arg_type;
12045 type = build_function_type_list (arg_type, args[0], args[1],
12046 args[2], NULL_TREE);
12047 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12048 shared[signature] = type;
12050 d->fndecl =
12051 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12052 NULL, NULL_TREE);
12053 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12054 if (d->icode == CODE_FOR_sts_fpscr)
12055 sh_builtin_get_fpscr = d->fndecl;
12056 else if (d->icode == CODE_FOR_set_fpscr)
12057 sh_builtin_set_fpscr = d->fndecl;
12061 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12063 static void
12064 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12066 const unsigned SH_FE_INVALID = 64;
12067 const unsigned SH_FE_DIVBYZERO = 32;
12068 const unsigned SH_FE_OVERFLOW = 16;
12069 const unsigned SH_FE_UNDERFLOW = 8;
12070 const unsigned SH_FE_INEXACT = 4;
12071 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12072 | SH_FE_DIVBYZERO
12073 | SH_FE_OVERFLOW
12074 | SH_FE_UNDERFLOW
12075 | SH_FE_INEXACT);
12076 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
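/* The SH_FE_* values are the FPSCR exception flag bits (bits 6..2 in the
   usual SH-4 FPSCR layout); shifting them left by SH_FE_EXCEPT_SHIFT
   yields the corresponding enable bits (bits 11..7).  The mask built
   below, ~((SH_FE_ALL_EXCEPT << 5) | SH_FE_ALL_EXCEPT) == ~0x0ffc,
   therefore clears both the enable bits and any accrued flags.  */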
12077 tree fenv_var, mask, ld_fenv, masked_fenv;
12078 tree new_fenv_var, reload_fenv, restore_fnenv;
12079 tree update_call, atomic_feraiseexcept, hold_fnclex;
12081 if (! TARGET_FPU_ANY)
12082 return;
12084 /* Generate the equivalent of:
12085 unsigned int fenv_var;
12086 fenv_var = __builtin_sh_get_fpscr ();
12088 unsigned int masked_fenv;
12089 masked_fenv = fenv_var & mask;
12091 __builtin_sh_set_fpscr (masked_fenv); */
12093 fenv_var = create_tmp_var (unsigned_type_node);
12094 mask = build_int_cst (unsigned_type_node,
12095 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12096 | SH_FE_ALL_EXCEPT));
12097 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12098 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12099 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12100 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12101 *hold = build2 (COMPOUND_EXPR, void_type_node,
12102 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12103 hold_fnclex);
12105 /* Store the value of masked_fenv to clear the exceptions:
12106 __builtin_sh_set_fpscr (masked_fenv); */
12108 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12110 /* Generate the equivalent of:
12111 unsigned int new_fenv_var;
12112 new_fenv_var = __builtin_sh_get_fpscr ();
12114 __builtin_sh_set_fpscr (fenv_var);
12116 __atomic_feraiseexcept (new_fenv_var); */
12118 new_fenv_var = create_tmp_var (unsigned_type_node);
12119 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12120 build_call_expr (sh_builtin_get_fpscr, 0));
12121 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12122 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12123 update_call = build_call_expr (atomic_feraiseexcept, 1,
12124 fold_convert (integer_type_node,
12125 new_fenv_var));
12126 *update = build2 (COMPOUND_EXPR, void_type_node,
12127 build2 (COMPOUND_EXPR, void_type_node,
12128 reload_fenv, restore_fnenv), update_call);
12131 /* Implements target hook vector_mode_supported_p. */
12132 bool
12133 sh_vector_mode_supported_p (machine_mode mode)
12135 if (TARGET_FPU_ANY
12136 && ((mode == V2SFmode)
12137 || (mode == V4SFmode)
12138 || (mode == V16SFmode)))
12139 return true;
12141 else if (TARGET_SHMEDIA
12142 && ((mode == V8QImode)
12143 || (mode == V2HImode)
12144 || (mode == V4HImode)
12145 || (mode == V2SImode)))
12146 return true;
12148 return false;
12151 bool
12152 sh_frame_pointer_required (void)
12154 /* If needed override this in other tm.h files to cope with various OS
12155 lossage requiring a frame pointer. */
12156 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12157 return true;
12159 if (crtl->profile)
12160 return true;
12162 return false;
12165 /* Implements target hook dwarf_calling_convention. Return an enum
12166 of dwarf_calling_convention. */
12168 int sh_dwarf_calling_convention (const_tree func)
12170 if (sh_attr_renesas_p (func))
12171 return DW_CC_GNU_renesas_sh;
12173 return DW_CC_normal;
12176 /* Returns the sh builtin decl for CODE. */
12177 static tree
12178 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12180 if (code >= ARRAY_SIZE (bdesc))
12181 return error_mark_node;
12183 if (!bdesc[code].is_enabled ())
12184 return error_mark_node;
12186 return bdesc[code].fndecl;
12189 /* Expand an expression EXP that calls a built-in function,
12190 with result going to TARGET if that's convenient
12191 (and in mode MODE if that's convenient).
12192 SUBTARGET may be used as the target for computing one of EXP's operands.
12193 IGNORE is nonzero if the value is to be ignored. */
12194 static rtx
12195 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12196 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12198 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12199 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12200 const struct builtin_description *d = &bdesc[fcode];
12201 enum insn_code icode = d->icode;
12202 int signature = d->signature;
12203 int nop = 0;
12204 rtx op[4];
12206 if (signature_args[signature][0])
12208 if (ignore)
12209 return NULL_RTX;
12211 machine_mode tmode = insn_data[icode].operand[0].mode;
12212 if (! target || GET_MODE (target) != tmode
12213 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12214 target = gen_reg_rtx (tmode);
12215 op[nop++] = target;
12217 else
12218 target = NULL_RTX;
12220 for (int i = 1; i <= 3; i++, nop++)
12222 tree arg;
12223 machine_mode opmode, argmode;
12224 tree optype;
12226 if (! signature_args[signature][i])
12227 break;
12228 arg = CALL_EXPR_ARG (exp, i - 1);
12229 if (arg == error_mark_node)
12230 return const0_rtx;
12231 if (signature_args[signature][i] & 8)
12233 opmode = ptr_mode;
12234 optype = ptr_type_node;
12236 else
12238 opmode = insn_data[icode].operand[nop].mode;
12239 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12241 argmode = TYPE_MODE (TREE_TYPE (arg));
12242 if (argmode != opmode)
12243 arg = build1 (NOP_EXPR, optype, arg);
12244 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12245 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12246 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12249 rtx pat = NULL_RTX;
12251 switch (nop)
12253 case 1:
12254 pat = (*insn_data[d->icode].genfun) (op[0]);
12255 break;
12256 case 2:
12257 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12258 break;
12259 case 3:
12260 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12261 break;
12262 case 4:
12263 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12264 break;
12265 default:
12266 gcc_unreachable ();
12268 if (! pat)
12269 return NULL_RTX;
12270 emit_insn (pat);
12271 return target;
12274 void
12275 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12277 rtx sel0 = const0_rtx;
12278 rtx sel1 = const1_rtx;
12279 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12280 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12282 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12283 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12286 void
12287 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12289 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12291 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12292 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12295 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12296 We can allow any mode in any general register. The special registers
12297 only allow SImode. Don't allow any mode in the PR.
12299 We cannot hold DCmode values in the XD registers because alter_reg
12300 handles subregs of them incorrectly. We could work around this by
12301 spacing the XD registers like the DR registers, but this would require
12302 additional memory in every compilation to hold larger register vectors.
12303 We could hold SFmode / SCmode values in XD registers, but that
12304 would require a tertiary reload when reloading from / to memory,
12305 and a secondary reload to reload from / to general regs; that
12306 seems to be a losing proposition.
12308 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12309 it won't be ferried through GP registers first. */
12310 bool
12311 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12313 if (SPECIAL_REGISTER_P (regno))
12314 return mode == SImode;
12316 if (regno == FPUL_REG)
12317 return (mode == SImode || mode == SFmode);
12319 if (FP_REGISTER_P (regno) && mode == SFmode)
12320 return true;
12322 if (mode == V2SFmode)
12324 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12325 || GENERAL_REGISTER_P (regno)))
12326 return true;
12327 else
12328 return false;
12331 if (mode == V4SFmode)
12333 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12334 || GENERAL_REGISTER_P (regno))
12335 return true;
12336 else
12337 return false;
12340 if (mode == V16SFmode)
12342 if (TARGET_SHMEDIA)
12344 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12345 return true;
12346 else
12347 return false;
12349 else
12350 return regno == FIRST_XD_REG;
12353 if (FP_REGISTER_P (regno))
12355 if (mode == SFmode
12356 || mode == SImode
12357 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12358 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12359 || mode == DCmode
12360 || (TARGET_SHMEDIA
12361 && (mode == DFmode || mode == DImode
12362 || mode == V2SFmode || mode == TImode)))
12363 && ((regno - FIRST_FP_REG) & 1) == 0)
12364 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12365 && ((regno - FIRST_FP_REG) & 3) == 0))
12366 return true;
12367 else
12368 return false;
12371 if (XD_REGISTER_P (regno))
12372 return mode == DFmode;
12374 if (TARGET_REGISTER_P (regno))
12375 return (mode == DImode || mode == SImode || mode == PDImode);
12377 if (regno == PR_REG)
12378 return mode == SImode;
12380 if (regno == FPSCR_REG)
12381 return mode == SImode;
12383 /* FIXME. This works around PR target/37633 for -O0. */
12384 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12386 unsigned int n = GET_MODE_SIZE (mode) / 8;
12388 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12389 && regno <= FIRST_GENERAL_REG + 14)
12390 return false;
12393 return true;
12396 /* Specify the modes required to caller save a given hard regno.
12397 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12398 and returns ?Imode for float regs when sh_hard_regno_mode_ok
12399 permits integer modes on them. That makes LRA's split process
12400 unhappy. See PR55212. */
12402 machine_mode
12403 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12404 machine_mode mode)
12406 if (FP_REGISTER_P (regno)
12407 && (mode == SFmode
12408 || mode == SCmode
12409 || ((mode == DFmode || mode == DCmode)
12410 && ((regno - FIRST_FP_REG) & 1) == 0)))
12411 return mode;
12413 return choose_hard_reg_mode (regno, nregs, false);
12416 /* Return true if a mode change from FROM to TO is invalid for registers
12417 in class RCLASS. */
12418 bool
12419 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12420 enum reg_class rclass)
12422 /* We want to enable the use of SUBREGs as a means to
12423 VEC_SELECT a single element of a vector. */
12425 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12426 This can be problematic when SFmode vector subregs need to be accessed
12427 on the stack with displacement addressing, as it happens with -O0.
12428 Thus we disallow the mode change for -O0. */
12429 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12430 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12432 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12434 if (TARGET_LITTLE_ENDIAN)
12436 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12437 return reg_classes_intersect_p (DF_REGS, rclass);
12439 else
12441 if (GET_MODE_SIZE (from) < 8)
12442 return reg_classes_intersect_p (DF_REGS, rclass);
12445 return false;
12448 /* Return true if values in machine mode MODE will likely be
12449 allocated to registers in small register classes. */
12450 bool
12451 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12453 return (! TARGET_SHMEDIA);
12456 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12457 that label is used. */
12458 void
12459 sh_mark_label (rtx address, int nuses)
12461 if (GOTOFF_P (address))
12463 /* Extract the label or symbol. */
12464 address = XEXP (address, 0);
12465 if (GET_CODE (address) == PLUS)
12466 address = XEXP (address, 0);
12467 address = XVECEXP (address, 0, 0);
12469 if (GET_CODE (address) == LABEL_REF
12470 && LABEL_P (XEXP (address, 0)))
12471 LABEL_NUSES (XEXP (address, 0)) += nuses;
12474 /* Compute extra cost of moving data between one register class
12475 and another.
12477 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12478 uses this information. Hence, the general register <-> floating point
12479 register information here is not used for SFmode. */
12480 static int
12481 sh_register_move_cost (machine_mode mode,
12482 reg_class_t srcclass, reg_class_t dstclass)
12484 if (dstclass == T_REGS || dstclass == PR_REGS)
12485 return 10;
12487 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12488 return 4;
12490 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12491 && REGCLASS_HAS_FP_REG (srcclass)
12492 && REGCLASS_HAS_FP_REG (dstclass))
12493 return 4;
12495 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12496 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12498 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12499 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12500 return 9;
12502 if ((REGCLASS_HAS_FP_REG (dstclass)
12503 && REGCLASS_HAS_GENERAL_REG (srcclass))
12504 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12505 && REGCLASS_HAS_FP_REG (srcclass)))
12507 /* Discourage trying to use fp regs for a pointer. This also
12508 discourages fp regs with SImode because Pmode is an alias
12509 of SImode on this target. See PR target/48596. */
12510 int addend = (mode == Pmode) ? 40 : 0;
12512 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12513 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12516 if ((dstclass == FPUL_REGS
12517 && REGCLASS_HAS_GENERAL_REG (srcclass))
12518 || (srcclass == FPUL_REGS
12519 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12520 return 5;
12522 if ((dstclass == FPUL_REGS
12523 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12524 || (srcclass == FPUL_REGS
12525 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12526 return 7;
12528 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12529 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12530 return 20;
12532 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12533 if (TARGET_SHMEDIA
12534 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12536 if (sh_gettrcost >= 0)
12537 return sh_gettrcost;
12538 else if (!TARGET_PT_FIXED)
12539 return 100;
12542 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12543 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12544 return 4;
12546 if (TARGET_SHMEDIA
12547 || (TARGET_FMOVD
12548 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12549 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12550 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12552 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12555 static rtx
12556 emit_load_ptr (rtx reg, rtx addr)
12558 rtx mem = gen_const_mem (ptr_mode, addr);
12560 if (Pmode != ptr_mode)
12561 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12562 return emit_move_insn (reg, mem);
12565 static void
12566 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12567 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12568 tree function)
12570 CUMULATIVE_ARGS cum;
12571 int structure_value_byref = 0;
12572 rtx this_rtx, this_value, sibcall, funexp;
12573 rtx_insn *insns;
12574 tree funtype = TREE_TYPE (function);
12575 int simple_add = CONST_OK_FOR_ADD (delta);
12576 int did_load = 0;
12577 rtx scratch0, scratch1, scratch2;
12578 unsigned i;
12580 reload_completed = 1;
12581 epilogue_completed = 1;
12582 crtl->uses_only_leaf_regs = 1;
12584 emit_note (NOTE_INSN_PROLOGUE_END);
12586 /* Find the "this" pointer. We have such a wide range of ABIs for the
12587 SH that it's best to do this completely machine independently.
12588 "this" is passed as first argument, unless a structure return pointer
12589 comes first, in which case "this" comes second. */
12590 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12591 #ifndef PCC_STATIC_STRUCT_RETURN
12592 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12593 structure_value_byref = 1;
12594 #endif /* not PCC_STATIC_STRUCT_RETURN */
12595 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12597 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12599 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12601 this_rtx
12602 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12604 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12605 static chain pointer (even if you can't have nested virtual functions
12606 right now, someone might implement them sometime), and the rest of the
12607 registers are used for argument passing, are callee-saved, or reserved. */
12608 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12609 -ffixed-reg has been used. */
12610 if (! call_used_regs[0] || fixed_regs[0])
12611 error ("r0 needs to be available as a call-clobbered register");
12612 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12613 if (! TARGET_SH5)
12615 if (call_used_regs[1] && ! fixed_regs[1])
12616 scratch1 = gen_rtx_REG (ptr_mode, 1);
12617 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12618 to the location where struct values are returned. */
12619 if (call_used_regs[3] && ! fixed_regs[3])
12620 scratch2 = gen_rtx_REG (Pmode, 3);
12622 else if (TARGET_SHMEDIA)
12624 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12625 if (i != REGNO (scratch0) &&
12626 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12628 scratch1 = gen_rtx_REG (ptr_mode, i);
12629 break;
12631 if (scratch1 == scratch0)
12632 error ("need a second call-clobbered general purpose register");
12633 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12634 if (call_used_regs[i] && ! fixed_regs[i])
12636 scratch2 = gen_rtx_REG (Pmode, i);
12637 break;
12639 if (scratch2 == scratch0)
12640 error ("need a call-clobbered target register");
12643 this_value = plus_constant (Pmode, this_rtx, delta);
12644 if (vcall_offset
12645 && (simple_add || scratch0 != scratch1)
12646 && strict_memory_address_p (ptr_mode, this_value))
12648 emit_load_ptr (scratch0, this_value);
12649 did_load = 1;
12652 if (!delta)
12653 ; /* Do nothing. */
12654 else if (simple_add)
12655 emit_move_insn (this_rtx, this_value);
12656 else
12658 emit_move_insn (scratch1, GEN_INT (delta));
12659 emit_insn (gen_add2_insn (this_rtx, scratch1));
12662 if (vcall_offset)
12664 rtx offset_addr;
12666 if (!did_load)
12667 emit_load_ptr (scratch0, this_rtx);
12669 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12670 if (strict_memory_address_p (ptr_mode, offset_addr))
12671 ; /* Do nothing. */
12672 else if (! TARGET_SH5 && scratch0 != scratch1)
12674 /* scratch0 != scratch1, and we have indexed loads. Get better
12675 schedule by loading the offset into r1 and using an indexed
12676 load - then the load of r1 can issue before the load from
12677 (this_rtx + delta) finishes. */
12678 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12679 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12681 else if (CONST_OK_FOR_ADD (vcall_offset))
12683 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12684 offset_addr = scratch0;
12686 else if (scratch0 != scratch1)
12688 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12689 emit_insn (gen_add2_insn (scratch0, scratch1));
12690 offset_addr = scratch0;
12692 else
12693 gcc_unreachable (); /* FIXME */
12694 emit_load_ptr (scratch0, offset_addr);
12696 if (Pmode != ptr_mode)
12697 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12698 emit_insn (gen_add2_insn (this_rtx, scratch0));
12701 /* Generate a tail call to the target function. */
12702 if (! TREE_USED (function))
12704 assemble_external (function);
12705 TREE_USED (function) = 1;
12707 funexp = XEXP (DECL_RTL (function), 0);
12708 /* If the function is overridden, so is the thunk, hence we don't
12709 need GOT addressing even if this is a public symbol. */
12710 #if 0
12711 if (TARGET_SH1 && ! flag_weak)
12712 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12713 else
12714 #endif
12715 if (TARGET_SH2 && flag_pic)
12717 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12718 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12720 else
12722 if (TARGET_SHMEDIA && flag_pic)
12724 funexp = gen_sym2PIC (funexp);
12725 PUT_MODE (funexp, Pmode);
12727 emit_move_insn (scratch2, funexp);
12728 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12729 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12731 sibcall = emit_call_insn (sibcall);
12732 SIBLING_CALL_P (sibcall) = 1;
12733 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12734 emit_barrier ();
12736 /* Run just enough of rest_of_compilation to do scheduling and get
12737 the insns emitted. Note that use_thunk calls
12738 assemble_start_function and assemble_end_function. */
12740 insns = get_insns ();
12742 if (optimize > 0)
12744 if (! cfun->cfg)
12745 init_flow (cfun);
12746 split_all_insns_noflow ();
12749 sh_reorg ();
12750 shorten_branches (insns);
12751 final_start_function (insns, file, 1);
12752 final (insns, file, 1);
12753 final_end_function ();
12755 reload_completed = 0;
12756 epilogue_completed = 0;
12760 rtx function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12762 rtx sym;
12764 /* If this is not an ordinary function, the name usually comes from a
12765 string literal or an sprintf buffer. Make sure we use the same
12766 string consistently, so that cse will be able to unify address loads. */
12767 if (kind != FUNCTION_ORDINARY)
12768 name = IDENTIFIER_POINTER (get_identifier (name));
12769 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12770 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12771 if (flag_pic)
12772 switch (kind)
12774 case FUNCTION_ORDINARY:
12775 break;
12776 case SFUNC_GOT:
12778 rtx reg = target ? target : gen_reg_rtx (Pmode);
12780 emit_insn (gen_symGOT2reg (reg, sym));
12781 sym = reg;
12782 break;
12784 case SFUNC_STATIC:
12786 /* ??? To allow cse to work, we use GOTOFF relocations.
12787 We could add combiner patterns to transform this into
12788 straight pc-relative calls with sym2PIC / bsrf when
12789 label load and function call are still 1:1 and in the
12790 same basic block during combine. */
12791 rtx reg = target ? target : gen_reg_rtx (Pmode);
12793 emit_insn (gen_symGOTOFF2reg (reg, sym));
12794 sym = reg;
12795 break;
12798 if (target && sym != target)
12800 emit_move_insn (target, sym);
12801 return target;
12803 return sym;
12806 /* Find the number of a general purpose register in S. */
12807 static int
12808 scavenge_reg (HARD_REG_SET *s)
12810 int r;
12811 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12812 if (TEST_HARD_REG_BIT (*s, r))
12813 return r;
12814 return -1;
12818 rtx sh_get_pr_initial_val (void)
12820 rtx val;
12822 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12823 PR register on SHcompact, because it might be clobbered by the prologue.
12824 We check first if that is known to be the case. */
12825 if (TARGET_SHCOMPACT
12826 && ((crtl->args.info.call_cookie
12827 & ~ CALL_COOKIE_RET_TRAMP (1))
12828 || crtl->saves_all_registers))
12829 return gen_frame_mem (SImode, return_address_pointer_rtx);
12831 /* If we haven't finished rtl generation, there might be a nonlocal label
12832 that we haven't seen yet.
12833 ??? get_hard_reg_initial_val fails if it is called after register
12834 allocation has started, unless it has been called before for the
12835 same register. And even then, we end up in trouble if we didn't use
12836 the register in the same basic block before. So call
12837 get_hard_reg_initial_val now and wrap it in an unspec if we might
12838 need to replace it. */
12839 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12840 combine can put the pseudo returned by get_hard_reg_initial_val into
12841 instructions that need a general purpose registers, which will fail to
12842 be recognized when the pseudo becomes allocated to PR. */
12844 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12845 if (TARGET_SH1)
12846 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12847 return val;
12850 bool
12851 sh_expand_t_scc (rtx operands[])
12853 enum rtx_code code = GET_CODE (operands[1]);
12854 rtx target = operands[0];
12855 rtx op0 = operands[2];
12856 rtx op1 = operands[3];
12857 rtx result = target;
12858 HOST_WIDE_INT val;
12860 if (!REG_P (op0) || REGNO (op0) != T_REG
12861 || !CONST_INT_P (op1))
12862 return false;
12863 if (!REG_P (result))
12864 result = gen_reg_rtx (SImode);
12865 val = INTVAL (op1);
12866 if ((code == EQ && val == 1) || (code == NE && val == 0))
12867 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12868 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12869 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12870 else if (code == EQ || code == NE)
12871 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12872 else
12873 return false;
12874 if (result != target)
12875 emit_move_insn (target, result);
12876 return true;
12879 /* INSN is an sfunc; return the rtx that describes the address used. */
12880 static rtx
12881 extract_sfunc_addr (rtx insn)
12883 rtx pattern, part = NULL_RTX;
12884 int len, i;
12886 pattern = PATTERN (insn);
12887 len = XVECLEN (pattern, 0);
12888 for (i = 0; i < len; i++)
12890 part = XVECEXP (pattern, 0, i);
12891 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12892 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12893 return XEXP (part, 0);
12895 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12896 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12899 /* Verify that the register in use_sfunc_addr still agrees with the address
12900 used in the sfunc. This prevents fill_slots_from_thread from changing
12901 use_sfunc_addr.
12902 INSN is the use_sfunc_addr instruction, and REG is the register it
12903 guards. */
12904 bool
12905 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12907 /* Search for the sfunc. It should really come right after INSN. */
12908 while ((insn = NEXT_INSN (insn)))
12910 if (LABEL_P (insn) || JUMP_P (insn))
12911 break;
12912 if (! INSN_P (insn))
12913 continue;
12915 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12916 insn = seq->insn (0);
12917 if (GET_CODE (PATTERN (insn)) != PARALLEL
12918 || get_attr_type (insn) != TYPE_SFUNC)
12919 continue;
12920 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12922 gcc_unreachable ();
12925 /* This function returns a constant rtx that represents 2**15 / pi in
12926 SFmode. It's used to scale an SFmode angle, in radians, to a fixed-point
12927 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
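/* 2**15 / pi = 32768 / 3.14159265... = 10430.37835047045..., the literal
   used below; multiplying an angle of 2*pi radians by it gives 0x10000.  */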
12928 static GTY(()) rtx sh_fsca_sf2int_rtx;
12931 rtx sh_fsca_sf2int (void)
12933 if (! sh_fsca_sf2int_rtx)
12935 REAL_VALUE_TYPE rv;
12937 real_from_string (&rv, "10430.378350470453");
12938 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12941 return sh_fsca_sf2int_rtx;
12944 /* This function returns a constant rtx that represents pi / 2**15 in
12945 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12946 of a full circle back to an SFmode angle in radians, i.e. 0x10000
12947 maps to 2*pi. */
12948 static GTY(()) rtx sh_fsca_int2sf_rtx;
12951 sh_fsca_int2sf (void)
12953 if (! sh_fsca_int2sf_rtx)
12955 REAL_VALUE_TYPE rv;
12957 real_from_string (&rv, "9.587379924285257e-5");
12958 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12961 return sh_fsca_int2sf_rtx;
12964 /* Initialize the CUMULATIVE_ARGS structure. */
12965 void
12966 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12967 tree fntype,
12968 rtx libname ATTRIBUTE_UNUSED,
12969 tree fndecl,
12970 signed int n_named_args,
12971 machine_mode mode)
12973 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12974 pcum->free_single_fp_reg = 0;
12975 pcum->stack_regs = 0;
12976 pcum->byref_regs = 0;
12977 pcum->byref = 0;
12978 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12980 /* XXX - Should we check TARGET_HITACHI here ??? */
12981 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12983 if (fntype)
12985 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12986 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12987 pcum->prototype_p = prototype_p (fntype);
12988 pcum->arg_count [(int) SH_ARG_INT]
12989 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12991 pcum->call_cookie
12992 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12993 && pcum->arg_count [(int) SH_ARG_INT] == 0
12994 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12995 ? int_size_in_bytes (TREE_TYPE (fntype))
12996 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12997 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12998 == FIRST_RET_REG));
13000 else
13002 pcum->arg_count [(int) SH_ARG_INT] = 0;
13003 pcum->prototype_p = FALSE;
13004 if (mode != VOIDmode)
13006 pcum->call_cookie =
13007 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13008 && GET_MODE_SIZE (mode) > 4
13009 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13011 /* If the default ABI is the Renesas ABI then all library
13012 calls must assume that the library will be using the
13013 Renesas ABI. So if the function would return its result
13014 in memory then we must force the address of this memory
13015 block onto the stack. Ideally we would like to call
13016 targetm.calls.return_in_memory() here but we do not have
13017 the TYPE or the FNDECL available so we synthesize the
13018 contents of that function as best we can. */
13019 pcum->force_mem =
13020 (TARGET_DEFAULT & MASK_HITACHI)
13021 && (mode == BLKmode
13022 || (GET_MODE_SIZE (mode) > 4
13023 && !(mode == DFmode
13024 && TARGET_FPU_DOUBLE)));
13026 else
13028 pcum->call_cookie = 0;
13029 pcum->force_mem = FALSE;
13034 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
13035 not enter into CONST_DOUBLE for the replace.
13037 Note that copying is not done so X must not be shared unless all copies
13038 are to be modified.
13040 This is like replace_rtx, except that we operate on N_REPLACEMENTS
13041 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
13042 replacements[n*2+1] - and that we take mode changes into account.
13044 If a replacement is ambiguous, return NULL_RTX.
13046 If MODIFY is zero, don't modify any rtl in place,
13047 just return zero or nonzero for failure / success. */
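/* As an illustration (hypothetical register numbers), with
   REPLACEMENTS = { (reg:SI r1), (reg:SI r4), (reg:SI r2), (reg:SI r5) }
   and N_REPLACEMENTS == 2, an expression such as
   (plus:SI (reg:SI r1) (reg:SI r2)) would become
   (plus:SI (reg:SI r4) (reg:SI r5)).  */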
13049 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
13051 int i, j;
13052 const char *fmt;
13054 /* The following prevents loops from occurring when we change a MEM
13055 inside a CONST_DOUBLE into the same CONST_DOUBLE. */
13056 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
13057 return x;
13059 for (i = n_replacements - 1; i >= 0 ; i--)
13060 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
13061 return replacements[i*2+1];
13063 /* Allow this function to make replacements in EXPR_LISTs. */
13064 if (x == NULL_RTX)
13065 return NULL_RTX;
13067 if (GET_CODE (x) == SUBREG)
13069 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
13070 n_replacements, modify);
13072 if (CONST_INT_P (new_rtx))
13074 x = simplify_subreg (GET_MODE (x), new_rtx,
13075 GET_MODE (SUBREG_REG (x)),
13076 SUBREG_BYTE (x));
13077 if (! x)
13078 abort ();
13080 else if (modify)
13081 SUBREG_REG (x) = new_rtx;
13083 return x;
13085 else if (REG_P (x))
13087 unsigned regno = REGNO (x);
13088 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
13089 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
13090 rtx result = NULL_RTX;
13092 for (i = n_replacements - 1; i >= 0; i--)
13094 rtx from = replacements[i*2];
13095 rtx to = replacements[i*2+1];
13096 unsigned from_regno, from_nregs, to_regno, new_regno;
13098 if (!REG_P (from))
13099 continue;
13100 from_regno = REGNO (from);
13101 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
13102 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
13103 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
13105 if (regno < from_regno
13106 || regno + nregs > from_regno + from_nregs
13107 || !REG_P (to)
13108 || result)
13109 return NULL_RTX;
13110 to_regno = REGNO (to);
13111 if (to_regno < FIRST_PSEUDO_REGISTER)
13113 new_regno = regno + to_regno - from_regno;
13114 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
13115 != nregs)
13116 return NULL_RTX;
13117 result = gen_rtx_REG (GET_MODE (x), new_regno);
13119 else if (GET_MODE (x) <= GET_MODE (to))
13120 result = gen_lowpart_common (GET_MODE (x), to);
13121 else
13122 result = gen_lowpart_SUBREG (GET_MODE (x), to);
13125 return result ? result : x;
13127 else if (GET_CODE (x) == ZERO_EXTEND)
13129 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
13130 n_replacements, modify);
13132 if (CONST_INT_P (new_rtx))
13134 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
13135 new_rtx, GET_MODE (XEXP (x, 0)));
13136 if (! x)
13137 abort ();
13139 else if (modify)
13140 XEXP (x, 0) = new_rtx;
13142 return x;
13145 fmt = GET_RTX_FORMAT (GET_CODE (x));
13146 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13148 rtx new_rtx;
13150 if (fmt[i] == 'e')
13152 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
13153 n_replacements, modify);
13154 if (!new_rtx)
13155 return NULL_RTX;
13156 if (modify)
13157 XEXP (x, i) = new_rtx;
13159 else if (fmt[i] == 'E')
13160 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13162 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
13163 n_replacements, modify);
13164 if (!new_rtx)
13165 return NULL_RTX;
13166 if (modify)
13167 XVECEXP (x, i, j) = new_rtx;
13171 return x;
13175 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13177 enum rtx_code code = TRUNCATE;
13179 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13181 rtx inner = XEXP (x, 0);
13182 machine_mode inner_mode = GET_MODE (inner);
13184 if (inner_mode == mode)
13185 return inner;
13186 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13187 x = inner;
13188 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13189 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13191 code = GET_CODE (x);
13192 x = inner;
13195 return gen_rtx_fmt_e (code, mode, x);
13198 /* Look through X cleaning up truncates of registers that span multiple
13199 actual hard registers. Return the number of changes made. */
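/* For instance, in a (truncate:SI (reg:TI ...)) the TImode register spans
   more than 8 bytes, so its use is rewritten to the low DImode part of the
   register, roughly (truncate:SI (subreg:DI (reg:TI ...) ...)).  */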
13201 shmedia_cleanup_truncate (rtx x)
13203 int n_changes = 0;
13204 subrtx_var_iterator::array_type array;
13205 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13207 rtx x = *iter;
13208 if (GET_CODE (x) == TRUNCATE)
13210 rtx reg = XEXP (x, 0);
13211 machine_mode reg_mode = GET_MODE (reg);
13212 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13214 int offset = subreg_lowpart_offset (DImode, reg_mode);
13215 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13216 n_changes += 1;
13217 iter.skip_subrtxes ();
13221 return n_changes;
13224 /* Load and store depend on the highpart of the address. However,
13225 set_attr_alternative does not give well-defined results before reload,
13226 so we must look at the rtl ourselves to see if any of the feeding
13227 registers is used in a memref.
13229 Return true iff INSN contains a MEM. */
13230 bool
13231 sh_contains_memref_p (rtx insn)
13233 subrtx_iterator::array_type array;
13234 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13235 if (MEM_P (*iter))
13236 return true;
13237 return false;
13240 /* Return true iff INSN loads a banked register. */
13241 bool
13242 sh_loads_bankedreg_p (rtx insn)
13244 if (GET_CODE (PATTERN (insn)) == SET)
13246 rtx op = SET_DEST (PATTERN(insn));
13247 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13248 return true;
13251 return false;
13254 /* FNADDR is the MEM expression from a call expander. Return an address
13255 to use in an SHmedia insn pattern. */
13257 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13259 int is_sym;
13261 fnaddr = XEXP (fnaddr, 0);
13262 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13263 if (flag_pic && is_sym)
13265 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13267 rtx reg = gen_reg_rtx (Pmode);
13269 /* We must not use GOTPLT for sibcalls, because PIC_REG
13270 must be restored before the PLT code gets to run. */
13271 if (is_sibcall)
13272 emit_insn (gen_symGOT2reg (reg, fnaddr));
13273 else
13274 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13275 fnaddr = reg;
13277 else
13279 fnaddr = gen_sym2PIC (fnaddr);
13280 PUT_MODE (fnaddr, Pmode);
13283 /* If ptabs might trap, make this visible to the rest of the compiler.
13284 We generally assume that symbols pertain to valid locations, but
13285 it is possible to generate invalid symbols with asm or linker tricks.
13286 In a list of functions where each returns its successor, an invalid
13287 symbol might denote an empty list. */
13288 if (!TARGET_PT_FIXED
13289 && (!is_sym || TARGET_INVALID_SYMBOLS)
13290 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13292 rtx tr = gen_reg_rtx (PDImode);
13294 emit_insn (gen_ptabs (tr, fnaddr));
13295 fnaddr = tr;
13297 else if (! target_reg_operand (fnaddr, Pmode))
13298 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13299 return fnaddr;
13302 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13303 static reg_class_t
13304 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13306 if (rclass == NO_REGS
13307 && TARGET_SHMEDIA
13308 && (CONST_DOUBLE_P (x)
13309 || GET_CODE (x) == SYMBOL_REF
13310 || PIC_ADDR_P (x)))
13311 return GENERAL_REGS;
13313 return rclass;
13316 /* Implement TARGET_SECONDARY_RELOAD. */
13317 static reg_class_t
13318 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13319 machine_mode mode, secondary_reload_info *sri)
13321 enum reg_class rclass = (enum reg_class) rclass_i;
13323 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13324 && REG_P (XEXP (XEXP (x, 0), 0))
13325 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13326 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13328 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13329 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13331 if (REG_P (x) && REGNO (x) == GBR_REG)
13332 return NO_REGS;
13334 if (in_p)
13336 if (REGCLASS_HAS_FP_REG (rclass)
13337 && ! TARGET_SHMEDIA
13338 && immediate_operand ((x), mode)
13339 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13340 switch (mode)
13342 case SFmode:
13343 sri->icode = CODE_FOR_reload_insf__frn;
13344 return NO_REGS;
13345 case DFmode:
13346 sri->icode = CODE_FOR_reload_indf__frn;
13347 return NO_REGS;
13348 case SImode:
13349 /* ??? If we knew that we are in the appropriate mode -
13350 single precision - we could use a reload pattern directly. */
13351 return FPUL_REGS;
13352 default:
13353 abort ();
13355 if (rclass == FPUL_REGS
13356 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13357 || REGNO (x) == T_REG))
13358 || GET_CODE (x) == PLUS))
13359 return GENERAL_REGS;
13360 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13362 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13363 return GENERAL_REGS;
13364 else if (mode == SFmode)
13365 return FP_REGS;
13366 sri->icode = CODE_FOR_reload_insi__i_fpul;
13367 return NO_REGS;
13369 if (rclass == FPSCR_REGS
13370 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13371 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13372 return GENERAL_REGS;
13373 if (REGCLASS_HAS_FP_REG (rclass)
13374 && TARGET_SHMEDIA
13375 && immediate_operand (x, mode)
13376 && x != CONST0_RTX (GET_MODE (x))
13377 && GET_MODE (x) != V4SFmode)
13378 return GENERAL_REGS;
13379 if ((mode == QImode || mode == HImode)
13380 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13382 sri->icode = ((mode == QImode)
13383 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13384 return NO_REGS;
13386 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13387 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13388 return TARGET_REGS;
13389 } /* end of input-only processing. */
13391 if (((REGCLASS_HAS_FP_REG (rclass)
13392 && (REG_P (x)
13393 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13394 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13395 && TARGET_FMOVD))))
13396 || (REGCLASS_HAS_GENERAL_REG (rclass)
13397 && REG_P (x)
13398 && FP_REGISTER_P (REGNO (x))))
13399 && ! TARGET_SHMEDIA
13400 && (mode == SFmode || mode == SImode))
13401 return FPUL_REGS;
13402 if ((rclass == FPUL_REGS
13403 || (REGCLASS_HAS_FP_REG (rclass)
13404 && ! TARGET_SHMEDIA && mode == SImode))
13405 && (MEM_P (x)
13406 || (REG_P (x)
13407 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13408 || REGNO (x) == T_REG
13409 || system_reg_operand (x, VOIDmode)))))
13411 if (rclass == FPUL_REGS)
13412 return GENERAL_REGS;
13413 return NO_REGS; // LRA wants NO_REGS here; this used to be FPUL_REGS.
13415 if ((rclass == TARGET_REGS
13416 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13417 && !satisfies_constraint_Csy (x)
13418 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13419 return GENERAL_REGS;
13420 if ((rclass == MAC_REGS || rclass == PR_REGS)
13421 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13422 && rclass != REGNO_REG_CLASS (REGNO (x)))
13423 return GENERAL_REGS;
13424 if (rclass != GENERAL_REGS && REG_P (x)
13425 && TARGET_REGISTER_P (REGNO (x)))
13426 return GENERAL_REGS;
13428 /* If we get here, fall back to loading the FPUL register through general registers.
13429 This case can happen when movsi_ie insn is picked initially to
13430 load/store the FPUL register from/to another register, and then the
13431 other register is allocated on the stack. */
13432 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13433 return GENERAL_REGS;
13435 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13436 the other operand.
13437 On SH2A we could also just leave it alone here, which would result in a
13438 4 byte move insn being generated instead. However, for this to work
13439 the insns must have the appropriate alternatives. */
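/* The displacement forms are 'mov.b @(disp,Rn),R0' and 'mov.b R0,@(disp,Rn)'
   (likewise for mov.w), so the value being moved has to live in R0.  */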
13440 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13441 && satisfies_constraint_Sdd (x)
13442 && sh_disp_addr_displacement (x)
13443 <= sh_max_mov_insn_displacement (mode, false))
13444 return R0_REGS;
13446 /* When reload is trying to address a QImode or HImode subreg on the stack,
13447 force any subreg byte into R0_REGS, as this is going to become a
13448 displacement address.
13449 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13450 is on the stack, the memref to it might already require a displacement
13451 and that has to be added to the final address. At this point we don't
13452 know the cumulative displacement so we assume the worst case. */
13453 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13454 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13455 return R0_REGS;
13457 return NO_REGS;
13460 /* Return true if SUBST can't safely replace its equivalent during RA. */
13461 static bool
13462 sh_cannot_substitute_mem_equiv_p (rtx)
13464 if (TARGET_SHMEDIA)
13465 return false;
13467 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13468 uses R0 and may cause spill failure when R0 is already used.
13469 We have to return true for that case at least.
13470 Moreover, many SH insns are tied to R0, and there are not enough hard
13471 registers to make the equiv substitution a win for size or speed on
13472 average working sets. The pseudos produced to hold the equiv values
13473 can't get good hard registers in the bad cases and end up as memory
13474 save/restore insns, which make the code worse. */
13475 return true;
13478 /* Return true if DISP can be legitimized. */
13479 static bool
13480 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13481 machine_mode mode)
13483 if (TARGET_SHMEDIA)
13484 return false;
13486 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13487 || (TARGET_SH2E && mode == SFmode))
13488 return false;
13490 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13491 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13493 *disp = adj.mov_disp;
13494 *offs = adj.offset_adjust;
13495 return true;
13498 return false;
13501 /* Return true if the movsf insn should be split using an additional
13502 register. */
13503 bool
13504 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13506 /* op0 == op1 */
13507 if (rtx_equal_p (op0, op1))
13508 return true;
13509 /* fy, FQ, reg */
13510 if (GET_CODE (op1) == CONST_DOUBLE
13511 && ! satisfies_constraint_G (op1)
13512 && ! satisfies_constraint_H (op1)
13513 && REG_P (op0)
13514 && REG_P (op2))
13515 return true;
13516 /* f, r, y */
13517 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13518 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13519 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13520 return true;
13521 /* r, f, y */
13522 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13523 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13524 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13525 return true;
13527 return false;
13530 static void
13531 sh_conditional_register_usage (void)
13533 int regno;
13534 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13535 if (! VALID_REGISTER_P (regno))
13536 fixed_regs[regno] = call_used_regs[regno] = 1;
13537 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13538 if (TARGET_SH5)
13540 call_used_regs[FIRST_GENERAL_REG + 8]
13541 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13542 call_really_used_regs[FIRST_GENERAL_REG + 8]
13543 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13545 if (TARGET_SHMEDIA)
13547 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13548 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13549 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13551 if (flag_pic)
13553 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13554 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13556 /* Renesas saves and restores mac registers on call. */
13557 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13559 call_really_used_regs[MACH_REG] = 0;
13560 call_really_used_regs[MACL_REG] = 0;
13563 if (TARGET_SHMEDIA)
13565 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13566 if (! fixed_regs[regno] && call_really_used_regs[regno])
13567 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13569 else
13570 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13571 if (! fixed_regs[regno] && call_really_used_regs[regno])
13572 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13574 call_really_used_regs[FPSCR_MODES_REG] = 0;
13575 call_really_used_regs[FPSCR_STAT_REG] = 0;
13578 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13580 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13581 static bool
13582 sh_legitimate_constant_p (machine_mode mode, rtx x)
13584 return (TARGET_SHMEDIA
13585 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13586 || x == CONST0_RTX (mode)
13587 || !TARGET_SHMEDIA_FPU
13588 || TARGET_SHMEDIA64)
13589 : (GET_CODE (x) != CONST_DOUBLE
13590 || mode == DFmode || mode == SFmode
13591 || mode == DImode || GET_MODE (x) == VOIDmode));
13594 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13596 static void
13597 sh_init_sync_libfuncs (void)
13599 init_sync_libfuncs (UNITS_PER_WORD);
13602 /* Return true if it is appropriate to emit `ret' instructions in the
13603 body of a function. */
13604 bool
13605 sh_can_use_simple_return_p (void)
13607 HARD_REG_SET live_regs_mask;
13608 int d;
13610 /* Some targets require special return insns. */
13611 if (TARGET_SHMEDIA
13612 || (TARGET_SHCOMPACT
13613 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13614 return false;
13616 if (! reload_completed || frame_pointer_needed)
13617 return false;
13619 /* Moving the prologue around doesn't reduce the size. */
13620 if (optimize_function_for_size_p (cfun))
13621 return false;
13623 /* Finally, allow for pr save. */
13624 d = calc_live_regs (&live_regs_mask);
13626 if (rounded_frame_size (d) > 4)
13627 return false;
13629 return true;
13632 /*------------------------------------------------------------------------------
13633 Address mode optimization support code
13636 typedef HOST_WIDE_INT disp_t;
13637 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13638 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13639 static const disp_t INVALID_DISP = MAX_DISP;
13641 /* A memory reference which is described by a base register and a
13642 displacement. */
13643 class base_reg_disp
13645 public:
13646 base_reg_disp (rtx br, disp_t d);
13648 bool is_reg (void) const;
13649 bool is_disp (void) const;
13650 rtx reg (void) const;
13651 disp_t disp (void) const;
13653 private:
13654 rtx reg_;
13655 disp_t disp_;
13658 inline
13659 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13660 : reg_ (br), disp_ (d)
13664 inline bool
13665 base_reg_disp::is_reg (void) const
13667 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13670 inline bool
13671 base_reg_disp::is_disp (void) const
13673 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13676 inline rtx
13677 base_reg_disp::reg (void) const
13679 return reg_;
13682 inline disp_t
13683 base_reg_disp::disp (void) const
13685 return disp_;
13688 /* Find the base register and calculate the displacement for a given
13689 address rtx 'x'. */
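/* For example, for an address like (plus:SI (reg:SI Rbase) (const_int 8)),
   where Rbase stands for any base register, this would return that register
   as the base and 8 as the displacement, possibly after tracing the defs of
   pseudo registers as done below.  */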
13690 static base_reg_disp
13691 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13692 rtx base_reg = NULL)
13694 if (REG_P (x))
13696 if (REGNO (x) == GBR_REG)
13697 return base_reg_disp (x, disp);
13699 /* We've reached a hard-reg. This is probably the point where
13700 function args are copied to pseudos. Do not go any further and
13701 stick to the pseudo. If the original mem addr was in a hard reg
13702 from the beginning, it will become the base reg. */
13703 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13704 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13706 /* Find the def of the reg and trace it. If there is more than one
13707 def and they are not the same, assume it's not safe to proceed. */
13708 rtx_insn* last_i = NULL;
13709 rtx last_set = NULL;
13710 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13711 d = DF_REF_NEXT_REG (d))
13713 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13715 /* Accept multiple defs, as long as they are equal. */
13716 if (last_set == NULL || rtx_equal_p (last_set, set))
13718 last_i = DF_REF_INSN (d);
13719 last_set = set;
13721 else
13723 last_i = NULL;
13724 last_set = NULL;
13725 break;
13729 if (last_set != NULL && last_i != NULL)
13730 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13731 XEXP (last_set, 0));
13733 /* When we get here, no previous insn was found that sets the reg.
13734 The input reg is already the base reg. */
13735 return base_reg_disp (x, disp);
13738 else if (GET_CODE (x) == PLUS)
13740 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13741 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13743 /* Either left or right val must be a reg.
13744 We don't handle the case of 'reg + reg' here. */
13745 if (left_val.is_reg () && right_val.is_disp ())
13746 return base_reg_disp (left_val.reg (), left_val.disp ()
13747 + right_val.disp () + disp);
13748 else if (right_val.is_reg () && left_val.is_disp ())
13749 return base_reg_disp (right_val.reg (), right_val.disp ()
13750 + left_val.disp () + disp);
13751 else
13752 return base_reg_disp (base_reg, disp);
13755 else if (CONST_INT_P (x))
13756 return base_reg_disp (NULL, disp + INTVAL (x));
13758 /* Didn't find anything useful. */
13759 return base_reg_disp (base_reg, disp);
13762 /* Given an insn and a memory operand, try to find an equivalent GBR
13763 based memory address and return the corresponding new memory address.
13764 Return NULL_RTX if not found. */
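/* If the address can be traced back to GBR plus a constant, the returned
   address has the form (plus:SI (reg:SI GBR_REG) (const_int disp)), which
   corresponds to the GBR displacement addressing mode,
   e.g. 'mov.l @(disp,GBR),R0'.  */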
13766 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13768 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13769 return NULL_RTX;
13771 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13772 if (side_effects_p (XEXP (mem, 0)))
13773 return NULL_RTX;
13775 /* When not optimizing there might be no dataflow available. */
13776 if (df == NULL)
13777 return NULL_RTX;
13779 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13781 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13783 /* If GBR is marked as call clobbered we bail out if we see a call.
13784 FIXME: Actually should check if this mem refers to the gbr value
13785 before or after the call. If there is a store_gbr preceding this
13786 mem, it's safe to use GBR for this mem.
13788 If GBR is not marked as call clobbered, but there is some other
13789 def than a call, it's probably a load_gbr upon which we also
13790 bail out to be on the safe side.
13791 FIXME: Should check if we have a use-after-def case, such as
13792 the call case above. */
13793 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13794 d = DF_REF_NEXT_REG (d))
13796 if (CALL_P (DF_REF_INSN (d)))
13798 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13799 return NULL_RTX;
13800 else
13801 continue;
13803 else
13804 return NULL_RTX;
13807 rtx disp = GEN_INT (gbr_disp.disp ());
13808 if (gbr_displacement (disp, GET_MODE (mem)))
13809 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13812 return NULL_RTX;
13815 /*------------------------------------------------------------------------------
13816 Manual insn combine support code.
13819 /* Return true if the specified insn contains any UNSPECs or
13820 UNSPEC_VOLATILEs. */
13821 static bool
13822 sh_unspec_insn_p (rtx x)
13824 subrtx_iterator::array_type array;
13825 FOR_EACH_SUBRTX (i, array, x, ALL)
13826 if (*i != NULL
13827 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13828 return true;
13830 return false;
13833 /* Return true if the register operands of the specified insn are modified
13834 between the specified from and to insns (exclusive of those two). */
13835 bool
13836 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13837 const rtx_insn* from,
13838 const rtx_insn* to)
13840 /* FIXME: Return true for multiple sets for now. */
13841 rtx s = single_set (operands_insn);
13842 if (s == NULL_RTX)
13843 return true;
13845 subrtx_iterator::array_type array;
13846 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13847 if (*i != NULL &&
13848 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13849 return true;
13851 return false;
13854 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13855 negates the T bit and stores the result in the T bit. */
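/* A minimal sketch of such a pattern, assuming negt_reg_operand accepts the
   xor-with-1 form, would be
   (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1))).  */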
13856 bool
13857 sh_is_nott_insn (const rtx_insn* i)
13859 return i != NULL && GET_CODE (PATTERN (i)) == SET
13860 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13861 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
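/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn that
   stores the T bit in a register, and return the destination register rtx,
   or null.  */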
13865 sh_movt_set_dest (const rtx_insn* i)
13867 if (i == NULL)
13868 return NULL;
13870 const_rtx p = PATTERN (i);
13871 return GET_CODE (p) == SET
13872 && arith_reg_dest (XEXP (p, 0), SImode)
13873 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13876 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13877 that stores the negated T bit in a register, and return the destination
13878 register rtx, or null. */
13880 sh_movrt_set_dest (const rtx_insn* i)
13882 if (i == NULL)
13883 return NULL;
13885 const_rtx p = PATTERN (i);
13887 /* The negc movrt replacement is inside a parallel. */
13888 if (GET_CODE (p) == PARALLEL)
13889 p = XVECEXP (p, 0, 0);
13891 return GET_CODE (p) == SET
13892 && arith_reg_dest (XEXP (p, 0), SImode)
13893 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13896 /* Given an insn and a reg number, tell whether the reg dies or is unused
13897 after the insn. */
13898 bool
13899 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13901 return find_regno_note (i, REG_DEAD, regno) != NULL
13902 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13905 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13906 mark it as being used after the insn. */
13907 void
13908 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13910 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13911 remove_note (i, n);
13912 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13913 remove_note (i, n);
13916 /* Given an op rtx and an insn, try to find out whether the result of the
13917 specified op consists only of logical operations on T bit stores. */
13918 bool
13919 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13921 if (!logical_operator (op, SImode))
13922 return false;
13924 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13925 int op_is_t_count = 0;
13927 for (int i = 0; i < 2; ++i)
13929 if (t_reg_operand (ops[i], VOIDmode)
13930 || negt_reg_operand (ops[i], VOIDmode))
13931 op_is_t_count++;
13933 else
13935 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13936 prev_nonnote_insn_bb);
13937 if (op_set.set_src == NULL_RTX)
13938 continue;
13940 if (t_reg_operand (op_set.set_src, VOIDmode)
13941 || negt_reg_operand (op_set.set_src, VOIDmode)
13942 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13943 op_is_t_count++;
13947 return op_is_t_count == 2;
13950 /* Given the operand that is extended in a sign/zero extend insn, and the
13951 insn, try to figure out whether the sign/zero extension can be replaced
13952 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13953 NULL_RTX otherwise. */
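/* For example, if the extended reg was set from the T bit (a movt style
   insn), its value is already 0 or 1, so the sign/zero extension is
   redundant and a plain reg-reg copy will do.  */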
13955 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13957 if (REG_P (extended_op))
13958 extended_op = extended_op;
13959 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13960 extended_op = SUBREG_REG (extended_op);
13961 else
13962 return NULL_RTX;
13964 /* Reg moves must be of the same mode. */
13965 if (GET_MODE (extended_op) != SImode)
13966 return NULL_RTX;
13968 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13969 if (s.set_src == NULL_RTX)
13970 return NULL_RTX;
13972 if (t_reg_operand (s.set_src, VOIDmode)
13973 || negt_reg_operand (s.set_src, VOIDmode))
13974 return extended_op;
13976 /* If the zero extended reg was formed by a logical operation, check the
13977 operands of the logical operation. If both originated from T bit
13978 stores the zero extension can be eliminated. */
13979 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13980 return extended_op;
13982 return NULL_RTX;
13985 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13986 figure out whether it should be converted into a movt-xor sequence in
13987 the movrt_negc splitter.
13988 Returns true if insns have been modified and the splitter has succeeded. */
13989 bool
13990 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13992 /* In cases such as
13993 tst r4,r4
13994 mov #-1,r1
13995 negc r1,r1
13996 tst r4,r4
13997 we can replace the T bit clobbering negc with a movt-xor sequence and
13998 eliminate the redundant comparison.
13999 Because the xor insn depends on register allocation results, allow this
14000 only before reload. */
14001 if (!can_create_pseudo_p ())
14002 return false;
14004 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
14005 prev_nonnote_insn_bb);
14006 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
14007 next_nonnote_insn_bb);
14009 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
14010 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
14011 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
14012 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
14013 t_before_negc.insn,
14014 t_after_negc.insn)
14015 && !sh_unspec_insn_p (t_after_negc.insn)
14016 && !volatile_insn_p (PATTERN (t_after_negc.insn))
14017 && !side_effects_p (PATTERN (t_after_negc.insn))
14018 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
14020 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
14021 set_insn_deleted (t_after_negc.insn);
14022 return true;
14024 else
14025 return false;
14028 /* Given a reg and the current insn, see if the value of the reg originated
14029 from a sign or zero extension and return the discovered information. */
14030 sh_extending_set_of_reg
14031 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
14033 if (reg == NULL)
14034 return sh_extending_set_of_reg (curr_insn);
14036 if (SUBREG_P (reg))
14037 reg = SUBREG_REG (reg);
14039 if (!REG_P (reg))
14040 return sh_extending_set_of_reg (curr_insn);
14042 /* FIXME: Also search the predecessor basic blocks. It seems that checking
14043 only the adjacent predecessor blocks would cover most of the cases.
14044 Also try to look through the first extension that we hit. There are some
14045 cases where a zero_extend is followed by an (implicit) sign_extend, and it
14046 fails to see the sign_extend. */
14047 sh_extending_set_of_reg result =
14048 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
14050 if (result.set_src != NULL)
14052 if (GET_CODE (result.set_src) == SIGN_EXTEND
14053 || GET_CODE (result.set_src) == ZERO_EXTEND)
14055 if (dump_file)
14056 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
14057 "explicitly sign/zero extended in insn %d\n",
14058 REGNO (reg), INSN_UID (result.insn));
14059 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
14060 result.ext_code = GET_CODE (result.set_src);
14062 else if (MEM_P (result.set_src)
14063 && (GET_MODE (result.set_src) == QImode
14064 || GET_MODE (result.set_src) == HImode)
14065 && !sh_unspec_insn_p (result.insn))
14067 /* On SH QIHImode memory loads always sign extend. However, in
14068 some cases where it seems that the higher bits are not
14069 interesting, the loads will not be expanded as sign extending
14070 insns, but as QIHImode loads into QIHImode regs. We report that
14071 the reg has been sign extended by the mem load. When it is used
14072 as such, we must convert the mem load into a sign extending insn,
14073 see also sh_extending_set_of_reg::use_as_extended_reg. */
14074 if (dump_file)
14075 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
14076 "implicitly sign extended in insn %d\n",
14077 REGNO (reg), INSN_UID (result.insn));
14078 result.from_mode = GET_MODE (result.set_src);
14079 result.ext_code = SIGN_EXTEND;
14083 return result;
14086 /* Given a reg that is known to be sign or zero extended at some insn,
14087 take the appropriate measures so that the extended value can be used as
14088 a reg at the specified insn and return the resulting reg rtx. */
14090 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
14092 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
14093 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
14094 gcc_assert (from_mode == QImode || from_mode == HImode);
14096 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
14098 if (dump_file)
14099 fprintf (dump_file,
14100 "use_as_extended_reg: converting non-extending mem load in "
14101 "insn %d into sign-extending load\n", INSN_UID (insn));
14103 rtx r = gen_reg_rtx (SImode);
14104 rtx_insn* i0;
14105 if (from_mode == QImode)
14106 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14107 else if (from_mode == HImode)
14108 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14109 else
14110 gcc_unreachable ();
14112 emit_insn_after (
14113 gen_move_insn (XEXP (set_rtx, 0),
14114 gen_lowpart (GET_MODE (set_src), r)), i0);
14115 set_insn_deleted (insn);
14116 return r;
14118 else
14120 rtx extension_dst = XEXP (set_rtx, 0);
14121 if (modified_between_p (extension_dst, insn, use_at_insn))
14123 if (dump_file)
14124 fprintf (dump_file,
14125 "use_as_extended_reg: dest reg %d of extending insn %d is "
14126 "modified, inserting a reg-reg copy\n",
14127 REGNO (extension_dst), INSN_UID (insn));
14129 rtx r = gen_reg_rtx (SImode);
14130 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14131 return r;
14133 else
14135 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14136 return extension_dst;
14141 bool
14142 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14144 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14145 && (from_mode == QImode || from_mode == HImode)
14146 && set_src != NULL)
14147 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14148 else
14149 return false;
14153 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14155 gcc_assert (can_use_as_unextended_reg ());
14157 rtx r = XEXP (set_src, 0);
14158 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14160 if (modified_between_p (r, insn, use_at_insn))
14162 rtx r1 = gen_reg_rtx (SImode);
14163 emit_insn_after (gen_move_insn (r1, r0), insn);
14164 return r1;
14166 else
14168 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14169 ? REGNO (SUBREG_REG (r))
14170 : REGNO (r));
14171 return r0;
14175 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14176 perform the necessary checks on the operands and split it accordingly. */
14177 void
14178 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14179 int subreg_offset, rtx operands[])
14181 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14183 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14184 curr_insn);
14185 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14186 curr_insn);
14188 /* If one of the operands is known to be zero extended, that's already
14189 sufficient to mask out the unwanted high bits. */
14190 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14192 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14193 operands[1]));
14194 return;
14196 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14198 emit_insn (gen_tstsi_t (operands[0],
14199 eop1.use_as_extended_reg (curr_insn)));
14200 return;
14203 /* None of the operands seem to be zero extended.
14204 If both are sign extended it's OK, too. */
14205 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14206 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14208 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14209 eop1.use_as_extended_reg (curr_insn)));
14210 return;
14213 /* Otherwise we have to insert a zero extension on one of the operands to
14214 mask out the unwanted high bits.
14215 Prefer the operand that has no known extension. */
14216 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14217 std::swap (operands[0], operands[1]);
14219 rtx tmp0 = gen_reg_rtx (SImode);
14220 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14221 GET_MODE (operands[0]), subreg_offset);
14222 emit_insn (subreg_mode == QImode
14223 ? gen_zero_extendqisi2 (tmp0, tmp1)
14224 : gen_zero_extendhisi2 (tmp0, tmp1));
14225 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14228 /* A helper class to increment/decrement a counter variable each time a
14229 function is entered/left. */
14230 class scope_counter
14232 public:
14233 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14235 ~scope_counter (void)
14237 --m_counter;
14238 gcc_assert (m_counter >= 0);
14241 int count (void) const { return m_counter; }
14243 private:
14244 int& m_counter;
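/* Typical usage of scope_counter, as in sh_recog_treg_set_expr below:
     scope_counter recursion (sh_recog_treg_set_expr_reent_count);
   the counter is incremented on entry and decremented again when the
   object goes out of scope.  */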
14247 /* Given an rtx x, determine whether the expression can be used to create
14248 an insn that calculates x and stores the result in the T bit.
14249 This is used by the 'treg_set_expr' predicate to construct insns sequences
14250 where T bit results are fed into other insns, such as addc, subc, negc
14251 insns.
14253 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14254 distinguish between 'positive' and 'negative' forms. For now this has to
14255 be done in the preparation code. We could also introduce
14256 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14257 two different patterns for the 'positive' and 'negative' forms. However,
14258 the total amount of lines of code seems to be about the same and the
14259 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14260 recog function would need to look inside the expression by temporarily
14261 splitting it. */
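/* For example, an operand such as
   (eq:SI (and:SI (reg:SI r4) (reg:SI r5)) (const_int 0))
   could presumably be matched by a tst insn pattern that leaves its result
   in the T bit, and would therefore be accepted here.  */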
14262 static int sh_recog_treg_set_expr_reent_count = 0;
14264 bool
14265 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14267 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14269 /* Limit the recursion count to avoid nested expressions which we can't
14270 resolve to a single treg set insn. */
14271 if (recursion.count () > 1)
14272 return false;
14274 /* Early accept known possible operands before doing recog. */
14275 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14276 return true;
14278 /* Early reject impossible operands before doing recog.
14279 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14280 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14281 such as lower-subreg will bail out. Some insns such as SH4A movua are
14282 done with UNSPEC, so must reject those, too, or else it would result
14283 in an invalid reg -> treg move. */
14284 if (register_operand (op, mode) || memory_operand (op, mode)
14285 || sh_unspec_insn_p (op))
14286 return false;
14288 if (!can_create_pseudo_p ())
14289 return false;
14291 /* We are going to invoke recog in a re-entrant way and thus
14292 have to capture its current state and restore it afterwards. */
14293 recog_data_d prev_recog_data = recog_data;
14295 rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), op));
14296 SET_PREV_INSN (i) = NULL;
14297 SET_NEXT_INSN (i) = NULL;
14299 int result = recog (PATTERN (i), i, 0);
14301 /* It seems there is no insn like that. Create a simple negated
14302 version and try again. If we hit a negated form, we'll allow that
14303 and append a nott sequence when splitting out the insns. Insns that
14304 do the split can then remove the trailing nott if they know how to
14305 deal with it. */
14306 if (result < 0 && GET_CODE (op) == EQ)
14308 PUT_CODE (op, NE);
14309 result = recog (PATTERN (i), i, 0);
14310 PUT_CODE (op, EQ);
14312 if (result < 0 && GET_CODE (op) == NE)
14314 PUT_CODE (op, EQ);
14315 result = recog (PATTERN (i), i, 0);
14316 PUT_CODE (op, NE);
14319 recog_data = prev_recog_data;
14320 return result >= 0;
14323 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14324 This can be used as a condition for insn/split patterns to allow certain
14325 T bit setting patterns only to be matched as sub-expressions of other
14326 patterns. */
14327 bool
14328 sh_in_recog_treg_set_expr (void)
14330 return sh_recog_treg_set_expr_reent_count > 0;
14333 /* Given an rtx x, which is assumed to be some expression that has been
14334 matched by the 'treg_set_expr' predicate before, split and emit the
14335 insns that are necessary to calculate the expression and store the result
14336 in the T bit.
14337 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14338 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14339 'delete_insn' which then causes the DF parts to bail out, because we
14340 currently are inside another gen_split* function and would invoke
14341 'try_split' in a reentrant way. */
14342 static std::pair<rtx_insn*, rtx_insn*>
14343 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14345 if (dump_file)
14347 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14348 print_rtl_single (dump_file, i);
14349 fprintf (dump_file, "\n");
14352 rtx_insn* seq = safe_as_a<rtx_insn*> (split_insns (PATTERN (i), curr_insn));
14354 if (seq == NULL)
14355 return std::make_pair (i, i);
14357 /* Avoid infinite splitter loops if any insn of the result matches
14358 the original pattern. */
14359 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14360 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14361 return std::make_pair (i, i);
14363 unshare_all_rtl_in_chain (seq);
14365 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14366 a linked list, replace the single insn with the new insns. */
14367 rtx_insn* seqlast = seq;
14368 while (NEXT_INSN (seqlast) != NULL)
14369 seqlast = NEXT_INSN (seqlast);
14371 if (rtx_insn* iprev = PREV_INSN (i))
14372 SET_NEXT_INSN (iprev) = seq;
14373 if (rtx_insn* inext = NEXT_INSN (i))
14374 SET_PREV_INSN (inext) = seqlast;
14376 SET_PREV_INSN (seq) = PREV_INSN (i);
14377 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14379 SET_PREV_INSN (i) = NULL;
14380 SET_NEXT_INSN (i) = NULL;
14382 /* Recursively split all insns. */
14383 for (i = seq; ; i = NEXT_INSN (i))
14385 std::pair<rtx_insn*, rtx_insn*> ii =
14386 sh_try_split_insn_simple (i, curr_insn, n + 1);
14387 if (i == seq)
14388 seq = ii.first;
14389 if (i == seqlast)
14391 seqlast = ii.second;
14392 break;
14394 i = ii.first;
14397 return std::make_pair (seq, seqlast);
14400 sh_treg_insns
14401 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14403 if (t_reg_operand (x, VOIDmode))
14404 return sh_treg_insns ();
14406 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14408 rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), x));
14409 SET_PREV_INSN (i) = NULL;
14410 SET_NEXT_INSN (i) = NULL;
14412 if (dump_file)
14414 fprintf (dump_file, "split_treg_set_expr insn:\n");
14415 print_rtl (dump_file, i);
14416 fprintf (dump_file, "\n");
14419 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14420 have to capture its current state and restore it afterwards. */
14421 recog_data_d prev_recog_data = recog_data;
14423 int insn_code = recog (PATTERN (i), i, 0);
14425 /* If the insn was not found, try the negated form of the expression
14426 and remember to append a nott. */
14427 bool append_nott = false;
14429 if (insn_code < 0 && GET_CODE (x) == EQ)
14431 PUT_CODE (x, NE);
14432 insn_code = recog (PATTERN (i), i, 0);
14433 if (insn_code >= 0)
14434 append_nott = true;
14435 else
14436 PUT_CODE (x, EQ);
14438 if (insn_code < 0 && GET_CODE (x) == NE)
14440 PUT_CODE (x, EQ);
14441 insn_code = recog (PATTERN (i), i, 0);
14442 if (insn_code >= 0)
14443 append_nott = true;
14444 else
14445 PUT_CODE (x, NE);
14448 gcc_assert (insn_code >= 0);
14450 /* Try to recursively split the insn. Some insns might refuse to split
14451 any further while we are in the treg_set_expr splitting phase. They
14452 will be emitted as part of the outer insn and then split again. */
14453 std::pair<rtx_insn*, rtx_insn*> insnlist =
14454 sh_try_split_insn_simple (i, curr_insn);
14456 /* Restore recog state. */
14457 recog_data = prev_recog_data;
14459 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14460 ? insnlist.second
14461 : NULL;
14462 if (dump_file)
14464 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14465 print_rtl (dump_file, insnlist.first);
14466 fprintf (dump_file, "\n");
14468 if (nott_insn != NULL)
14469 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14472 emit_insn (insnlist.first);
14474 if (nott_insn != NULL && append_nott)
14476 if (dump_file)
14477 fprintf (dump_file, "removing trailing nott\n");
14478 remove_insn (nott_insn);
14479 nott_insn = NULL;
14480 append_nott = false;
14483 if (append_nott)
14484 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14486 rtx_insn* first_insn = get_insns ();
14488 if (dump_file)
14490 fprintf (dump_file, "resulting insns:\n");
14491 print_rtl (dump_file, first_insn);
14492 fprintf (dump_file, "\n");
14495 return sh_treg_insns (first_insn, nott_insn);
14498 /*------------------------------------------------------------------------------
14499 Mode switching support code.
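/* Emit insns that switch the FPSCR precision mode from PREV_MODE to MODE.
   For TARGET_SH4A_FP / TARGET_SH4_300 this is done by toggling the PR (and
   SZ) bits; otherwise FPSCR is read into a register, the PR/SZ bits are
   adjusted with and/or/xor, and the register is written back to FPSCR.  */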
14502 static void
14503 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14504 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14506 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14507 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14509 emit_insn (gen_toggle_pr ());
14510 if (TARGET_FMOVD)
14511 emit_insn (gen_toggle_sz ());
14513 else if (mode != FP_MODE_NONE)
14515 rtx tmp = gen_reg_rtx (SImode);
14516 emit_insn (gen_sts_fpscr (tmp));
14517 rtx i = NULL;
14519 const unsigned HOST_WIDE_INT fpbits =
14520 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14522 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14523 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14524 else if (mode == FP_MODE_SINGLE)
14525 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14526 else if (mode == FP_MODE_DOUBLE)
14527 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14528 else
14529 gcc_unreachable ();
14531 emit_insn (i);
14532 emit_insn (gen_lds_fpscr (tmp));
14536 static int
14537 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14539 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14542 static int
14543 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14545 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
14546 get_attr_fp_set (insn) != FP_SET_NONE)
14547 return (int) get_attr_fp_set (insn);
14548 else
14549 return mode;
14552 static int
14553 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14555 return NORMAL_MODE (entity);
14558 static int
14559 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14561 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14564 static int
14565 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14567 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14570 /*------------------------------------------------------------------------------
14571 Misc
14574 /* Return true if we use LRA instead of reload pass. */
14575 static bool
14576 sh_lra_p (void)
14578 return sh_lra_flag;
14581 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14583 static bool
14584 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14585 unsigned int align,
14586 enum by_pieces_operation op,
14587 bool speed_p)
14589 switch (op)
14591 case MOVE_BY_PIECES:
14592 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14593 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14594 case STORE_BY_PIECES:
14595 case SET_BY_PIECES:
14596 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14597 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14598 default:
14599 return default_use_by_pieces_infrastructure_p (size, align,
14600 op, speed_p);
14604 #include "gt-sh.h"